aboutsummaryrefslogtreecommitdiffstats
path: root/scraper/google.php
diff options
context:
space:
mode:
author lolcat <will@lolcat.ca> 2025-10-21 00:35:20 -0400
committer lolcat <will@lolcat.ca> 2025-10-21 00:35:20 -0400
commit 46e6ed12e34b2d905ffb46526798406c7d50f100 (patch)
tree 470e0c6b8c3b95a4b937b761dfa4d624646a118a /scraper/google.php
parent ce75cbda812a31b5654be755d3b7436f853d8c40 (diff)
fix invalid sublinks on google scraper
Diffstat (limited to 'scraper/google.php')
-rw-r--r-- scraper/google.php 42
1 file changed, 26 insertions, 16 deletions
diff --git a/scraper/google.php b/scraper/google.php
index 148b1b5..03fa718 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -953,23 +953,33 @@ class google{
])
);
- if(count($probe) !== 0){
+ $url =
+ $this->unshiturl(
+ $a["attributes"]["href"]
+ );
+
+ if(
+ preg_match(
+ '/^http/',
+ $url
+ )
+ ){
- $sublinks[] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $probe[0]
- )
- ),
- "description" => null,
- "date" => null,
- "url" =>
- $this->unshiturl(
- $a["attributes"]["href"]
- )
- ];
+ if(count($probe) !== 0){
+
+ $sublinks[] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $probe[0]
+ )
+ ),
+ "description" => null,
+ "date" => null,
+ "url" => $url
+ ];
+ }
}
}
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage