diff options
| author | lolcat <will@lolcat.ca> | 2025-10-21 00:35:20 -0400 |
|---|---|---|
| committer | lolcat <will@lolcat.ca> | 2025-10-21 00:35:20 -0400 |
| commit | 46e6ed12e34b2d905ffb46526798406c7d50f100 (patch) | |
| tree | 470e0c6b8c3b95a4b937b761dfa4d624646a118a /scraper | |
| parent | ce75cbda812a31b5654be755d3b7436f853d8c40 (diff) | |
fix invalid sublinks on google scraper
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/google.php | 42 |
1 files changed, 26 insertions, 16 deletions
diff --git a/scraper/google.php b/scraper/google.php index 148b1b5..03fa718 100644 --- a/scraper/google.php +++ b/scraper/google.php @@ -953,23 +953,33 @@ class google{ ]) ); - if(count($probe) !== 0){ + $url = + $this->unshiturl( + $a["attributes"]["href"] + ); + + if( + preg_match( + '/^http/', + $url + ) + ){ - $sublinks[] = [ - "title" => - $this->titledots( - $this->fuckhtml - ->getTextContent( - $probe[0] - ) - ), - "description" => null, - "date" => null, - "url" => - $this->unshiturl( - $a["attributes"]["href"] - ) - ]; + if(count($probe) !== 0){ + + $sublinks[] = [ + "title" => + $this->titledots( + $this->fuckhtml + ->getTextContent( + $probe[0] + ) + ), + "description" => null, + "date" => null, + "url" => $url + ]; + } } } |
