diff options
| author | lolcat <will@lolcat.ca> | 2025-08-03 12:28:57 -0400 |
|---|---|---|
| committer | lolcat <will@lolcat.ca> | 2025-08-03 12:28:57 -0400 |
| commit | f30872134fc925ca47b2b20830290904141ea143 (patch) | |
| tree | dc752009144c6d862b318374b97b56e610ff3cc1 /scraper/mojeek.php | |
| parent | 2c4dc7da84dbed4dbea21e8093af4e77b66b03d6 (diff) | |
handle mojeek block
Diffstat (limited to 'scraper/mojeek.php')
| -rw-r--r-- | scraper/mojeek.php | 29 |
1 files changed, 24 insertions, 5 deletions
```diff
diff --git a/scraper/mojeek.php b/scraper/mojeek.php
index b2d6ed5..c15d34c 100644
--- a/scraper/mojeek.php
+++ b/scraper/mojeek.php
@@ -501,11 +501,6 @@ class mojeek{
 				throw new Exception("Failed to get HTML");
 			}
 
-			/*
-			$handle = fopen("scraper/mojeek.html", "r");
-			$html = fread($handle, filesize("scraper/mojeek.html"));
-			fclose($handle);*/
-
 		}
 
 		$out = [
@@ -526,6 +521,8 @@ class mojeek{
 
 		$this->fuckhtml->load($html);
 
+		$this->detect_block();
+
 		$results =
 			$this->fuckhtml
 			->getElementsByClassName("results-standard", "ul");
@@ -1034,6 +1031,8 @@ class mojeek{
 
 		$this->fuckhtml->load($html);
 
+		$this->detect_block();
+
 		$articles =
 			$this->fuckhtml->getElementsByTagName("article");
 
@@ -1166,6 +1165,26 @@ class mojeek{
 		return $out;
 	}
 
+	private function detect_block(){
+
+		$title =
+			$this->fuckhtml
+			->getElementsByTagName(
+				"title"
+			);
+
+		if(
+			count($title) !== 0 &&
+			$this->fuckhtml
+			->getTextContent(
+				$title[0]["innerHTML"]
+			) == "403 - Forbidden"
+		){
+
+			throw new Exception("Mojeek blocked this instance or request proxy.");
+		}
+	}
+
 	private function titledots($title){
 
 		return trim($title, ". \t\n\r\0\x0B");
```
