diff options
| author | lolcat <will@lolcat.ca> | 2026-02-24 17:53:38 -0500 |
|---|---|---|
| committer | lolcat <will@lolcat.ca> | 2026-02-24 17:53:38 -0500 |
| commit | cdf9164113a3cef01b4f26f6e02887a3e3c91267 (patch) | |
| tree | 47d23aab6cc17ea4a4c19c14000d81b382612bd8 /scraper | |
| parent | c42e62cb09c9fc16f627c96b8b5e1cbc9bac6937 (diff) | |
stop fucking changing how you redirect people you piece of shit
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/marginalia.php | 64 |
1 files changed, 49 insertions, 15 deletions
diff --git a/scraper/marginalia.php b/scraper/marginalia.php index c40ab85..f5be5e1 100644 --- a/scraper/marginalia.php +++ b/scraper/marginalia.php @@ -431,29 +431,63 @@ class marginalia{ $this->fuckhtml->load($html); // detect meta redirect - // <html lang="en-US"> <head> <title>Error</title> <link rel="stylesheet" href="/serp.css"> <meta http-equiv="refresh" content="3; URL='?query=asmr&sst=S-873f5da96e8b60'"> </head> <body> <div class="infobox"> <h1>Wait For A Moment</h1> <p>The search engine is currently barraged by queries from bots</p> <p>Please wait for <b id="countdown" data-tr="3">3</b> seconds. If your browser supports it, it will refresh on its own. Otherwise, you can use <a href="?query=asmr&sst=S-873f5da96e8b60">this link</a> to manually proceed. </div> </body> <script lang="javascript"> window.setInterval(()=>{ const cd = document.getElementById('countdown'); var tr = cd.getAttribute('data-tr'); tr--; cd.setAttribute('data-tr', tr); cd.innerHTML=tr; }, 1000); </script> </html> - - $metas = + $title = $this->fuckhtml - ->getElementsByAttributeValue( - "http-equiv", - "refresh", - "meta" + ->getElementsByTagName( + "title" ); - if(count($metas) !== 0){ + if( + count($title) !== 0 && + $this->fuckhtml + ->getTextContent( + $title[0] + ) == "Error" + ){ - preg_match( - '/^([0-9]+).*URL=\'([^\']*)/', + // redirect detected + + // get timeout + $timeout = + $this->fuckhtml + ->getElementById( + "countdown", + "b" + ); + + if(count($timeout) === null){ + + throw new Exception("Failed to find timeout value"); + } + + $timeout = $this->fuckhtml ->getTextContent( - $metas[0]["attributes"]["content"] - ), - $rules + $timeout + ); + + preg_match( + '/location\.replace\(\'([^\']+)\'\)/', + $html, + $redirect ); - sleep((int)$rules[1]); - return $this->web($get, $rules[2]); + if(!isset($redirect[1])){ + + throw new Exception("Failed to grep redirect value"); + } + + $one = 1; + $redirect = + str_replace( + "/search", + "", + $redirect[1], + $one + ); + + sleep((int)$timeout); + return $this->web($get, $redirect); } $sections = |
