about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author lolcat <will@lolcat.ca> 2026-02-24 17:53:38 -0500
committer lolcat <will@lolcat.ca> 2026-02-24 17:53:38 -0500
commit cdf9164113a3cef01b4f26f6e02887a3e3c91267 (patch)
tree 47d23aab6cc17ea4a4c19c14000d81b382612bd8
parent c42e62cb09c9fc16f627c96b8b5e1cbc9bac6937 (diff)
stop fucking changing how you redirect people you piece of shit
-rw-r--r-- scraper/marginalia.php 64
1 files changed, 49 insertions, 15 deletions
diff --git a/scraper/marginalia.php b/scraper/marginalia.php
index c40ab85..f5be5e1 100644
--- a/scraper/marginalia.php
+++ b/scraper/marginalia.php
@@ -431,29 +431,63 @@ class marginalia{
$this->fuckhtml->load($html);
// detect meta redirect
- // <html lang="en-US"> <head> <title>Error</title> <link rel="stylesheet" href="/serp.css"> <meta http-equiv="refresh" content="3; URL='?query=asmr&sst=S-873f5da96e8b60'"> </head> <body> <div class="infobox"> <h1>Wait For A Moment</h1> <p>The search engine is currently barraged by queries from bots</p> <p>Please wait for <b id="countdown" data-tr="3">3</b> seconds. If your browser supports it, it will refresh on its own. Otherwise, you can use <a href="?query&#x3D;asmr&amp;sst&#x3D;S-873f5da96e8b60">this link</a> to manually proceed. </div> </body> <script lang="javascript"> window.setInterval(()=>{ const cd = document.getElementById('countdown'); var tr = cd.getAttribute('data-tr'); tr--; cd.setAttribute('data-tr', tr); cd.innerHTML=tr; }, 1000); </script> </html>
-
- $metas =
+ $title =
$this->fuckhtml
- ->getElementsByAttributeValue(
- "http-equiv",
- "refresh",
- "meta"
+ ->getElementsByTagName(
+ "title"
);
- if(count($metas) !== 0){
+ if(
+ count($title) !== 0 &&
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ ) == "Error"
+ ){
- preg_match(
- '/^([0-9]+).*URL=\'([^\']*)/',
+ // redirect detected
+
+ // get timeout
+ $timeout =
+ $this->fuckhtml
+ ->getElementById(
+ "countdown",
+ "b"
+ );
+
+ if(count($timeout) === null){
+
+ throw new Exception("Failed to find timeout value");
+ }
+
+ $timeout =
$this->fuckhtml
->getTextContent(
- $metas[0]["attributes"]["content"]
- ),
- $rules
+ $timeout
+ );
+
+ preg_match(
+ '/location\.replace\(\'([^\']+)\'\)/',
+ $html,
+ $redirect
);
- sleep((int)$rules[1]);
- return $this->web($get, $rules[2]);
+ if(!isset($redirect[1])){
+
+ throw new Exception("Failed to grep redirect value");
+ }
+
+ $one = 1;
+ $redirect =
+ str_replace(
+ "/search",
+ "",
+ $redirect[1],
+ $one
+ );
+
+ sleep((int)$timeout);
+ return $this->web($get, $redirect);
}
$sections =
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage