aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlolcat <will@lolcat.ca>2025-06-20 01:18:57 -0400
committerlolcat <will@lolcat.ca>2025-06-20 01:18:57 -0400
commita2bc1e6190bab561b7244e2e9bbda994ab0d0d31 (patch)
treedfcc19b4f7c116887a01437edfd710efa0514782
parentf73b5f0298f06b44c5cd8a84e327b8e1d7d4ea95 (diff)
bypass anubis bullshit on marginalia
-rw-r--r--lib/anubis.php100
-rw-r--r--scraper/marginalia.php113
2 files changed, 206 insertions, 7 deletions
diff --git a/lib/anubis.php b/lib/anubis.php
new file mode 100644
index 0000000..ab075ff
--- /dev/null
+++ b/lib/anubis.php
@@ -0,0 +1,100 @@
+<?php
+
+//
+// Reference
+// https://github.com/TecharoHQ/anubis/blob/ecc716940e34ebe7249974f2789a99a2c7115e4e/web/js/proof-of-work.mjs
+//
+
+class anubis{
+	
+	//
+	// A SHA-256 digest is 32 bytes = 64 hex digits (nibbles), so a
+	// challenge can never ask for more than 64 leading zero nibbles.
+	//
+	private const HASH_NIBBLES = 64;
+	
+	//
+	// HTML parser instance.
+	// Declared explicitly: creating properties on the fly is deprecated
+	// since PHP 8.2. Kept public because it used to be a (public)
+	// dynamic property.
+	//
+	public $fuckhtml;
+	
+	public function __construct(){
+		
+		// Resolve next to this file so the lookup does not depend on
+		// include_path or the current working directory.
+		include_once __DIR__ . "/fuckhtml.php";
+		$this->fuckhtml = new fuckhtml();
+	}
+	
+	//
+	// Extract the proof-of-work challenge embedded in an Anubis
+	// interstitial page and solve it.
+	//
+	// $html    Raw HTML of the challenge page.
+	// Returns  ["response" => hex sha256 digest, "nonce" => int]
+	// Throws   Exception when the challenge element is missing, is not
+	//          valid JSON, or carries invalid challenge data.
+	//
+	public function scrape($html){
+		
+		$this->fuckhtml->load($html);
+		
+		$script =
+			$this->fuckhtml
+			->getElementById(
+				"anubis_challenge",
+				"script"
+			);
+		
+		// Guard before count(): on PHP 8 count() raises a TypeError for
+		// non-countable values, which callers catching Exception would
+		// not see.
+		// NOTE(review): presumably getElementById() yields false when
+		// nothing matches -- confirm against fuckhtml.php
+		if(
+			(
+				!is_array($script) &&
+				!($script instanceof Countable)
+			) ||
+			count($script) === 0
+		){
+			
+			throw new Exception("Failed to scrape anubis challenge data");
+		}
+		
+		$script =
+			json_decode(
+				$this->fuckhtml
+				->getTextContent(
+					$script
+				),
+				true
+			);
+		
+		if($script === null){
+			
+			throw new Exception("Failed to decode anubis challenge data");
+		}
+		
+		// The difficulty comes from the remote page: besides the type
+		// check, make sure it fits inside a SHA-256 digest.
+		if(
+			!isset($script["challenge"]) ||
+			!isset($script["rules"]["difficulty"]) ||
+			!is_int($script["rules"]["difficulty"]) ||
+			!is_string($script["challenge"]) ||
+			$script["rules"]["difficulty"] < 0 ||
+			$script["rules"]["difficulty"] > self::HASH_NIBBLES
+		){
+			
+			throw new Exception("Found invalid challenge data");
+		}
+		
+		return $this->solve($script["challenge"], $script["rules"]["difficulty"]);
+	}
+	
+	//
+	// Tell whether the first $difficulty nibbles (hex digits) of a raw
+	// binary digest are all zero.
+	//
+	// $hash        Raw (binary) digest.
+	// $difficulty  Number of leading nibbles to check; the caller must
+	//              keep it within the digest length.
+	//
+	private function is_valid_hash($hash, $difficulty){
+		
+		for($i=0; $i<$difficulty; $i++){
+			
+			$byte = ord($hash[$i >> 1]);
+			
+			// Even positions are the high nibble of the byte, odd
+			// positions are the low nibble.
+			$nibble = ($i & 1) === 0 ? $byte >> 4 : $byte & 0x0f;
+			
+			if($nibble !== 0){
+				
+				return false;
+			}
+		}
+		
+		return true;
+	}
+	
+	//
+	// Brute-force the proof of work: find the smallest nonce, counting
+	// up from 0, for which sha256($data . $nonce) starts with
+	// $difficulty zero nibbles.
+	//
+	// $data            Challenge string.
+	// $difficulty      Required leading zero nibbles (0 to 64).
+	// $max_iterations  Optional cap on the number of nonces tried;
+	//                  null tries every non-negative integer.
+	// Returns          ["response" => hex digest, "nonce" => int]
+	// Throws           Exception on an out-of-range difficulty or when
+	//                  no nonce is found within the attempt limit.
+	//
+	public function solve($data, $difficulty = 5, $max_iterations = null){
+		
+		if(
+			!is_numeric($difficulty) ||
+			$difficulty < 0 ||
+			$difficulty > self::HASH_NIBBLES
+		){
+			
+			throw new Exception("Invalid anubis difficulty");
+		}
+		
+		// ceil() keeps the old loop semantics for fractional input
+		// (4.5 used to check 5 nibbles).
+		$difficulty = (int)ceil((float)$difficulty);
+		
+		// Stopping at PHP_INT_MAX keeps $nonce from overflowing into a
+		// float, which would make the loop hash the same string forever.
+		$limit =
+			$max_iterations === null ?
+			PHP_INT_MAX :
+			(int)$max_iterations;
+		
+		for($nonce=0; $nonce<$limit; $nonce++){
+			
+			$hash_binary = hash("sha256", $data . $nonce, true);
+			
+			if($this->is_valid_hash($hash_binary, $difficulty)){
+				
+				return [
+					"response" => bin2hex($hash_binary),
+					"nonce" => $nonce
+				];
+			}
+		}
+		
+		throw new Exception("Failed to solve anubis challenge within " . $limit . " attempts");
+	}
+	
+	//
+	// Backward-compatible alias of solve(), kept so existing callers
+	// keep working. Prefer solve() in new code.
+	//
+	public function rape($data, $difficulty = 5, $max_iterations = null){
+		
+		return $this->solve($data, $difficulty, $max_iterations);
+	}
+}
diff --git a/scraper/marginalia.php b/scraper/marginalia.php
index b9d555a..e62a485 100644
--- a/scraper/marginalia.php
+++ b/scraper/marginalia.php
@@ -3,7 +3,10 @@
class marginalia{
public function __construct(){
- include "lib/fuckhtml.php";
+ include "lib/anubis.php";
+ $this->anubis = new anubis();
+
+ include_once "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
include "lib/backend.php";
@@ -102,7 +105,40 @@ class marginalia{
);
}
- private function get($proxy, $url, $get = []){
+ private function get($proxy, $url, $get = [], $get_cookies = 1){
+
+ $curlproc = curl_init();
+
+ switch($get_cookies){
+
+ case 0:
+ $cookies = "";
+ $cookies_tmp = [];
+ curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){
+
+ $length = strlen($header);
+
+ $header = explode(":", $header, 2);
+
+ if(trim(strtolower($header[0])) == "set-cookie"){
+
+ $cookie_tmp = explode("=", trim($header[1]), 2);
+
+ $cookies_tmp[trim($cookie_tmp[0])] =
+ explode(";", $cookie_tmp[1], 2)[0];
+ }
+
+ return $length;
+ });
+ break;
+
+ case 1:
+ $cookies = "";
+ break;
+
+ default:
+ $cookies = "Cookie: " . $get_cookies;
+ }
$headers = [
"User-Agent: " . config::USER_AGENT,
@@ -110,6 +146,7 @@ class marginalia{
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"DNT: 1",
+ $cookies,
"Connection: keep-alive",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
@@ -118,8 +155,6 @@ class marginalia{
"Sec-Fetch-User: ?1"
];
- $curlproc = curl_init();
-
if($get !== []){
$get = http_build_query($get);
$url .= "?" . $get;
@@ -145,7 +180,19 @@ class marginalia{
throw new Exception(curl_error($curlproc));
}
- curl_close($curlproc);
+ if($get_cookies === 0){
+
+ $cookie = [];
+
+ foreach($cookies_tmp as $key => $value){
+
+ $cookie[] = $key . "=" . $value;
+ }
+
+ curl_close($curlproc);
+ return implode(";", $cookie);
+ }
+
return $data;
}
@@ -267,6 +314,55 @@ class marginalia{
// HTML parser
$proxy = $this->backend->get_ip();
+ //
+ // Bypass anubis check
+ //
+ if(($anubis_key = apcu_fetch("marginalia_cookie")) === false){
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://old-search.marginalia.nu/"
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get anubis challenge");
+ }
+
+ try{
+
+ $anubis_data = $this->anubis->scrape($html);
+ }catch(Exception $error){
+
+ throw new Exception($error);
+ }
+
+ // send anubis response & get cookies
+ // https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge?response=0000018966b086834f738bacba6031028adb5aa875974ead197a8b75778baf3a&nonce=39947&redir=https%3A%2F%2Fold-search.marginalia.nu%2F&elapsedTime=1164
+
+ try{
+
+ $anubis_key =
+ $this->get(
+ $proxy,
+ "https://old-search.marginalia.nu/.within.website/x/cmd/anubis/api/pass-challenge",
+ [
+ "response" => $anubis_data["response"],
+ "nonce" => $anubis_data["nonce"],
+ "redir" => "https://old-search.marginalia.nu/",
+ "elapsedTime" => random_int(1000, 2000)
+ ],
+ 0
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to submit anubis challenge");
+ }
+
+ apcu_store("marginalia_cookie", $anubis_key);
+ }
+
if($get["npt"]){
[$params, $proxy] =
@@ -279,7 +375,9 @@ class marginalia{
$html =
$this->get(
$proxy,
- "https://old-search.marginalia.nu/search?" . $params
+ "https://old-search.marginalia.nu/search?" . $params,
+ [],
+ $anubis_key
);
}catch(Exception $error){
@@ -309,7 +407,8 @@ class marginalia{
$this->get(
$proxy,
"https://old-search.marginalia.nu/search",
- $params
+ $params,
+ $anubis_key
);
}catch(Exception $error){
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage