about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: lolcat <will@lolcat.ca>, 2025-09-06 11:25:09 -0400
committer: lolcat <will@lolcat.ca>, 2025-09-06 11:25:09 -0400
commit: 6d34d43a019ef1bf26a4b07d91413eacea2da653 (patch)
tree: b28205d7ded046befce54ec8de47ef7e14e693a1
parent: c44d6292a02041dca8d0f572ee7d00e01254ab48 (diff)
fixed greppr, again
-rw-r--r-- scraper/greppr.php 115
1 file changed, 66 insertions, 49 deletions
diff --git a/scraper/greppr.php b/scraper/greppr.php
index fc8511c..3d8b517 100644
--- a/scraper/greppr.php
+++ b/scraper/greppr.php
@@ -1,4 +1,6 @@
<?php
+// greppr dev probably monitors 4get code, lol
+// hello greppr dude, add an API you moron
class greppr{
@@ -16,7 +18,7 @@ class greppr{
return [];
}
- private function get($proxy, $url, $get = [], $cookie = false, $post){
+ private function get($proxy, $url, $get = [], $cookies = [], $post = false){
$curlproc = curl_init();
@@ -24,6 +26,14 @@ class greppr{
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ $cookie = [];
+ foreach($cookies as $k => $v){
+
+ $cookie[] = "{$k}={$v}";
+ }
+
+ $cookie = implode("; ", $cookie);
+
if($post === false){
if($get !== []){
@@ -31,7 +41,7 @@ class greppr{
$url .= "?" . $get;
}
- if($cookie === false){
+ if($cookie == ""){
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: " . config::USER_AGENT,
@@ -57,7 +67,7 @@ class greppr{
"Sec-GPC: 1",
"Connection: keep-alive",
"Referer: https://greppr.org/search",
- "Cookie: PHPSESSID=$cookie",
+ "Cookie: {$cookie}",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
"Sec-Fetch-Mode: navigate",
@@ -85,7 +95,7 @@ class greppr{
"Sec-GPC: 1",
"Connection: keep-alive",
"Referer: https://greppr.org/",
- "Cookie: PHPSESSID=$cookie",
+ "Cookie: {$cookie}",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
"Sec-Fetch-Mode: navigate",
@@ -119,7 +129,7 @@ class greppr{
return $len;
}
- $headers[strtolower(trim($header[0]))] = trim($header[1]);
+ $headers[strtolower(trim($header[0]))][] = trim($header[1]);
return $len;
}
@@ -157,7 +167,7 @@ class greppr{
$proxy,
"https://greppr.org" . $tokens["get"],
[],
- $tokens["cookie"],
+ $tokens["cookies"],
false
);
}catch(Exception $error){
@@ -185,12 +195,12 @@ class greppr{
$proxy,
"https://greppr.org",
[],
- false,
+ [],
false
);
}catch(Exception $error){
- throw new Exception("Failed to fetch search tokens");
+ throw new Exception("Failed to fetch homepage");
}
//
@@ -198,7 +208,11 @@ class greppr{
//
$this->fuckhtml->load($html["data"]);
- $tokens = [];
+ $tokens = [
+ "req" => null,
+ "data" => null,
+ "cookies" => null
+ ];
$inputs =
$this->fuckhtml
@@ -213,63 +227,66 @@ class greppr{
continue;
}
- switch($input["attributes"]["name"]){
+ if(
+ isset($input["attributes"]["value"]) &&
+ !empty($input["attributes"]["value"])
+ ){
- case "var1":
- case "var2":
- case "n":
- $tokens[$input["attributes"]["name"]] =
- $this->fuckhtml
- ->getTextContent(
- $input["attributes"]["value"]
- );
- break;
+ $tokens
+ ["data"]
+ [$this->fuckhtml
+ ->getTextContent(
+ $input["attributes"]["name"]
+ )] =
+ $this->fuckhtml
+ ->getTextContent(
+ $input["attributes"]["value"]
+ );
+ }else{
- default:
- $tokens["req"] =
- $this->fuckhtml
- ->getTextContent(
- $input["attributes"]["name"]
- );
- break;
+ $tokens["req"] =
+ $this->fuckhtml
+ ->getTextContent(
+ $input["attributes"]["name"]
+ );
}
}
- // get cookie
- preg_match(
- '/PHPSESSID=([^;]+)/',
- $html["headers"]["set-cookie"],
- $cookie
- );
-
- if(!isset($cookie[1])){
+ if($tokens["req"] === null){
- // server sent an unexpected cookie
- throw new Exception("Got malformed cookie");
+ throw new Exception("Failed to get request ID");
}
- $tokens["cookie"] = $cookie[1];
-
- if($tokens === false){
+ if(isset($html["headers"]["set-cookie"])){
- throw new Exception("Failed to grep search tokens");
+ foreach($html["headers"]["set-cookie"] as $cookie){
+
+ if(
+ preg_match(
+ '/([^=]+)=([^;]+)/',
+ $cookie,
+ $matches
+ )
+ ){
+
+ $tokens["cookies"][$matches[1]] = $matches[2];
+ }
+ }
}
//
// Get initial search page
//
- try{
+ $tokens_req = $tokens["data"];
+ $tokens_req[$tokens["req"]] = $search;
+ try{
+
$html = $this->get(
$proxy,
"https://greppr.org/search",
- [
- "var1" => $tokens["var1"],
- "var2" => $tokens["var2"],
- $tokens["req"] => $search,
- "n" => $tokens["n"]
- ],
- $tokens["cookie"],
+ $tokens_req,
+ $tokens["cookies"],
true
);
}catch(Exception $error){
@@ -338,7 +355,7 @@ class greppr{
->getTextContent(
$a["attributes"]["href"]
),
- "cookie" => $tokens["cookie"]
+ "cookies" => $tokens["cookies"]
]),
"web",
$proxy
Send patches to the email below:
yukais@pinapelz.com
Include the subject [PATCH repo_name].
pinapelz.com
homepage