about | summary | refs | log | tree | commit | diff | stats
path: root/lib
diff options
context:
space:
mode:
author: lolcat <will@lolcat.ca> 2024-12-17 00:31:15 -0500
committer: lolcat <will@lolcat.ca> 2024-12-17 00:31:15 -0500
commit: 774f7113dfe3c1197c7820369c7acab9e0c55f37 (patch)
tree: 7a9f9bfb165320e4f34d8e8fe0a18ca370ef5a8b /lib
parent: 0b3bbe0f152ee2fb4c03c1d2ba11d010b06e284c (diff)
duckduckgo scraper rewrite
Diffstat (limited to 'lib')
-rw-r--r-- lib/backend.php 1
-rw-r--r-- lib/fuckhtml.php 81
2 files changed, 82 insertions, 0 deletions
diff --git a/lib/backend.php b/lib/backend.php
index 7c450da..66e78a1 100644
--- a/lib/backend.php
+++ b/lib/backend.php
@@ -75,6 +75,7 @@ class backend{
break;
case "socks5_hostname":
+ case "socks5h":
case "socks5a":
curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
diff --git a/lib/fuckhtml.php b/lib/fuckhtml.php
index ee1353c..5b45578 100644
--- a/lib/fuckhtml.php
+++ b/lib/fuckhtml.php
@@ -526,4 +526,85 @@ class fuckhtml{
$string
);
}
+
+	/**
+	 * Extract the first balanced JSON array or object embedded in a string.
+	 *
+	 * Scans for the first unquoted "[" or "{" and returns the substring that
+	 * ends where every bracket opened since then has been closed. Brackets
+	 * inside single or double quoted strings are ignored. A backslash escapes
+	 * the character after it, so \" never opens or closes a string, while
+	 * the last quote of "a\\" (an escaped backslash) still closes it.
+	 *
+	 * @param string $json Text containing a JSON value, eg. inline javascript
+	 * @return string|null The matched substring, or null if nothing balanced
+	 */
+	public function extract_json($json){
+
+		$len = strlen($json);
+		$array_level = 0;
+		$object_level = 0;
+		$in_quote = null;
+		$start = null;
+
+		for($i=0; $i<$len; $i++){
+
+			$char = $json[$i];
+
+			if($char === "\\"){
+
+				// skip the escaped character; looking only at the previous
+				// byte would mistake the closing quote of "a\\" for \"
+				$i++;
+				continue;
+			}
+
+			if($in_quote !== null){
+
+				// inside a string, only the matching quote matters
+				if($char === $in_quote){
+
+					$in_quote = null;
+				}
+				continue;
+			}
+
+			switch($char){
+
+				case "\"":
+				case "'":
+					$in_quote = $char;
+					break;
+
+				case "[":
+					$array_level++;
+					if($start === null){ $start = $i; }
+					break;
+
+				case "{":
+					$object_level++;
+					if($start === null){ $start = $i; }
+					break;
+
+				case "]":
+					// a closer seen before the first opener is stray text
+					if($start !== null){ $array_level--; }
+					break;
+
+				case "}":
+					if($start !== null){ $object_level--; }
+					break;
+			}
+
+			if(
+				$start !== null &&
+				$array_level === 0 &&
+				$object_level === 0
+			){
+
+				return substr($json, $start, $i - $start + 1);
+			}
+		}
+		return null;
+	}
}
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage