/*
	Extract the first balanced array/object literal embedded in a string.
	
	Scans $json byte by byte for the first "[" or "{" that sits outside
	of a quoted region, then returns the substring running from that
	opener up to the byte where the array and object nesting counters
	both return to zero.
	
	Both double and single quotes delimit quoted regions, so brackets
	inside string values are ignored. A quote only counts as escaped when
	it is preceded by an ODD number of consecutive backslashes, so a
	string value ending in an escaped backslash ("C:\\") still closes.
	
	@param string $json Text that contains a JSON/JS literal somewhere
	@return string|null The extracted literal, or null when no balanced
	                    literal could be found
*/
public function extract_json($json){
	
	$len = strlen($json);
	$array_level = 0;
	$object_level = 0;
	$in_quote = null;
	$start = null;
	
	// number of consecutive backslashes directly before the current byte
	$backslashes = 0;
	
	for($i=0; $i<$len; $i++){
		
		$char = $json[$i];
		
		switch($char){
			
			case "[":
				if($in_quote === null){
					
					$array_level++;
					if($start === null){
						
						$start = $i;
					}
				}
				break;
			
			case "]":
				// closers seen before the first opener are stray text:
				// counting them would push the level negative and make
				// the next opener look like a complete literal
				if(
					$in_quote === null &&
					$start !== null
				){
					
					$array_level--;
				}
				break;
			
			case "{":
				if($in_quote === null){
					
					$object_level++;
					if($start === null){
						
						$start = $i;
					}
				}
				break;
			
			case "}":
				if(
					$in_quote === null &&
					$start !== null
				){
					
					$object_level--;
				}
				break;
			
			case "\"":
			case "'":
				// an even run of backslashes means they escape each
				// other, leaving this quote unescaped
				if($backslashes % 2 === 0){
					
					if($in_quote === null){
						
						// open quote
						$in_quote = $char;
					}elseif($in_quote === $char){
						
						// close quote
						$in_quote = null;
					}
				}
				break;
		}
		
		$backslashes = $char === "\\" ? $backslashes + 1 : 0;
		
		if(
			$start !== null &&
			$array_level === 0 &&
			$object_level === 0
		){
			
			return substr($json, $start, $i - $start + 1);
		}
	}
	
	// no opener found, or the literal never balanced
	return null;
}