diff options
| author | lolcat <will@lolcat.ca> | 2024-08-02 21:25:39 -0400 |
|---|---|---|
| committer | lolcat <will@lolcat.ca> | 2024-08-02 21:25:39 -0400 |
| commit | 36993013e5325352d7dba4e6cf664f2c0692ce24 (patch) | |
| tree | 30753fb59d8aa93755d799e5f342f8d7213ea2fc /scraper/google.php | |
| parent | beb08f46e270d0a9c4e949f633a752e7436c103f (diff) | |
fixed google piece of shit website i hate it so much
Diffstat (limited to 'scraper/google.php')
| -rw-r--r-- | scraper/google.php | 218 |
1 files changed, 119 insertions, 99 deletions
diff --git a/scraper/google.php b/scraper/google.php index 377122f..ab526f8 100644 --- a/scraper/google.php +++ b/scraper/google.php @@ -799,128 +799,147 @@ class google{ $title = "Notice"; } - $description = []; - - $as = + $div = $this->fuckhtml ->getElementsByTagName( - "a" + "div" ); - if(count($as) !== 0){ + // probe for related searches div, if found, ignore it cause its shit + $probe = + $this->fuckhtml + ->getElementsByAttributeValue( + "role", + "list", + $div + ); + + // also probe for children + if(count($probe) === 0){ + + $probe = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "flex-shrink" => "0", + "-moz-box-flex" => "0", + "flex-grow" => "0", + "overflow" => "hidden" + ] + ), + $div + ); + } + + if(count($probe) === 0){ - $first = true; + $description = []; - foreach($as as $a){ + $as = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if(count($as) !== 0){ - $text_link = - $this->fuckhtml - ->getTextContent( - $a - ); + $first = true; - if(stripos($text_link, "repeat the search") !== false){ + foreach($as as $a){ + + $text_link = + $this->fuckhtml + ->getTextContent( + $a + ); + + if(stripos($text_link, "repeat the search") !== false){ + + $last_page = true; + break 2; + } - $last_page = true; - break 2; + $parts = + explode( + $a["outerHTML"], + $card["innerHTML"], + 2 + ); + + $card["innerHTML"] = $parts[1]; + + $value = + preg_replace( + '/ +/', + " ", + $this->fuckhtml + ->getTextContent( + $parts[0], + false, + false + ) + ); + + if(strlen(trim($value)) !== 0){ + + $description[] = [ + "type" => "text", + "value" => $value + ]; + + if($first){ + + $description[0]["value"] = + ltrim($description[0]["value"]); + } + } + + $first = false; + + $description[] = [ + "type" => "link", + "url" => + $this->fuckhtml + ->getTextContent( + $a["attributes"] + ["href"] + ), + "value" => $text_link + ]; } - $parts = - explode( - $a["outerHTML"], + $text = + $this->fuckhtml + ->getTextContent( $card["innerHTML"], - 2 + false, + false ); - $card["innerHTML"] = $parts[1]; - - $value = - preg_replace( - '/ +/', - " ", - $this->fuckhtml - ->getTextContent( - $parts[0], - false, - false - ) - ); - - if(strlen(trim($value)) !== 0){ - + if(strlen(trim($text)) !== 0){ + $description[] = [ "type" => "text", - "value" => $value + "value" => + rtrim( + $text + ) ]; - - if($first){ - - $description[0]["value"] = - ltrim($description[0]["value"]); - } } - - $first = false; - - $description[] = [ - "type" => "link", - "url" => - $this->fuckhtml - ->getTextContent( - $a["attributes"] - ["href"] - ), - "value" => $text_link - ]; } - $text = - $this->fuckhtml - ->getTextContent( - $card["innerHTML"], - false, - false - ); - - if(strlen(trim($text)) !== 0){ + if(count($description) !== 0){ - $description[] = [ - "type" => "text", - "value" => - rtrim( - $text - ) + $out["answer"][] = [ + "title" => $title, + "description" => $description, + "url" => null, + "thumb" => null, + "table" => [], + "sublink" => [] ]; } - - }else{ - - // @TODO: Check if this ever gets populated without giving me garbage - /* - $text = - $this->fuckhtml - ->getTextContent( - $card - ); - - if($text != ""){ - $description[] = [ - "type" => "text", - "value" => $text - ]; - }*/ - } - - if(count($description) !== 0){ - - $out["answer"][] = [ - "title" => $title, - "description" => $description, - "url" => null, - "thumb" => null, - "table" => [], - "sublink" => [] - ]; } } @@ -2451,6 +2470,7 @@ class google{ $this->getstyle( [ "outline-offset" => "-1px", + "outline-width" => "1px", "display" => "flex", "flex-direction" => "column", "flex-grow" => "1" |
