aboutsummaryrefslogtreecommitdiffstats
path: root/scraper/google.php
diff options
context:
space:
mode:
author lolcat <will@lolcat.ca> 2024-08-02 21:25:39 -0400
committer lolcat <will@lolcat.ca> 2024-08-02 21:25:39 -0400
commit 36993013e5325352d7dba4e6cf664f2c0692ce24 (patch)
tree 30753fb59d8aa93755d799e5f342f8d7213ea2fc /scraper/google.php
parent beb08f46e270d0a9c4e949f633a752e7436c103f (diff)
fixed google piece of shit website i hate it so much
Diffstat (limited to 'scraper/google.php')
-rw-r--r-- scraper/google.php 218
1 files changed, 119 insertions, 99 deletions
diff --git a/scraper/google.php b/scraper/google.php
index 377122f..ab526f8 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -799,128 +799,147 @@ class google{
$title = "Notice";
}
- $description = [];
-
- $as =
+ $div =
$this->fuckhtml
->getElementsByTagName(
- "a"
+ "div"
);
- if(count($as) !== 0){
+ // probe for related searches div, if found, ignore it cause its shit
+ $probe =
+ $this->fuckhtml
+ ->getElementsByAttributeValue(
+ "role",
+ "list",
+ $div
+ );
+
+ // also probe for children
+ if(count($probe) === 0){
+
+ $probe =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->getstyle(
+ [
+ "flex-shrink" => "0",
+ "-moz-box-flex" => "0",
+ "flex-grow" => "0",
+ "overflow" => "hidden"
+ ]
+ ),
+ $div
+ );
+ }
+
+ if(count($probe) === 0){
- $first = true;
+ $description = [];
- foreach($as as $a){
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ );
+
+ if(count($as) !== 0){
- $text_link =
- $this->fuckhtml
- ->getTextContent(
- $a
- );
+ $first = true;
- if(stripos($text_link, "repeat the search") !== false){
+ foreach($as as $a){
+
+ $text_link =
+ $this->fuckhtml
+ ->getTextContent(
+ $a
+ );
+
+ if(stripos($text_link, "repeat the search") !== false){
+
+ $last_page = true;
+ break 2;
+ }
- $last_page = true;
- break 2;
+ $parts =
+ explode(
+ $a["outerHTML"],
+ $card["innerHTML"],
+ 2
+ );
+
+ $card["innerHTML"] = $parts[1];
+
+ $value =
+ preg_replace(
+ '/ +/',
+ " ",
+ $this->fuckhtml
+ ->getTextContent(
+ $parts[0],
+ false,
+ false
+ )
+ );
+
+ if(strlen(trim($value)) !== 0){
+
+ $description[] = [
+ "type" => "text",
+ "value" => $value
+ ];
+
+ if($first){
+
+ $description[0]["value"] =
+ ltrim($description[0]["value"]);
+ }
+ }
+
+ $first = false;
+
+ $description[] = [
+ "type" => "link",
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $a["attributes"]
+ ["href"]
+ ),
+ "value" => $text_link
+ ];
}
- $parts =
- explode(
- $a["outerHTML"],
+ $text =
+ $this->fuckhtml
+ ->getTextContent(
$card["innerHTML"],
- 2
+ false,
+ false
);
- $card["innerHTML"] = $parts[1];
-
- $value =
- preg_replace(
- '/ +/',
- " ",
- $this->fuckhtml
- ->getTextContent(
- $parts[0],
- false,
- false
- )
- );
-
- if(strlen(trim($value)) !== 0){
-
+ if(strlen(trim($text)) !== 0){
+
$description[] = [
"type" => "text",
- "value" => $value
+ "value" =>
+ rtrim(
+ $text
+ )
];
-
- if($first){
-
- $description[0]["value"] =
- ltrim($description[0]["value"]);
- }
}
-
- $first = false;
-
- $description[] = [
- "type" => "link",
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $a["attributes"]
- ["href"]
- ),
- "value" => $text_link
- ];
}
- $text =
- $this->fuckhtml
- ->getTextContent(
- $card["innerHTML"],
- false,
- false
- );
-
- if(strlen(trim($text)) !== 0){
+ if(count($description) !== 0){
- $description[] = [
- "type" => "text",
- "value" =>
- rtrim(
- $text
- )
+ $out["answer"][] = [
+ "title" => $title,
+ "description" => $description,
+ "url" => null,
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
];
}
-
- }else{
-
- // @TODO: Check if this ever gets populated without giving me garbage
- /*
- $text =
- $this->fuckhtml
- ->getTextContent(
- $card
- );
-
- if($text != ""){
- $description[] = [
- "type" => "text",
- "value" => $text
- ];
- }*/
- }
-
- if(count($description) !== 0){
-
- $out["answer"][] = [
- "title" => $title,
- "description" => $description,
- "url" => null,
- "thumb" => null,
- "table" => [],
- "sublink" => []
- ];
}
}
@@ -2451,6 +2470,7 @@ class google{
$this->getstyle(
[
"outline-offset" => "-1px",
+ "outline-width" => "1px",
"display" => "flex",
"flex-direction" => "column",
"flex-grow" => "1"
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage