aboutsummaryrefslogtreecommitdiffstats
path: root/scraper/google.php
diff options
context:
space:
mode:
author lolcat <will@lolcat.ca> 2025-06-01 13:03:39 -0400
committer lolcat <will@lolcat.ca> 2025-06-01 13:03:39 -0400
commit b61bc6d07cbd62b544142b898f668c7f40ba8953 (patch)
tree f65176b3cb89b7368b2250857b278ef686fdb115 /scraper/google.php
parent 8d50667b0d9c9f5d2e1603fa2cb3d7bfb89feb8b (diff)
fix google image crash
Diffstat (limited to 'scraper/google.php')
-rw-r--r-- scraper/google.php 63
1 files changed, 51 insertions, 12 deletions
diff --git a/scraper/google.php b/scraper/google.php
index b3b3b13..cb8c5f2 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -634,7 +634,7 @@ class google{
private function scrape_imagearr($html){
// get image links arrays
preg_match_all(
- '/\[0,"([^"]+)",\["([^"]+)\",([0-9]+),([0-9]+)\],\["([^"]+)",([0-9]+),([0-9]+)\]/',
+ '/\[[0-9]+,"([^"]+)",\["([^"]+)\",([0-9]+),([0-9]+)\],\["([^"]+)",([0-9]+),([0-9]+)\]/',
$html,
$image_arr
);
@@ -644,14 +644,41 @@ class google{
for($i=0; $i<count($image_arr[1]); $i++){
- $this->image_arr[$image_arr[1][$i]] =
- [
+ $original =
+ $this->fuckhtml
+ ->parseJsString(
+ $image_arr[5][$i]
+ );
+
+ if(
+ preg_match(
+ '/^x-raw-image/',
+ $original
+ )
+ ){
+
+ // only add thumbnail, google doesnt have OG resolution
+ $this->image_arr[$image_arr[1][$i]] = [
[
"url" =>
- $this->fuckhtml
- ->parseJsString(
- $image_arr[5][$i]
+ $this->unshit_thumb(
+ $this->fuckhtml
+ ->parseJsString(
+ $image_arr[2][$i]
+ )
),
+ "width" => (int)$image_arr[7][$i], // pass the OG image width & height
+ "height" => (int)$image_arr[6][$i]
+ ]
+ ];
+
+ continue;
+ }
+
+ $this->image_arr[$image_arr[1][$i]] =
+ [
+ [
+ "url" => $original,
"width" => (int)$image_arr[7][$i],
"height" => (int)$image_arr[6][$i]
],
@@ -2635,8 +2662,8 @@ class google{
}
}
/*
- $handle = fopen("scraper/google-img.html", "r");
- $html = fread($handle, filesize("scraper/google-img.html"));
+ $handle = fopen("scraper/page.html", "r");
+ $html = fread($handle, filesize("scraper/page.html"));
fclose($handle);*/
try{
@@ -2679,6 +2706,21 @@ class google{
$this->fuckhtml
->getElementsByTagName("img")[0];
+ // make sure we dont attempt to show an image we dont have data for
+ if(
+ isset($div["attributes"]["data-docid"]) &&
+ isset($this->image_arr[$div["attributes"]["data-docid"]])
+ ){
+
+ $source =
+ $this->image_arr[
+ $div["attributes"]["data-docid"]
+ ];
+ }else{
+
+ continue;
+ }
+
$out["image"][] = [
"title" =>
$this->titledots(
@@ -2687,10 +2729,7 @@ class google{
$image["attributes"]["alt"]
)
),
- "source" =>
- $this->image_arr[
- $div["attributes"]["data-docid"]
- ],
+ "source" => $source,
"url" =>
$this->fuckhtml
->getTextContent(
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage