about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: lolcat <will@lolcat.ca> 2026-03-18 00:01:22 -0400
committer: lolcat <will@lolcat.ca> 2026-03-18 00:01:22 -0400
commit: 61548e8b84e948ef71e405a980941a9fc996ae28 (patch)
tree: 9bfa394171127357efca7d981b73316f5d4e5fc2
parent: 2e5edda85b341074afd48a6a3c37ad9e2b249679 (diff)
fix google images
-rw-r--r-- scraper/google.php 1259
1 files changed, 61 insertions, 1198 deletions
diff --git a/scraper/google.php b/scraper/google.php
index 2f71e0e..73fd7a4 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -573,1148 +573,17 @@ class google{
public function web($get){
- // it broke again. lasted 3 months
- // lets hope for another solid 3 month
-
- $out = [
- "status" => "ok",
- "spelling" => [
- "type" => "no_correction",
- "using" => null,
- "correction" => null
- ],
- "npt" => null,
- "answer" => [],
- "web" => [],
- "image" => [],
- "video" => [],
- "news" => [],
- "related" => []
- ];
-
- if($get["npt"]){
-
- [$get, $proxy] = $this->backend->get($get["npt"], "web");
-
- try{
- $html =
- $this->get(
- $proxy,
- "https://www.google.com" . $get,
- [],
- true
- );
- }catch(Exception $error){
-
- throw new Exception("Failed to get HTML");
- }
- }else{
-
- $search = $get["s"];
- $country = $get["country"];
- $nsfw = $get["nsfw"];
- $lang = $get["lang"];
- $older = $get["older"];
- $newer = $get["newer"];
- $spellcheck = $get["spellcheck"];
- $proxy = $this->backend->get_ip();
-
- $offset = 0;
-
- $params = [
- "q" => $search,
- "hl" => "en",
- "udm" => 14
- ];
-
- // country
- if($country != "any"){
-
- $params["gl"] = $country;
- }
-
- // nsfw
- $params["safe"] = $nsfw == "yes" ? "off" : "active";
-
- // language
- if($lang != "any"){
-
- $params["lr"] = "lang_" . $lang;
- }
-
- // generate tbs
- $tbs = [];
-
- // get date
- $older = $older === false ? null : date("m/d/Y", $older);
- $newer = $newer === false ? null : date("m/d/Y", $newer);
-
- if(
- $older !== null ||
- $newer !== null
- ){
-
- $tbs["cdr"] = "1";
- $tbs["cd_min"] = $newer;
- $tbs["cd_max"] = $older;
- }
-
- // spellcheck filter
- if($spellcheck == "no"){
-
- $params["nfpr"] = "1";
- }
-
- if(count($tbs) !== 0){
-
- $params["tbs"] = "";
-
- foreach($tbs as $key => $value){
-
- $params["tbs"] .= $key . ":" . $value . ",";
- }
-
- $params["tbs"] = rtrim($params["tbs"], ",");
- }
-
- try{
- $html =
- $this->get(
- $proxy,
- "https://www.google.com/search",
- $params,
- true
- );
- }catch(Exception $error){
-
- throw new Exception("Failed to get HTML");
- }
-
- //$html = file_get_contents("scraper/google.html");
- }
-
- // init
- $this->fuckhtml->load($html);
- $this->detect_sorry();
- $this->parsestyles();
-
- // get javascript images
- $this->scrape_dimg($html);
- $this->scrape_imagearr($html);
-
- // get next page
- $npt =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "aria-label",
- "More search results",
- "a"
- );
-
- if(count($npt) === 0){
-
- // maybe we have the npt object from 2nd page, probe for that
- $npt =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "aria-label",
- "Next page",
- "a"
- );
- }
-
- if(count($npt) !== 0){
-
- $out["npt"] =
- $this->backend->store(
- $this->fuckhtml
- ->getTextContent(
- $npt[0]["attributes"]["href"]
- ),
- "web",
- $proxy
- );
- }
-
- // outer div is .MjjYud
- // inner div always contain role="presentation"
-
- $outer =
- $this->fuckhtml
- ->getElementsByClassName(
- "MjjYud",
- "div"
- );
-
- // used later
- $fancycontainer_class =
- explode(
- " ",
- $this->getstyle([
- "padding-top" => "4px",
- "padding-bottom" => "calc(12px*1)"
- ]),
- 2
- );
-
- if(count($fancycontainer_class) === 2){
-
- $fancycontainer_class = $fancycontainer_class[1];
- }else{
-
- $fancycontainer_class = false;
- }
-
- foreach($outer as $container){
-
- $this->fuckhtml->load($container);
-
- // probe for search result
- $title =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "link",
- "div"
- );
-
- if(count($title) !== 0){
-
- // we found a search result
-
- $title =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $title[0]
- )
- );
-
- // get url
- $sprobe =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- $link = null;
-
- foreach($sprobe as $possible_link){
-
- if(
- isset($possible_link["attributes"]["href"]) &&
- preg_match(
- '/^\/url\?q=/',
- $possible_link["attributes"]["href"]
- )
- ){
-
- $link =
- $this->fuckhtml
- ->getTextContent(
- $possible_link["attributes"]["href"]
- );
-
- break;
- }
- }
-
- if($link === null){
-
- // should not happen
- continue;
- }
-
- // get description
- // as usual, theres a thousand fucking possible divs for this one
-
- // probe for youtube-like description
- $description =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle([
- "align-items" => "flex-start",
- "display" => "flex",
- "justify-content" => "center",
- "padding" => "7px 12px",
- "padding-right" => "0",
- "padding-top" => "0"
- ]),
- "div"
- );
-
- $ratio = "16:9";
-
- if(count($description) === 0){
-
- // fail. find the one with the image on the right handside
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "style",
- "padding-top:2px;padding-right:8px;padding-left:16px;padding-bottom:12px",
- "div"
- );
-
- $ratio = "1:1";
-
- if(count($description) === 0){
-
- // fail. find the one that is used the most
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "style",
- "-webkit-line-clamp:3",
- "div"
- );
-
- if(count($description) === 0){
-
- // last fail. this one appears with divs that have prices
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "style",
- "max-width:100vw;grid-area:nke7rc;padding-top:2px;padding-right:8px;padding-left:16px;padding-bottom:6px",
- "div"
- );
- }
- }
- }
-
- if(count($description) === 0){
-
- // should not happen but whatever
- $description = null;
- }else{
-
- $description =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- );
- }
-
- // probe for date
- $desc2 = explode("—", $description, 2);
-
- $time = null;
-
- if(count($desc2) === 2){
-
- $time = strtotime($desc2[0]);
-
- if(
- strlen($desc2[0]) < 16 &&
- $time !== false
- ){
-
- $description = ltrim($desc2[1]);
- }else{
-
- $time = null;
- }
- }
-
- $thumb = [
- "ratio" => null,
- "url" => null,
- ];
-
- // get thumbnail
- $images =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- foreach($images as $image){
-
- if(isset($image["attributes"]["id"])){
-
- $thumb = [
- "ratio" => $ratio,
- "url" => $this->getdimg($image["attributes"]["id"])
- ];
- }
- }
-
- // get sublinks
- $sublinks = [];
-
- // probe for the fancy version
- if($fancycontainer_class !== false){
- $fancycontainer =
- $this->fuckhtml
- ->getElementsByClassName(
- $fancycontainer_class,
- "div"
- );
- }
-
- if(
- $fancycontainer_class !== false &&
- count($fancycontainer) !== 0
- ){
-
- $this->fuckhtml->load($fancycontainer[0]);
-
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- foreach($as as $a){
-
- $sublinks[] = [
- "title" =>
- $this->fuckhtml
- ->getTextContent(
- $a
- ),
- "description" => null,
- "date" => null,
- "url" =>
- $this->unshiturl(
- $a["attributes"]["href"]
- )
- ];
- }
- }
-
- $out["web"][] = [
- "title" => $title,
- "description" => $description,
- "url" => $this->unshiturl($link),
- "date" => $time,
- "type" => "web",
- "thumb" => $thumb,
- "sublink" => $sublinks,
- "table" => []
- ];
- continue;
- }
-
- // probe for containers with a title header
- $title_header =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle([
- "display" => "flex",
- "flex-wrap" => "wrap",
- "position" => "relative",
- "padding" => "16px"
- ])
- );
-
- if(count($title_header) !== 0){
-
- $title_header =
- strtolower(
- $this->fuckhtml
- ->getTextContent(
- $title_header[0]
- )
- );
-
- switch($title_header){
-
- case "people also search for":
- // get all related searches
- $relateds =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle([
- "display" => "flex",
- "height" => "100%",
- "flex-direction" => "column",
- "max-width" => "100%"
- ])
- );
-
- foreach($relateds as $r){
-
- $out["related"][] =
- $this->fuckhtml
- ->getTextContent(
- $r
- );
- }
- break;
- }
-
- continue;
- }
- }
-
- $out["related"] = array_values(array_unique($out["related"]));
-
- return $out;
+ throw new Exception("There are no known ways to scrape Google's /search endpoint without JS at this time. I'm working on a method that extracts cookies from browsers. Use Google API/CSE/Yahoo JP/Startpage for google results for now.");
}
public function video($get){
-
- if($get["npt"]){
-
- [$params, $proxy] = $this->backend->get($get["npt"], "video");
- $params = json_decode($params, true);
-
- }else{
- $search = $get["s"];
- $country = $get["country"];
- $nsfw = $get["nsfw"];
- $older = $get["older"];
- $newer = $get["newer"];
- $duration = $get["duration"];
- $quality = $get["quality"];
- $captions = $get["captions"];
- $proxy = $this->backend->get_ip();
-
- $params = [
- "q" => $search,
- "udm" => "7",
- "hl" => "en",
- "num" => 20
- ];
-
- // country
- if($country != "any"){
-
- $params["gl"] = $country;
- }
-
- // nsfw
- $params["safe"] = $nsfw == "yes" ? "off" : "active";
-
- $tbs = [];
-
- // get date
- $older = $older === false ? null : date("m/d/Y", $older);
- $newer = $newer === false ? null : date("m/d/Y", $newer);
-
- if(
- $older !== null ||
- $newer !== null
- ){
-
- $tbs["cdr"] = "1";
- $tbs["cd_min"] = $newer;
- $tbs["cd_max"] = $older;
- }
-
- // duration
- if($duration != "any"){
-
- $tbs[] = "dur:" . $duration;
- }
-
- // quality
- if($quality != "any"){
-
- $tbs[] = "hq:" . $quality;
- }
-
- // captions
- if($captions != "any"){
-
- $tbs[] = "cc:" . $captions;
- }
-
- // append tbs
- if(count($tbs) !== 0){
-
- $params["tbs"] =
- implode(",", $tbs);
- }
- }
-
- try{
- $html =
- $this->get(
- $proxy,
- "https://www.google.com/search",
- $params
- );
- }catch(Exception $error){
-
- throw new Exception("Failed to get HTML");
- }
-
- if(!isset($params["start"])){
-
- $params["start"] = 0;
- }
- $params["start"] += 20;
-
- $this->fuckhtml->load($html);
-
- //
- // Parse web video page
- //
- $this->detect_sorry();
-
- // parse all <style> tags
- $this->parsestyles();
-
- // get javascript images
- $this->scrape_dimg($html);
-
- $this->scrape_imagearr($html);
-
- $out = [
- "status" => "ok",
- "npt" =>
- $this->backend->store(
- json_encode($params),
- "videos",
- $proxy
- ),
- "video" => [],
- "author" => [],
- "livestream" => [],
- "playlist" => [],
- "reel" => []
- ];
-
- $search_div =
- $this->fuckhtml
- ->getElementById(
- "center_col"
- );
-
- if($search_div === false){
-
- throw new Exception("Failed to grep search div");
- }
-
- $this->fuckhtml->load($search_div);
-
- $results =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle([
- "margin" => "0px 0px 30px"
- ]),
- "div"
- );
-
- foreach($results as $result){
-
- $this->fuckhtml->load($result);
-
- $url =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($url) === 0){
-
- // no url, weird, continue
- continue;
- }
-
- $title =
- $this->fuckhtml
- ->getElementsByTagName(
- "h3"
- );
-
- if(count($title) === 0){
-
- // no title, weird, continue
- continue;
- }
-
- // get description
- $description =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle([
- "-webkit-box-orient" => "vertical",
- "display" => "-webkit-box",
- "-webkit-line-clamp" => "2",
- "overflow" => "hidden",
- "word-break" => "break-word"
- ]),
- "div"
- );
-
- if(count($description) === 0){
-
- $description = null;
- }else{
-
- $description =
- html_entity_decode(
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- )
- );
- }
-
- // get author + date posted
- $metadiv =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle([
- "margin-top" => "12px"
- ]),
- "div"
- );
-
- $author = null;
- $date = null;
-
- if(count($metadiv) !== 0){
-
- $metadiv =
- explode(
- "·",
- $this->fuckhtml
- ->getTextContent(
- $metadiv[0]
- )
- );
-
- if(count($metadiv) === 3){
-
- $author = trim($metadiv[1]);
- $date = strtotime(trim($metadiv[2]));
- }elseif(count($metadiv) === 2){
-
- $author = trim($metadiv[0]);
- $date = strtotime(trim($metadiv[1]));
- }
- }
-
- $thumb = [
- "url" => null,
- "ratio" => null
- ];
-
- $image =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- $duration = null;
-
- if(
- count($image) !== 0 &&
- isset($image[0]["attributes"]["id"])
- ){
-
- $thumb = [
- "url" => $this->getdimg($image[0]["attributes"]["id"]),
- "ratio" => "16:9"
- ];
-
- // get duration
- $duration =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle([
- "background-color" => "rgba(0,0,0,0.6)",
- "color" => "#fff",
- "fill" => "#fff"
- ])
- );
-
- if(count($duration) !== 0){
-
- $duration =
- $this->hms2int(
- $this->fuckhtml
- ->getTextContent(
- $duration[0]
- ));
- }else{
-
- $duration = null;
- }
- }
-
- $out["video"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $title[0]
- )
- ),
- "description" => $description,
- "author" => [
- "name" => $author,
- "url" => null,
- "avatar" => null
- ],
- "date" => $date,
- "duration" => $duration,
- "views" => null,
- "thumb" => $thumb,
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $url[0]["attributes"]["href"]
- )
- ];
- }
-
- return $out;
+ throw new Exception("There are no known ways to scrape Google's /search endpoint without JS at this time. I'm working on a method that extracts cookies from browsers. Use Google API/CSE/Yahoo JP/Startpage for google results for now.");
}
public function news($get){
-
- throw new Exception("Broke for now, fuck off lol");
-
- if($get["npt"]){
-
- [$req, $proxy] = $this->backend->get($get["npt"], "news");
- /*parse_str(
- parse_url($req, PHP_URL_QUERY),
- $search
- );*/
-
- try{
-
- $html =
- $this->get(
- $proxy,
- "https://www.google.com" . $req,
- []
- );
- }catch(Exception $error){
-
- throw new Exception("Failed to get HTML");
- }
-
- }else{
- $search = $get["s"];
- $country = $get["country"];
- $nsfw = $get["nsfw"];
- $older = $get["older"];
- $newer = $get["newer"];
- $sort = $get["sort"];
- $proxy = $this->backend->get_ip();
-
- $params = [
- "q" => $search,
- "tbm" => "nws",
- "hl" => "en",
- "num" => "20"
- ];
-
- // country
- if($country != "any"){
-
- $params["gl"] = $country;
- }
-
- // nsfw
- $params["safe"] = $nsfw == "yes" ? "off" : "active";
-
- $tbs = [];
-
- // get date
- $older = $older === false ? null : date("m/d/Y", $older);
- $newer = $newer === false ? null : date("m/d/Y", $newer);
-
- if(
- $older !== null ||
- $newer !== null
- ){
-
- $tbs["cdr"] = "1";
- $tbs["cd_min"] = $newer;
- $tbs["cd_max"] = $older;
- }
-
- // relevance
- if($sort == "date"){
-
- $tbs["sbd"] = "1";
- }
-
- // append tbs
- if(count($tbs) !== 0){
-
- $params["tbs"] = "";
-
- foreach($tbs as $key => $value){
-
- $params["tbs"] .= $key . ":" . $value . ",";
- }
-
- $params["tbs"] = rtrim($params["tbs"], ",");
- }
-
- //$html = file_get_contents("scraper/google-news.html");
-
- $html =
- $this->get(
- $proxy,
- "https://www.google.com/search",
- $params
- );
- }
-
- $out = [
- "status" => "ok",
- "npt" => null,
- "news" => []
- ];
-
- $this->fuckhtml->load($html);
-
- $this->detect_sorry();
-
- // get images
- $this->scrape_dimg($html);
-
- // parse styles
- $this->parsestyles();
-
- $center_col =
- $this->fuckhtml
- ->getElementById(
- "center_col",
- "div"
- );
-
- if($center_col === null){
-
- throw new Exception("Could not grep result div");
- }
-
- $this->fuckhtml->load($center_col);
-
- // get next page
- $npt =
- $this->fuckhtml
- ->getElementById(
- "pnnext",
- "a"
- );
-
- if($npt !== false){
-
- $out["npt"] =
- $this->backend->store(
- $this->fuckhtml
- ->getTextContent(
- $npt["attributes"]
- ["href"]
- ),
- "news",
- $proxy
- );
- }
-
- $as =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "jsname",
- "a"
- );
-
- foreach($as as $a){
-
- $this->fuckhtml->load($a);
-
- // get title
- $title =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "heading",
- "div"
- );
-
- if(count($title) === 0){
-
- continue;
- }
-
- $title =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $title[0]
- )
- );
-
- // get thumbnail
- $image =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "id",
- "img"
- );
-
- // check for padded title node, if found, we're inside a carousel
- $probe =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "padding" => "16px 16px 40px 16px"
- ]
- ),
- "div"
- );
-
- if(count($probe) !== 0){
-
- $probe = true;
- }else{
-
- $probe = false;
- }
-
- if(
- count($image) !== 0 &&
- !isset($image[0]["attributes"]["width"])
- ){
-
- $thumb = [
- "url" =>
- $this->getdimg(
- $image[0]["attributes"]["id"]
- ),
- "ratio" => $probe === true ? "16:9" : "1:1"
- ];
- }else{
-
- $thumb = [
- "url" => null,
- "ratio" => null
- ];
- }
-
- $description = null;
-
- if($probe === false){
-
- $desc_divs =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "style",
- "div"
- );
-
- foreach($desc_divs as $desc){
-
- if(
- strpos(
- $desc["attributes"]["style"],
- "margin-top:"
- ) !== false
- ){
-
- $description =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $desc
- )
- );
- break;
- }
- }
- }
-
- // get author
- $author =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "overflow" => "hidden",
- "text-align" => "left",
- "text-overflow" => "ellipsis",
- "white-space" => "nowrap",
- "margin-bottom" => "8px"
- ]
- ),
- "div"
- );
-
- if(count($author) !== 0){
-
- $author =
- $this->fuckhtml
- ->getTextContent(
- $author[0]
- );
- }else{
-
- $author = null;
- }
-
- // get date
- $date = null;
-
- $date_div =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "style",
- "div"
- );
-
- foreach($date_div as $d){
-
- $this->fuckhtml->load($d);
-
- $span =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- if(
- strpos(
- $d["attributes"]["style"],
- "bottom:"
- ) !== false
- ){
-
- $date =
- strtotime(
- $this->fuckhtml
- ->getTextContent(
- $span[count($span) - 1]
- )
- );
- break;
- }
- }
-
- $out["news"][] = [
- "title" => $title,
- "author" => $author,
- "description" => $description,
- "date" => $date,
- "thumb" => $thumb,
- "url" =>
- $this->unshiturl(
- $a["attributes"]
- ["href"]
- )
- ];
- }
-
- return $out;
+ throw new Exception("There are no known ways to scrape Google's /search endpoint without JS at this time. I'm working on a method that extracts cookies from browsers. Use Google API/CSE/Yahoo JP/Startpage for google results for now.");
}
@@ -1730,6 +599,11 @@ class google{
);
$params = json_decode($params, true);
+
+ $page = $params["page"] + 1;
+ $params = $params["params"];
+ $params["async"] = "_fmt:json,p:1,ijn:{$page}";
+
}else{
$search = $get["s"];
@@ -1749,9 +623,13 @@ class google{
$format = $get["format"];
$rights = $get["rights"];
+ $page = 0;
+
$params = [
"q" => $search,
- "udm" => "2" // get images
+ "tbm" => "isch",
+ "asearch" => "isch",
+ "async" => "_fmt:json,p:1,ijn:{$page}", // ijn:0 = page 1
];
// country (image search uses cr instead of gl)
@@ -1834,13 +712,9 @@ class google{
$params["tbs"] = rtrim($params["tbs"], ",");
}
}
- /*
- $handle = fopen("scraper/page.html", "r");
- $html = fread($handle, filesize("scraper/page.html"));
- fclose($handle);*/
try{
- $html =
+ $json =
$this->get(
$proxy,
"https://www.google.com/search",
@@ -1851,12 +725,28 @@ class google{
throw new Exception("Failed to get search page");
}
- $this->fuckhtml->load($html);
+ unset($params["async"]);
+
+ //$json = file_get_contents("scraper/google.json");
+ // detect captcha
+ $this->fuckhtml->load($json);
$this->detect_sorry();
- // get javascript images
- $this->scrape_imagearr($html);
+ // remove xssi
+ $json =
+ preg_replace(
+ '/^[^{]*/',
+ "",
+ $json
+ );
+
+ $json = json_decode($json, true);
+
+ if($json === null){
+
+ throw new Exception("Failed to decode JSON");
+ }
$out = [
"status" => "ok",
@@ -1864,69 +754,42 @@ class google{
"image" => []
];
- $images =
- $this->fuckhtml
- ->getElementsByClassName(
- "ivg-i",
- "div"
- );
-
- foreach($images as $div){
-
- $this->fuckhtml->load($div);
-
- $image =
- $this->fuckhtml
- ->getElementsByTagName("img")[0];
+ if(!isset($json["ischj"]["metadata"])){
- // make sure we dont attempt to show an image we dont have data for
- if(
- isset($div["attributes"]["data-docid"]) &&
- isset($this->image_arr[$div["attributes"]["data-docid"]])
- ){
-
- $source =
- $this->image_arr[
- $div["attributes"]["data-docid"]
- ];
- }else{
-
- continue;
- }
+ throw new Exception("Google did not return an image array");
+ }
+
+ foreach($json["ischj"]["metadata"] as $image){
$out["image"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $image["attributes"]["alt"]
- )
- ),
- "source" => $source,
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $div["attributes"]["data-lpage"]
- )
+ "title" => $this->titledots($image["result"]["page_title"]),
+ "source" => [
+ [
+ "url" => $image["original_image"]["url"],
+ "width" => (int)$image["original_image"]["width"],
+ "height" => (int)$image["original_image"]["height"]
+ ],
+ [
+ "url" => $image["thumbnail"]["url"],
+ "width" => (int)$image["thumbnail"]["width"],
+ "height" => (int)$image["thumbnail"]["height"]
+ ]
+ ],
+ "url" => $image["result"]["referrer_url"]
];
}
- // as usual, no way to check if there is a next page reliably
- if(count($out["image"]) > 50){
-
- if(!isset($params["start"])){
-
- $params["start"] = 10;
- }else{
-
- $params["start"] += 10;
- }
+ $page++;
+
+ if(count($out["image"]) === 10){
$out["npt"] =
- $this->backend
- ->store(
- json_encode($params),
- "image",
+ $this->backend->store(
+ json_encode([
+ "params" => $params,
+ "page" => $page
+ ]),
+ "images",
$proxy
);
}
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage