diff options
| author | lolcat <will@lolcat.ca> | 2026-04-25 22:58:28 -0400 |
|---|---|---|
| committer | lolcat <will@lolcat.ca> | 2026-04-25 22:58:28 -0400 |
| commit | e63a17d6dbc474a8220b651f61353d871b2f9674 (patch) | |
| tree | 8d1c37dad8de68855abf035154aceca6bd9575e3 /scraper/google_api.php | |
| parent | 4349bf232d93fa1f868668f50b948bd9b90dff47 (diff) | |
added google api image scraper
Diffstat (limited to 'scraper/google_api.php')
| -rw-r--r-- | scraper/google_api.php | 294 |
1 files changed, 213 insertions, 81 deletions
diff --git a/scraper/google_api.php b/scraper/google_api.php index bb2cacf..a3863ca 100644 --- a/scraper/google_api.php +++ b/scraper/google_api.php @@ -264,6 +264,25 @@ class google_api{ "yes" => "Yes", // safe=active "no" => "No" // safe=off ] + ], + "sort" => [ // sort + "display" => "Sort by", + "option" => [ + "any" => "Any order", + "date:d" => "Oldest", + "date:a" => "Newest" + ] + ], + "newer" => [ + "display" => "Newer than", + "option" => "_DATE" + ], + "rm_dupes" => [ // filter + "display" => "Remove duplicates", + "option" => [ + "yes" => "Yes", // 1 + "no" => "No" // 0 + ] ] ]; @@ -313,86 +332,52 @@ class google_api{ "zh-CN" => "Chinese (Simplified)", "zh-TW" => "Chinese (Traditional)" ] - ], - "sort" => [ - "display" => "Sort by", - "option" => [ - "any" => "Any order", - "date:d" => "Oldest", - "date:a" => "Newest" - ] - ], - "newer" => [ - "display" => "Newer than", - "option" => "_DATE" - ], - "rm_dupes" => [ - "display" => "Remove duplicates", - "option" => [ - "yes" => "Yes", - "no" => "No" - ] ] ] ); break; - /* + case "images": return array_merge( $base, [ - "time" => [ // tbs=qdr:<time> - "display" => "Time posted", - "option" => [ - "any" => "Any time", - "d" => "Past 24 hours", - "w" => "Past week", - "m" => "Past month", - "y" => "Past year" - ] - ], - "size" => [ // imgsz + "size" => [ // imgSize "display" => "Size", "option" => [ "any" => "Any size", - "l" => "Large", - "m" => "Medium", - "i" => "Icon", - "qsvga" => "Larger than 400x300", - "vga" => "Larger than 640x480", - "svga" => "Larger than 800x600", - "xga" => "Larger than 1024x768", - "2mp" => "Larger than 2MP", - "4mp" => "Larger than 4MP", - "6mp" => "Larger than 6MP", - "8mp" => "Larger than 8MP", - "10mp" => "Larger than 10MP", - "12mp" => "Larger than 12MP", - "15mp" => "Larger than 15MP", - "20mp" => "Larger than 20MP", - "40mp" => "Larger than 40MP", - "70mp" => "Larger than 70MP" + "icon" => "Icon", + "small" => "Small", + "medium" => "Medium", + "large" => "Large", + "xlarge" => "X-Large", + "xxlarge" => "XX-Large", + "huge" => "Huge" ] ], - "ratio" => [ // imgar - "display" => "Aspect ratio", + "format" => [ // fileType + "display" => "Format", "option" => [ - "any" => "Any ratio", - "t|xt" => "Tall", - "s" => "Square", - "w" => "Wide", - "xw" => "Panoramic" + "any" => "Any format", + "jpg" => "JPG", + "gif" => "GIF", + "png" => "PNG", + "bmp" => "BMP", + "svg" => "SVG", + "webp" => "WEBP", + "ico" => "ICO", + "craw" => "RAW" ] ], - "color" => [ // imgc + "color" => [ "display" => "Color", "option" => [ "any" => "Any color", - "color" => "Full color", - "bnw" => "Black & white", - "trans" => "Transparent", - // from here, imgcolor - "red" => "Red", + + "color" => "Full color", // imgColorType + "mono" => "Black & White", + "trans" => "Transparent background", + + "red" => "Red", // imgDominantColor "orange" => "Orange", "yellow" => "Yellow", "green" => "Green", @@ -406,40 +391,32 @@ class google_api{ "brown" => "Brown" ] ], - "type" => [ // tbs=itp:<type> + "type" => [ // imgType "display" => "Type", "option" => [ "any" => "Any type", "clipart" => "Clip Art", + "face" => "Faces", "lineart" => "Line Drawing", - "animated" => "Animated" + "stock" => "Stock photos", + "photo" => "Photos", + "animated" => "Animated", ] ], - "format" => [ // as_filetype - "display" => "Format", - "option" => [ - "any" => "Any format", - "jpg" => "JPG", - "gif" => "GIF", - "png" => "PNG", - "bmp" => "BMP", - "svg" => "SVG", - "webp" => "WEBP", - "ico" => "ICO", - "craw" => "RAW" - ] - ], - "rights" => [ // tbs=sur:<rights> + "rights" => [ // rights "display" => "Usage rights", "option" => [ "any" => "Any license", - "cl" => "Creative Commons licenses", - "ol" => "Commercial & other licenses" + "cc_publicdomain" => "Public domain", + "cc_attribute" => "Attribution required", + "cc_sharealike" => "Sharealike", + "cc_noncommercial" => "Non-commercial use only", + "cc_nonderived" => "Original works" ] ] ] ); - break;*/ + break; } } @@ -485,6 +462,7 @@ class google_api{ return $data; } + public function web($get){ // rotate proxy + key on EVERY request @@ -731,6 +709,160 @@ class google_api{ return $out; } + + public function image($get){ + + // rotate proxy + key on EVERY request + $keydata = $this->backend->get_key(); + $proxy = $this->backend->get_ip($keydata["increment"]); + + if($get["npt"]){ + + // $p is never used + [$params, $p] = $this->backend->get( + $get["npt"], + "web" + ); + + $params = json_decode($params, true); + + $params["key"] = $keydata["key"]; + + }else{ + + //$json = file_get_contents("scraper/google.json"); + $params = [ + "q" => $get["s"], + "cx" => config::GOOGLE_CX_ENDPOINT, + "num" => 10, + "start" => 1, + "searchType" => "image", + "key" => $keydata["key"] + ]; + + // + // parse filters + // + if($get["newer"] !== false){ + + $params["dateRestrict"] = "d" . (round((time() - $get["newer"]) / 100000)); + } + + if($get["rm_dupes"] == "no"){ $params["filter"] = "0"; } + if($get["country"] != "any"){ $params["gl"] = $get["country"]; } + + if($get["nsfw"] == "yes"){ + + $params["safe"] = "off"; + }else{ + + $params["safe"] = "active"; + } + + if($get["sort"] != "any"){ $params["sort"] = $get["sort"]; } + + // image filters + if($get["size"] != "any"){ $params["imgSize"] = $get["size"]; } + if($get["format"] != "any"){ $params["fileType"] = $get["format"]; } + + switch($get["color"]){ + + case "any": + break; + + case "color": + case "mono": + case "trans": + $params["imgColorType"] = $get["color"]; + break; + + default: + $params["imgDominantColor"] = $get["color"]; + break; + } + + if($get["type"] != "any"){ $params["imgType"] = $get["type"]; } + if($get["rights"] != "any"){ $params["rights"] = $get["rights"]; } + } + + try{ + $json = + $this->get( + $proxy, + "https://www.googleapis.com/customsearch/v1", + $params + ); + }catch(Exception $error){ + + throw new Exception("Failed to fetch JSON"); + } + + $json = json_decode($json, true); + + if($json === null){ + + throw new Exception("Failed to decode JSON"); + } + + $out = [ + "status" => "ok", + "npt" => null, + "image" => [] + ]; + + if(isset($json["error"]["message"])){ + + throw new Exception( + "API returned an error: " . + $json["error"]["message"] . + " (key #" . $keydata["increment"] . ")" + ); + } + + if(!isset($json["items"])){ + + // google just doesnt return items when theres no results + return $out; + } + + foreach($json["items"] as $image){ + + $out["image"][] = [ + "title" => $this->titledots($image["title"]), + "source" => [ + [ + "url" => $image["link"], + "width" => (int)$image["image"]["width"], + "height" => (int)$image["image"]["height"] + ], + [ + "url" => $image["image"]["thumbnailLink"], + "width" => (int)$image["image"]["thumbnailWidth"], + "height" => (int)$image["image"]["thumbnailHeight"] + ] + ], + "url" => $image["image"]["contextLink"] + ]; + } + + // get npt + if(isset($json["queries"]["nextPage"][0]["startIndex"])){ + + unset($params["key"]); + $params["start"] = (int)$json["queries"]["nextPage"][0]["startIndex"]; + + $out["npt"] = + $this->backend->store( + json_encode($params), + "web", + $proxy + ); + } + + return $out; + } + + private function titledots($title){ return trim($title, " .\t\n\r\0\x0B…"); |
