aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlolcat <will@lolcat.ca>2026-04-25 22:58:28 -0400
committerlolcat <will@lolcat.ca>2026-04-25 22:58:28 -0400
commite63a17d6dbc474a8220b651f61353d871b2f9674 (patch)
tree8d1c37dad8de68855abf035154aceca6bd9575e3
parent4349bf232d93fa1f868668f50b948bd9b90dff47 (diff)
added google api image scraper
-rw-r--r--lib/frontend.php67
-rw-r--r--scraper/google_api.php294
-rw-r--r--settings.php4
3 files changed, 282 insertions, 83 deletions
diff --git a/lib/frontend.php b/lib/frontend.php
index 7cf4e24..d3ff0c3 100644
--- a/lib/frontend.php
+++ b/lib/frontend.php
@@ -2,6 +2,52 @@
class frontend{
+ /**
+  * Check that a URL is an absolute http(s) URL and, optionally, that its
+  * host is not a private or reserved address.
+  *
+  * @param string $url URL to check
+  * @param bool $net_validate when true, also resolve the host and reject
+  *                           private/reserved IP ranges
+  * @return bool true when the URL passes every requested check
+  */
+ public function validateurl($url, $net_validate = false){
+
+     $url_parts = parse_url($url);
+
+     // parse_url returns false on seriously malformed URLs
+     if(
+         !is_array($url_parts) ||
+         !isset($url_parts["scheme"], $url_parts["host"])
+     ){
+
+         return false;
+     }
+
+     // URL schemes are case-insensitive, so "HTTP://" must be accepted too
+     if(
+         !in_array(
+             strtolower($url_parts["scheme"]),
+             ["http", "https"],
+             true
+         )
+     ){
+
+         return false;
+     }
+
+     if(!$net_validate){
+
+         return true;
+     }
+
+     $ip =
+         str_replace(
+             ["[", "]"], // handle ipv6
+             "",
+             $url_parts["host"]
+         );
+
+     // if its not an IP
+     if(!filter_var($ip, FILTER_VALIDATE_IP)){
+
+         // resolve domain's IP. exactly one trailing dot is appended so the
+         // name is looked up as fully qualified, even when the host already
+         // ends with a dot
+         $ip = gethostbyname(rtrim($url_parts["host"], ".") . ".");
+     }
+
+     // gethostbyname returns its input unchanged on failure, which is not a
+     // valid IP and is therefore rejected here along with private/reserved
+     // addresses (localhost, LAN ranges, ...)
+     return
+         filter_var(
+             $ip,
+             FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
+         ) !== false;
+ }
+
public function load($template, $replacements = []){
$replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
@@ -622,6 +668,7 @@ class frontend{
"yandex" => "Yandex",
"brave" => "Brave",
"google" => "Google",
+ "google_api" => "Google API",
"google_cse" => "Google CSE",
"yahoo_japan" => "Yahoo! JAPAN",
"startpage" => "Startpage",
@@ -638,8 +685,7 @@ class frontend{
"fivehpx" => "500px",
"vsco" => "VSCO",
"imgur" => "Imgur",
- "ftm" => "FindThatMeme",
- //"sankakucomplex" => "SankakuComplex"
+ "ftm" => "FindThatMeme"
]
];
break;
@@ -695,6 +741,22 @@ class frontend{
]
];
break;
+
+ case "booru":
+ $filters["scraper"] = [
+ "display" => "Scraper",
+ "option" => [
+ "safebooru" => "Safebooru",
+ "konachan" => "Konachan",
+ "tbib" => "The Big Imageboard",
+ "gelbooru" => "Gelbooru",
+ "yandere" => "Yande.re",
+ "tbib" => "The Big Imageboard",
+ "sankakucomplex" => "SankakuComplex",
+ "soybooru" => "SoyBooru"
+ ]
+ ];
+ break;
}
// get scraper name from user input, or default out to preferred scraper
@@ -871,6 +933,7 @@ class frontend{
$html = null;
+ //foreach(["web", "images", "videos", "news", "music", "booru"] as $type){
foreach(["web", "images", "videos", "news", "music"] as $type){
$html .= '<a href="/' . $type . '?s=' . urlencode($query);
diff --git a/scraper/google_api.php b/scraper/google_api.php
index bb2cacf..a3863ca 100644
--- a/scraper/google_api.php
+++ b/scraper/google_api.php
@@ -264,6 +264,25 @@ class google_api{
"yes" => "Yes", // safe=active
"no" => "No" // safe=off
]
+ ],
+ "sort" => [ // sort
+ "display" => "Sort by",
+ "option" => [
+ "any" => "Any order",
+ "date:d" => "Oldest",
+ "date:a" => "Newest"
+ ]
+ ],
+ "newer" => [
+ "display" => "Newer than",
+ "option" => "_DATE"
+ ],
+ "rm_dupes" => [ // filter
+ "display" => "Remove duplicates",
+ "option" => [
+ "yes" => "Yes", // 1
+ "no" => "No" // 0
+ ]
]
];
@@ -313,86 +332,52 @@ class google_api{
"zh-CN" => "Chinese (Simplified)",
"zh-TW" => "Chinese (Traditional)"
]
- ],
- "sort" => [
- "display" => "Sort by",
- "option" => [
- "any" => "Any order",
- "date:d" => "Oldest",
- "date:a" => "Newest"
- ]
- ],
- "newer" => [
- "display" => "Newer than",
- "option" => "_DATE"
- ],
- "rm_dupes" => [
- "display" => "Remove duplicates",
- "option" => [
- "yes" => "Yes",
- "no" => "No"
- ]
]
]
);
break;
- /*
+
case "images":
return array_merge(
$base,
[
- "time" => [ // tbs=qdr:<time>
- "display" => "Time posted",
- "option" => [
- "any" => "Any time",
- "d" => "Past 24 hours",
- "w" => "Past week",
- "m" => "Past month",
- "y" => "Past year"
- ]
- ],
- "size" => [ // imgsz
+ "size" => [ // imgSize
"display" => "Size",
"option" => [
"any" => "Any size",
- "l" => "Large",
- "m" => "Medium",
- "i" => "Icon",
- "qsvga" => "Larger than 400x300",
- "vga" => "Larger than 640x480",
- "svga" => "Larger than 800x600",
- "xga" => "Larger than 1024x768",
- "2mp" => "Larger than 2MP",
- "4mp" => "Larger than 4MP",
- "6mp" => "Larger than 6MP",
- "8mp" => "Larger than 8MP",
- "10mp" => "Larger than 10MP",
- "12mp" => "Larger than 12MP",
- "15mp" => "Larger than 15MP",
- "20mp" => "Larger than 20MP",
- "40mp" => "Larger than 40MP",
- "70mp" => "Larger than 70MP"
+ "icon" => "Icon",
+ "small" => "Small",
+ "medium" => "Medium",
+ "large" => "Large",
+ "xlarge" => "X-Large",
+ "xxlarge" => "XX-Large",
+ "huge" => "Huge"
]
],
- "ratio" => [ // imgar
- "display" => "Aspect ratio",
+ "format" => [ // fileType
+ "display" => "Format",
"option" => [
- "any" => "Any ratio",
- "t|xt" => "Tall",
- "s" => "Square",
- "w" => "Wide",
- "xw" => "Panoramic"
+ "any" => "Any format",
+ "jpg" => "JPG",
+ "gif" => "GIF",
+ "png" => "PNG",
+ "bmp" => "BMP",
+ "svg" => "SVG",
+ "webp" => "WEBP",
+ "ico" => "ICO",
+ "craw" => "RAW"
]
],
- "color" => [ // imgc
+ "color" => [
"display" => "Color",
"option" => [
"any" => "Any color",
- "color" => "Full color",
- "bnw" => "Black & white",
- "trans" => "Transparent",
- // from here, imgcolor
- "red" => "Red",
+
+ "color" => "Full color", // imgColorType
+ "mono" => "Black & White",
+ "trans" => "Transparent background",
+
+ "red" => "Red", // imgDominantColor
"orange" => "Orange",
"yellow" => "Yellow",
"green" => "Green",
@@ -406,40 +391,32 @@ class google_api{
"brown" => "Brown"
]
],
- "type" => [ // tbs=itp:<type>
+ "type" => [ // imgType
"display" => "Type",
"option" => [
"any" => "Any type",
"clipart" => "Clip Art",
+ "face" => "Faces",
"lineart" => "Line Drawing",
- "animated" => "Animated"
+ "stock" => "Stock photos",
+ "photo" => "Photos",
+ "animated" => "Animated",
]
],
- "format" => [ // as_filetype
- "display" => "Format",
- "option" => [
- "any" => "Any format",
- "jpg" => "JPG",
- "gif" => "GIF",
- "png" => "PNG",
- "bmp" => "BMP",
- "svg" => "SVG",
- "webp" => "WEBP",
- "ico" => "ICO",
- "craw" => "RAW"
- ]
- ],
- "rights" => [ // tbs=sur:<rights>
+ "rights" => [ // rights
"display" => "Usage rights",
"option" => [
"any" => "Any license",
- "cl" => "Creative Commons licenses",
- "ol" => "Commercial & other licenses"
+ "cc_publicdomain" => "Public domain",
+ "cc_attribute" => "Attribution required",
+ "cc_sharealike" => "Sharealike",
+ "cc_noncommercial" => "Non-commercial use only",
+ "cc_nonderived" => "Original works"
]
]
]
);
- break;*/
+ break;
}
}
@@ -485,6 +462,7 @@ class google_api{
return $data;
}
+
public function web($get){
// rotate proxy + key on EVERY request
@@ -731,6 +709,160 @@ class google_api{
return $out;
}
+
+ /**
+  * Search images through the Google Custom Search JSON API.
+  *
+  * Either starts a new search from the query and filters in $get, or
+  * resumes a previous search from the stored next-page token in $get["npt"].
+  *
+  * @param array $get request parameters: "s", "npt" and the filter values
+  *                   read below
+  * @return array "status", "npt" (next-page token or null) and "image"
+  *               (list of results, each with a title, two sources and a url)
+  * @throws Exception when the request fails, the response is not valid JSON,
+  *                   or the API reports an error
+  */
+ public function image($get){
+
+     // rotate proxy + key on EVERY request
+     $keydata = $this->backend->get_key();
+     $proxy = $this->backend->get_ip($keydata["increment"]);
+
+     if($get["npt"]){
+
+         // only the stored parameters are needed; the second element of the
+         // returned pair is not used here
+         [$params] = $this->backend->get(
+             $get["npt"],
+             "web"
+         );
+
+         $params = json_decode($params, true);
+
+         // the key is stripped before storing, so add the fresh one back
+         $params["key"] = $keydata["key"];
+
+     }else{
+
+         $params = [
+             "q" => $get["s"],
+             "cx" => config::GOOGLE_CX_ENDPOINT,
+             "num" => 10,
+             "start" => 1,
+             "searchType" => "image",
+             "key" => $keydata["key"]
+         ];
+
+         //
+         // parse filters
+         //
+         if($get["newer"] !== false){
+
+             // dateRestrict takes a whole number of days. a day is 86400
+             // seconds; round up so the requested date stays inside the
+             // window and never send "d0"
+             $days = (int)ceil((time() - $get["newer"]) / 86400);
+             $params["dateRestrict"] = "d" . max(1, $days);
+         }
+
+         if($get["rm_dupes"] == "no"){ $params["filter"] = "0"; }
+         if($get["country"] != "any"){ $params["gl"] = $get["country"]; }
+
+         if($get["nsfw"] == "yes"){
+
+             $params["safe"] = "off";
+         }else{
+
+             $params["safe"] = "active";
+         }
+
+         if($get["sort"] != "any"){ $params["sort"] = $get["sort"]; }
+
+         // image filters
+         if($get["size"] != "any"){ $params["imgSize"] = $get["size"]; }
+         if($get["format"] != "any"){ $params["fileType"] = $get["format"]; }
+
+         // the single "color" filter feeds two different API parameters
+         switch($get["color"]){
+
+             case "any":
+                 break;
+
+             case "color":
+             case "mono":
+             case "trans":
+                 $params["imgColorType"] = $get["color"];
+                 break;
+
+             default:
+                 $params["imgDominantColor"] = $get["color"];
+                 break;
+         }
+
+         if($get["type"] != "any"){ $params["imgType"] = $get["type"]; }
+         if($get["rights"] != "any"){ $params["rights"] = $get["rights"]; }
+     }
+
+     try{
+         $json =
+             $this->get(
+                 $proxy,
+                 "https://www.googleapis.com/customsearch/v1",
+                 $params
+             );
+     }catch(Exception $error){
+
+         // keep the original failure attached for debugging
+         throw new Exception("Failed to fetch JSON", 0, $error);
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     if(isset($json["error"]["message"])){
+
+         throw new Exception(
+             "API returned an error: " .
+             $json["error"]["message"] .
+             " (key #" . $keydata["increment"] . ")"
+         );
+     }
+
+     if(!isset($json["items"])){
+
+         // google just doesnt return items when theres no results
+         return $out;
+     }
+
+     foreach($json["items"] as $image){
+
+         $out["image"][] = [
+             "title" => $this->titledots($image["title"]),
+             "source" => [
+                 [ // full size image
+                     "url" => $image["link"],
+                     "width" => (int)$image["image"]["width"],
+                     "height" => (int)$image["image"]["height"]
+                 ],
+                 [ // thumbnail
+                     "url" => $image["image"]["thumbnailLink"],
+                     "width" => (int)$image["image"]["thumbnailWidth"],
+                     "height" => (int)$image["image"]["thumbnailHeight"]
+                 ]
+             ],
+             "url" => $image["image"]["contextLink"]
+         ];
+     }
+
+     // get npt
+     if(isset($json["queries"]["nextPage"][0]["startIndex"])){
+
+         // never persist the API key inside the next-page token
+         unset($params["key"]);
+         $params["start"] = (int)$json["queries"]["nextPage"][0]["startIndex"];
+
+         $out["npt"] =
+             $this->backend->store(
+                 json_encode($params),
+                 "web",
+                 $proxy
+             );
+     }
+
+     return $out;
+ }
+
+
private function titledots($title){
return trim($title, " .\t\n\r\0\x0B…");
diff --git a/settings.php b/settings.php
index e1b9dbe..b4b8150 100644
--- a/settings.php
+++ b/settings.php
@@ -224,6 +224,10 @@ $settings = [
"text" => "Google"
],
[
+ "value" => "google_api",
+ "text" => "Google API"
+ ],
+ [
"value" => "google_cse",
"text" => "Google CSE"
],
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage