aboutsummaryrefslogtreecommitdiffstats
path: root/scraper/unsplash.php
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/unsplash.php')
-rw-r--r-- scraper/unsplash.php 267
1 files changed, 267 insertions, 0 deletions
diff --git a/scraper/unsplash.php b/scraper/unsplash.php
new file mode 100644
index 0000000..dc57686
--- /dev/null
+++ b/scraper/unsplash.php
@@ -0,0 +1,267 @@
+<?php
+
+/**
+ * Image scraper for Unsplash.
+ *
+ * Queries https://unsplash.com/napi/search/photos and converts the JSON
+ * answer into an array of images (title, page URL and a list of sources
+ * at several widths). Paging state is handed to the backend helper, which
+ * returns a token that can be passed back in as "npt".
+ */
+class unsplash{
+
+    // Declared explicitly: creating properties on the fly inside the
+    // constructor is deprecated as of PHP 8.2. Kept public so existing
+    // accesses from outside the class keep working.
+    public $fuckhtml;
+    public $backend;
+
+    /**
+     * Loads the helper libraries and creates the backend bound to the
+     * "unsplash" name.
+     */
+    public function __construct(){
+
+        // *_once avoids a fatal "cannot redeclare class" error when the
+        // helper has already been loaded by something else
+        include_once "lib/fuckhtml.php";
+        $this->fuckhtml = new fuckhtml();
+
+        include_once "lib/backend.php";
+        $this->backend = new backend("unsplash");
+    }
+
+    /**
+     * Describes the filters this scraper understands.
+     *
+     * @param mixed $page Unused here; the same filters are returned for
+     *                    every value.
+     * @return array Map of filter name => ["display" => label,
+     *               "option" => [value => label]]. The first option of
+     *               each filter is the one image() treats as "no filter".
+     */
+    public function getfilters($page){
+
+        return [
+            "order_by" => [
+                "display" => "Order by",
+                "option" => [
+                    "relevance" => "Relevance",
+                    "latest" => "Newest",
+                    "editorial" => "Curated"
+                ]
+            ],
+            "orientation" => [
+                "display" => "Orientation",
+                "option" => [
+                    "any" => "Any orientation",
+                    "landscape" => "Landscape",
+                    "portrait" => "Portrait",
+                    "squarish" => "Square"
+                ]
+            ],
+            "license" => [
+                "display" => "License",
+                "option" => [
+                    "any" => "Any license",
+                    "only" => "Unsplash+",
+                    "none" => "Free"
+                ]
+            ]
+        ];
+    }
+
+    /**
+     * Performs a GET request and returns the raw response body.
+     *
+     * @param mixed  $proxy   Proxy descriptor, handed as-is to
+     *                        backend->assign_proxy().
+     * @param string $url     Target URL without query string.
+     * @param array  $get     Query parameters; appended when non-empty.
+     * @param string $referer Value of the Referer header; the header is
+     *                        left out when this is empty.
+     * @return string Response body.
+     * @throws Exception Carrying the curl error text when the transfer fails.
+     */
+    private function get($proxy, $url, $get = [], $referer = ""){
+
+        if($get !== []){
+
+            $url .= "?" . http_build_query($get);
+        }
+
+        // NOTE(review): this explicit Accept-Encoding advertises br and
+        // zstd; presumably it replaces the list CURLOPT_ENCODING "" would
+        // send on its own -- confirm the installed libcurl decodes both.
+        $headers = [
+            "User-Agent: " . config::USER_AGENT,
+            "Accept: */*",
+            "Accept-Language: en-US",
+            "Accept-Encoding: gzip, deflate, br, zstd"
+        ];
+
+        if((string)$referer !== ""){
+
+            $headers[] = "Referer: {$referer}";
+        }
+
+        array_push(
+            $headers,
+            "client-geo-region: global",
+            "x-client-version: 8999df28be3f138bf2c646df5d656e4dc6970ba0",
+            "DNT: 1",
+            "Sec-GPC: 1",
+            "Connection: keep-alive",
+            "Sec-Fetch-Dest: empty",
+            "Sec-Fetch-Mode: cors",
+            "Sec-Fetch-Site: same-origin",
+            "Priority: u=0",
+            "TE: trailers"
+        );
+
+        $curlproc = curl_init();
+
+        try{
+            curl_setopt($curlproc, CURLOPT_URL, $url);
+            curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+            curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+            curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+            curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+            curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+            curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+            curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+            $this->backend->assign_proxy($curlproc, $proxy);
+
+            $data = curl_exec($curlproc);
+
+            if(
+                $data === false ||
+                curl_errno($curlproc)
+            ){
+
+                throw new Exception(curl_error($curlproc));
+            }
+
+            return $data;
+
+        }finally{
+
+            // release the handle on the error path as well
+            curl_close($curlproc);
+        }
+    }
+
+    /**
+     * Runs an image search, or continues one from a next-page token.
+     *
+     * @param array $get Request parameters. Reads "npt" (next-page token);
+     *                   when that is empty reads "s" (search term) plus the
+     *                   "order_by", "orientation" and "license" filters.
+     * @return array ["status" => "ok", "npt" => token or null,
+     *               "image" => list of ["title", "source", "url"]].
+     * @throws Exception On an empty search term, an unreadable next-page
+     *                   token, a failed request or undecodable JSON.
+     */
+    public function image($get){
+
+        if(!empty($get["npt"])){
+
+            [$filter, $proxy] =
+                $this->backend->get(
+                    $get["npt"],
+                    "images"
+                );
+
+            $filter = json_decode($filter, true);
+
+            // the stored filter is what store() received below; refuse
+            // anything that does not look like it
+            if(
+                !is_array($filter) ||
+                !isset($filter["query"], $filter["page"])
+            ){
+
+                throw new Exception("Failed to decode the next page token");
+            }
+
+        }else{
+
+            $search = (string)($get["s"] ?? "");
+            if(strlen($search) === 0){
+
+                throw new Exception("Search term is empty!");
+            }
+
+            $proxy = $this->backend->get_ip();
+
+            $filter = [
+                "page" => 1,
+                "per_page" => 20,
+                "query" => $search
+            ];
+
+            // add filters, leaving out the ones set to their neutral value
+            $order_by = $get["order_by"] ?? "relevance";
+            if($order_by !== "relevance"){
+
+                $filter["order_by"] = $order_by;
+            }
+
+            $orientation = $get["orientation"] ?? "any";
+            if($orientation !== "any"){
+
+                $filter["orientation"] = $orientation;
+            }
+
+            // the "license" filter is sent as the "plus" query parameter
+            $license = $get["license"] ?? "any";
+            if($license !== "any"){
+
+                $filter["plus"] = $license;
+            }
+        }
+
+        $out = [
+            "status" => "ok",
+            "npt" => null,
+            "image" => []
+        ];
+
+        // https://unsplash.com/s/photos/shibuya-night?order_by=latest&orientation=landscape&license=free
+        // https://unsplash.com/s/photos/%3Ctest-%3F!-haha == <test ?! haha
+
+        // ?page=1&per_page=20&query=japan
+
+        try{
+            $json =
+                $this->get(
+                    $proxy,
+                    "https://unsplash.com/napi/search/photos",
+                    $filter,
+                    "https://unsplash.com/s/photos/" . rawurlencode(str_replace(" ", "-", $filter["query"]))
+                );
+
+        }catch(Exception $error){
+
+            // keep the underlying cause reachable through getPrevious()
+            throw new Exception("Failed to fetch JSON", 0, $error);
+        }
+
+        $json = json_decode($json, true);
+
+        if(!is_array($json)){
+
+            throw new Exception("Could not decode the JSON payload");
+        }
+
+        // an answer without a result list is treated as "no results"
+        $results = $json["results"] ?? [];
+        if(!is_array($results)){
+
+            $results = [];
+        }
+
+        foreach($results as $image){
+
+            if(
+                !is_array($image) ||
+                !isset($image["urls"]["raw"])
+            ){
+
+                // nothing to link to, skip the entry
+                continue;
+            }
+
+            // drop the query string so our own ?w= parameter can be appended
+            $base = explode("?", $image["urls"]["raw"])[0];
+
+            $out["image"][] = [
+                "title" => $this->imagetitle($image),
+                "source" => $this->imagesources($image, $base),
+                "url" => "https://unsplash.com/photos/" . ($image["slug"] ?? "")
+            ];
+        }
+
+        // next page stuff
+        if($filter["page"] < (int)($json["total_pages"] ?? 0)){
+
+            $filter["page"]++;
+
+            $out["npt"] =
+                $this->backend->store(
+                    json_encode($filter),
+                    "images",
+                    $proxy
+                );
+        }
+
+        return $out;
+    }
+
+    /**
+     * Builds the list of sources for one search result.
+     *
+     * @param array  $image One entry of the "results" list.
+     * @param string $base  Raw image URL without query string.
+     * @return array List of ["url", "width", "height"], largest first.
+     */
+    private function imagesources($image, $base){
+
+        $width = (int)($image["width"] ?? 0);
+        $height = (int)($image["height"] ?? 0);
+
+        $source = [
+            [
+                "url" => $base,
+                "width" => $width,
+                "height" => $height
+            ]
+        ];
+
+        if(
+            !empty($image["premium"]) ||
+            !empty($image["plus"])
+        ){
+
+            // when we get "premium" images, give
+            // 1. highest resolution with watermarks
+            // 2. highest resolution without watermarks
+            //    (if width of image is above 900px, it has watermarks)
+            // 3. thumbnail without watermark
+            $widths = [900, 500];
+        }else{
+
+            // image is free as in freedom(tm)
+            $widths = [500];
+        }
+
+        foreach($widths as $target){
+
+            [$scaled_width, $scaled_height] =
+                $this->imgratio($width, $height, $target);
+
+            $source[] = [
+                "url" => $base . "?w=" . $target,
+                "width" => $scaled_width,
+                "height" => $scaled_height
+            ];
+        }
+
+        return $source;
+    }
+
+    /**
+     * Joins "description" and "alt_description" into one title.
+     *
+     * @param array $image One entry of the "results" list.
+     * @return string Non-empty parts joined by ": ", or "" when both are
+     *                missing.
+     */
+    private function imagetitle($image){
+
+        $title = [];
+
+        foreach(["description", "alt_description"] as $field){
+
+            // cast first: either field may be absent or null, and passing
+            // null to trim() is deprecated since PHP 8.1
+            $text = trim((string)($image[$field] ?? ""));
+
+            if($text !== ""){
+
+                $title[] = $text;
+            }
+        }
+
+        return implode(": ", $title);
+    }
+
+    /**
+     * Scales a width/height pair to a given width, keeping the ratio.
+     *
+     * @param int $width     Original width.
+     * @param int $height    Original height.
+     * @param int $max_width Target width.
+     * @return array [$max_width, scaled height rounded down, as int].
+     *               The height is 0 when $width is not positive.
+     */
+    private function imgratio($width, $height, $max_width){
+
+        if($width <= 0){
+
+            // unknown original size; dividing would raise
+            // DivisionByZeroError on PHP 8
+            return [
+                $max_width,
+                0
+            ];
+        }
+
+        $ratio = $max_width / $width;
+
+        return [
+            $max_width,
+            (int)floor($height * $ratio)
+        ];
+    }
+}
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage