aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author lolcat <will@lolcat.ca> 2026-05-20 11:01:18 -0400
committer lolcat <will@lolcat.ca> 2026-05-20 11:01:18 -0400
commit e1e92d715ec22570f1bdd5a4cdf6ceecb5426428 (patch)
tree 683a358e97afb7dddb82994096e5c1f33a38b40f
parent 394f401921e90935ebe4374c563c384d2f7c104b (diff)
add support for yep api
-rw-r--r-- data/api_keys/yep.txt 1
-rw-r--r-- data/config.php 26
-rw-r--r-- scraper/yep.php 163
3 files changed, 168 insertions, 22 deletions
diff --git a/data/api_keys/yep.txt b/data/api_keys/yep.txt
new file mode 100644
index 0000000..80f6458
--- /dev/null
+++ b/data/api_keys/yep.txt
@@ -0,0 +1 @@
+# Paste Yep API keys here
diff --git a/data/config.php b/data/config.php
index 161458f..c9b9fb3 100644
--- a/data/config.php
+++ b/data/config.php
@@ -24,6 +24,16 @@ class config{
const API_ENABLED = true;
//
+ // 4play (session provider)
+ //
+ // Enable 4play API?
+ const FPLAY_ENABLE_API = true;
+
+ // 4play password. Please set this to something secure if you enable the 4play API.
+ // This password is used to POST sessions to /api/v2/provide_sesh
+ const FPLAY_PASSWORD = "1234";
+
+ //
// BOT PROTECTION
//
@@ -118,10 +128,10 @@ class config{
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
// Changing this might break things.
- const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:149.0) Gecko/20100101 Firefox/149.0";
+ const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:151.0) Gecko/20100101 Firefox/151.0";
// User agent to use with 4get-friendly APIs
- const USER_AGENT_FRIENDLY = "4get-scrapist";
+ const USER_AGENT_FRIENDLY = "4get-scrapist (+https://4get.ca)";
// Proxy pool assignments for each scraper
// false = Use server's raw IP
@@ -131,7 +141,6 @@ class config{
const PROXY_YAHOO = false;
const PROXY_YAHOO_JAPAN = false;
const PROXY_BRAVE = false;
- const PROXY_FB = false; // facebook
const PROXY_GOOGLE = false;
const PROXY_GOOGLE_API = false;
const PROXY_GOOGLE_CSE = false;
@@ -155,7 +164,6 @@ class config{
const PROXY_VIMEO = false;
const PROXY_YEP = false;
const PROXY_PINTEREST = false;
- const PROXY_SANKAKUCOMPLEX = false;
const PROXY_FLICKR = false;
const PROXY_PIXABAY = false;
const PROXY_UNSPLASH = false;
@@ -164,8 +172,6 @@ class config{
const PROXY_VSCO = false;
const PROXY_SEZNAM = false;
const PROXY_NAVER = false;
- const PROXY_GREPPR = false;
- const PROXY_CROWDVIEW = false;
const PROXY_MWMBL = false;
const PROXY_FTM = false; // findthatmeme
const PROXY_IMGUR = false;
@@ -173,6 +179,11 @@ class config{
const PROXY_YANDEX_W = false; // yandex web
const PROXY_YANDEX_I = false; // yandex images
const PROXY_YANDEX_V = false; // yandex videos
+ const PROXY_SAFEBOORU = false;
+ const PROXY_KONACHAN = false;
+ const PROXY_YANDERE = false;
+ const PROXY_TBIB = false;
+ const PROXY_GELBOORU = false;
//
// Scraper-specific parameters
@@ -185,4 +196,7 @@ class config{
// Use "null" to default out to HTML scraping OR specify a string to
// use the API (Eg: "public"). API has less filters.
const MARGINALIA_API_KEY = null;
+
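+ // Yep: true = web searches go through the Yep API (web_api() in scraper/yep.php), false = regular web() code path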
+ const YEP_USE_API = false;
}
diff --git a/scraper/yep.php b/scraper/yep.php
index ad6a4b0..c1855fb 100644
--- a/scraper/yep.php
+++ b/scraper/yep.php
@@ -216,7 +216,7 @@ class yep{
];
}
- private function get($proxy, $url, $get = []){
+ private function get($proxy, $url, $get = [], $use_api = false, $post_data = null, $bearer = null){
$curlproc = curl_init();
@@ -231,21 +231,37 @@ class yep{
curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
- curl_setopt($curlproc, CURLOPT_HTTPHEADER,
- ["User-Agent: " . config::USER_AGENT,
- "Accept: */*",
- "Accept-Language: en-US,en;q=0.5",
- "Accept-Encoding: gzip, deflate, br, zstd",
- "Referer: https://yep.com/",
- "Origin: https://yep.com",
- "DNT: 1",
- "Connection: keep-alive",
- "Sec-Fetch-Dest: empty",
- "Sec-Fetch-Mode: cors",
- "Sec-Fetch-Site: same-site",
- "Priority: u=4",
- "TE: trailers"]
- );
+
+ if($use_api){
+
+ $post_data = json_encode($post_data);
+
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ ["Content-Type: application/json",
+ "Authorization: Bearer $bearer",
+ "Content-Length: " . strlen($post_data)]
+ );
+
+ curl_setopt($curlproc, CURLOPT_POST, true);
+ curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
+ }else{
+
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ ["User-Agent: " . config::USER_AGENT,
+ "Accept: */*",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip, deflate, br, zstd",
+ "Referer: https://yep.com/",
+ "Origin: https://yep.com",
+ "DNT: 1",
+ "Connection: keep-alive",
+ "Sec-Fetch-Dest: empty",
+ "Sec-Fetch-Mode: cors",
+ "Sec-Fetch-Site: same-site",
+ "Priority: u=4",
+ "TE: trailers"]
+ );
+ }
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
@@ -270,6 +286,11 @@ class yep{
public function web($get){
+ if(config::YEP_USE_API){
+
+ return $this->web_api($get);
+ }
+
$search = $get["s"];
if(strlen($search) === 0){
@@ -392,6 +413,116 @@ class yep{
}
+ private function web_api($get){
+
+     // Web search through the Yep API; web() delegates here when config::YEP_USE_API is set.
+     // $get: request parameters. Reads "s" (query), "nsfw" and "lang".
+     // Returns the result array with only the "web" list filled in.
+     // Throws Exception on an empty query, a failed request or an API error.
+     $search = $get["s"];
+     if(strlen($search) === 0){
+
+         throw new Exception("Search term is empty!");
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // request body; get() JSON-encodes it before POSTing
+     $filters = [
+         "query" => $search,
+         "limit" => 100
+     ];
+
+     if($get["nsfw"] === "no"){ $filters["safe_search"] = true; }
+     if($get["lang"] !== "any"){ $filters["language"] = [ $get["lang"] ]; }
+
+     // "key" is sent as the bearer token, "increment" is handed to get_ip()
+     $key_data = $this->backend->get_key();
+
+     try{
+
+         $json =
+             $this->get(
+                 $this->backend->get_ip($key_data["increment"]),
+                 "https://platform.yep.com/api/search",
+                 [],
+                 true,
+                 $filters,
+                 $key_data["key"]
+             );
+
+     }catch(Exception $error){
+
+         // chain the cause so it stays reachable through getPrevious()
+         throw new Exception("Failed to fetch JSON", 0, $error);
+     }
+
+     $json = json_decode($json, true);
+
+     if($json === null){
+
+         throw new Exception("Failed to decode JSON");
+     }
+
+     if(isset($json["error"])){
+
+         // encode non-string payloads instead of printing "Array"
+         $detail = is_string($json["error"]) ? $json["error"] : json_encode($json["error"]);
+         throw new Exception("Yep API returned an error: " . $detail);
+     }
+
+     if(isset($json["errors"])){
+
+         // "message" may be absent; fall back to the encoded error list
+         $detail = is_string($json["message"] ?? null) ? $json["message"] : json_encode($json["errors"]);
+         throw new Exception("Yep API returned the following errors: " . $detail);
+     }
+
+     if(isset($json["success"]) && $json["success"] !== true){
+
+         throw new Exception("Yep API returned a false-y success value");
+     }
+
+     if(!isset($json["results"]) || !is_array($json["results"])){
+
+         throw new Exception("Yep API did not return a results object");
+     }
+
+     foreach($json["results"] as $item){
+
+         $out["web"][] = [
+             "title" => $item["title"] ?? null,
+             "description" => $item["description"] ?? null,
+             "url" => $item["url"] ?? null,
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     return $out;
+ }
+
+
private function detect_cf($payload){
// detect cloudflare page
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage