From 883a650f846af7ac60d358d772aa22cbf89fd807 Mon Sep 17 00:00:00 2001 From: lolcat Date: Fri, 9 Aug 2024 10:06:08 -0400 Subject: implement SSL check for botretards --- scraper/mwmbl.php | 2 +- scraper/pinterest.php | 97 +++++++++++++++++++++++++++++++-------------------- 2 files changed, 60 insertions(+), 39 deletions(-) (limited to 'scraper') diff --git a/scraper/mwmbl.php b/scraper/mwmbl.php index f2f8b70..631b90c 100644 --- a/scraper/mwmbl.php +++ b/scraper/mwmbl.php @@ -52,7 +52,7 @@ class mwmbl{ curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); - curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); // @todo reset + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); $this->backend->assign_proxy($curlproc, $proxy); diff --git a/scraper/pinterest.php b/scraper/pinterest.php index f3c4439..3787f77 100644 --- a/scraper/pinterest.php +++ b/scraper/pinterest.php @@ -13,7 +13,7 @@ class pinterest{ return []; } - private function get($url, $get = []){ + private function get($proxy, $url, $get = []){ $curlproc = curl_init(); @@ -45,7 +45,7 @@ class pinterest{ curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); - $this->proxy->assign_proxy($curlproc); + $this->backend->assign_proxy($curlproc, $proxy); $data = curl_exec($curlproc); @@ -60,45 +60,63 @@ class pinterest{ public function image($get){ - $search = $get["s"]; - - $out = [ - "status" => "ok", - "npt" => null, - "image" => [] - ]; - - $filter = [ - "source_url" => "/search/pins/?q=" . urlencode($search), - "rs" => "typed", - "data" => - json_encode( - [ - "options" => [ - "article" => null, - "applied_filters" => null, - "appliedProductFilters" => "---", - "auto_correction_disabled" => false, - "corpus" => null, - "customized_rerank_type" => null, - "filters" => null, - "query" => $search, - "query_pin_sigs" => null, - "redux_normalize_feed" => true, - "rs" => "typed", - "scope" => "pins", // pins, boards, videos, - "source_id" => null - ], - "context" => [] - ] - ), - "_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1) - ]; + if($get["npt"]){ + + // @TODO + // post data for next page + $data = [ + "source_url" => "/search/pins/?q=" . urlencode($search) . "&rs=typed", + "data" => + json_encode( + [ + // {"options":{"applied_filters":null,"appliedProductFilters":"---","article":null,"auto_correction_disabled":false,"corpus":null,"customized_rerank_type":null,"domains":null,"filters":null,"journey_depth":null,"page_size":null,"price_max":null,"price_min":null,"query_pin_sigs":null,"query":"higurashi","redux_normalize_feed":true,"rs":"typed","scope":"pins","selected_one_bar_modules":null,"source_id":null,"source_module_id":null,"top_pin_id":null,"bookmarks":["Y2JVSG81V2sxcmNHRlpWM1J5VFVad1ZsWlVRbXhpVmtreVZsZHpOV0pIU2tkV2FscFhVbXhhVkZreU1WSmtNREZWVjIxR1RrMXNTbEJXYlhSaFVtMVdjMVZ1U2xaaWEzQnpXVlJPVTJWV1pISlhhM1JYVm10V05sVldVbE5XVjBwMVVXMUdWVll6VFhoVWJYaFhWMVp3Ums1V1RsTmlSbGt5Vm10YWFtVkdWbkpOU0dSUFZsZG9XRmxzWkc5VlZscHlWbGhrYkdKR1NubFdWelZQWVVaYWRHVkVRbFppUmtwVVZrUktWMlJIVWtWV2JHaHBVakZLU0Zkc1pEUmtNVnBZVW10b2FsSXdXbkJXYlRWRFpHeGFSMWRzVG1oaGVrWllXV3RvVTFVeFpFaFZiRUpoVm5wRk1GbHFSbXRYVjA1R1YyczFWMVpHV2pSWFZtaDNVakZrY2sxWVRsaGlhM0JXV1ZSR1MyRkdiRlZTYm1SVVVteHdXbGxWVlRGVk1VbDVWRmhrVjAxdVVuWlVhMXBTWlVaT2MxcEhSbE5TTWswMVdtdGFWMU5YU2paVmJYaFRUVmhDUjFZeU5YZFVNVkY0VjJ0b1ZXRnJOVlpVVmxwTFVURndXR042VmxOV2ExcGFXVlZWTlZVeFNYZE5WRTVYVWtWYVZGWkhNVTlXTVU1WllVWk9hR1ZyV2s1WFZ6QXhZakpPVjFWWWFHRlNWbkJRVm14U1IwMUdXWGxOVkVKVlRWWnNORll5TURWV1YwVjVWV3hDV21FeGNETmFSVnByVjFkS1IyTkhhR2xYUjJkM1ZtdGFhMlF4VVhsVGJGcE9Wa1p3YjFwWGVFdFZWbFp4VW14YWJGWnRVbHBaTUdoTFZHMUtTR1ZJYUZkV2VrWjJWMVphU21ReVJYcGpSbFpwVW10d1RGZHJVa0pPVms1SFZHNVNUbFl3V2xoVmJYUldaVVpaZUZremFGUk5hM0JYVkZaYVYyRkZNSGxWYkVKYVlrWlZlRnBGV210WFIwNUpVMnMxVTFaR1dscFdWekI0VFVaV1IxTllaR3BUUlhCb1dWUkdWbVZHVm5SbFJuQnNZbFpKTWxSVlVYaFBSVGxGV1hwR1QyVnJSVEZVVlZKT1RrVXhSVkpVUWs5bGJFVXhWRmhzZDFOR1ZsWmtNMFp0VWpGYWIxZFhjRXBsUlRGSVZWaHdUbFl4YTNoVVZWSnFUVVUxV0ZadGFFOVNSVnB6Vkd0a1drMUdiRFpUVkVaT1pXMWplRmRzVWxkaFJuQllWVlJTVDJWdFRqWlVNVkpTWlZad2NWcEhkRTlsYTFwMFZGVlNhMkpWTVZWVFZFcE9Wa1pzTmxkWE1WSk9WVEYwVlcweFVGWXdXVFJXUjNSWFYwZGFRbEJVTVRoUFJHTXhUbnBCTlUxRVRUUk5SRVV3VG5wUk5VMTVjRWhWVlhkeFprUlZlRTlFVVRKWlZHc3lUMWRSTWsxVVVUSk9iVnBvV1RKWmVrNTZXWGhPTWs1cFQwUkZNVTlFVm1sTlZGcHBUV3BTYTFsWFRtcE9SR015VG1wVk5GbHFaR2haVjFacldWUmFiVmxxWkdoYVZGWnFUa1JXT0ZSclZsaG1RVDA5fFVIbzVhRkpYZUc1WFYyUlpWVEpHYkdGNk1XWk5ha1ptVFZSR09FOUVZekZPZWtFMVRVUk5ORTFFUlRCT2VsRTFUWGx3U0ZWVmQzRm1SMWw1VFZSUk1WbDZUVEJhUjFGNVQxZFNhVnB0VlRGT1JFVXdXVlJuZVU1cVRUUk5hbU40VDBSSk1VNXFWVEZOYlZwcVdsUnJlRTFFVVhwWmVsVjNXbXBvYkU1dFJYbE9ha0Y2VDFSSk5VMTZWVEJaYWtJNFZHdFdXR1pCUFQwPXxOb25lfDg3NTcwOTAzODAxNDc0OTMqR1FMKnwzMjM3YjM3ZGNhMGU3YjYyYzYzYzAyZGJkNGU1MjdlNzMyMTExMTNlMmUyMzEyOWM2MDAzYmU1ZTlmZjkwYjAwfE5FV3w="]},"context":{}} + ] + ); + ]; + + }else{ + + $search = $get["s"]; + if(strlen($search) === 0){ + + throw new Exception("Search term is empty!"); + } + + $filter = [ + "source_url" => "/search/pins/?q=" . urlencode($search), + "rs" => "typed", + "data" => + json_encode( + [ + "options" => [ + "article" => null, + "applied_filters" => null, + "appliedProductFilters" => "---", + "auto_correction_disabled" => false, + "corpus" => null, + "customized_rerank_type" => null, + "filters" => null, + "query" => $search, + "query_pin_sigs" => null, + "redux_normalize_feed" => true, + "rs" => "typed", + "scope" => "pins", // pins, boards, videos, + "source_id" => null + ], + "context" => [] + ] + ), + "_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1) + ]; + + $proxy = $this->backend->get_ip(); + } try{ $json = json_decode( $this->get( + $proxy, "https://www.pinterest.ca/resource/BaseSearchResource/get/", $filter ), @@ -115,7 +133,11 @@ class pinterest{ throw new Exception("Failed to decode JSON"); } - //print_r($json); + $out = [ + "status" => "ok", + "npt" => null, + "image" => [] + ]; foreach( $json @@ -189,7 +211,6 @@ class pinterest{ break; case "board": - if(isset($item["cover_pin"]["image_url"])){ $image = [ -- cgit v1.2.3