about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author lolcat <will@lolcat.ca> 2026-05-02 17:44:53 -0400
committer lolcat <will@lolcat.ca> 2026-05-02 17:44:53 -0400
commit d2b0a414ad5a7639bd107ea273dd1a27fc296811 (patch)
tree a5eeeb575be52bdfced1e46aaa9e9f8dbb7f1792
parent c713d52b5f76906045a15418a04a8c4a6e15cff3 (diff)
yandex videos fix
-rw-r--r-- scraper/yandex.php 407
1 files changed, 133 insertions, 274 deletions
diff --git a/scraper/yandex.php b/scraper/yandex.php
index f73c3fd..50ce2fd 100644
--- a/scraper/yandex.php
+++ b/scraper/yandex.php
@@ -868,123 +868,71 @@ class yandex{
if($get["npt"]){
- [$params, $proxy] =
+ [$get, $proxy] =
$this->backend->get(
$get["npt"],
"video"
);
- $params = json_decode($params, true);
-
- $nsfw = $params["nsfw"];
- unset($params["nsfw"]);
+ $get = json_decode($get, true);
}else{
- $search = $get["s"];
- if(strlen($search) === 0){
+ if(strlen($get["s"]) === 0){
throw new Exception("Search term is empty!");
}
$proxy = $this->backend->get_ip();
- $nsfw = $get["nsfw"];
- $time = $get["time"];
- $duration = $get["duration"];
-
- // https://yandex.com/video/search
- // ?tmpl_version=releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63
- // &format=json
- // &request=
- // {
- // "blocks":[
- // {"block":"extra-content","params":{},"version":2},
- // {"block":"i-global__params:ajax","params":{},"version":2},
- // {"block":"search2:ajax","params":{},"version":2},
- // {"block":"vital-incut","params":{},"version":2},
- // {"block":"content_type_search","params":{},"version":2},
- // {"block":"serp-controller","params":{},"version":2},
- // {"block":"cookies_ajax","params":{},"version":2}
- // ],
- // "metadata":{
- // "bundles":{"lb":"^G]!q<X120"},
- // "assets":{"las":"react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"},
- // "extraContent":{"names":["i-react-ajax-adapter"]}
- // }
- // }
- // &yu=4861394161661655015
- // &from=tabbar
- // &reqid=1693106278500184-6825210746979814879-balancer-l7leveler-kubr-yp-sas-7-BAL-4237
- // &suggest_reqid=486139416166165501562797413447032
- // &text=minecraft
-
- $params = [
- "tmpl_version" => "releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63",
- "format" => "json",
- "request" => json_encode([
- "blocks" => [
- (object)[
- "block" => "extra-content",
- "params" => (object)[],
- "version" => 2
- ],
- (object)[
- "block" => "i-global__params:ajax",
- "params" => (object)[],
- "version" => 2
- ],
- (object)[
- "block" => "search2:ajax",
- "params" => (object)[],
- "version" => 2
- ],
- (object)[
- "block" => "vital-incut",
- "params" => (object)[],
- "version" => 2
- ],
- (object)[
- "block" => "content_type_search",
- "params" => (object)[],
- "version" => 2
- ],
- (object)[
- "block" => "serp-controller",
- "params" => (object)[],
- "version" => 2
- ],
- (object)[
- "block" => "cookies_ajax",
- "params" => (object)[],
- "version" => 2
- ]
- ],
- "metadata" => (object)[
- "bundles" => (object)[
- "lb" => "^G]!q<X120"
- ],
- "assets" => (object)[
- "las" => "react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"
- ],
- "extraContent" => (object)[
- "names" => [
- "i-react-ajax-adapter"
- ]
- ]
+ }
+
+ // https://yandex.com/video/search?text=skycamefalling&from=tabbar&format=json&ncrnd=7271&p=0&parent-reqid=&request={%22blocks%22%3A[{%22block%22%3A%22video-app%22%2C%22params%22%3A{}}]}&serpid=1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL&yu=3091577281773194415&tmpl_version=releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a
+ // https://yandex.com/video/search
+ // ?text=skycamefalling
+ // &from=tabbar
+ // &format=json
+ // &ncrnd=7271
+ // &p=0
+ // &parent-reqid=
+ // &request={%22blocks%22%3A[{%22block%22%3A%22video-app%22%2C%22params%22%3A{}}]} {"blocks":[{"block":"video-app","params":{}}]}
+ // &serpid=1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL
+ // &yu=3091577281773194415
+ // &tmpl_version=releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a
+
+ $params = [
+ "text" => $get["s"],
+ "from" => "tabbar",
+ "format" => "json",
+ "ncrnd" => 7271,
+ "p" => 0,
+ "parent-reqid" => "",
+ "request" => json_encode((object)[
+ "blocks" => [
+ (object)[
+ "block" => "video-app",
+ "params" => (object)[]
]
- ]),
- "text" => $search
- ];
+ ]
+ ]),
+ "serpid" => "1777751040971457-16832445014469941403-balancer-l7leveler-kubr-yp-klg-151-BAL",
+ "yu" => 3091577281773194415,
+ "tmpl_version" => "releases-frontend-video-v1.1816.0__3bdc24e10a8a138a1194877428e220a3ca0dbc5a"
+ ];
+
+ if(isset($get["p"])){
- if($duration != "any"){
-
- $params["duration"] = $duration;
- }
+ $params["p"] = $get["p"];
+ }
+
+ if($get["duration"] != "any"){
- if($time != "any"){
-
- $params["within"] = $time;
- }
+ $params["duration"] = $get["duration"];
+ }
+
+ if($get["time"] != "any"){
+
+ $params["within"] = $get["time"];
}
+
/*
$handle = fopen("scraper/yandex-video.json", "r");
$json = fread($handle, filesize("scraper/yandex-video.json"));
@@ -996,7 +944,7 @@ class yandex{
$proxy,
"https://yandex.com/video/search",
$params,
- $nsfw,
+ $get["nsfw"],
"yandex_v"
);
}catch(Exception $error){
@@ -1011,7 +959,7 @@ class yandex{
throw new Exception("Could not parse JSON");
}
- if(!isset($json["blocks"])){
+ if(!isset($json["results"]["clips"]["items"])){
throw new Exception("Yandex blocked this 4get instance. Please try again in 7~ minutes.");
}
@@ -1026,209 +974,120 @@ class yandex{
"reel" => []
];
- $html = null;
- foreach($json["blocks"] as $block){
+ foreach($json["results"]["clips"]["items"] as $k => $data){
- if(isset($block["html"])){
+ if(isset($data["preview"]["posterSrc"])){
- $html .= $block["html"];
- }
- }
-
- $this->fuckhtml->load($html);
-
- $div =
- $this->fuckhtml
- ->getElementsByTagName("div");
-
- /*
- Get nextpage
- */
- $npt =
- $this->fuckhtml
- ->getElementsByClassName(
- "more more_direction_next i-bem",
- $div
- );
-
- if(count($npt) !== 0){
-
- $params["p"] = "1";
- $params["nsfw"] = $nsfw;
- $out["npt"] =
- $this->backend->store(
- json_encode($params),
- "video",
- $proxy
- );
- }
-
- $items =
- $this->fuckhtml
- ->getElementsByClassName(
- "serp-item",
- $div
- );
-
- foreach($items as $item){
-
- $data =
- json_decode(
- $this->fuckhtml
- ->getTextContent(
- $item["attributes"]["data-video"]
- ),
- true
- );
-
- $this->fuckhtml->load($item);
-
- $thumb =
- $this->fuckhtml
- ->getElementsByClassName(
- "thumb-image__image",
- "img"
- );
-
- $c = 1;
- if(count($thumb) === 0){
+ $poster = $data["preview"]["posterSrc"];
+
+ if(
+ preg_match(
+ '/^\/\//',
+ $data["preview"]["posterSrc"]
+ )
+ ){
+
+ $poster = "https:" . $poster;
+ }
$thumb = [
- "url" => null,
- "ratio" => null
+ "ratio" => "16:9",
+ "url" => $poster
];
}else{
$thumb = [
- "url" =>
- str_replace(
- "//",
- "https://",
- $this->fuckhtml
- ->getTextContent(
- $thumb
- [0]
- ["attributes"]
- ["src"]
- ),
- $c
- ),
- "ratio" => "16:9"
+ "ratio" => null,
+ "url" => null
];
}
- $smallinfos =
- $this->fuckhtml
- ->getElementsByClassName(
- "serp-item__sitelinks-item",
- "div"
- );
-
- $date = null;
- $views = null;
- $first = true;
-
- foreach($smallinfos as $info){
-
- if($first){
-
- $first = false;
- continue;
- }
-
- $info =
- $this->fuckhtml
- ->getTextContent(
- $info
- );
-
- if($temp_date = strtotime($info)){
-
- $date = $temp_date;
- }else{
-
- $views = $this->parseviews($info);
- }
- }
-
- $description =
- $this->fuckhtml
- ->getElementsByClassName(
- "serp-item__text serp-item__text_visibleText_always",
- "div"
- );
-
- if(count($description) === 0){
-
- $description = null;
- }else{
-
- $description =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- );
- }
-
$out["video"][] = [
- "title" =>
- $this->fuckhtml
- ->getTextContent(
- $this->titledots(
- $data["title"]
- )
- ),
- "description" => $description,
+ "title" => $data["relatedParams"]["text"],
+ "description" => $this->titledots($data["description"]),
"author" => [
- "name" => null,
- "url" => null,
+ "name" =>
+ isset($json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["name"]) ?
+ $json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["name"] : null,
+ "url" =>
+ isset($json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["origUrl"]) ?
+ $json["results"]["clips"]["dups"][$k]["host"]["secondPart"]["origUrl"] : null,
"avatar" => null
],
- "date" => $date,
+ "date" =>
+ isset($json["results"]["clips"]["dups"][$k]["date"]) ?
+ strtotime($json["results"]["clips"]["dups"][$k]["date"]) : null,
"duration" =>
- (int)$data
- ["counters"]
- ["toHostingLoaded"]
- ["stredParams"]
- ["duration"],
- "views" => $views,
+ isset($json["results"]["clips"]["dups"][$k]["duration"]["value"]) ?
+ (int)$json["results"]["clips"]["dups"][$k]["duration"]["value"] : null,
+ "views" =>
+ isset($json["results"]["clips"]["dups"][$k]["views"]["text"]) ?
+ $this->parseviews($json["results"]["clips"]["dups"][$k]["views"]["text"]) : null,
"thumb" => $thumb,
"url" =>
- str_replace(
- "http://",
+ preg_replace(
+ '/^http:\/\//',
"https://",
- $this->fuckhtml
- ->getTextContent(
- $data["counters"]
- ["toHostingLoaded"]
- ["postfix"]
- ["href"]
- ),
- $c
+ $data["relatedParams"]["related_url"]
)
];
}
+ // get npt
+ if($json["results"]["search"]["hasNextPage"]){
+
+ $get["p"] = (int)$json["results"]["search"]["currentPage"] + 1;
+
+ $out["npt"] =
+ $this->backend->store(
+ json_encode($get),
+ "video",
+ $proxy
+ );
+ }
+
return $out;
}
- private function parseviews($text){
+ private function parseviews($number){
+
+ // decimal should always be 1 number long
+ $number = explode(" ", $number, 2);
+ $number = $number[0];
- $text = explode(" ", $text);
+ $unit = strtolower($number[strlen($number) - 1]);
- $num = (float)$text[0];
- $mod = $text[1];
+ $tmp = explode(".", $number, 2);
+ $number = (int)$number;
+
+ if(count($tmp) === 2){
+
+ $decimal = (int)$tmp[1];
+ }else{
+
+ $decimal = 0;
+ }
- switch($mod){
+ switch($unit){
- case "bln.": $num = $num * 1000000000; break;
- case "mln.": $num = $num * 1000000; break;
- case "thsd.": $num = $num * 1000; break;
+ case "k":
+ $exponant = 1000;
+ break;
+
+ case "m":
+ $exponant = 1000000;
+ break;
+
+ case "b";
+ $exponant = 1000000000;
+ break;
+
+ default:
+ $exponant = 1;
+ break;
}
- return $num;
+ return ($number * $exponant) + ($decimal * ($exponant / 10));
}
private function titledots($title){
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage