about summary refs log tree commit diff stats
path: root/scraper/startpage.php
diff options
context:
space:
mode:
author lolcat <will@lolcat.ca> 2024-07-29 18:25:25 -0400
committer lolcat <will@lolcat.ca> 2024-07-29 18:25:25 -0400
commit 4e4796bb714a1672a6f9b6ea6c58a239a2606dd8 (patch)
tree 95ded0983a39eff8c19aef63e606f1700963425d /scraper/startpage.php
parent ff06bc1f51cc446966f7214207e2578cd8b18179 (diff)
startpage captcha handle
Diffstat (limited to 'scraper/startpage.php')
-rw-r--r-- scraper/startpage.php 50
1 files changed, 50 insertions, 0 deletions
diff --git a/scraper/startpage.php b/scraper/startpage.php
index ca15779..ef9def9 100644
--- a/scraper/startpage.php
+++ b/scraper/startpage.php
@@ -408,6 +408,8 @@ class startpage{
//$html = file_get_contents("scraper/startpage.html");
}
+ $this->detect_captcha($html);
+
if(
preg_match(
'/React\.createElement\(UIStartpage\.AppSerpWeb, ?(.+)\),$/m',
@@ -1057,6 +1059,8 @@ class startpage{
}
}
+ $this->detect_captcha($html);
+
$out = [
"status" => "ok",
"npt" => null,
@@ -1186,6 +1190,8 @@ class startpage{
}
}
+ $this->detect_captcha($html);
+
if(
preg_match(
'/React\.createElement\(UIStartpage\.AppSerpVideos, ?(.+)\),$/m',
@@ -1326,6 +1332,8 @@ class startpage{
}
}
+ $this->detect_captcha($html);
+
if(
preg_match(
'/React\.createElement\(UIStartpage\.AppSerpNews, ?(.+)\),$/m',
@@ -1526,4 +1534,46 @@ class startpage{
$text
);
}
+
+ private function detect_captcha($html){
+ // Throws an Exception when $html is a "Redirecting..." page instead of results; otherwise falls through and returns nothing.
+ $this->fuckhtml->load($html);
+ // NOTE(review): load() runs on the shared $this->fuckhtml parser; presumably it replaces any previously loaded document -- confirm for callers that keep parsing afterwards.
+ $title =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "title"
+ );
+ // Only the first <title> element is inspected; a page with no <title>, or a different title, is not treated as a redirect.
+ if(
+ count($title) !== 0 &&
+ $title[0]["innerHTML"] == "Redirecting..."
+ ){
+ // From here on the function always throws; the only open question is which message to use.
+ // Check if it's a captcha: collect every <a> element of the redirect page.
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ );
+ // The captcha URL is searched for in each link's text content (its innerHTML), not in an href attribute.
+ foreach($as as $a){
+ // NOTE(review): getTextContent() presumably strips markup / decodes entities from the innerHTML -- confirm against the fuckhtml helper.
+ if(
+ strpos(
+ $this->fuckhtml
+ ->getTextContent(
+ $a["innerHTML"]
+ ),
+ "https://www.startpage.com/sp/captcha"
+ ) !== false
+ ){
+ // First link whose text contains the captcha URL ends the scan.
+ throw new Exception("Startpage returned a captcha");
+ }
+ }
+ // Redirect page without a captcha link: still raise an error rather than return to the caller.
+ throw new Exception("Startpage redirected the scraper to an unhandled page");
+ }
+ }
}
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage