about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- scraper/mojeek.php 29
1 files changed, 24 insertions, 5 deletions
diff --git a/scraper/mojeek.php b/scraper/mojeek.php
index b2d6ed5..c15d34c 100644
--- a/scraper/mojeek.php
+++ b/scraper/mojeek.php
@@ -501,11 +501,6 @@ class mojeek{
throw new Exception("Failed to get HTML");
}
- /*
- $handle = fopen("scraper/mojeek.html", "r");
- $html = fread($handle, filesize("scraper/mojeek.html"));
- fclose($handle);*/
-
}
$out = [
@@ -526,6 +521,8 @@ class mojeek{
$this->fuckhtml->load($html);
+ $this->detect_block();
+
$results =
$this->fuckhtml
->getElementsByClassName("results-standard", "ul");
@@ -1034,6 +1031,8 @@ class mojeek{
$this->fuckhtml->load($html);
+ $this->detect_block();
+
$articles =
$this->fuckhtml->getElementsByTagName("article");
@@ -1166,6 +1165,26 @@ class mojeek{
return $out;
}
+ /**
+  * Checks the document currently held by $this->fuckhtml for a
+  * "403 - Forbidden" page title and aborts the scrape if one is found.
+  *
+  * Callers invoke this right after $this->fuckhtml->load($html), before
+  * they start extracting results. A document without any <title> element
+  * passes through untouched.
+  *
+  * @throws Exception when the first <title> reads "403 - Forbidden"
+  */
+ private function detect_block(){
+
+ 	$title_nodes =
+ 		$this->fuckhtml
+ 		->getElementsByTagName("title");
+
+ 	// No <title> at all: nothing to inspect.
+ 	if(count($title_nodes) === 0){
+
+ 		return;
+ 	}
+
+ 	// Only the first <title> element is considered.
+ 	$title_text =
+ 		$this->fuckhtml
+ 		->getTextContent($title_nodes[0]["innerHTML"]);
+
+ 	if($title_text == "403 - Forbidden"){
+
+ 		throw new Exception("Mojeek blocked this instance or request proxy.");
+ 	}
+ }
+
private function titledots($title){
return trim($title, ". \t\n\r\0\x0B");
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage