aboutsummaryrefslogtreecommitdiffstats
path: root/site_scraper.py
diff options
context:
space:
mode:
author: Pinapelz <yukais@pinapelz.com> 2025-05-14 15:29:14 -0700
committer: Pinapelz <yukais@pinapelz.com> 2025-05-14 15:29:14 -0700
commit: a98cbc880c6e2a862ee1a1716fe522410f91ccd9 (patch)
tree: a6d8f6160472e4c7b49426d0a6e670a860ca15c3 /site_scraper.py
parent: 046a668957f3827a59c9752869a0f7a060c9e79f (diff)
wmmt: 6rr and 6rr+ scraper
Diffstat (limited to 'site_scraper.py')
-rw-r--r-- site_scraper.py | 4
1 file changed, 3 insertions, 1 deletion
diff --git a/site_scraper.py b/site_scraper.py
index 9efa4b6..e9301b5 100644
--- a/site_scraper.py
+++ b/site_scraper.py
@@ -68,7 +68,7 @@ class SiteScraper:
print("WebDriver closed successfully")
-def download_site_as_html(url: str, timeout: int = 10) -> str:
+def download_site_as_html(url: str, timeout: int = 10, response_encoding=None) -> str:
headers = {
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
@@ -84,6 +84,8 @@ def download_site_as_html(url: str, timeout: int = 10) -> str:
try:
response = requests.get(url, headers=headers, timeout=timeout)
+ if response_encoding:
+ response.encoding = response_encoding
response.raise_for_status()
return response.text
except requests.RequestException as e:
Send patches to the email below:
yukais@pinapelz.com
Include the subject [PATCH repo_name].
pinapelz.com
homepage