aboutsummaryrefslogtreecommitdiffstats
path: root/taito
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2025-06-16 23:51:25 -0700
committerPinapelz <yukais@pinapelz.com>2025-06-16 23:51:25 -0700
commit05f0f7ea9749efc9d615dad12d49f1f3b53aa7b9 (patch)
tree1a08c2d9efd42a0d5df17e13876d5961ba52fc55 /taito
parentba74f5fd137d6b772e8f5a5f89dc7147ee741f92 (diff)
sf: skip image parsing if 403
they are very aggressive with blocking so whatever
Diffstat (limited to 'taito')
-rw-r--r--taito/street_fighter.py32
1 files changed, 28 insertions, 4 deletions
diff --git a/taito/street_fighter.py b/taito/street_fighter.py
index 1da80bd..987b72b 100644
--- a/taito/street_fighter.py
+++ b/taito/street_fighter.py
@@ -5,14 +5,32 @@ from datetime import datetime
from urllib.parse import urljoin
from enum import Enum
from constants import STREET_FIGHTER_NEWS_SITE
+import requests
+import base64
+
+IMAGE_LIMIT = 10 # only allow 10 images to be processed as b64 is expensive to store
class ParserVersion(Enum):
ALPHA = 1
+def _convert_image_to_base64(img_url: str):
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+ }
+ response = requests.get(img_url, headers=headers)
+ if response.status_code == 200:
+ img_data = response.content
+ img_base64 = base64.b64encode(img_data).decode('utf-8')
+ mime_type = response.headers['Content-Type']
+ return f"data:{mime_type};base64,{img_base64}"
+ else:
+ raise Exception(f"Failed to fetch image from URL: {img_url}, status code: {response.status_code}")
+
def make_sf_parser(identifier: str, parser: ParserVersion):
def alpha_parser(html: str):
soup = BeautifulSoup(html, "html.parser")
news_entries = []
+ img_processed = 0
news_links = soup.find_all('a', class_='btn_latestnews')
for link in news_links:
try:
@@ -49,10 +67,16 @@ def make_sf_parser(identifier: str, parser: ParserVersion):
img_src = img_tag.get('src', '')
if img_src.startswith('/'):
img_src = urljoin('https://sf6ta.jp', img_src)
- images.append({
- 'image': img_src,
- 'link': url
- })
+ if img_processed <= IMAGE_LIMIT:
+ try:
+ img_b64 = _convert_image_to_base64(img_src)
+ images.append({
+ 'image': img_b64,
+ 'link': url
+ })
+ except Exception:
+ pass # Failed likely due to 403. Just show no images in that case
+ img_processed += 1
news_entry = {
'date': post_date.strftime("%Y-%m-%d %H:%M"),
'identifier': identifier,
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage