about | summary | refs | log | tree | commit | diff | stats
path: root/sega
diff options
context:
space:
mode:
author Pinapelz <yukais@pinapelz.com> 2025-04-14 11:35:57 -0700
committer Pinapelz <yukais@pinapelz.com> 2025-04-14 11:35:57 -0700
commit 1e5bdc4bdbda356453e6783db7e5cf0e8e844f8c (patch)
tree 4dcbce6aa658f7cddabafd71322584ae454df873 /sega
parent d93b2060a22ea82f8028df80d4549abee2611f98 (diff)
add ONGEKI JPN to scraper
Diffstat (limited to 'sega')
-rw-r--r-- sega/ongeki_jp.py 48
1 files changed, 48 insertions, 0 deletions
diff --git a/sega/ongeki_jp.py b/sega/ongeki_jp.py
new file mode 100644
index 0000000..587f358
--- /dev/null
+++ b/sega/ongeki_jp.py
@@ -0,0 +1,48 @@
+from bs4 import BeautifulSoup
+from datetime import datetime
+import time
+
+def parse_ongeki_refresh_news_site(html: str):
+ soup = BeautifulSoup(html, "html.parser")
+ items = []
+
+ for li in soup.select("li.p-news__listChild"):
+ a_tag = li.select_one("a.p-news__listLink")
+ url = a_tag["href"] if a_tag else None
+
+ img_tag = li.select_one(".p-news__listThumb img")
+ image_url = img_tag["src"] if img_tag else None
+ image_alt = img_tag["alt"] if img_tag else ""
+ image_link = url if image_url else None
+
+ date_type_text = li.select_one(".p-news__listTextUpper")
+ date_text = date_type_text.text.strip().split("/")[0].strip() if date_type_text else None
+ type_text = date_type_text.text.strip().split("/")[-1].strip() if "/" in date_type_text.text else None
+
+ headline_tag = li.select_one(".p-news__listTextUnder")
+ headline = headline_tag.text.strip() if headline_tag else None
+
+ timestamp = None
+ if date_text:
+ try:
+ dt = datetime.strptime(date_text, "%Y.%m.%d %a")
+ timestamp = int(time.mktime(dt.timetuple()))
+ except:
+ timestamp = None
+ entry = {
+ "date": date_text,
+ "identifier": "ONGEKI_JPN_REFRESH",
+ "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
+ "timestamp": timestamp,
+ "headline": None,
+ "content": image_alt,
+ "url": url,
+ "images": [{
+ "image": image_url,
+ "link": image_link
+ }] if image_url else []
+ }
+
+ items.append(entry)
+
+ return items
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage