about | summary | refs | log | tree | commit | diff | stats
path: root/sega/ongeki_jp.py
diff options
context:
space:
mode:
author: Pinapelz <yukais@pinapelz.com> 2026-03-12 13:56:30 -0700
committer: Pinapelz <yukais@pinapelz.com> 2026-03-12 13:56:50 -0700
commit: caa3cf245186ab0f6fb33e63a7dd838d834da12e (patch)
tree: bc5742a134ecabf0b9d35cc12b1d6f67defd5da7 /sega/ongeki_jp.py
parent: 5658441ab9b703c95a48e654d41e45cc3a55ffd3 (diff)
refactor: move to common NewsSource interface
Clean up imports by defining initializer modules and a decorator; remove legacy scrapers; remove the single factory for Sega games (the sites don't change that much).
Diffstat (limited to 'sega/ongeki_jp.py')
-rw-r--r-- sega/ongeki_jp.py 102
1 files changed, 46 insertions, 56 deletions
diff --git a/sega/ongeki_jp.py b/sega/ongeki_jp.py
index f9c2dc4..c173189 100644
--- a/sega/ongeki_jp.py
+++ b/sega/ongeki_jp.py
@@ -1,68 +1,58 @@
-import time
-from datetime import datetime
-from enum import Enum
+from datetime import datetime, timezone, timedelta
from bs4 import BeautifulSoup
+JST = timezone(timedelta(hours=9))
-class ParserVersion(Enum):
- ALPHA = 1
+def parse_ongeki_news_site(html: str):
+ identifier = "ONGEKI_JPN"
+ soup = BeautifulSoup(html, "html.parser")
+ items = []
-def make_ongeki_parser(identifier: str, parser: ParserVersion):
- def alpha_parser(html: str):
- soup = BeautifulSoup(html, "html.parser")
- items = []
+ for li in soup.select("li.p-news__listChild"):
+ a_tag = li.select_one("a.p-news__listLink")
+ url = a_tag["href"] if a_tag else None
- for li in soup.select("li.p-news__listChild"):
- a_tag = li.select_one("a.p-news__listLink")
- url = a_tag["href"] if a_tag else None
+ img_tag = li.select_one(".p-news__listThumb img")
+ image_url = img_tag["src"] if img_tag else None
+ image_alt = img_tag["alt"] if img_tag else ""
+ image_link = url if image_url else None
- img_tag = li.select_one(".p-news__listThumb img")
- image_url = img_tag["src"] if img_tag else None
- image_alt = img_tag["alt"] if img_tag else ""
- image_link = url if image_url else None
+ date_type_text = li.select_one(".p-news__listTextUpper")
+ date_text = (
+ date_type_text.text.strip().split("/")[0].strip()
+ if date_type_text
+ else None
+ )
+ type_text = (
+ date_type_text.text.strip().split("/")[-1].strip()
+ if date_type_text and "/" in date_type_text.text
+ else None
+ )
- date_type_text = li.select_one(".p-news__listTextUpper")
- date_text = (
- date_type_text.text.strip().split("/")[0].strip()
- if date_type_text
- else None
- )
- type_text = (
- date_type_text.text.strip().split("/")[-1].strip()
- if "/" in date_type_text.text
- else None
- )
+ timestamp = None
+ if date_text:
+ try:
+ dt = datetime.strptime(date_text, "%Y.%m.%d %a").replace(tzinfo=JST)
+ timestamp = int(dt.timestamp())
+ except Exception:
+ timestamp = None
- timestamp = None
- if date_text:
- try:
- dt = datetime.strptime(date_text, "%Y.%m.%d %a")
- timestamp = int(time.mktime(dt.timetuple()))
- except:
- timestamp = None
+ entry = {
+ "date": date_text,
+ "identifier": identifier,
+ "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
+ "timestamp": timestamp,
+ "headline": None,
+ "content": image_alt,
+ "url": url,
+ "is_ai_summary": False,
+ "images": [{"image": image_url, "link": image_link}]
+ if image_url
+ else [],
+ }
- entry = {
- "date": date_text,
- "identifier": identifier,
- "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
- "timestamp": timestamp,
- "headline": None,
- "content": image_alt,
- "url": url,
- "is_ai_summary": False,
- "images": [{"image": image_url, "link": image_link}]
- if image_url
- else [],
- }
+ items.append(entry)
- items.append(entry)
-
- return items
-
- if parser == ParserVersion.ALPHA:
- return alpha_parser
-
-
-parse_ongeki_news_site = make_ongeki_parser("ONGEKI_JPN", ParserVersion.ALPHA)
+ return items
\ No newline at end of file
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage