From caa3cf245186ab0f6fb33e63a7dd838d834da12e Mon Sep 17 00:00:00 2001
From: Pinapelz
Date: Thu, 12 Mar 2026 13:56:30 -0700
Subject: refactor: move to common NewsSource interface

cleanup imports by defining initaliazers modules and decorator
remove legacy scrapers
remove single factory for sega games (sites don't change that much)
---
 sega/ongeki_jp.py | 120 +++++++++++++++++++++++++-----------------------------
 1 file changed, 55 insertions(+), 65 deletions(-)

(limited to 'sega/ongeki_jp.py')

diff --git a/sega/ongeki_jp.py b/sega/ongeki_jp.py
index f9c2dc4..c173189 100644
--- a/sega/ongeki_jp.py
+++ b/sega/ongeki_jp.py
@@ -1,68 +1,58 @@
-import time
-from datetime import datetime
-from enum import Enum
+from datetime import datetime, timezone, timedelta
 
 from bs4 import BeautifulSoup
 
-
-class ParserVersion(Enum):
-    ALPHA = 1
-
-
-def make_ongeki_parser(identifier: str, parser: ParserVersion):
-    def alpha_parser(html: str):
-        soup = BeautifulSoup(html, "html.parser")
-        items = []
-
-        for li in soup.select("li.p-news__listChild"):
-            a_tag = li.select_one("a.p-news__listLink")
-            url = a_tag["href"] if a_tag else None
-
-            img_tag = li.select_one(".p-news__listThumb img")
-            image_url = img_tag["src"] if img_tag else None
-            image_alt = img_tag["alt"] if img_tag else ""
-            image_link = url if image_url else None
-
-            date_type_text = li.select_one(".p-news__listTextUpper")
-            date_text = (
-                date_type_text.text.strip().split("/")[0].strip()
-                if date_type_text
-                else None
-            )
-            type_text = (
-                date_type_text.text.strip().split("/")[-1].strip()
-                if "/" in date_type_text.text
-                else None
-            )
-
-            timestamp = None
-            if date_text:
-                try:
-                    dt = datetime.strptime(date_text, "%Y.%m.%d %a")
-                    timestamp = int(time.mktime(dt.timetuple()))
-                except:
-                    timestamp = None
-
-            entry = {
-                "date": date_text,
-                "identifier": identifier,
-                "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
-                "timestamp": timestamp,
-                "headline": None,
-                "content": image_alt,
-                "url": url,
-                "is_ai_summary": False,
-                "images": [{"image": image_url, "link": image_link}]
-                if image_url
-                else [],
-            }
-
-            items.append(entry)
-
-        return items
-
-    if parser == ParserVersion.ALPHA:
-        return alpha_parser
-
-
-parse_ongeki_news_site = make_ongeki_parser("ONGEKI_JPN", ParserVersion.ALPHA)
+JST = timezone(timedelta(hours=9))
+
+
+def parse_ongeki_news_site(html: str):
+    identifier = "ONGEKI_JPN"
+    soup = BeautifulSoup(html, "html.parser")
+    items = []
+
+    for li in soup.select("li.p-news__listChild"):
+        a_tag = li.select_one("a.p-news__listLink")
+        url = a_tag["href"] if a_tag else None
+
+        img_tag = li.select_one(".p-news__listThumb img")
+        image_url = img_tag["src"] if img_tag else None
+        image_alt = img_tag["alt"] if img_tag else ""
+        image_link = url if image_url else None
+
+        date_type_text = li.select_one(".p-news__listTextUpper")
+        date_text = (
+            date_type_text.text.strip().split("/")[0].strip()
+            if date_type_text
+            else None
+        )
+        type_text = (
+            date_type_text.text.strip().split("/")[-1].strip()
+            if date_type_text and "/" in date_type_text.text
+            else None
+        )
+
+        timestamp = None
+        if date_text:
+            try:
+                dt = datetime.strptime(date_text, "%Y.%m.%d %a").replace(tzinfo=JST)
+                timestamp = int(dt.timestamp())
+            except Exception:
+                timestamp = None
+
+        entry = {
+            "date": date_text,
+            "identifier": identifier,
+            "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
+            "timestamp": timestamp,
+            "headline": None,
+            "content": image_alt,
+            "url": url,
+            "is_ai_summary": False,
+            "images": [{"image": image_url, "link": image_link}]
+            if image_url
+            else [],
+        }
+
+        items.append(entry)
+
+    return items
\ No newline at end of file
-- 
cgit v1.2.3