# Reconstructed from a git patch for sega/ongeki_jp.py
# (commit 5bf27feebd8087932de138bda1a4605acc95bef4, Pinapelz, 2025-04-15):
# "refactor sega games to follow function factory design --
# unlikely to be much change between each game".
"""News-page parser for ONGEKI (JP), built through a small parser factory."""

import time
from datetime import datetime
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Tuple


class ParserVersion(Enum):
    """Selects which parsing routine ``make_ongeki_parser`` hands back."""

    ALPHA = 1


# Category labels that are reported as ``None`` in the ``"type"`` field.
_UNTYPED_CATEGORIES = ("GAME", "CARDMAKER")

# Layout of the date portion of the ".p-news__listTextUpper" text.
_DATE_FORMAT = "%Y.%m.%d %a"


def _split_date_and_type(
    upper_text: Optional[str],
) -> Tuple[Optional[str], Optional[str]]:
    """Split the "<date> / <type>" label into its two parts.

    Returns ``(None, None)`` when the label element was missing, and a
    ``None`` type when the label contains no ``/`` separator.
    """
    if upper_text is None:
        return None, None
    parts = upper_text.strip().split("/")
    date_text = parts[0].strip()
    type_text = parts[-1].strip() if len(parts) > 1 else None
    return date_text, type_text


def _parse_timestamp(date_text: Optional[str]) -> Optional[int]:
    """Convert a date label to a local-time epoch, or ``None`` if unparsable."""
    if not date_text:
        return None
    try:
        parsed = datetime.strptime(date_text, _DATE_FORMAT)
        return int(time.mktime(parsed.timetuple()))
    except (ValueError, OverflowError):
        # Best effort: an unexpected date layout must not drop the entry.
        return None


def make_ongeki_parser(
    identifier: str, parser: ParserVersion
) -> Callable[[str], List[Dict[str, Any]]]:
    """Build a news parser that tags every entry with *identifier*.

    Args:
        identifier: Value stored under ``"identifier"`` in each entry.
        parser: Which parsing routine to return.

    Returns:
        A function taking the page HTML and returning a list of entry
        dicts with the keys ``date``, ``identifier``, ``type``,
        ``timestamp``, ``headline``, ``content``, ``url`` and ``images``.

    Raises:
        ValueError: If *parser* is not a supported ``ParserVersion``.
    """

    def alpha_parser(html: str) -> List[Dict[str, Any]]:
        """Parse every ``li.p-news__listChild`` item found in *html*."""
        # Imported lazily so that importing this module and building a
        # parser do not require bs4 until HTML is actually parsed.
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html, "html.parser")
        items = []

        for li in soup.select("li.p-news__listChild"):
            a_tag = li.select_one("a.p-news__listLink")
            url = a_tag.get("href") if a_tag is not None else None

            # ``.get`` keeps one malformed <img> from aborting the whole page.
            img_tag = li.select_one(".p-news__listThumb img")
            image_url = img_tag.get("src") if img_tag is not None else None
            image_alt = img_tag.get("alt", "") if img_tag is not None else ""

            upper_tag = li.select_one(".p-news__listTextUpper")
            date_text, type_text = _split_date_and_type(
                upper_tag.text if upper_tag is not None else None
            )

            # NOTE: "headline" is always emitted as None; the thumbnail's alt
            # text is what ends up in "content". The text of
            # ".p-news__listTextUnder" is not part of the entry.
            items.append({
                "date": date_text,
                "identifier": identifier,
                "type": None if type_text in _UNTYPED_CATEGORIES else type_text,
                "timestamp": _parse_timestamp(date_text),
                "headline": None,
                "content": image_alt,
                "url": url,
                # The image links to the same URL as the news item itself.
                "images": (
                    [{"image": image_url, "link": url}] if image_url else []
                ),
            })

        return items

    if parser == ParserVersion.ALPHA:
        return alpha_parser
    # Fail at construction time instead of returning None and blowing up
    # later with "'NoneType' object is not callable".
    raise ValueError(f"Unsupported ONGEKI parser version: {parser!r}")


parse_ongeki_refresh_news_site = make_ongeki_parser(
    "ONGEKI_JPN_REFRESH", ParserVersion.ALPHA
)