diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-15 00:25:29 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-15 00:25:29 -0700 |
| commit | 5bf27feebd8087932de138bda1a4605acc95bef4 (patch) | |
| tree | 677dc1cafffaf7b9416e2307d2ba07a442662062 /sega/ongeki_jp.py | |
| parent | 91f4a6ba665ff92a759758bec5ae13528da6a3c1 (diff) | |
Refactor the Sega games to follow a function-factory design.
There is unlikely to be much change between each game.
Diffstat (limited to 'sega/ongeki_jp.py')
| -rw-r--r-- | sega/ongeki_jp.py | 84 |
1 files changed, 47 insertions, 37 deletions
```diff
diff --git a/sega/ongeki_jp.py b/sega/ongeki_jp.py
index 587f358..a2a05fb 100644
--- a/sega/ongeki_jp.py
+++ b/sega/ongeki_jp.py
@@ -1,48 +1,58 @@
 from bs4 import BeautifulSoup
 from datetime import datetime
 import time
+from enum import Enum
 
-def parse_ongeki_refresh_news_site(html: str):
-    soup = BeautifulSoup(html, "html.parser")
-    items = []
+class ParserVersion(Enum):
+    ALPHA=1
 
-    for li in soup.select("li.p-news__listChild"):
-        a_tag = li.select_one("a.p-news__listLink")
-        url = a_tag["href"] if a_tag else None
+def make_ongeki_parser(identifier: str, parser: ParserVersion):
+    def alpha_parser(html: str):
+        soup = BeautifulSoup(html, "html.parser")
+        items = []
 
-        img_tag = li.select_one(".p-news__listThumb img")
-        image_url = img_tag["src"] if img_tag else None
-        image_alt = img_tag["alt"] if img_tag else ""
-        image_link = url if image_url else None
+        for li in soup.select("li.p-news__listChild"):
+            a_tag = li.select_one("a.p-news__listLink")
+            url = a_tag["href"] if a_tag else None
 
-        date_type_text = li.select_one(".p-news__listTextUpper")
-        date_text = date_type_text.text.strip().split("/")[0].strip() if date_type_text else None
-        type_text = date_type_text.text.strip().split("/")[-1].strip() if "/" in date_type_text.text else None
+            img_tag = li.select_one(".p-news__listThumb img")
+            image_url = img_tag["src"] if img_tag else None
+            image_alt = img_tag["alt"] if img_tag else ""
+            image_link = url if image_url else None
 
-        headline_tag = li.select_one(".p-news__listTextUnder")
-        headline = headline_tag.text.strip() if headline_tag else None
+            date_type_text = li.select_one(".p-news__listTextUpper")
+            date_text = date_type_text.text.strip().split("/")[0].strip() if date_type_text else None
+            type_text = date_type_text.text.strip().split("/")[-1].strip() if "/" in date_type_text.text else None
 
-        timestamp = None
-        if date_text:
-            try:
-                dt = datetime.strptime(date_text, "%Y.%m.%d %a")
-                timestamp = int(time.mktime(dt.timetuple()))
-            except:
-                timestamp = None
-        entry = {
-            "date": date_text,
-            "identifier": "ONGEKI_JPN_REFRESH",
-            "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
-            "timestamp": timestamp,
-            "headline": None,
-            "content": image_alt,
-            "url": url,
-            "images": [{
-                "image": image_url,
-                "link": image_link
-            }] if image_url else []
-        }
+            headline_tag = li.select_one(".p-news__listTextUnder")
+            headline = headline_tag.text.strip() if headline_tag else None
 
-        items.append(entry)
+            timestamp = None
+            if date_text:
+                try:
+                    dt = datetime.strptime(date_text, "%Y.%m.%d %a")
+                    timestamp = int(time.mktime(dt.timetuple()))
+                except:
+                    timestamp = None
 
-    return items
+            entry = {
+                "date": date_text,
+                "identifier": identifier,
+                "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
+                "timestamp": timestamp,
+                "headline": None,
+                "content": image_alt,
+                "url": url,
+                "images": [{
+                    "image": image_url,
+                    "link": image_link
+                }] if image_url else []
+            }
+
+            items.append(entry)
+
+        return items
+    if parser == ParserVersion.ALPHA:
+        return alpha_parser
+
+parse_ongeki_refresh_news_site = make_ongeki_parser("ONGEKI_JPN_REFRESH", ParserVersion.ALPHA)
```
