diff options
Diffstat (limited to 'sega/ongeki_jp.py')
| -rw-r--r-- | sega/ongeki_jp.py | 102 |
1 files changed, 46 insertions, 56 deletions
diff --git a/sega/ongeki_jp.py b/sega/ongeki_jp.py index f9c2dc4..c173189 100644 --- a/sega/ongeki_jp.py +++ b/sega/ongeki_jp.py @@ -1,68 +1,58 @@ -import time -from datetime import datetime -from enum import Enum +from datetime import datetime, timezone, timedelta from bs4 import BeautifulSoup +JST = timezone(timedelta(hours=9)) -class ParserVersion(Enum): - ALPHA = 1 +def parse_ongeki_news_site(html: str): + identifier = "ONGEKI_JPN" + soup = BeautifulSoup(html, "html.parser") + items = [] -def make_ongeki_parser(identifier: str, parser: ParserVersion): - def alpha_parser(html: str): - soup = BeautifulSoup(html, "html.parser") - items = [] + for li in soup.select("li.p-news__listChild"): + a_tag = li.select_one("a.p-news__listLink") + url = a_tag["href"] if a_tag else None - for li in soup.select("li.p-news__listChild"): - a_tag = li.select_one("a.p-news__listLink") - url = a_tag["href"] if a_tag else None + img_tag = li.select_one(".p-news__listThumb img") + image_url = img_tag["src"] if img_tag else None + image_alt = img_tag["alt"] if img_tag else "" + image_link = url if image_url else None - img_tag = li.select_one(".p-news__listThumb img") - image_url = img_tag["src"] if img_tag else None - image_alt = img_tag["alt"] if img_tag else "" - image_link = url if image_url else None + date_type_text = li.select_one(".p-news__listTextUpper") + date_text = ( + date_type_text.text.strip().split("/")[0].strip() + if date_type_text + else None + ) + type_text = ( + date_type_text.text.strip().split("/")[-1].strip() + if date_type_text and "/" in date_type_text.text + else None + ) - date_type_text = li.select_one(".p-news__listTextUpper") - date_text = ( - date_type_text.text.strip().split("/")[0].strip() - if date_type_text - else None - ) - type_text = ( - date_type_text.text.strip().split("/")[-1].strip() - if "/" in date_type_text.text - else None - ) + timestamp = None + if date_text: + try: + dt = datetime.strptime(date_text, "%Y.%m.%d %a").replace(tzinfo=JST) + timestamp = int(dt.timestamp()) + except Exception: + timestamp = None - timestamp = None - if date_text: - try: - dt = datetime.strptime(date_text, "%Y.%m.%d %a") - timestamp = int(time.mktime(dt.timetuple())) - except: - timestamp = None + entry = { + "date": date_text, + "identifier": identifier, + "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None, + "timestamp": timestamp, + "headline": None, + "content": image_alt, + "url": url, + "is_ai_summary": False, + "images": [{"image": image_url, "link": image_link}] + if image_url + else [], + } - entry = { - "date": date_text, - "identifier": identifier, - "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None, - "timestamp": timestamp, - "headline": None, - "content": image_alt, - "url": url, - "is_ai_summary": False, - "images": [{"image": image_url, "link": image_link}] - if image_url - else [], - } + items.append(entry) - items.append(entry) - - return items - - if parser == ParserVersion.ALPHA: - return alpha_parser - - -parse_ongeki_news_site = make_ongeki_parser("ONGEKI_JPN", ParserVersion.ALPHA) + return items
\ No newline at end of file |
