aboutsummaryrefslogtreecommitdiffstats
path: root/sega/ongeki_jp.py
diff options
context:
space:
mode:
Diffstat (limited to 'sega/ongeki_jp.py')
-rw-r--r-- sega/ongeki_jp.py 102
1 file changed, 46 insertions, 56 deletions
diff --git a/sega/ongeki_jp.py b/sega/ongeki_jp.py
index f9c2dc4..c173189 100644
--- a/sega/ongeki_jp.py
+++ b/sega/ongeki_jp.py
@@ -1,68 +1,58 @@
-import time
-from datetime import datetime
-from enum import Enum
+from datetime import datetime, timezone, timedelta
from bs4 import BeautifulSoup
+JST = timezone(timedelta(hours=9))
-class ParserVersion(Enum):
- ALPHA = 1
+def parse_ongeki_news_site(html: str):
+ identifier = "ONGEKI_JPN"
+ soup = BeautifulSoup(html, "html.parser")
+ items = []
-def make_ongeki_parser(identifier: str, parser: ParserVersion):
- def alpha_parser(html: str):
- soup = BeautifulSoup(html, "html.parser")
- items = []
+ for li in soup.select("li.p-news__listChild"):
+ a_tag = li.select_one("a.p-news__listLink")
+ url = a_tag["href"] if a_tag else None
- for li in soup.select("li.p-news__listChild"):
- a_tag = li.select_one("a.p-news__listLink")
- url = a_tag["href"] if a_tag else None
+ img_tag = li.select_one(".p-news__listThumb img")
+ image_url = img_tag["src"] if img_tag else None
+ image_alt = img_tag["alt"] if img_tag else ""
+ image_link = url if image_url else None
- img_tag = li.select_one(".p-news__listThumb img")
- image_url = img_tag["src"] if img_tag else None
- image_alt = img_tag["alt"] if img_tag else ""
- image_link = url if image_url else None
+ date_type_text = li.select_one(".p-news__listTextUpper")
+ date_text = (
+ date_type_text.text.strip().split("/")[0].strip()
+ if date_type_text
+ else None
+ )
+ type_text = (
+ date_type_text.text.strip().split("/")[-1].strip()
+ if date_type_text and "/" in date_type_text.text
+ else None
+ )
- date_type_text = li.select_one(".p-news__listTextUpper")
- date_text = (
- date_type_text.text.strip().split("/")[0].strip()
- if date_type_text
- else None
- )
- type_text = (
- date_type_text.text.strip().split("/")[-1].strip()
- if "/" in date_type_text.text
- else None
- )
+ timestamp = None
+ if date_text:
+ try:
+ dt = datetime.strptime(date_text, "%Y.%m.%d %a").replace(tzinfo=JST)
+ timestamp = int(dt.timestamp())
+ except Exception:
+ timestamp = None
- timestamp = None
- if date_text:
- try:
- dt = datetime.strptime(date_text, "%Y.%m.%d %a")
- timestamp = int(time.mktime(dt.timetuple()))
- except:
- timestamp = None
+ entry = {
+ "date": date_text,
+ "identifier": identifier,
+ "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
+ "timestamp": timestamp,
+ "headline": None,
+ "content": image_alt,
+ "url": url,
+ "is_ai_summary": False,
+ "images": [{"image": image_url, "link": image_link}]
+ if image_url
+ else [],
+ }
- entry = {
- "date": date_text,
- "identifier": identifier,
- "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
- "timestamp": timestamp,
- "headline": None,
- "content": image_alt,
- "url": url,
- "is_ai_summary": False,
- "images": [{"image": image_url, "link": image_link}]
- if image_url
- else [],
- }
+ items.append(entry)
- items.append(entry)
-
- return items
-
- if parser == ParserVersion.ALPHA:
- return alpha_parser
-
-
-parse_ongeki_news_site = make_ongeki_parser("ONGEKI_JPN", ParserVersion.ALPHA)
+ return items
\ No newline at end of file
Send patches to the email address below:
yukais@pinapelz.com
Include the prefix [PATCH repo_name] in the subject line.
pinapelz.com
homepage