diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-14 12:56:08 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-14 12:56:08 -0700 |
| commit | f83f2c28021f309c6dc3c89d3d25637b570b4ec9 (patch) | |
| tree | 4fb4c22a82b79f50000ff7061e8e3bbaed6f7af4 /sega | |
| parent | c9c90128977c9e63f8151056d9c28ec61e6b717f (diff) | |
add maimai DX INTERNATIONAL basic info to scraper
Diffstat (limited to 'sega')
| -rw-r--r-- | sega/maimaidx_intl.py | 36 |
1 files changed, 36 insertions, 0 deletions
"""Parser for the maimai DX International (PRISM) news listing.

Contents of ``sega/maimaidx_intl.py``.
"""
import logging
import time
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional

from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

# Listed dates carry no zone information; they are interpreted as UTC+9 when
# converted to epoch seconds.
_SITE_TZ = timezone(timedelta(hours=9))
_DATE_FORMAT = "%Y.%m.%d"
_IDENTIFIER = "MAIMAIDX_INTL_PRISM"
_CONTENT_PREFIX = (
    "New maimai DX International News / maimai DX International の新しいお知らせ"
)
# NOTE(review): the host is maimai.sega.jp although this parser targets the
# International site -- confirm this is the right base for the scraped page.
_IMAGE_BASE_URL = "https://maimai.sega.jp/"


def _first_srcset_url(srcset: Optional[str]) -> Optional[str]:
    """Return the URL of the first image candidate in a ``srcset`` value.

    A ``srcset`` may list several candidates, each optionally followed by a
    width/density descriptor (``"a.png 1x, b.png 2x"``).  Only the URL of the
    first candidate is returned; ``None`` is returned for a missing or empty
    value.
    """
    if not srcset:
        return None
    tokens = srcset.split()
    if not tokens:
        return None
    # A candidate without a descriptor keeps the separating comma attached.
    return tokens[0].rstrip(",") or None


def _absolute_image_url(url: Optional[str]) -> Optional[str]:
    """Resolve a ``../``-relative image path against the site root.

    Only *leading* ``../`` segments are stripped, and the base is prepended
    exactly once, so ``../../x.png`` does not end up with the base URL
    embedded twice.  URLs that do not start with ``../`` are returned as-is.
    """
    if not url:
        return None
    if not url.startswith("../"):
        return url
    while url.startswith("../"):
        url = url[len("../"):]
    return _IMAGE_BASE_URL + url


def parse_maimaidx_intl_prism_news_site(html: str) -> List[Dict[str, Any]]:
    """Extract news entries from the maimai DX International news page HTML.

    Every ``.dl--pop__item`` element yields one entry.  Items that have no
    ``.dl--pop__head`` element, or whose date text does not match
    ``YYYY.MM.DD`` (after removing a trailing ``" UP"`` marker), are logged
    and skipped so that one malformed item does not discard the rest.

    Args:
        html: Raw HTML of the news page.

    Returns:
        A list of dicts in document order with the keys ``date`` (the date
        text as shown), ``identifier``, ``type``, ``timestamp`` (epoch seconds
        for midnight of that date at UTC+9), ``headline``, ``content``,
        ``url`` and ``images``.  ``images`` always holds a single
        ``{"image", "link"}`` dict; ``image`` is ``None`` when the item has no
        usable thumbnail, in which case ``content`` carries no URL suffix.
    """
    soup = BeautifulSoup(html, "html.parser")

    entries = []
    for item in soup.select(".dl--pop__item"):
        head = item.select_one(".dl--pop__head")
        if head is None:
            logger.warning("Skipping news item without a .dl--pop__head element")
            continue

        date_text = head.text.strip().replace(" UP", "").strip()
        try:
            dt = datetime.strptime(date_text, _DATE_FORMAT).replace(tzinfo=_SITE_TZ)
        except ValueError:
            logger.warning("Skipping news item with unparseable date %r", date_text)
            continue

        img_tag = item.select_one("a.dl--pop__thumb img")
        image_url = None
        if img_tag is not None:
            # Prefer srcset (what the page is known to use), fall back to src.
            image_url = _first_srcset_url(img_tag.get("srcset")) or img_tag.get("src")
        full_image_url = _absolute_image_url(image_url)

        # Only append the image URL when there is one; never the text "None".
        content = _CONTENT_PREFIX
        if full_image_url:
            content = f"{_CONTENT_PREFIX}\n\n{full_image_url}"

        entries.append(
            {
                "date": date_text,
                "identifier": _IDENTIFIER,
                "type": None,
                "timestamp": int(dt.timestamp()),
                "headline": None,
                "content": content,
                "url": None,
                "images": [
                    {
                        "image": full_image_url,
                        "link": None,
                    }
                ],
            }
        )
    return entries
