diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-14 12:56:08 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-14 12:56:08 -0700 |
| commit | f83f2c28021f309c6dc3c89d3d25637b570b4ec9 (patch) | |
| tree | 4fb4c22a82b79f50000ff7061e8e3bbaed6f7af4 | |
| parent | c9c90128977c9e63f8151056d9c28ec61e6b717f (diff) | |
add maimai DX INTERNATIONAL basic info to scraper
| -rw-r--r-- | constants.py | 1 | ||||
| -rw-r--r-- | generate.py | 13 | ||||
| -rw-r--r-- | news_feed.py | 4 | ||||
| -rw-r--r-- | sega/maimaidx_intl.py | 36 |
4 files changed, 53 insertions, 1 deletion
```diff
diff --git a/constants.py b/constants.py
index 4fb59f9..6ef036e 100644
--- a/constants.py
+++ b/constants.py
@@ -8,6 +8,7 @@ IIDX_PINKY_CRUSH_NEWS_SITE="https://p.eagate.573.jp/game/2dx/32/info/index.html"
 
 CHUNITHM_JP_NEWS_SITE="https://info-chunithm.sega.jp/"
 MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/"
+MAIMAIDX_INTL_NEWS_SITE="https://maimai.sega.com/download/"
 ONGEKI_JP_NEWS_SITE="https://info-ongeki.sega.jp/"
 
 class CHUNITHM_VERSION(Enum):
diff --git a/generate.py b/generate.py
index d7f8c6f..e91c8ee 100644
--- a/generate.py
+++ b/generate.py
@@ -91,6 +91,17 @@ if __name__ == "__main__":
             ongeki_jp_news_data = json.load(json_file)['news_posts']
         print("ONGEKI JPN Data not fetched, using existing data.")
 
-    news = create_merged_feed(iidx_news_data, sdvx_news_data, chunithm_jp_news_data, maimaidx_jp_news_data, ongeki_jp_news_data)
+    print("Fetching MAIMAIDX INTL Data")
+    maimaidx_intl_news_data = feed.get_news(constants.MAIMAIDX_INTL_NEWS_SITE, constants.MAIMAIDX_VERSION.PRISM)
+    if len(maimaidx_intl_news_data) != 0:
+        with open(OUTPUT_DIR+'/maimaidx_intl_news.json', 'w') as json_file:
+            json.dump(attach_news_meta_data(maimaidx_intl_news_data), json_file)
+        print("MAIMAIDX INTLN Data fetched and saved.")
+    elif len(maimaidx_intl_news_data) == 0 and os.path.exists(OUTPUT_DIR+'/maimaidx_intl_news.json'):
+        with open(OUTPUT_DIR+'/maimaidx_intl_news.json', 'r') as json_file:
+            maimaidx_intl_news_data = json.load(json_file)['news_posts']
+        print("MAIMAIDX INTL Data not fetched, using existing data.")
+
+    news = create_merged_feed(iidx_news_data, sdvx_news_data, chunithm_jp_news_data, maimaidx_jp_news_data, ongeki_jp_news_data, maimaidx_intl_news_data)
     with open(OUTPUT_DIR+'/news.json', 'w') as json_file:
         json.dump(attach_news_meta_data(news), json_file)
diff --git a/news_feed.py b/news_feed.py
index d796d64..13720a8 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -22,6 +22,7 @@ import bemani.sdvx as sound_voltex
 import bemani.iidx as iidx
 import sega.chuni_jp as chunithm_jp
 import sega.maimaidx_jp as maimaidx_jp
+import sega.maimaidx_intl as maimaidx_intl
 import sega.ongeki_jp as ongeki_jp
 import constants
 
@@ -38,6 +39,9 @@ def get_news(news_url: str, version=None) -> list:
     elif news_url == constants.MAIMAIDX_JP_NEWS_SITE:
         if version == constants.MAIMAIDX_VERSION.PRISM_PLUS:
             news_posts = sorted(maimaidx_jp.parse_maimaidx_jp_prism_plus_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
+    elif news_url == constants.MAIMAIDX_INTL_NEWS_SITE:
+        if version == constants.MAIMAIDX_VERSION.PRISM:
+            news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_prism_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
     elif news_url == constants.ONGEKI_JP_NEWS_SITE:
         if version == constants.ONGEKI_VERSION.REFRESH:
             news_posts = sorted(ongeki_jp.parse_ongeki_refresh_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
diff --git a/sega/maimaidx_intl.py b/sega/maimaidx_intl.py
new file mode 100644
index 0000000..7769b7c
--- /dev/null
+++ b/sega/maimaidx_intl.py
@@ -0,0 +1,36 @@
+from bs4 import BeautifulSoup
+from datetime import datetime, timezone, timedelta
+import time
+
+def parse_maimaidx_intl_prism_news_site(html: str):
+    soup = BeautifulSoup(html, "html.parser")
+    items = soup.select(".dl--pop__item")
+
+    entries = []
+    for item in items:
+        date_text = item.select_one(".dl--pop__head").text.strip().replace(" UP", "")
+        dt = datetime.strptime(date_text, "%Y.%m.%d").replace(tzinfo=timezone(timedelta(hours=9)))
+        timestamp = int(dt.timestamp())
+
+        img_tag = item.select_one("a.dl--pop__thumb img")
+        image_url = img_tag["srcset"] if img_tag else None
+        full_image_url = image_url.replace("../", "https://maimai.sega.jp/") if image_url else None
+
+        entry = {
+            "date": date_text,
+            "identifier": "MAIMAIDX_INTL_PRISM",
+            "type": None,
+            "timestamp": timestamp,
+            "headline": None,
+            "content": f"New maimai DX International News / maimai DX International の新しいお知らせ\n\n{full_image_url}",
+            "url": None,
+            "images": [
+                {
+                    "image": full_image_url,
+                    "link": None
+                }
+            ]
+        }
+
+        entries.append(entry)
+    return entries
```
