aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2025-10-16 12:01:47 -0700
committerPinapelz <yukais@pinapelz.com>2025-10-16 12:01:47 -0700
commite7276713a35b2dbc7a2b0ab197d18645c9e52a53 (patch)
tree4199ce609a09fbdb33df4608b6d60a229ab67978
parentbcdec514d4823031b39d5c726b371a5bfbefd240 (diff)
migrate maimaidx_intl to scrape downloads api route
-rw-r--r--constants.py2
-rw-r--r--news_feed.py9
-rw-r--r--sega/maimaidx_intl.py36
3 files changed, 40 insertions, 7 deletions
diff --git a/constants.py b/constants.py
index 13ee71e..7bdf630 100644
--- a/constants.py
+++ b/constants.py
@@ -23,7 +23,7 @@ DANCE_RUSH_APP_ID="ns3maqirvf08ddhp"
CHUNITHM_JP_NEWS_SITE="https://info-chunithm.sega.jp/"
CHUNITHM_INTL_NEWS_SITE="https://info-chunithm.sega.com/"
MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/"
-MAIMAIDX_INTL_NEWS_SITE="https://maimai.sega.com/download/"
+MAIMAIDX_INTL_NEWS_SITE="https://maimai.sega.com/assets/data/index.json"
ONGEKI_JP_NEWS_SITE="https://info-ongeki.sega.jp/"
IDAC_NEWS_SITE="https://info-initialdac.sega.jp/"
diff --git a/news_feed.py b/news_feed.py
index 2b9a80d..2761f29 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -149,12 +149,9 @@ def get_news(news_url: str, version=None) -> list:
news_posts = translate.add_translate_text_to_en(news_posts)
elif news_url == constants.MAIMAIDX_INTL_NEWS_SITE:
- scraper = SiteScraper(headless=True)
- site_data = scraper.get_page_source(news_url)
- scraper.close()
- if version in [ constants.MAIMAIDX_VERSION.PRISM, constants.MAIMAIDX_VERSION.PRISM_PLUS ]:
- news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
- _attach_llm_summaries(news_posts, "maimai DX International")
+ site_data = download_site_as_html(news_url)
+ news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_api_route(site_data, "MAIMAIDX_INTL"), key=lambda x: x['timestamp'], reverse=True)
+ _attach_llm_summaries(news_posts, "maimai DX International")
elif news_url == constants.ONGEKI_JP_NEWS_SITE:
site_data = download_site_as_html(news_url)
diff --git a/sega/maimaidx_intl.py b/sega/maimaidx_intl.py
index 4ec69cb..073a211 100644
--- a/sega/maimaidx_intl.py
+++ b/sega/maimaidx_intl.py
@@ -1,11 +1,15 @@
from bs4 import BeautifulSoup
from datetime import datetime, timezone, timedelta
from enum import Enum
+import json
class ParserVersion(Enum):
    """Parser implementations selectable via make_maimaidx_intl_parser."""

    ALPHA = 1
def make_maimaidx_intl_parser(identifier: str, parser: ParserVersion):
+ """
+ Builds a parser for the download page of the maimai DX International site. Prefer parse_maimaidx_intl_api_route below: the API route provides the same information.
+ """
def alpha_parser(html: str):
"""
Confirmed on:
@@ -45,4 +49,36 @@ def make_maimaidx_intl_parser(identifier: str, parser: ParserVersion):
if parser == ParserVersion.ALPHA:
return alpha_parser
def parse_maimaidx_intl_api_route(raw_api_data: str, identifier: str) -> list:
    """Convert the maimai DX International JSON API payload into news entries.

    Args:
        raw_api_data: JSON text whose top level is a sequence of post objects.
            Each post must provide ``date`` (a sequence starting with year,
            month and day, optionally followed by a fourth component),
            ``thumb``, ``desc`` and ``title``.
        identifier: Value copied verbatim into every entry's ``identifier``.

    Returns:
        One dict per post, in input order, with the keys ``date``,
        ``identifier``, ``type``, ``timestamp``, ``headline``, ``content``,
        ``url``, ``images`` and ``is_ai_summary``.

    Raises:
        json.JSONDecodeError: If ``raw_api_data`` is not valid JSON.
        KeyError: If a post lacks one of the required keys.
        ValueError: If the first three ``date`` components do not form a
            valid calendar date.
    """
    # Dates are interpreted as midnight at UTC+9.
    utc_plus_9 = timezone(timedelta(hours=9))
    image_root = "https://maimai.sega.com/assets/assets/img/download/pop/download"

    entries = []
    for post_data in json.loads(raw_api_data):
        date_data = post_data["date"]
        dt = datetime.strptime(
            ".".join(str(part) for part in date_data[:3]), "%Y.%m.%d"
        ).replace(tzinfo=utc_plus_9)
        # Format from the parsed datetime so the field is always zero-padded
        # YYYY.MM.DD, even when the API supplies components such as 1 or "1".
        date_str = dt.strftime("%Y.%m.%d")

        # The image folder is named after the raw date components; a fourth
        # component, when it is the last one, is part of the folder name.
        folder_parts = date_data if len(date_data) == 4 else date_data[:3]
        folder = "-".join(str(part) for part in folder_parts)
        full_image_url = f"{image_root}/{folder}/{post_data['thumb']}"

        content = post_data["desc"] + f"\n\nNew maimai DX International News / maimai DX International の新しいお知らせ\n\n{full_image_url}"
        entries.append({
            "date": date_str,
            "identifier": identifier,
            "type": None,
            "timestamp": int(dt.timestamp()),
            "headline": post_data["title"],
            "content": content,
            "url": None,
            "images": [{"image": full_image_url, "link": None}],
            "is_ai_summary": False,
        })
    return entries
+
+
parse_maimaidx_intl_news_site = make_maimaidx_intl_parser("MAIMAIDX_INTL", ParserVersion.ALPHA)
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage