diff options
| -rw-r--r-- | constants.py | 2 | ||||
| -rw-r--r-- | news_feed.py | 9 | ||||
| -rw-r--r-- | sega/maimaidx_intl.py | 36 |
3 files changed, 40 insertions, 7 deletions
diff --git a/constants.py b/constants.py index 13ee71e..7bdf630 100644 --- a/constants.py +++ b/constants.py @@ -23,7 +23,7 @@ DANCE_RUSH_APP_ID="ns3maqirvf08ddhp" CHUNITHM_JP_NEWS_SITE="https://info-chunithm.sega.jp/" CHUNITHM_INTL_NEWS_SITE="https://info-chunithm.sega.com/" MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/" -MAIMAIDX_INTL_NEWS_SITE="https://maimai.sega.com/download/" +MAIMAIDX_INTL_NEWS_SITE="https://maimai.sega.com/assets/data/index.json" ONGEKI_JP_NEWS_SITE="https://info-ongeki.sega.jp/" IDAC_NEWS_SITE="https://info-initialdac.sega.jp/" diff --git a/news_feed.py b/news_feed.py index 2b9a80d..2761f29 100644 --- a/news_feed.py +++ b/news_feed.py @@ -149,12 +149,9 @@ def get_news(news_url: str, version=None) -> list: news_posts = translate.add_translate_text_to_en(news_posts) elif news_url == constants.MAIMAIDX_INTL_NEWS_SITE: - scraper = SiteScraper(headless=True) - site_data = scraper.get_page_source(news_url) - scraper.close() - if version in [ constants.MAIMAIDX_VERSION.PRISM, constants.MAIMAIDX_VERSION.PRISM_PLUS ]: - news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) - _attach_llm_summaries(news_posts, "maimai DX International") + site_data = download_site_as_html(news_url) + news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_api_route(site_data, "MAIMAIDX_INTL"), key=lambda x: x['timestamp'], reverse=True) + _attach_llm_summaries(news_posts, "maimai DX International") elif news_url == constants.ONGEKI_JP_NEWS_SITE: site_data = download_site_as_html(news_url) diff --git a/sega/maimaidx_intl.py b/sega/maimaidx_intl.py index 4ec69cb..073a211 100644 --- a/sega/maimaidx_intl.py +++ b/sega/maimaidx_intl.py @@ -1,11 +1,15 @@ from bs4 import BeautifulSoup from datetime import datetime, timezone, timedelta from enum import Enum +import json class ParserVersion(Enum): ALPHA=1 def make_maimaidx_intl_parser(identifier: str, parser: ParserVersion): + """ + Parses the download page of maimai dx intl site. API route method below is preferred as information is the same + """ def alpha_parser(html: str): """ Confirmed on: @@ -45,4 +49,36 @@ def make_maimaidx_intl_parser(identifier: str, parser: ParserVersion): if parser == ParserVersion.ALPHA: return alpha_parser +def parse_maimaidx_intl_api_route(raw_api_data: str, identifier: str): + route_data = json.loads(raw_api_data) + entries = [] + for post_data in route_data: + date_data = post_data["date"] + date_str = ".".join([str(x) for x in date_data[:3]]) # YYYY.MM.DD + dt = datetime.strptime(date_str, "%Y.%m.%d").replace(tzinfo=timezone(timedelta(hours=9))) + timestamp = int(dt.timestamp()) + full_image_url = f"https://maimai.sega.com/assets/assets/img/download/pop/download/{date_data[0]}-{date_data[1]}-{date_data[2]}/{post_data['thumb']}" + if len(date_data) == 4: + full_image_url = f"https://maimai.sega.com/assets/assets/img/download/pop/download/{date_data[0]}-{date_data[1]}-{date_data[2]}-{date_data[3]}/{post_data['thumb']}" + content = post_data["desc"] + f"\n\nNew maimai DX International News / maimai DX International の新しいお知らせ\n\n{full_image_url}" + headline = post_data["title"] + images = [{ + "image": full_image_url, + "link": None + }] + entry = { + "date": date_str, + "identifier": identifier, + "type": None, + "timestamp": timestamp, + "headline": headline, + "content": content, + "url": None, + "images": images, + "is_ai_summary": False + } + entries.append(entry) + return entries + + parse_maimaidx_intl_news_site = make_maimaidx_intl_parser("MAIMAIDX_INTL", ParserVersion.ALPHA) |
