diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-17 15:31:37 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-17 15:31:37 -0700 |
| commit | a87715649b4fdfbd549aad493fb262f91f563325 (patch) | |
| tree | ceb6179c9080f219451bb1008ec729d4e3b713e8 | |
| parent | ead6f998b47ff9e9f69ab636a995cbb30acdb775 (diff) | |
add MUSIC DIVER support
| -rw-r--r-- | constants.py | 2 | ||||
| -rw-r--r-- | generate.py | 7 | ||||
| -rw-r--r-- | news_feed.py | 6 | ||||
| -rw-r--r-- | taito/music_diver.py | 57 |
4 files changed, 71 insertions, 1 deletions
diff --git a/constants.py b/constants.py index 1893a3e..3f9af7e 100644 --- a/constants.py +++ b/constants.py @@ -21,6 +21,8 @@ MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/" MAIMAIDX_INTL_NEWS_SITE="https://maimai.sega.com/download/" ONGEKI_JP_NEWS_SITE="https://info-ongeki.sega.jp/" +MUSIC_DIVER_NEWS="https://mypage.musicdiver.jp/api/news?lang=en" + ADD_EN_TRANSLATION=True # Only takes effect if an API key is provided in .env class CHUNITHM_VERSION(Enum): diff --git a/generate.py b/generate.py index eafc579..382b505 100644 --- a/generate.py +++ b/generate.py @@ -104,6 +104,9 @@ def generate_maimaidx_intl_news_file(): def generate_chunithm_intl_news_file(): return generate_news_file("chunithm_intl_news", constants.CHUNITHM_INTL_NEWS_SITE, constants.CHUNITHM_VERSION.LUMINOUS_PLUS) +def generate_music_diver_news_file(): + return generate_news_file("music_diver_news", constants.MUSIC_DIVER_NEWS) + if __name__ == "__main__": log_output("JOB START", "TASK") if not os.path.exists(OUTPUT_DIR): @@ -122,6 +125,7 @@ if __name__ == "__main__": ongeki_jp_news_data = generate_ongeki_jp_news_file() maimaidx_intl_news_data = generate_maimaidx_intl_news_file() chunithm_intl_news_data = generate_chunithm_intl_news_file() + music_diver_news_data = generate_music_diver_news_file() news = create_merged_feed( iidx_news_data, @@ -135,7 +139,8 @@ if __name__ == "__main__": maimaidx_jp_news_data, ongeki_jp_news_data, maimaidx_intl_news_data, - chunithm_intl_news_data + chunithm_intl_news_data, + music_diver_news_data ) log_output("Creating merged news.json file for all news that are within " + str(constants.DAYS_LIMIT) + " days old") with open(OUTPUT_DIR+'/news.json', 'w') as json_file: diff --git a/news_feed.py b/news_feed.py index 0689abf..0854503 100644 --- a/news_feed.py +++ b/news_feed.py @@ -28,6 +28,7 @@ import sega.chuni_intl as chuni_intl import sega.maimaidx_jp as maimaidx_jp import sega.maimaidx_intl as maimaidx_intl import sega.ongeki_jp as ongeki_jp +import taito.music_diver as music_diver import constants import translate @@ -107,6 +108,11 @@ def get_news(news_url: str, version=None) -> list: if version == constants.ONGEKI_VERSION.REFRESH: news_posts = sorted(ongeki_jp.parse_ongeki_refresh_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) + + elif news_url == constants.MUSIC_DIVER_NEWS: + api_data = download_site_as_html(news_url) + news_posts = sorted(music_diver.parse_music_diver_news_json(api_data), key=lambda x: x['timestamp'], reverse=True) + else: news_posts = [] return news_posts diff --git a/taito/music_diver.py b/taito/music_diver.py new file mode 100644 index 0000000..5469ad5 --- /dev/null +++ b/taito/music_diver.py @@ -0,0 +1,57 @@ +import json +from bs4 import BeautifulSoup +import re +from datetime import datetime + +def _parse_html_content(html: str): + soup = BeautifulSoup(html, "html.parser") + images = [] + for img in soup.find_all("img"): + parent = img.find_parent("a") + image_info = { + "image": img["src"], + "link": parent["href"] if parent else None + } + images.append(image_info) + img.decompose() + for br in soup.find_all("br"): + br.replace_with("\n\n") + for a in soup.find_all("a"): + text = a.get_text() + href = a.get("href") + if href: + markdown = f"[{text}]({href})" + a.replace_with(f" {markdown} ") + else: + a.unwrap() + a.insert_after(" ") + for tag in soup.find_all(True): + tag.insert_after(" ") + tag.unwrap() + text = soup.get_text() + text = re.sub(r"\n{3,}", "\n\n", text).strip() + return text, images + +def parse_music_diver_news_json(data_str: str): + data = json.loads(data_str) + if data["responseCode"] != 200: + return [] + + news_posts = [] + for post in data["response"]: + content, images = _parse_html_content(post["content"]) + show_date = datetime.fromisoformat(post["show_start"].replace("Z", "+00:00")) + jst_date = show_date.strftime("%Y-%m-%d") + timestamp = int(show_date.timestamp()) + + news_posts.append({ + "date": jst_date, + "identifier": "MUSIC_DIVER", + "type": None, + "timestamp": timestamp, + "headline": post["title"], + "content": content, + "url": None, + "images": images + }) + return news_posts |
