about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- constants.py 2
-rw-r--r-- generate.py 7
-rw-r--r-- news_feed.py 6
-rw-r--r-- taito/music_diver.py 57
4 files changed, 71 insertions, 1 deletions
diff --git a/constants.py b/constants.py
index 1893a3e..3f9af7e 100644
--- a/constants.py
+++ b/constants.py
@@ -21,6 +21,8 @@ MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/"
MAIMAIDX_INTL_NEWS_SITE="https://maimai.sega.com/download/"
ONGEKI_JP_NEWS_SITE="https://info-ongeki.sega.jp/"
+MUSIC_DIVER_NEWS="https://mypage.musicdiver.jp/api/news?lang=en"
+
ADD_EN_TRANSLATION=True # Only takes effect if an API key is provided in .env
class CHUNITHM_VERSION(Enum):
diff --git a/generate.py b/generate.py
index eafc579..382b505 100644
--- a/generate.py
+++ b/generate.py
@@ -104,6 +104,9 @@ def generate_maimaidx_intl_news_file():
def generate_chunithm_intl_news_file():
    """Build the "chunithm_intl_news" file and return generate_news_file's result.

    Delegates to ``generate_news_file`` with the CHUNITHM international news
    URL and the LUMINOUS_PLUS version constant.
    """
    chunithm_version = constants.CHUNITHM_VERSION.LUMINOUS_PLUS
    return generate_news_file(
        "chunithm_intl_news",
        constants.CHUNITHM_INTL_NEWS_SITE,
        chunithm_version,
    )
def generate_music_diver_news_file():
    """Build the "music_diver_news" file and return generate_news_file's result.

    Delegates to ``generate_news_file`` with the MUSIC DIVER news API URL;
    no version argument is passed for this feed.
    """
    output_name = "music_diver_news"
    source_url = constants.MUSIC_DIVER_NEWS
    return generate_news_file(output_name, source_url)
+
if __name__ == "__main__":
log_output("JOB START", "TASK")
if not os.path.exists(OUTPUT_DIR):
@@ -122,6 +125,7 @@ if __name__ == "__main__":
ongeki_jp_news_data = generate_ongeki_jp_news_file()
maimaidx_intl_news_data = generate_maimaidx_intl_news_file()
chunithm_intl_news_data = generate_chunithm_intl_news_file()
+ music_diver_news_data = generate_music_diver_news_file()
news = create_merged_feed(
iidx_news_data,
@@ -135,7 +139,8 @@ if __name__ == "__main__":
maimaidx_jp_news_data,
ongeki_jp_news_data,
maimaidx_intl_news_data,
- chunithm_intl_news_data
+ chunithm_intl_news_data,
+ music_diver_news_data
)
log_output("Creating merged news.json file for all news that are within " + str(constants.DAYS_LIMIT) + " days old")
with open(OUTPUT_DIR+'/news.json', 'w') as json_file:
diff --git a/news_feed.py b/news_feed.py
index 0689abf..0854503 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -28,6 +28,7 @@ import sega.chuni_intl as chuni_intl
import sega.maimaidx_jp as maimaidx_jp
import sega.maimaidx_intl as maimaidx_intl
import sega.ongeki_jp as ongeki_jp
+import taito.music_diver as music_diver
import constants
import translate
@@ -107,6 +108,11 @@ def get_news(news_url: str, version=None) -> list:
if version == constants.ONGEKI_VERSION.REFRESH:
news_posts = sorted(ongeki_jp.parse_ongeki_refresh_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
+
+ elif news_url == constants.MUSIC_DIVER_NEWS:
+ api_data = download_site_as_html(news_url)
+ news_posts = sorted(music_diver.parse_music_diver_news_json(api_data), key=lambda x: x['timestamp'], reverse=True)
+
else:
news_posts = []
return news_posts
diff --git a/taito/music_diver.py b/taito/music_diver.py
new file mode 100644
index 0000000..5469ad5
--- /dev/null
+++ b/taito/music_diver.py
@@ -0,0 +1,57 @@
+import json
+from bs4 import BeautifulSoup
+import re
+from datetime import datetime
+
def _parse_html_content(html: str) -> tuple:
    """Convert a news post's HTML body into plain text plus its images.

    Images are removed from the markup and reported separately, ``<br>``
    becomes a blank line, links that have both an ``href`` and visible text
    become Markdown ``[text](href)``, and every remaining tag is stripped
    while keeping its text.

    Args:
        html: HTML fragment to convert.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the stripped text with runs
        of three or more newlines collapsed to two. ``images`` is a list of
        ``{"image": <src>, "link": <href or None>}`` dicts in document order,
        where ``link`` is the ``href`` of the enclosing ``<a>`` if there is
        one. ``<img>`` tags without a ``src`` are dropped and not reported.
    """
    soup = BeautifulSoup(html, "html.parser")

    images = []
    for img in soup.find_all("img"):
        src = img.get("src")
        if src:
            anchor = img.find_parent("a")
            images.append({
                "image": src,
                # .get() so an <a> without href yields None instead of KeyError.
                "link": anchor.get("href") if anchor is not None else None,
            })
        img.decompose()

    for br in soup.find_all("br"):
        br.replace_with("\n\n")

    for a in soup.find_all("a"):
        label = a.get_text()
        href = a.get("href")
        if href and label.strip():
            a.replace_with(f" [{label}]({href}) ")
        else:
            # No target, or nothing visible to link (typically an image-only
            # link whose <img> was removed above): keep the contents only.
            # The separator must be inserted BEFORE unwrap(); once unwrapped
            # the tag has no parent and insert_after() raises ValueError.
            a.insert_after(" ")
            a.unwrap()

    # Strip all remaining tags, leaving a space so adjacent words do not fuse.
    for tag in soup.find_all(True):
        tag.insert_after(" ")
        tag.unwrap()

    text = soup.get_text()
    text = re.sub(r"\n{3,}", "\n\n", text).strip()
    return text, images
+
def parse_music_diver_news_json(data_str: str):
    """Parse the MUSIC DIVER news API JSON payload into news post dicts.

    Args:
        data_str: Raw JSON text of the API response. The payload is read as
            an object with a ``responseCode`` field and a ``response`` list
            whose items carry ``title``, ``content`` (HTML) and
            ``show_start`` (ISO-8601 timestamp, ``Z`` suffix accepted).

    Returns:
        A list with one dict per post, in payload order, with the keys
        ``date`` (``YYYY-MM-DD`` in JST), ``identifier``, ``type``,
        ``timestamp`` (Unix seconds), ``headline``, ``content``, ``url`` and
        ``images``. An empty list is returned when ``responseCode`` is not
        200 or the payload contains no posts.

    Raises:
        json.JSONDecodeError: If ``data_str`` is not valid JSON.
        KeyError: If a post lacks ``content``, ``show_start`` or ``title``.
    """
    # Local import: the module only imports `datetime` at file level.
    from datetime import timedelta, timezone

    data = json.loads(data_str)
    # Error payloads may omit fields entirely; treat anything but an explicit
    # 200 as "no news" rather than raising.
    if not isinstance(data, dict) or data.get("responseCode") != 200:
        return []

    jst = timezone(timedelta(hours=9))
    news_posts = []
    for post in data.get("response") or []:
        content, images = _parse_html_content(post["content"])
        show_date = datetime.fromisoformat(post["show_start"].replace("Z", "+00:00"))
        # The parsed time is UTC; convert before formatting so the calendar
        # date really is the JST date (UTC lags JST by nine hours, so early
        # JST hours would otherwise land on the previous day).
        # NOTE(review): a show_start without any offset is interpreted as
        # local time by both astimezone() and timestamp() — confirm the API
        # always sends an offset.
        jst_date = show_date.astimezone(jst).strftime("%Y-%m-%d")
        timestamp = int(show_date.timestamp())

        news_posts.append({
            "date": jst_date,
            "identifier": "MUSIC_DIVER",
            "type": None,
            "timestamp": timestamp,
            "headline": post["title"],
            "content": content,
            "url": None,
            "images": images,
        })
    return news_posts
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage