diff options
| -rw-r--r-- | bandai_namco/taiko.py | 58 | ||||
| -rw-r--r-- | constants.py | 2 | ||||
| -rw-r--r-- | generate.py | 7 | ||||
| -rw-r--r-- | news_feed.py | 7 |
4 files changed, 72 insertions, 2 deletions
# Reconstructed from the collapsed diff hunk for bandai_namco/taiko.py
# (new file mode 100644, index 0000000..0aa2e0e, @@ -0,0 +1,58 @@).
from bs4 import BeautifulSoup
from datetime import datetime, timedelta, timezone
from urllib.parse import urljoin
import time
import re

# Site root used to resolve root-relative and protocol-relative image paths.
_BASE_URL = "https://taiko-ch.net"

# Entry dates are written as "%Y年%m月%d日" with no zone information.
# NOTE(review): the dates are presumably Japan Standard Time (UTC+9) -- confirm.
# Pinning the zone keeps timestamps identical regardless of the host's TZ.
_JST = timezone(timedelta(hours=9))

# Inline style that marks a subheader <div>. Compiled once instead of per
# article; tolerant of letter case and of any amount of whitespace.
_SUBHEADER_STYLE = re.compile(r"background:\s*#ff4500", re.IGNORECASE)


def _parse_article(article) -> dict | None:
    """Build one feed entry from an ``<article>`` tag.

    Returns ``None`` when the article has no ``p.entryDate`` element.
    Raises ``ValueError`` when the date text does not match "%Y年%m月%d日".
    """
    # Date and timestamp (midnight of the entry date in the pinned zone).
    date_tag = article.select_one("p.entryDate")
    if not date_tag:
        return None
    date_str = date_tag.text.strip()
    date_obj = datetime.strptime(date_str, "%Y年%m月%d日")
    timestamp = int(date_obj.replace(tzinfo=_JST).timestamp())

    # Headline: first <h1>, or None when the article has none.
    headline_tag = article.select_one("h1")
    headline = headline_tag.text.strip() if headline_tag else None

    # Subheaders: text of each styled <div>, without the "■" marker,
    # rendered as a bullet list.
    content = []
    for div in article.find_all("div", style=_SUBHEADER_STYLE):
        title_text = div.get_text(strip=True).replace("■", "").strip()
        if title_text:
            content.append(f"• {title_text}")

    # Images: prefer "src", fall back to "data-src".
    images = []
    for img in article.find_all("img"):
        img_url = img.get("src") or img.get("data-src")
        if not img_url:
            continue
        if img_url.startswith("/"):
            # urljoin handles both "/path" (site root) and "//host/path"
            # (protocol-relative); plain concatenation would turn the latter
            # into "https://taiko-ch.net//host/path".
            img_url = urljoin(_BASE_URL, img_url)
        images.append({"image": img_url, "link": None})

    return {
        "date": date_str,
        "identifier": "TAIKO",
        "type": None,
        "timestamp": timestamp,
        "headline": headline,
        "content": "\n".join(content),
        "url": None,
        "images": images,
    }


def parse_taiko_blog_site(html: str) -> list:
    """Parse the Taiko blog page HTML into a list of news entries.

    Every ``<article>`` element that has a ``p.entryDate`` yields one dict
    with the keys ``date``, ``identifier`` ("TAIKO"), ``type``, ``timestamp``,
    ``headline``, ``content``, ``url`` and ``images``. Articles without a
    date are skipped silently; articles that fail to parse are reported on
    stdout and skipped.

    Args:
        html: Raw HTML of the blog page.

    Returns:
        The parsed entries, in document order.
    """
    soup = BeautifulSoup(html, "html.parser")

    entries = []
    for article in soup.select("article"):
        try:
            entry = _parse_article(article)
        except Exception as e:
            # Deliberate per-article boundary: one malformed article must
            # not discard the rest of the feed.
            print(f"Error parsing article: {e}")
            continue
        if entry is not None:
            entries.append(entry)

    return entries


# ---------------------------------------------------------------------------
# The same collapsed line also carried the constants.py hunk; kept verbatim:
#   diff --git a/constants.py b/constants.py index 3f9af7e..9b0d3c7 100644
#   --- a/constants.py +++ b/constants.py @@ -23,6 +23,8 @@
#   ONGEKI_JP_NEWS_SITE="https://info-ongeki.sega.jp/"
#   MUSIC_DIVER_NEWS="https://mypage.musicdiver.jp/api/news?lang=en"
#   +TAIKO_BLOG_SITE="https://taiko-ch.net/blog/" +
#   ADD_EN_TRANSLATION=True # Only takes effect if an API key is provided in .env
#   class
CHUNITHM_VERSION(Enum): diff --git a/generate.py b/generate.py index 382b505..90d40cc 100644 --- a/generate.py +++ b/generate.py @@ -107,6 +107,9 @@ def generate_chunithm_intl_news_file(): def generate_music_diver_news_file(): return generate_news_file("music_diver_news", constants.MUSIC_DIVER_NEWS) +def generate_taiko_news_file(): + return generate_news_file("taiko_news", constants.TAIKO_BLOG_SITE) + if __name__ == "__main__": log_output("JOB START", "TASK") if not os.path.exists(OUTPUT_DIR): @@ -126,6 +129,7 @@ if __name__ == "__main__": maimaidx_intl_news_data = generate_maimaidx_intl_news_file() chunithm_intl_news_data = generate_chunithm_intl_news_file() music_diver_news_data = generate_music_diver_news_file() + taiko_news_data = generate_taiko_news_file() news = create_merged_feed( iidx_news_data, @@ -140,7 +144,8 @@ if __name__ == "__main__": ongeki_jp_news_data, maimaidx_intl_news_data, chunithm_intl_news_data, - music_diver_news_data + music_diver_news_data, + taiko_news_data ) log_output("Creating merged news.json file for all news that are within " + str(constants.DAYS_LIMIT) + " days old") with open(OUTPUT_DIR+'/news.json', 'w') as json_file: diff --git a/news_feed.py b/news_feed.py index 0854503..62af645 100644 --- a/news_feed.py +++ b/news_feed.py @@ -29,6 +29,7 @@ import sega.maimaidx_jp as maimaidx_jp import sega.maimaidx_intl as maimaidx_intl import sega.ongeki_jp as ongeki_jp import taito.music_diver as music_diver +import bandai_namco.taiko as taiko import constants import translate @@ -66,7 +67,7 @@ def get_news(news_url: str, version=None) -> list: case constants.GITADORA_EAMUSE_APP_ID: news_posts= sorted(eamuse_app.parse_news_page(site_data, "GITADORA_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) - case constants.NOSTALGIA_EAMUSE_APP_ID : + case constants.NOSTALGIA_EAMUSE_APP_ID: news_posts= sorted(eamuse_app.parse_news_page(site_data, "NOSTALGIA_EAMUSEMENT"), key=lambda 
x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) case _: @@ -113,6 +114,10 @@ def get_news(news_url: str, version=None) -> list: api_data = download_site_as_html(news_url) news_posts = sorted(music_diver.parse_music_diver_news_json(api_data), key=lambda x: x['timestamp'], reverse=True) + elif news_url == constants.TAIKO_BLOG_SITE: + site_data = download_site_as_html(news_url) + news_posts = sorted(taiko.parse_taiko_blog_site(site_data), key=lambda x: x['timestamp'], reverse=True) + else: news_posts = [] return news_posts |
