diff options
| -rw-r--r-- | constants.py | 4 | ||||
| -rw-r--r-- | generate.py | 21 | ||||
| -rw-r--r-- | konami/eamuse_app.py | 51 | ||||
| -rw-r--r-- | news_feed.py | 21 |
4 files changed, 89 insertions, 8 deletions
diff --git a/constants.py b/constants.py index ef92020..35f1fc8 100644 --- a/constants.py +++ b/constants.py @@ -6,6 +6,10 @@ SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE ="https://p.eagate.573.jp/game/sdvx/vi/news/i IIDX_PINKY_CRUSH_NEWS_SITE="https://p.eagate.573.jp/game/2dx/32/info/index.html" DDR_WORLD_NEWS_SITE="https://p.eagate.573.jp/game/ddr/ddrworld/info/index.html" +EAMUSE_APP_FEED="https://eam.573.jp/app/web/post/official" +IIDX_EAMUSE_APP_ID="s8svjrq62x592gvb" +DDR_EAMUSE_APP_ID="aegmtuzekqik0eyf" + CHUNITHM_JP_NEWS_SITE="https://info-chunithm.sega.jp/" CHUNITHM_INTL_NEWS_SITE="https://info-chunithm.sega.com/" MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/" diff --git a/generate.py b/generate.py index 46b689d..eada9b3 100644 --- a/generate.py +++ b/generate.py @@ -3,6 +3,7 @@ Generates news JSON files Generally you're expected to update the game versions manually as for most games you only ever want the latest version (supported) of the game """ +from ast import Constant import news_feed as feed import constants import json @@ -59,14 +60,22 @@ def generate_news_file(filename, url, version=None): print(f"Failed. Couldn't fetch {filename.upper()} data. Skipping...", "NEWS") return news_data -def generate_iidx_news_file(): - return generate_news_file("iidx_news", constants.IIDX_PINKY_CRUSH_NEWS_SITE) +# For e-amusement games you can choose to pull from a specific implementation of the scraper or the generic feed provided +# by the e-amusement app. Information is different +def generate_iidx_news_file(eamuse_feed: bool=False): + if eamuse_feed: + return generate_news_file("iidx_news", constants.EAMUSE_APP_FEED, constants.IIDX_EAMUSE_APP_ID) + else: + return generate_news_file("iidx_news", constants.IIDX_PINKY_CRUSH_NEWS_SITE) def generate_sdvx_news_file(): return generate_news_file("sdvx_news", constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE) -def generate_ddr_news_file(): - return generate_news_file("ddr_news", constants.DDR_WORLD_NEWS_SITE) +def generate_ddr_news_file(eamuse_feed: bool=False): + if eamuse_feed: + return generate_news_file("ddr_news", constants.EAMUSE_APP_FEED, constants.DDR_EAMUSE_APP_ID) + else: + return generate_news_file("ddr_news", constants.DDR_WORLD_NEWS_SITE) def generate_chunithm_jp_news_file(): return generate_news_file("chunithm_jp_news", constants.CHUNITHM_JP_NEWS_SITE, constants.CHUNITHM_VERSION.VERSE) @@ -89,9 +98,9 @@ if __name__ == "__main__": log_output(f"{OUTPUT_DIR} was not found. Creating this directory...") os.makedirs(OUTPUT_DIR) - iidx_news_data = generate_iidx_news_file() + iidx_news_data = generate_iidx_news_file(eamuse_feed=True) sdvx_news_data = generate_sdvx_news_file() - ddr_news_data = generate_ddr_news_file() + ddr_news_data = generate_ddr_news_file(eamuse_feed=True) chunithm_jp_news_data = generate_chunithm_jp_news_file() maimaidx_jp_news_data = generate_maimaidx_jp_news_file() ongeki_jp_news_data = generate_ongeki_jp_news_file() diff --git a/konami/eamuse_app.py b/konami/eamuse_app.py new file mode 100644 index 0000000..b552477 --- /dev/null +++ b/konami/eamuse_app.py @@ -0,0 +1,51 @@ +from bs4 import BeautifulSoup +from datetime import datetime +from urllib.parse import urljoin +import time + +BASE_URL = "https://eam.573.jp" + +def parse_news_page(html: str, identifier: str): + soup = BeautifulSoup(html, "html.parser") + entries = [] + + for li in soup.select("ul > li.ef"): + a_tag = li.find("a", href=True) + url = urljoin(BASE_URL, a_tag["href"]) if a_tag else None + + date_text = li.select_one(".post-date") + if not date_text: + continue + raw_date = date_text.get_text(strip=True).replace("年", "/").replace("月", "/").replace("日", "") + try: + date_obj = datetime.strptime(raw_date, "%Y/%m/%d") + except ValueError: + continue + date_str = date_obj.strftime("%Y-%m-%d") + timestamp = int(time.mktime(date_obj.timetuple())) + + content_tag = li.select_one(".article-text") + content = content_tag.get_text(strip=True) if content_tag else None + + img_tag = li.select_one(".article-img img") + image_url = img_tag["src"] if img_tag else None + images = [] + if image_url: + images.append({ + "image": image_url, + "link": url + }) + + entry = { + "date": date_str, + "identifier": identifier, + "type": None, + "timestamp": timestamp, + "headline": None, + "content": content, + "url": url, + "images": images + } + entries.append(entry) + + return entries diff --git a/news_feed.py b/news_feed.py index 75a3678..43be096 100644 --- a/news_feed.py +++ b/news_feed.py @@ -8,15 +8,18 @@ Generic format for a news entry. All keys are considered to be nullable 'headline': Headline, 'content': All text content of news, 'url': URL to full post if available, - 'images': { + 'images': [ + { 'image': URL to image, 'link': If there's an associated href. Else None + } - } + ] } """ from site_scraper import SiteScraper, download_site_as_html +import konami.eamuse_app as eamuse_app import bemani.sdvx as sound_voltex import bemani.iidx as iidx import bemani.ddr as ddr @@ -39,6 +42,20 @@ def get_news(news_url: str, version=None) -> list: news_posts = sorted(iidx.parse_pinky_crush_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL) + elif news_url == constants.EAMUSE_APP_FEED: + scraper = SiteScraper(headless=True) + site_data = scraper.get_page_source(news_url+"/?uuid_to="+version) + scraper.close() + match version: + case constants.IIDX_EAMUSE_APP_ID: + news_posts= sorted(eamuse_app.parse_news_page(site_data, "IIDX_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL) + case constants.DDR_EAMUSE_APP_ID: + news_posts= sorted(eamuse_app.parse_news_page(site_data, "DDR_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts = translate.add_translate_text_to_en(news_posts) + case _: + raise ValueError("Cannot find provided e-amuse app gameId", version) + elif news_url == constants.DDR_WORLD_NEWS_SITE: scraper = SiteScraper(headless=True) site_data = scraper.get_page_source(news_url) |
