diff options
| -rw-r--r-- | constants.py | 4 | ||||
| -rw-r--r-- | konami/eamuse_app.py | 35 | ||||
| -rw-r--r-- | news_feed.py | 28 |
3 files changed, 50 insertions, 17 deletions
def parse_news_api_route(raw_api_data: str, identifier: str, eam_news_site: str = "") -> list:
    """
    Re-map an e-amusement app news API response to 573Updates entry dicts.

    Args:
        raw_api_data: JSON text of the API response. Posts are read from its
            top-level "post_list" array.
        identifier: Feed identifier stored verbatim on every entry.
        eam_news_site: Base URL of the post detail page; "?post_id=<id>" is
            appended to it to build each entry's link.

    Returns:
        One dict per post, in the order the API listed them. "type" and
        "headline" are always None and "is_ai_summary" is always False.
        An empty list is returned when "post_list" is missing or null.

    Raises:
        json.JSONDecodeError: If raw_api_data is not valid JSON.
        KeyError: If a post lacks "entry_date", "entry_time", "content"
            or "post_id".
    """
    route_data = json.loads(raw_api_data)
    entries = []
    # A response without posts (missing or null list) yields no entries
    # instead of crashing the whole feed refresh.
    for post_data in route_data.get("post_list") or []:
        # f-string formatting keeps this working when the API serialises
        # post_id as a JSON number rather than a string.
        url = f"{eam_news_site}?post_id={post_data['post_id']}"
        # Only attach an image when the URL is actually populated; a key that
        # is present but null/empty would otherwise produce a broken image.
        image_url = post_data.get("image_url")
        images = [{"image": image_url, "link": url}] if image_url else []
        entries.append({
            "date": post_data["entry_date"],
            "identifier": identifier,
            "type": None,
            "timestamp": post_data["entry_time"],
            "headline": None,
            "content": post_data["content"],
            "url": url,
            "images": images,
            "is_ai_summary": False,
        })
    return entries
case constants.IIDX_EAMUSE_APP_ID: - news_posts= sorted(eamuse_app.parse_news_page(site_data, "IIDX_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "IIDX_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL) case constants.DDR_EAMUSE_APP_ID: - news_posts= sorted(eamuse_app.parse_news_page(site_data, "DDR_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "DDR_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) case constants.SDVX_EAMUSE_APP_ID: - news_posts= sorted(eamuse_app.parse_news_page(site_data, "SOUND_VOLTEX_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "SOUND_VOLTEX_EAMUSEMENT", constants.EAMUSE_POST_SITE ), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) case constants.JUBEAT_EAMUSE_APP_ID: - news_posts= sorted(eamuse_app.parse_news_page(site_data, "JUBEAT_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "JUBEAT_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) case constants.POPN_MUSIC_EAMUSE_APP_ID: - news_posts= sorted(eamuse_app.parse_news_page(site_data, "POPN_MUSIC_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "POPN_MUSIC_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) case constants.GITADORA_EAMUSE_APP_ID: - news_posts= 
sorted(eamuse_app.parse_news_page(site_data, "GITADORA_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "GITADORA_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) case constants.NOSTALGIA_EAMUSE_APP_ID: - news_posts= sorted(eamuse_app.parse_news_page(site_data, "NOSTALGIA_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "NOSTALGIA_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) case constants.DANCE_RUSH_APP_ID: - news_posts= sorted(eamuse_app.parse_news_page(site_data, "DANCE_RUSH_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "DANCE_RUSH_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) case constants.DANCE_AROUND_APP_ID: - news_posts= sorted(eamuse_app.parse_news_page(site_data, "DANCE_AROUND_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True) + news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "DANCE_AROUND_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True) news_posts = translate.add_translate_text_to_en(news_posts) case _: raise ValueError("Cannot find provided e-amuse app gameId", version) |
