aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2025-04-17 12:08:12 -0700
committerPinapelz <yukais@pinapelz.com>2025-04-17 12:08:12 -0700
commitd6c2824bc67cfb708763fba9412c6610d9cd05bb (patch)
tree8914d0f0ba6e8039fa3ac9a59856ac01c8645094
parent9059bca2439f3c5f2f91e4bd8d97a0da184be393 (diff)
add generic parser for eamusement games
information is different and some is better suited for our format than others
-rw-r--r--constants.py4
-rw-r--r--generate.py21
-rw-r--r--konami/eamuse_app.py51
-rw-r--r--news_feed.py21
4 files changed, 89 insertions, 8 deletions
diff --git a/constants.py b/constants.py
index ef92020..35f1fc8 100644
--- a/constants.py
+++ b/constants.py
@@ -6,6 +6,10 @@ SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE ="https://p.eagate.573.jp/game/sdvx/vi/news/i
IIDX_PINKY_CRUSH_NEWS_SITE="https://p.eagate.573.jp/game/2dx/32/info/index.html"
DDR_WORLD_NEWS_SITE="https://p.eagate.573.jp/game/ddr/ddrworld/info/index.html"
+EAMUSE_APP_FEED="https://eam.573.jp/app/web/post/official"
+IIDX_EAMUSE_APP_ID="s8svjrq62x592gvb"
+DDR_EAMUSE_APP_ID="aegmtuzekqik0eyf"
+
CHUNITHM_JP_NEWS_SITE="https://info-chunithm.sega.jp/"
CHUNITHM_INTL_NEWS_SITE="https://info-chunithm.sega.com/"
MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/"
diff --git a/generate.py b/generate.py
index 46b689d..eada9b3 100644
--- a/generate.py
+++ b/generate.py
@@ -3,6 +3,7 @@ Generates news JSON files
Generally you're expected to update the game versions manually
as for most games you only ever want the latest version (supported) of the game
"""
+from ast import Constant
import news_feed as feed
import constants
import json
@@ -59,14 +60,22 @@ def generate_news_file(filename, url, version=None):
print(f"Failed. Couldn't fetch {filename.upper()} data. Skipping...", "NEWS")
return news_data
-def generate_iidx_news_file():
- return generate_news_file("iidx_news", constants.IIDX_PINKY_CRUSH_NEWS_SITE)
+# For e-amusement games you can pull either from a game-specific scraper implementation or from the generic feed
+# provided by the e-amusement app. The two sources do not carry the same information.
+def generate_iidx_news_file(eamuse_feed: bool=False):
+    """
+    Generate the "iidx_news" file and return what generate_news_file returns.
+
+    With eamuse_feed=True the e-amusement app feed is used as the source,
+    otherwise the IIDX news site is used.
+    """
+    if not eamuse_feed:
+        return generate_news_file("iidx_news", constants.IIDX_PINKY_CRUSH_NEWS_SITE)
+    # The app feed URL is shared between games; the game's app ID is passed as the version argument.
+    return generate_news_file("iidx_news", constants.EAMUSE_APP_FEED, constants.IIDX_EAMUSE_APP_ID)
def generate_sdvx_news_file():
return generate_news_file("sdvx_news", constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE)
-def generate_ddr_news_file():
- return generate_news_file("ddr_news", constants.DDR_WORLD_NEWS_SITE)
+def generate_ddr_news_file(eamuse_feed: bool=False):
+    """
+    Generate the "ddr_news" file and return what generate_news_file returns.
+
+    With eamuse_feed=True the e-amusement app feed is used as the source,
+    otherwise the DDR news site is used.
+    """
+    if not eamuse_feed:
+        return generate_news_file("ddr_news", constants.DDR_WORLD_NEWS_SITE)
+    # The app feed URL is shared between games; the game's app ID is passed as the version argument.
+    return generate_news_file("ddr_news", constants.EAMUSE_APP_FEED, constants.DDR_EAMUSE_APP_ID)
def generate_chunithm_jp_news_file():
return generate_news_file("chunithm_jp_news", constants.CHUNITHM_JP_NEWS_SITE, constants.CHUNITHM_VERSION.VERSE)
@@ -89,9 +98,9 @@ if __name__ == "__main__":
log_output(f"{OUTPUT_DIR} was not found. Creating this directory...")
os.makedirs(OUTPUT_DIR)
- iidx_news_data = generate_iidx_news_file()
+ iidx_news_data = generate_iidx_news_file(eamuse_feed=True)
sdvx_news_data = generate_sdvx_news_file()
- ddr_news_data = generate_ddr_news_file()
+ ddr_news_data = generate_ddr_news_file(eamuse_feed=True)
chunithm_jp_news_data = generate_chunithm_jp_news_file()
maimaidx_jp_news_data = generate_maimaidx_jp_news_file()
ongeki_jp_news_data = generate_ongeki_jp_news_file()
diff --git a/konami/eamuse_app.py b/konami/eamuse_app.py
new file mode 100644
index 0000000..b552477
--- /dev/null
+++ b/konami/eamuse_app.py
@@ -0,0 +1,51 @@
+from bs4 import BeautifulSoup
+from datetime import datetime
+from urllib.parse import urljoin
+import time
+
+BASE_URL = "https://eam.573.jp"
+
+def parse_news_page(html: str, identifier: str):
+    """
+    Parse the HTML of an e-amusement app post feed into a list of news entries.
+
+    Every ``ul > li.ef`` element yields one dict with the keys ``date``,
+    ``identifier``, ``type``, ``timestamp``, ``headline``, ``content``,
+    ``url`` and ``images``. ``type`` and ``headline`` are always None.
+    Items without a ``.post-date`` element, or whose date text cannot be
+    parsed, are skipped.
+
+    :param html: page source of the feed
+    :param identifier: value stored under the ``identifier`` key of every entry
+    :return: list of entry dicts, in document order
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    entries = []
+
+    for li in soup.select("ul > li.ef"):
+        a_tag = li.find("a", href=True)
+        url = urljoin(BASE_URL, a_tag["href"]) if a_tag else None
+
+        date_text = li.select_one(".post-date")
+        if not date_text:
+            continue
+        # Turn a Japanese-style date (Y年M月D日) into Y/M/D so strptime can read it.
+        raw_date = date_text.get_text(strip=True).replace("年", "/").replace("月", "/").replace("日", "")
+        try:
+            date_obj = datetime.strptime(raw_date, "%Y/%m/%d")
+        except ValueError:
+            continue
+        date_str = date_obj.strftime("%Y-%m-%d")
+        # NOTE: time.mktime interprets the naive date in the host's local timezone,
+        # so the timestamp value depends on where this runs.
+        timestamp = int(time.mktime(date_obj.timetuple()))
+
+        content_tag = li.select_one(".article-text")
+        content = content_tag.get_text(strip=True) if content_tag else None
+
+        img_tag = li.select_one(".article-img img")
+        # .get() avoids a KeyError on an <img> without a src attribute.
+        image_src = img_tag.get("src") if img_tag else None
+        images = []
+        if image_src:
+            images.append({
+                # Resolve relative paths the same way the post link is resolved;
+                # urljoin leaves absolute URLs unchanged.
+                "image": urljoin(BASE_URL, image_src),
+                "link": url
+            })
+
+        entry = {
+            "date": date_str,
+            "identifier": identifier,
+            "type": None,
+            "timestamp": timestamp,
+            "headline": None,
+            "content": content,
+            "url": url,
+            "images": images
+        }
+        entries.append(entry)
+
+    return entries
diff --git a/news_feed.py b/news_feed.py
index 75a3678..43be096 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -8,15 +8,18 @@ Generic format for a news entry. All keys are considered to be nullable
'headline': Headline,
'content': All text content of news,
'url': URL to full post if available,
- 'images': {
+ 'images': [
+ {
'image': URL to image,
'link': If there's an associated href. Else None
+ }
- }
+ ]
}
"""
from site_scraper import SiteScraper, download_site_as_html
+import konami.eamuse_app as eamuse_app
import bemani.sdvx as sound_voltex
import bemani.iidx as iidx
import bemani.ddr as ddr
@@ -39,6 +42,20 @@ def get_news(news_url: str, version=None) -> list:
news_posts = sorted(iidx.parse_pinky_crush_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)
+ elif news_url == constants.EAMUSE_APP_FEED:
+ scraper = SiteScraper(headless=True)
+ site_data = scraper.get_page_source(news_url+"/?uuid_to="+version)
+ scraper.close()
+ match version:
+ case constants.IIDX_EAMUSE_APP_ID:
+ news_posts= sorted(eamuse_app.parse_news_page(site_data, "IIDX_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True)
+ news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)
+ case constants.DDR_EAMUSE_APP_ID:
+ news_posts= sorted(eamuse_app.parse_news_page(site_data, "DDR_EAMUSEMENT"), key=lambda x: x['timestamp'], reverse=True)
+ news_posts = translate.add_translate_text_to_en(news_posts)
+ case _:
+ raise ValueError("Cannot find provided e-amuse app gameId", version)
+
elif news_url == constants.DDR_WORLD_NEWS_SITE:
scraper = SiteScraper(headless=True)
site_data = scraper.get_page_source(news_url)
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage