diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-14 01:56:18 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-14 01:56:18 -0700 |
| commit | dc279404b1f6e371d6d7acd1380a265762e60218 (patch) | |
| tree | 85444616b0c98697bf060bb932557ae323a0c9ed | |
| parent | 147c36d207ca74e876b6b4703fd3f57f3ab57e56 (diff) | |
add maimai DX JPN scraping
| -rw-r--r-- | constants.py | 2 | ||||
| -rw-r--r-- | generate.py | 6 | ||||
| -rw-r--r-- | news_feed.py | 6 | ||||
| -rw-r--r-- | sega/maimaidx_jp.py | 48 | ||||
| -rw-r--r-- | site/src/components/NewsFeed.tsx | 3 | ||||
| -rw-r--r-- | site/src/components/TitleBar.tsx | 5 |
6 files changed, 64 insertions, 6 deletions
```diff
diff --git a/constants.py b/constants.py
index 7025146..f1ff58c 100644
--- a/constants.py
+++ b/constants.py
@@ -7,7 +7,7 @@ SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE ="https://p.eagate.573.jp/game/sdvx/vi/news/i
 IIDX_PINKY_CRUSH_NEWS_SITE="https://p.eagate.573.jp/game/2dx/32/info/index.html"
 
 CHUNITHM_JP_NEWS_SITE="https://info-chunithm.sega.jp/"
-MAIMAIDX_NEWS_SITE="https://info-maimai.sega.jp/"
+MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/"
 
 class CHUNITHM_VERSION(Enum):
     VERSE = 1
diff --git a/generate.py b/generate.py
index 106f9bf..3e08d74 100644
--- a/generate.py
+++ b/generate.py
@@ -42,6 +42,10 @@ if __name__ == "__main__":
     with open(OUTPUT_DIR+'/chunithm_jp_news.json', 'w') as json_file:
         json.dump(attach_news_meta_data(chunithm_jp_news_data), json_file)
 
-    news = create_merged_feed(iidx_news_data, sdvx_news_data, chunithm_jp_news_data)
+    maimaidx_jp_news_data = feed.get_news(constants.MAIMAIDX_JP_NEWS_SITE, constants.MAIMAIDX_VERSION.PRISM_PLUS)
+    with open(OUTPUT_DIR+'/maimaidx_jp_news.json', 'w') as json_file:
+        json.dump(attach_news_meta_data(maimaidx_jp_news_data), json_file)
+
+    news = create_merged_feed(iidx_news_data, sdvx_news_data, chunithm_jp_news_data, maimaidx_jp_news_data)
     with open(OUTPUT_DIR+'/news.json', 'w') as json_file:
         json.dump(attach_news_meta_data(news), json_file)
diff --git a/news_feed.py b/news_feed.py
index 8bd1179..01ee3d1 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -21,6 +21,7 @@ from site_scraper import SiteScraper
 import bemani.sdvx as sound_voltex
 import bemani.iidx as iidx
 import sega.chuni_jp as chunithm_jp
+import sega.maimaidx_jp as maimaidx_jp
 import constants
 
 def get_news(news_url: str, version=None) -> list:
@@ -33,8 +34,9 @@ def get_news(news_url: str, version=None) -> list:
     elif news_url == constants.CHUNITHM_JP_NEWS_SITE:
         if version == constants.CHUNITHM_VERSION.VERSE:
             news_posts = sorted(chunithm_jp.parse_chuni_jp_verse_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
-    elif news_url == constants.MAIMAIDX_NEWS_SITE:
-        pass
+    elif news_url == constants.MAIMAIDX_JP_NEWS_SITE:
+        if version == constants.MAIMAIDX_VERSION.PRISM_PLUS:
+            news_posts = sorted(maimaidx_jp.parse_maimaidx_jp_prism_plus_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
     else:
         news_posts = []
     scraper.close()
diff --git a/sega/maimaidx_jp.py b/sega/maimaidx_jp.py
new file mode 100644
index 0000000..5a88ef1
--- /dev/null
+++ b/sega/maimaidx_jp.py
@@ -0,0 +1,48 @@
+from bs4 import BeautifulSoup
+from datetime import datetime, timezone, timedelta
+from urllib.parse import urljoin
+import re
+
+def parse_maimaidx_jp_prism_plus_news_site(html: str):
+    soup = BeautifulSoup(html, "html.parser")
+    base_url = "https://info-maimai.sega.jp/"
+    news_items = []
+
+    news_boxes = soup.select(".maiPager-content .newsBox")
+    for box in news_boxes:
+        a_tag = box.select_one("a")
+        url = urljoin(base_url, a_tag["href"]) if a_tag and a_tag.get("href") else None
+
+        img_tag = box.select_one("img")
+        image_url = urljoin(base_url, img_tag["src"]) if img_tag else None
+
+        date_tag = box.select_one(".newsDate")
+        raw_date = date_tag.get_text(strip=True) if date_tag else None
+
+        jst = timezone(timedelta(hours=9))
+        try:
+            dt = datetime.strptime(raw_date.split(" ")[0], "%Y.%m.%d").replace(tzinfo=jst)
+            timestamp = int(dt.timestamp())
+        except:
+            dt = None
+            timestamp = 0
+
+        headline_tag = box.select_one(".newsLink")
+        headline = headline_tag.get_text(strip=True) if headline_tag else None
+        content = box.get_text(separator="\n", strip=True)
+        identifier = re.sub(r"\W+", "-", headline.lower()) if headline else "unknown"
+        news_items.append({
+            "date": raw_date,
+            "identifier": identifier,
+            "type": None,
+            "timestamp": timestamp,
+            "headline": headline,
+            "content": content,
+            "url": url,
+            "images": [{
+                "image": image_url,
+                "link": url
+            }] if image_url else []
+        })
+
+    return news_items
diff --git a/site/src/components/NewsFeed.tsx b/site/src/components/NewsFeed.tsx
index f030200..99099be 100644
--- a/site/src/components/NewsFeed.tsx
+++ b/site/src/components/NewsFeed.tsx
@@ -132,5 +132,8 @@ function getGameName(identifier: string): string | null {
   else if(identifier.startsWith("CHUNITHM_JP")){
     return "CHUNITHM (JAPAN)";
   }
+  else if(identifier.startsWith("MAIMAIDX_JP")){
+    return "maimai DX (JAPAN)"
+  }
   return null;
 }
diff --git a/site/src/components/TitleBar.tsx b/site/src/components/TitleBar.tsx
index bf79191..d7cd1bf 100644
--- a/site/src/components/TitleBar.tsx
+++ b/site/src/components/TitleBar.tsx
@@ -21,7 +21,8 @@ const TitleBar: React.FC = () => {
     {
       name: "SEGA",
       games: [
-        { id: "chunithm_jp", title: "CHUNITHM JPN" },
+        { id: "chunithm_jp", title: "CHUNITHM (JPN)" },
+        { id: "chunithm_jp", title: "maimai DX (JPN)" },
       ]
     }
   ];
@@ -116,4 +117,4 @@ const TitleBar: React.FC = () => {
   );
 };
 
-export default TitleBar;
\ No newline at end of file
+export default TitleBar;
```
