add maimai DX JPN scraping

author: Pinapelz <yukais@pinapelz.com> 2025-04-14 01:56:18 -0700
committer: Pinapelz <yukais@pinapelz.com> 2025-04-14 01:56:18 -0700
commit: dc279404b1f6e371d6d7acd1380a265762e60218 (patch)
tree: 85444616b0c98697bf060bb932557ae323a0c9ed
parent: 147c36d207ca74e876b6b4703fd3f57f3ab57e56 (diff)
6 files changed, 64 insertions, 6 deletions
diff --git a/constants.py b/constants.py
index 7025146..f1ff58c 100644
--- a/constants.py
+++ b/constants.py
@@ -7,7 +7,7 @@ SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE ="https://p.eagate.573.jp/game/sdvx/vi/news/i
 IIDX_PINKY_CRUSH_NEWS_SITE="https://p.eagate.573.jp/game/2dx/32/info/index.html"
 
 CHUNITHM_JP_NEWS_SITE="https://info-chunithm.sega.jp/"
-MAIMAIDX_NEWS_SITE="https://info-maimai.sega.jp/"
+MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/"
 
 class CHUNITHM_VERSION(Enum):
     VERSE = 1
diff --git a/generate.py b/generate.py
index 106f9bf..3e08d74 100644
--- a/generate.py
+++ b/generate.py
@@ -42,6 +42,10 @@ if __name__ == "__main__":
     with open(OUTPUT_DIR+'/chunithm_jp_news.json', 'w') as json_file:
         json.dump(attach_news_meta_data(chunithm_jp_news_data), json_file)
 
-    news = create_merged_feed(iidx_news_data, sdvx_news_data, chunithm_jp_news_data)
+    maimaidx_jp_news_data = feed.get_news(constants.MAIMAIDX_JP_NEWS_SITE, constants.MAIMAIDX_VERSION.PRISM_PLUS)
+    with open(OUTPUT_DIR+'/maimaidx_jp_news.json', 'w') as json_file:
+        json.dump(attach_news_meta_data(maimaidx_jp_news_data), json_file)
+
+    news = create_merged_feed(iidx_news_data, sdvx_news_data, chunithm_jp_news_data, maimaidx_jp_news_data)
     with open(OUTPUT_DIR+'/news.json', 'w') as json_file:
         json.dump(attach_news_meta_data(news), json_file)
diff --git a/news_feed.py b/news_feed.py
index 8bd1179..01ee3d1 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -21,6 +21,7 @@ from site_scraper import SiteScraper
 import bemani.sdvx as sound_voltex
 import bemani.iidx as iidx
 import sega.chuni_jp as chunithm_jp
+import sega.maimaidx_jp as maimaidx_jp
 import constants
 
 def get_news(news_url: str, version=None) -> list:
@@ -33,8 +34,9 @@ def get_news(news_url: str, version=None) -> list:
     elif news_url == constants.CHUNITHM_JP_NEWS_SITE:
         if version == constants.CHUNITHM_VERSION.VERSE:
             news_posts = sorted(chunithm_jp.parse_chuni_jp_verse_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
-    elif news_url == constants.MAIMAIDX_NEWS_SITE:
-        pass
+    elif news_url == constants.MAIMAIDX_JP_NEWS_SITE:
+        if version == constants.MAIMAIDX_VERSION.PRISM_PLUS:
+            news_posts = sorted(maimaidx_jp.parse_maimaidx_jp_prism_plus_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
     else:
         news_posts = []
     scraper.close()
diff --git a/sega/maimaidx_jp.py b/sega/maimaidx_jp.py
new file mode 100644
index 0000000..5a88ef1
--- /dev/null
+++ b/sega/maimaidx_jp.py
@@ -0,0 +1,65 @@
+from bs4 import BeautifulSoup
+from datetime import datetime, timezone, timedelta
+from urllib.parse import urljoin
+import re
+
+def parse_maimaidx_jp_prism_plus_news_site(html: str):
+    """Parse the maimai DX (JPN) news page HTML into a list of news-item dicts.
+
+    One dict is produced per ``.newsBox`` element under ``.maiPager-content``,
+    with keys ``date``, ``identifier``, ``type``, ``timestamp``, ``headline``,
+    ``content``, ``url`` and ``images``.
+
+    Args:
+        html: Raw HTML of the news page.
+
+    Returns:
+        A list of news-item dicts. ``timestamp`` is the Unix time of
+        midnight UTC+9 on the posted date, or 0 when the date is missing or
+        cannot be parsed.
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    base_url = "https://info-maimai.sega.jp/"
+    # Posted dates are stamped as UTC+9; the tzinfo is loop-invariant, so build it once.
+    jst = timezone(timedelta(hours=9))
+    news_items = []
+
+    for box in soup.select(".maiPager-content .newsBox"):
+        a_tag = box.select_one("a")
+        url = urljoin(base_url, a_tag["href"]) if a_tag and a_tag.get("href") else None
+
+        # An <img> without a usable src must not raise KeyError and abort the whole parse.
+        img_tag = box.select_one("img")
+        image_url = urljoin(base_url, img_tag["src"]) if img_tag and img_tag.get("src") else None
+
+        date_tag = box.select_one(".newsDate")
+        raw_date = date_tag.get_text(strip=True) if date_tag else None
+
+        try:
+            # Only the text before the first space is parsed, as YYYY.MM.DD.
+            dt = datetime.strptime((raw_date or "").split(" ")[0], "%Y.%m.%d").replace(tzinfo=jst)
+            timestamp = int(dt.timestamp())
+        except ValueError:
+            # Best-effort: a missing or malformed date keeps the item, with timestamp 0.
+            timestamp = 0
+
+        headline_tag = box.select_one(".newsLink")
+        headline = headline_tag.get_text(strip=True) if headline_tag else None
+        content = box.get_text(separator="\n", strip=True)
+        # Slug of the lowercased headline: each run of non-word characters becomes "-".
+        identifier = re.sub(r"\W+", "-", headline.lower()) if headline else "unknown"
+        news_items.append({
+            "date": raw_date,
+            "identifier": identifier,
+            "type": None,
+            "timestamp": timestamp,
+            "headline": headline,
+            "content": content,
+            "url": url,
+            "images": [{
+                "image": image_url,
+                "link": url
+            }] if image_url else []
+        })
+
+    return news_items
diff --git a/site/src/components/NewsFeed.tsx b/site/src/components/NewsFeed.tsx
index f030200..99099be 100644
--- a/site/src/components/NewsFeed.tsx
+++ b/site/src/components/NewsFeed.tsx
@@ -132,5 +132,8 @@ function getGameName(identifier: string): string | null {
     else if(identifier.startsWith("CHUNITHM_JP")){
         return "CHUNITHM (JAPAN)";
     }
+    else if(identifier.startsWith("MAIMAIDX_JP")){
+      return "maimai DX (JAPAN)"
+    }
     return null;
 }
diff --git a/site/src/components/TitleBar.tsx b/site/src/components/TitleBar.tsx
index bf79191..d7cd1bf 100644
--- a/site/src/components/TitleBar.tsx
+++ b/site/src/components/TitleBar.tsx
@@ -21,7 +21,8 @@ const TitleBar: React.FC = () => {
         {
             name: "SEGA",
             games: [
-                { id: "chunithm_jp", title: "CHUNITHM JPN" },
+                { id: "chunithm_jp", title: "CHUNITHM (JPN)" },
+                { id: "maimaidx_jp", title: "maimai DX (JPN)" },
             ]
         }
     ];
@@ -116,4 +117,4 @@ const TitleBar: React.FC = () => {
     );
 };
 
-export default TitleBar;
-\ No newline at end of file
+export default TitleBar;
author	Pinapelz <yukais@pinapelz.com>	2025-04-14 01:56:18 -0700
committer	Pinapelz <yukais@pinapelz.com>	2025-04-14 01:56:18 -0700
commit	dc279404b1f6e371d6d7acd1380a265762e60218 (patch)
tree	85444616b0c98697bf060bb932557ae323a0c9ed
parent	147c36d207ca74e876b6b4703fd3f57f3ab57e56 (diff)