4 files changed, 168 insertions, 8 deletions
diff --git a/bandai_namco/wmmt.py b/bandai_namco/wmmt.py
index 86a2ce4..eaa4767 100644
--- a/bandai_namco/wmmt.py
+++ b/bandai_namco/wmmt.py
@@ -15,7 +15,15 @@ TYPE_MAP = {
     "Online Events Information": "EVENTS",
     "Update Information": "UPDATE",
     "Future Lab News": "FUTURE LAB",
-    "Special Contents": "SPECIAL"
+    "Special Contents": "SPECIAL",
+    "Navi Scratch-off Item": "NAVI-SCRATCH",
+    "News": "NEWS",
+    "オンラインイベント情報": "EVENTS",
+    "アップデート情報": "UPDATE",
+    "未来研通信": "FUTURE LAB",
+    "スペシャルコンテンツ": "SPECIAL",
+    "ナビスクラッチ配信アイテム": "NAVI-SCRATCH",
+    "ニュース": "NEWS"
 }
 
 def make_wmmt_parser(version: constants.WANGAN_MAXI_VERSION):
@@ -46,15 +54,72 @@ def make_wmmt_parser(version: constants.WANGAN_MAXI_VERSION):
                     "url": url,
                     "title": title,
                     "date": date,
-                    "type": TYPE_MAP[type_name]
+                    "type": TYPE_MAP.get(type_name, "Unknown")
                 })
                 count += 1
         return results
+
+    def six_rr_parser(html: str):
+        """Return url/title/date/type dicts for the leading links of every news section.
+
+        At most constants.WANGAN_MAXI_POSTS_PER_SECTION links are taken per section.
+        """
+        posts = []
+        document = BeautifulSoup(html, "html.parser")
+        for block in document.select("div.parts_column_02 > div.parts_bg_01"):
+            heading = block.select_one("section h2.parts_txt_01")
+            label = heading.get_text(strip=True) if heading else None
+            for index, anchor in enumerate(block.select("ul.archiveNav a[href]")):
+                if index >= constants.WANGAN_MAXI_POSTS_PER_SECTION:
+                    break
+                heading_tag = anchor.find("h4")
+                date_tag = anchor.find("p")
+                # Every ".php" in the absolute URL is rewritten to ".html".
+                target = urljoin(BASE_URL, anchor["href"]).replace(".php", ".html")
+                posts.append({
+                    "url": target,
+                    "title": heading_tag.get_text(strip=True) if heading_tag else "No title",
+                    "date": date_tag.get_text(strip=True) if date_tag else "No date",
+                    # Section headings missing from TYPE_MAP are reported as "Unknown".
+                    "type": TYPE_MAP.get(label, "Unknown"),
+                })
+        return posts
+
+    def six_rr_plus_parser(html: str):
+        """Collect up to WANGAN_MAXI_POSTS_PER_SECTION post links from each news section.
+
+        Each entry is a dict with "url", "title", "date" and "type" keys.
+        """
+        def text_or(tag, fallback):
+            return tag.get_text(strip=True) if tag else fallback
+
+        entries = []
+        page = BeautifulSoup(html, "html.parser")
+        for group in page.select("div.parts_column_02 > div.parts_bg_01"):
+            # A missing or unmapped section heading yields the "Unknown" type.
+            category = text_or(group.select_one("section h2.parts_txt_01"), None)
+            taken = 0
+            for link in group.select("ul.archiveNav a[href]"):
+                if taken >= constants.WANGAN_MAXI_POSTS_PER_SECTION:
+                    break
+                taken += 1
+                entries.append({
+                    "url": urljoin(BASE_URL, link["href"]).replace(".php", ".html"),
+                    "title": text_or(link.find("h4"), "No title"),
+                    "date": text_or(link.find("p"), "No date"),
+                    "type": TYPE_MAP.get(category, "Unknown"),
+                })
+        return entries
+
     if version == constants.WANGAN_MAXI_VERSION.FIVE_DX_PLUS:
         return five_dx_plus_parser
+    elif version == constants.WANGAN_MAXI_VERSION.SIX_RR:
+        return six_rr_parser
+    elif version == constants.WANGAN_MAXI_VERSION.SIX_RR_PLUS:
+        return six_rr_plus_parser
 
 
-def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VERSION, internal_path: str):
+def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VERSION, internal_path: str, region_text: str):
     def five_dx_plus_extractor(html: str, data: dict):
         image_base = BASE_URL + "/" + internal_path
         soup = BeautifulSoup(html, "html.parser")
@@ -82,6 +147,67 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER
                 "image": img_url,
                 "link": urljoin(BASE_URL, parent.get("href")) if parent and parent.get("href") else None
             })
+        data["type"] = "["+region_text+"]" + " " + data["type"]
+        data["identifier"] = identifier
+        data["timestamp"] = timestamp
+        data["content"] = content
+        data["images"] = images
+        data["is_ai_summary"] = False
+        return data
+
+    def six_rr_extractor(html: str, data: dict):
+        """Fill `data` with the details of one post page and return it, or return None.
+
+        None means ".parts_column_02" is missing or data["date"] is not YYYY/MM/DD.
+        """
+        container = BeautifulSoup(html, "html.parser").select_one(".parts_column_02")
+        if not container:
+            return None
+        try:
+            posted = datetime.strptime(data["date"], "%Y/%m/%d")
+        except ValueError:
+            return None  # e.g. the link parser's "No date" fallback: skip, don't raise
+        first_p = container.find("p")
+        images = []
+        for img in container.select("img"):
+            src = img.get("src")
+            if not src:
+                continue
+            # NOTE(review): replace() drops every "./", so "../x" becomes ".x" — confirm.
+            src = src.replace("./", "").lstrip("/")
+            parent = img.find_parent("a")
+            href = parent.get("href") if parent else None
+            images.append({"image": f"{BASE_URL}/{internal_path}/{src}", "link": urljoin(BASE_URL, href) if href else None})
+        data["type"] = "["+region_text+"]" + " " + data["type"]
+        data["identifier"] = identifier
+        data["timestamp"] = int(posted.replace(tzinfo=timezone.utc).timestamp())
+        data["content"] = first_p.get_text(" ", strip=True) if first_p else ""
+        data.update(images=images, is_ai_summary=False)
+        return data
+
+    def six_rr_plus_extractor(html: str, data: dict):
+        image_base = BASE_URL + "/" + internal_path
+        soup = BeautifulSoup(html, "html.parser")
+        container = soup.select_one(".parts_column_02")
+        if not container:
+            return None
+        date_str = data["date"]
+        timestamp = int(datetime.strptime(date_str, "%Y/%m/%d").replace(tzinfo=timezone.utc).timestamp())
+        first_p = container.find("p")
+        content = first_p.get_text(" ", strip=True) if first_p else ""
+        images = []
+        for img in container.select("img"):
+            src = img.get("src")
+            if not src:
+                continue
+            src = src.replace("./", "").lstrip("/")
+            img_url = f"{image_base}/{src}"
+            parent = img.find_parent("a")
+            images.append({
+                "image": img_url,
+                "link": urljoin(BASE_URL, parent.get("href")) if parent and parent.get("href") else None
+            })
+        data["type"] = "["+region_text+"]" + " " + data["type"]
         data["identifier"] = identifier
         data["timestamp"] = timestamp
         data["content"] = content
@@ -91,6 +217,14 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER
 
     if version == constants.WANGAN_MAXI_VERSION.FIVE_DX_PLUS:
         return five_dx_plus_extractor
+    elif version == constants.WANGAN_MAXI_VERSION.SIX_RR:
+        return six_rr_extractor
+    elif version == constants.WANGAN_MAXI_VERSION.SIX_RR_PLUS:
+        return six_rr_plus_extractor
 
 get_wmmt_na_news_post_links = make_wmmt_parser(constants.WANGAN_MAXI_VERSION.FIVE_DX_PLUS)
-parse_wmmt_na_news = make_wmmt_news_extractor("WANGAN_MAXI_NA", constants.WANGAN_MAXI_VERSION.FIVE_DX_PLUS, "wanganmaxi5dxplus/na")
+get_wmmt_asia_oce_news_post_links = make_wmmt_parser(constants.WANGAN_MAXI_VERSION.SIX_RR)  # link parser bound to the SIX_RR layout (Asia/Oceania)
+get_wmmt_jp_news_post_links = make_wmmt_parser(constants.WANGAN_MAXI_VERSION.SIX_RR_PLUS)  # link parser bound to the SIX_RR_PLUS layout (Japan)
+parse_wmmt_na_news = make_wmmt_news_extractor("WANGAN_MAXI_NA", constants.WANGAN_MAXI_VERSION.FIVE_DX_PLUS, "wanganmaxi5dxplus/na", "NA")
+parse_wmmt_asia_oce_news = make_wmmt_news_extractor("WANGAN_MAXI_ASIA_OCE", constants.WANGAN_MAXI_VERSION.SIX_RR, "wanganmaxi6rr/en", "ASIA/OCE")
+parse_wmmt_jp_news = make_wmmt_news_extractor("WANGAN_MAXI_JP", constants.WANGAN_MAXI_VERSION.SIX_RR_PLUS, "wanganmaxi6rrplus/jp", "JPN")
diff --git a/generate.py b/generate.py
index a6ef392..fec2295 100644
--- a/generate.py
+++ b/generate.py
@@ -128,6 +128,9 @@ def generate_music_diver_news_file():
 def generate_taiko_news_file():
     return generate_news_file("taiko_news", constants.TAIKO_BLOG_SITE)
 
+def generate_wmmt_news_file():  # Delegates to generate_news_file with the "wmmt_news" name and constants.WANGAN_MAXI_GENERIC, returning its result.
+    return generate_news_file("wmmt_news", constants.WANGAN_MAXI_GENERIC)
+
 def generate_wacca_plus_news_file():
     return generate_news_file("wacca_plus_news", constants.WACCA_PLUS_MAGIC_STRING)
 
@@ -162,6 +165,7 @@ if __name__ == "__main__":
     wacca_plus_news = generate_wacca_plus_news_file()
     museca_plus_news = generate_museca_plus_news_file()
     generate_rbdx_plus_news_file()
+    wmmt_news = generate_wmmt_news_file()
 
 
 
@@ -180,6 +184,7 @@ if __name__ == "__main__":
         chunithm_intl_news_data,
         music_diver_news_data,
         taiko_news_data,
+        wmmt_news,
         wacca_plus_news,
         museca_plus_news,
         polaris_news_data,
diff --git a/news_feed.py b/news_feed.py
index d621984..e6e125e 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -173,9 +173,28 @@ def get_news(news_url: str, version=None) -> list:
         prelim_na_news_data = wmmt.get_wmmt_na_news_post_links(na_site_data)
         for data in prelim_na_news_data:
             post_site_data = download_site_as_html(data["url"])
-            news_posts.append(wmmt.parse_wmmt_na_news(post_site_data, data))
-        print(news_posts)
-        exit()
+            news = wmmt.parse_wmmt_na_news(post_site_data, data)
+            if news is not None:
+                news_posts.append(news)
+        asia_oce_site_data = download_site_as_html(constants.WANGAN_MAXI_ASIA_OCE_NEWS_SITE)
+        prelim_asia_oce_news_data = wmmt.get_wmmt_asia_oce_news_post_links(asia_oce_site_data)
+        for data in prelim_asia_oce_news_data:
+            post_site_data = download_site_as_html(data["url"])
+            news = wmmt.parse_wmmt_asia_oce_news(post_site_data, data)
+            if news is not None:
+                news_posts.append(news)
+        jp_site_data = download_site_as_html(constants.WANGAN_MAXI_JP_NEWS_SITE, response_encoding="utf-8")
+        prelim_jp_news_data = wmmt.get_wmmt_jp_news_post_links(jp_site_data)
+        jp_news = []
+        for data in prelim_jp_news_data:
+            post_site_data = download_site_as_html(data["url"], response_encoding="utf-8")
+            news = wmmt.parse_wmmt_jp_news(post_site_data, data)
+            if news is not None:
+                jp_news.append(news)
+        jp_news = translate.add_translate_text_to_en(jp_news)
+        news_posts.extend(jp_news)
+        news_posts = sorted(news_posts, key=lambda x: x['timestamp'], reverse=True)
+        return news_posts
 
 
     elif news_url == constants.WACCA_PLUS_MAGIC_STRING:
diff --git a/site_scraper.py b/site_scraper.py
index 9efa4b6..e9301b5 100644
--- a/site_scraper.py
+++ b/site_scraper.py
@@ -68,7 +68,7 @@ class SiteScraper:
         print("WebDriver closed successfully")
 
 
-def download_site_as_html(url: str, timeout: int = 10) -> str:
+def download_site_as_html(url: str, timeout: int = 10, response_encoding=None) -> str:
     headers = {
         "User-Agent": (
             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
@@ -84,6 +84,8 @@ def download_site_as_html(url: str, timeout: int = 10) -> str:
 
     try:
         response = requests.get(url, headers=headers, timeout=timeout)
+        if response_encoding:
+            response.encoding = response_encoding
         response.raise_for_status()
         return response.text
     except requests.RequestException as e: