wmmt: continue appending paragraphs until the minimum word count (50) is reached

author: Pinapelz <yukais@pinapelz.com> 2025-05-27 15:54:20 -0700
committer: Pinapelz <yukais@pinapelz.com> 2025-05-27 15:54:20 -0700
commit: 8d4a3eeb3a68e39301caec1b2289783bd2bf7b6d (patch)
tree: 8c9d9e950d2829081858507ae1b079d80aaab481 /bandai_namco
parent: bf38127abdaf5323391ae00f5134020ebb01c907 (diff)
1 file changed, 15 insertions, 6 deletions
diff --git a/bandai_namco/wmmt.py b/bandai_namco/wmmt.py
index fc0e4bf..b7ea927 100644
--- a/bandai_namco/wmmt.py
+++ b/bandai_namco/wmmt.py
@@ -144,8 +144,11 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER
         if paragraphs:
             content = paragraphs[0].get_text(" ", strip=True)
             if content and len(content.split()) < 50 and len(paragraphs) > 1:
-                next_p_content = paragraphs[1].get_text(" ", strip=True)
-                content += " " + next_p_content
+                for paragraph in paragraphs[1:]:
+                    next_p_content = paragraph.get_text(" ", strip=True)
+                    content += " " + next_p_content
+                    if len(content.split()) >= 50:
+                        break
         images = []
         seen_srcs = []
         for img in container.find_all("img"):
@@ -178,8 +181,11 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER
         if paragraphs:
             content = paragraphs[0].get_text(" ", strip=True)
             if content and len(content.split()) < 50 and len(paragraphs) > 1:
-                next_p_content = paragraphs[1].get_text(" ", strip=True)
-                content += " " + next_p_content
+                for paragraph in paragraphs[1:]:
+                    next_p_content = paragraph.get_text(" ", strip=True)
+                    content += " " + next_p_content
+                    if len(content.split()) >= 50:
+                        break
         images = []
         seen_srcs = []
         for img in container.select("img"):
@@ -214,8 +220,11 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER
         if paragraphs:
             content = paragraphs[0].get_text(" ", strip=True)
             if content and len(content.split()) < 50 and len(paragraphs) > 1:
-                next_p_content = paragraphs[1].get_text(" ", strip=True)
-                content += " " + next_p_content
+                for paragraph in paragraphs[1:]:
+                    next_p_content = paragraph.get_text(" ", strip=True)
+                    content += " " + next_p_content
+                    if len(content.split()) >= 50:
+                        break
         images = []
         seen_srcs = []
         for img in container.select("img"):
author	Pinapelz <yukais@pinapelz.com>	2025-05-27 15:54:20 -0700
committer	Pinapelz <yukais@pinapelz.com>	2025-05-27 15:54:20 -0700
commit	8d4a3eeb3a68e39301caec1b2289783bd2bf7b6d (patch)
tree	8c9d9e950d2829081858507ae1b079d80aaab481 /bandai_namco
parent	bf38127abdaf5323391ae00f5134020ebb01c907 (diff)