aboutsummaryrefslogtreecommitdiffstats
path: root/bandai_namco
diff options
context:
space:
mode:
author: Pinapelz <yukais@pinapelz.com> 2025-05-27 15:54:20 -0700
committer: Pinapelz <yukais@pinapelz.com> 2025-05-27 15:54:20 -0700
commit: 8d4a3eeb3a68e39301caec1b2289783bd2bf7b6d (patch)
tree: 8c9d9e950d2829081858507ae1b079d80aaab481 /bandai_namco
parent: bf38127abdaf5323391ae00f5134020ebb01c907 (diff)
wmmt: continue appending paragraphs until the minimum word count (50) is reached
Diffstat (limited to 'bandai_namco')
-rw-r--r-- bandai_namco/wmmt.py | 21
1 file changed, 15 insertions, 6 deletions
diff --git a/bandai_namco/wmmt.py b/bandai_namco/wmmt.py
index fc0e4bf..b7ea927 100644
--- a/bandai_namco/wmmt.py
+++ b/bandai_namco/wmmt.py
@@ -144,8 +144,11 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER
if paragraphs:
content = paragraphs[0].get_text(" ", strip=True)
if content and len(content.split()) < 50 and len(paragraphs) > 1:
- next_p_content = paragraphs[1].get_text(" ", strip=True)
- content += " " + next_p_content
+ for paragraph in paragraphs[1:]:
+ next_p_content = paragraph.get_text(" ", strip=True)
+ content += " " + next_p_content
+ if len(content.split()) >= 50:
+ break
images = []
seen_srcs = []
for img in container.find_all("img"):
@@ -178,8 +181,11 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER
if paragraphs:
content = paragraphs[0].get_text(" ", strip=True)
if content and len(content.split()) < 50 and len(paragraphs) > 1:
- next_p_content = paragraphs[1].get_text(" ", strip=True)
- content += " " + next_p_content
+ for paragraph in paragraphs[1:]:
+ next_p_content = paragraph.get_text(" ", strip=True)
+ content += " " + next_p_content
+ if len(content.split()) >= 50:
+ break
images = []
seen_srcs = []
for img in container.select("img"):
@@ -214,8 +220,11 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER
if paragraphs:
content = paragraphs[0].get_text(" ", strip=True)
if content and len(content.split()) < 50 and len(paragraphs) > 1:
- next_p_content = paragraphs[1].get_text(" ", strip=True)
- content += " " + next_p_content
+ for paragraph in paragraphs[1:]:
+ next_p_content = paragraph.get_text(" ", strip=True)
+ content += " " + next_p_content
+ if len(content.split()) >= 50:
+ break
images = []
seen_srcs = []
for img in container.select("img"):
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage