diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-05-15 01:24:08 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-05-15 01:40:47 -0700 |
| commit | d4208f2765b905834ad17b9008f769b4a3bfc1be (patch) | |
| tree | 93b038ec99cffd7f49ab88ad341b3797aa5ebbec | |
| parent | 98752471904a0f750d37e390160cfaea58437a96 (diff) | |
wmmt: extend content field if too short
| -rw-r--r-- | bandai_namco/wmmt.py | 31 | ||||
| -rw-r--r-- | generate.py | 1 |
2 files changed, 25 insertions, 7 deletions
diff --git a/bandai_namco/wmmt.py b/bandai_namco/wmmt.py index 1d5ea66..9ce2147 100644 --- a/bandai_namco/wmmt.py +++ b/bandai_namco/wmmt.py @@ -128,8 +128,13 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER return None date_str = data["date"] timestamp = int(datetime.strptime(date_str, "%Y/%m/%d").replace(tzinfo=timezone.utc).timestamp()) - first_p = container.find("p") - content = first_p.get_text(" ", strip=True) if first_p else "" + content = "" + paragraphs = container.find_all("p") + if paragraphs: + content = paragraphs[0].get_text(" ", strip=True) + if content and len(content.split()) < 10 and len(paragraphs) > 1: + next_p_content = paragraphs[1].get_text(" ", strip=True) + content += " " + next_p_content images = [] for img in container.find_all("img"): src = img.get("src").replace("./","") @@ -139,6 +144,8 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER src = "special/" + src elif data["type"] == "FUTURE LAB": src = "miraiken/" + src + elif data["type"] == "NAVI-SCRATCH": + src = "navi/" elif data["type"] == "UPDATE": src = "update/" + src img_url = image_base + "/" + src if src else None @@ -163,8 +170,12 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER return None date_str = data["date"] timestamp = int(datetime.strptime(date_str, "%Y/%m/%d").replace(tzinfo=timezone.utc).timestamp()) - first_p = container.find("p") - content = first_p.get_text(" ", strip=True) if first_p else "" + paragraphs = container.find_all("p") + if paragraphs: + content = paragraphs[0].get_text(" ", strip=True) + if content and len(content.split()) < 10 and len(paragraphs) > 1: + next_p_content = paragraphs[1].get_text(" ", strip=True) + content += " " + next_p_content images = [] for img in container.select("img"): src = img.get("src").replace("./","").lstrip("/") @@ -176,6 +187,8 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER src = "special/" + src elif data["type"] == "FUTURE LAB": src = "miraiken/" + src + elif data["type"] == "NAVI-SCRATCH": + src = "navi/" elif data["type"] == "UPDATE": src = "update/" + src src = src.replace("./", "").lstrip("/") @@ -201,8 +214,12 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER return None date_str = data["date"] timestamp = int(datetime.strptime(date_str, "%Y/%m/%d").replace(tzinfo=timezone.utc).timestamp()) - first_p = container.find("p") - content = first_p.get_text(" ", strip=True) if first_p else "" + paragraphs = container.find_all("p") + if paragraphs: + content = paragraphs[0].get_text(" ", strip=True) + if content and len(content.split()) < 10 and len(paragraphs) > 1: + next_p_content = paragraphs[1].get_text(" ", strip=True) + content += " " + next_p_content images = [] for img in container.select("img"): src = img.get("src").replace("./","").lstrip("/") @@ -212,6 +229,8 @@ def make_wmmt_news_extractor(identifier: str, version: constants.WANGAN_MAXI_VER src = "event/online/" + src elif data["type"] == "SPECIAL": src = "special/" + src + elif data["type"] == "NAVI-SCRATCH": + src = "navi/" elif data["type"] == "FUTURE LAB": src = "miraiken/" + src elif data["type"] == "UPDATE": diff --git a/generate.py b/generate.py index fec2295..a554e8e 100644 --- a/generate.py +++ b/generate.py @@ -184,7 +184,6 @@ if __name__ == "__main__": chunithm_intl_news_data, music_diver_news_data, taiko_news_data, - wmmt_news, wacca_plus_news, museca_plus_news, polaris_news_data, |
