From 0687ba184d1baec0a38fa9bf58a8f015febb6262 Mon Sep 17 00:00:00 2001 From: Pinapelz Date: Thu, 17 Apr 2025 00:06:10 -0700 Subject: fix: utils frontend str mapping func --- bemani/ddr.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'bemani/ddr.py') diff --git a/bemani/ddr.py b/bemani/ddr.py index 947728c..ee8b659 100644 --- a/bemani/ddr.py +++ b/bemani/ddr.py @@ -10,20 +10,35 @@ def parse_ddr_world_news_site(html: str): news_entries = [] for div in soup.select("div#info > div.news_one"): +<<<<<<< Updated upstream if 'none' in div.get('style', ''): continue +======= + # Skip hidden entries + style = div.get('style', '') + if 'none' in style: + continue + +>>>>>>> Stashed changes title_tag = div.select_one("div.news_title > div.title") date_tag = div.select_one("div.news_title > div.date") headline = title_tag.get_text(strip=True) if title_tag else None date_str = date_tag.get_text(strip=True) if date_tag else None +<<<<<<< Updated upstream +======= + # Parse date +>>>>>>> Stashed changes try: dt = datetime.strptime(date_str, "%Y/%m/%d") date_iso = dt.strftime("%Y-%m-%d") timestamp = int(time.mktime(dt.timetuple())) except Exception: date_iso, timestamp = None, None +<<<<<<< Updated upstream +======= +>>>>>>> Stashed changes paras = [p.get_text(strip=True, separator="\n") for p in div.find_all("p", recursive=False)] if not paras: @@ -35,10 +50,19 @@ def parse_ddr_world_news_site(html: str): paras.append(child.get_text(strip=True, separator="\n")) content = "\n\n".join(paras) if paras else None +<<<<<<< Updated upstream # image (use data-src if present) img = div.select_one("div.img_news_center img") raw_src = img.get("data-src") or img.get("src") if img else None image_url = urljoin(base_url, raw_src) if raw_src else None +======= + images = [] + for img in div.select("div.img_news_center img"): + raw_src = img.get("data-src") or img.get("src") + if raw_src: + full_url = urljoin(base_url, raw_src) + images.append({"image": full_url, "link": None}) +>>>>>>> Stashed changes news_entries.append({ "date": date_iso, @@ -48,9 +72,15 @@ def parse_ddr_world_news_site(html: str): "headline": headline, "content": content, "url": base_url, +<<<<<<< Updated upstream "images": { "image": image_url, "link": None } }) +======= + "images": images + }) + +>>>>>>> Stashed changes return news_entries -- cgit v1.2.3