diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-17 21:04:25 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-17 21:04:25 -0700 |
| commit | 9ec17b13c9b97febcde6c7b04ea57ec6a060b778 (patch) | |
| tree | 3967da44dd73695953502c6eb933c8a9fe28030d /bandai_namco/taiko.py | |
| parent | 4d84014f7c69e3a8074f47f2fd7688af90feeb01 (diff) | |
add support for Taiko no Tatsujin
Diffstat (limited to 'bandai_namco/taiko.py')
| -rw-r--r-- | bandai_namco/taiko.py | 58 |
1 files changed, 58 insertions, 0 deletions
# bandai_namco/taiko.py (new file, mode 100644, blob 0aa2e0e)
from bs4 import BeautifulSoup
from datetime import datetime
import time
import re

# Sub-header <div>s are identified by their inline orange background colour.
# Compiled once at import time; tolerant of extra whitespace and hex-digit case.
_SUBHEADER_STYLE = re.compile(r"background:\s*#ff4500", re.IGNORECASE)


def _subheader_bullets(article) -> list:
    """Return one "• <text>" bullet per non-empty orange sub-header div in *article*."""
    bullets = []
    for div in article.find_all("div", style=_SUBHEADER_STYLE):
        # The leading "■" marker is dropped; the bullet prefix replaces it.
        title_text = div.get_text(strip=True).replace("■", "").strip()
        if title_text:
            bullets.append(f"• {title_text}")
    return bullets


def _image_entries(article, base_url: str) -> list:
    """Return ``{"image": url, "link": None}`` dicts for every <img> in *article*."""
    images = []
    for img in article.find_all("img"):
        img_url = img.get("src") or img.get("data-src")
        if not img_url:
            continue
        if img_url.startswith("//"):
            # Protocol-relative URL: only the scheme is missing. Prefixing the
            # site root here would yield "https://taiko-ch.net//host/...".
            img_url = "https:" + img_url
        elif img_url.startswith("/"):
            # Root-relative path: anchor it to the site root.
            img_url = base_url + img_url
        images.append({"image": img_url, "link": None})
    return images


def parse_taiko_blog_site(html: str) -> list:
    """Parse the HTML of the Taiko no Tatsujin blog into a list of entry dicts.

    Every ``<article>`` that contains a ``p.entryDate`` element with a date in
    ``%Y年%m月%d日`` format yields one entry. Articles without a date element
    are skipped silently; articles whose date cannot be parsed are skipped
    after printing a diagnostic.

    Args:
        html: Raw HTML of the blog page.

    Returns:
        A list of dicts with the keys ``date`` (the original date string),
        ``identifier`` (always ``"TAIKO"``), ``type`` (``None``),
        ``timestamp`` (int seconds since the epoch), ``headline`` (text of
        the first ``<h1>`` or ``None``), ``content`` (newline-joined
        "• ..." bullets built from the sub-header divs), ``url`` (``None``)
        and ``images`` (list of ``{"image": url, "link": None}``).
    """
    base_url: str = "https://taiko-ch.net"
    soup = BeautifulSoup(html, "html.parser")

    entries = []

    for article in soup.select("article"):
        date_tag = article.select_one("p.entryDate")
        if date_tag is None:
            continue
        date_str = date_tag.text.strip()

        # Only the date conversion can realistically fail, so the try block is
        # limited to it instead of swallowing every exception in the loop body.
        try:
            date_obj = datetime.strptime(date_str, "%Y年%m月%d日")
            # NOTE(review): time.mktime interprets the date in the host's local
            # timezone, so the timestamp varies with the machine's TZ setting.
            # Left as-is to stay compatible with existing callers - confirm
            # whether a fixed zone (e.g. JST) is intended.
            timestamp = int(time.mktime(date_obj.timetuple()))
        except (ValueError, OverflowError) as e:
            print(f"Error parsing article: {e}")
            continue

        headline_tag = article.select_one("h1")
        headline = headline_tag.text.strip() if headline_tag is not None else None

        entries.append({
            "date": date_str,
            "identifier": "TAIKO",
            "type": None,
            "timestamp": timestamp,
            "headline": headline,
            "content": "\n".join(_subheader_bullets(article)),
            "url": None,
            "images": _image_entries(article, base_url),
        })

    return entries
