add support for Taiko no Tatsujin

author: Pinapelz <yukais@pinapelz.com> 2025-04-17 21:04:25 -0700
committer: Pinapelz <yukais@pinapelz.com> 2025-04-17 21:04:25 -0700
commit: 9ec17b13c9b97febcde6c7b04ea57ec6a060b778 (patch)
tree: 3967da44dd73695953502c6eb933c8a9fe28030d /bandai_namco/taiko.py
parent: 4d84014f7c69e3a8074f47f2fd7688af90feeb01 (diff)
1 files changed, 58 insertions, 0 deletions
diff --git a/bandai_namco/taiko.py b/bandai_namco/taiko.py
new file mode 100644
index 0000000..0aa2e0e
--- /dev/null
+++ b/bandai_namco/taiko.py
@@ -0,0 +1,58 @@
+from bs4 import BeautifulSoup
+from datetime import datetime
+import time
+import re
+
+def parse_taiko_blog_site(html: str) -> list:
+    """Parse the Taiko no Tatsujin blog HTML into a list of news entry dicts.
+
+    Articles lacking a ``p.entryDate`` tag or a parseable date are skipped.
+    """
+    base_url: str = "https://taiko-ch.net"
+    soup = BeautifulSoup(html, "html.parser")
+    # Sub-headers are divs with a #ff4500 background; compile the pattern once.
+    subheader_style = re.compile(r"background:\s?#ff4500")
+    entries = []
+    for article in soup.select("article"):
+        date_tag = article.select_one("p.entryDate")
+        if not date_tag:
+            continue
+        date_str = date_tag.text.strip()
+        try:
+            date_obj = datetime.strptime(date_str, "%Y年%m月%d日")
+            # NOTE: mktime interprets the naive date in the local timezone.
+            timestamp = int(time.mktime(date_obj.timetuple()))
+        except (ValueError, OverflowError) as e:
+            # Best effort: one malformed date must not abort the whole page.
+            print(f"Error parsing article: {e}")
+            continue
+
+        headline_tag = article.select_one("h1")
+        headline = headline_tag.text.strip() if headline_tag else None
+
+        # Sub-header titles become bullet lines; the "■" marker is dropped.
+        content = []
+        for div in article.find_all("div", style=subheader_style):
+            title_text = div.get_text(strip=True).replace("■", "").strip()
+            if title_text:
+                content.append(f"• {title_text}")
+
+        images = []
+        for img in article.find_all("img"):
+            img_url = img.get("src") or img.get("data-src")
+            if not img_url:
+                continue
+            if img_url.startswith("//"):
+                # Protocol-relative URL: add a scheme, not the site host.
+                img_url = "https:" + img_url
+            elif img_url.startswith("/"):
+                img_url = base_url + img_url
+            images.append({"image": img_url, "link": None})
+
+        entries.append({
+            "date": date_str, "identifier": "TAIKO", "type": None,
+            "timestamp": timestamp, "headline": headline,
+            "content": "\n".join(content), "url": None, "images": images,
+        })
+
+    return entries
author	Pinapelz <yukais@pinapelz.com>	2025-04-17 21:04:25 -0700
committer	Pinapelz <yukais@pinapelz.com>	2025-04-17 21:04:25 -0700
commit	9ec17b13c9b97febcde6c7b04ea57ec6a060b778 (patch)
tree	3967da44dd73695953502c6eb933c8a9fe28030d /bandai_namco/taiko.py
parent	4d84014f7c69e3a8074f47f2fd7688af90feeb01 (diff)