about | summary | refs | log | tree | commit | diff | stats
path: root/bemani
diff options
context:
space:
mode:
Diffstat (limited to 'bemani')
-rw-r--r-- bemani/__init__.py 7
-rw-r--r-- bemani/ddr.py 63
-rw-r--r-- bemani/iidx.py 68
3 files changed, 7 insertions, 131 deletions
diff --git a/bemani/__init__.py b/bemani/__init__.py
new file mode 100644
index 0000000..f16ed0a
--- /dev/null
+++ b/bemani/__init__.py
@@ -0,0 +1,7 @@
+from bemani.sdvx import parse_exceed_gear_news_site
+from bemani.polaris_chord import parse_polaris_chord_news_site
+
+__all__ = [
+ "parse_exceed_gear_news_site",
+ "parse_polaris_chord_news_site",
+]
diff --git a/bemani/ddr.py b/bemani/ddr.py
deleted file mode 100644
index b5ae93c..0000000
--- a/bemani/ddr.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Currently unused as e-eamusement app feed is favored. Here for archival purposes
-"""
-from bs4 import BeautifulSoup
-from datetime import datetime
-from urllib.parse import urljoin
-import time
-
-def parse_ddr_world_news_site(html: str):
- base_url = "https://p.eagate.573.jp"
- soup = BeautifulSoup(html, 'html.parser')
- news_entries = []
-
- for div in soup.select("div#info > div.news_one"):
- style = div.get('style', '')
- if 'none' in style:
- continue
-
- title_tag = div.select_one("div.news_title > div.title")
- date_tag = div.select_one("div.news_title > div.date")
- headline = title_tag.get_text(strip=True) if title_tag else None
- date_str = date_tag.get_text(strip=True) if date_tag else None
-
- try:
- dt = datetime.strptime(date_str, "%Y/%m/%d")
- date_iso = dt.strftime("%Y-%m-%d")
- timestamp = int(time.mktime(dt.timetuple()))
- except Exception:
- date_iso, timestamp = None, None
- paras = [p.get_text(strip=True, separator="\n\n")
- for p in div.find_all("p", recursive=False)]
- if not paras:
- for child in div.find_all(recursive=False):
- cls = child.get("class", [])
- if "news_title" in cls or "img_news_center" in cls:
- continue
- if child.name == "div":
- paras.append(child.get_text(strip=True, separator="\n\n"))
-
- content = "\n\n\n".join(paras) if paras else None
- if content:
- content = f"\n{content}\n"
-
- images = []
- for img in div.select("div.img_news_center img"):
- raw_src = img.get("data-src") or img.get("src")
- if raw_src:
- full_url = urljoin(base_url, raw_src)
- images.append({"image": full_url, "link": None})
-
- news_entries.append({
- "date": date_iso,
- "identifier": "DDR",
- "type": None,
- "timestamp": timestamp,
- "headline": headline,
- "content": content,
- "url": None,
- "images": images,
- 'is_ai_summary': False
- })
-
- return news_entries
diff --git a/bemani/iidx.py b/bemani/iidx.py
deleted file mode 100644
index de7f34c..0000000
--- a/bemani/iidx.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from bs4 import BeautifulSoup
-from datetime import datetime
-from urllib.parse import urljoin
-import re
-
-KEY_TERMS_TL = [
- ("クプロ", "QPro")
-]
-
-# Legacy code. e-amuse feed provides better data
-def parse_pinky_crush_news_site(html: str):
- base_url = "https://p.eagate.573.jp"
- type_map = {
- "i_01": "NEWSONG",
- "i_02": "RANKING",
- "i_03": "EVENT",
- "i_04": "SHOP",
- "i_05": "OTHER"
- }
- soup = BeautifulSoup(html, "html.parser")
- news_items = []
-
- for li in soup.select("#info-news > li"):
- date_elem = li.select_one(".news-main > li:nth-of-type(1)")
- headline_elem = li.select_one(".news-main > li:nth-of-type(2)")
- content_elem = li.select_one(".news-main > li:nth-of-type(3)")
- type_class = li.get("class", [None])[0]
- if not (date_elem and content_elem):
- continue
- date_str = date_elem.text.strip()
- try:
- dt = datetime.strptime(date_str, "%Y/%m/%d")
- timestamp = int(dt.timestamp())
- except ValueError:
- timestamp = None
-
- headline = headline_elem.a.text.strip() if headline_elem.a else headline_elem.text.strip()
-
- for a in content_elem.select("a[href]"):
- href = urljoin(base_url, a["href"])
- text = a.get_text(strip=True)
- a.replace_with(f"[{text}]({href})")
-
- for br in content_elem.find_all("br"):
- br.replace_with("\n")
-
- content = content_elem.get_text().strip()
-
- content = content.replace(
- " e-amusement ベーシックコース ",
- " e-amusement ベーシックコース "
- )
- content = content.replace("※", "\n※")
- content = re.sub(r"\n[ \t]+", "\n", content)
- content = re.sub(r'\s*/\s*', '/', content)
- news_items.append({
- "date": date_str,
- "identifier": "IIDX",
- "type": type_map[type_class],
- "timestamp": timestamp,
- "headline": headline,
- "content": content,
- "url": None,
- "images": [],
- 'is_ai_summary': False
- })
-
- return news_items
Send patches to the email address below:
yukais@pinapelz.com
Include [PATCH repo_name] in the subject line.
pinapelz.com
homepage