about | summary | refs | log | tree | commit | diff | stats
path: root/bemani/sdvx.py
diff options
context:
space:
mode:
author: Pinapelz <yukais@pinapelz.com>, 2026-03-22 23:53:34 -0700
committer: Pinapelz <yukais@pinapelz.com>, 2026-03-23 00:17:55 -0700
commit: 5e31d2870f07085e4c837a17572a3e9eedb6df26 (patch)
tree: 3b97c5f1b0c139deb0bc4091595f085d754b14bd /bemani/sdvx.py
parent: 8f859e4786a02fea69ec086814d4f667f2f01d5d (diff)
Rename SDVX parser to parse_nabla_news_site
Replace parse_exceed_gear_news_site with parse_nabla_news_site and update imports and callers in __init__.py and news_feed.py
Diffstat (limited to 'bemani/sdvx.py')
-rw-r--r-- bemani/sdvx.py | 48
1 file changed, 0 insertions, 48 deletions
diff --git a/bemani/sdvx.py b/bemani/sdvx.py
index 5a7d25c..5d7f72e 100644
--- a/bemani/sdvx.py
+++ b/bemani/sdvx.py
@@ -2,54 +2,6 @@ from bs4 import BeautifulSoup
from datetime import datetime
from urllib.parse import urljoin
-def parse_exceed_gear_news_site(html: str):
- base_url = "https://p.eagate.573.jp"
- soup = BeautifulSoup(html, 'html.parser')
- news_list = soup.select('.tab ul.news li')
-
- entries = []
- for li in news_list:
- date = li.select_one('strong')
- pre = li.select_one('pre')
-
- if not date or not pre:
- continue
- date_str = date.text.strip()
- try:
- dt = datetime.strptime(date_str, "%Y.%m.%d")
- timestamp = int(dt.timestamp())
- except ValueError:
- timestamp = None
- headline = li.select_one('p.notice')
- headline_text = headline.text.strip() if headline else None
- for tag in pre.select('font, b, u, span'):
- tag.unwrap()
- content = pre.get_text(separator='\n', strip=True)
- images = []
- for img in pre.select('img'):
- src = img.get('data-original') or img.get('src')
- if not src or src.startswith('data:'):
- continue
- src = urljoin(base_url, src)
- parent = img.find_parent('a')
- href = urljoin(base_url, parent['href']) if parent and parent.has_attr('href') else None
- if {'image': src, 'link': href} not in images:
- images.append({'image': src, 'link': href})
-
- entries.append({
- 'date': date_str,
- 'identifier': 'SOUND_VOLTEX',
- 'type': None,
- 'timestamp': timestamp,
- 'headline': headline_text,
- 'content': content,
- "url": None,
- 'images': images,
- 'is_ai_summary': False
- })
-
- return entries
-
def parse_nabla_news_site(html: str):
base_url = "https://p.eagate.573.jp"
soup = BeautifulSoup(html, 'html.parser')
Send patches to the email address below:
yukais@pinapelz.com
Include the subject "[PATCH repo_name]".
pinapelz.com
homepage