diff options
| author | Pinapelz <yukais@pinapelz.com> | 2026-03-22 23:53:34 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2026-03-23 00:17:55 -0700 |
| commit | 5e31d2870f07085e4c837a17572a3e9eedb6df26 (patch) | |
| tree | 3b97c5f1b0c139deb0bc4091595f085d754b14bd /bemani | |
| parent | 8f859e4786a02fea69ec086814d4f667f2f01d5d (diff) | |
Rename SDVX parser to parse_nabla_news_site
Replace parse_exceed_gear_news_site with parse_nabla_news_site and
update imports and callers in __init__.py and news_feed.py
Diffstat (limited to 'bemani')
| -rw-r--r-- | bemani/__init__.py | 4 | ||||
| -rw-r--r-- | bemani/sdvx.py | 48 |
2 files changed, 2 insertions, 50 deletions
diff --git a/bemani/__init__.py b/bemani/__init__.py index f16ed0a..dc4762a 100644 --- a/bemani/__init__.py +++ b/bemani/__init__.py @@ -1,7 +1,7 @@ -from bemani.sdvx import parse_exceed_gear_news_site +from bemani.sdvx import parse_nabla_news_site from bemani.polaris_chord import parse_polaris_chord_news_site __all__ = [ - "parse_exceed_gear_news_site", + "parse_nabla_news_site", "parse_polaris_chord_news_site", ] diff --git a/bemani/sdvx.py b/bemani/sdvx.py index 5a7d25c..5d7f72e 100644 --- a/bemani/sdvx.py +++ b/bemani/sdvx.py @@ -2,54 +2,6 @@ from bs4 import BeautifulSoup from datetime import datetime from urllib.parse import urljoin -def parse_exceed_gear_news_site(html: str): - base_url = "https://p.eagate.573.jp" - soup = BeautifulSoup(html, 'html.parser') - news_list = soup.select('.tab ul.news li') - - entries = [] - for li in news_list: - date = li.select_one('strong') - pre = li.select_one('pre') - - if not date or not pre: - continue - date_str = date.text.strip() - try: - dt = datetime.strptime(date_str, "%Y.%m.%d") - timestamp = int(dt.timestamp()) - except ValueError: - timestamp = None - headline = li.select_one('p.notice') - headline_text = headline.text.strip() if headline else None - for tag in pre.select('font, b, u, span'): - tag.unwrap() - content = pre.get_text(separator='\n', strip=True) - images = [] - for img in pre.select('img'): - src = img.get('data-original') or img.get('src') - if not src or src.startswith('data:'): - continue - src = urljoin(base_url, src) - parent = img.find_parent('a') - href = urljoin(base_url, parent['href']) if parent and parent.has_attr('href') else None - if {'image': src, 'link': href} not in images: - images.append({'image': src, 'link': href}) - - entries.append({ - 'date': date_str, - 'identifier': 'SOUND_VOLTEX', - 'type': None, - 'timestamp': timestamp, - 'headline': headline_text, - 'content': content, - "url": None, - 'images': images, - 'is_ai_summary': False - }) - - return entries - def parse_nabla_news_site(html: str): base_url = "https://p.eagate.573.jp" soup = BeautifulSoup(html, 'html.parser') |
