aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2025-04-28 10:23:29 -0700
committerPinapelz <yukais@pinapelz.com>2025-04-28 10:23:29 -0700
commit638c964b7dba51b0f86c4d1f562a77e2cdb49437 (patch)
tree9bedc2b587439051d362089e4ec05939a0ccc32d
parent4852c740b0e967429f61228511af18ea25a77c12 (diff)
add support for scraping polaris chord
-rw-r--r--bemani/ddr.py4
-rw-r--r--bemani/polaris_chord.py62
-rw-r--r--constants.py1
-rw-r--r--generate.py4
-rw-r--r--news_feed.py7
-rw-r--r--requirements.txtbin1384 -> 730 bytes
6 files changed, 77 insertions, 1 deletions
diff --git a/bemani/ddr.py b/bemani/ddr.py
index e9d4584..9651c48 100644
--- a/bemani/ddr.py
+++ b/bemani/ddr.py
@@ -1,8 +1,10 @@
+"""
+Currently unused because the e-amusement app feed is favored. Kept here for archival purposes.
+"""
from bs4 import BeautifulSoup
from datetime import datetime
from urllib.parse import urljoin
import time
-import re
def parse_ddr_world_news_site(html: str):
base_url = "https://p.eagate.573.jp"
diff --git a/bemani/polaris_chord.py b/bemani/polaris_chord.py
new file mode 100644
index 0000000..2880dae
--- /dev/null
+++ b/bemani/polaris_chord.py
@@ -0,0 +1,62 @@
+from bs4 import BeautifulSoup
+from datetime import datetime
+import pytz
+import re
+
+# Maps the value of a news item's `data-category` attribute to the post type stored in each entry.
+CATEGORY_MAP = {
+ "i_01": "NEWS",
+ "i_02": "MUSIC",
+ "i_03": "EVENT",
+ "i_04": "OTHER"
+}
+
+
+def parse_polaris_chord_news_site(html: str) -> list[dict]:
+    """Parse the Polaris Chord news page into a list of news entry dicts.
+
+    Each ``li.news`` element under ``#info-news`` becomes one entry. Entries
+    with no date element, no title element, or no parsable ``YYYY/M/D`` date
+    are skipped so that one malformed item cannot abort the whole scrape.
+    A missing detail element yields empty content, no url and no images.
+
+    Args:
+        html: Page source of the news site.
+
+    Returns:
+        A list of dicts with the keys ``date``, ``identifier``, ``type``,
+        ``timestamp``, ``headline``, ``content``, ``url``, ``images`` and
+        ``is_ai_summary``. ``type`` is ``None`` for a category that is not in
+        ``CATEGORY_MAP``; ``url`` is ``None`` when the detail has no link.
+    """
+    soup = BeautifulSoup(html, 'html.parser')
+    jst = pytz.timezone('Asia/Tokyo')  # loop-invariant, so look it up once
+    news_list = []
+    for li in soup.select('#info-news li.news'):
+        date_node = li.find('li', class_='news_date')
+        title_node = li.find('li', class_='news_title')
+        # find() returns None for a missing element; calling .text on it
+        # would raise AttributeError and lose every other entry.
+        if date_node is None or title_node is None:
+            continue
+
+        match = re.search(r'(\d{4}/\d{1,2}/\d{1,2})', date_node.text.strip())
+        if not match:
+            continue
+        date_str = match.group(1)
+
+        try:
+            dt = datetime.strptime(date_str, '%Y/%m/%d')
+        except ValueError:
+            continue
+        # Only a date is available, so the timestamp is midnight JST that day.
+        timestamp = int(jst.localize(dt).timestamp())
+
+        content = ''
+        url = None
+        images = []
+        detail = li.find('li', class_='news_detail')
+        if detail is not None:
+            content = detail.get_text(separator='\n').strip()
+            first_a = detail.find('a', href=True)
+            if first_a:
+                url = first_a['href']
+            for img in detail.find_all('img'):
+                parent = img.parent
+                is_linked = parent.name == 'a' and parent.has_attr('href')
+                images.append({
+                    'image': img.get('src'),
+                    'link': parent['href'] if is_linked else None,
+                })
+
+        news_list.append({
+            'date': date_str,
+            'identifier': "POLARIS_CHORD",
+            'type': CATEGORY_MAP.get(li.get('data-category')),
+            'timestamp': timestamp,
+            'headline': title_node.text.strip(),
+            'content': content,
+            'url': url,
+            'images': images,
+            'is_ai_summary': False,
+        })
+    return news_list
diff --git a/constants.py b/constants.py
index ba725f0..f69a610 100644
--- a/constants.py
+++ b/constants.py
@@ -5,6 +5,7 @@ DAYS_LIMIT=14
SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE ="https://p.eagate.573.jp/game/sdvx/vi/news/index.html"
IIDX_PINKY_CRUSH_NEWS_SITE="https://p.eagate.573.jp/game/2dx/32/info/index.html"
DDR_WORLD_NEWS_SITE="https://p.eagate.573.jp/game/ddr/ddrworld/info/index.html"
+POLARIS_CHORD_NEWS_SITE="https://p.eagate.573.jp/game/polarischord/pc/news/index.html"
EAMUSE_APP_FEED="https://eam.573.jp/app/web/post/official"
IIDX_EAMUSE_APP_ID="s8svjrq62x592gvb"
diff --git a/generate.py b/generate.py
index a3edbd3..84033b7 100644
--- a/generate.py
+++ b/generate.py
@@ -86,6 +86,9 @@ def generate_ddr_news_file(eamuse_feed: bool=False):
else:
return generate_news_file("ddr_news", constants.DDR_WORLD_NEWS_SITE)
+def generate_polaris_chord_news_file():
+    """Delegate to generate_news_file for the Polaris Chord news site.
+
+    Passes the "polaris_chord_news" name and the Polaris Chord news site URL,
+    and returns whatever generate_news_file returns.
+    """
+    news_site = constants.POLARIS_CHORD_NEWS_SITE
+    return generate_news_file("polaris_chord_news", news_site)
+
def generate_popn_music_news_file():
return generate_news_file("popn_music_news", constants.EAMUSE_APP_FEED, constants.POPN_MUSIC_EAMUSE_APP_ID)
@@ -136,6 +139,7 @@ if __name__ == "__main__":
iidx_news_data = generate_iidx_news_file(eamuse_feed=True)
sdvx_news_data = generate_sdvx_news_file()
ddr_news_data = generate_ddr_news_file(eamuse_feed=True)
+ polaris_news_data = generate_polaris_chord_news_file()
gitadora_news_data = generate_gitadora_news_file()
popn_music_news_data = generate_popn_music_news_file()
jubeat_news_data = generate_jubeat_news_file()
diff --git a/news_feed.py b/news_feed.py
index d78c78c..4896322 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -25,6 +25,7 @@ import bemani.sdvx as sound_voltex
import bemani.iidx as iidx
import bemani.ddr as ddr
import sega.chuni_jp as chunithm_jp
+import bemani.polaris_chord as polaris_chord
import sega.chuni_intl as chuni_intl
import sega.maimaidx_jp as maimaidx_jp
import sega.maimaidx_intl as maimaidx_intl
@@ -60,6 +61,12 @@ def get_news(news_url: str, version=None) -> list:
news_posts = sorted(iidx.parse_pinky_crush_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)
+ elif news_url == constants.POLARIS_CHORD_NEWS_SITE:
+ scraper = SiteScraper(headless=True)
+ site_data = scraper.get_page_source(news_url)
+ news_posts = sorted(polaris_chord.parse_polaris_chord_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
+ news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)
+
elif news_url == constants.EAMUSE_APP_FEED:
scraper = SiteScraper(headless=True)
site_data = scraper.get_page_source(news_url+"/?uuid_to="+version)
diff --git a/requirements.txt b/requirements.txt
index 7cbd3b8..c310de5 100644
--- a/requirements.txt
+++ b/requirements.txt
Binary files differ
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage