about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- constants.py 1
-rw-r--r-- generate.py 5
-rw-r--r-- news_feed.py 10
-rw-r--r-- sega/idac.py 103
4 files changed, 117 insertions, 2 deletions
diff --git a/constants.py b/constants.py
index e1b0369..c252eab 100644
--- a/constants.py
+++ b/constants.py
@@ -23,6 +23,7 @@ CHUNITHM_INTL_NEWS_SITE="https://info-chunithm.sega.com/"
MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/"
MAIMAIDX_INTL_NEWS_SITE="https://maimai.sega.com/download/"
ONGEKI_JP_NEWS_SITE="https://info-ongeki.sega.jp/"
+IDAC_NEWS_SITE="https://info-initialdac.sega.jp/"
MUSIC_DIVER_NEWS="https://mypage.musicdiver.jp/api/news?lang=en"
STREET_FIGHTER_NEWS_SITE="https://sf6ta.jp/info/list"
diff --git a/generate.py b/generate.py
index e94384e..900777d 100644
--- a/generate.py
+++ b/generate.py
@@ -122,6 +122,9 @@ def generate_maimaidx_intl_news_file():
def generate_chunithm_intl_news_file():
    """Run ``generate_news_file`` for the "chunithm_intl_news" feed.

    Passes ``constants.CHUNITHM_INTL_NEWS_SITE`` as the source and
    ``constants.CHUNITHM_VERSION.VERSE`` as the version, and returns whatever
    ``generate_news_file`` returns.
    """
    source_url = constants.CHUNITHM_INTL_NEWS_SITE
    game_version = constants.CHUNITHM_VERSION.VERSE
    return generate_news_file("chunithm_intl_news", source_url, game_version)
def generate_idac_news_file():
    """Run ``generate_news_file`` for the "idac_news" feed.

    Passes ``constants.IDAC_NEWS_SITE`` as the source and returns whatever
    ``generate_news_file`` returns.
    """
    source_url = constants.IDAC_NEWS_SITE
    return generate_news_file("idac_news", source_url)
+
def generate_music_diver_news_file():
    """Run ``generate_news_file`` for the "music_diver_news" feed.

    Passes ``constants.MUSIC_DIVER_NEWS`` as the source and returns whatever
    ``generate_news_file`` returns.
    """
    source_url = constants.MUSIC_DIVER_NEWS
    return generate_news_file("music_diver_news", source_url)
@@ -161,6 +164,7 @@ if __name__ == "__main__":
chunithm_jp_news_data = generate_chunithm_jp_news_file()
maimaidx_jp_news_data = generate_maimaidx_jp_news_file()
ongeki_jp_news_data = generate_ongeki_jp_news_file()
+ idac_news_data = generate_idac_news_file()
maimaidx_intl_news_data = generate_maimaidx_intl_news_file()
chunithm_intl_news_data = generate_chunithm_intl_news_file()
music_diver_news_data = generate_music_diver_news_file()
@@ -184,6 +188,7 @@ if __name__ == "__main__":
chunithm_jp_news_data,
maimaidx_jp_news_data,
ongeki_jp_news_data,
+ idac_news_data,
maimaidx_intl_news_data,
chunithm_intl_news_data,
music_diver_news_data,
diff --git a/news_feed.py b/news_feed.py
index 7c781db..6bd5116 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -30,6 +30,7 @@ import sega.chuni_intl as chuni_intl
import sega.maimaidx_jp as maimaidx_jp
import sega.maimaidx_intl as maimaidx_intl
import sega.ongeki_jp as ongeki_jp
+import sega.idac as idac
import taito.music_diver as music_diver
import taito.street_fighter as street_fighter
import bandai_namco.taiko as taiko
@@ -159,6 +160,13 @@ def get_news(news_url: str, version=None) -> list:
news_posts = sorted(ongeki_jp.parse_ongeki_refresh_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
+ elif news_url == constants.IDAC_NEWS_SITE:
+ site_data = download_site_as_html(news_url)
+ news_posts = sorted(idac.parse_idac_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
+ for news in news_posts:
+ promo_image_url = idac.get_promo_image(download_site_as_html(news["url"]))
+ news["images"] = [{'image': promo_image_url, 'link': None}]
+
elif news_url == constants.MUSIC_DIVER_NEWS:
api_data = download_site_as_html(news_url)
news_posts = sorted(music_diver.parse_music_diver_news_json(api_data), key=lambda x: x['timestamp'], reverse=True)
@@ -166,8 +174,6 @@ def get_news(news_url: str, version=None) -> list:
elif news_url == constants.STREET_FIGHTER_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(street_fighter.parse_sf_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
- print(news_posts)
- exit()
elif news_url == constants.TAIKO_BLOG_SITE:
diff --git a/sega/idac.py b/sega/idac.py
new file mode 100644
index 0000000..3b8a444
--- /dev/null
+++ b/sega/idac.py
@@ -0,0 +1,103 @@
+import json
+from bs4 import BeautifulSoup
+import re
+from datetime import datetime
+from urllib.parse import urljoin
+from constants import IDAC_NEWS_SITE
+
+
def _idac_post_id(article):
    """Return the digits of the article's ``post-<digits>`` CSS class, or None."""
    for cls in article.get('class', []):
        if cls.startswith('post-') and cls[5:].isdigit():
            return cls[5:]
    return None


def _parse_idac_article(article):
    """Build one news entry dict from an ``<article>`` tag.

    Returns None when the article lacks any of the required pieces: a
    ``post-<digits>`` class, an ``h1.entry-title`` containing an
    ``a.news-title`` link and a ``span.entry_date``, or a parseable
    ``YYYY年M月D日`` date.
    """
    # Local import: the module only imports ``datetime`` itself.
    from datetime import timedelta, timezone

    if _idac_post_id(article) is None:
        return None

    title_section = article.find('h1', class_='entry-title')
    if not title_section:
        return None
    title_link = title_section.find('a', class_='news-title')
    if not title_link:
        return None
    date_span = title_section.find('span', class_='entry_date')
    if not date_span:
        return None

    url = title_link.get('href', '')
    if url:
        # Absolute hrefs pass through unchanged; relative ones are resolved
        # against the site root so the link can be fetched later.
        url = urljoin(IDAC_NEWS_SITE, url)
    headline = title_link.get_text(strip=True)

    date_text = date_span.get_text(strip=True)
    date_match = re.match(r'(\d{4})年(\d{1,2})月(\d{1,2})日', date_text)
    if not date_match:
        return None
    year, month, day = (int(part) for part in date_match.groups())

    # The page only gives a calendar date, so pin it to noon JST (UTC+9).
    # Using an aware datetime keeps the timestamp independent of the
    # timezone of the machine running the scraper.
    jst = timezone(timedelta(hours=9))
    try:
        post_date = datetime(year, month, day, 12, 0, tzinfo=jst)
    except ValueError:
        # e.g. month 13 or day 32 in the scraped text
        return None
    timestamp = int(post_date.timestamp())

    post_type = None
    categories_list = title_section.find('ul', class_='post-categories')
    category_link = categories_list.find('a') if categories_list else None
    if category_link:
        post_type = category_link.get_text(strip=True)

    content = ""
    entry_summary = article.find('div', class_='entry-summary')
    if entry_summary:
        content = entry_summary.get_text(strip=True)
        # Drop the trailing "read more" link text and the ellipsis before it.
        content = re.sub(r'続きを読む\s*.*$', '', content).strip()
        content = re.sub(r'\s*…\s*$', '', content).strip()

    images = []
    for img in article.find_all('img'):
        img_src = img.get('src', '')
        if not img_src or img_src.endswith('.svg'):  # Skip icon/UI images
            continue
        images.append({
            'image': urljoin(IDAC_NEWS_SITE, img_src),
            'link': url,
        })

    return {
        'date': post_date.strftime("%Y-%m-%d %H:%M"),
        'identifier': "IDAC_NEWS",
        'type': post_type,
        'timestamp': timestamp,
        'headline': headline,
        # Fall back to the headline when the listing has no summary text.
        'content': content if content else headline,
        'url': url,
        'images': images,
        'is_ai_summary': False,
    }


def parse_idac_news_site(site_data: str) -> list:
    """Parse the IDAC news listing HTML into a list of news entry dicts.

    Every ``<article>`` element with a ``post-...`` class is examined.
    Articles missing a required field, or that fail to parse, are skipped so
    that one bad entry does not discard the rest of the page.

    Args:
        site_data: HTML text of the news listing page.

    Returns:
        A list of dicts with the keys ``date``, ``identifier``, ``type``,
        ``timestamp``, ``headline``, ``content``, ``url``, ``images`` and
        ``is_ai_summary``, in document order.
    """
    # Local import: the module does not import ``logging`` at file level.
    import logging

    soup = BeautifulSoup(site_data, "html.parser")
    news_entries = []
    articles = soup.find_all('article', class_=lambda x: x and 'post-' in x)
    for article in articles:
        try:
            news_entry = _parse_idac_article(article)
        except Exception:
            # Best-effort scraping: record the failure instead of hiding it,
            # then carry on with the remaining articles.
            logging.getLogger(__name__).warning(
                "Skipping malformed IDAC news article", exc_info=True
            )
            continue
        if news_entry is not None:
            news_entries.append(news_entry)

    return news_entries
+
+
def get_promo_image(site_data: str) -> str:
    """Return the ``src`` of the first image in the article body.

    Looks for the first ``<img>`` inside ``div.entry-content`` of the given
    HTML. Returns an empty string when that container or an image inside it
    is missing, or when the image has no ``src`` attribute.
    """
    document = BeautifulSoup(site_data, "html.parser")

    body = document.find('div', class_='entry-content')
    if not body:
        return ''

    first_image = body.find('img')
    if not first_image:
        return ''

    return first_image.get('src', '')
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage