diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | bemani/iidx.py | 1 | ||||
| -rw-r--r-- | bemani/sdvx.py | 1 | ||||
| -rw-r--r-- | constants.py | 9 | ||||
| -rw-r--r-- | news_feed.py (renamed from konami.py) | 18 | ||||
| -rw-r--r-- | scrape.py | 26 |
6 files changed, 53 insertions, 3 deletions
@@ -172,3 +172,4 @@ cython_debug/ # PyPI configuration file .pypirc +news diff --git a/bemani/iidx.py b/bemani/iidx.py index e20dd7d..978ecbd 100644 --- a/bemani/iidx.py +++ b/bemani/iidx.py @@ -54,6 +54,7 @@ def parse_pinky_crush_news_site(html: str, base_url): "timestamp": timestamp, "headline": headline, "content": content, + "url": None, "images": [], }) diff --git a/bemani/sdvx.py b/bemani/sdvx.py index 55d97ef..50772e8 100644 --- a/bemani/sdvx.py +++ b/bemani/sdvx.py @@ -40,6 +40,7 @@ def parse_exceed_gear_news_site(html: str, base_url: str): 'timestamp': timestamp, 'headline': headline_text, 'content': content, + "url": None, 'images': images }) diff --git a/constants.py b/constants.py index f131a63..5ca4d1e 100644 --- a/constants.py +++ b/constants.py @@ -1,3 +1,12 @@ +from enum import Enum + +DAYS_LIMIT=7 + EAMUSEMENT_BASE_URL = "https://p.eagate.573.jp" SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE ="https://p.eagate.573.jp/game/sdvx/vi/news/index.html" IIDX_PINKY_CRUSH_NEWS_SITE="https://p.eagate.573.jp/game/2dx/32/info/index.html" + +CHUNITHM_NEWS_SITE="https://info-chunithm.sega.jp/" + +class CHUNITHM_VERSION(Enum): + VERSE = 1 @@ -1,11 +1,12 @@ """ -Fetching data for Konami/Bemani games +Generic format for a news entry. All keys are considered to be nullable { 'date': JST date of news post 'type': Type of post if available, otherwise if not provided it will be None (aka Generic news) 'timestamp': Unixtime of date above, 'headline': Headline, 'content': All text content of news, + 'url': URL to full post if available, 'images': { 'image': URL to image, 'link': If there's an associated href. Else None @@ -15,19 +16,30 @@ Fetching data for Konami/Bemani games """ from email.utils import parsedate_to_datetime +from datetime import datetime from site_scraper import SiteScraper import bemani.sdvx as sound_voltex import bemani.iidx as iidx +import sega.chuni_jp as chunithm_jp import constants -def get_news(news_url: str) -> list: +def get_news(news_url: str, version=None) -> list: scraper = SiteScraper(headless=True) + news_json = {} site_data = scraper.get_page_source(news_url) if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE: news_posts = sorted(sound_voltex.parse_exceed_gear_news_site(site_data, constants.EAMUSEMENT_BASE_URL), key=lambda x: x['timestamp'], reverse=True) elif news_url == constants.IIDX_PINKY_CRUSH_NEWS_SITE: news_posts = sorted(iidx.parse_pinky_crush_news_site(site_data, constants.EAMUSEMENT_BASE_URL), key=lambda x: x['timestamp'], reverse=True) + elif news_url == constants.CHUNITHM_NEWS_SITE: + if version == constants.CHUNITHM_VERSION.VERSE: + news_posts = sorted(chunithm_jp.parse_chuni_jp_verse_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) else: news_posts = [] scraper.close() - return news_posts + news_json = { + "fetch_date": int(datetime.now().timestamp()), + "posts": news_posts + + } + return news_json diff --git a/scrape.py b/scrape.py new file mode 100644 index 0000000..8d1f467 --- /dev/null +++ b/scrape.py @@ -0,0 +1,26 @@ +""" +Generates news JSON files +""" +import news_feed as feed +import constants +import json +import os + + +OUTPUT_DIR = "news" + +if __name__ == "__main__": + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) + + iidx_news_data = feed.get_news(constants.IIDX_PINKY_CRUSH_NEWS_SITE) + with open(OUTPUT_DIR+'/iidx_news.json', 'w') as json_file: + json.dump(iidx_news_data, json_file) + + sdvx_news_data = feed.get_news(constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE) + with open(OUTPUT_DIR+'/sdvx_news.json', 'w') as json_file: + json.dump(sdvx_news_data, json_file) + + chunithm_jp_news_data = feed.get_news(constants.CHUNITHM_NEWS_SITE, constants.CHUNITHM_VERSION.VERSE) + with open(OUTPUT_DIR+'/chunithm_jp_news.json', 'w') as json_file: + json.dump(chunithm_jp_news_data, json_file) |
