aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--bemani/iidx.py1
-rw-r--r--bemani/sdvx.py1
-rw-r--r--constants.py9
-rw-r--r--news_feed.py (renamed from konami.py)18
-rw-r--r--scrape.py26
6 files changed, 53 insertions, 3 deletions
diff --git a/.gitignore b/.gitignore
index 0a19790..828761b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,3 +172,4 @@ cython_debug/
# PyPI configuration file
.pypirc
+news
diff --git a/bemani/iidx.py b/bemani/iidx.py
index e20dd7d..978ecbd 100644
--- a/bemani/iidx.py
+++ b/bemani/iidx.py
@@ -54,6 +54,7 @@ def parse_pinky_crush_news_site(html: str, base_url):
"timestamp": timestamp,
"headline": headline,
"content": content,
+ "url": None,
"images": [],
})
diff --git a/bemani/sdvx.py b/bemani/sdvx.py
index 55d97ef..50772e8 100644
--- a/bemani/sdvx.py
+++ b/bemani/sdvx.py
@@ -40,6 +40,7 @@ def parse_exceed_gear_news_site(html: str, base_url: str):
'timestamp': timestamp,
'headline': headline_text,
'content': content,
+ "url": None,
'images': images
})
diff --git a/constants.py b/constants.py
index f131a63..5ca4d1e 100644
--- a/constants.py
+++ b/constants.py
@@ -1,3 +1,12 @@
+from enum import Enum
+
+DAYS_LIMIT=7
+
EAMUSEMENT_BASE_URL = "https://p.eagate.573.jp"
SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE ="https://p.eagate.573.jp/game/sdvx/vi/news/index.html"
IIDX_PINKY_CRUSH_NEWS_SITE="https://p.eagate.573.jp/game/2dx/32/info/index.html"
+
+CHUNITHM_NEWS_SITE="https://info-chunithm.sega.jp/"
+
+class CHUNITHM_VERSION(Enum):
+ VERSE = 1
diff --git a/konami.py b/news_feed.py
index 438b1ed..5737cea 100644
--- a/konami.py
+++ b/news_feed.py
@@ -1,11 +1,12 @@
"""
-Fetching data for Konami/Bemani games
+Generic format for a news entry. All keys are considered to be nullable
{
'date': JST date of news post
'type': Type of post if available, otherwise if not provided it will be None (aka Generic news)
'timestamp': Unixtime of date above,
'headline': Headline,
'content': All text content of news,
+ 'url': URL to full post if available,
'images': {
'image': URL to image,
'link': If there's an associated href. Else None
@@ -15,19 +16,30 @@ Fetching data for Konami/Bemani games
"""
from email.utils import parsedate_to_datetime
+from datetime import datetime
from site_scraper import SiteScraper
import bemani.sdvx as sound_voltex
import bemani.iidx as iidx
+import sega.chuni_jp as chunithm_jp
import constants
-def get_news(news_url: str) -> list:
+def get_news(news_url: str, version=None) -> list:
scraper = SiteScraper(headless=True)
+ news_json = {}
site_data = scraper.get_page_source(news_url)
if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE:
news_posts = sorted(sound_voltex.parse_exceed_gear_news_site(site_data, constants.EAMUSEMENT_BASE_URL), key=lambda x: x['timestamp'], reverse=True)
elif news_url == constants.IIDX_PINKY_CRUSH_NEWS_SITE:
news_posts = sorted(iidx.parse_pinky_crush_news_site(site_data, constants.EAMUSEMENT_BASE_URL), key=lambda x: x['timestamp'], reverse=True)
+ elif news_url == constants.CHUNITHM_NEWS_SITE:
+ if version == constants.CHUNITHM_VERSION.VERSE:
+ news_posts = sorted(chunithm_jp.parse_chuni_jp_verse_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
else:
news_posts = []
scraper.close()
- return news_posts
+ news_json = {
+ "fetch_date": int(datetime.now().timestamp()),
+ "posts": news_posts
+
+ }
+ return news_json
diff --git a/scrape.py b/scrape.py
new file mode 100644
index 0000000..8d1f467
--- /dev/null
+++ b/scrape.py
@@ -0,0 +1,26 @@
+"""
+Generates news JSON files
+"""
+import news_feed as feed
+import constants
+import json
+import os
+
+
+OUTPUT_DIR = "news"
+
+if __name__ == "__main__":
+ if not os.path.exists(OUTPUT_DIR):
+ os.makedirs(OUTPUT_DIR)
+
+ iidx_news_data = feed.get_news(constants.IIDX_PINKY_CRUSH_NEWS_SITE)
+ with open(OUTPUT_DIR+'/iidx_news.json', 'w') as json_file:
+ json.dump(iidx_news_data, json_file)
+
+ sdvx_news_data = feed.get_news(constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE)
+ with open(OUTPUT_DIR+'/sdvx_news.json', 'w') as json_file:
+ json.dump(sdvx_news_data, json_file)
+
+ chunithm_jp_news_data = feed.get_news(constants.CHUNITHM_NEWS_SITE, constants.CHUNITHM_VERSION.VERSE)
+ with open(OUTPUT_DIR+'/chunithm_jp_news.json', 'w') as json_file:
+ json.dump(chunithm_jp_news_data, json_file)
Send patches to the email address below:
yukais@pinapelz.com
Include [PATCH repo_name] in the subject line.
pinapelz.com
homepage