diff options
| -rw-r--r-- | community/museca_plus.py | 41 | ||||
| -rw-r--r-- | constants.py | 1 | ||||
| -rw-r--r-- | generate.py | 7 | ||||
| -rw-r--r-- | news_feed.py | 6 | ||||
| -rw-r--r-- | site/src/components/TitleBar.tsx | 5 | ||||
| -rw-r--r-- | site/src/utils.ts | 3 |
6 files changed, 60 insertions, 3 deletions
diff --git a/community/museca_plus.py b/community/museca_plus.py new file mode 100644 index 0000000..81c7313 --- /dev/null +++ b/community/museca_plus.py @@ -0,0 +1,41 @@ +from bs4 import BeautifulSoup +from datetime import datetime +from urllib.parse import urljoin +import time +import re + +def parse_museca_plus_news_site(html: str) -> list: + soup = BeautifulSoup(html, "html.parser") + news_posts = [] + base_url = "https://museca.plus/" + for p in soup.select("div.subcontainer.center.text > p"): + text = p.get_text(strip=True, separator=' ') + date_match = re.search(r'(\d{4}-\d{2}-\d{2})', text) + if not date_match: + continue + date_str = date_match.group(1) + try: + dt = datetime.strptime(date_str, "%Y-%m-%d") + timestamp = int(time.mktime(dt.timetuple())) + except ValueError: + continue + images = [] + for img in p.find_all("img"): + img_url = urljoin(base_url, img.get("src")) + parent_a = img.find_parent("a") + images.append({"image": img_url, "link": None}) + + content = p.get_text(separator=' ', strip=True) + + news_posts.append({ + 'date': date_str, + 'identifier': 'MUSECA_PLUS', + 'type': None, + 'timestamp': timestamp, + 'headline': None, + 'content': content, + 'url': None, + 'images': images + }) + + return news_posts diff --git a/constants.py b/constants.py index 361c72d..2e7d436 100644 --- a/constants.py +++ b/constants.py @@ -29,6 +29,7 @@ ADD_EN_TRANSLATION=True # Only takes effect if an API key is provided in .env CHUNI_RECURSIVE_IMAGE=True # Scrape the individual post pages and get all images there WACCA_PLUS_MAGIC_STRING="1206017527864369262" +MUSECA_PLUS_NEWS_SITE="https://museca.plus/" class CHUNITHM_VERSION(Enum): LUMINOUS_PLUS = 1 diff --git a/generate.py b/generate.py index 36ce6f9..9ace165 100644 --- a/generate.py +++ b/generate.py @@ -112,6 +112,9 @@ def generate_taiko_news_file(): def generate_wacca_plus_news_file(): return generate_news_file("wacca_plus_news", constants.WACCA_PLUS_MAGIC_STRING) +def generate_museca_plus_news_file(): + return generate_news_file("museca_plus_news", constants.MUSECA_PLUS_NEWS_SITE) + if __name__ == "__main__": log_output("JOB START", "TASK") if not os.path.exists(OUTPUT_DIR): @@ -132,6 +135,7 @@ if __name__ == "__main__": music_diver_news_data = generate_music_diver_news_file() taiko_news_data = generate_taiko_news_file() wacca_plus_news = generate_wacca_plus_news_file() + museca_plus_news = generate_museca_plus_news_file() news = create_merged_feed( @@ -149,7 +153,8 @@ if __name__ == "__main__": chunithm_intl_news_data, music_diver_news_data, taiko_news_data, - wacca_plus_news + wacca_plus_news, + museca_plus_news ) log_output("Creating merged news.json file for all news that are within " + str(constants.DAYS_LIMIT) + " days old") with open(OUTPUT_DIR+'/news.json', 'w') as json_file: diff --git a/news_feed.py b/news_feed.py index a5a1ade..766d77b 100644 --- a/news_feed.py +++ b/news_feed.py @@ -32,6 +32,7 @@ import taito.music_diver as music_diver import bandai_namco.taiko as taiko import community.disc as disc import community.wacca_plus.wacca_plus as wac_plus +import community.museca_plus as mus_plus import constants import translate @@ -143,6 +144,11 @@ def get_news(news_url: str, version=None) -> list: else: messages = disc.fetch_messages(constants.WACCA_PLUS_MAGIC_STRING) news_posts = sorted(wac_plus.parse_announcement_messages(messages), key=lambda x: x['timestamp'], reverse=True) + + elif news_url == constants.MUSECA_PLUS_NEWS_SITE: + site_data = download_site_as_html(news_url) + news_posts = sorted(mus_plus.parse_museca_plus_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) + else: news_posts = [] return news_posts diff --git a/site/src/components/TitleBar.tsx b/site/src/components/TitleBar.tsx index 3018357..937fcf7 100644 --- a/site/src/components/TitleBar.tsx +++ b/site/src/components/TitleBar.tsx @@ -86,7 +86,10 @@ const TitleBar: React.FC = () => { }, { name: "COMMUNITY", - games: [{ id: "wacca_plus", title: "WACCA+" }], + games: [ + { id: "wacca_plus", title: "WACCA PLUS" }, + { id: "museca_plus", title: "MÚSECA PLUS" }, + ], }, ]; diff --git a/site/src/utils.ts b/site/src/utils.ts index 18ea80c..4fe3493 100644 --- a/site/src/utils.ts +++ b/site/src/utils.ts @@ -17,7 +17,8 @@ export const getGameTitle = (gameId: string) => { if (lowerCaseGameId.startsWith("popn_music")) return "pop'n music"; if (lowerCaseGameId.startsWith("music_diver")) return "MUSIC DIVER"; if (lowerCaseGameId.startsWith("taiko")) return "Taiko no Tatsujin"; - if (lowerCaseGameId.startsWith("wacca")) return "WACCA+"; + if (lowerCaseGameId.startsWith("wacca")) return "WACCA PLUS"; + if (lowerCaseGameId.startsWith("museca")) return "MÚSECA PLUS"; return gameId.toUpperCase(); |
