aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- .gitignore | 1
-rw-r--r-- bemani/iidx.py | 3
-rw-r--r-- constants.py | 2
-rw-r--r-- generate.py | 1
-rw-r--r-- news_feed.py | 3
-rw-r--r-- translate.py | 118
6 files changed, 126 insertions, 2 deletions
diff --git a/.gitignore b/.gitignore
index e2ee484..2072926 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,4 @@ cython_debug/
# PyPI configuration file
.pypirc
news
+tl_cache.json
diff --git a/bemani/iidx.py b/bemani/iidx.py
index 0d97e91..67e1085 100644
--- a/bemani/iidx.py
+++ b/bemani/iidx.py
@@ -3,6 +3,9 @@ from datetime import datetime
from urllib.parse import urljoin
import re
+# Pairs of (Japanese term, English rendering) specific to IIDX news.
+# NOTE(review): these have the same shape as the `overrides` pairs that
+# translate.add_translate_text_to_en substitutes before translating, but
+# nothing in the visible change passes this list in - confirm the wiring.
+IIDX_KEY_TERMS = [("クプロ", "QPro")]
def parse_pinky_crush_news_site(html: str, base_url):
type_map = {
diff --git a/constants.py b/constants.py
index 6d13daf..50f9166 100644
--- a/constants.py
+++ b/constants.py
@@ -12,6 +12,8 @@ MAIMAIDX_JP_NEWS_SITE="https://info-maimai.sega.jp/"
MAIMAIDX_INTL_NEWS_SITE="https://maimai.sega.com/download/"
ONGEKI_JP_NEWS_SITE="https://info-ongeki.sega.jp/"
+ADD_EN_TRANSLATION=True # Switch read by translate.translation_possible(), which additionally requires GOOGLE_TRANSLATE_API_KEY to be set (e.g. via .env)
+
class CHUNITHM_VERSION(Enum):
LUMINOUS_PLUS = 1
VERSE = 2
diff --git a/generate.py b/generate.py
index 71093e5..e974bfa 100644
--- a/generate.py
+++ b/generate.py
@@ -4,7 +4,6 @@ Generally you're expected to update the game versions manually
as for most games you only ever want the latest version (supported) of the game
"""
import news_feed as feed
-import requests
import constants
import json
import os
diff --git a/news_feed.py b/news_feed.py
index 87782bf..18c2616 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -16,7 +16,6 @@ Generic format for a news entry. All keys are considered to be nullable
}
"""
-from email.utils import parsedate_to_datetime
from site_scraper import SiteScraper, download_site_as_html
import bemani.sdvx as sound_voltex
import bemani.iidx as iidx
@@ -26,6 +25,7 @@ import sega.maimaidx_jp as maimaidx_jp
import sega.maimaidx_intl as maimaidx_intl
import sega.ongeki_jp as ongeki_jp
import constants
+import translate
def get_news(news_url: str, version=None) -> list:
if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE:
@@ -35,6 +35,7 @@ def get_news(news_url: str, version=None) -> list:
elif news_url == constants.IIDX_PINKY_CRUSH_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(iidx.parse_pinky_crush_news_site(site_data, constants.EAMUSEMENT_BASE_URL), key=lambda x: x['timestamp'], reverse=True)
+ news_posts = translate.add_translate_text_to_en(news_posts)
elif news_url == constants.CHUNITHM_JP_NEWS_SITE:
site_data = download_site_as_html(news_url)
diff --git a/translate.py b/translate.py
new file mode 100644
index 0000000..64ba018
--- /dev/null
+++ b/translate.py
@@ -0,0 +1,118 @@
+from dotenv import load_dotenv
+import requests
+import constants
+import re
+import os
+import json
+import hashlib
+
+
+load_dotenv()
+
+def _encode_links(markdown_text: str) -> tuple:
+    """
+    Swap every markdown link ``[label](target)`` in the text for a numbered
+    573_UPDATE_MARKDOWN_LINK_N token, where N counts from 1 in order of appearance.
+
+    Returns a 2-tuple: the text with the tokens substituted in, and a list of
+    (token, original_markdown_link) pairs in the order the links were found.
+    """
+    found = []
+
+    def _swap(hit):
+        # The next number is always one more than the links recorded so far.
+        token = f"573_UPDATE_MARKDOWN_LINK_{len(found) + 1}"
+        found.append((token, hit.group(0)))
+        return token
+
+    encoded = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _swap, markdown_text)
+    return encoded, found
+
+def _decode_links(raw_text: str, links: list) -> str:
+    """
+    Put the original markdown links back in place of their placeholders.
+
+    `links` is a list of (placeholder, markdown_link) pairs, as produced by
+    _encode_links. Returns the text with every placeholder occurrence replaced.
+    """
+    # Restore the longest placeholders first: "..._LINK_1" is a prefix of
+    # "..._LINK_10", so replacing it first would turn the latter into the
+    # first link followed by a stray "0".
+    for link in sorted(links, key=lambda pair: len(pair[0]), reverse=True):
+        raw_text = raw_text.replace(link[0], link[1])
+    return raw_text
+
+def _load_translation_cache() -> dict:
+    """
+    Read tl_cache.json from the current working directory into a lookup dict.
+
+    Each stored entry is keyed by the SHA-256 hex digest of
+    source_lang + target_lang + source_txt and maps to its result_txt.
+    If the file does not exist yet, an empty cache file (a JSON list) is
+    created and an empty dict is returned.
+    """
+    cache_file = "tl_cache.json"
+    if not os.path.exists(cache_file):
+        # First run: seed the file with an empty JSON list.
+        with open(cache_file, "w", encoding="utf-8") as file:
+            json.dump([], file, ensure_ascii=False, indent=4)
+        return {}
+    with open(cache_file, "r", encoding="utf-8") as file:
+        entries = json.load(file)
+    tl_map = {}
+    for entry in entries:
+        digest_input = entry["source_lang"] + entry["target_lang"] + entry["source_txt"]
+        key = hashlib.sha256(digest_input.encode('utf-8')).hexdigest()
+        tl_map[key] = entry["result_txt"]
+    return tl_map
+
+def _add_to_translation_cache(source_lang: str, target_lang: str, source_txt: str, result_txt: str) -> None:
+    """
+    Append one translation record to tl_cache.json in the current working directory.
+
+    The existing JSON list is read if the file is present (otherwise a new
+    list is started), the record is added at the end, and the whole list is
+    written back.
+    """
+    cache_file = "tl_cache.json"
+    cache = []
+    if os.path.exists(cache_file):
+        with open(cache_file, "r", encoding="utf-8") as file:
+            cache = json.load(file)
+    cache.append({
+        "source_lang": source_lang,
+        "target_lang": target_lang,
+        "source_txt": source_txt,
+        "result_txt": result_txt
+    })
+    with open(cache_file, "w", encoding="utf-8") as file:
+        json.dump(cache, file, ensure_ascii=False, indent=4)
+
+def request_google_translate(text: str, source: str="ja", target="en", translation_cache=None) -> str:
+    """
+    Translate `text` with the Google Cloud Translation API and return the result.
+
+    Markdown links are swapped for placeholders before the request and restored
+    afterwards so their labels/targets are not sent for translation.
+
+    `translation_cache` is an optional dict keyed by the SHA-256 hex digest of
+    source + target + text. On a hit the cached string is returned without any
+    request; on a miss the final translation is stored in it (when provided)
+    and appended to the on-disk cache.
+
+    Raises requests.HTTPError when the API answers with an error status.
+    """
+    key = hashlib.sha256((source + target + text).encode('utf-8')).hexdigest()
+    # `is not None` rather than truthiness: an empty dict is a valid cache to fill.
+    if translation_cache is not None and key in translation_cache:
+        return translation_cache[key]
+    api_key = os.getenv("GOOGLE_TRANSLATE_API_KEY")
+    encoded_text, restore_data = _encode_links(text)
+    url = "https://translation.googleapis.com/language/translate/v2"
+    # NOTE(review): everything, including the text, travels in the query string;
+    # very long posts may hit URL length limits - consider a request body.
+    params = {
+        "q": encoded_text,  # send the placeholder form, not the raw markdown
+        "source": source,
+        "target": target,
+        "format": "text",
+        "key": api_key,
+    }
+    response = requests.post(url, params=params, timeout=30)
+    # Fail loudly on auth/quota errors instead of a KeyError on the payload below.
+    response.raise_for_status()
+    data = response.json()
+    # Restore links before caching so cache hits and fresh results are identical.
+    translated_text = _decode_links(data["data"]["translations"][0]["translatedText"], restore_data)
+    if translation_cache is not None:
+        translation_cache[key] = translated_text
+    _add_to_translation_cache(source, target, text, translated_text)
+    return translated_text
+
+def translation_possible() -> bool:
+    """
+    Report whether EN translation should be attempted.
+
+    True only when constants.ADD_EN_TRANSLATION is enabled and the
+    GOOGLE_TRANSLATE_API_KEY environment variable holds a non-empty value
+    (a blank `GOOGLE_TRANSLATE_API_KEY=` line counts as missing).
+    """
+    return bool(constants.ADD_EN_TRANSLATION) and bool(os.getenv("GOOGLE_TRANSLATE_API_KEY"))
+
+def add_translate_text_to_en(news_post: list, overrides: list=None) -> list:
+    """
+    Add English translations to each news post and return the posts as a new list.
+
+    `news_post` is a list of post dicts. For every post, a non-empty
+    "headline" / "content" value is translated and stored under
+    "en_headline" / "en_content" on that same dict (posts are updated in place).
+
+    `overrides` is an optional list of (original, replacement) pairs applied
+    to the text with str.replace before it is sent for translation.
+
+    When translation_possible() is false (feature switched off or no API key)
+    the posts are returned untouched and no request is made.
+    """
+    if not translation_possible():
+        return list(news_post)
+    if overrides is None:
+        overrides = []
+    translated_posts = []
+    translation_cache = _load_translation_cache()
+    for post in news_post:
+        for field in ("headline", "content"):
+            # .get: a post may lack the key entirely or carry None / "".
+            text = post.get(field)
+            if not text:
+                continue
+            for override in overrides:
+                text = text.replace(override[0], override[1])
+            post["en_" + field] = request_google_translate(text, translation_cache=translation_cache)
+        translated_posts.append(post)
+    return translated_posts
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage