diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-10-01 18:08:25 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-10-01 18:08:25 -0700 |
| commit | 63dd4995224db0540be1bca7a8f8d463483f8e43 (patch) | |
| tree | c56f23eaeea1ff8b96c39909317da65db5f41059 /translate.py | |
| parent | 691a8a1c40f8f1357c09e8f2ac885865bbad6a5e (diff) | |
migrate summarizer, tl, wac modules to cache using db
Diffstat (limited to 'translate.py')
| -rw-r--r-- | translate.py | 64 |
1 files changed, 10 insertions, 54 deletions
diff --git a/translate.py b/translate.py index 877872a..31206a4 100644 --- a/translate.py +++ b/translate.py @@ -1,9 +1,9 @@ from dotenv import load_dotenv +from database import Database import requests import constants import re import os -import json import hashlib @@ -36,58 +36,15 @@ def _decode_links(raw_text: str, links: list) -> str: raw_text = raw_text.replace(link[0], link[1]) return raw_text -def _load_translation_cache() -> dict: - cache_file = "tl_cache.json" - tl_map = {} - if os.path.exists(cache_file): - try: - with open(cache_file, "r", encoding="utf-8") as file: - entries = json.load(file) - for entry in entries: - key = hashlib.sha256((entry["source_lang"] + entry["target_lang"] + entry["source_txt"]).encode('utf-8')).hexdigest() - tl_map[key] = entry["result_txt"] - return tl_map - except (UnicodeDecodeError, json.JSONDecodeError, KeyError) as e: - print(f"Translation cache corrupted ({e}), deleting and starting fresh...") - os.remove(cache_file) - with open(cache_file, "w", encoding="utf-8") as file: - json.dump([], file, ensure_ascii=False, indent=4) - return {} - else: - with open(cache_file, "w", encoding="utf-8") as file: - json.dump([], file, ensure_ascii=False, indent=4) - return {} - -def _add_to_translation_cache(source_lang: str, target_lang: str, source_txt: str, result_txt: str) -> None: - cache_file = "tl_cache.json" - cache_entry = { - "source_lang": source_lang, - "target_lang": target_lang, - "source_txt": source_txt, - "result_txt": result_txt - } - try: - if os.path.exists(cache_file): - with open(cache_file, "r", encoding="utf-8") as file: - cache = json.load(file) - else: - cache = [] - cache.append(cache_entry) - with open(cache_file, "w", encoding="utf-8") as file: - json.dump(cache, file, ensure_ascii=False, indent=4) - except (UnicodeDecodeError, json.JSONDecodeError) as e: - print(f"Translation cache corrupted during write ({e}), starting fresh...") - cache = [cache_entry] - with open(cache_file, "w", encoding="utf-8") as file: - json.dump(cache, file, ensure_ascii=False, indent=4) - -def request_google_translate(text: str, source: str="ja", target="en", translation_cache=None) -> tuple: +def request_google_translate(text: str, source: str="ja", target="en") -> tuple: """ Translates input text and returns the translated text using Google Cloud Translation API. """ key = hashlib.sha256((source + target + text).encode('utf-8')).hexdigest() - if translation_cache and key in translation_cache: - return translation_cache[key] + database = Database() + tl_result = database.get_translation(key) + if tl_result: + return tl_result API_KEY = os.getenv("GOOGLE_TRANSLATE_API_KEY") encoded_text, restore_data = _encode_links(text) url = "https://translation.googleapis.com/language/translate/v2?key="+API_KEY @@ -100,8 +57,8 @@ def request_google_translate(text: str, source: str="ja", target="en", translati response = requests.post(url, json=payload) data = response.json() translated_text = data["data"]["translations"][0]["translatedText"] - translation_cache[key] = translated_text - _add_to_translation_cache(source, target, text, translated_text) + database.add_new_translation(key=key, source_lang=source, target_lang=target, source_txt=text, result_txt=translated_text) + database.close() return _decode_links(translated_text, restore_data) def translation_possible() -> bool: @@ -115,20 +72,19 @@ def add_translate_text_to_en(news_post: dict, overrides: list=[]) -> dict: if not translation_possible(): return news_post translated_posts = [] - translation_cache = _load_translation_cache() for post in news_post: headline = post.get("headline") if headline: for override in overrides: headline = headline.replace(override[0], override[1]) - post["en_headline"] = request_google_translate(headline, translation_cache=translation_cache) + post["en_headline"] = request_google_translate(headline) else: post["en_headline"] = None content = post.get("content") if content: for override in overrides: content = content.replace(override[0], override[1]) - en_content = request_google_translate(content, translation_cache=translation_cache) + en_content = request_google_translate(content) post["en_content"] = en_content else: post["en_content"] = None |
