aboutsummaryrefslogtreecommitdiffstats
path: root/translate.py
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2025-10-01 18:08:25 -0700
committerPinapelz <yukais@pinapelz.com>2025-10-01 18:08:25 -0700
commit63dd4995224db0540be1bca7a8f8d463483f8e43 (patch)
treec56f23eaeea1ff8b96c39909317da65db5f41059 /translate.py
parent691a8a1c40f8f1357c09e8f2ac885865bbad6a5e (diff)
migrate summarizer, tl, wac modules to cache using db
Diffstat (limited to 'translate.py')
-rw-r--r--translate.py64
1 files changed, 10 insertions, 54 deletions
diff --git a/translate.py b/translate.py
index 877872a..31206a4 100644
--- a/translate.py
+++ b/translate.py
@@ -1,9 +1,9 @@
from dotenv import load_dotenv
+from database import Database
import requests
import constants
import re
import os
-import json
import hashlib
@@ -36,58 +36,15 @@ def _decode_links(raw_text: str, links: list) -> str:
raw_text = raw_text.replace(link[0], link[1])
return raw_text
-def _load_translation_cache() -> dict:
- cache_file = "tl_cache.json"
- tl_map = {}
- if os.path.exists(cache_file):
- try:
- with open(cache_file, "r", encoding="utf-8") as file:
- entries = json.load(file)
- for entry in entries:
- key = hashlib.sha256((entry["source_lang"] + entry["target_lang"] + entry["source_txt"]).encode('utf-8')).hexdigest()
- tl_map[key] = entry["result_txt"]
- return tl_map
- except (UnicodeDecodeError, json.JSONDecodeError, KeyError) as e:
- print(f"Translation cache corrupted ({e}), deleting and starting fresh...")
- os.remove(cache_file)
- with open(cache_file, "w", encoding="utf-8") as file:
- json.dump([], file, ensure_ascii=False, indent=4)
- return {}
- else:
- with open(cache_file, "w", encoding="utf-8") as file:
- json.dump([], file, ensure_ascii=False, indent=4)
- return {}
-
-def _add_to_translation_cache(source_lang: str, target_lang: str, source_txt: str, result_txt: str) -> None:
- cache_file = "tl_cache.json"
- cache_entry = {
- "source_lang": source_lang,
- "target_lang": target_lang,
- "source_txt": source_txt,
- "result_txt": result_txt
- }
- try:
- if os.path.exists(cache_file):
- with open(cache_file, "r", encoding="utf-8") as file:
- cache = json.load(file)
- else:
- cache = []
- cache.append(cache_entry)
- with open(cache_file, "w", encoding="utf-8") as file:
- json.dump(cache, file, ensure_ascii=False, indent=4)
- except (UnicodeDecodeError, json.JSONDecodeError) as e:
- print(f"Translation cache corrupted during write ({e}), starting fresh...")
- cache = [cache_entry]
- with open(cache_file, "w", encoding="utf-8") as file:
- json.dump(cache, file, ensure_ascii=False, indent=4)
-
-def request_google_translate(text: str, source: str="ja", target="en", translation_cache=None) -> tuple:
+def request_google_translate(text: str, source: str="ja", target="en") -> tuple:
"""
Translates input text and returns the translated text using Google Cloud Translation API.
"""
key = hashlib.sha256((source + target + text).encode('utf-8')).hexdigest()
- if translation_cache and key in translation_cache:
- return translation_cache[key]
+ database = Database()
+ tl_result = database.get_translation(key)
+ if tl_result:
+ return tl_result
API_KEY = os.getenv("GOOGLE_TRANSLATE_API_KEY")
encoded_text, restore_data = _encode_links(text)
url = "https://translation.googleapis.com/language/translate/v2?key="+API_KEY
@@ -100,8 +57,8 @@ def request_google_translate(text: str, source: str="ja", target="en", translati
response = requests.post(url, json=payload)
data = response.json()
translated_text = data["data"]["translations"][0]["translatedText"]
- translation_cache[key] = translated_text
- _add_to_translation_cache(source, target, text, translated_text)
+ database.add_new_translation(key=key, source_lang=source, target_lang=target, source_txt=text, result_txt=translated_text)
+ database.close()
return _decode_links(translated_text, restore_data)
def translation_possible() -> bool:
@@ -115,20 +72,19 @@ def add_translate_text_to_en(news_post: dict, overrides: list=[]) -> dict:
if not translation_possible():
return news_post
translated_posts = []
- translation_cache = _load_translation_cache()
for post in news_post:
headline = post.get("headline")
if headline:
for override in overrides:
headline = headline.replace(override[0], override[1])
- post["en_headline"] = request_google_translate(headline, translation_cache=translation_cache)
+ post["en_headline"] = request_google_translate(headline)
else:
post["en_headline"] = None
content = post.get("content")
if content:
for override in overrides:
content = content.replace(override[0], override[1])
- en_content = request_google_translate(content, translation_cache=translation_cache)
+ en_content = request_google_translate(content)
post["en_content"] = en_content
else:
post["en_content"] = None
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage