aboutsummaryrefslogtreecommitdiffstats
path: root/translate.py
blob: dd15cffc632dd1bafbd08d393c1d08c86fd90f7e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from dotenv import load_dotenv
from common import create_database_connection
import requests
import constants
import re
import os
import hashlib


load_dotenv()

def _encode_links(markdown_text: str) -> tuple:
    """
    Find all occurrences of markdown links, replace them with 573_UPDATE_MARKDOWN_LINK_N where N is the nth link,
    and record the word, its markdown replacement, and the occurrence count.
    """
    link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
    links = []
    link_count = 0

    def replacer(match):
        nonlocal link_count
        link_count += 1
        markdown_replacement = match.group(0)
        placeholder = f"573_UPDATE_MARKDOWN_LINK_{link_count}"
        links.append((placeholder, markdown_replacement))
        return placeholder

    return link_pattern.sub(replacer, markdown_text), links

def _decode_links(raw_text: str, links: list) -> str:
    """
    Replaces the placeholders with hyperlinks
    """
    for link in links:
        raw_text = raw_text.replace(link[0], link[1])
    return raw_text

def request_google_translate(text: str, source: str="ja", target="en") -> str:
    """
    Translates input text and returns the translated text using Google Cloud Translation API.

    Results are cached in the database under a SHA-256 key derived from
    source + target + text. On a cache hit the stored value is returned as-is
    (whatever database.get_translation yields) and no request is made.

    On a miss, markdown links are swapped for placeholders before the text is
    sent and restored afterwards. The restored text is what gets cached and
    returned.

    The API key is read from the GOOGLE_TRANSLATE_API_KEY environment variable;
    callers are expected to check translation_possible() first.
    """
    key = hashlib.sha256((source + target + text).encode('utf-8')).hexdigest()
    database = create_database_connection()
    try:
        tl_result = database.get_translation(key)
        if tl_result:
            return tl_result
        api_key = os.getenv("GOOGLE_TRANSLATE_API_KEY")
        encoded_text, restore_data = _encode_links(text)
        url = "https://translation.googleapis.com/language/translate/v2?key="+api_key
        payload = {
            # Send the placeholder-encoded text so link markup is not translated.
            "q": encoded_text,
            "source": source,
            "target": target,
            "format": "text",
        }
        # NOTE(review): no timeout or HTTP status check here; an error response
        # surfaces as a KeyError on the lookup below.
        response = requests.post(url, json=payload)
        data = response.json()
        translated_text = _decode_links(
            data["data"]["translations"][0]["translatedText"], restore_data
        )
        # Cache the decoded text so cache hits never return raw placeholders.
        database.add_new_translation(key=key, source_lang=source, target_lang=target, source_txt=text, result_txt=translated_text)
        return translated_text
    finally:
        # Release the connection on every path: cache hit, success, or error.
        database.close()

def translation_possible() -> bool:
    """
    Report whether EN translation can run: constants.ADD_EN_TRANSLATION must be
    truthy and the GOOGLE_TRANSLATE_API_KEY environment variable must be set.
    """
    enabled = constants.ADD_EN_TRANSLATION
    if not enabled:
        # Hand back the falsy flag itself, exactly as `and` short-circuiting would.
        return enabled
    return os.getenv("GOOGLE_TRANSLATE_API_KEY") is not None

def _translate_with_overrides(value, overrides):
    """
    Apply each (old, new) override to value, then translate it; None if value is empty.
    """
    if not value:
        return None
    for override in overrides:
        value = value.replace(override[0], override[1])
    return request_google_translate(value)


def add_translate_text_to_en(news_post: list, overrides: list=None) -> list:
    """
    Takes an iterable of news post dicts, sets "en_headline" and "en_content"
    on each one (translated from "headline" and "content"), and returns the
    posts as a new list.

    overrides is an optional list of (old, new) pairs applied with str.replace
    to the source text before translating. A post whose headline or content is
    missing or empty gets None for the matching EN field.

    Each post dict is modified in place. If translation is not possible, the
    input is returned untouched.
    """
    if not translation_possible():
        return news_post
    # None stands in for "no overrides"; avoids a shared mutable default list.
    if overrides is None:
        overrides = []
    translated_posts = []
    for post in news_post:
        post["en_headline"] = _translate_with_overrides(post.get("headline"), overrides)
        post["en_content"] = _translate_with_overrides(post.get("content"), overrides)
        translated_posts.append(post)
    return translated_posts
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage