1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
import hashlib
import json
import os
import sqlite3
class Database:
    """SQLite-backed store for summaries, translations and WAC entries.

    Creating an instance connects to the SQLite file, executes the schema
    script, and imports any legacy JSON cache files that are still present.
    Each ``add_new_*`` method upserts one row (``INSERT OR REPLACE``) and
    commits it immediately.

    The instance can be used as a context manager; leaving the ``with``
    block closes the connection.
    """

    _SUMMARY_SQL = (
        "INSERT OR REPLACE INTO summarization (id, headline, content) "
        "VALUES (?, ?, ?)"
    )
    _TRANSLATION_SQL = (
        "INSERT OR REPLACE INTO translation "
        "(id, source_lang, target_lang, source, result) VALUES (?, ?, ?, ?, ?)"
    )
    _WAC_SQL = "INSERT OR REPLACE INTO wacplus (id, isNews, type) VALUES (?, ?, ?)"

    def __init__(
        self,
        db_path: str = "news.db",
        schema_path: str = "schema.sql",
        legacy_dir: str = "",
    ) -> None:
        """Open the database, apply the schema and migrate legacy caches.

        Args:
            db_path: SQLite database file (``":memory:"`` also works).
            schema_path: SQL script executed on every start-up.
            legacy_dir: Directory searched for the old JSON cache files.
                The empty default resolves to the current working directory.
        """
        self._schema_path = schema_path
        self._legacy_dir = legacy_dir
        self._conn = sqlite3.connect(db_path)
        try:
            self._cursor = self._conn.cursor()
            self._initialize_db()
            self._migrate_old_data()
        except BaseException:
            # Do not leak the connection when set-up fails; always re-raise.
            self._conn.close()
            raise

    def __enter__(self) -> "Database":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()

    def _initialize_db(self) -> None:
        """Execute the schema script against the open connection."""
        with open(self._schema_path, encoding="utf-8") as f:
            schema = f.read()
        self._cursor.executescript(schema)
        self._conn.commit()

    def _migrate_old_data(self) -> None:
        """Import the old summarization, tl and wac JSON files into the DB.

        Each file that exists is loaded, written to its table in a single
        transaction, and then renamed to ``<name>.bak`` so it is not
        imported again on the next start-up.
        """
        path = self._legacy_path("summarization_cache.json")
        if os.path.exists(path):
            print("[Database] Migrating old summarization_cache to DB")
            summ_cache = self._read_json(path)
            self._bulk_insert(
                self._SUMMARY_SQL,
                [
                    (key, val["headline"], val["content"])
                    for key, val in summ_cache.items()
                ],
            )
            self._retire(path)

        path = self._legacy_path("tl_cache.json")
        if os.path.exists(path):
            print("[Database] Migrating old translation cache (tl_cache.json) to DB")
            tl_cache = self._read_json(path)
            self._bulk_insert(
                self._TRANSLATION_SQL,
                [
                    (
                        self._translation_key(
                            entry["source_lang"],
                            entry["target_lang"],
                            entry["source_txt"],
                        ),
                        entry["source_lang"],
                        entry["target_lang"],
                        entry["source_txt"],
                        entry["result_txt"],
                    )
                    for entry in tl_cache
                ],
            )
            self._retire(path)

        path = self._legacy_path("wac_result_cache.json")
        if os.path.exists(path):
            print("[Database] Migrating old WAC Data cache to DB")
            wac_cache = self._read_json(path)
            self._bulk_insert(
                self._WAC_SQL,
                [
                    (key, self._as_flag(value[0]), value[1])
                    for key, value in wac_cache.items()
                ],
            )
            self._retire(path)

    def _legacy_path(self, name: str) -> str:
        """Return the location of a legacy cache file inside ``legacy_dir``."""
        return os.path.join(self._legacy_dir, name)

    @staticmethod
    def _read_json(path: str):
        """Parse a JSON file, decoding it as UTF-8 regardless of locale."""
        with open(path, "r", encoding="utf-8") as file:
            return json.load(file)

    @staticmethod
    def _retire(path: str) -> None:
        """Rename a migrated file to ``<path>.bak``.

        ``os.replace`` overwrites a backup left by an earlier run on every
        platform; ``os.rename`` raises on Windows in that situation.
        """
        os.replace(path, path + ".bak")

    @staticmethod
    def _translation_key(source_lang: str, target_lang: str, source_txt: str) -> str:
        """Return the SHA-256 hex digest used as id for a legacy translation."""
        raw = (source_lang + target_lang + source_txt).encode("utf-8")
        return hashlib.sha256(raw).hexdigest()

    @staticmethod
    def _as_flag(value) -> int:
        """Map a truthy/falsy value to the 1/0 stored in ``isNews``."""
        return 1 if value else 0

    def _bulk_insert(self, sql: str, rows: list) -> None:
        """Run ``sql`` for every row inside one transaction.

        The connection context manager commits on success and rolls back
        if any row fails, so a cache file is imported all-or-nothing.
        """
        with self._conn:
            self._cursor.executemany(sql, rows)

    def add_new_wac_entry(self, key: str, is_news: bool, post_type: str) -> None:
        """Insert or replace the ``wacplus`` row identified by ``key``.

        Args:
            key: Value for the ``id`` column.
            is_news: Stored in ``isNews`` as 1 when truthy, otherwise 0.
            post_type: Value for the ``type`` column.
        """
        with self._conn:
            self._cursor.execute(
                self._WAC_SQL,
                (key, self._as_flag(is_news), post_type),
            )

    def add_new_translation(
        self,
        key: str,
        source_lang: str,
        target_lang: str,
        source_txt: str,
        result_txt: str,
    ) -> None:
        """Insert or replace the ``translation`` row identified by ``key``.

        Args:
            key: Value for the ``id`` column.
            source_lang: Value for the ``source_lang`` column.
            target_lang: Value for the ``target_lang`` column.
            source_txt: Value for the ``source`` column.
            result_txt: Value for the ``result`` column.
        """
        with self._conn:
            self._cursor.execute(
                self._TRANSLATION_SQL,
                (key, source_lang, target_lang, source_txt, result_txt),
            )

    def add_new_summary(self, key: str, headline: str, content: str) -> None:
        """Insert or replace the ``summarization`` row identified by ``key``.

        Args:
            key: Value for the ``id`` column.
            headline: Value for the ``headline`` column.
            content: Value for the ``content`` column.
        """
        with self._conn:
            self._cursor.execute(self._SUMMARY_SQL, (key, headline, content))
|