1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
import json
import os
import sqlite3
class Database:
def __init__(self):
self._conn = sqlite3.connect("news.db")
self._cursor = self._conn.cursor()
self._initialize_db()
self._migrate_old_data()
def _initialize_db(self):
with open("schema.sql") as f:
self._cursor.executescript(f.read())
self._conn.commit()
def close(self):
"""Close the database connection"""
if self._conn:
self._conn.close()
def _migrate_old_data(self):
"""
Migrates old summarization, tl and wac files into DB
"""
if os.path.exists("summarization_cache.json"):
print("[Database] Migrating old summarization_cache to DB")
with open("summarization_cache.json", "r") as file:
summ_cache = json.load(file)
for key, val in summ_cache.items():
self.add_new_summary(key, val["headline"], val["content"])
os.rename("summarization_cache.json", "summarization_cache.json.bak")
if os.path.exists("tl_cache.json"):
print("[Database] Migrating old translation cache (tl_cache.json) to DB")
with open("tl_cache.json", "r") as file:
tl_cache = json.load(file)
import hashlib
for entry in tl_cache:
key = hashlib.sha256(
(
entry["source_lang"]
+ entry["target_lang"]
+ entry["source_txt"]
).encode("utf-8")
).hexdigest()
self.add_new_translation(
key,
entry["source_lang"],
entry["target_lang"],
entry["source_txt"],
entry["result_txt"],
)
os.rename("tl_cache.json", "tl_cache.json.bak")
if os.path.exists("wac_result_cache.json"):
print("[Database] Migrating old WAC Data cache to DB")
with open("wac_result_cache.json", "r") as file:
wac_cache = json.load(file)
import hashlib
for key, value in wac_cache.items():
self.add_new_wac_entry(key, value[0], value[1])
os.rename("wac_result_cache.json", "wac_result_cache.json.bak")
def add_new_wac_entry(self, key: str, is_news: bool, post_type: str):
news_var = 0 if is_news is False else 1
self._cursor.execute(
"INSERT OR REPLACE INTO wacplus (id, isNews, type) VALUES (?, ?, ?)",
(key, news_var, post_type),
)
self._conn.commit()
def add_new_translation(
self,
key: str,
source_lang: str,
target_lang: str,
source_txt: str,
result_txt: str,
):
self._cursor.execute(
"INSERT OR REPLACE INTO translation (id, source_lang, target_lang, source, result) VALUES (?, ?, ?, ?, ?)",
(key, source_lang, target_lang, source_txt, result_txt),
)
self._conn.commit()
def add_new_summary(self, key: str, headline: str, content: str):
self._cursor.execute(
"INSERT OR REPLACE INTO summarization (id, headline, content) VALUES (?, ?, ?)",
(key, headline, content),
)
self._conn.commit()
def add_news_entry(self, key: str, news_entry: dict):
is_ai_summary = 1 if news_entry.get("is_ai_summary", False) else 0
en_headline = news_entry.get("en_headline", None)
en_content = news_entry.get("en_content", None)
headline = news_entry.get("headline", None)
url = news_entry.get("url", None)
self._cursor.execute(
"INSERT OR REPLACE INTO news (news_id, date, identifier, type, timestamp, headline, content, url, is_ai_summary, en_headline, en_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
(
key,
news_entry["date"],
news_entry["identifier"],
news_entry["type"],
news_entry["timestamp"],
headline,
news_entry["content"],
url,
is_ai_summary,
en_headline,
en_content,
),
)
for image_entry in news_entry["images"]:
link_url = image_entry.get("link", None)
self._cursor.execute(
"INSERT OR REPLACE INTO news_images (news_id, image_url, link_url) VALUES (?, ?, ?)",
(key, image_entry["image"], link_url),
)
self._conn.commit()
def get_summary(self, key: str):
self._cursor.execute(
"SELECT headline, content FROM summarization WHERE id = ?", (key,)
)
result = self._cursor.fetchone()
if result is None:
return None
return {"headline": result[0], "content": result[1]}
def get_translation(self, key: str):
self._cursor.execute("SELECT result FROM translation WHERE id = ?", (key,))
result = self._cursor.fetchone()
if result is None:
return None
return result[0]
def get_wac_entry(self, key: str):
self._cursor.execute("SELECT isNews, type FROM wacplus WHERE id = ?", (key,))
result = self._cursor.fetchone()
if result is None:
return None
is_news = True if result[0] == 1 else False
return is_news, result[1]
def check_news_id_exists(self, key: str) -> bool:
"""
Check if a news entry with the given ID exists in the database.
:param key: The ID of the news entry to check.
:return: True if the news entry exists, False otherwise.
"""
self._cursor.execute("SELECT 1 FROM news WHERE news_id = ?", (key,))
result = self._cursor.fetchone()
return result is not None
|