diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-09-30 16:52:59 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-09-30 16:53:19 -0700 |
| commit | 691a8a1c40f8f1357c09e8f2ac885865bbad6a5e (patch) | |
| tree | a16c32496afdefc1f159811cfde80849b8a97a40 /database.py | |
| parent | e81d5213da85cad3bd1b8bb5b10e3871c03f6ba9 (diff) | |
add new sqlite persistent storage creation and migration
Diffstat (limited to 'database.py')
| -rw-r--r-- | database.py | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/database.py b/database.py new file mode 100644 index 0000000..a4e5ac9 --- /dev/null +++ b/database.py @@ -0,0 +1,68 @@ +import sqlite3 +import os +import json + +class Database: + def __init__(self): + self._conn = sqlite3.connect("news.db") + self._cursor = self._conn.cursor() + self._initialize_db() + self._migrate_old_data() + + def _initialize_db(self): + with open("schema.sql") as f: + self._cursor.executescript(f.read()) + self._conn.commit() + + def _migrate_old_data(self): + """ + Migrates old summarization, tl and wac files into DB + """ + if os.path.exists("summarization_cache.json"): + print("[Database] Migrating old summarization_cache to DB") + with open("summarization_cache.json", "r") as file: + summ_cache = json.load(file) + for key, val in summ_cache.items(): + self.add_new_summary(key, val["headline"], val["content"]) + os.rename("summarization_cache.json", "summarization_cache.json.bak") + + if os.path.exists("tl_cache.json"): + print("[Database] Migrating old translation cache (tl_cache.json) to DB") + with open("tl_cache.json", "r") as file: + tl_cache = json.load(file) + import hashlib + for entry in tl_cache: + key = hashlib.sha256((entry["source_lang"] + entry["target_lang"] + entry["source_txt"]).encode('utf-8')).hexdigest() + self.add_new_translation(key, entry["source_lang"], entry["target_lang"], entry["source_txt"], entry["result_txt"]) + os.rename("tl_cache.json", "tl_cache.json.bak") + + if os.path.exists("wac_result_cache.json"): + print("[Database] Migrating old WAC Data cache to DB") + with open("wac_result_cache.json", "r") as file: + wac_cache = json.load(file) + import hashlib + for key, value in wac_cache.items(): + self.add_new_wac_entry(key, value[0], value[1]) + os.rename("wac_result_cache.json", "wac_result_cache.json.bak") + + def add_new_wac_entry(self, key: str, is_news: bool, post_type: str): + news_var = 0 if is_news is False else 1 + self._cursor.execute( + "INSERT OR REPLACE INTO wacplus (id, isNews, type) VALUES (?, ?, ?)", + (key, news_var, post_type) + ) + self._conn.commit() + + def add_new_translation(self, key: str, source_lang: str, target_lang: str, source_txt: str, result_txt: str): + self._cursor.execute( + "INSERT OR REPLACE INTO translation (id, source_lang, target_lang, source, result) VALUES (?, ?, ?, ?, ?)", + (key, source_lang, target_lang, source_txt, result_txt) + ) + self._conn.commit() + + def add_new_summary(self, key: str, headline: str, content: str): + self._cursor.execute( + "INSERT OR REPLACE INTO summarization (id, headline, content) VALUES (?, ?, ?)", + (key, headline, content) + ) + self._conn.commit() |
