aboutsummaryrefslogtreecommitdiffstats
path: root/database.py
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2025-09-30 16:52:59 -0700
committerPinapelz <yukais@pinapelz.com>2025-09-30 16:53:19 -0700
commit691a8a1c40f8f1357c09e8f2ac885865bbad6a5e (patch)
treea16c32496afdefc1f159811cfde80849b8a97a40 /database.py
parente81d5213da85cad3bd1b8bb5b10e3871c03f6ba9 (diff)
add new sqlite persistent storage creation and migration
Diffstat (limited to 'database.py')
-rw-r--r--database.py68
1 files changed, 68 insertions, 0 deletions
diff --git a/database.py b/database.py
new file mode 100644
index 0000000..a4e5ac9
--- /dev/null
+++ b/database.py
@@ -0,0 +1,68 @@
+import sqlite3
+import os
+import json
+
+class Database:
+ def __init__(self):
+ self._conn = sqlite3.connect("news.db")
+ self._cursor = self._conn.cursor()
+ self._initialize_db()
+ self._migrate_old_data()
+
+ def _initialize_db(self):
+ with open("schema.sql") as f:
+ self._cursor.executescript(f.read())
+ self._conn.commit()
+
+ def _migrate_old_data(self):
+ """
+ Migrates old summarization, tl and wac files into DB
+ """
+ if os.path.exists("summarization_cache.json"):
+ print("[Database] Migrating old summarization_cache to DB")
+ with open("summarization_cache.json", "r") as file:
+ summ_cache = json.load(file)
+ for key, val in summ_cache.items():
+ self.add_new_summary(key, val["headline"], val["content"])
+ os.rename("summarization_cache.json", "summarization_cache.json.bak")
+
+ if os.path.exists("tl_cache.json"):
+ print("[Database] Migrating old translation cache (tl_cache.json) to DB")
+ with open("tl_cache.json", "r") as file:
+ tl_cache = json.load(file)
+ import hashlib
+ for entry in tl_cache:
+ key = hashlib.sha256((entry["source_lang"] + entry["target_lang"] + entry["source_txt"]).encode('utf-8')).hexdigest()
+ self.add_new_translation(key, entry["source_lang"], entry["target_lang"], entry["source_txt"], entry["result_txt"])
+ os.rename("tl_cache.json", "tl_cache.json.bak")
+
+ if os.path.exists("wac_result_cache.json"):
+ print("[Database] Migrating old WAC Data cache to DB")
+ with open("wac_result_cache.json", "r") as file:
+ wac_cache = json.load(file)
+ import hashlib
+ for key, value in wac_cache.items():
+ self.add_new_wac_entry(key, value[0], value[1])
+ os.rename("wac_result_cache.json", "wac_result_cache.json.bak")
+
+ def add_new_wac_entry(self, key: str, is_news: bool, post_type: str):
+ news_var = 0 if is_news is False else 1
+ self._cursor.execute(
+ "INSERT OR REPLACE INTO wacplus (id, isNews, type) VALUES (?, ?, ?)",
+ (key, news_var, post_type)
+ )
+ self._conn.commit()
+
+ def add_new_translation(self, key: str, source_lang: str, target_lang: str, source_txt: str, result_txt: str):
+ self._cursor.execute(
+ "INSERT OR REPLACE INTO translation (id, source_lang, target_lang, source, result) VALUES (?, ?, ?, ?, ?)",
+ (key, source_lang, target_lang, source_txt, result_txt)
+ )
+ self._conn.commit()
+
+ def add_new_summary(self, key: str, headline: str, content: str):
+ self._cursor.execute(
+ "INSERT OR REPLACE INTO summarization (id, headline, content) VALUES (?, ?, ?)",
+ (key, headline, content)
+ )
+ self._conn.commit()
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage