diff options
Diffstat (limited to 'database/local_database.py')
| -rw-r--r-- | database/local_database.py | 164 |
1 files changed, 164 insertions, 0 deletions
diff --git a/database/local_database.py b/database/local_database.py new file mode 100644 index 0000000..9914132 --- /dev/null +++ b/database/local_database.py @@ -0,0 +1,164 @@ +from database.base_database import BaseDatabase +import json +import os +import sqlite3 + + +class Database(BaseDatabase): + def __init__(self, local_db_path: str="news.db"): + self._conn = sqlite3.connect(local_db_path) + self._cursor = self._conn.cursor() + self._initialize_db() + self._migrate_old_data() + + def _initialize_db(self): + with open("schema.sql") as f: + self._cursor.executescript(f.read()) + self._conn.commit() + + def close(self): + """Close the database connection""" + if self._conn: + self._conn.close() + + def _migrate_old_data(self): + """ + Migrates old summarization, tl and wac files into DB + """ + if os.path.exists("summarization_cache.json"): + print("[Database] Migrating old summarization_cache to DB") + with open("summarization_cache.json", "r") as file: + summ_cache = json.load(file) + for key, val in summ_cache.items(): + self.add_new_summary(key, val["headline"], val["content"]) + os.rename("summarization_cache.json", "summarization_cache.json.bak") + + if os.path.exists("tl_cache.json"): + print("[Database] Migrating old translation cache (tl_cache.json) to DB") + with open("tl_cache.json", "r") as file: + tl_cache = json.load(file) + import hashlib + + for entry in tl_cache: + key = hashlib.sha256( + ( + entry["source_lang"] + + entry["target_lang"] + + entry["source_txt"] + ).encode("utf-8") + ).hexdigest() + self.add_new_translation( + key, + entry["source_lang"], + entry["target_lang"], + entry["source_txt"], + entry["result_txt"], + ) + os.rename("tl_cache.json", "tl_cache.json.bak") + + if os.path.exists("wac_result_cache.json"): + print("[Database] Migrating old WAC Data cache to DB") + with open("wac_result_cache.json", "r") as file: + wac_cache = json.load(file) + import hashlib + + for key, value in wac_cache.items(): + self.add_new_wac_entry(key, value[0], value[1]) + os.rename("wac_result_cache.json", "wac_result_cache.json.bak") + + def add_new_wac_entry(self, key: str, is_news: bool, post_type: str): + news_var = 0 if is_news is False else 1 + self._cursor.execute( + "INSERT OR REPLACE INTO wacplus (id, isNews, type) VALUES (?, ?, ?)", + (key, news_var, post_type), + ) + self._conn.commit() + + def add_new_translation( + self, + key: str, + source_lang: str, + target_lang: str, + source_txt: str, + result_txt: str, + ): + self._cursor.execute( + "INSERT OR REPLACE INTO translation (id, source_lang, target_lang, source, result) VALUES (?, ?, ?, ?, ?)", + (key, source_lang, target_lang, source_txt, result_txt), + ) + self._conn.commit() + + def add_new_summary(self, key: str, headline: str, content: str): + self._cursor.execute( + "INSERT OR REPLACE INTO summarization (id, headline, content) VALUES (?, ?, ?)", + (key, headline, content), + ) + self._conn.commit() + + def add_news_entry(self, key: str, news_entry: dict): + is_ai_summary = 1 if news_entry.get("is_ai_summary", False) else 0 + en_headline = news_entry.get("en_headline", None) + en_content = news_entry.get("en_content", None) + headline = news_entry.get("headline", None) + url = news_entry.get("url", None) + self._cursor.execute( + "INSERT OR REPLACE INTO news (news_id, date, identifier, type, timestamp, headline, content, url, is_ai_summary, en_headline, en_content) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + key, + news_entry["date"], + news_entry["identifier"], + news_entry["type"], + news_entry["timestamp"], + headline, + news_entry["content"], + url, + is_ai_summary, + en_headline, + en_content, + ), + ) + for image_entry in news_entry["images"]: + if image_entry["image"].startswith("data:"): + continue + link_url = image_entry.get("link", None) + self._cursor.execute( + "INSERT OR REPLACE INTO news_images (news_id, image_url, link_url) VALUES (?, ?, ?)", + (key, image_entry["image"], link_url), + ) + self._conn.commit() + + + def get_summary(self, key: str): + self._cursor.execute( + "SELECT headline, content FROM summarization WHERE id = ?", (key,) + ) + result = self._cursor.fetchone() + if result is None: + return None + return {"headline": result[0], "content": result[1]} + + def get_translation(self, key: str): + self._cursor.execute("SELECT result FROM translation WHERE id = ?", (key,)) + result = self._cursor.fetchone() + if result is None: + return None + return result[0] + + def get_wac_entry(self, key: str): + self._cursor.execute("SELECT isNews, type FROM wacplus WHERE id = ?", (key,)) + result = self._cursor.fetchone() + if result is None: + return None + is_news = True if result[0] == 1 else False + return is_news, result[1] + + def check_news_id_exists(self, key: str) -> bool: + """ + Check if a news entry with the given ID exists in the database. + + :param key: The ID of the news entry to check. + :return: True if the news entry exists, False otherwise. + """ + self._cursor.execute("SELECT 1 FROM news WHERE news_id = ?", (key,)) + result = self._cursor.fetchone() + return result is not None |
