diff options
| author | Pinapelz <yukais@pinapelz.com> | 2026-05-08 23:27:10 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2026-05-09 00:27:57 -0700 |
| commit | 2c6b4907d1e032ede762cb32708ededa0b7fd973 (patch) | |
| tree | a78d29a18ab4ee54a497c6102a00d0f47878ef9f | |
| parent | 2b1defbe646305d5ecc8681ce3fd861cb62ab404 (diff) | |
modularize GUI logic
| -rw-r--r-- | gui/gui.py | 20 | ||||
| -rw-r--r-- | gui/gui_common.py | 12 | ||||
| -rw-r--r-- | gui/gui_loading.py | 133 | ||||
| -rw-r--r-- | gui/gui_runtime_dashboard.py | 179 | ||||
| -rw-r--r-- | gui/gui_settings.py (renamed from gui.py) | 272 | ||||
| -rw-r--r-- | server.py | 30 |
6 files changed, 448 insertions, 198 deletions
diff --git a/gui/gui.py b/gui/gui.py new file mode 100644 index 0000000..1624fb7 --- /dev/null +++ b/gui/gui.py @@ -0,0 +1,20 @@ +from gui.gui_loading import StatusCallback, run_with_loading_popup +from gui.gui_runtime_dashboard import ( + AudioActivityProvider, + RuntimeLogLinesProvider, + SubtitleLinesProvider, + run_runtime_dashboard, +) +from gui.gui_settings import prompt_input_sample_rate, select_settings + + +__all__ = [ + "AudioActivityProvider", + "RuntimeLogLinesProvider", + "SubtitleLinesProvider", + "StatusCallback", + "prompt_input_sample_rate", + "run_runtime_dashboard", + "run_with_loading_popup", + "select_settings", +] diff --git a/gui/gui_common.py b/gui/gui_common.py new file mode 100644 index 0000000..39bb38c --- /dev/null +++ b/gui/gui_common.py @@ -0,0 +1,12 @@ +from typing import cast +from PySide6.QtGui import QFont +from PySide6.QtWidgets import QApplication + + +def ensure_qt_app() -> QApplication: + app = QApplication.instance() + if app is None: + app = QApplication([]) + app = cast(QApplication, app) + app.setFont(QFont("Calibri", 12)) + return app diff --git a/gui/gui_loading.py b/gui/gui_loading.py new file mode 100644 index 0000000..a94e512 --- /dev/null +++ b/gui/gui_loading.py @@ -0,0 +1,133 @@ +from typing import Any, Callable, List, Optional, Tuple, TypeVar, cast +from queue import Empty, Queue +import threading +import time + +from PySide6.QtCore import QTimer +from PySide6.QtWidgets import QDialog, QHBoxLayout, QLabel, QProgressBar, QVBoxLayout + +from gui.gui.gui_common import ensure_qt_app + + +T = TypeVar("T") +StatusCallback = Callable[[str], None] + + +class _LoadingDialog(QDialog): + def __init__(self, title: str, initial_message: str) -> None: + super().__init__() + self.setWindowTitle(title) + self.setModal(True) + self.setFixedWidth(440) + + layout = QVBoxLayout(self) + + self._spinner_frames: List[str] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] + self._spinner_index = 0 + + spinner_row = QHBoxLayout() + self._spinner_label = QLabel(self._spinner_frames[0], self) + self._spinner_label.setStyleSheet("font-size: 20px; font-weight: 700; color: #8fd18f;") + spinner_row.addWidget(self._spinner_label) + + self._message_label = QLabel(initial_message, self) + self._message_label.setWordWrap(True) + self._message_label.setStyleSheet("font-size: 13px;") + spinner_row.addWidget(self._message_label, 1) + layout.addLayout(spinner_row) + + self._progress = QProgressBar(self) + self._progress.setRange(0, 0) + self._progress.setTextVisible(False) + layout.addWidget(self._progress) + + self._hint_label = QLabel("Please wait…", self) + self._hint_label.setStyleSheet("font-size: 12px; color: #9aa0a6;") + layout.addWidget(self._hint_label) + + self._spinner_timer = QTimer(self) + self._spinner_timer.setInterval(90) + self._spinner_timer.timeout.connect(self._tick_spinner) + self._spinner_timer.start() + + def _tick_spinner(self) -> None: + self._spinner_index = (self._spinner_index + 1) % len(self._spinner_frames) + self._spinner_label.setText(self._spinner_frames[self._spinner_index]) + + def set_message(self, message: str) -> None: + self._message_label.setText(message) + + +def run_with_loading_popup( + title: str, + initial_message: str, + task: Callable[[StatusCallback], T], +) -> T: + app = ensure_qt_app() + + dialog = _LoadingDialog(title=title, initial_message=initial_message) + events: Queue[Tuple[str, Any]] = Queue() + + def publish_status(message: str) -> None: + events.put(("status", message)) + + def worker() -> None: + try: + result = task(publish_status) + events.put(("result", result)) + except Exception as exc: + events.put(("error", exc)) + + thread = threading.Thread(target=worker, daemon=True) + thread.start() + + dialog.show() + dialog.raise_() + dialog.activateWindow() + + done = False + result_value: Optional[T] = None + error: Optional[Exception] = None + + while not done: + app.processEvents() + + while True: + try: + event_type, payload = events.get_nowait() + except Empty: + break + + if event_type == "status": + dialog.set_message(str(payload)) + elif event_type == "result": + result_value = cast(T, payload) + done = True + elif event_type == "error": + error = cast(Exception, payload) + done = True + + if thread.is_alive() and not done: + time.sleep(0.03) + continue + + if not thread.is_alive(): + try: + event_type, payload = events.get_nowait() + if event_type == "status": + dialog.set_message(str(payload)) + elif event_type == "result": + result_value = cast(T, payload) + elif event_type == "error": + error = cast(Exception, payload) + except Empty: + pass + done = True + + dialog.close() + app.processEvents() + + if error is not None: + raise error + + return cast(T, result_value) diff --git a/gui/gui_runtime_dashboard.py b/gui/gui_runtime_dashboard.py new file mode 100644 index 0000000..48c431e --- /dev/null +++ b/gui/gui_runtime_dashboard.py @@ -0,0 +1,179 @@ +from typing import Any, Callable, Dict, List + +from PySide6.QtCore import QTimer +from PySide6.QtWidgets import QGroupBox, QLabel, QTextEdit, QVBoxLayout, QWidget + +from gui.gui_common import ensure_qt_app + + +AudioActivityProvider = Callable[[], Dict[str, Any]] +RuntimeLogLinesProvider = Callable[[], List[str]] +SubtitleLinesProvider = Callable[[], List[str]] + + +class _RuntimeDashboard(QWidget): + def __init__( + self, + get_audio_activity: AudioActivityProvider, + get_runtime_logs: RuntimeLogLinesProvider, + get_subtitle_lines: SubtitleLinesProvider, + on_close: Callable[[], None], + ) -> None: + super().__init__() + self._get_audio_activity = get_audio_activity + self._get_runtime_logs = get_runtime_logs + self._get_subtitle_lines = get_subtitle_lines + self._on_close = on_close + self._closed = False + self._last_rendered_runtime_logs: str = "" + self._last_rendered_final_logs: str = "" + + self.setWindowTitle("auto-live-tl") + self.setMinimumSize(1100, 700) + + layout = QVBoxLayout(self) + + title = QLabel("auto-live-tl", self) + title.setStyleSheet("font-size: 22px; font-weight: 700; color: #000000;") + layout.addWidget(title) + + self.audio_indicator = QLabel("⚪ Idle", self) + self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;") + layout.addWidget(self.audio_indicator) + + self.audio_details = QLabel("RMS 0.00000 | threshold 0.00300", self) + self.audio_details.setStyleSheet("font-size: 13px; color: #9aa0a6;") + layout.addWidget(self.audio_details) + + raw_group = QGroupBox("Debug Log (It's recommended to fetch the final data via the SSE API, see the README)", self) + raw_group_layout = QVBoxLayout(raw_group) + + raw_title = QLabel("System / Raw Output", raw_group) + raw_group_layout.addWidget(raw_title) + + self.runtime_log_view = QTextEdit(raw_group) + self.runtime_log_view.setReadOnly(True) + self.runtime_log_view.setPlaceholderText("Waiting for raw Whisper output...") + self.runtime_log_view.setStyleSheet( + """ + QTextEdit { + background: #111417; + color: #d8dee9; + border: 1px solid #2f3742; + border-radius: 8px; + padding: 8px; + font-family: 'Consolas', 'Monaco', monospace; + font-size: 13px; + line-height: 1.4; + } + """ + ) + raw_group_layout.addWidget(self.runtime_log_view, 3) + + final_title = QLabel("Final (Sent via SSE)", raw_group) + raw_group_layout.addWidget(final_title) + + self.final_log_view = QTextEdit(raw_group) + self.final_log_view.setReadOnly(True) + self.final_log_view.setPlaceholderText("Waiting for FINAL output...") + self.final_log_view.setStyleSheet( + """ + QTextEdit { + background: #0f1410; + color: #dcf9dd; + border: 1px solid #2f4a35; + border-radius: 8px; + padding: 8px; + font-family: 'Consolas', 'Monaco', monospace; + font-size: 14px; + font-weight: 700; + line-height: 1.6; + } + """ + ) + raw_group_layout.addWidget(self.final_log_view, 2) + + layout.addWidget(raw_group, 1) + + self._timer = QTimer(self) + self._timer.setInterval(150) + self._timer.timeout.connect(self._refresh) + self._timer.start() + self._refresh() + + def _shutdown(self) -> None: + if self._closed: + return + self._closed = True + self._timer.stop() + try: + self._on_close() + except Exception: + pass + + def closeEvent(self, event: Any) -> None: # type: ignore[override] + self._shutdown() + super().closeEvent(event) + + def _refresh(self) -> None: + try: + activity = self._get_audio_activity() + except Exception: + activity = {} + + active = bool(activity.get("active", False)) + try: + rms = float(activity.get("rms", 0.0)) + except (TypeError, ValueError): + rms = 0.0 + try: + threshold = float(activity.get("threshold", 0.0)) + except (TypeError, ValueError): + threshold = 0.0 + + if active: + self.audio_indicator.setText("🟢 Audio detected") + self.audio_indicator.setStyleSheet("font-size: 16px; color: #8fd18f; font-weight: 600;") + else: + self.audio_indicator.setText("⚪ Idle") + self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;") + self.audio_details.setText(f"RMS {rms:.5f} | threshold {threshold:.5f}") + + try: + logs = self._get_runtime_logs() + except Exception: + logs = [] + runtime_lines = [line for line in logs if "[FINAL]" not in line] + final_lines = [line for line in logs if "[FINAL]" in line] + + joined_runtime_logs = "\n".join(runtime_lines) + if joined_runtime_logs != self._last_rendered_runtime_logs: + self._last_rendered_runtime_logs = joined_runtime_logs + self.runtime_log_view.setPlainText(joined_runtime_logs) + log_scroll = self.runtime_log_view.verticalScrollBar() + log_scroll.setValue(log_scroll.maximum()) + + joined_final_logs = "\n\n".join(final_lines) + if joined_final_logs != self._last_rendered_final_logs: + self._last_rendered_final_logs = joined_final_logs + self.final_log_view.setPlainText(joined_final_logs) + final_scroll = self.final_log_view.verticalScrollBar() + final_scroll.setValue(final_scroll.maximum()) + + +def run_runtime_dashboard( + get_audio_activity: AudioActivityProvider, + get_runtime_logs: RuntimeLogLinesProvider, + get_subtitle_lines: SubtitleLinesProvider, + on_close: Callable[[], None], +) -> None: + app = ensure_qt_app() + + dashboard = _RuntimeDashboard( + get_audio_activity=get_audio_activity, + get_runtime_logs=get_runtime_logs, + get_subtitle_lines=get_subtitle_lines, + on_close=on_close, + ) + dashboard.show() + app.exec() diff --git a/gui.py b/gui/gui_settings.py index d112469..c6d98b5 100644 --- a/gui.py +++ b/gui/gui_settings.py @@ -1,11 +1,10 @@ -from typing import Iterable, List, Tuple, Dict, Any, Callable, cast, Optional +from typing import Iterable, List, Tuple, Dict, Any, Optional import time + import numpy as np import sounddevice as sd -from PySide6.QtGui import QFont from PySide6.QtCore import Qt, QTimer from PySide6.QtWidgets import ( - QApplication, QCheckBox, QComboBox, QDialog, @@ -18,11 +17,13 @@ from PySide6.QtWidgets import ( QLineEdit, QMessageBox, QTabWidget, - QTextEdit, QVBoxLayout, QWidget, ) +from gui.gui_common import ensure_qt_app + + class _SettingsDialog(QDialog): def __init__( self, @@ -184,6 +185,46 @@ class _SettingsDialog(QDialog): ollama_tab_layout.addWidget(ollama_advanced_group) tabs.addTab(ollama_tab, "Ollama") + # OpenAI Realtime tab + openai_tab = QWidget(self) + openai_tab_layout = QVBoxLayout(openai_tab) + + openai_layout = QFormLayout() + openai_layout.setLabelAlignment(Qt.AlignmentFlag.AlignLeft) + + self.use_openai_realtime_checkbox = QCheckBox(openai_tab) + self.use_openai_realtime_checkbox.setChecked(bool(get_value("use_openai_realtime_translate", False))) + openai_layout.addRow(QLabel("Use OpenAI realtime translation:"), self.use_openai_realtime_checkbox) + + self.openai_api_key_edit = QLineEdit(str(get_value("openai_api_key", "")), openai_tab) + self.openai_api_key_edit.setEchoMode(QLineEdit.EchoMode.Password) + self.openai_api_key_edit.setPlaceholderText("sk-...") + openai_layout.addRow(QLabel("OpenAI API key:"), self.openai_api_key_edit) + + self.openai_output_language_edit = QLineEdit(str(get_value("openai_output_language", "es")), openai_tab) + self.openai_output_language_edit.setPlaceholderText("es") + openai_layout.addRow(QLabel("Target language code:"), self.openai_output_language_edit) + + self.openai_model_edit = QLineEdit(str(get_value("openai_model", "gpt-realtime-translate")), openai_tab) + self.openai_model_edit.setPlaceholderText("gpt-realtime-translate") + openai_layout.addRow(QLabel("Realtime model:"), self.openai_model_edit) + + self.openai_safety_identifier_edit = QLineEdit(str(get_value("openai_safety_identifier", "")), openai_tab) + self.openai_safety_identifier_edit.setPlaceholderText("optional hashed-user-id") + openai_layout.addRow(QLabel("OpenAI-Safety-Identifier (optional):"), self.openai_safety_identifier_edit) + + openai_tab_layout.addLayout(openai_layout) + + self.openai_hint_label = QLabel( + "When enabled, source audio is streamed to OpenAI /v1/realtime/translations and subtitle SSE events are produced from realtime transcript output. Ollama cleanup is bypassed.", + openai_tab, + ) + self.openai_hint_label.setWordWrap(True) + self.openai_hint_label.setStyleSheet("font-size: 12px; color: #9aa0a6;") + openai_tab_layout.addWidget(self.openai_hint_label) + + tabs.addTab(openai_tab, "OpenAI Realtime") + button_layout = QHBoxLayout() root_layout.addLayout(button_layout) button_box = QDialogButtonBox( @@ -196,6 +237,7 @@ class _SettingsDialog(QDialog): self.device_combo.currentIndexChanged.connect(self._restart_monitor_stream) self.audio_activity_threshold_edit.textChanged.connect(self._on_threshold_changed) + self.use_openai_realtime_checkbox.toggled.connect(self._sync_backend_controls) self._monitor_timer = QTimer(self) self._monitor_timer.setInterval(120) @@ -203,6 +245,7 @@ class _SettingsDialog(QDialog): self._monitor_timer.start() self._restart_monitor_stream() + self._sync_backend_controls(self.use_openai_realtime_checkbox.isChecked()) self._refresh_audio_indicator() def _warn(self, title: str, text: str) -> None: @@ -216,6 +259,21 @@ class _SettingsDialog(QDialog): except ValueError: pass + def _sync_backend_controls(self, use_openai: bool) -> None: + self.openai_api_key_edit.setEnabled(use_openai) + self.openai_output_language_edit.setEnabled(use_openai) + self.openai_model_edit.setEnabled(use_openai) + self.openai_safety_identifier_edit.setEnabled(use_openai) + + self.use_ollama_cleanup_checkbox.setEnabled(not use_openai) + self.ollama_device_combo.setEnabled(not use_openai) + self.ollama_model_edit.setEnabled(not use_openai) + self.ollama_context_edit.setEnabled(not use_openai) + self.ollama_batch_edit.setEnabled(not use_openai) + + if use_openai: + self.use_ollama_cleanup_checkbox.setChecked(False) + def _pick_monitor_sample_rate(self, device_index: int, preferred_rate: int) -> Optional[int]: common_rates: List[int] = [48000, 44100, 32000, 24000, 22050, 16000, 12000, 8000] tried = set() @@ -372,6 +430,19 @@ class _SettingsDialog(QDialog): self._warn("Invalid batch size", "Batch size must be a positive integer.") return + use_openai_realtime = self.use_openai_realtime_checkbox.isChecked() + openai_api_key = self.openai_api_key_edit.text().strip() + openai_output_language = self.openai_output_language_edit.text().strip() + openai_model = self.openai_model_edit.text().strip() or "gpt-realtime-translate" + openai_safety_identifier = self.openai_safety_identifier_edit.text().strip() + + if use_openai_realtime and not openai_api_key: + self._warn("OpenAI API key required", "Please provide your OpenAI API key to use realtime translation.") + return + if use_openai_realtime and not openai_output_language: + self._warn("Target language required", "Please provide a target language code (example: es, fr, ja).") + return + self.selected_settings = { "audio_device_name": self.device_names[selection], "model_name": model_name, @@ -383,11 +454,16 @@ class _SettingsDialog(QDialog): "context_seconds": context_seconds, "update_interval_seconds": update_interval_seconds, "audio_activity_threshold": audio_activity_threshold, - "use_ollama_cleanup": self.use_ollama_cleanup_checkbox.isChecked(), + "use_ollama_cleanup": self.use_ollama_cleanup_checkbox.isChecked() and not use_openai_realtime, "ollama_device": self.ollama_device_combo.currentText(), "ollama_model": self.ollama_model_edit.text().strip(), "ollama_context_window": ollama_context_window, "ollama_raw_batch_size": ollama_raw_batch_size, + "use_openai_realtime_translate": use_openai_realtime, + "openai_api_key": openai_api_key, + "openai_output_language": openai_output_language or "es", + "openai_model": openai_model, + "openai_safety_identifier": openai_safety_identifier, } self._monitor_timer.stop() self._stop_monitor_stream() @@ -411,11 +487,7 @@ def select_settings( if not input_devices: raise RuntimeError("No audio input devices found.") - app = QApplication.instance() - if app is None: - app = QApplication([]) - app = cast(QApplication, app) - app.setFont(QFont("Calibri", 12)) + ensure_qt_app() dialog = _SettingsDialog( settings=settings, @@ -433,186 +505,8 @@ def select_settings( return dialog.selected_settings -AudioActivityProvider = Callable[[], Dict[str, Any]] -RuntimeLogLinesProvider = Callable[[], List[str]] -SubtitleLinesProvider = Callable[[], List[str]] - - -class _RuntimeDashboard(QWidget): - def __init__( - self, - get_audio_activity: AudioActivityProvider, - get_runtime_logs: RuntimeLogLinesProvider, - get_subtitle_lines: SubtitleLinesProvider, - on_close: Callable[[], None], - ) -> None: - super().__init__() - self._get_audio_activity = get_audio_activity - self._get_runtime_logs = get_runtime_logs - self._get_subtitle_lines = get_subtitle_lines - self._on_close = on_close - self._closed = False - self._last_rendered_runtime_logs: str = "" - self._last_rendered_final_logs: str = "" - - self.setWindowTitle("auto-live-tl") - self.setMinimumSize(1100, 700) - - layout = QVBoxLayout(self) - - title = QLabel("auto-live-tl", self) - title.setStyleSheet("font-size: 22px; font-weight: 700; color: #000000;") - layout.addWidget(title) - - self.audio_indicator = QLabel("⚪ Idle", self) - self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;") - layout.addWidget(self.audio_indicator) - - self.audio_details = QLabel("RMS 0.00000 | threshold 0.00300", self) - self.audio_details.setStyleSheet("font-size: 13px; color: #9aa0a6;") - layout.addWidget(self.audio_details) - - raw_group = QGroupBox("Debug Log (It's recommended to fetch the final data via the SSE API, see the README)", self) - raw_group_layout = QVBoxLayout(raw_group) - - raw_title = QLabel("System / Raw Output", raw_group) - raw_group_layout.addWidget(raw_title) - - self.runtime_log_view = QTextEdit(raw_group) - self.runtime_log_view.setReadOnly(True) - self.runtime_log_view.setPlaceholderText("Waiting for raw Whisper output...") - self.runtime_log_view.setStyleSheet( - """ - QTextEdit { - background: #111417; - color: #d8dee9; - border: 1px solid #2f3742; - border-radius: 8px; - padding: 8px; - font-family: 'Consolas', 'Monaco', monospace; - font-size: 13px; - line-height: 1.4; - } - """ - ) - raw_group_layout.addWidget(self.runtime_log_view, 3) - - final_title = QLabel("Final (Sent via SSE)", raw_group) - raw_group_layout.addWidget(final_title) - - self.final_log_view = QTextEdit(raw_group) - self.final_log_view.setReadOnly(True) - self.final_log_view.setPlaceholderText("Waiting for FINAL output...") - self.final_log_view.setStyleSheet( - """ - QTextEdit { - background: #0f1410; - color: #dcf9dd; - border: 1px solid #2f4a35; - border-radius: 8px; - padding: 8px; - font-family: 'Consolas', 'Monaco', monospace; - font-size: 14px; - font-weight: 700; - line-height: 1.6; - } - """ - ) - raw_group_layout.addWidget(self.final_log_view, 2) - - layout.addWidget(raw_group, 1) - - self._timer = QTimer(self) - self._timer.setInterval(150) - self._timer.timeout.connect(self._refresh) - self._timer.start() - self._refresh() - - def _shutdown(self) -> None: - if self._closed: - return - self._closed = True - self._timer.stop() - try: - self._on_close() - except Exception: - pass - - def closeEvent(self, event: Any) -> None: # type: ignore[override] - self._shutdown() - super().closeEvent(event) - - def _refresh(self) -> None: - try: - activity = self._get_audio_activity() - except Exception: - activity = {} - - active = bool(activity.get("active", False)) - try: - rms = float(activity.get("rms", 0.0)) - except (TypeError, ValueError): - rms = 0.0 - try: - threshold = float(activity.get("threshold", 0.0)) - except (TypeError, ValueError): - threshold = 0.0 - - if active: - self.audio_indicator.setText("🟢 Audio detected") - self.audio_indicator.setStyleSheet("font-size: 16px; color: #8fd18f; font-weight: 600;") - else: - self.audio_indicator.setText("⚪ Idle") - self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;") - self.audio_details.setText(f"RMS {rms:.5f} | threshold {threshold:.5f}") - - try: - logs = self._get_runtime_logs() - except Exception: - logs = [] - runtime_lines = [line for line in logs if "[FINAL]" not in line] - final_lines = [line for line in logs if "[FINAL]" in line] - - joined_runtime_logs = "\n".join(runtime_lines) - if joined_runtime_logs != self._last_rendered_runtime_logs: - self._last_rendered_runtime_logs = joined_runtime_logs - self.runtime_log_view.setPlainText(joined_runtime_logs) - log_scroll = self.runtime_log_view.verticalScrollBar() - log_scroll.setValue(log_scroll.maximum()) - - joined_final_logs = "\n\n".join(final_lines) - if joined_final_logs != self._last_rendered_final_logs: - self._last_rendered_final_logs = joined_final_logs - self.final_log_view.setPlainText(joined_final_logs) - final_scroll = self.final_log_view.verticalScrollBar() - final_scroll.setValue(final_scroll.maximum()) - - - - -def run_runtime_dashboard( - get_audio_activity: AudioActivityProvider, - get_runtime_logs: RuntimeLogLinesProvider, - get_subtitle_lines: SubtitleLinesProvider, - on_close: Callable[[], None], -) -> None: - app = QApplication.instance() - if app is None: - app = QApplication([]) - app = cast(QApplication, app) - app.setFont(QFont("Calibri", 12)) - - dashboard = _RuntimeDashboard( - get_audio_activity=get_audio_activity, - get_runtime_logs=get_runtime_logs, - get_subtitle_lines=get_subtitle_lines, - on_close=on_close, - ) - dashboard.show() - app.exec() - - def prompt_input_sample_rate(device_index: int, common_rates: Iterable[int]) -> int: + ensure_qt_app() rates = list(common_rates) while True: prompt = ( @@ -5,7 +5,7 @@ import queue import os from collections import Counter, deque import re -from typing import Any, Deque, Dict, Optional, Set, List, Iterator +from typing import Any, Deque, Dict, Optional, Set, List, Iterator, Callable from flask import Flask from flask_cors import CORS import ollama as _ollama @@ -14,7 +14,7 @@ from ollama import ChatResponse import numpy as np import sounddevice as sd from faster_whisper import WhisperModel -from gui import select_settings, prompt_input_sample_rate, run_runtime_dashboard +from gui import select_settings, prompt_input_sample_rate, run_runtime_dashboard, run_with_loading_popup from routes import register_routes from config import _SYSTEM_PROMPT, _LLM_EMPTY_SENTINELS, _HALLUCINATION_PHRASES @@ -162,34 +162,42 @@ def cleanup_subtitle_with_ollama(raw_text: str, context: List[str]) -> Optional[ return None -def ensure_ollama_ready() -> None: +def ensure_ollama_ready(status_callback: Optional[Callable[[str], None]] = None) -> None: """ Pulls Ollama model is necessary, checks model is downloaded """ + def report(message: str) -> None: + print(message) + if status_callback is not None: + status_callback(message.strip()) + + report("Checking Ollama server availability...") try: local = _ollama.list() except Exception as exc: raise RuntimeError( f"Cannot reach Ollama — is the server running? ({exc})" ) from exc + model_names: List[str] = [m.model for m in local.models] if not any(name.startswith(OLLAMA_MODEL) for name in model_names): - print(f" '{OLLAMA_MODEL}' not found locally — pulling (this may take a while) ...") + report(f"Model '{OLLAMA_MODEL}' not found locally. Pulling now (this can take a while)...") try: _ollama.pull(OLLAMA_MODEL) - print(" Pull complete.") + report("Model pull complete.") except Exception as exc: raise RuntimeError(f"Failed to pull model '{OLLAMA_MODEL}': {exc}") from exc else: - print(f" Model found locally.") - print(" Warming up model, almost done ...") + report("Model found locally.") + + report("Warming up Ollama model...") try: chat( model=OLLAMA_MODEL, messages=[{"role": "user", "content": "Ready?"}], options=OLLAMA_OPTIONS, ) - print(" ✅ Ollama is ready.") + report("✅ Ollama is ready.") except Exception as exc: raise RuntimeError(f"Ollama warm-up failed: {exc}") from exc @@ -544,7 +552,11 @@ def main() -> None: subtitle_context = deque(maxlen=OLLAMA_CONTEXT_WINDOW) RAW_BATCH_SIZE = int(settings.get("ollama_raw_batch_size", 3)) if USE_OLLAMA_CLEANUP: - ensure_ollama_ready() + run_with_loading_popup( + title="Preparing Ollama model", + initial_message="Checking model availability...", + task=ensure_ollama_ready, + ) llm_thread = threading.Thread(target=llm_processing_loop, daemon=True) llm_thread.start() |
