aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- gui/gui.py 20
-rw-r--r-- gui/gui_common.py 12
-rw-r--r-- gui/gui_loading.py 133
-rw-r--r-- gui/gui_runtime_dashboard.py 179
-rw-r--r-- gui/gui_settings.py (renamed from gui.py) 272
-rw-r--r-- server.py 30
6 files changed, 448 insertions, 198 deletions
diff --git a/gui/gui.py b/gui/gui.py
new file mode 100644
index 0000000..1624fb7
--- /dev/null
+++ b/gui/gui.py
@@ -0,0 +1,20 @@
+from gui.gui_loading import StatusCallback, run_with_loading_popup
+from gui.gui_runtime_dashboard import (
+ AudioActivityProvider,
+ RuntimeLogLinesProvider,
+ SubtitleLinesProvider,
+ run_runtime_dashboard,
+)
+from gui.gui_settings import prompt_input_sample_rate, select_settings
+
+
+__all__ = [
+ "AudioActivityProvider",
+ "RuntimeLogLinesProvider",
+ "SubtitleLinesProvider",
+ "StatusCallback",
+ "prompt_input_sample_rate",
+ "run_runtime_dashboard",
+ "run_with_loading_popup",
+ "select_settings",
+]
diff --git a/gui/gui_common.py b/gui/gui_common.py
new file mode 100644
index 0000000..39bb38c
--- /dev/null
+++ b/gui/gui_common.py
@@ -0,0 +1,12 @@
+from typing import cast
+from PySide6.QtGui import QFont
+from PySide6.QtWidgets import QApplication
+
+
+def ensure_qt_app() -> QApplication:
+    """Return the current ``QApplication``, creating one when none exists.
+
+    The application font is set to Calibri 12 on every call, whether the
+    instance was just created or was already running.
+    """
+    existing = QApplication.instance()
+    qt_app = cast(QApplication, QApplication([]) if existing is None else existing)
+    qt_app.setFont(QFont("Calibri", 12))
+    return qt_app
diff --git a/gui/gui_loading.py b/gui/gui_loading.py
new file mode 100644
index 0000000..a94e512
--- /dev/null
+++ b/gui/gui_loading.py
@@ -0,0 +1,133 @@
+from typing import Any, Callable, List, Optional, Tuple, TypeVar, cast
+from queue import Empty, Queue
+import threading
+import time
+
+from PySide6.QtCore import QTimer
+from PySide6.QtWidgets import QDialog, QHBoxLayout, QLabel, QProgressBar, QVBoxLayout
+
+# gui_common lives directly inside the ``gui`` package, and the other gui
+# modules import it as ``gui.gui_common``. ``gui.gui`` is the facade module
+# gui/gui.py, not a package, so ``gui.gui.gui_common`` cannot be resolved.
+from gui.gui_common import ensure_qt_app
+
+
+# Return type of the task handed to run_with_loading_popup.
+T = TypeVar("T")
+# Callback a task invokes with a progress message to display.
+StatusCallback = Callable[[str], None]
+
+
+class _LoadingDialog(QDialog):
+    """Modal, fixed-width dialog with a text spinner, a message and a busy bar."""
+
+    def __init__(self, title: str, initial_message: str) -> None:
+        """Build the dialog widgets and start the spinner animation timer.
+
+        Args:
+            title: Text used as the window title.
+            initial_message: Message shown until ``set_message`` is called.
+        """
+        super().__init__()
+        self.setWindowTitle(title)
+        self.setModal(True)
+        self.setFixedWidth(440)
+
+        root_layout = QVBoxLayout(self)
+
+        # Glyphs cycled by _tick_spinner to animate the spinner label.
+        self._spinner_frames: List[str] = [
+            "⠋",
+            "⠙",
+            "⠹",
+            "⠸",
+            "⠼",
+            "⠴",
+            "⠦",
+            "⠧",
+            "⠇",
+            "⠏",
+        ]
+        self._spinner_index = 0
+
+        # Top row: spinner glyph on the left, stretching message on the right.
+        status_row = QHBoxLayout()
+        self._spinner_label = QLabel(self._spinner_frames[0], self)
+        self._spinner_label.setStyleSheet("font-size: 20px; font-weight: 700; color: #8fd18f;")
+        status_row.addWidget(self._spinner_label)
+
+        self._message_label = QLabel(initial_message, self)
+        self._message_label.setWordWrap(True)
+        self._message_label.setStyleSheet("font-size: 13px;")
+        status_row.addWidget(self._message_label, 1)
+        root_layout.addLayout(status_row)
+
+        # A (0, 0) range makes QProgressBar show a busy indicator instead of
+        # a percentage.
+        self._progress = QProgressBar(self)
+        self._progress.setRange(0, 0)
+        self._progress.setTextVisible(False)
+        root_layout.addWidget(self._progress)
+
+        self._hint_label = QLabel("Please wait…", self)
+        self._hint_label.setStyleSheet("font-size: 12px; color: #9aa0a6;")
+        root_layout.addWidget(self._hint_label)
+
+        # Advance the spinner glyph every 90 ms.
+        self._spinner_timer = QTimer(self)
+        self._spinner_timer.setInterval(90)
+        self._spinner_timer.timeout.connect(self._tick_spinner)
+        self._spinner_timer.start()
+
+    def _tick_spinner(self) -> None:
+        """Show the next spinner frame, wrapping around after the last one."""
+        next_index = (self._spinner_index + 1) % len(self._spinner_frames)
+        self._spinner_index = next_index
+        self._spinner_label.setText(self._spinner_frames[next_index])
+
+    def set_message(self, message: str) -> None:
+        """Replace the text shown next to the spinner."""
+        self._message_label.setText(message)
+
+
+def run_with_loading_popup(
+    title: str,
+    initial_message: str,
+    task: Callable[[StatusCallback], T],
+) -> T:
+    """Run ``task`` on a worker thread while showing a modal loading dialog.
+
+    ``task`` receives a status callback; every message passed to it is queued
+    and shown in the dialog from the calling thread. The calling thread keeps
+    pumping Qt events until the worker posts an outcome or terminates.
+
+    Args:
+        title: Window title of the loading dialog.
+        initial_message: Message shown until the task reports a status.
+        task: Callable executed on a daemon thread.
+
+    Returns:
+        The value returned by ``task``. If the worker thread ends without
+        posting a result or an error (for example because it raised something
+        that is not an ``Exception`` subclass), ``None`` is returned.
+
+    Raises:
+        Exception: Whatever ``Exception`` subclass ``task`` raised, re-raised
+            on the calling thread.
+    """
+    app = ensure_qt_app()
+
+    dialog = _LoadingDialog(title=title, initial_message=initial_message)
+    events: Queue[Tuple[str, Any]] = Queue()
+    # ("result" | "error", payload) entries collected from the queue.
+    outcomes: List[Tuple[str, Any]] = []
+
+    def publish_status(message: str) -> None:
+        events.put(("status", message))
+
+    def worker() -> None:
+        try:
+            result = task(publish_status)
+            events.put(("result", result))
+        except Exception as exc:
+            events.put(("error", exc))
+
+    def drain_events() -> None:
+        """Apply every queued status and collect any result/error."""
+        while True:
+            try:
+                event_type, payload = events.get_nowait()
+            except Empty:
+                return
+            if event_type == "status":
+                dialog.set_message(str(payload))
+            elif event_type in ("result", "error"):
+                outcomes.append((event_type, payload))
+
+    thread = threading.Thread(target=worker, daemon=True)
+    thread.start()
+
+    dialog.show()
+    dialog.raise_()
+    dialog.activateWindow()
+
+    try:
+        while True:
+            app.processEvents()
+
+            # Sample liveness *before* draining: if the worker was already
+            # dead at this point, everything it ever posted is in the queue,
+            # so one full drain cannot miss its final result or error.
+            worker_finished = not thread.is_alive()
+            drain_events()
+
+            if outcomes or worker_finished:
+                break
+            time.sleep(0.03)
+    finally:
+        # Close the dialog even when the wait loop itself raises.
+        dialog.close()
+        app.processEvents()
+
+    if not outcomes:
+        # The worker ended without reporting anything.
+        return cast(T, None)
+
+    event_type, payload = outcomes[0]
+    if event_type == "error":
+        raise cast(Exception, payload)
+    return cast(T, payload)
diff --git a/gui/gui_runtime_dashboard.py b/gui/gui_runtime_dashboard.py
new file mode 100644
index 0000000..48c431e
--- /dev/null
+++ b/gui/gui_runtime_dashboard.py
@@ -0,0 +1,179 @@
+from typing import Any, Callable, Dict, List
+
+from PySide6.QtCore import QTimer
+from PySide6.QtWidgets import QGroupBox, QLabel, QTextEdit, QVBoxLayout, QWidget
+
+from gui.gui_common import ensure_qt_app
+
+
+AudioActivityProvider = Callable[[], Dict[str, Any]]
+RuntimeLogLinesProvider = Callable[[], List[str]]
+SubtitleLinesProvider = Callable[[], List[str]]
+
+
+class _RuntimeDashboard(QWidget):
+ # Window that polls the supplied provider callables on a timer and shows
+ # an audio-activity indicator plus two read-only log panes.
+ def __init__(
+ self,
+ get_audio_activity: AudioActivityProvider,
+ get_runtime_logs: RuntimeLogLinesProvider,
+ get_subtitle_lines: SubtitleLinesProvider,
+ on_close: Callable[[], None],
+ ) -> None:
+ super().__init__()
+ self._get_audio_activity = get_audio_activity
+ self._get_runtime_logs = get_runtime_logs
+ # Stored only; nothing in this class calls _get_subtitle_lines.
+ self._get_subtitle_lines = get_subtitle_lines
+ self._on_close = on_close
+ # Set by _shutdown so the shutdown work runs at most once.
+ self._closed = False
+ # Text last pushed into each log view; _refresh compares against these
+ # to skip re-rendering when nothing changed.
+ self._last_rendered_runtime_logs: str = ""
+ self._last_rendered_final_logs: str = ""
+
+ self.setWindowTitle("auto-live-tl")
+ self.setMinimumSize(1100, 700)
+
+ layout = QVBoxLayout(self)
+
+ title = QLabel("auto-live-tl", self)
+ title.setStyleSheet("font-size: 22px; font-weight: 700; color: #000000;")
+ layout.addWidget(title)
+
+ # Initial indicator/details text; _refresh overwrites both.
+ self.audio_indicator = QLabel("⚪ Idle", self)
+ self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;")
+ layout.addWidget(self.audio_indicator)
+
+ self.audio_details = QLabel("RMS 0.00000 | threshold 0.00300", self)
+ self.audio_details.setStyleSheet("font-size: 13px; color: #9aa0a6;")
+ layout.addWidget(self.audio_details)
+
+ # One group box holds both log panes: non-FINAL lines on top, FINAL lines below.
+ raw_group = QGroupBox("Debug Log (It's recommended to fetch the final data via the SSE API, see the README)", self)
+ raw_group_layout = QVBoxLayout(raw_group)
+
+ raw_title = QLabel("System / Raw Output", raw_group)
+ raw_group_layout.addWidget(raw_title)
+
+ self.runtime_log_view = QTextEdit(raw_group)
+ self.runtime_log_view.setReadOnly(True)
+ self.runtime_log_view.setPlaceholderText("Waiting for raw Whisper output...")
+ self.runtime_log_view.setStyleSheet(
+ """
+ QTextEdit {
+ background: #111417;
+ color: #d8dee9;
+ border: 1px solid #2f3742;
+ border-radius: 8px;
+ padding: 8px;
+ font-family: 'Consolas', 'Monaco', monospace;
+ font-size: 13px;
+ line-height: 1.4;
+ }
+ """
+ )
+ # Stretch factors 3 and 2 split the group's height between the two panes.
+ raw_group_layout.addWidget(self.runtime_log_view, 3)
+
+ final_title = QLabel("Final (Sent via SSE)", raw_group)
+ raw_group_layout.addWidget(final_title)
+
+ self.final_log_view = QTextEdit(raw_group)
+ self.final_log_view.setReadOnly(True)
+ self.final_log_view.setPlaceholderText("Waiting for FINAL output...")
+ self.final_log_view.setStyleSheet(
+ """
+ QTextEdit {
+ background: #0f1410;
+ color: #dcf9dd;
+ border: 1px solid #2f4a35;
+ border-radius: 8px;
+ padding: 8px;
+ font-family: 'Consolas', 'Monaco', monospace;
+ font-size: 14px;
+ font-weight: 700;
+ line-height: 1.6;
+ }
+ """
+ )
+ raw_group_layout.addWidget(self.final_log_view, 2)
+
+ layout.addWidget(raw_group, 1)
+
+ # Poll the providers every 150 ms; the direct _refresh() call below
+ # paints the initial state without waiting for the first tick.
+ self._timer = QTimer(self)
+ self._timer.setInterval(150)
+ self._timer.timeout.connect(self._refresh)
+ self._timer.start()
+ self._refresh()
+
+ # Stop polling and notify the owner via on_close. Runs at most once; any
+ # Exception raised by on_close is discarded.
+ def _shutdown(self) -> None:
+ if self._closed:
+ return
+ self._closed = True
+ self._timer.stop()
+ try:
+ self._on_close()
+ except Exception:
+ pass
+
+ # Run the shutdown work before passing the close event on to QWidget.
+ def closeEvent(self, event: Any) -> None: # type: ignore[override]
+ self._shutdown()
+ super().closeEvent(event)
+
+ # Timer slot: re-read the providers and update the indicator and log panes.
+ def _refresh(self) -> None:
+ # A provider that raises is treated as "no data" for this tick.
+ try:
+ activity = self._get_audio_activity()
+ except Exception:
+ activity = {}
+
+ # Missing or non-numeric values fall back to False / 0.0.
+ active = bool(activity.get("active", False))
+ try:
+ rms = float(activity.get("rms", 0.0))
+ except (TypeError, ValueError):
+ rms = 0.0
+ try:
+ threshold = float(activity.get("threshold", 0.0))
+ except (TypeError, ValueError):
+ threshold = 0.0
+
+ if active:
+ self.audio_indicator.setText("🟢 Audio detected")
+ self.audio_indicator.setStyleSheet("font-size: 16px; color: #8fd18f; font-weight: 600;")
+ else:
+ self.audio_indicator.setText("⚪ Idle")
+ self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;")
+ self.audio_details.setText(f"RMS {rms:.5f} | threshold {threshold:.5f}")
+
+ try:
+ logs = self._get_runtime_logs()
+ except Exception:
+ logs = []
+ # Lines containing the "[FINAL]" marker go to the lower pane, all others to the upper one.
+ runtime_lines = [line for line in logs if "[FINAL]" not in line]
+ final_lines = [line for line in logs if "[FINAL]" in line]
+
+ # Re-render a pane only when its text changed, then scroll it to the bottom.
+ joined_runtime_logs = "\n".join(runtime_lines)
+ if joined_runtime_logs != self._last_rendered_runtime_logs:
+ self._last_rendered_runtime_logs = joined_runtime_logs
+ self.runtime_log_view.setPlainText(joined_runtime_logs)
+ log_scroll = self.runtime_log_view.verticalScrollBar()
+ log_scroll.setValue(log_scroll.maximum())
+
+ # FINAL lines are separated by a blank line.
+ joined_final_logs = "\n\n".join(final_lines)
+ if joined_final_logs != self._last_rendered_final_logs:
+ self._last_rendered_final_logs = joined_final_logs
+ self.final_log_view.setPlainText(joined_final_logs)
+ final_scroll = self.final_log_view.verticalScrollBar()
+ final_scroll.setValue(final_scroll.maximum())
+
+
+def run_runtime_dashboard(
+    get_audio_activity: AudioActivityProvider,
+    get_runtime_logs: RuntimeLogLinesProvider,
+    get_subtitle_lines: SubtitleLinesProvider,
+    on_close: Callable[[], None],
+) -> None:
+    """Show the runtime dashboard and run the Qt event loop until it exits.
+
+    Args:
+        get_audio_activity: Provider passed through to the dashboard.
+        get_runtime_logs: Provider passed through to the dashboard.
+        get_subtitle_lines: Provider passed through to the dashboard.
+        on_close: Callback passed through to the dashboard.
+    """
+    qt_app = ensure_qt_app()
+
+    # Keep a local reference to the window for as long as the loop runs.
+    window = _RuntimeDashboard(
+        on_close=on_close,
+        get_subtitle_lines=get_subtitle_lines,
+        get_runtime_logs=get_runtime_logs,
+        get_audio_activity=get_audio_activity,
+    )
+    window.show()
+    qt_app.exec()
diff --git a/gui.py b/gui/gui_settings.py
index d112469..c6d98b5 100644
--- a/gui.py
+++ b/gui/gui_settings.py
@@ -1,11 +1,10 @@
-from typing import Iterable, List, Tuple, Dict, Any, Callable, cast, Optional
+from typing import Iterable, List, Tuple, Dict, Any, Optional
import time
+
import numpy as np
import sounddevice as sd
-from PySide6.QtGui import QFont
from PySide6.QtCore import Qt, QTimer
from PySide6.QtWidgets import (
- QApplication,
QCheckBox,
QComboBox,
QDialog,
@@ -18,11 +17,13 @@ from PySide6.QtWidgets import (
QLineEdit,
QMessageBox,
QTabWidget,
- QTextEdit,
QVBoxLayout,
QWidget,
)
+from gui.gui_common import ensure_qt_app
+
+
class _SettingsDialog(QDialog):
def __init__(
self,
@@ -184,6 +185,46 @@ class _SettingsDialog(QDialog):
ollama_tab_layout.addWidget(ollama_advanced_group)
tabs.addTab(ollama_tab, "Ollama")
+ # OpenAI Realtime tab
+ openai_tab = QWidget(self)
+ openai_tab_layout = QVBoxLayout(openai_tab)
+
+ openai_layout = QFormLayout()
+ openai_layout.setLabelAlignment(Qt.AlignmentFlag.AlignLeft)
+
+ self.use_openai_realtime_checkbox = QCheckBox(openai_tab)
+ self.use_openai_realtime_checkbox.setChecked(bool(get_value("use_openai_realtime_translate", False)))
+ openai_layout.addRow(QLabel("Use OpenAI realtime translation:"), self.use_openai_realtime_checkbox)
+
+ self.openai_api_key_edit = QLineEdit(str(get_value("openai_api_key", "")), openai_tab)
+ self.openai_api_key_edit.setEchoMode(QLineEdit.EchoMode.Password)
+ self.openai_api_key_edit.setPlaceholderText("sk-...")
+ openai_layout.addRow(QLabel("OpenAI API key:"), self.openai_api_key_edit)
+
+ self.openai_output_language_edit = QLineEdit(str(get_value("openai_output_language", "es")), openai_tab)
+ self.openai_output_language_edit.setPlaceholderText("es")
+ openai_layout.addRow(QLabel("Target language code:"), self.openai_output_language_edit)
+
+ self.openai_model_edit = QLineEdit(str(get_value("openai_model", "gpt-realtime-translate")), openai_tab)
+ self.openai_model_edit.setPlaceholderText("gpt-realtime-translate")
+ openai_layout.addRow(QLabel("Realtime model:"), self.openai_model_edit)
+
+ self.openai_safety_identifier_edit = QLineEdit(str(get_value("openai_safety_identifier", "")), openai_tab)
+ self.openai_safety_identifier_edit.setPlaceholderText("optional hashed-user-id")
+ openai_layout.addRow(QLabel("OpenAI-Safety-Identifier (optional):"), self.openai_safety_identifier_edit)
+
+ openai_tab_layout.addLayout(openai_layout)
+
+ self.openai_hint_label = QLabel(
+ "When enabled, source audio is streamed to OpenAI /v1/realtime/translations and subtitle SSE events are produced from realtime transcript output. Ollama cleanup is bypassed.",
+ openai_tab,
+ )
+ self.openai_hint_label.setWordWrap(True)
+ self.openai_hint_label.setStyleSheet("font-size: 12px; color: #9aa0a6;")
+ openai_tab_layout.addWidget(self.openai_hint_label)
+
+ tabs.addTab(openai_tab, "OpenAI Realtime")
+
button_layout = QHBoxLayout()
root_layout.addLayout(button_layout)
button_box = QDialogButtonBox(
@@ -196,6 +237,7 @@ class _SettingsDialog(QDialog):
self.device_combo.currentIndexChanged.connect(self._restart_monitor_stream)
self.audio_activity_threshold_edit.textChanged.connect(self._on_threshold_changed)
+ self.use_openai_realtime_checkbox.toggled.connect(self._sync_backend_controls)
self._monitor_timer = QTimer(self)
self._monitor_timer.setInterval(120)
@@ -203,6 +245,7 @@ class _SettingsDialog(QDialog):
self._monitor_timer.start()
self._restart_monitor_stream()
+ self._sync_backend_controls(self.use_openai_realtime_checkbox.isChecked())
self._refresh_audio_indicator()
def _warn(self, title: str, text: str) -> None:
@@ -216,6 +259,21 @@ class _SettingsDialog(QDialog):
except ValueError:
pass
+ def _sync_backend_controls(self, use_openai: bool) -> None:
+     """Enable the controls of the selected backend and disable the other's.
+
+     The OpenAI fields are enabled exactly when ``use_openai`` is true and
+     the Ollama controls exactly when it is false. Selecting OpenAI also
+     unchecks the Ollama cleanup checkbox.
+     """
+     openai_widgets = (
+         self.openai_api_key_edit,
+         self.openai_output_language_edit,
+         self.openai_model_edit,
+         self.openai_safety_identifier_edit,
+     )
+     for widget in openai_widgets:
+         widget.setEnabled(use_openai)
+
+     use_ollama = not use_openai
+     ollama_widgets = (
+         self.use_ollama_cleanup_checkbox,
+         self.ollama_device_combo,
+         self.ollama_model_edit,
+         self.ollama_context_edit,
+         self.ollama_batch_edit,
+     )
+     for widget in ollama_widgets:
+         widget.setEnabled(use_ollama)
+
+     if not use_ollama:
+         self.use_ollama_cleanup_checkbox.setChecked(False)
+
def _pick_monitor_sample_rate(self, device_index: int, preferred_rate: int) -> Optional[int]:
common_rates: List[int] = [48000, 44100, 32000, 24000, 22050, 16000, 12000, 8000]
tried = set()
@@ -372,6 +430,19 @@ class _SettingsDialog(QDialog):
self._warn("Invalid batch size", "Batch size must be a positive integer.")
return
+ use_openai_realtime = self.use_openai_realtime_checkbox.isChecked()
+ openai_api_key = self.openai_api_key_edit.text().strip()
+ openai_output_language = self.openai_output_language_edit.text().strip()
+ openai_model = self.openai_model_edit.text().strip() or "gpt-realtime-translate"
+ openai_safety_identifier = self.openai_safety_identifier_edit.text().strip()
+
+ if use_openai_realtime and not openai_api_key:
+ self._warn("OpenAI API key required", "Please provide your OpenAI API key to use realtime translation.")
+ return
+ if use_openai_realtime and not openai_output_language:
+ self._warn("Target language required", "Please provide a target language code (example: es, fr, ja).")
+ return
+
self.selected_settings = {
"audio_device_name": self.device_names[selection],
"model_name": model_name,
@@ -383,11 +454,16 @@ class _SettingsDialog(QDialog):
"context_seconds": context_seconds,
"update_interval_seconds": update_interval_seconds,
"audio_activity_threshold": audio_activity_threshold,
- "use_ollama_cleanup": self.use_ollama_cleanup_checkbox.isChecked(),
+ "use_ollama_cleanup": self.use_ollama_cleanup_checkbox.isChecked() and not use_openai_realtime,
"ollama_device": self.ollama_device_combo.currentText(),
"ollama_model": self.ollama_model_edit.text().strip(),
"ollama_context_window": ollama_context_window,
"ollama_raw_batch_size": ollama_raw_batch_size,
+ "use_openai_realtime_translate": use_openai_realtime,
+ "openai_api_key": openai_api_key,
+ "openai_output_language": openai_output_language or "es",
+ "openai_model": openai_model,
+ "openai_safety_identifier": openai_safety_identifier,
}
self._monitor_timer.stop()
self._stop_monitor_stream()
@@ -411,11 +487,7 @@ def select_settings(
if not input_devices:
raise RuntimeError("No audio input devices found.")
- app = QApplication.instance()
- if app is None:
- app = QApplication([])
- app = cast(QApplication, app)
- app.setFont(QFont("Calibri", 12))
+ ensure_qt_app()
dialog = _SettingsDialog(
settings=settings,
@@ -433,186 +505,8 @@ def select_settings(
return dialog.selected_settings
-AudioActivityProvider = Callable[[], Dict[str, Any]]
-RuntimeLogLinesProvider = Callable[[], List[str]]
-SubtitleLinesProvider = Callable[[], List[str]]
-
-
-class _RuntimeDashboard(QWidget):
- def __init__(
- self,
- get_audio_activity: AudioActivityProvider,
- get_runtime_logs: RuntimeLogLinesProvider,
- get_subtitle_lines: SubtitleLinesProvider,
- on_close: Callable[[], None],
- ) -> None:
- super().__init__()
- self._get_audio_activity = get_audio_activity
- self._get_runtime_logs = get_runtime_logs
- self._get_subtitle_lines = get_subtitle_lines
- self._on_close = on_close
- self._closed = False
- self._last_rendered_runtime_logs: str = ""
- self._last_rendered_final_logs: str = ""
-
- self.setWindowTitle("auto-live-tl")
- self.setMinimumSize(1100, 700)
-
- layout = QVBoxLayout(self)
-
- title = QLabel("auto-live-tl", self)
- title.setStyleSheet("font-size: 22px; font-weight: 700; color: #000000;")
- layout.addWidget(title)
-
- self.audio_indicator = QLabel("⚪ Idle", self)
- self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;")
- layout.addWidget(self.audio_indicator)
-
- self.audio_details = QLabel("RMS 0.00000 | threshold 0.00300", self)
- self.audio_details.setStyleSheet("font-size: 13px; color: #9aa0a6;")
- layout.addWidget(self.audio_details)
-
- raw_group = QGroupBox("Debug Log (It's recommended to fetch the final data via the SSE API, see the README)", self)
- raw_group_layout = QVBoxLayout(raw_group)
-
- raw_title = QLabel("System / Raw Output", raw_group)
- raw_group_layout.addWidget(raw_title)
-
- self.runtime_log_view = QTextEdit(raw_group)
- self.runtime_log_view.setReadOnly(True)
- self.runtime_log_view.setPlaceholderText("Waiting for raw Whisper output...")
- self.runtime_log_view.setStyleSheet(
- """
- QTextEdit {
- background: #111417;
- color: #d8dee9;
- border: 1px solid #2f3742;
- border-radius: 8px;
- padding: 8px;
- font-family: 'Consolas', 'Monaco', monospace;
- font-size: 13px;
- line-height: 1.4;
- }
- """
- )
- raw_group_layout.addWidget(self.runtime_log_view, 3)
-
- final_title = QLabel("Final (Sent via SSE)", raw_group)
- raw_group_layout.addWidget(final_title)
-
- self.final_log_view = QTextEdit(raw_group)
- self.final_log_view.setReadOnly(True)
- self.final_log_view.setPlaceholderText("Waiting for FINAL output...")
- self.final_log_view.setStyleSheet(
- """
- QTextEdit {
- background: #0f1410;
- color: #dcf9dd;
- border: 1px solid #2f4a35;
- border-radius: 8px;
- padding: 8px;
- font-family: 'Consolas', 'Monaco', monospace;
- font-size: 14px;
- font-weight: 700;
- line-height: 1.6;
- }
- """
- )
- raw_group_layout.addWidget(self.final_log_view, 2)
-
- layout.addWidget(raw_group, 1)
-
- self._timer = QTimer(self)
- self._timer.setInterval(150)
- self._timer.timeout.connect(self._refresh)
- self._timer.start()
- self._refresh()
-
- def _shutdown(self) -> None:
- if self._closed:
- return
- self._closed = True
- self._timer.stop()
- try:
- self._on_close()
- except Exception:
- pass
-
- def closeEvent(self, event: Any) -> None: # type: ignore[override]
- self._shutdown()
- super().closeEvent(event)
-
- def _refresh(self) -> None:
- try:
- activity = self._get_audio_activity()
- except Exception:
- activity = {}
-
- active = bool(activity.get("active", False))
- try:
- rms = float(activity.get("rms", 0.0))
- except (TypeError, ValueError):
- rms = 0.0
- try:
- threshold = float(activity.get("threshold", 0.0))
- except (TypeError, ValueError):
- threshold = 0.0
-
- if active:
- self.audio_indicator.setText("🟢 Audio detected")
- self.audio_indicator.setStyleSheet("font-size: 16px; color: #8fd18f; font-weight: 600;")
- else:
- self.audio_indicator.setText("⚪ Idle")
- self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;")
- self.audio_details.setText(f"RMS {rms:.5f} | threshold {threshold:.5f}")
-
- try:
- logs = self._get_runtime_logs()
- except Exception:
- logs = []
- runtime_lines = [line for line in logs if "[FINAL]" not in line]
- final_lines = [line for line in logs if "[FINAL]" in line]
-
- joined_runtime_logs = "\n".join(runtime_lines)
- if joined_runtime_logs != self._last_rendered_runtime_logs:
- self._last_rendered_runtime_logs = joined_runtime_logs
- self.runtime_log_view.setPlainText(joined_runtime_logs)
- log_scroll = self.runtime_log_view.verticalScrollBar()
- log_scroll.setValue(log_scroll.maximum())
-
- joined_final_logs = "\n\n".join(final_lines)
- if joined_final_logs != self._last_rendered_final_logs:
- self._last_rendered_final_logs = joined_final_logs
- self.final_log_view.setPlainText(joined_final_logs)
- final_scroll = self.final_log_view.verticalScrollBar()
- final_scroll.setValue(final_scroll.maximum())
-
-
-
-
-def run_runtime_dashboard(
- get_audio_activity: AudioActivityProvider,
- get_runtime_logs: RuntimeLogLinesProvider,
- get_subtitle_lines: SubtitleLinesProvider,
- on_close: Callable[[], None],
-) -> None:
- app = QApplication.instance()
- if app is None:
- app = QApplication([])
- app = cast(QApplication, app)
- app.setFont(QFont("Calibri", 12))
-
- dashboard = _RuntimeDashboard(
- get_audio_activity=get_audio_activity,
- get_runtime_logs=get_runtime_logs,
- get_subtitle_lines=get_subtitle_lines,
- on_close=on_close,
- )
- dashboard.show()
- app.exec()
-
-
def prompt_input_sample_rate(device_index: int, common_rates: Iterable[int]) -> int:
+ ensure_qt_app()
rates = list(common_rates)
while True:
prompt = (
diff --git a/server.py b/server.py
index 7c6b67e..d1da010 100644
--- a/server.py
+++ b/server.py
@@ -5,7 +5,7 @@ import queue
import os
from collections import Counter, deque
import re
-from typing import Any, Deque, Dict, Optional, Set, List, Iterator
+from typing import Any, Deque, Dict, Optional, Set, List, Iterator, Callable
from flask import Flask
from flask_cors import CORS
import ollama as _ollama
@@ -14,7 +14,7 @@ from ollama import ChatResponse
import numpy as np
import sounddevice as sd
from faster_whisper import WhisperModel
-from gui import select_settings, prompt_input_sample_rate, run_runtime_dashboard
+from gui import select_settings, prompt_input_sample_rate, run_runtime_dashboard, run_with_loading_popup
from routes import register_routes
from config import _SYSTEM_PROMPT, _LLM_EMPTY_SENTINELS, _HALLUCINATION_PHRASES
@@ -162,34 +162,42 @@ def cleanup_subtitle_with_ollama(raw_text: str, context: List[str]) -> Optional[
return None
-def ensure_ollama_ready() -> None:
+def ensure_ollama_ready(status_callback: Optional[Callable[[str], None]] = None) -> None:
"""
Pulls Ollama model is necessary, checks model is downloaded
"""
+ def report(message: str) -> None:
+ print(message)
+ if status_callback is not None:
+ status_callback(message.strip())
+
+ report("Checking Ollama server availability...")
try:
local = _ollama.list()
except Exception as exc:
raise RuntimeError(
f"Cannot reach Ollama — is the server running? ({exc})"
) from exc
+
model_names: List[str] = [m.model for m in local.models]
if not any(name.startswith(OLLAMA_MODEL) for name in model_names):
- print(f" '{OLLAMA_MODEL}' not found locally — pulling (this may take a while) ...")
+ report(f"Model '{OLLAMA_MODEL}' not found locally. Pulling now (this can take a while)...")
try:
_ollama.pull(OLLAMA_MODEL)
- print(" Pull complete.")
+ report("Model pull complete.")
except Exception as exc:
raise RuntimeError(f"Failed to pull model '{OLLAMA_MODEL}': {exc}") from exc
else:
- print(f" Model found locally.")
- print(" Warming up model, almost done ...")
+ report("Model found locally.")
+
+ report("Warming up Ollama model...")
try:
chat(
model=OLLAMA_MODEL,
messages=[{"role": "user", "content": "Ready?"}],
options=OLLAMA_OPTIONS,
)
- print(" ✅ Ollama is ready.")
+ report("✅ Ollama is ready.")
except Exception as exc:
raise RuntimeError(f"Ollama warm-up failed: {exc}") from exc
@@ -544,7 +552,11 @@ def main() -> None:
subtitle_context = deque(maxlen=OLLAMA_CONTEXT_WINDOW)
RAW_BATCH_SIZE = int(settings.get("ollama_raw_batch_size", 3))
if USE_OLLAMA_CLEANUP:
- ensure_ollama_ready()
+ run_with_loading_popup(
+ title="Preparing Ollama model",
+ initial_message="Checking model availability...",
+ task=ensure_ollama_ready,
+ )
llm_thread = threading.Thread(target=llm_processing_loop, daemon=True)
llm_thread.start()
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage