about | summary | refs | log | tree | commit | diff | stats
path: root/gui.py
diff options
context:
space:
mode:
Diffstat (limited to 'gui.py')
-rw-r--r-- gui.py 332
1 files changed, 330 insertions, 2 deletions
diff --git a/gui.py b/gui.py
index 89817e5..d112469 100644
--- a/gui.py
+++ b/gui.py
@@ -1,7 +1,9 @@
-from typing import Iterable, List, Tuple, Dict, Any, cast
+from typing import Iterable, List, Tuple, Dict, Any, Callable, cast, Optional
+import time
+import numpy as np
import sounddevice as sd
from PySide6.QtGui import QFont
-from PySide6.QtCore import Qt
+from PySide6.QtCore import Qt, QTimer
from PySide6.QtWidgets import (
QApplication,
QCheckBox,
@@ -16,6 +18,7 @@ from PySide6.QtWidgets import (
QLineEdit,
QMessageBox,
QTabWidget,
+ QTextEdit,
QVBoxLayout,
QWidget,
)
@@ -41,8 +44,15 @@ class _SettingsDialog(QDialog):
def get_value(key: str, fallback: Any) -> Any:
return settings.get(key, default_settings.get(key, fallback))
+ self.device_indices = [idx for idx, _dev in input_devices]
self.device_names = [dev["name"] for _idx, dev in input_devices]
+ self._monitor_stream: Optional[sd.InputStream] = None
+ self._monitor_rms: float = 0.0
+ self._monitor_active_until: float = 0.0
+ self._monitor_error: str = ""
+ self._monitor_threshold: float = float(get_value("audio_activity_threshold", 0.003))
+
root_layout = QVBoxLayout(self)
tabs = QTabWidget(self)
@@ -127,6 +137,13 @@ class _SettingsDialog(QDialog):
self.update_interval_edit = QLineEdit(str(get_value("update_interval_seconds", 2)), whisper_tab)
whisper_advanced_layout.addRow(QLabel("Update interval (s):"), self.update_interval_edit)
+ self.audio_activity_threshold_edit = QLineEdit(str(get_value("audio_activity_threshold", 0.003)), whisper_tab)
+ whisper_advanced_layout.addRow(QLabel("Audio activity threshold (RMS):"), self.audio_activity_threshold_edit)
+
+ self.audio_indicator_label = QLabel("⚪ Idle", whisper_tab)
+ self.audio_indicator_label.setAlignment(Qt.AlignmentFlag.AlignLeft | Qt.AlignmentFlag.AlignVCenter)
+ whisper_advanced_layout.addRow(QLabel("Live input indicator:"), self.audio_indicator_label)
+
whisper_tab_layout.addWidget(whisper_advanced_group)
tabs.addTab(whisper_tab, "Whisper")
@@ -177,9 +194,125 @@ class _SettingsDialog(QDialog):
button_box.rejected.connect(self.reject)
button_layout.addWidget(button_box)
+ self.device_combo.currentIndexChanged.connect(self._restart_monitor_stream)
+ self.audio_activity_threshold_edit.textChanged.connect(self._on_threshold_changed)
+
+ self._monitor_timer = QTimer(self)
+ self._monitor_timer.setInterval(120)
+ self._monitor_timer.timeout.connect(self._refresh_audio_indicator)
+ self._monitor_timer.start()
+
+ self._restart_monitor_stream()
+ self._refresh_audio_indicator()
+
def _warn(self, title: str, text: str) -> None:
# Show a QMessageBox warning with the given title and text, passing this dialog as the parent.
QMessageBox.warning(self, title, text)
+    def _on_threshold_changed(self, text: str) -> None:
+        """Adopt the edited threshold for the live monitor when it is a positive number.
+
+        Text that does not parse as a float, or that parses to a value which is
+        not greater than zero, is ignored and the previous threshold stays in
+        effect.
+        """
+        try:
+            candidate = float(text.strip())
+        except ValueError:
+            # Half-typed or non-numeric input: keep the current threshold.
+            return
+        if candidate > 0:
+            self._monitor_threshold = candidate
+
+    def _pick_monitor_sample_rate(self, device_index: int, preferred_rate: int) -> Optional[int]:
+        """Return the first sample rate the device accepts for the monitor stream.
+
+        ``preferred_rate`` is checked first, followed by a fixed list of common
+        rates from 48000 down to 8000. Each candidate is validated with
+        ``sd.check_input_settings`` for one float32 channel. Non-positive rates
+        and rates that were already checked are skipped.
+
+        Returns:
+            The first accepted rate, or ``None`` when every candidate raised
+            ``sd.PortAudioError``.
+        """
+        fallback_rates: Tuple[int, ...] = (48000, 44100, 32000, 24000, 22050, 16000, 12000, 8000)
+        already_checked = set()
+        for candidate in (preferred_rate, *fallback_rates):
+            if candidate in already_checked or candidate <= 0:
+                continue
+            already_checked.add(candidate)
+            try:
+                sd.check_input_settings(device=device_index, channels=1, samplerate=candidate, dtype="float32")
+            except sd.PortAudioError:
+                # Rejected by the device; move on to the next candidate.
+                continue
+            return candidate
+        return None
+
+    def _monitor_callback(self, indata: np.ndarray, frames: int, time_info: Any, status: Any) -> None:
+        """Stream callback: record the RMS level of the first channel of ``indata``.
+
+        A truthy ``status`` is stored as a message in ``self._monitor_error``.
+        For a non-empty block the RMS of column 0 is written to
+        ``self._monitor_rms``; when it reaches ``self._monitor_threshold`` the
+        "active" deadline is pushed 0.6 s into the future. ``frames`` and
+        ``time_info`` are not used.
+
+        NOTE(review): this method never clears ``_monitor_error``. In the code
+        visible here only ``_restart_monitor_stream`` resets it, so one status
+        flag keeps the indicator on the warning until the stream is restarted.
+        Confirm that this latching is intended.
+        """
+        if status:
+            self._monitor_error = f"Audio status: {status}"
+
+        if indata is not None and len(indata) != 0:
+            first_channel = indata[:, 0]
+            mean_square = np.mean(np.square(first_channel))
+            level = float(np.sqrt(mean_square))
+            self._monitor_rms = level
+            if level >= self._monitor_threshold:
+                self._monitor_active_until = time.monotonic() + 0.6
+
+    def _refresh_audio_indicator(self) -> None:
+        """Update the indicator label from the current monitor state.
+
+        A non-empty ``self._monitor_error`` takes precedence and is shown as a
+        warning. Otherwise the label shows "Audio detected" while the current
+        monotonic time has not passed ``self._monitor_active_until``, and
+        "Idle" after that. Both include the last RMS value to five decimals.
+        """
+        label = self.audio_indicator_label
+        if self._monitor_error:
+            text, css = f"⚠ {self._monitor_error}", "color: #f28b82;"
+        elif time.monotonic() <= self._monitor_active_until:
+            text, css = f"🟢 Audio detected (RMS {self._monitor_rms:.5f})", "color: #8fd18f;"
+        else:
+            text, css = f"⚪ Idle (RMS {self._monitor_rms:.5f})", "color: #b0b0b0;"
+        label.setText(text)
+        label.setStyleSheet(css)
+
+    def _stop_monitor_stream(self) -> None:
+        """Detach the current monitor stream, then stop and close it, best effort.
+
+        ``self._monitor_stream`` is set to ``None`` before teardown. ``stop()``
+        and ``close()`` are each attempted independently, and any exception
+        either one raises is deliberately ignored. Does nothing when no stream
+        is stored.
+        """
+        stream, self._monitor_stream = self._monitor_stream, None
+        if stream is None:
+            return
+        for method_name in ("stop", "close"):
+            try:
+                getattr(stream, method_name)()
+            except Exception:
+                # Teardown is best effort: a failing stop() must not prevent close().
+                pass
+
+    def _restart_monitor_stream(self, *_args: Any) -> None:
+        """Tear down any running monitor stream and open one for the selected device.
+
+        Connected to the device combo's ``currentIndexChanged`` signal, so any
+        positional arguments are accepted and ignored. The monitor state
+        (error text, RMS, active deadline) is reset first. Failures are not
+        raised: they are stored in ``self._monitor_error`` and the method
+        returns with no stream stored.
+        """
+        self._stop_monitor_stream()
+        self._monitor_error = ""
+        self._monitor_rms = 0.0
+        self._monitor_active_until = 0.0
+
+        selection = self.device_combo.currentIndex()
+        if selection < 0 or selection >= len(self.device_indices):
+            self._monitor_error = "No input device selected."
+            return
+
+        device_index = self.device_indices[selection]
+        try:
+            device_info = sd.query_devices(device_index)
+        except Exception as exc:
+            self._monitor_error = f"Could not read device info: {exc}"
+            return
+
+        preferred_rate = int(float(device_info.get("default_samplerate", 48000)))
+        if preferred_rate <= 0:
+            preferred_rate = 48000
+
+        sample_rate = self._pick_monitor_sample_rate(device_index, preferred_rate)
+        if sample_rate is None:
+            self._monitor_error = "No supported sample rate for monitor stream."
+            return
+
+        # A tenth of a second of frames per callback, but never fewer than 256.
+        blocksize = max(256, int(sample_rate * 0.1))
+        try:
+            stream = sd.InputStream(
+                device=device_index,
+                channels=1,
+                samplerate=sample_rate,
+                dtype="float32",
+                callback=self._monitor_callback,
+                blocksize=blocksize,
+            )
+        except Exception as exc:
+            self._monitor_error = f"Unable to start monitor: {exc}"
+            return
+
+        try:
+            stream.start()
+        except Exception as exc:
+            # The stream exists but never started. It is not stored on self, so
+            # _stop_monitor_stream() could not release it later; close it here.
+            try:
+                stream.close()
+            except Exception:
+                pass
+            self._monitor_error = f"Unable to start monitor: {exc}"
+            return
+
+        self._monitor_stream = stream
+
def accept(self) -> None:
selection = self.device_combo.currentIndex()
if selection < 0:
@@ -216,6 +349,14 @@ class _SettingsDialog(QDialog):
return
try:
+ audio_activity_threshold = float(self.audio_activity_threshold_edit.text().strip())
+ if audio_activity_threshold <= 0:
+ raise ValueError
+ except ValueError:
+ self._warn("Invalid audio threshold", "Audio activity threshold must be a positive number.")
+ return
+
+ try:
ollama_context_window = int(self.ollama_context_edit.text().strip())
if ollama_context_window <= 0:
raise ValueError
@@ -241,14 +382,22 @@ class _SettingsDialog(QDialog):
"language": self.language_edit.text().strip(),
"context_seconds": context_seconds,
"update_interval_seconds": update_interval_seconds,
+ "audio_activity_threshold": audio_activity_threshold,
"use_ollama_cleanup": self.use_ollama_cleanup_checkbox.isChecked(),
"ollama_device": self.ollama_device_combo.currentText(),
"ollama_model": self.ollama_model_edit.text().strip(),
"ollama_context_window": ollama_context_window,
"ollama_raw_batch_size": ollama_raw_batch_size,
}
+ self._monitor_timer.stop()
+ self._stop_monitor_stream()
super().accept()
+ def reject(self) -> None:
+ # Stop the indicator timer and release the monitor stream before delegating to the base-class reject().
+ self._monitor_timer.stop()
+ self._stop_monitor_stream()
+ super().reject()
+
def select_settings(
settings: Dict[str, Any],
@@ -284,6 +433,185 @@ def select_settings(
return dialog.selected_settings
+# Zero-argument callables the runtime dashboard polls for its data.
+# Audio activity dict; _RuntimeDashboard._refresh reads its "active", "rms" and "threshold" keys.
+AudioActivityProvider = Callable[[], Dict[str, Any]]
+# Runtime log lines; _RuntimeDashboard._refresh routes lines containing "[FINAL]" to a separate view.
+RuntimeLogLinesProvider = Callable[[], List[str]]
+# Subtitle lines as a list of strings.
+SubtitleLinesProvider = Callable[[], List[str]]
+
+
+# Window that polls the supplied provider callables on a QTimer and renders the
+# audio activity state plus two read-only log views (runtime output and "[FINAL]" lines).
+class _RuntimeDashboard(QWidget):
+ def __init__(
+ self,
+ get_audio_activity: AudioActivityProvider,
+ get_runtime_logs: RuntimeLogLinesProvider,
+ get_subtitle_lines: SubtitleLinesProvider,
+ on_close: Callable[[], None],
+ ) -> None:
+ # Build the widgets, store the providers, and start the refresh timer.
+ super().__init__()
+ self._get_audio_activity = get_audio_activity
+ self._get_runtime_logs = get_runtime_logs
+ # NOTE(review): stored but not read anywhere else in this class as shown here.
+ self._get_subtitle_lines = get_subtitle_lines
+ self._on_close = on_close
+ # Set by _shutdown() so the close handling runs at most once.
+ self._closed = False
+ # Text most recently written to each log view; _refresh() compares against these.
+ self._last_rendered_runtime_logs: str = ""
+ self._last_rendered_final_logs: str = ""
+
+ self.setWindowTitle("auto-live-tl")
+ self.setMinimumSize(1100, 700)
+
+ layout = QVBoxLayout(self)
+
+ title = QLabel("auto-live-tl", self)
+ title.setStyleSheet("font-size: 22px; font-weight: 700; color: #000000;")
+ layout.addWidget(title)
+
+ # Initial indicator and detail texts; both are overwritten by _refresh().
+ self.audio_indicator = QLabel("⚪ Idle", self)
+ self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;")
+ layout.addWidget(self.audio_indicator)
+
+ self.audio_details = QLabel("RMS 0.00000 | threshold 0.00300", self)
+ self.audio_details.setStyleSheet("font-size: 13px; color: #9aa0a6;")
+ layout.addWidget(self.audio_details)
+
+ # One group box holds both log views.
+ raw_group = QGroupBox("Debug Log (It's recommended to fetch the final data via the SSE API, see the README)", self)
+ raw_group_layout = QVBoxLayout(raw_group)
+
+ raw_title = QLabel("System / Raw Output", raw_group)
+ raw_group_layout.addWidget(raw_title)
+
+ # Read-only view for log lines that do not contain "[FINAL]".
+ self.runtime_log_view = QTextEdit(raw_group)
+ self.runtime_log_view.setReadOnly(True)
+ self.runtime_log_view.setPlaceholderText("Waiting for raw Whisper output...")
+ self.runtime_log_view.setStyleSheet(
+ """
+ QTextEdit {
+ background: #111417;
+ color: #d8dee9;
+ border: 1px solid #2f3742;
+ border-radius: 8px;
+ padding: 8px;
+ font-family: 'Consolas', 'Monaco', monospace;
+ font-size: 13px;
+ line-height: 1.4;
+ }
+ """
+ )
+ # The second argument (3 here, 2 for the final view) is the layout stretch factor.
+ raw_group_layout.addWidget(self.runtime_log_view, 3)
+
+ final_title = QLabel("Final (Sent via SSE)", raw_group)
+ raw_group_layout.addWidget(final_title)
+
+ # Read-only view for log lines that contain "[FINAL]".
+ self.final_log_view = QTextEdit(raw_group)
+ self.final_log_view.setReadOnly(True)
+ self.final_log_view.setPlaceholderText("Waiting for FINAL output...")
+ self.final_log_view.setStyleSheet(
+ """
+ QTextEdit {
+ background: #0f1410;
+ color: #dcf9dd;
+ border: 1px solid #2f4a35;
+ border-radius: 8px;
+ padding: 8px;
+ font-family: 'Consolas', 'Monaco', monospace;
+ font-size: 14px;
+ font-weight: 700;
+ line-height: 1.6;
+ }
+ """
+ )
+ raw_group_layout.addWidget(self.final_log_view, 2)
+
+ layout.addWidget(raw_group, 1)
+
+ # Poll the providers every 150 ms, and render once immediately.
+ self._timer = QTimer(self)
+ self._timer.setInterval(150)
+ self._timer.timeout.connect(self._refresh)
+ self._timer.start()
+ self._refresh()
+
+ def _shutdown(self) -> None:
+ # Stop the refresh timer and invoke the on_close callback; repeat calls are no-ops.
+ if self._closed:
+ return
+ self._closed = True
+ self._timer.stop()
+ try:
+ self._on_close()
+ except Exception:
+ # Any exception raised by the callback is discarded.
+ pass
+
+ def closeEvent(self, event: Any) -> None: # type: ignore[override]
+ # Run the shutdown logic, then let the base class handle the close event.
+ self._shutdown()
+ super().closeEvent(event)
+
+ def _refresh(self) -> None:
+ # Timer slot: pull fresh data from the providers and update the labels and log views.
+ try:
+ activity = self._get_audio_activity()
+ except Exception:
+ # A failing provider is treated as "no data".
+ activity = {}
+
+ active = bool(activity.get("active", False))
+ # Missing keys default to 0.0; values float() rejects also fall back to 0.0.
+ try:
+ rms = float(activity.get("rms", 0.0))
+ except (TypeError, ValueError):
+ rms = 0.0
+ try:
+ threshold = float(activity.get("threshold", 0.0))
+ except (TypeError, ValueError):
+ threshold = 0.0
+
+ if active:
+ self.audio_indicator.setText("🟢 Audio detected")
+ self.audio_indicator.setStyleSheet("font-size: 16px; color: #8fd18f; font-weight: 600;")
+ else:
+ self.audio_indicator.setText("⚪ Idle")
+ self.audio_indicator.setStyleSheet("font-size: 16px; color: #b0b0b0; font-weight: 600;")
+ self.audio_details.setText(f"RMS {rms:.5f} | threshold {threshold:.5f}")
+
+ try:
+ logs = self._get_runtime_logs()
+ except Exception:
+ logs = []
+ # Split on the "[FINAL]" marker: lines containing it go to the final view, the rest to the runtime view.
+ runtime_lines = [line for line in logs if "[FINAL]" not in line]
+ final_lines = [line for line in logs if "[FINAL]" in line]
+
+ # Compare with the text rendered last time; the view is rewritten and scrolled to its maximum.
+ # NOTE(review): nesting is not visible in this flattened view; presumably the rewrite and the
+ # scroll both happen only when the text changed. Confirm against the real file.
+ joined_runtime_logs = "\n".join(runtime_lines)
+ if joined_runtime_logs != self._last_rendered_runtime_logs:
+ self._last_rendered_runtime_logs = joined_runtime_logs
+ self.runtime_log_view.setPlainText(joined_runtime_logs)
+ log_scroll = self.runtime_log_view.verticalScrollBar()
+ log_scroll.setValue(log_scroll.maximum())
+
+ # Final lines are joined with "\n\n", which leaves an empty line between entries.
+ joined_final_logs = "\n\n".join(final_lines)
+ if joined_final_logs != self._last_rendered_final_logs:
+ self._last_rendered_final_logs = joined_final_logs
+ self.final_log_view.setPlainText(joined_final_logs)
+ final_scroll = self.final_log_view.verticalScrollBar()
+ final_scroll.setValue(final_scroll.maximum())
+
+
+
+
+def run_runtime_dashboard(
+    get_audio_activity: AudioActivityProvider,
+    get_runtime_logs: RuntimeLogLinesProvider,
+    get_subtitle_lines: SubtitleLinesProvider,
+    on_close: Callable[[], None],
+) -> None:
+    """Show the runtime dashboard window and run the Qt event loop.
+
+    Reuses the current ``QApplication`` instance when one exists, otherwise
+    creates one, and sets the application font to Calibri 12. The four
+    arguments are forwarded unchanged to ``_RuntimeDashboard``. Returns after
+    ``exec()`` on the application returns.
+    """
+    existing_app = QApplication.instance()
+    qt_app = cast(QApplication, QApplication([]) if existing_app is None else existing_app)
+    qt_app.setFont(QFont("Calibri", 12))
+
+    window = _RuntimeDashboard(
+        get_audio_activity=get_audio_activity,
+        get_runtime_logs=get_runtime_logs,
+        get_subtitle_lines=get_subtitle_lines,
+        on_close=on_close,
+    )
+    window.show()
+    qt_app.exec()
+
+
def prompt_input_sample_rate(device_index: int, common_rates: Iterable[int]) -> int:
rates = list(common_rates)
while True:
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage