From c545916ac1bba20112a98c3a75b3280b3042186e Mon Sep 17 00:00:00 2001 From: Pinapelz Date: Sun, 26 Apr 2026 21:31:26 -0700 Subject: add filtering for common whisper hallucination words --- config.py | 34 ++++++++++++++++++++++++++++++++++ server.py | 15 ++++++++------- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/config.py b/config.py index 1101085..676e8e1 100644 --- a/config.py +++ b/config.py @@ -22,3 +22,37 @@ _SYSTEM_PROMPT: str = ( " output an empty string and nothing else.\n" " 7. Output ONLY the final subtitle text. No labels, no explanations." ) + +_LLM_EMPTY_SENTINELS: frozenset = frozenset({ + "empty string", "empty", "(empty)", "[empty]", + "(empty string)", "[empty string]", "(none)", "none", "n/a", +}) + +_HALLUCINATION_PHRASES: frozenset = frozenset({ + "empty string", + "thank you for watching", + "thanks for watching", + "thank you for watching this video", + "thanks for watching this video", + "thank you for watching and ill see you next time", + "thank you for watching ill see you in the next video", + "thanks for watching ill see you next time", + "thank you so much for watching", + "thanks for watching and ill see you in the next one", + "see you next time", + "see you in the next video", + "ill see you in the next video", + "ill see you next time", + "see you later", + "bye bye", + "please subscribe", + "like and subscribe", + "dont forget to like and subscribe", + "please like and subscribe", + "subscribe to my channel", + "if you enjoyed this video please like and subscribe", + "you", + "uh", + "um", + "hmm", +}) diff --git a/server.py b/server.py index 9d0b396..cea2c8a 100644 --- a/server.py +++ b/server.py @@ -16,7 +16,7 @@ import sounddevice as sd from faster_whisper import WhisperModel from gui import select_settings, prompt_input_sample_rate from routes import register_routes -from config import _SYSTEM_PROMPT +from config import _SYSTEM_PROMPT, _LLM_EMPTY_SENTINELS, _HALLUCINATION_PHRASES TARGET_SAMPLE_RATE: int = 16000 CAPTURE_SAMPLE_RATE: int = 0 @@ -47,7 +47,7 @@ DEFAULT_SETTINGS: Dict[str, Any] = { "use_ollama_cleanup": True, "ollama_device": "GPU", "ollama_context_window": 5, - "ollama_raw_batch_size": 2, + "ollama_raw_batch_size": 1, } MODEL_CHOICES: List[str] = ["tiny", "base", "small", "medium", "large-v2", "large-v3", "distil-large-v3"] @@ -173,11 +173,6 @@ def ensure_ollama_ready() -> None: except Exception as exc: raise RuntimeError(f"Ollama warm-up failed: {exc}") from exc -_LLM_EMPTY_SENTINELS: frozenset = frozenset({ - "empty string", "empty", "(empty)", "[empty]", - "(empty string)", "[empty string]", "(none)", "none", "n/a", -}) - def normalize_llm_output(text: str) -> str: if text.strip().lower().rstrip(".") in _LLM_EMPTY_SENTINELS: @@ -214,6 +209,12 @@ def is_hallucination(text: str) -> bool: if count >= 4 and count / len(clean) > 0.40: print(f"🔴 Hallucination (\'{top}\' x{count}, {count/len(clean):.0%}): {text[:60]!r}") return True + + normalized = re.sub(r"[^\w\s]", "", text.lower()).strip() + if normalized in _HALLUCINATION_PHRASES: + print(f"🔴 Hallucination (blocked phrase): {text!r}") + return True + return False -- cgit v1.2.3