Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
1574 lines
65 KiB
Python
1574 lines
65 KiB
Python
"""
|
|
Tests for user input processing around the hot window.
|
|
|
|
These tests verify observable behaviour: given a sequence of events (TTS finishes,
|
|
user speaks, time passes), does the system accept or reject the input, and does
|
|
the accepted query contain the right text?
|
|
|
|
Tests exercise VoiceListener._process_transcript with mocked TTS and intent judge
|
|
but use real StateManager and EchoDetector instances to avoid coupling to internals.
|
|
"""
|
|
|
|
import time
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
from jarvis.listening.state_manager import StateManager, ListeningState
|
|
from jarvis.listening.intent_judge import IntentJudgment
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _create_listener(**kwargs):
    """Build a VoiceListener whose heavyweight dependencies are stubbed out.

    Optional keyword overrides (anything else is ignored):
        echo_tolerance: value stored on ``cfg.echo_tolerance`` (default 0.3).
        hot_window_seconds: value stored on ``cfg.hot_window_seconds``
            (default 3.0).
        tts_speaking: what the TTS mock's ``is_speaking()`` returns
            (default False).

    While the listener is constructed, the listener module's ``webrtcvad``,
    ``sd`` and ``np`` names are patched to ``None`` and ``create_intent_judge``
    is patched to return ``None``. The StateManager and EchoDetector are left
    untouched so tests exercise the real ones.

    Returns:
        ``(listener, tts_mock)`` so tests can steer the TTS state afterwards.
    """
    settings = {
        "whisper_model": "small",
        "whisper_device": "auto",
        "whisper_compute_type": "int8",
        "whisper_backend": "faster-whisper",
        "sample_rate": 16000,
        "vad_enabled": False,
        "vad_aggressiveness": 2,
        "echo_tolerance": kwargs.get("echo_tolerance", 0.3),
        "echo_energy_threshold": 2.0,
        "hot_window_seconds": kwargs.get("hot_window_seconds", 3.0),
        "hot_window_enabled": True,
        "voice_collect_seconds": 2.0,
        "voice_max_collect_seconds": 60.0,
        "voice_device": None,
        "voice_debug": False,
        "voice_min_energy": 0.0045,
        "tune_enabled": False,
        "wake_word": "jarvis",
        "wake_aliases": [],
        "wake_fuzzy_ratio": 0.78,
        "stop_commands": ["stop", "quiet"],
        "tts_rate": 200,
        "transcript_buffer_duration_sec": 120.0,
        "intent_judge_model": "gemma4:e2b",
        "ollama_base_url": "http://127.0.0.1:11434",
        "intent_judge_timeout_sec": 3.0,
    }
    cfg = MagicMock()
    for attr_name, attr_value in settings.items():
        setattr(cfg, attr_name, attr_value)

    db = MagicMock()
    tts = MagicMock()
    tts.enabled = True
    tts.is_speaking.return_value = kwargs.get("tts_speaking", False)
    dialogue_memory = MagicMock()

    target = "jarvis.listening.listener"
    with patch(f"{target}.webrtcvad", None), \
            patch(f"{target}.sd", None), \
            patch(f"{target}.np", None), \
            patch(f"{target}.create_intent_judge", return_value=None):
        # Imported lazily, under the patches, as in the original helper.
        from jarvis.listening.listener import VoiceListener
        listener = VoiceListener(db, cfg, tts, dialogue_memory)

    return listener, tts
|
|
|
|
|
|
def _make_judgment(directed=True, query="", stop=False, confidence="high", reasoning="test"):
    """Return an IntentJudgment populated from the given field values.

    Every argument is forwarded by keyword to ``IntentJudgment`` unchanged;
    the defaults describe a directed, high-confidence judgment with an empty
    query and no stop request.
    """
    fields = {
        "directed": directed,
        "query": query,
        "stop": stop,
        "confidence": confidence,
        "reasoning": reasoning,
    }
    return IntentJudgment(**fields)
|
|
|
|
|
|
def _install_intent_judge(listener, judgment):
|
|
"""Replace the listener's intent judge with a mock returning *judgment*."""
|
|
mock_judge = MagicMock()
|
|
mock_judge.available = True
|
|
mock_judge.judge.return_value = judgment
|
|
listener._intent_judge = mock_judge
|
|
return mock_judge
|
|
|
|
|
|
def _simulate_tts_finish(listener):
|
|
"""Simulate TTS finishing: track finish time and schedule hot window activation."""
|
|
listener.echo_detector.track_tts_finish()
|
|
listener.state_manager.schedule_hot_window_activation()
|
|
|
|
|
|
def _wait_for_hot_window_active(listener, timeout=0.5):
|
|
"""Wait until hot window is formally active (past echo_tolerance delay)."""
|
|
deadline = time.time() + timeout
|
|
while time.time() < deadline:
|
|
if listener.state_manager.is_hot_window_active():
|
|
return True
|
|
time.sleep(0.01)
|
|
return False
|
|
|
|
|
|
def _accepted_query(listener) -> str:
|
|
"""Return the accepted query text, or empty string if input was rejected."""
|
|
if listener.state_manager.get_pending_query():
|
|
return listener.state_manager.get_pending_query()
|
|
return ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: User speaks during active hot window
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.unit
class TestUserSpeaksDuringHotWindow:
    """TTS finishes, hot window activates, user speaks within the window.

    Every test follows the same shape: build a listener, simulate TTS
    start/finish, wait for the hot window, install a stubbed intent judge,
    feed one transcript to ``_process_transcript`` and inspect the pending
    query via ``_accepted_query``.

    NOTE(review): the boolean returned by ``_wait_for_hot_window_active`` is
    not checked, and ``state_manager.stop()`` is the last statement rather
    than in a ``finally``, so a failing assertion skips the stop call.
    """

    @patch("builtins.print")
    def test_directed_follow_up_is_accepted(self, _print):
        """User's follow-up question during hot window is accepted."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        listener.echo_detector.track_tts_start("The weather is sunny today.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        _install_intent_judge(listener, _make_judgment(directed=True, query="thanks"))

        listener._process_transcript("thanks", utterance_energy=0.01)

        assert _accepted_query(listener) == "thanks"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_undirected_background_speech_is_accepted_in_hot_window(self, _print):
        """Non-echo speech during hot window is accepted even if judge says not directed.

        The 3s hot window is short enough that false positives (accepting
        background speech) are preferable to false negatives (ignoring genuine
        follow-ups like 'don't you already know that?'). Small LLMs sometimes
        reject valid follow-ups, so we override in hot window mode.
        """
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        listener.echo_detector.track_tts_start("Here is your answer.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # The stubbed judge rejects the utterance with high confidence.
        _install_intent_judge(listener, _make_judgment(
            directed=False, query="", confidence="high",
            reasoning="background conversation"))

        listener._process_transcript("did you see the game last night", utterance_energy=0.01)

        # In hot window, non-echo speech is always accepted
        assert _accepted_query(listener) == "did you see the game last night"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_judge_query_is_used_in_hot_window(self, _print):
        """In hot window, the intent judge's extracted query is authoritative.

        The judge is the canonical echo-stripper and noise-pruner; its output
        always wins over the raw transcript. This prevents partial-salvage
        leakage where echo fragments ride through on the raw text. If the
        judge returns an empty query, the listener falls back to raw text
        (covered by ``test_empty_judge_query_falls_back_to_raw_text``).
        """
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        listener.echo_detector.track_tts_start("Do you want to know more?")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        _install_intent_judge(listener, _make_judgment(
            directed=True, query="what is the weather tomorrow"))

        # Raw transcript carries filler words that the judge's query omits.
        listener._process_transcript(
            "uh okay what is the weather tomorrow", utterance_energy=0.01)

        assert _accepted_query(listener) == "what is the weather tomorrow"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_empty_judge_query_falls_back_to_raw_text(self, _print):
        """If the judge is directed but returns no query, fall back to raw text."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        listener.echo_detector.track_tts_start("Do you want to know more?")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        _install_intent_judge(listener, _make_judgment(directed=True, query=""))

        listener._process_transcript("tell me a joke please", utterance_energy=0.01)

        assert _accepted_query(listener) == "tell me a joke please"
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: User starts speaking during hot window, transcript arrives after expiry
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.unit
class TestTranscriptArrivesAfterHotWindowExpiry:
    """User speaks during hot window but Whisper is slow — transcript arrives after expiry.

    Uses timestamp-based detection: utterance_start_time is compared against the
    hot window's time span, so it doesn't matter when Whisper finishes.

    These tests rely on real sleeps and very short hot windows (50-80 ms), so
    the order of the ``time.time()`` captures relative to the sleeps matters.
    """

    @patch("builtins.print")
    def test_speech_started_during_window_accepted_after_expiry(self, _print):
        """Speech that STARTED during the hot window is accepted even after expiry.

        This is the core scenario: user starts speaking at 2.5s into a 3s window,
        Whisper takes 2s to transcribe, so transcript arrives at 4.5s — after
        "Returning to wake word mode". The timestamp check still detects the
        speech started during the window. The test scales this down to an
        80 ms window and a 120 ms delay.
        """
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=0.08)

        listener.echo_detector.track_tts_start("Short answer.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # Speech starts during active window
        speech_start = time.time()

        # Wait for hot window to expire (simulates Whisper delay)
        time.sleep(0.12)
        assert not listener.state_manager.is_hot_window_active()

        # Transcript arrives after expiry — but speech_start was during window
        _install_intent_judge(listener, _make_judgment(directed=True, query="tell me more"))
        listener._process_transcript(
            "tell me more", utterance_energy=0.01,
            utterance_start_time=speech_start, utterance_end_time=time.time())

        assert _accepted_query(listener) == "tell me more"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_speech_started_after_expiry_rejected(self, _print):
        """Speech starting AFTER window expired is rejected (requires wake word)."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=0.05)

        listener.echo_detector.track_tts_start("Short answer.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # Wait for hot window to expire
        time.sleep(0.1)
        assert not listener.state_manager.is_hot_window_active()

        # Speech starts AFTER expiry
        speech_start = time.time()

        _install_intent_judge(listener, _make_judgment(directed=True, query="tell me more"))
        listener._process_transcript(
            "tell me more", utterance_energy=0.01,
            utterance_start_time=speech_start, utterance_end_time=time.time())

        assert _accepted_query(listener) == ""
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_voice_during_active_window_accepted_before_expiry(self, _print):
        """Voice processed while hot window is still active succeeds."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        listener.echo_detector.track_tts_start("Short answer.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        speech_start = time.time()

        _install_intent_judge(listener, _make_judgment(directed=True, query="tell me more"))
        listener._process_transcript(
            "tell me more", utterance_energy=0.01,
            utterance_start_time=speech_start, utterance_end_time=time.time())

        assert _accepted_query(listener) == "tell me more"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_voice_during_pending_activation_accepted(self, _print):
        """Voice start during echo_tolerance delay (pending activation) still counts."""
        # A long echo_tolerance (0.5s) keeps the window in its pending phase
        # for the whole test; no wait helper is called on purpose.
        listener, _ = _create_listener(echo_tolerance=0.5, hot_window_seconds=3.0)

        listener.echo_detector.track_tts_start("Answer text.")
        _simulate_tts_finish(listener)

        # Hot window not yet active (still in echo_tolerance delay)
        assert not listener.state_manager.is_hot_window_active()

        # Speech starts now during pending period
        speech_start = time.time()

        _install_intent_judge(listener, _make_judgment(directed=True, query="yes please"))
        listener._process_transcript(
            "yes please", utterance_energy=0.01,
            utterance_start_time=speech_start, utterance_end_time=time.time())

        assert _accepted_query(listener) == "yes please"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_speech_minutes_after_window_not_treated_as_hot(self, _print):
        """Speech well after the hot window expired is NOT treated as hot window.

        Regression test: a stale boolean flag previously caused speech long
        after the window to be treated as hot window input. The "minute later"
        scenario is approximated with a start timestamp only 0.5s in the
        future of the (already expired) window, which must still be rejected.
        """
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=0.05)

        listener.echo_detector.track_tts_start("Quick answer.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # Wait for window to expire
        time.sleep(0.1)
        assert not listener.state_manager.is_hot_window_active()

        # Simulate speech "a minute later" (use a start time well after expiry)
        speech_start = time.time() + 0.5  # even 500ms later should be rejected

        _install_intent_judge(listener, _make_judgment(
            directed=True, query="something funny"))
        listener._process_transcript(
            "something funny", utterance_energy=0.01,
            utterance_start_time=speech_start, utterance_end_time=speech_start + 1.0)

        assert _accepted_query(listener) == ""
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: Echo and user speech in the same Whisper chunk
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.unit
class TestEchoAndUserSpeechInSameChunk:
    """Whisper merges echo + user speech into one transcript chunk.

    Several tests read ``state_manager._hot_window_span_start`` (a private
    attribute) to build utterance timestamps that straddle the start of the
    hot window span, and call ``expire_hot_window()`` to emulate a slow
    transcription without sleeping.
    """

    @patch("builtins.print")
    def test_mixed_echo_and_speech_after_tts_accepted_in_hot_window(self, _print):
        """When echo + user speech arrive as one chunk in hot window, input is accepted."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        tts_text = "here is the answer"
        listener.echo_detector.track_tts_start(tts_text)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        now = time.time()
        # Intent judge sees the mixed text and marks it directed
        _install_intent_judge(listener, _make_judgment(
            directed=True, query="thanks can you also check email"))

        # Mixed chunk: echo + user speech
        listener._process_transcript(
            "here is the answer thanks can you also check email",
            utterance_energy=0.01,
            utterance_start_time=now - 3.0,
            utterance_end_time=now - 0.5,
        )

        # Deliberately loose check: both the raw transcript and the judge's
        # extracted query contain these phrases, so either one passes.
        query = _accepted_query(listener)
        assert query != ""
        assert "thanks" in query or "check email" in query
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_echo_plus_speech_from_during_tts_accepted_after_expiry(self, _print):
        """Mixed echo+speech chunk where VAD triggered during TTS is accepted
        even after the hot window expires.

        Real scenario: TTS plays, mic picks up echo (VAD triggers during TTS),
        user speaks during hot window, Whisper takes >3s to transcribe the long
        combined audio, hot window expires, transcript arrives.

        The utterance started BEFORE the hot window span (during TTS) but
        ended DURING the span (user spoke during window). The system should
        recognise this overlap and treat it as hot window input.
        """
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        tts_text = "Got it. I will keep my responses short and to the point from now on."
        listener.echo_detector.track_tts_start(tts_text)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        span_start = listener.state_manager._hot_window_span_start

        # Manually expire hot window (simulates Whisper taking >3s)
        listener.state_manager.expire_hot_window()
        assert not listener.state_manager.is_hot_window_active()

        # Intent judge correctly extracts user speech from mixed transcript
        _install_intent_judge(listener, _make_judgment(
            directed=True,
            query="tell me something random"))

        # Mixed chunk: full TTS echo + user speech appended
        # utterance_start_time is BEFORE span_start (VAD triggered during TTS)
        # utterance_end_time is AFTER span_start (user spoke during window)
        mixed_text = (
            "Got it. I will keep my responses short and to the point from now on. "
            "Yeah, I guess that's fine, but tell me something random."
        )
        listener._process_transcript(
            mixed_text,
            utterance_energy=0.01,
            utterance_start_time=span_start - 2.0,
            utterance_end_time=span_start + 0.05,
        )

        query = _accepted_query(listener)
        assert query != "", (
            "Mixed echo+speech where utterance overlaps hot window should be "
            "accepted, not dropped because utterance_start_time < span_start"
        )
        assert "random" in query
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_mixed_echo_speech_unsalvaged_uses_judge_extraction(self, _print):
        """When salvage fails to strip echo, the post-judge echo check should
        use the intent judge's extraction instead of rejecting everything.

        If the heard text is much longer than TTS (mixed content), the echo
        check should recognise it's not pure echo and fall through to use the
        judge's extracted query.
        """
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        tts_text = "The current temperature is around nine degrees celsius."
        listener.echo_detector.track_tts_start(tts_text)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # Intent judge correctly extracts user speech
        _install_intent_judge(listener, _make_judgment(
            directed=True,
            query="what will it be tomorrow"))

        # Mixed text where salvage won't work (Whisper transcribed echo differently
        # from TTS text, so exact word matching fails). User speech is substantially
        # longer than TTS echo so word count guard lets it through.
        mixed_text = (
            "the temperature is about 9 degrees. "
            "yeah I figured as much but what will it be like tomorrow afternoon"
        )
        # No utterance timestamps are passed in this test.
        listener._process_transcript(
            mixed_text,
            utterance_energy=0.01,
        )

        query = _accepted_query(listener)
        assert query != "", (
            "Mixed echo+speech should not be rejected when text is longer than TTS"
        )
        assert "tomorrow" in query
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_judge_echo_reasoning_overridden_for_mixed_content_in_hot_window(self, _print):
        """When the intent judge says 'not directed' with echo reasoning but the
        utterance overlaps the hot window and text is longer than TTS (mixed
        echo+speech), the rejection should be overridden.

        Real scenario: TTS plays, mic picks up echo + user speaks during hot window,
        hot window expires, Whisper delivers mixed transcript. Intent judge sees TTS
        text in transcript and says 'echo, not directed'. But the word-count guard
        shows it's mixed content and could_be_hot_window is True, so the override
        should kick in.
        """
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        tts_text = "You are currently in Tbilisi, Georgia."
        listener.echo_detector.track_tts_start(tts_text)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        span_start = listener.state_manager._hot_window_span_start

        # Hot window expires (Whisper is slow)
        listener.state_manager.expire_hot_window()
        assert not listener.state_manager.is_hot_window_active()

        # Intent judge incorrectly classifies as echo (sees TTS text in transcript)
        _install_intent_judge(listener, _make_judgment(
            directed=False,
            query="",
            confidence="high",
            reasoning="echo of TTS output"))

        # The echo portion is deliberately mis-transcribed ("T-Ballista").
        mixed_text = (
            "you are currently in T-Ballista Georgia and what do you think "
            "about Joseph Stalin and communism in general?"
        )
        listener._process_transcript(
            mixed_text,
            utterance_energy=0.01,
            utterance_start_time=span_start - 2.0,
            utterance_end_time=span_start + 0.05,
        )

        query = _accepted_query(listener)
        assert query != "", (
            "Mixed echo+speech should be accepted in hot window even when "
            "intent judge says 'echo, not directed' — word count shows mixed content"
        )
        assert "stalin" in query.lower() or "communism" in query.lower()
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_judge_returns_none_hot_window_speech_still_accepted(self, _print):
        """When the intent judge times out or errors (returns None), hot window
        speech that passes the echo check should still be accepted.

        Real scenario: user speaks during hot window, Whisper delivers mixed
        echo+speech, intent judge times out on the long transcript. The beep
        started (early check passed) but the query is silently dropped because
        the judge-None path falls through to wake word detection. This test
        guards against that outcome.
        """
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        tts_text = "You are currently in Tbilisi, Georgia."
        listener.echo_detector.track_tts_start(tts_text)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        span_start = listener.state_manager._hot_window_span_start

        # Hot window expires (Whisper is slow)
        listener.state_manager.expire_hot_window()

        # Intent judge returns None (timeout)
        _install_intent_judge(listener, None)

        mixed_text = (
            "you are currently in T-Ballista Georgia and what do you think "
            "about Joseph Stalin and communism in general?"
        )
        listener._process_transcript(
            mixed_text,
            utterance_energy=0.01,
            utterance_start_time=span_start - 2.0,
            utterance_end_time=span_start + 0.05,
        )

        query = _accepted_query(listener)
        assert query != "", (
            "Hot window speech should be accepted even when intent judge "
            "times out — the early echo check already cleared it"
        )
        assert "stalin" in query.lower() or "communism" in query.lower()
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_utterance_starting_during_tts_ending_after_treated_as_hot_window(self, _print):
        """Utterance that starts before TTS finishes is still treated as hot window context."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        listener.echo_detector.track_tts_start("some response text")
        # Captured just before the detector records its own finish time, so
        # this is an approximation of the detector's timestamp.
        tts_finish = time.time()
        listener.echo_detector.track_tts_finish()
        listener.state_manager.schedule_hot_window_activation()
        _wait_for_hot_window_active(listener)

        # Utterance started 0.5s BEFORE TTS finished, ended 1s after
        utterance_start = tts_finish - 0.5
        utterance_end = tts_finish + 1.0

        _install_intent_judge(listener, _make_judgment(directed=True, query="tell me more"))

        listener._process_transcript(
            "tell me more",
            utterance_energy=0.01,
            utterance_start_time=utterance_start,
            utterance_end_time=utterance_end,
        )

        assert _accepted_query(listener) == "tell me more"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_early_echo_check_salvages_trailing_user_speech(self, _print):
        """Early echo check must salvage user speech appended after an echo prefix.

        Whisper often merges the tail of TTS echo with the user's follow-up into
        one transcript. The early fuzzy echo check used to reject the whole chunk,
        so the user's real speech was dropped before the intent judge could see it.
        """
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)
        tts_text = (
            "I do have a tool to check the weather, but I need to use it with a "
            "location. I can check the forecast for London for you right now."
        )
        listener.echo_detector.track_tts_start(tts_text)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        _install_intent_judge(listener, _make_judgment(
            directed=True, query="yeah go ahead and do that"))

        # Mixed chunk: exact tail of TTS echo + user's follow-up
        listener._process_transcript(
            "I can check the forecast for London for you right now. "
            "Yeah, go ahead and do that.",
            utterance_energy=0.01,
        )

        query = _accepted_query(listener)
        assert query != "", "Trailing user speech should be salvaged, not rejected as echo"
        assert "go ahead" in query.lower()
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_early_echo_salvage_accepts_at_minimum_word_count(self, _print):
        """Salvaged remainder at exactly min_salvage_words should be accepted."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)
        min_words = listener.echo_detector.min_salvage_words

        tts_text = "The weather is going to be sunny today in London."
        listener.echo_detector.track_tts_start(tts_text)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # Slicing caps the follow-up at five words, so it has exactly
        # min_words words only when min_words <= 5.
        follow_up_words = ["thanks", "tell", "me", "more", "please"][:min_words]
        follow_up = " ".join(follow_up_words)
        _install_intent_judge(listener, _make_judgment(directed=True, query=follow_up))

        # Transcript = full TTS text followed by the short follow-up.
        listener._process_transcript(
            f"{tts_text} {follow_up}",
            utterance_energy=0.01,
        )

        assert _accepted_query(listener) != "", (
            f"Remainder of exactly {min_words} words should be salvaged"
        )
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_early_echo_salvage_rejects_below_minimum_word_count(self, _print):
        """Salvaged remainder below min_salvage_words should be rejected as echo."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)
        min_words = listener.echo_detector.min_salvage_words

        tts_text = "The weather is going to be sunny today in London."
        listener.echo_detector.track_tts_start(tts_text)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # max(..., 1) keeps at least one word in the tail; the source list has
        # only two words, so the tail is one or two words long.
        short_tail = " ".join(["really", "nice"][: max(min_words - 1, 1)])
        judge = _install_intent_judge(listener, _make_judgment(
            directed=True, query=short_tail, reasoning="should not be consulted"))

        listener._process_transcript(
            f"{tts_text} {short_tail}",
            utterance_energy=0.01,
        )

        assert _accepted_query(listener) == "", (
            f"Remainder below {min_words} words should be rejected"
        )
        # Rejection must happen before the intent judge is ever called.
        judge.judge.assert_not_called()
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_early_echo_salvage_rejects_when_no_prefix_match(self, _print):
        """If cleanup_leading_echo can't strip any prefix, fall back to rejection."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        tts_text = "alpha beta gamma delta epsilon zeta"
        listener.echo_detector.track_tts_start(tts_text)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        judge = _install_intent_judge(listener, _make_judgment(
            directed=False, query="", reasoning="should not be consulted"))

        # Shares enough words with TTS to clear partial_ratio >= 70 (marks it
        # echo) but the tokens are in a different order so cleanup_leading_echo
        # cannot find a matching prefix — nothing to salvage.
        listener._process_transcript(
            "beta alpha delta gamma zeta epsilon",
            utterance_energy=0.01,
        )

        assert _accepted_query(listener) == "", (
            "Chunk with no strippable prefix should be rejected as pure echo"
        )
        # Rejection must happen before the intent judge is ever called.
        judge.judge.assert_not_called()
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: Grace period boundaries
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.unit
class TestHotWindowOnlyFromStateManager:
    """Hot window status comes exclusively from the state manager's formal
    activation/expiry — not from time-based grace periods. This prevents
    false hot window claims after the user has seen 'Returning to wake word mode'.

    The rejection tests call ``track_tts_finish()`` directly instead of
    ``_simulate_tts_finish`` so that no hot window activation is scheduled.
    """

    @patch("builtins.print")
    def test_recent_tts_without_hot_window_activation_not_treated_as_hot(self, _print):
        """TTS finishing without hot window activation does not create a hot window."""
        listener, _ = _create_listener(
            hot_window_seconds=3.0,
            echo_tolerance=0.3,
        )

        # Track TTS finish but do NOT schedule hot window activation
        listener.echo_detector.track_tts_start("answer text")
        listener.echo_detector.track_tts_finish()

        # Judge says directed, but no wake word and no hot window
        _install_intent_judge(listener, _make_judgment(directed=True, query="thanks"))

        listener._process_transcript("thanks", utterance_energy=0.01)

        # Should NOT be accepted — no hot window active, no wake word
        assert _accepted_query(listener) == ""
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_formal_hot_window_activation_required(self, _print):
        """Only formally activated hot window allows wake-word-free input."""
        listener, _ = _create_listener(
            hot_window_seconds=3.0,
            echo_tolerance=0.02,
        )

        listener.echo_detector.track_tts_start("old answer")
        listener.echo_detector.track_tts_finish()
        # Reads the detector's private finish timestamp to place the utterance
        # shortly after TTS ended.
        tts_finish = listener.echo_detector._last_tts_finish_time

        # Judge says directed, but no wake word in text — should be rejected
        _install_intent_judge(listener, _make_judgment(directed=True, query="hello there"))

        listener._process_transcript(
            "hello there",
            utterance_energy=0.01,
            utterance_start_time=tts_finish + 0.5,
            utterance_end_time=tts_finish + 1.0,
        )

        assert _accepted_query(listener) == ""
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_no_timestamps_with_active_hot_window_accepted(self, _print):
        """When Whisper provides no timestamps but hot window is active, accepted."""
        listener, _ = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)

        listener.echo_detector.track_tts_start("recent response")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        _install_intent_judge(listener, _make_judgment(directed=True, query="and also"))

        # Zero start/end times stand in for "no timestamps available".
        listener._process_transcript(
            "and also",
            utterance_energy=0.01,
            utterance_start_time=0,
            utterance_end_time=0,
        )

        assert _accepted_query(listener) == "and also"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_no_timestamps_without_hot_window_rejected(self, _print):
        """When Whisper provides no timestamps and no hot window, requires wake word."""
        listener, _ = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.3)

        listener.echo_detector.track_tts_start("stale response")
        # TTS finished but no hot window scheduled
        listener.echo_detector.track_tts_finish()

        _install_intent_judge(listener, _make_judgment(directed=True, query="random remark"))

        # Zero start/end times stand in for "no timestamps available".
        listener._process_transcript(
            "random remark",
            utterance_energy=0.01,
            utterance_start_time=0,
            utterance_end_time=0,
        )

        assert _accepted_query(listener) == ""
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: Echo rejection does NOT extend the hot window
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.unit
class TestEchoRejectionDoesNotExtendFollowUpWindow:
    """Rejecting echo must leave the follow-up (hot) window untouched.

    Echo is caught by the early, instant fuzzy check, so it neither blocks the
    audio loop nor lengthens the window: the original duration still applies.
    """

    @patch("builtins.print")
    def test_echo_does_not_reset_window_timer(self, _print):
        """The window start time is unchanged after an echo is rejected."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)

        listener.echo_detector.track_tts_start("The answer is 42.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        state = listener.state_manager
        started_at = state._hot_window_start_time

        # The transcript repeats the TTS text, so it is discarded early as echo.
        listener._process_transcript("The answer is 42", utterance_energy=0.01)

        # An unchanged start time means the timer was not reset ...
        assert state._hot_window_start_time == started_at
        # ... and we are still inside the original 3s window.
        assert state.is_hot_window_active()

        # A real follow-up spoken inside that original window goes through.
        verdict = _make_judgment(directed=True, query="thanks")
        _install_intent_judge(listener, verdict)
        listener._process_transcript("thanks", utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted == "thanks"
        state.stop()

    @patch("builtins.print")
    def test_echo_after_window_expiry_does_not_reactivate(self, _print):
        """An echo that arrives after expiry does NOT reopen the window."""
        listener, _tts = _create_listener(hot_window_seconds=0.05, echo_tolerance=0.02)

        listener.echo_detector.track_tts_start("Short reply.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        state = listener.state_manager

        # Sleep past the 0.05s window so it expires on its own.
        time.sleep(0.1)
        assert not state.is_hot_window_active()

        # The late echo is processed; the window has to remain expired.
        listener._process_transcript("Short reply", utterance_energy=0.01)
        assert not state.is_hot_window_active()

        # With the window closed, wake-word-free speech is rejected.
        verdict = _make_judgment(directed=True, query="one more thing")
        _install_intent_judge(listener, verdict)
        listener._process_transcript("one more thing", utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted == ""
        state.stop()
|
|
|
|
|
|
@pytest.mark.unit
class TestLongTtsTailEcho:
    """An echo of the TAIL of a long TTS response must still be rejected.

    Regression guard: the fuzzy echo check used to truncate the TTS text to
    300 chars, so tail echoes of longer responses slipped through and were
    accepted as user speech.
    """

    @patch("builtins.print")
    def test_tail_echo_from_long_tts_rejected(self, _print):
        """The closing clause of a ~370-char TTS is treated as echo, not input."""
        long_tts = "".join(
            (
                "You asked for something interesting, so I found that there are ",
                "over 1800 creative writing prompts available across various genres, ",
                "including themes like a character losing the ability to create or ",
                "an intangible concept becoming a real object. I also found that ",
                "evolving marketing tactics rely on using data, leveraging ",
                "analytics, and being agile to understand user behavior.",
            )
        )
        # Guard: the bug only shows up once the text exceeds the old 300-char cap.
        assert len(long_tts) > 300

        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        listener.echo_detector.track_tts_start(long_tts)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # What the mic hears is just the end of the TTS response: pure echo.
        tail_echo = "leveraging analytics and being agile to understand user behavior."
        verdict = _make_judgment(directed=False, reasoning="Segment is an echo")
        _install_intent_judge(listener, verdict)
        listener._process_transcript(tail_echo, utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted == ""
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: Early beep and face state feedback
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _is_beeping(listener) -> bool:
    """Return True when the listener currently holds a thinking-tune player.

    The tune counts as active whenever ``listener._tune_player`` is anything
    other than ``None``.
    """
    player = listener._tune_player
    return player is not None
|
|
|
|
|
|
@pytest.mark.unit
class TestEarlyBeepFeedback:
    """The thinking beep is meant to start right after Whisper transcription.

    It should fire before the intent judge runs, so the user gets immediate
    auditory feedback.
    """

    @patch("builtins.print")
    def test_beep_starts_on_wake_word_before_intent_judge(self, _print):
        """With a wake word present, the beep starts right after 'Heard'."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        listener.cfg.tune_enabled = True

        # No intent judge is installed: the beep is expected from the early
        # detection path, and the fallback wake word check handles the query.
        listener._process_transcript("jarvis what time is it", utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted != ""
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_beep_starts_in_hot_window_before_intent_judge(self, _print):
        """Inside the hot window, the beep starts right after 'Heard'."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        listener.cfg.tune_enabled = True

        listener.echo_detector.track_tts_start("Here is the answer.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        verdict = _make_judgment(directed=True, query="tell me more")
        _install_intent_judge(listener, verdict)
        listener._process_transcript("tell me more", utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted == "tell me more"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_no_beep_without_wake_word_or_hot_window(self, _print):
        """Neither wake word nor hot window means no beep."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        listener.cfg.tune_enabled = True

        # Arbitrary speech: no wake word in it, and no hot window is open.
        listener._process_transcript("the weather is nice today", utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted == ""
        # Either the beep never started, or it was stopped again by now.
        assert not _is_beeping(listener)
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_beep_stops_when_intent_judge_rejects(self, _print):
        """A rejection from the intent judge stops the early beep."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        listener.cfg.tune_enabled = True

        # The speech contains the wake word, so the early beep fires; the judge
        # then reports "not directed", which should stop the beep.
        rejection = _make_judgment(
            directed=False,
            query="",
            confidence="high",
            reasoning="narrative mention",
        )
        _install_intent_judge(listener, rejection)

        listener._process_transcript("jarvis is a cool name", utterance_energy=0.01)

        # The query should NOT be accepted (judge rejected + fallback wake word
        # check won't find a query after "jarvis"); only the beep is asserted.
        assert not _is_beeping(listener)
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_no_beep_during_tts_playback(self, _print):
        """The beep is not started while TTS is actively speaking."""
        listener, _mock_tts = _create_listener(
            tts_speaking=True,
            hot_window_seconds=3.0,
            echo_tolerance=0.02,
        )
        listener.cfg.tune_enabled = True

        listener._process_transcript("jarvis what time is it", utterance_energy=0.01)

        # During TTS the stop command path deals with interruptions; no beep.
        assert not _is_beeping(listener)
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: Echo caught early in hot window (no intent judge, no window reset)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.unit
class TestEchoRejectionInHotWindow:
    """Echo inside the hot window is stopped by the early fuzzy check.

    That check runs before the intent judge, and rejecting echo does NOT reset
    the hot window timer.
    """

    @patch("builtins.print")
    def test_confirmed_echo_rejected_without_intent_judge(self, _print):
        """A transcript matching the TTS is dropped early; the judge never runs."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        spoken = "The weather will be sunny tomorrow."

        listener.echo_detector.track_tts_start(spoken)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        rejection = _make_judgment(
            directed=False,
            query="",
            confidence="high",
            reasoning="echo of assistant speech",
        )
        judge = _install_intent_judge(listener, rejection)

        listener._process_transcript(
            "the weather will be sunny tomorrow", utterance_energy=0.01
        )

        # Caught early: nothing accepted and the judge was never consulted.
        assert _accepted_query(listener) == ""
        judge.judge.assert_not_called()
        # Still inside the original 3s window, which was NOT reset.
        assert listener.state_manager.is_hot_window_active()
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_echo_rejected_before_intent_judge_can_accept(self, _print):
        """Echo is dropped early even if the judge would have said directed.

        The mic picks up Jarvis's own TTS output and Whisper transcribes it;
        the early fuzzy check rejects it before the intent judge gets a say.
        """
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        spoken = "Georgian cuisine is incredibly rich and you should try Khachapuri and Georgian bread."

        listener.echo_detector.track_tts_start(spoken)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        would_accept = _make_judgment(
            directed=True,
            query="and kg chai like georgian bread",
            confidence="high",
            reasoning="user follow-up",
        )
        judge = _install_intent_judge(listener, would_accept)

        listener._process_transcript(
            "and kg chai like georgian bread", utterance_energy=0.01
        )

        # Caught early: no query accepted ...
        assert _accepted_query(listener) == ""
        # ... and the judge was never called.
        judge.judge.assert_not_called()
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_non_echo_speech_accepted_via_override(self, _print):
        """Non-echo speech in the hot window is accepted despite a judge rejection.

        Inside the hot window non-echo speech is always accepted (override),
        because small LLMs sometimes reject valid follow-ups.
        """
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        spoken = "The weather will be sunny tomorrow."

        listener.echo_detector.track_tts_start(spoken)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # The judge turns down this unrelated speech.
        rejection = _make_judgment(
            directed=False,
            query="",
            confidence="high",
            reasoning="background conversation",
        )
        _install_intent_judge(listener, rejection)

        heard = "did you see the game last night"
        listener._process_transcript(heard, utterance_energy=0.01)

        # The hot-window override accepts the non-echo speech anyway.
        assert _accepted_query(listener) == "did you see the game last night"
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: Hot window boundary enforcement
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.unit
class TestHotWindowBoundary:
    """The hot window ends at a strict time boundary.

    Speech that arrives after the window has expired must go through wake word
    detection again.
    """

    @patch("builtins.print")
    def test_speech_within_window_accepted(self, _print):
        """Speech handled while the hot window is active is accepted."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)

        listener.echo_detector.track_tts_start("Short answer.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        verdict = _make_judgment(directed=True, query="thanks")
        _install_intent_judge(listener, verdict)
        listener._process_transcript("thanks", utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted == "thanks"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_speech_after_window_requires_wake_word(self, _print):
        """Once the window has expired, wake-word-free speech is rejected."""
        listener, _tts = _create_listener(hot_window_seconds=0.05, echo_tolerance=0.02)

        listener.echo_detector.track_tts_start("Short answer.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # Sleep past the 0.05s window so it expires.
        time.sleep(0.1)
        assert not listener.state_manager.is_hot_window_active()

        # No wake word in the text, so this has to be rejected.
        verdict = _make_judgment(directed=True, query="tell me more")
        _install_intent_judge(listener, verdict)
        listener._process_transcript("tell me more", utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted == ""
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_speech_after_window_with_wake_word_accepted(self, _print):
        """After expiry, speech that contains the wake word is still accepted."""
        listener, _tts = _create_listener(hot_window_seconds=0.05, echo_tolerance=0.02)

        listener.echo_detector.track_tts_start("Short answer.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # Sleep past the 0.05s window so it expires.
        time.sleep(0.1)
        assert not listener.state_manager.is_hot_window_active()

        # The wake word is present, so the wake word detection fallback applies.
        verdict = _make_judgment(directed=True, query="what time is it")
        _install_intent_judge(listener, verdict)
        listener._process_transcript("jarvis what time is it", utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted != ""
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: Echo is caught early (before beep and intent judge)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.unit
class TestEchoCaughtBeforeBeepAndIntentJudge:
    """Hot-window echo must be rejected BEFORE the beep and the intent judge.

    Catching it that early prevents:
    1. A false beep on echo (user hears a beep, then nothing happens)
    2. The intent judge blocking the audio loop for seconds on echo
    3. The hot window growing indefinitely through repeated echo resets
    """

    @patch("builtins.print")
    def test_echo_in_hot_window_does_not_trigger_beep(self, _print):
        """A transcript matching the TTS output must not start the thinking beep."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        listener.cfg.tune_enabled = True
        spoken = "Tbilisi is a must-see especially the colourful old town."

        listener.echo_detector.track_tts_start(spoken)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # This judge would accept, but for echo it should NOT be reached at all.
        would_accept = _make_judgment(directed=True, query="tbilisi is a must-see")
        _install_intent_judge(listener, would_accept)

        listener._process_transcript(
            "Tbilisi is a must-see especially the colourful old town",
            utterance_energy=0.01,
        )

        # The beep must not be running ...
        assert not _is_beeping(listener)
        # ... and the echo must not have produced an accepted query.
        assert _accepted_query(listener) == ""
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_echo_in_hot_window_skips_intent_judge(self, _print):
        """Echo rejected early must never invoke the intent judge."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        spoken = "For breathtaking scenery you should explore the mountainous regions."

        listener.echo_detector.track_tts_start(spoken)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        would_accept = _make_judgment(
            directed=True, query="explore the mountainous regions"
        )
        judge = _install_intent_judge(listener, would_accept)

        listener._process_transcript(
            "For breathtaking scenery you should explore the mountainous regions like Steneti",
            utterance_energy=0.01,
        )

        # The judge must not have been consulted.
        judge.judge.assert_not_called()
        assert _accepted_query(listener) == ""
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_echo_does_not_extend_hot_window(self, _print):
        """Rejecting echo must NOT reset or extend the hot window timer.

        Earlier, every echo chunk called reset_hot_window_expiry() and pushed
        the window out by another full duration. Several chunks in a row made
        the window last 6+ seconds instead of 3, so speech long after TTS was
        still treated as hot window input.
        """
        listener, _tts = _create_listener(hot_window_seconds=0.10, echo_tolerance=0.02)
        spoken = "The answer is sunny and warm."

        listener.echo_detector.track_tts_start(spoken)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        state = listener.state_manager
        # Remember when the hot window started.
        started_at = state._hot_window_start_time

        # The echo is processed and should be dropped by the early check.
        listener._process_transcript(
            "the answer is sunny and warm", utterance_energy=0.01
        )

        # An unchanged start time proves the timer was NOT reset.
        assert state._hot_window_start_time == started_at

        # Sleep past the original 0.10s window; it must have expired.
        time.sleep(0.15)
        assert not state.is_hot_window_active()
        state.stop()

    @patch("builtins.print")
    def test_non_echo_in_hot_window_still_triggers_beep(self, _print):
        """Genuine (non-echo) hot window speech should still get the early beep."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)
        listener.cfg.tune_enabled = True
        spoken = "The weather is sunny today."

        listener.echo_detector.track_tts_start(spoken)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        verdict = _make_judgment(directed=True, query="what about tomorrow")
        _install_intent_judge(listener, verdict)

        listener._process_transcript("what about tomorrow", utterance_energy=0.01)

        accepted = _accepted_query(listener)
        assert accepted == "what about tomorrow"
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_multiple_echo_chunks_do_not_stack_window_extensions(self, _print):
        """Several echo chunks in a row must not keep extending the hot window.

        Real scenario: one TTS response is split into 2+ Whisper chunks. Each
        chunk used to reset the timer, giving a window of
        N*hot_window_seconds. Echo is now caught early with no timer reset.
        """
        listener, _tts = _create_listener(hot_window_seconds=0.10, echo_tolerance=0.02)
        spoken = "Tbilisi is a must-see. For breathtaking scenery explore Svaneti."

        listener.echo_detector.track_tts_start(spoken)
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # Feed the first and then the second echo chunk.
        echo_chunks = (
            "Tbilisi is a must-see especially the colourful old town",
            "For breathtaking scenery you should explore Steneti",
        )
        for chunk in echo_chunks:
            listener._process_transcript(chunk, utterance_energy=0.01)

        # Neither chunk may have been accepted.
        assert _accepted_query(listener) == ""

        # The window still expires on its original schedule.
        time.sleep(0.15)
        assert not listener.state_manager.is_hot_window_active()

        # After expiry, speech needs a wake word again.
        verdict = _make_judgment(directed=True, query="what the hell")
        _install_intent_judge(listener, verdict)
        listener._process_transcript("what the hell", utterance_energy=0.01)
        assert _accepted_query(listener) == ""
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: Speech without wake word outside hot window is ignored
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.unit
class TestSpeechIgnoredOutsideHotWindow:
    """Default idle state: speech without a wake word is ignored entirely.

    When no hot window is active and no wake word is present there must be no
    beep, no intent judge query and no action.
    """

    @patch("builtins.print")
    def test_complete_sentence_without_wake_word_ignored(self, _print):
        """A full sentence with no wake word and no hot window is ignored."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        # The judge would accept if asked, but that must not matter here.
        _install_intent_judge(listener, _make_judgment(
            directed=True, query="what is the meaning of life"))

        listener._process_transcript(
            "what is the meaning of life",
            utterance_energy=0.01,
        )

        assert _accepted_query(listener) == ""
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_no_beep_no_intent_for_background_chatter(self, _print):
        """Background conversation without wake word triggers no beep and
        no intent judge invocation."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)
        listener.cfg.tune_enabled = True

        judge = _install_intent_judge(listener, _make_judgment(
            directed=True, query="pass the salt"))

        listener._process_transcript(
            "hey can you pass the salt please",
            utterance_energy=0.01,
        )

        assert _accepted_query(listener) == ""
        # Pin both promises from the test name: with no engagement signal
        # (no wake word, no hot window, no TTS) the thinking beep stays off
        # and the intent judge is never consulted.
        assert not _is_beeping(listener)
        judge.judge.assert_not_called()
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_multiple_utterances_after_hot_window_all_ignored(self, _print):
        """Multiple consecutive utterances after hot window expires are all
        ignored if they lack a wake word. The system stays in wake word mode."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        listener.echo_detector.track_tts_start("The answer is 42.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        # Force the hot window to expire instead of waiting out the 3s.
        listener.state_manager.expire_hot_window()
        assert not listener.state_manager.is_hot_window_active()

        # Install a judge that would accept everything.
        _install_intent_judge(listener, _make_judgment(
            directed=True, query="first remark"))

        # First utterance: no wake word, no hot window.
        listener._process_transcript("I think it might rain later", utterance_energy=0.01)
        assert _accepted_query(listener) == ""

        # Second utterance: still no wake word, still no hot window.
        _install_intent_judge(listener, _make_judgment(
            directed=True, query="second remark"))
        listener._process_transcript("yeah the forecast said so", utterance_energy=0.01)
        assert _accepted_query(listener) == ""

        # Third utterance carries the wake word, so THIS one should work.
        _install_intent_judge(listener, _make_judgment(
            directed=True, query="will it rain"))
        listener._process_transcript("jarvis will it rain today", utterance_energy=0.01)
        assert "rain" in _accepted_query(listener)
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_speech_long_after_any_tts_ignored(self, _print):
        """Speech arriving long after any TTS activity is ignored without
        wake word, even if the intent judge says directed."""
        listener, _ = _create_listener(echo_tolerance=0.02, hot_window_seconds=3.0)

        # TTS start/finish is recorded, but no hot window is scheduled,
        # which simulates a stale session.
        listener.echo_detector.track_tts_start("Old response.")
        listener.echo_detector.track_tts_finish()

        _install_intent_judge(listener, _make_judgment(
            directed=True, query="hey what time is it"))

        # Utterance timestamps are taken from the current wall clock.
        now = time.time()
        listener._process_transcript(
            "hey what time is it",
            utterance_energy=0.01,
            utterance_start_time=now,
            utterance_end_time=now + 1.0,
        )

        assert _accepted_query(listener) == ""
        listener.state_manager.stop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests: Stale wake timestamp must not leak across utterances
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.unit
class TestStaleWakeTimestampAcrossUtterances:
    """A wake word in one utterance must not authorise the next utterance.

    After the intent judge rejects a wake-worded utterance, a following
    utterance without a wake word must not be accepted merely because the
    previous one had a wake word.

    Real-world bug: the user said "Jarvis, remember..." (rejected by the
    judge), then "Hey Google, TV off." The judge still saw the earlier
    "Jarvis" in its buffer and returned directed=true with query="tv off".
    The verification guard `_wake_timestamp is not None` short-circuited to
    true because the timestamp was never cleared, so the unrelated
    "Hey Google" command was accepted.
    """

    @patch("builtins.print")
    def test_rejected_wake_utterance_does_not_vouch_for_next_utterance(self, _print):
        """An earlier rejected wake-worded utterance must not authorise a later
        one that has no wake word."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.3)

        # Utterance one contains "jarvis"; the judge rejects it as not directed.
        rejection = _make_judgment(
            directed=False,
            query="",
            confidence="high",
            reasoning="statement to self, not directed",
        )
        _install_intent_judge(listener, rejection)

        now = time.time()
        listener._process_transcript(
            "jarvis i want you to remember that my other office days are thursdays",
            utterance_energy=0.01,
            utterance_start_time=now,
            utterance_end_time=now + 2.0,
        )
        assert _accepted_query(listener) == ""

        # Utterance two has no wake word, yet the judge hallucinates
        # directed=true (e.g. the earlier "jarvis" is still in its buffer).
        hallucinated = _make_judgment(
            directed=True,
            query="tv off",
            confidence="high",
            reasoning="synthesised from buffer",
        )
        _install_intent_judge(listener, hallucinated)

        listener._process_transcript(
            "hey google, tv off.",
            utterance_energy=0.01,
            utterance_start_time=now + 5.0,
            utterance_end_time=now + 6.0,
        )

        # No wake word in this utterance and no hot window: it must be rejected.
        accepted = _accepted_query(listener)
        assert accepted == "", (
            "Second utterance without wake word must not be accepted just "
            "because a prior utterance set _wake_timestamp")
        listener.state_manager.stop()
|
|
|
|
|
|
@pytest.mark.unit
class TestIntentJudgeGating:
    """Pure ambient speech must never reach the intent judge.

    Running the judge on every utterance blocks the audio loop for up to
    `intent_judge_timeout_sec` per piece of background chatter, which can
    cascade into UI freezes when many utterances queue up behind a slow or
    loaded Ollama. The judge is only worth calling when there is an
    engagement signal: wake word, hot window, or active TTS.
    """

    @patch("builtins.print")
    def test_judge_not_called_for_ambient_speech(self, _print):
        """With no wake word / hot window / TTS, the judge must not be hit."""
        listener, _tts = _create_listener()

        verdict = _make_judgment(directed=False, query="")
        mock_judge = _install_intent_judge(listener, verdict)

        # Nothing signals engagement: no hot window, no TTS, no wake word.
        ambient = "random background chatter about the weather"
        listener._process_transcript(ambient, utterance_energy=0.01)

        assert mock_judge.judge.call_count == 0, (
            "Intent judge must be gated on an engagement signal; ambient "
            "speech should skip the judge to avoid blocking the audio loop")
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_judge_called_when_wake_word_detected(self, _print):
        """An utterance containing the wake word does reach the judge."""
        listener, _tts = _create_listener()

        verdict = _make_judgment(directed=True, query="what time is it")
        mock_judge = _install_intent_judge(listener, verdict)

        listener._process_transcript("jarvis what time is it", utterance_energy=0.01)

        assert mock_judge.judge.call_count == 1
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_judge_called_in_hot_window(self, _print):
        """An utterance during the hot window does reach the judge."""
        listener, _tts = _create_listener(hot_window_seconds=3.0, echo_tolerance=0.02)

        listener.echo_detector.track_tts_start("Here you go.")
        _simulate_tts_finish(listener)
        _wait_for_hot_window_active(listener)

        verdict = _make_judgment(directed=True, query="thanks")
        mock_judge = _install_intent_judge(listener, verdict)

        listener._process_transcript("thanks", utterance_energy=0.01)

        assert mock_judge.judge.call_count == 1
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_judge_skipped_for_short_utterance_during_tts(self, _print):
        """Short utterances (<=3 words) during active TTS bypass the judge.

        The fast text-based stop-command check already covers short
        interruptions such as "stop" / "shut up" while TTS is speaking.
        Routing them to the judge would block the audio loop for the judge's
        timeout on every short echo chunk during playback.
        """
        listener, _mock_tts = _create_listener(tts_speaking=True)

        verdict = _make_judgment(directed=False, query="")
        mock_judge = _install_intent_judge(listener, verdict)

        listener._process_transcript("uh huh yeah", utterance_energy=0.01)

        assert mock_judge.judge.call_count == 0, (
            "Short utterances during TTS must be handled by the stop-command "
            "path, not the judge, to avoid blocking the audio loop")
        listener.state_manager.stop()

    @patch("builtins.print")
    def test_judge_called_for_longer_utterance_during_tts(self, _print):
        """Longer utterances (>3 words) during TTS still reach the judge.

        Active TTS is itself an engagement signal: the user may be cutting in
        with a real follow-up or correction, and the judge has to see it to
        catch intents that the fast text-based stop-command check misses.
        """
        listener, _mock_tts = _create_listener(tts_speaking=True)

        verdict = _make_judgment(directed=True, query="what about tomorrow's weather")
        mock_judge = _install_intent_judge(listener, verdict)

        # More than 3 words, no stop-command keywords, and not an echo.
        interruption = "actually what about tomorrow's weather"
        listener._process_transcript(interruption, utterance_energy=0.01)

        assert mock_judge.judge.call_count == 1
        listener.state_manager.stop()
|