Add Discord-native hybrid front-end for Jarvis (bot + bridge)

Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join plus voice receive/playback, and a pluggable VNC screen broadcast (selfbot live / noVNC / screenshot).
- bridge/ (Python, Flask): wraps jarvis STT, run_reply_engine, and Piper TTS behind a thin localhost HTTP API.
- .env.example, scripts/ (start_bridge, start_bot, dev), a README rewrite, docs/language-comparison.md, and docs/vnc-xfce-setup.md.

Language decision: hybrid (Python brain + Node/bun Discord layer), because Discord blocks video from bots; native screen broadcast only works via a Node selfbot library.
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions
--- a/tests/test_echo_detection.py
+++ b/tests/test_echo_detection.py
@@ -0,0 +1,821 @@
+"""
+Tests for echo detection module.
+
+These tests verify that TTS echo detection properly identifies
+when heard audio is an echo of TTS output vs genuine user speech.
+"""
+
+import time
+import pytest
+from jarvis.listening.echo_detection import EchoDetector
+
+
+class TestTextNormalization:
+    """Tests for how _normalize_for_comparison rewrites degree symbols and parentheses."""
+
+    def test_normalize_celsius_symbol(self):
+        """'9°C' is spelled out as '9 degrees celsius' and no '°' remains."""
+        normalized = EchoDetector()._normalize_for_comparison("It's 9°C outside")
+        assert "9 degrees celsius" in normalized
+        assert "°" not in normalized
+
+    def test_normalize_fahrenheit_symbol(self):
+        """'48°F' is spelled out as '48 degrees fahrenheit'."""
+        normalized = EchoDetector()._normalize_for_comparison("It's 48°F")
+        assert "48 degrees fahrenheit" in normalized
+
+    def test_normalize_generic_degree(self):
+        """A degree sign with no unit letter becomes the word 'degrees'."""
+        normalized = EchoDetector()._normalize_for_comparison("Turn it to 180°")
+        assert "180 degrees" in normalized
+
+    def test_normalize_with_space(self):
+        """A space between the number and the degree sign is tolerated."""
+        normalized = EchoDetector()._normalize_for_comparison("It's 9 °C")
+        assert "9 degrees celsius" in normalized
+
+    def test_normalize_removes_parentheses(self):
+        """Parentheses are dropped while both temperatures are still spelled out."""
+        normalized = EchoDetector()._normalize_for_comparison("It's 48°F (9°C)")
+        # Neither bracket may survive normalization...
+        for bracket in "()":
+            assert bracket not in normalized
+        # ...but both temperature readings must.
+        for spoken in ("48 degrees fahrenheit", "9 degrees celsius"):
+            assert spoken in normalized
+
+
+class TestTextSimilarity:
+    """Tests for _check_text_similarity, which compares heard text with TTS text."""
+
+    def test_exact_match(self):
+        """Identical strings count as a match."""
+        matched = EchoDetector()._check_text_similarity("hello world", "hello world")
+        assert matched is True
+
+    def test_case_insensitive_match(self):
+        """Differences in letter case do not prevent a match."""
+        matched = EchoDetector()._check_text_similarity("Hello World", "hello world")
+        assert matched is True
+
+    def test_partial_match(self):
+        """Heard text that is a fragment of the TTS text counts as a match."""
+        spoken = "the weather today is sunny and warm"
+        fragment = "sunny and warm"
+        matched = EchoDetector()._check_text_similarity(fragment, spoken)
+        assert matched is True
+
+    def test_no_match(self):
+        """Unrelated sentences are not a match."""
+        matched = EchoDetector()._check_text_similarity(
+            "what time is it", "the weather is nice"
+        )
+        assert matched is False
+
+    def test_degree_symbol_match(self):
+        """A '°C' in the TTS text matches the spelled-out form in the heard text."""
+        spoken = "It's currently 9°C outside"
+        transcript = "It's currently 9 degrees celsius outside"
+        matched = EchoDetector()._check_text_similarity(transcript, spoken)
+        assert matched is True
+
+    def test_empty_strings(self):
+        """An empty string on either side (or on both) is never a match."""
+        detector = EchoDetector()
+        for heard, tts in (("", "hello"), ("hello", ""), ("", "")):
+            assert detector._check_text_similarity(heard, tts) is False
+
+    def test_higher_threshold_in_hot_window(self):
+        """The threshold argument decides whether a near-miss transcript matches.
+
+        The misspelled transcript is similar to the TTS text but not identical
+        (the original author estimated its score at roughly 89-92), so it must
+        pass at threshold 80 and fail at threshold 95.
+        """
+        detector = EchoDetector()
+        spoken = "the weather forecast"
+        misspelled = "the weether forcast"  # typos - similar but not exact
+        # Evaluate both thresholds first, then assert, so both calls always run.
+        verdicts = {
+            limit: detector._check_text_similarity(misspelled, spoken, threshold=limit)
+            for limit in (80, 95)
+        }
+        # The lenient threshold sits below the pair's score: match.
+        assert verdicts[80] is True
+        # The strict threshold sits above the pair's score: no match.
+        assert verdicts[95] is False
+
+
+class TestEchoRejection:
+    """Tests for the main echo rejection decision logic.
+
+    Every test drives EchoDetector.should_reject_as_echo after optionally
+    registering a TTS utterance with track_tts_start / track_tts_finish.
+    Several tests read the wall clock via time.time(), so the order of the
+    tracking calls relative to the clock reads is part of the scenario.
+    """
+
+    def test_no_rejection_without_tts(self):
+        """Doesn't reject if no TTS was ever played."""
+        # No track_tts_start call is made, so the detector has no TTS to compare against.
+        detector = EchoDetector()
+        assert detector.should_reject_as_echo("hello", current_energy=0.01) is False
+
+    def test_rejects_echo_during_tts(self):
+        """Rejects matching text during TTS playback."""
+        detector = EchoDetector()
+        tts_text = "the weather is nice today"
+        detector.track_tts_start(tts_text)
+
+        # Simulate utterance starting right after TTS starts
+        utterance_start = time.time()
+
+        # "nice today" is the tail of the TTS text registered above.
+        result = detector.should_reject_as_echo(
+            heard_text="nice today",
+            current_energy=0.01,
+            is_during_tts=True,
+            tts_rate=200.0,
+            utterance_start_time=utterance_start
+        )
+        assert result is True
+
+    def test_accepts_different_text_during_tts(self):
+        """Accepts non-matching text during TTS (interruption)."""
+        detector = EchoDetector()
+        detector.track_tts_start("the weather is nice")
+
+        # "stop" does not occur anywhere in the TTS text.
+        result = detector.should_reject_as_echo(
+            heard_text="stop",
+            current_energy=0.05,
+            is_during_tts=True,
+            tts_rate=200.0,
+            utterance_start_time=time.time()
+        )
+        assert result is False
+
+    def test_rejects_echo_in_cooldown_window(self):
+        """Rejects matching text shortly after TTS finishes."""
+        detector = EchoDetector()
+        tts_text = "hello world"
+        detector.track_tts_start(tts_text, baseline_energy=0.01)
+        detector.track_tts_finish()
+
+        # Simulate utterance starting immediately after TTS
+        utterance_start = time.time()
+
+        result = detector.should_reject_as_echo(
+            heard_text="hello world",
+            current_energy=0.008,  # Low energy (below baseline * threshold)
+            is_during_tts=False,
+            utterance_start_time=utterance_start
+        )
+        assert result is True
+
+    def test_accepts_high_energy_in_cooldown(self):
+        """Accepts speech with high energy even in cooldown (real user)."""
+        # NOTE(review): presumably energy above baseline_energy * energy_spike_threshold
+        # (0.01 * 2.0 here) is treated as genuine speech - confirm against EchoDetector.
+        detector = EchoDetector(energy_spike_threshold=2.0)
+        detector.track_tts_start("hello world", baseline_energy=0.01)
+        detector.track_tts_finish()
+
+        utterance_start = time.time()
+
+        result = detector.should_reject_as_echo(
+            heard_text="hello world",
+            current_energy=0.05,  # High energy (5x baseline)
+            is_during_tts=False,
+            utterance_start_time=utterance_start
+        )
+        assert result is False
+
+    def test_accepts_after_extended_window(self):
+        """Accepts speech after extended echo window expires."""
+        detector = EchoDetector(echo_tolerance=0.3)
+        detector.track_tts_start("hello world")
+        detector.track_tts_finish()
+
+        # Simulate utterance starting well after TTS (2 seconds)
+        # NOTE(review): the utterance start is placed ~2s in the future while the
+        # finish time is moved ~2s into the past, so the simulated gap is ~4s.
+        utterance_start = time.time() + 2.0
+        detector._last_tts_finish_time = time.time() - 2.0  # TTS finished 2s ago
+
+        result = detector.should_reject_as_echo(
+            heard_text="hello world",
+            current_energy=0.01,
+            is_during_tts=False,
+            utterance_start_time=utterance_start
+        )
+        assert result is False
+
+    # NOTE(review): only two tests in this class carry the "unit" marker; it
+    # presumably needs registering in the pytest configuration - confirm.
+    @pytest.mark.unit
+    def test_rejects_echo_during_tts_with_timing_drift(self):
+        """Rejects echo during TTS even when timing-based segment matching fails.
+
+        When TTS timing drifts (plays faster/slower than expected), segment
+        matching may check the wrong portion of the TTS text. The fallback
+        full-TTS check should catch these cases for long utterances.
+        """
+        detector = EchoDetector()
+        # Weather forecast TTS
+        tts_text = (
+            "the weather tomorrow is expected to be mostly cloudy with a high "
+            "of around 8 degrees celsius 46.4 degrees fahrenheit and a low of "
+            "2 degrees celsius 35.6 degrees fahrenheit it should be quite breezy"
+        )
+        detector.track_tts_start(tts_text)
+
+        # Simulate TTS playing faster than expected - utterance starts early in TTS
+        # but the actual audio is from the middle/end (timing drift)
+        # The start time recorded by track_tts_start is read back from the detector.
+        tts_start = detector._tts_start_time
+        # Utterance starts 2 seconds after TTS, but this is actually audio from later in TTS
+        utterance_start = tts_start + 2.0
+
+        # This fragment is the end of the TTS text, but segment matching will
+        # look at the wrong segment due to timing drift
+        heard = "35.6 degrees fahrenheit it should be quite breezy"
+
+        result = detector.should_reject_as_echo(
+            heard_text=heard,
+            current_energy=0.01,
+            is_during_tts=True,
+            tts_rate=200.0,
+            utterance_start_time=utterance_start
+        )
+        # Should be rejected via full-TTS fallback (8 words, 100% similarity)
+        assert result is True, "Should reject echo via full-TTS fallback when segment matching fails"
+
+    @pytest.mark.unit
+    def test_accepts_stop_command_during_tts_fallback(self):
+        """Stop commands should not trigger the full-TTS fallback rejection.
+
+        The fallback only applies to utterances > 4 words, so short commands
+        like 'stop' should still be accepted during TTS.
+        """
+        detector = EchoDetector()
+        detector.track_tts_start("the weather tomorrow will be sunny and warm")
+
+        # A one-word utterance that does not occur in the TTS text.
+        result = detector.should_reject_as_echo(
+            heard_text="stop",
+            current_energy=0.05,
+            is_during_tts=True,
+            tts_rate=200.0,
+            utterance_start_time=time.time()
+        )
+        assert result is False, "Stop command should not be rejected during TTS"
+
+
+class TestLeadingEchoCleanup:
+    """Tests for cleanup_leading_echo, which strips a leading TTS echo from heard text."""
+
+    def test_cleanup_leading_overlap(self):
+        """Leading words that repeat the tail of the TTS text are stripped."""
+        detector = EchoDetector()
+        detector._last_tts_text = "the weather today is sunny"
+
+        cleaned = detector.cleanup_leading_echo("is sunny what time is it")
+        assert cleaned == "what time is it"
+
+    def test_no_cleanup_when_no_overlap(self):
+        """Text that shares nothing with the TTS tail comes back unchanged."""
+        detector = EchoDetector()
+        detector._last_tts_text = "the weather is nice"
+
+        transcript = "what time is it"
+        assert detector.cleanup_leading_echo(transcript) == transcript
+
+    def test_no_cleanup_short_overlap(self):
+        """A single overlapping word is too little to trigger cleanup."""
+        detector = EchoDetector()
+        detector._last_tts_text = "the weather is nice"
+
+        # Only "nice" overlaps with the end of the TTS text.
+        transcript = "nice what time is it"
+        assert detector.cleanup_leading_echo(transcript) == transcript
+
+    def test_cleanup_requires_remainder(self):
+        """If stripping the echo would leave nothing, the text is returned as is."""
+        detector = EchoDetector()
+        detector._last_tts_text = "the weather is nice"
+
+        # The whole transcript is the TTS tail; there is no remainder to keep.
+        transcript = "is nice"
+        assert detector.cleanup_leading_echo(transcript) == transcript
+
+    def test_cleanup_fuzzy_word_match(self):
+        """Echo is stripped despite a mis-transcribed word (Tbilisi vs T-Valisi)."""
+        detector = EchoDetector()
+        detector._last_tts_text = (
+            "I don't have a direct way to predict tomorrow's weather, "
+            "but I can check for you. Let me search for the forecast in Tbilisi."
+        )
+
+        # The echoed sentence (with the city name mangled) followed by new speech.
+        transcript = (
+            "i don't have a direct way to predict tomorrow's weather "
+            "but i can check for you let me search for the forecast in t-valisi "
+            "you already searched so i can see the tool calls"
+        )
+        cleaned = detector.cleanup_leading_echo(transcript)
+        assert "you already searched" in cleaned
+        assert "forecast" not in cleaned
+
+
+class TestHotWindowEchoDetection:
+    """Tests for echo detection in hot window mode.
+
+    Two kinds of checks live here: end-to-end calls to should_reject_as_echo
+    with in_hot_window=True, and direct calls to _check_text_similarity at
+    explicit thresholds (70 and 92).
+    """
+
+    def test_higher_threshold_in_hot_window(self):
+        """Uses stricter matching in hot window to allow more follow-up speech."""
+        detector = EchoDetector()
+        detector.track_tts_start("tell me about the weather today")
+        detector.track_tts_finish()
+
+        utterance_start = time.time()
+
+        # Text that's somewhat similar but not the same
+        result = detector.should_reject_as_echo(
+            heard_text="tell me more",
+            current_energy=0.01,
+            is_during_tts=False,
+            utterance_start_time=utterance_start,
+            in_hot_window=True  # Hot window mode
+        )
+        # Should be less likely to reject in hot window due to higher threshold
+        # (The actual behavior depends on similarity scores)
+        assert result is False  # "tell me more" is different enough
+
+    def test_partial_echo_from_long_tts(self):
+        """Detects partial echo from a long TTS response.
+
+        This tests the scenario where TTS outputs a long response and Whisper
+        picks up only a portion of it, potentially with transcription errors.
+        Common in rooms with echo/reverb at higher volumes.
+        """
+        detector = EchoDetector()
+        # Simulate a long weather response
+        tts_text = (
+            "You're in London, and I've got the latest weather update for you: "
+            "it's currently overcast with light rain showers, and the temperature "
+            "is around 8 degrees celsius at 18:48 UTC. I'd recommend grabbing an "
+            "umbrella to stay dry. Would you like me to suggest any outdoor "
+            "activities or provide more weather details?"
+        )
+        detector.track_tts_start(tts_text)
+        detector.track_tts_finish()
+
+        # Partial echo that Whisper picked up (with some transcription variations)
+        partial_echo = "the temperature is around 8 degrees celsius. I'd recommend grabbing an umbrella"
+
+        # Should detect as echo - this is clearly part of the TTS output.
+        # The similarity check takes both texts explicitly, so no utterance
+        # timestamp is needed here.
+        result = detector._check_text_similarity(partial_echo, tts_text, threshold=70)
+        assert result is True, "Should detect partial echo at threshold 70"
+
+    def test_echo_with_whisper_transcription_errors(self):
+        """Detects echo even with Whisper transcription errors.
+
+        Whisper sometimes mishears numbers and times (e.g., "18:48" as "1848").
+        The fuzzy matching should still catch these as echo.
+        """
+        detector = EchoDetector()
+        tts_text = "the temperature is 8 degrees celsius at 18:48 UTC"
+        detector.track_tts_start(tts_text)
+        detector.track_tts_finish()
+
+        # Whisper transcription with errors
+        heard_with_errors = "the temperature is around 8 degrees celsius at 1848 UTC"
+
+        # Should still detect similarity despite transcription errors
+        result = detector._check_text_similarity(heard_with_errors, tts_text, threshold=70)
+        assert result is True, "Should detect echo despite transcription errors"
+
+    def test_echo_question_from_tts(self):
+        """Detects when a question from TTS is echoed back.
+
+        TTS often ends with questions like "Would you like more details?"
+        These should be detected as echo, not new user queries.
+        """
+        detector = EchoDetector()
+        tts_text = (
+            "The weather is nice today. Would you like me to suggest "
+            "any outdoor activities or provide more weather details?"
+        )
+        detector.track_tts_start(tts_text)
+        detector.track_tts_finish()
+
+        # Echo of the question portion
+        echoed_question = "would you like me to suggest any outdoor activities"
+
+        result = detector._check_text_similarity(echoed_question, tts_text, threshold=70)
+        assert result is True, "Should detect echoed question from TTS"
+
+    def test_accepts_genuine_followup_in_hot_window(self):
+        """Accepts genuine follow-up that differs from TTS content."""
+        detector = EchoDetector()
+        tts_text = "The weather in London is currently overcast with rain"
+        detector.track_tts_start(tts_text)
+        detector.track_tts_finish()
+
+        utterance_start = time.time()
+
+        # Genuine follow-up question - different content
+        followup = "what about tomorrow's forecast"
+
+        result = detector.should_reject_as_echo(
+            heard_text=followup,
+            current_energy=0.03,
+            is_during_tts=False,
+            utterance_start_time=utterance_start,
+            in_hot_window=True
+        )
+        assert result is False, "Should accept genuine follow-up question"
+
+    def test_threshold_70_catches_partial_matches(self):
+        """Verifies threshold 70 catches partial echo matches.
+
+        When using threshold 70 in hot window for fast rejection,
+        partial echoes with ~75% similarity should be caught.
+        """
+        detector = EchoDetector()
+        tts_text = "London has about 8 hours of daylight in winter months"
+
+        # Partial echo with some differences
+        partial_echo = "London has about 8 hours of daylight"
+
+        # At threshold 70, should match (this is clearly a partial echo)
+        result_70 = detector._check_text_similarity(partial_echo, tts_text, threshold=70)
+        assert result_70 is True, "Threshold 70 should catch partial echo"
+
+        # At threshold 92 (default hot window), might not match as strictly.
+        # This is fine - the intent judge handles ambiguous cases. The call is
+        # kept as a smoke check only (it must not raise); its outcome depends
+        # on the fuzzy match algorithm, so it is deliberately not asserted.
+        detector._check_text_similarity(partial_echo, tts_text, threshold=92)
+
+
+class TestSalvageDuringTTS:
+    """Tests for cleanup_leading_echo_during_tts.
+
+    The method under test is given utterances that begin while TTS is still
+    playing, i.e. transcripts that mix a TTS echo with genuine user speech,
+    and is expected to hand back only the user's part.
+    """
+
+    @pytest.fixture
+    def detector(self):
+        return EchoDetector()
+
+    def test_salvages_user_speech_after_echo(self, detector):
+        """The user's sentences survive; the echoed TTS tail does not.
+
+        Scenario: the user starts talking during TTS, so the mic records the
+        end of the TTS output followed by the user's actual question.
+        """
+        detector._last_tts_text = (
+            "According to the BBC Weather forecast, tomorrow in Kensington is expected "
+            "to be quite gloomy with overcast conditions. You might want to bundle up "
+            "and plan your outdoor activities accordingly."
+        )
+        detector._tts_start_time = 1000.0
+
+        # Tail of the TTS output followed by what the user really said.
+        transcript = (
+            "You might want to bundle up and plan your outdoor activities accordingly. "
+            "Okay, let's switch the topic now. I want to talk about philosophy."
+        )
+
+        # The utterance begins 10 seconds after the TTS start time set above.
+        salvaged = detector.cleanup_leading_echo_during_tts(
+            transcript, tts_rate=200, utterance_start_time=1010.0
+        )
+        lowered = salvaged.lower()
+
+        assert "bundle up" not in lowered, "Echo portion should be removed"
+        assert "philosophy" in lowered, "User's actual question should be preserved"
+        assert "switch the topic" in lowered, "User's speech should be preserved"
+
+    def test_salvage_with_timing_mismatch(self, detector):
+        """Salvage still works when the timing estimate is unreliable.
+
+        Real-world scenario: mic timing does not line up exactly with TTS
+        timing because of audio processing delays, pre-roll buffer, etc.
+        """
+        detector._last_tts_text = (
+            "It's going to be quite chilly. You might want to bundle up "
+            "and plan your outdoor activities accordingly."
+        )
+        detector._tts_start_time = 1000.0
+
+        # End of the TTS output plus the user's question. The timing estimate
+        # would be off, so the match has to come from the full TTS text.
+        transcript = "plan your outdoor activities accordingly. What do you think life is about?"
+
+        salvaged = detector.cleanup_leading_echo_during_tts(
+            transcript, tts_rate=200, utterance_start_time=1005.0
+        )
+        lowered = salvaged.lower()
+
+        assert "outdoor activities" not in lowered, "Echo should be removed"
+        assert "life is about" in lowered, "User's question should be preserved"
+
+    def test_no_salvage_when_no_overlap(self, detector):
+        """A transcript unrelated to the TTS text is returned untouched."""
+        detector._last_tts_text = "The weather is nice today"
+        detector._tts_start_time = 1000.0
+
+        transcript = "What time is it?"
+        salvaged = detector.cleanup_leading_echo_during_tts(
+            transcript, tts_rate=200, utterance_start_time=1005.0
+        )
+
+        assert salvaged == transcript, "Should return original when no echo overlap"
+
+    def test_no_salvage_when_all_echo(self, detector):
+        """A transcript that is nothing but echo is returned untouched."""
+        detector._last_tts_text = "The weather is nice and sunny today"
+        detector._tts_start_time = 1000.0
+
+        # Every heard word is the end of the TTS text, so nothing is left to keep.
+        transcript = "nice and sunny today"
+        salvaged = detector.cleanup_leading_echo_during_tts(
+            transcript, tts_rate=200, utterance_start_time=1005.0
+        )
+
+        # With no remainder after the echo, the original comes back.
+        assert salvaged == transcript
+
+    def test_echo_not_in_salvaged_output(self, detector):
+        """No fragment of the echoed question leaks into the salvaged text.
+
+        This is the critical check: whatever is returned for further
+        processing must not contain words the assistant itself spoke.
+        """
+        detector._last_tts_text = (
+            "According to the forecast, it will rain tomorrow. "
+            "Would you like me to suggest indoor activities?"
+        )
+        detector._tts_start_time = 1000.0
+
+        transcript = "Would you like me to suggest indoor activities? No thanks, tell me about philosophy instead."
+        salvaged = detector.cleanup_leading_echo_during_tts(
+            transcript, tts_rate=200, utterance_start_time=1008.0
+        )
+        lowered = salvaged.lower()
+
+        # Critical: none of the echoed phrases may remain.
+        assert "suggest indoor activities" not in lowered, "Echo phrase must not be in output"
+        assert "would you like" not in lowered, "Echo phrase must not be in output"
+        # The user's own request has to survive.
+        assert "philosophy" in lowered, "User's request should be preserved"
+
+
+class TestRealWorldSalvageScenarios:
+    """Regression tests for salvage scenarios taken from real usage.
+
+    The cases recorded here are issues encountered in production:
+    - temperature notation differences (5.7°C vs "5.7 degrees Celsius")
+    - the user appending speech to a TTS echo
+    - Whisper transcripts that differ from the TTS text
+    """
+
+    @pytest.fixture
+    def detector(self):
+        return EchoDetector()
+
+    def test_temperature_notation_mismatch(self, detector):
+        """User speech is salvaged when the temperature is transcribed differently.
+
+        Real scenario: the TTS text contains "5.7°C" while Whisper writes
+        "5.7 degrees Celsius". Salvage used to fail here because word-level
+        matching did not line the two forms up.
+        """
+        detector._last_tts_text = "It's going to be a bit chilly tomorrow in Kensington, with overcast skies and a temperature around 5.7°C."
+
+        # Same sentence with the temperature spelled out, then the user's remark.
+        transcript = "It's going to be a bit chilly tomorrow in Kensington with overcast skies and a temperature around 5.7 degrees Celsius. Nice, you remembered not to say it in Fahrenheit."
+
+        lowered = detector.cleanup_leading_echo(transcript).lower()
+
+        # The user's follow-up has to survive...
+        assert "nice" in lowered, "User's follow-up should be preserved"
+        assert "fahrenheit" in lowered, "User's comment should be preserved"
+        # ...and the echoed sentence has to go.
+        assert "chilly tomorrow" not in lowered, "Echo should be removed"
+
+    def test_user_appends_speech_to_full_tts_echo(self, detector):
+        """The full TTS sentence is stripped when the user answers right after it.
+
+        The mic records the entire TTS output followed by the user's reply;
+        cleanup_leading_echo should return only the reply.
+        """
+        detector._last_tts_text = "Would you like some help finding one?"
+
+        # TTS sentence immediately followed by the user's answer.
+        transcript = "Would you like some help finding one? No thanks, I'm good."
+
+        lowered = detector.cleanup_leading_echo(transcript).lower()
+
+        # The reply is kept (with or without the apostrophe)...
+        assert "no thanks" in lowered, "User's response should be preserved"
+        assert "i'm good" in lowered or "im good" in lowered, "User's response should be preserved"
+        # ...and the echoed question is gone.
+        assert "would you like" not in lowered, "Echo should be removed"
+
+    def test_salvage_preserves_user_question(self, detector):
+        """A follow-up question after a fully echoed sentence is kept."""
+        detector._last_tts_text = "The weather tomorrow will be cloudy with a high of 12 degrees."
+
+        transcript = "The weather tomorrow will be cloudy with a high of 12 degrees. What about the day after?"
+
+        lowered = detector.cleanup_leading_echo(transcript).lower()
+
+        assert "what about" in lowered, "User's question should be preserved"
+        assert "day after" in lowered, "User's question should be preserved"
+        assert "cloudy" not in lowered, "Echo should be removed"
+
+    def test_no_salvage_when_heard_matches_tts_exactly(self, detector):
+        """A transcript identical to the TTS text is returned unchanged.
+
+        Guards against salvaging a stray trailing word out of a pure echo.
+        """
+        detector._last_tts_text = "Would you like some help finding one?"
+
+        # Identical to the TTS text, so there is no user speech in it.
+        transcript = "Would you like some help finding one?"
+
+        salvaged = detector.cleanup_leading_echo(transcript)
+
+        # Full echo with nothing to salvage: the original comes back.
+        assert salvaged == transcript, "Should return original when no user speech to salvage"
+
+    def test_salvage_with_minor_transcription_errors(self, detector):
+        """cleanup_leading_echo copes with small transcript differences."""
+        detector._last_tts_text = "I can see you're interested in finding out more about this topic."
+
+        # Missing apostrophe and punctuation, as Whisper may produce.
+        transcript = "I can see youre interested in finding out more about this topic tell me about philosophy"
+
+        salvaged = detector.cleanup_leading_echo(transcript)
+
+        # Whether the request is salvaged depends on how far the texts diverge,
+        # so the only requirement is that a value comes back without a crash.
+        assert salvaged is not None
+
+
+class TestFullTTSFallbackSalvage:
+    """Tests for salvaging user speech in the full-TTS fallback path.
+
+    The full-TTS fallback (threshold 70) catches echoes with significant timing drift
+    that segment matching misses. But when the heard text contains TTS echo + user speech,
+    we should salvage the user speech instead of rejecting the entire utterance.
+
+    Real bug scenario:
+    - TTS: "...Temperature will be around 10°C (50°F). A great day to grab a cuppa."
+    - Heard: "50 degrees Fahrenheit. A great day to grab a cup. Tell me a random topic."
+    - OLD behavior: Rejected entire utterance as echo (74.6% similarity to full TTS)
+    - NEW behavior: Salvage "Tell me a random topic" from the suffix
+    """
+
+    @pytest.fixture
+    def detector(self):
+        return EchoDetector()
+
+    def test_salvages_user_speech_from_mixed_echo(self, detector):
+        """User speech after TTS echo should not be rejected.
+
+        The similarity match finds the echo prefix, but there's user speech
+        at the end that should be salvaged.
+        """
+        tts_text = (
+            "I think there's been a mix-up! We were just talking about the weather "
+            "in Kensington, London. Let me check again. According to the tool, "
+            "tomorrow's forecast for Kensington is: Overcast with a chance of light "
+            "drizzle. Temperature will be around 10°C (50°F). A great day to grab "
+            "a cuppa and enjoy the outdoors."
+        )
+        detector.track_tts_start(tts_text)
+        detector._tts_start_time = 1000.0
+
+        # Heard: end of TTS + user speech
+        heard = (
+            "50 degrees Fahrenheit. A great day to grab a cup and enjoy the outdoors. "
+            "Fine, yeah. Then tell me a random topic about philosophy."
+        )
+
+        # This should NOT be rejected because there's salvageable user speech
+        result = detector.should_reject_as_echo(
+            heard_text=heard,
+            current_energy=0.01,
+            is_during_tts=True,
+            tts_rate=200,
+            utterance_start_time=1012.0  # Near end of TTS
+        )
+
+        assert result is False, (
+            "Should NOT reject when there's user speech to salvage. "
+            "The full-TTS fallback should check for salvageable suffix."
+        )
+
+    def test_still_rejects_pure_echo_in_fallback(self, detector):
+        """Pure echo (no user speech) should still be rejected by fallback.
+
+        Counterpart to the mixed-echo case: the transcript here consists only
+        of sentences taken from the tracked TTS reply, so
+        ``should_reject_as_echo`` must return ``True``.
+        """
+        tts_text = (
+            "I think there's been a mix-up! We were just talking about the weather. "
+            "Let me check again. Tomorrow's forecast is overcast with light drizzle. "
+            "Temperature will be around 10°C."
+        )
+        detector.track_tts_start(tts_text)
+        # Pin the start time after tracking so utterance_start_time below is
+        # measured against a fixed value rather than the real clock.
+        # NOTE(review): presumably track_tts_start() records the current time
+        # -- confirm against EchoDetector.
+        detector._tts_start_time = 1000.0
+
+        # Heard: just echo, no user speech
+        # The last two TTS sentences, with "10°C" spelled out as
+        # "10 degrees Celsius" in the transcript.
+        heard = "Tomorrow's forecast is overcast with light drizzle. Temperature will be around 10 degrees Celsius."
+
+        result = detector.should_reject_as_echo(
+            heard_text=heard,
+            current_energy=0.01,
+            is_during_tts=True,
+            tts_rate=200,
+            utterance_start_time=1005.0
+        )
+
+        # Identity check: the method must return the bool True itself.
+        assert result is True, "Pure echo should still be rejected"
+
+    def test_salvage_suffix_from_echo_returns_user_speech(self, detector):
+        """Salvaging keeps the user's words and drops the replayed TTS tail.
+
+        The transcript opens with the final TTS sentence and continues with a
+        new request; only the new request may show up in the salvaged text.
+        """
+        # Seed the detector's private state directly instead of going through
+        # track_tts_start().
+        spoken = "The weather is nice. Would you like to hear more?"
+        detector._last_tts_text = spoken
+        detector._tts_start_time = 1000.0
+
+        transcript = "Would you like to hear more? No thanks, tell me about philosophy."
+        salvaged = detector._salvage_suffix_from_echo(
+            transcript,
+            tts_rate=200,
+            utterance_start_time=1005.0,
+        )
+
+        assert salvaged is not None
+        salvaged_lower = salvaged.lower()
+        assert "philosophy" in salvaged_lower, "User speech should be salvaged"
+        assert "would you like" not in salvaged_lower, "Echo should be removed"
+
+    def test_salvage_returns_none_for_pure_echo(self, detector):
+        """A transcript identical to the TTS text yields nothing new.
+
+        Two outcomes are accepted from ``_salvage_suffix_from_echo``: ``None``
+        (nothing salvaged) or the heard text handed back unchanged.
+        """
+        spoken = "The weather is nice today."
+        detector._last_tts_text = spoken
+        detector._tts_start_time = 1000.0
+
+        # Word-for-word repeat of the TTS text; there is no user speech in it.
+        transcript = "The weather is nice today."
+
+        outcome = detector._salvage_suffix_from_echo(
+            transcript,
+            tts_rate=200,
+            utterance_start_time=1005.0,
+        )
+
+        # None is fine as-is; anything else must be the untouched transcript.
+        if outcome is not None:
+            assert outcome == transcript
+
+
+class TestRightmostEchoBoundarySalvage:
+    """Field regression: follow-up that starts with a Whisper-mangled echo tail.
+
+    Captured from a real session on 2026-04-20:
+      TTS said:  "The movie Possessor is a psychological thriller that
+                  explores themes of surveillance and identity."
+      User said: "Who made it?"
+      Whisper heard: "laws, themes of surveillance and identity. Who made it?"
+
+    The user started speaking inside the 3s follow-up hot window, and
+    Whisper merged the mic-captured echo tail with the real follow-up.
+    Every salvage path in the codebase before this commit either returned
+    the text unchanged (exact-word cleanup — fails because 'laws' doesn't
+    match 'explores') or truncated the salvage to just 'made it?' (fuzzy
+    prefix iteration picks the SHORTEST suffix first). Both are wrong:
+    the whole follow-up — 'Who made it?' — must survive so the intent
+    judge can dispatch it.
+    """
+
+    @staticmethod
+    def _words(text):
+        """Return the lower-cased words of *text* with edge punctuation removed.
+
+        Used for whole-word assertions, so that a short word such as "it"
+        cannot be satisfied by a longer word that merely contains it
+        (for example "identity").
+        """
+        tokens = (token.strip(".,!?;:\"'") for token in text.lower().split())
+        return [token for token in tokens if token]
+
+    @pytest.fixture
+    def detector_with_tts(self):
+        """Return ``(detector, now)`` for a detector that has just finished TTS.
+
+        The detector's private state is set directly: the TTS text from the
+        field capture, a start time 10 s before ``now``, a finish time 1 s
+        before ``now`` and an exact duration of 9 s.
+        """
+        d = EchoDetector()
+        tts = (
+            "The movie Possessor is a psychological thriller that "
+            "explores themes of surveillance and identity."
+        )
+        # Uses the module-level ``time`` import; a local re-import under an
+        # alias is not needed.
+        now = time.time()
+        d._last_tts_text = tts
+        d._tts_start_time = now - 10.0
+        d._last_tts_finish_time = now - 1.0
+        d._tts_exact_duration = 9.0
+        return d, now
+
+    def test_salvages_full_follow_up_after_whisper_mangled_echo_prefix(self, detector_with_tts):
+        """The whole follow-up survives and no echo-tail word leaks through."""
+        detector, _now = detector_with_tts
+        heard = "laws, themes of surveillance and identity.  Who made it?"
+
+        result = detector.salvage_after_echo_tail(heard)
+
+        assert result is not None, "expected a salvage, got None (rejection)"
+        lowered = result.lower()
+        words = self._words(result)
+        # All three words of the real follow-up must survive the salvage.
+        # Checked as whole words: a plain substring test for "it" would also
+        # be satisfied by "identity".
+        for expected in ("who", "made", "it"):
+            assert expected in words, f"follow-up word {expected!r} missing from {result!r}"
+        # None of the echo-tail filler should leak through. Substring checks
+        # are kept here on purpose: they are the stricter form for absence.
+        for leaked in ("surveillance", "identity", "themes", "laws"):
+            assert leaked not in lowered, f"echo word {leaked!r} leaked into {result!r}"
+
+    def test_returns_none_when_heard_is_pure_echo(self, detector_with_tts):
+        """A transcript made only of the TTS tail gives ``None``."""
+        detector, _now = detector_with_tts
+        heard = "themes of surveillance and identity"
+        # Nothing non-echo after the tail — nothing to salvage.
+        result = detector.salvage_after_echo_tail(heard)
+        assert result is None
+
+    def test_returns_none_when_heard_shares_nothing_with_tts(self, detector_with_tts):
+        """A transcript unrelated to the TTS text gives ``None``."""
+        detector, _now = detector_with_tts
+        heard = "what is the weather tomorrow in London"
+        # No echo prefix at all — no salvage needed; caller keeps the text as-is.
+        result = detector.salvage_after_echo_tail(heard)
+        assert result is None