Add Discord-native hybrid front-end for Jarvis (bot + bridge)

Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions
--- a/tests/test_short_query_echo.py
+++ b/tests/test_short_query_echo.py
@@ -0,0 +1,170 @@
+"""
+Test that short legitimate queries are not incorrectly rejected as echo.
+
+The hot window echo detection uses length-aware processing:
+- Short queries (<=4 words): Skip fast rejection entirely, let intent judge handle
+- Longer queries (>4 words): Use threshold 70 for fast rejection
+
+This prevents false positives on "tell me more", "how", "weather" etc.
+while still catching actual partial echoes from long TTS responses.
+"""
+
+import pytest
+from jarvis.listening.echo_detection import EchoDetector
+
+
+class TestShortQueryBehavior:
+    """Test that short queries are handled appropriately.
+
+    The fast echo rejection path is SKIPPED for queries <=4 words.
+    These tests verify the thresholds that WOULD apply if used,
+    demonstrating why we skip fast rejection for short queries.
+    """
+
+    @pytest.fixture
+    def detector(self):
+        return EchoDetector()
+
+    @pytest.fixture
+    def weather_tts(self):
+        return (
+            "The weather in London is currently overcast with light rain "
+            "showers and the temperature is around 8 degrees celsius. "
+            "Would you like me to provide more details?"
+        )
+
+    def test_partial_ratio_matches_substrings_falsely(self, detector, weather_tts):
+        """Demonstrate why we skip fast rejection for short queries.
+
+        partial_ratio finds substrings, causing false positives:
+        - 'how' matches 's**how**ers' with 100%
+        - 'weather' matches exactly with 100%
+        - 'more details' matches exactly with 100%
+
+        This is why queries <=4 words skip fast rejection.
+        """
+        # These short queries would be incorrectly rejected at any reasonable threshold
+        false_positive_queries = [
+            "how",           # Substring of 'showers'
+            "weather",       # Exact word match
+            "more details",  # Exact phrase match
+            "light rain",    # Exact phrase match
+            "the",           # Common word
+        ]
+
+        for query in false_positive_queries:
+            # These all get high scores from partial_ratio
+            result = detector._check_text_similarity(query, weather_tts, threshold=85)
+            # We're demonstrating these WOULD be rejected, which is why we skip them
+            assert result is True, f"'{query}' should match at threshold 85 (demonstrating the problem)"
+
+    def test_legitimate_short_queries_pass_intent_judge(self, detector, weather_tts):
+        """Short queries that don't match TTS should be accepted by intent judge.
+
+        These queries have low similarity scores and would pass even with fast rejection,
+        but they still go through intent judge for proper context-aware handling.
+        """
+        legitimate_queries = [
+            "yes",
+            "no",
+            "what about tomorrow",
+            "sounds good",
+            "thanks",
+        ]
+
+        for query in legitimate_queries:
+            # Verify these have low similarity - would pass fast rejection if applied
+            result = detector._check_text_similarity(query, weather_tts, threshold=85)
+            assert result is False, f"'{query}' has low similarity as expected"
+
+
+class TestLongerEchoDetection:
+    """Test that longer echoes (>4 words) are detected."""
+
+    @pytest.fixture
+    def detector(self):
+        return EchoDetector()
+
+    @pytest.fixture
+    def weather_tts(self):
+        return (
+            "The weather in London is currently overcast with light rain "
+            "showers and the temperature is around 8 degrees celsius. "
+            "Would you like me to provide more details?"
+        )
+
+    def test_longer_echo_detected_at_threshold_70(self, detector, weather_tts):
+        """Longer queries (>4 words) that match TTS should be detected at threshold 70."""
+        actual_echoes = [
+            "the weather in london is currently overcast",  # 7 words
+            "light rain showers and the temperature is around",  # 8 words
+            "would you like me to provide more details",  # 8 words
+        ]
+
+        for echo in actual_echoes:
+            word_count = len(echo.split())
+            assert word_count > 4, f"Test setup error: '{echo}' has only {word_count} words"
+            result = detector._check_text_similarity(echo, weather_tts, threshold=70)
+            assert result is True, f"Echo '{echo[:30]}...' ({word_count} words) should be detected at threshold 70"
+
+    def test_partial_echo_with_transcription_errors(self, detector):
+        """Longer partial echoes with transcription errors should be detected."""
+        tts = (
+            "The temperature is around 8 degrees celsius at 18:48 UTC. "
+            "Would you like me to provide more weather information?"
+        )
+        detector.track_tts_start(tts)
+
+        # Whisper transcription with errors (common in high-volume rooms)
+        echo_with_errors = "the temperature is around 8 degrees celsius at 1848 UTC"  # 10 words
+
+        # This should be detected at threshold 70
+        result = detector._check_text_similarity(echo_with_errors, tts, threshold=70)
+        assert result is True, "Partial echo with transcription errors should be detected"
+
+    def test_longer_followups_not_rejected(self, detector, weather_tts):
+        """Longer follow-up questions (>4 words) should NOT match TTS."""
+        long_followups = [
+            "what will the weather be like tomorrow",  # 7 words
+            "should i bring an umbrella with me today",  # 8 words
+            "thanks jarvis that was very helpful information",  # 7 words
+            "can you tell me about the weekend forecast",  # 8 words
+        ]
+
+        for query in long_followups:
+            word_count = len(query.split())
+            assert word_count > 4, f"Test setup error: '{query}' has only {word_count} words"
+            result = detector._check_text_similarity(query, weather_tts, threshold=70)
+            assert result is False, f"Follow-up '{query}' should not be rejected at threshold 70"
+
+
+class TestLengthBoundary:
+    """Test behavior at the 4-word boundary."""
+
+    @pytest.fixture
+    def detector(self):
+        return EchoDetector()
+
+    def test_four_word_query_skips_fast_rejection(self, detector):
+        """4-word queries skip fast rejection (handled by intent judge)."""
+        # This is a design decision, not an assertion about similarity
+        query = "tell me more please"  # 4 words
+        assert len(query.split()) == 4
+
+    def test_five_word_query_uses_fast_rejection(self, detector):
+        """5-word queries use fast rejection at threshold 70."""
+        tts = "The weather today is nice and sunny in London"
+        query = "the weather today is nice"  # 5 words - matches TTS
+
+        assert len(query.split()) == 5
+        result = detector._check_text_similarity(query, tts, threshold=70)
+        assert result is True, "5-word echo should be detected at threshold 70"
+
+    def test_five_word_non_echo_passes(self, detector):
+        """5-word non-echo queries should pass fast rejection."""
+        tts = "The weather today is nice and sunny in London"
+        query = "what about the rain tomorrow"  # 5 words - doesn't match
+
+        assert len(query.split()) == 5
+        result = detector._check_text_similarity(query, tts, threshold=70)
+        assert result is False, "5-word non-echo should pass threshold 70"