Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
This commit is contained in:
170
tests/test_short_query_echo.py
Normal file
170
tests/test_short_query_echo.py
Normal file
@@ -0,0 +1,170 @@
|
||||
"""
|
||||
Test that short legitimate queries are not incorrectly rejected as echo.
|
||||
|
||||
The hot window echo detection uses length-aware processing:
|
||||
- Short queries (<=4 words): Skip fast rejection entirely, let intent judge handle
|
||||
- Longer queries (>4 words): Use threshold 70 for fast rejection
|
||||
|
||||
This prevents false positives on "tell me more", "how", "weather" etc.
|
||||
while still catching actual partial echoes from long TTS responses.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from jarvis.listening.echo_detection import EchoDetector
|
||||
|
||||
|
||||
class TestShortQueryBehavior:
    """Test that short queries are handled appropriately.

    The fast echo rejection path is SKIPPED for queries <=4 words.
    These tests verify the thresholds that WOULD apply if used,
    demonstrating why we skip fast rejection for short queries.
    """

    @pytest.fixture
    def detector(self):
        """Return a fresh ``EchoDetector`` for each test."""
        return EchoDetector()

    @pytest.fixture
    def weather_tts(self):
        """Return a long weather-report TTS response to compare queries against."""
        return (
            "The weather in London is currently overcast with light rain "
            "showers and the temperature is around 8 degrees celsius. "
            "Would you like me to provide more details?"
        )

    def test_partial_ratio_matches_substrings_falsely(self, detector, weather_tts):
        """Demonstrate why we skip fast rejection for short queries.

        partial_ratio finds substrings, causing false positives:
        - 'how' matches 's**how**ers' with 100%
        - 'weather' matches exactly with 100%
        - 'more details' matches exactly with 100%

        This is why queries <=4 words skip fast rejection.
        """
        # These short queries would be incorrectly rejected at any reasonable threshold
        false_positive_queries = [
            "how",  # Substring of 'showers'
            "weather",  # Exact word match
            "more details",  # Exact phrase match
            "light rain",  # Exact phrase match
            "the",  # Common word
        ]

        for query in false_positive_queries:
            # These all get high scores from partial_ratio
            result = detector._check_text_similarity(query, weather_tts, threshold=85)
            # We're demonstrating these WOULD be rejected, which is why we skip them
            assert result is True, f"'{query}' should match at threshold 85 (demonstrating the problem)"

    def test_legitimate_short_queries_pass_intent_judge(self, detector, weather_tts):
        """Short queries that don't match TTS should be accepted by intent judge.

        These queries have low similarity scores and would pass even with fast rejection,
        but they still go through intent judge for proper context-aware handling.

        Note: only the text-similarity check is exercised here; the intent
        judge itself is not invoked by this test.
        """
        legitimate_queries = [
            "yes",
            "no",
            "what about tomorrow",
            "sounds good",
            "thanks",
        ]

        for query in legitimate_queries:
            # Verify these have low similarity - would pass fast rejection if applied
            result = detector._check_text_similarity(query, weather_tts, threshold=85)
            # The message is only shown on failure, so it states the expectation
            # that was violated rather than claiming the check succeeded.
            assert result is False, (
                f"'{query}' should have low similarity to the TTS response at threshold 85"
            )
|
||||
|
||||
|
||||
class TestLongerEchoDetection:
    """Check the similarity-based echo check on utterances longer than four words."""

    @pytest.fixture
    def detector(self):
        """Provide a new ``EchoDetector`` instance per test."""
        return EchoDetector()

    @pytest.fixture
    def weather_tts(self):
        """Provide the weather-report TTS response used as the echo source."""
        sentences = (
            "The weather in London is currently overcast with light rain "
            "showers and the temperature is around 8 degrees celsius.",
            "Would you like me to provide more details?",
        )
        return " ".join(sentences)

    def test_longer_echo_detected_at_threshold_70(self, detector, weather_tts):
        """Fragments of the TTS text longer than 4 words must match at threshold 70."""
        spoken_back = (
            "the weather in london is currently overcast",  # 7 words
            "light rain showers and the temperature is around",  # 8 words
            "would you like me to provide more details",  # 8 words
        )

        for heard in spoken_back:
            n_words = len(heard.split())
            # Guard against a fixture entry that is too short for this test.
            assert n_words > 4, f"Test setup error: '{heard}' has only {n_words} words"
            is_echo = detector._check_text_similarity(heard, weather_tts, threshold=70)
            assert is_echo is True, f"Echo '{heard[:30]}...' ({n_words} words) should be detected at threshold 70"

    def test_partial_echo_with_transcription_errors(self, detector):
        """A long partial echo containing a transcription slip must still match."""
        spoken_text = (
            "The temperature is around 8 degrees celsius at 18:48 UTC. "
            "Would you like me to provide more weather information?"
        )
        detector.track_tts_start(spoken_text)

        # Transcript differs from the TTS text: "18:48" was heard as "1848" (10 words).
        garbled_transcript = "the temperature is around 8 degrees celsius at 1848 UTC"

        # Expected to match at threshold 70 despite the difference.
        matched = detector._check_text_similarity(garbled_transcript, spoken_text, threshold=70)
        assert matched is True, "Partial echo with transcription errors should be detected"

    def test_longer_followups_not_rejected(self, detector, weather_tts):
        """Genuine follow-up questions longer than 4 words must not match the TTS text."""
        followup_questions = (
            "what will the weather be like tomorrow",  # 7 words
            "should i bring an umbrella with me today",  # 8 words
            "thanks jarvis that was very helpful information",  # 7 words
            "can you tell me about the weekend forecast",  # 8 words
        )

        for question in followup_questions:
            n_words = len(question.split())
            # Guard against a fixture entry that is too short for this test.
            assert n_words > 4, f"Test setup error: '{question}' has only {n_words} words"
            matched = detector._check_text_similarity(question, weather_tts, threshold=70)
            assert matched is False, f"Follow-up '{question}' should not be rejected at threshold 70"
|
||||
|
||||
|
||||
class TestLengthBoundary:
    """Exercise queries sitting on either side of the 4-word cutoff."""

    @pytest.fixture
    def detector(self):
        """Provide a new ``EchoDetector`` instance per test."""
        return EchoDetector()

    def test_four_word_query_skips_fast_rejection(self, detector):
        """A 4-word query is at the boundary where fast rejection is skipped.

        This records the design decision only; it checks the word count of
        the sample query and makes no similarity assertion.
        """
        words = "tell me more please".split()  # 4 words
        assert len(words) == 4

    def test_five_word_query_uses_fast_rejection(self, detector):
        """A 5-word query that repeats the TTS text must match at threshold 70."""
        spoken_text = "The weather today is nice and sunny in London"
        heard = "the weather today is nice"  # 5 words, a prefix of the TTS text

        assert len(heard.split()) == 5
        matched = detector._check_text_similarity(heard, spoken_text, threshold=70)
        assert matched is True, "5-word echo should be detected at threshold 70"

    def test_five_word_non_echo_passes(self, detector):
        """A 5-word query unrelated to the TTS text must not match at threshold 70."""
        spoken_text = "The weather today is nice and sunny in London"
        heard = "what about the rain tomorrow"  # 5 words, not an echo

        assert len(heard.split()) == 5
        matched = detector._check_text_similarity(heard, spoken_text, threshold=70)
        assert matched is False, "5-word non-echo should pass threshold 70"
|
||||
Reference in New Issue
Block a user