Files
javis_bot/tests/test_short_query_echo.py
javis-bot c4abf63f38
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Transform isair/jarvis into a Discord-controlled voice assistant running on
the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral),
  voice channel join + voice receive/playback, pluggable VNC screen broadcast
  (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS
  behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite,
  docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because
Discord blocks bot video; native screen broadcast only works via a Node
selfbot library.
2026-06-09 14:51:05 +09:00

171 lines
7.2 KiB
Python

"""
Test that short legitimate queries are not incorrectly rejected as echo.
The hot window echo detection uses length-aware processing:
- Short queries (<=4 words): Skip fast rejection entirely, let intent judge handle
- Longer queries (>4 words): Use threshold 70 for fast rejection
This prevents false positives on "tell me more", "how", "weather" etc.
while still catching actual partial echoes from long TTS responses.
"""
import pytest
from jarvis.listening.echo_detection import EchoDetector
class TestShortQueryBehavior:
"""Test that short queries are handled appropriately.
The fast echo rejection path is SKIPPED for queries <=4 words.
These tests verify the thresholds that WOULD apply if used,
demonstrating why we skip fast rejection for short queries.
"""
@pytest.fixture
def detector(self):
return EchoDetector()
@pytest.fixture
def weather_tts(self):
return (
"The weather in London is currently overcast with light rain "
"showers and the temperature is around 8 degrees celsius. "
"Would you like me to provide more details?"
)
def test_partial_ratio_matches_substrings_falsely(self, detector, weather_tts):
"""Demonstrate why we skip fast rejection for short queries.
partial_ratio finds substrings, causing false positives:
- 'how' matches 's**how**ers' with 100%
- 'weather' matches exactly with 100%
- 'more details' matches exactly with 100%
This is why queries <=4 words skip fast rejection.
"""
# These short queries would be incorrectly rejected at any reasonable threshold
false_positive_queries = [
"how", # Substring of 'showers'
"weather", # Exact word match
"more details", # Exact phrase match
"light rain", # Exact phrase match
"the", # Common word
]
for query in false_positive_queries:
# These all get high scores from partial_ratio
result = detector._check_text_similarity(query, weather_tts, threshold=85)
# We're demonstrating these WOULD be rejected, which is why we skip them
assert result is True, f"'{query}' should match at threshold 85 (demonstrating the problem)"
def test_legitimate_short_queries_pass_intent_judge(self, detector, weather_tts):
"""Short queries that don't match TTS should be accepted by intent judge.
These queries have low similarity scores and would pass even with fast rejection,
but they still go through intent judge for proper context-aware handling.
"""
legitimate_queries = [
"yes",
"no",
"what about tomorrow",
"sounds good",
"thanks",
]
for query in legitimate_queries:
# Verify these have low similarity - would pass fast rejection if applied
result = detector._check_text_similarity(query, weather_tts, threshold=85)
assert result is False, f"'{query}' has low similarity as expected"
class TestLongerEchoDetection:
"""Test that longer echoes (>4 words) are detected."""
@pytest.fixture
def detector(self):
return EchoDetector()
@pytest.fixture
def weather_tts(self):
return (
"The weather in London is currently overcast with light rain "
"showers and the temperature is around 8 degrees celsius. "
"Would you like me to provide more details?"
)
def test_longer_echo_detected_at_threshold_70(self, detector, weather_tts):
"""Longer queries (>4 words) that match TTS should be detected at threshold 70."""
actual_echoes = [
"the weather in london is currently overcast", # 7 words
"light rain showers and the temperature is around", # 8 words
"would you like me to provide more details", # 8 words
]
for echo in actual_echoes:
word_count = len(echo.split())
assert word_count > 4, f"Test setup error: '{echo}' has only {word_count} words"
result = detector._check_text_similarity(echo, weather_tts, threshold=70)
assert result is True, f"Echo '{echo[:30]}...' ({word_count} words) should be detected at threshold 70"
def test_partial_echo_with_transcription_errors(self, detector):
"""Longer partial echoes with transcription errors should be detected."""
tts = (
"The temperature is around 8 degrees celsius at 18:48 UTC. "
"Would you like me to provide more weather information?"
)
detector.track_tts_start(tts)
# Whisper transcription with errors (common in high-volume rooms)
echo_with_errors = "the temperature is around 8 degrees celsius at 1848 UTC" # 10 words
# This should be detected at threshold 70
result = detector._check_text_similarity(echo_with_errors, tts, threshold=70)
assert result is True, "Partial echo with transcription errors should be detected"
def test_longer_followups_not_rejected(self, detector, weather_tts):
"""Longer follow-up questions (>4 words) should NOT match TTS."""
long_followups = [
"what will the weather be like tomorrow", # 7 words
"should i bring an umbrella with me today", # 8 words
"thanks jarvis that was very helpful information", # 7 words
"can you tell me about the weekend forecast", # 8 words
]
for query in long_followups:
word_count = len(query.split())
assert word_count > 4, f"Test setup error: '{query}' has only {word_count} words"
result = detector._check_text_similarity(query, weather_tts, threshold=70)
assert result is False, f"Follow-up '{query}' should not be rejected at threshold 70"
class TestLengthBoundary:
"""Test behavior at the 4-word boundary."""
@pytest.fixture
def detector(self):
return EchoDetector()
def test_four_word_query_skips_fast_rejection(self, detector):
"""4-word queries skip fast rejection (handled by intent judge)."""
# This is a design decision, not an assertion about similarity
query = "tell me more please" # 4 words
assert len(query.split()) == 4
def test_five_word_query_uses_fast_rejection(self, detector):
"""5-word queries use fast rejection at threshold 70."""
tts = "The weather today is nice and sunny in London"
query = "the weather today is nice" # 5 words - matches TTS
assert len(query.split()) == 5
result = detector._check_text_similarity(query, tts, threshold=70)
assert result is True, "5-word echo should be detected at threshold 70"
def test_five_word_non_echo_passes(self, detector):
"""5-word non-echo queries should pass fast rejection."""
tts = "The weather today is nice and sunny in London"
query = "what about the rain tomorrow" # 5 words - doesn't match
assert len(query.split()) == 5
result = detector._check_text_similarity(query, tts, threshold=70)
assert result is False, "5-word non-echo should pass threshold 70"