Files
javis_bot/tests/test_prompts.py
javis-bot c4abf63f38
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Transform isair/jarvis into a Discord-controlled voice assistant running on
the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral),
  voice channel join + voice receive/playback, pluggable VNC screen broadcast
  (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS
  behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite,
  docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because
Discord blocks bot video; native screen broadcast only works via a Node
selfbot library.
2026-06-09 14:51:05 +09:00

214 lines
9.0 KiB
Python

"""
Unit tests for the prompts module.
Tests model size detection and prompt component selection.
"""
import pytest
class TestModelSizeDetection:
    """Checks how ``detect_model_size`` classifies model-name strings."""

    @pytest.mark.parametrize(
        ("model_name", "expected_small"),
        [
            # Names expected to be classified as SMALL.
            ("gemma4", True),
            ("gemma4:e2b", True),
            ("gemma4:e4b", True),
            ("llama3.2:3b", True),
            ("llama3.2:1b", True),
            ("mistral:7b", True),
            ("gemma:7b", True),
            ("phi3:3b", True),
            ("qwen2:7b", True),
            # The size marker may be delimited by '-' or '_' instead of ':'.
            ("model-3b-instruct", True),
            ("model_1b_chat", True),
            # Names expected to be classified as LARGE.
            ("gpt-oss:20b", False),
            ("llama3.1:8b", False),
            ("qwen2.5:14b", False),
            ("gemma2:27b", False),
            ("llama3:70b", False),
            ("mixtral:8x7b", False),  # 8x7b is effectively large
            # Degenerate inputs are all expected to fall back to LARGE.
            (None, False),  # no model name at all
            ("", False),  # empty model name
            ("custom-model", False),  # name carries no size indicator
        ],
    )
    def test_detect_model_size(self, model_name, expected_small):
        """Each model name maps to the expected ``ModelSize`` member.

        ``expected_small`` is True when the name should be classified as
        ``ModelSize.SMALL`` and False when it should be ``ModelSize.LARGE``.
        """
        from jarvis.reply.prompts import ModelSize, detect_model_size

        if expected_small:
            wanted = ModelSize.SMALL
        else:
            wanted = ModelSize.LARGE

        detected = detect_model_size(model_name)

        assert detected == wanted, (
            f"Expected {wanted.value} for '{model_name}', got {detected.value}"
        )
class TestPromptComponents:
    """Tests for get_system_prompts function."""

    def test_small_model_has_tool_constraints(self):
        """Small models get explicit tool constraints covering every rule.

        Constraints are phrased language-agnostically (per CLAUDE.md: no
        hardcoded English greetings / English unit names / etc.), so we
        assert against BEHAVIOURAL sections, not specific tokens in one
        language.
        """
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.SMALL)
        assert prompts.tool_constraints is not None
        text = prompts.tool_constraints.lower()

        # Each section header must be present — they structure the rules.
        for section in (
            "greeting handling",
            "user instructions",
            "unknown named entities",
            "arguments the tool can auto-derive",
        ):
            assert section in text, f"Missing section {section!r} in small-model constraints"

    def test_large_model_has_tool_constraints(self):
        """Large models also get constraints.

        The large variant is a shorter restatement of the named-entity and
        auto-derive rules. gpt-oss:20b and similar confabulate specifics and
        occasionally ask for tool args the tool already auto-derives, so the
        large variant is not a no-op.
        """
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.LARGE)
        assert prompts.tool_constraints is not None
        text = prompts.tool_constraints.lower()

        assert "unknown named entities" in text
        assert "arguments the tool can auto-derive" in text

    def test_small_model_balanced_incentives(self):
        """Small models get balanced tool incentives - use tools but not for greetings."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.SMALL)
        incentives = prompts.tool_incentives.lower()

        # Should encourage tool use for legitimate cases
        assert "use tools" in incentives
        # But mention greetings specifically
        assert "greeting" in incentives

    def test_large_model_proactive_incentives(self):
        """Large models get proactive tool incentives."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.LARGE)

        # Should encourage proactive tool use
        assert "proactively" in prompts.tool_incentives.lower()

    def test_both_sizes_have_core_components(self):
        """Both model sizes have the core prompt components."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        for size in [ModelSize.SMALL, ModelSize.LARGE]:
            prompts = get_system_prompts(size)

            # All core components should be present (truthy, i.e. non-empty).
            assert prompts.asr_note, f"{size.value} missing asr_note"
            assert prompts.inference_guidance, f"{size.value} missing inference_guidance"
            assert prompts.tool_incentives, f"{size.value} missing tool_incentives"
            assert prompts.voice_style, f"{size.value} missing voice_style"
            assert prompts.tool_guidance, f"{size.value} missing tool_guidance"

    def test_to_list_returns_non_empty_strings(self):
        """to_list() returns only non-empty prompt strings."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        for size in [ModelSize.SMALL, ModelSize.LARGE]:
            prompts = get_system_prompts(size)
            prompt_list = prompts.to_list()

            assert len(prompt_list) >= 5, f"{size.value} should have at least 5 components"
            assert all(isinstance(p, str) and p for p in prompt_list), \
                f"{size.value} has empty or non-string components"

    def test_small_model_to_list_includes_constraints(self):
        """Small model to_list() includes tool constraints."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.SMALL)
        prompt_list = prompts.to_list()

        # Should have more items due to tool_constraints
        assert len(prompt_list) == 6

        # Match the constraints' own section header rather than the bare word
        # "greeting": test_small_model_balanced_incentives requires
        # tool_incentives to mention "greeting" too, so the bare word would be
        # satisfied even if tool_constraints were missing from the list.
        has_constraints = any("greeting handling" in p.lower() for p in prompt_list)
        assert has_constraints, "Small model should include greeting constraints"

    def test_large_model_to_list_includes_constraints(self):
        """Large model to_list() now includes tool constraints too.

        The large variant covers the named-entity and auto-derive rules —
        without it, larger models confabulate for unfamiliar entities or nag
        the user for args the tool already auto-derives (field failure
        2026-04-20).
        """
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.LARGE)
        prompt_list = prompts.to_list()

        # Both sizes now carry all 6 components.
        assert len(prompt_list) == 6

        # These two checks are case-sensitive: they look for the upper-case
        # markers exactly as written here.
        has_named_entity_rule = any("UNKNOWN NAMED ENTITIES" in p for p in prompt_list)
        assert has_named_entity_rule, "Large model should include the named-entity rule"

        has_auto_derive_rule = any("AUTO-DERIVE" in p for p in prompt_list)
        assert has_auto_derive_rule, "Large model should include the auto-derive rule"
class TestPromptLanguageAgnosticism:
    """Tests that prompts are language-agnostic."""

    def test_greeting_rule_is_language_agnostic(self):
        """Greeting handling must NOT list language-specific greeting tokens.

        CLAUDE.md forbids hardcoded language patterns — the assistant
        supports arbitrary languages, and listing 'hello' / 'ni hao' /
        'bonjour' both biases the model toward those languages and gives a
        false sense of coverage. The new rule describes the SEMANTIC
        category ("a greeting or casual social phrase, whatever language"),
        letting the model rely on its own multilingual understanding.
        """
        import re

        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.SMALL)
        # Fail with a clear assertion (not an AttributeError from .lower())
        # if the small-model constraints are missing altogether.
        assert prompts.tool_constraints is not None
        constraints = prompts.tool_constraints.lower()

        # The section itself must be present.
        assert "greeting handling" in constraints

        # None of the old language-specific greeting tokens should be
        # hard-coded into the prompt any more.
        for token in ("ni hao", "bonjour", "hola", "merhaba", "ciao"):
            # Match whole words only: a plain substring test would flag
            # unrelated words that merely contain a token (for example
            # "hola" occurs inside "scholar").
            pattern = rf"\b{re.escape(token)}\b"
            assert re.search(pattern, constraints) is None, (
                f"Stale language-specific token {token!r} is still hardcoded in "
                "the constraints — the rule should describe the category, not "
                "enumerate language-specific surface forms."
            )

        # The language-agnostic phrasing must be present.
        assert "whatever language" in constraints or "any language" in constraints

    def test_greeting_constraint_is_narrow(self):
        """Greeting constraint is narrowly scoped, not overly restrictive."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.SMALL)
        # Same guard as above: surface a missing constraints block as an
        # assertion failure rather than an AttributeError.
        assert prompts.tool_constraints is not None
        constraints = prompts.tool_constraints.lower()

        # Should mention greetings specifically
        assert "greeting" in constraints

        # Should NOT have overly broad restrictions like "ONLY use tools when explicitly asked"
        # (This would hurt legitimate tool use for news, weather, etc.)
        assert "only when explicitly" not in constraints