Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
214 lines
9.0 KiB
Python
214 lines
9.0 KiB
Python
"""
Unit tests for the prompts module.

Tests model size detection and prompt component selection.
"""
|
|
|
|
import pytest
|
|
|
|
|
|
class TestModelSizeDetection:
    """Checks how detect_model_size classifies model names as SMALL or LARGE."""

    @pytest.mark.parametrize(
        ("model_name", "expected_small"),
        [
            # Names expected to classify as SMALL
            ("gemma4", True),
            ("gemma4:e2b", True),
            ("gemma4:e4b", True),
            ("llama3.2:3b", True),
            ("llama3.2:1b", True),
            ("mistral:7b", True),
            ("gemma:7b", True),
            ("phi3:3b", True),
            ("qwen2:7b", True),
            # Size tag delimited by '-' or '_' rather than ':'
            ("model-3b-instruct", True),
            ("model_1b_chat", True),
            # Names expected to classify as LARGE
            ("gpt-oss:20b", False),
            ("llama3.1:8b", False),
            ("qwen2.5:14b", False),
            ("gemma2:27b", False),
            ("llama3:70b", False),
            ("mixtral:8x7b", False),  # 8x7b is effectively large
            # Degenerate inputs fall back to LARGE
            (None, False),  # no name at all
            ("", False),  # empty name
            ("custom-model", False),  # name carries no size indicator
        ],
    )
    def test_detect_model_size(self, model_name, expected_small):
        """detect_model_size maps each parametrized name to the expected size.

        ``expected_small`` is True when the name should resolve to
        ModelSize.SMALL and False when it should resolve to ModelSize.LARGE.
        """
        from jarvis.reply.prompts import ModelSize, detect_model_size

        if expected_small:
            expected = ModelSize.SMALL
        else:
            expected = ModelSize.LARGE

        result = detect_model_size(model_name)

        assert result == expected, (
            f"Expected {expected.value} for '{model_name}', got {result.value}"
        )
|
|
|
|
|
|
class TestPromptComponents:
    """Tests for get_system_prompts function.

    Covers the prompt bundle returned for each ModelSize: which components
    are populated, which section headers the tool constraints carry, and
    what to_list() emits.
    """

    def test_small_model_has_tool_constraints(self):
        """Small models get explicit tool constraints covering every rule.

        Constraints are phrased language-agnostically (per CLAUDE.md: no
        hardcoded English greetings / English unit names / etc.), so we
        assert against BEHAVIOURAL sections, not specific tokens in one
        language.
        """
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.SMALL)

        assert prompts.tool_constraints is not None
        text = prompts.tool_constraints.lower()
        # Each section header must be present — they structure the rules.
        for section in (
            "greeting handling",
            "user instructions",
            "unknown named entities",
            "arguments the tool can auto-derive",
        ):
            assert section in text, f"Missing section {section!r} in small-model constraints"

    def test_large_model_has_tool_constraints(self):
        """Large models also get constraints.

        The large variant is a shorter restatement of the named-entity and
        auto-derive rules. gpt-oss:20b and similar confabulate specifics and
        occasionally ask for tool args the tool already auto-derives, so the
        large variant is not a no-op.
        """
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.LARGE)

        assert prompts.tool_constraints is not None
        text = prompts.tool_constraints.lower()
        assert "unknown named entities" in text
        assert "arguments the tool can auto-derive" in text

    def test_small_model_balanced_incentives(self):
        """Small models get balanced tool incentives - use tools but not for greetings."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.SMALL)
        incentives = prompts.tool_incentives.lower()

        # Should encourage tool use for legitimate cases
        assert "use tools" in incentives
        # But mention greetings specifically
        assert "greeting" in incentives

    def test_large_model_proactive_incentives(self):
        """Large models get proactive tool incentives."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.LARGE)

        # Should encourage proactive tool use
        assert "proactively" in prompts.tool_incentives.lower()

    def test_both_sizes_have_core_components(self):
        """Both model sizes have the core prompt components."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        for size in [ModelSize.SMALL, ModelSize.LARGE]:
            prompts = get_system_prompts(size)

            # All core components should be present (truthy, i.e. non-empty)
            assert prompts.asr_note, f"{size.value} missing asr_note"
            assert prompts.inference_guidance, f"{size.value} missing inference_guidance"
            assert prompts.tool_incentives, f"{size.value} missing tool_incentives"
            assert prompts.voice_style, f"{size.value} missing voice_style"
            assert prompts.tool_guidance, f"{size.value} missing tool_guidance"

    def test_to_list_returns_non_empty_strings(self):
        """to_list() returns only non-empty prompt strings."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        for size in [ModelSize.SMALL, ModelSize.LARGE]:
            prompts = get_system_prompts(size)
            prompt_list = prompts.to_list()

            assert len(prompt_list) >= 5, f"{size.value} should have at least 5 components"
            assert all(isinstance(p, str) and p for p in prompt_list), \
                f"{size.value} has empty or non-string components"

    def test_small_model_to_list_includes_constraints(self):
        """Small model to_list() includes tool constraints."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.SMALL)
        prompt_list = prompts.to_list()

        # The small-model bundle lists 6 components.
        assert len(prompt_list) == 6

        # Match on the constraints' own section header. A bare "greeting"
        # substring proves nothing here: the small-model tool_incentives
        # mention greetings as well, so that weaker check can pass even
        # when tool_constraints is absent from the list.
        has_constraints = any("greeting handling" in p.lower() for p in prompt_list)
        assert has_constraints, "Small model should include greeting constraints"

    def test_large_model_to_list_includes_constraints(self):
        """Large model to_list() now includes tool constraints too.

        The large variant covers the named-entity and auto-derive rules —
        without it, larger models confabulate for unfamiliar entities or nag
        the user for args the tool already auto-derives (field failure
        2026-04-20).
        """
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        prompts = get_system_prompts(ModelSize.LARGE)
        prompt_list = prompts.to_list()

        # Both sizes now carry all 6 components.
        assert len(prompt_list) == 6

        has_named_entity_rule = any("UNKNOWN NAMED ENTITIES" in p for p in prompt_list)
        assert has_named_entity_rule, "Large model should include the named-entity rule"
        has_auto_derive_rule = any("AUTO-DERIVE" in p for p in prompt_list)
        assert has_auto_derive_rule, "Large model should include the auto-derive rule"
|
|
|
|
|
|
class TestPromptLanguageAgnosticism:
    """Verifies the small-model tool constraints avoid language-specific wording."""

    @staticmethod
    def _small_model_constraints():
        """Return the small-model tool constraints, lower-cased for matching."""
        from jarvis.reply.prompts import get_system_prompts, ModelSize

        return get_system_prompts(ModelSize.SMALL).tool_constraints.lower()

    def test_greeting_rule_is_language_agnostic(self):
        """The greeting rule names a category, not per-language greeting words.

        CLAUDE.md forbids hardcoded language patterns. Enumerating surface
        forms such as 'ni hao' or 'bonjour' would bias the model toward the
        listed languages and suggest coverage that is not really there, so
        the rule is expected to describe the semantic category ("a greeting
        or casual social phrase, whatever language") and leave recognition
        to the model's own multilingual understanding.
        """
        constraints = self._small_model_constraints()

        # The greeting section header itself has to exist.
        assert "greeting handling" in constraints

        # Language-specific greeting tokens must not be baked into the prompt.
        stale_tokens = ("ni hao", "bonjour", "hola", "merhaba", "ciao")
        for token in stale_tokens:
            assert token not in constraints, (
                f"Stale language-specific token {token!r} is still hardcoded in "
                "the constraints — the rule should describe the category, not "
                "enumerate language-specific surface forms."
            )

        # At least one of the language-agnostic phrasings has to appear.
        agnostic_phrasings = ("whatever language", "any language")
        assert any(phrase in constraints for phrase in agnostic_phrasings)

    def test_greeting_constraint_is_narrow(self):
        """The greeting constraint stays narrow instead of restricting tools broadly."""
        constraints = self._small_model_constraints()

        # Greetings have to be addressed explicitly.
        assert "greeting" in constraints
        # A blanket restriction worded as "only when explicitly ..." must not
        # appear: it would suppress legitimate tool use (news, weather, etc.).
        assert not ("only when explicitly" in constraints)
|