Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
This commit is contained in:
213
tests/test_prompts.py
Normal file
213
tests/test_prompts.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""
|
||||
Unit tests for the prompts module.
|
||||
|
||||
Tests model size detection and prompt component selection.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestModelSizeDetection:
|
||||
"""Tests for detect_model_size function."""
|
||||
|
||||
@pytest.mark.parametrize("model_name,expected_small", [
|
||||
# Small models (should return SMALL)
|
||||
("gemma4", True),
|
||||
("gemma4:e2b", True),
|
||||
("gemma4:e4b", True),
|
||||
("llama3.2:3b", True),
|
||||
("llama3.2:1b", True),
|
||||
("mistral:7b", True),
|
||||
("gemma:7b", True),
|
||||
("phi3:3b", True),
|
||||
("qwen2:7b", True),
|
||||
# Various separators
|
||||
("model-3b-instruct", True),
|
||||
("model_1b_chat", True),
|
||||
# Large models (should return LARGE)
|
||||
("gpt-oss:20b", False),
|
||||
("llama3.1:8b", False),
|
||||
("qwen2.5:14b", False),
|
||||
("gemma2:27b", False),
|
||||
("llama3:70b", False),
|
||||
("mixtral:8x7b", False), # 8x7b is effectively large
|
||||
# Edge cases
|
||||
(None, False), # None defaults to LARGE
|
||||
("", False), # Empty defaults to LARGE
|
||||
("custom-model", False), # No size indicator = LARGE
|
||||
])
|
||||
def test_detect_model_size(self, model_name, expected_small):
|
||||
"""Model size detection works for various model names."""
|
||||
from jarvis.reply.prompts import detect_model_size, ModelSize
|
||||
|
||||
result = detect_model_size(model_name)
|
||||
expected = ModelSize.SMALL if expected_small else ModelSize.LARGE
|
||||
|
||||
assert result == expected, \
|
||||
f"Expected {expected.value} for '{model_name}', got {result.value}"
|
||||
|
||||
|
||||
class TestPromptComponents:
|
||||
"""Tests for get_system_prompts function."""
|
||||
|
||||
def test_small_model_has_tool_constraints(self):
|
||||
"""Small models get explicit tool constraints covering every rule.
|
||||
|
||||
Constraints are phrased language-agnostically (per CLAUDE.md: no
|
||||
hardcoded English greetings / English unit names / etc.), so we
|
||||
assert against BEHAVIOURAL sections, not specific tokens in one
|
||||
language.
|
||||
"""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
prompts = get_system_prompts(ModelSize.SMALL)
|
||||
|
||||
assert prompts.tool_constraints is not None
|
||||
text = prompts.tool_constraints.lower()
|
||||
# Each section header must be present — they structure the rules.
|
||||
for section in (
|
||||
"greeting handling",
|
||||
"user instructions",
|
||||
"unknown named entities",
|
||||
"arguments the tool can auto-derive",
|
||||
):
|
||||
assert section in text, f"Missing section {section!r} in small-model constraints"
|
||||
|
||||
def test_large_model_has_tool_constraints(self):
|
||||
"""Large models also get constraints — a shorter restatement of the
|
||||
named-entity and auto-derive rules. gpt-oss:20b and similar
|
||||
confabulate specifics and occasionally ask for tool args the tool
|
||||
already auto-derives, so the large variant is not a no-op."""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
prompts = get_system_prompts(ModelSize.LARGE)
|
||||
|
||||
assert prompts.tool_constraints is not None
|
||||
text = prompts.tool_constraints.lower()
|
||||
assert "unknown named entities" in text
|
||||
assert "arguments the tool can auto-derive" in text
|
||||
|
||||
def test_small_model_balanced_incentives(self):
|
||||
"""Small models get balanced tool incentives - use tools but not for greetings."""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
prompts = get_system_prompts(ModelSize.SMALL)
|
||||
|
||||
# Should encourage tool use for legitimate cases
|
||||
assert "use tools" in prompts.tool_incentives.lower()
|
||||
# But mention greetings specifically
|
||||
assert "greeting" in prompts.tool_incentives.lower()
|
||||
|
||||
def test_large_model_proactive_incentives(self):
|
||||
"""Large models get proactive tool incentives."""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
prompts = get_system_prompts(ModelSize.LARGE)
|
||||
|
||||
# Should encourage proactive tool use
|
||||
assert "proactively" in prompts.tool_incentives.lower()
|
||||
|
||||
def test_both_sizes_have_core_components(self):
|
||||
"""Both model sizes have the core prompt components."""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
for size in [ModelSize.SMALL, ModelSize.LARGE]:
|
||||
prompts = get_system_prompts(size)
|
||||
|
||||
# All core components should be present
|
||||
assert prompts.asr_note, f"{size.value} missing asr_note"
|
||||
assert prompts.inference_guidance, f"{size.value} missing inference_guidance"
|
||||
assert prompts.tool_incentives, f"{size.value} missing tool_incentives"
|
||||
assert prompts.voice_style, f"{size.value} missing voice_style"
|
||||
assert prompts.tool_guidance, f"{size.value} missing tool_guidance"
|
||||
|
||||
def test_to_list_returns_non_empty_strings(self):
|
||||
"""to_list() returns only non-empty prompt strings."""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
for size in [ModelSize.SMALL, ModelSize.LARGE]:
|
||||
prompts = get_system_prompts(size)
|
||||
prompt_list = prompts.to_list()
|
||||
|
||||
assert len(prompt_list) >= 5, f"{size.value} should have at least 5 components"
|
||||
assert all(isinstance(p, str) and p for p in prompt_list), \
|
||||
f"{size.value} has empty or non-string components"
|
||||
|
||||
def test_small_model_to_list_includes_constraints(self):
|
||||
"""Small model to_list() includes tool constraints."""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
prompts = get_system_prompts(ModelSize.SMALL)
|
||||
prompt_list = prompts.to_list()
|
||||
|
||||
# Should have more items due to tool_constraints
|
||||
assert len(prompt_list) == 6
|
||||
|
||||
# Tool constraints should be in the list (greeting handling)
|
||||
has_constraints = any("greeting" in p.lower() for p in prompt_list)
|
||||
assert has_constraints, "Small model should include greeting constraints"
|
||||
|
||||
def test_large_model_to_list_includes_constraints(self):
|
||||
"""Large model to_list() now includes tool constraints too. The large
|
||||
variant covers the named-entity and auto-derive rules — without it,
|
||||
larger models confabulate for unfamiliar entities or nag the user
|
||||
for args the tool already auto-derives (field failure 2026-04-20).
|
||||
"""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
prompts = get_system_prompts(ModelSize.LARGE)
|
||||
prompt_list = prompts.to_list()
|
||||
|
||||
# Both sizes now carry all 6 components.
|
||||
assert len(prompt_list) == 6
|
||||
|
||||
has_named_entity_rule = any("UNKNOWN NAMED ENTITIES" in p for p in prompt_list)
|
||||
assert has_named_entity_rule, "Large model should include the named-entity rule"
|
||||
has_auto_derive_rule = any("AUTO-DERIVE" in p for p in prompt_list)
|
||||
assert has_auto_derive_rule, "Large model should include the auto-derive rule"
|
||||
|
||||
|
||||
class TestPromptLanguageAgnosticism:
|
||||
"""Tests that prompts are language-agnostic."""
|
||||
|
||||
def test_greeting_rule_is_language_agnostic(self):
|
||||
"""Greeting handling must NOT list language-specific greeting tokens.
|
||||
|
||||
CLAUDE.md forbids hardcoded language patterns — the assistant
|
||||
supports arbitrary languages, and listing 'hello' / 'ni hao' /
|
||||
'bonjour' both biases the model toward those languages and gives a
|
||||
false sense of coverage. The new rule describes the SEMANTIC
|
||||
category ("a greeting or casual social phrase, whatever language"),
|
||||
letting the model rely on its own multilingual understanding."""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
prompts = get_system_prompts(ModelSize.SMALL)
|
||||
constraints = prompts.tool_constraints.lower()
|
||||
|
||||
# The section itself must be present.
|
||||
assert "greeting handling" in constraints
|
||||
|
||||
# None of the old English-biased greeting tokens should be hard-coded
|
||||
# into the prompt any more.
|
||||
for token in ("ni hao", "bonjour", "hola", "merhaba", "ciao"):
|
||||
assert token not in constraints, (
|
||||
f"Stale language-specific token {token!r} is still hardcoded in "
|
||||
"the constraints — the rule should describe the category, not "
|
||||
"enumerate language-specific surface forms."
|
||||
)
|
||||
|
||||
# The language-agnostic phrasing must be present.
|
||||
assert "whatever language" in constraints or "any language" in constraints
|
||||
|
||||
def test_greeting_constraint_is_narrow(self):
|
||||
"""Greeting constraint is narrowly scoped, not overly restrictive."""
|
||||
from jarvis.reply.prompts import get_system_prompts, ModelSize
|
||||
|
||||
prompts = get_system_prompts(ModelSize.SMALL)
|
||||
constraints = prompts.tool_constraints.lower()
|
||||
|
||||
# Should mention greetings specifically
|
||||
assert "greeting" in constraints
|
||||
# Should NOT have overly broad restrictions like "ONLY use tools when explicitly asked"
|
||||
# (This would hurt legitimate tool use for news, weather, etc.)
|
||||
assert "only when explicitly" not in constraints
|
||||
Reference in New Issue
Block a user