Add Discord-native hybrid front-end for Jarvis (bot + bridge)

Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions
--- a/tests/test_prompts.py
+++ b/tests/test_prompts.py
@@ -0,0 +1,213 @@
+"""
+Unit tests for the prompts module.
+
+Tests model size detection and prompt component selection.
+"""
+
+import pytest
+
+
+class TestModelSizeDetection:
+    """Tests for detect_model_size function."""
+
+    @pytest.mark.parametrize("model_name,expected_small", [
+        # Small models (expected_small=True maps to ModelSize.SMALL below)
+        ("gemma4", True),  # no size suffix, yet SMALL (unlike "custom-model" below) — presumably a family-name match; verify
+        ("gemma4:e2b", True),
+        ("gemma4:e4b", True),
+        ("llama3.2:3b", True),
+        ("llama3.2:1b", True),
+        ("mistral:7b", True),  # 7b is the largest size listed as small in this table
+        ("gemma:7b", True),
+        ("phi3:3b", True),
+        ("qwen2:7b", True),
+        # Size token delimited by '-' or '_' instead of ':'
+        ("model-3b-instruct", True),
+        ("model_1b_chat", True),
+        # Large models (expected_small=False maps to ModelSize.LARGE below)
+        ("gpt-oss:20b", False),
+        ("llama3.1:8b", False),  # 8b is the smallest size listed as large in this table
+        ("qwen2.5:14b", False),
+        ("gemma2:27b", False),
+        ("llama3:70b", False),
+        ("mixtral:8x7b", False),  # 8x7b is effectively large
+        # Edge cases
+        (None, False),  # None defaults to LARGE
+        ("", False),    # Empty defaults to LARGE
+        ("custom-model", False),  # No size indicator = LARGE
+    ])
+    def test_detect_model_size(self, model_name, expected_small):
+        """Model size detection works for various model names."""
+        from jarvis.reply.prompts import detect_model_size, ModelSize
+
+        result = detect_model_size(model_name)
+        expected = ModelSize.SMALL if expected_small else ModelSize.LARGE
+
+        assert result == expected, \
+            f"Expected {expected.value} for '{model_name}', got {result.value}"
+
+
+class TestPromptComponents:
+    """Tests for get_system_prompts function."""
+
+    def test_small_model_has_tool_constraints(self):
+        """Small models get explicit tool constraints covering every rule.
+
+        Constraints are phrased language-agnostically (per CLAUDE.md: no
+        hardcoded English greetings / English unit names / etc.), so we
+        assert against BEHAVIOURAL sections, not specific tokens in one
+        language.
+        """
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        prompts = get_system_prompts(ModelSize.SMALL)
+
+        assert prompts.tool_constraints is not None
+        text = prompts.tool_constraints.lower()
+        # Each section header must be present — they structure the rules.
+        for section in (
+            "greeting handling",
+            "user instructions",
+            "unknown named entities",
+            "arguments the tool can auto-derive",
+        ):
+            assert section in text, f"Missing section {section!r} in small-model constraints"
+
+    def test_large_model_has_tool_constraints(self):
+        """Large models also get constraints — a shorter restatement of the
+        named-entity and auto-derive rules. gpt-oss:20b and similar
+        confabulate specifics and occasionally ask for tool args the tool
+        already auto-derives, so the large variant is not a no-op."""
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        prompts = get_system_prompts(ModelSize.LARGE)
+
+        assert prompts.tool_constraints is not None
+        text = prompts.tool_constraints.lower()
+        assert "unknown named entities" in text
+        assert "arguments the tool can auto-derive" in text
+
+    def test_small_model_balanced_incentives(self):
+        """Small models get balanced tool incentives - use tools but not for greetings."""
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        prompts = get_system_prompts(ModelSize.SMALL)
+
+        # Should encourage tool use for legitimate cases
+        assert "use tools" in prompts.tool_incentives.lower()
+        # But mention greetings specifically
+        assert "greeting" in prompts.tool_incentives.lower()
+
+    def test_large_model_proactive_incentives(self):
+        """Large models get proactive tool incentives."""
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        prompts = get_system_prompts(ModelSize.LARGE)
+
+        # Should encourage proactive tool use
+        assert "proactively" in prompts.tool_incentives.lower()
+
+    def test_both_sizes_have_core_components(self):
+        """Both model sizes have the core prompt components."""
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        for size in [ModelSize.SMALL, ModelSize.LARGE]:
+            prompts = get_system_prompts(size)
+
+            # All five core components must be truthy (non-empty) for each size
+            assert prompts.asr_note, f"{size.value} missing asr_note"
+            assert prompts.inference_guidance, f"{size.value} missing inference_guidance"
+            assert prompts.tool_incentives, f"{size.value} missing tool_incentives"
+            assert prompts.voice_style, f"{size.value} missing voice_style"
+            assert prompts.tool_guidance, f"{size.value} missing tool_guidance"
+
+    def test_to_list_returns_non_empty_strings(self):
+        """to_list() returns only non-empty prompt strings."""
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        for size in [ModelSize.SMALL, ModelSize.LARGE]:
+            prompts = get_system_prompts(size)
+            prompt_list = prompts.to_list()
+
+            assert len(prompt_list) >= 5, f"{size.value} should have at least 5 components"
+            assert all(isinstance(p, str) and p for p in prompt_list), \
+                f"{size.value} has empty or non-string components"
+
+    def test_small_model_to_list_includes_constraints(self):
+        """Small model to_list() includes tool constraints."""
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        prompts = get_system_prompts(ModelSize.SMALL)
+        prompt_list = prompts.to_list()
+
+        # Exactly 6 items expected: tool_constraints on top of the core components
+        assert len(prompt_list) == 6
+
+        # Tool constraints should be in the list (greeting handling)
+        has_constraints = any("greeting" in p.lower() for p in prompt_list)
+        assert has_constraints, "Small model should include greeting constraints"
+
+    def test_large_model_to_list_includes_constraints(self):
+        """Large model to_list() now includes tool constraints too. The large
+        variant covers the named-entity and auto-derive rules — without it,
+        larger models confabulate for unfamiliar entities or nag the user
+        for args the tool already auto-derives (field failure 2026-04-20).
+        """
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        prompts = get_system_prompts(ModelSize.LARGE)
+        prompt_list = prompts.to_list()
+
+        # Both sizes now carry all 6 components.
+        assert len(prompt_list) == 6
+
+        has_named_entity_rule = any("UNKNOWN NAMED ENTITIES" in p for p in prompt_list)  # NOTE(review): case-sensitive, unlike the lowercased checks above
+        assert has_named_entity_rule, "Large model should include the named-entity rule"
+        has_auto_derive_rule = any("AUTO-DERIVE" in p for p in prompt_list)  # also matched case-sensitively on the raw prompt text
+        assert has_auto_derive_rule, "Large model should include the auto-derive rule"
+
+
+class TestPromptLanguageAgnosticism:
+    """Tests that prompts are language-agnostic."""
+
+    def test_greeting_rule_is_language_agnostic(self):
+        """Greeting handling must NOT list language-specific greeting tokens.
+
+        CLAUDE.md forbids hardcoded language patterns — the assistant
+        supports arbitrary languages, and listing 'hello' / 'ni hao' /
+        'bonjour' both biases the model toward those languages and gives a
+        false sense of coverage. The new rule describes the SEMANTIC
+        category ("a greeting or casual social phrase, whatever language"),
+        letting the model rely on its own multilingual understanding."""
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        prompts = get_system_prompts(ModelSize.SMALL)
+        constraints = prompts.tool_constraints.lower()
+
+        # The section itself must be present.
+        assert "greeting handling" in constraints
+
+        # None of the old language-specific greeting tokens should be hard-coded
+        # into the prompt any more.
+        for token in ("ni hao", "bonjour", "hola", "merhaba", "ciao"):  # NOTE(review): 'hello' from the docstring is not checked
+            assert token not in constraints, (
+                f"Stale language-specific token {token!r} is still hardcoded in "
+                "the constraints — the rule should describe the category, not "
+                "enumerate language-specific surface forms."
+            )
+
+        # The language-agnostic phrasing must be present (either wording is accepted).
+        assert "whatever language" in constraints or "any language" in constraints
+
+    def test_greeting_constraint_is_narrow(self):
+        """Greeting constraint is narrowly scoped, not overly restrictive."""
+        from jarvis.reply.prompts import get_system_prompts, ModelSize
+
+        prompts = get_system_prompts(ModelSize.SMALL)
+        constraints = prompts.tool_constraints.lower()
+
+        # Should mention greetings specifically
+        assert "greeting" in constraints
+        # Should NOT have overly broad restrictions like "ONLY use tools when explicitly asked"
+        # (This would hurt legitimate tool use for news, weather, etc.)
+        assert "only when explicitly" not in constraints