Add Discord-native hybrid front-end for Jarvis (bot + bridge)

Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions
--- a/evals/test_planner_personalisation.py
+++ b/evals/test_planner_personalisation.py
@@ -0,0 +1,124 @@
+"""
+Planner — Personalisation Detection (Live)
+
+Guards that the task-list planner emits a ``searchMemory`` directive as
+the first step for queries that implicitly depend on the user's own
+interests, tastes, or history — even when the user did not use the word
+"preference" or "history" in the query.
+
+Motivating field incident (2026-04-24):
+  User asked "Tell me some news that might interest me, Jarvis." The
+  planner emitted ``webSearch query='current news'`` with no
+  ``searchMemory`` step, so the engine skipped memory enrichment and the
+  reply was a generic BBC front-page summary with no personalisation.
+
+The planner's rule 2 already lists "preferences" as a trigger, but
+gemma4:e2b doesn't pattern-match phrases like "interest me", "suggest
+something for me", "what should I…" onto that category without concrete
+examples. This eval asserts the prompt teaches the connection — adding
+examples that name the exact linguistic shape of a personalisation
+request.
+
+Run: EVAL_JUDGE_MODEL=gemma4:e2b pytest evals/test_planner_personalisation.py -v
+"""
+
+import pytest
+
+from conftest import requires_judge_llm
+from helpers import JUDGE_BASE_URL, JUDGE_MODEL
+
+
+def _cfg():
+    from types import SimpleNamespace
+    return SimpleNamespace(
+        ollama_base_url=JUDGE_BASE_URL,
+        ollama_chat_model=JUDGE_MODEL,
+        planner_model="",
+        tool_router_model="",
+        intent_judge_model="",
+        planner_enabled=True,
+        planner_timeout_sec=20.0,
+    )
+
+
+_TOOL_CATALOG = [
+    ("webSearch", "Search the web for current facts and events."),
+    ("getWeather", "Current weather and forecast for a location."),
+    ("stop", "End the turn and reply to the user."),
+]
+
+
+@pytest.mark.eval
+@requires_judge_llm
+class TestPlannerEmitsSearchMemoryForPersonalisedQueries:
+    """Field-regression guard for the 'interest me' pattern."""
+
+    @pytest.mark.parametrize(
+        "query",
+        [
+            "tell me some news that might interest me",
+            "suggest something I'd enjoy watching tonight",
+            "what should I cook for dinner",
+            "recommend a book I'd like",
+        ],
+        ids=lambda q: q[:40],
+    )
+    def test_personalised_query_plans_memory_lookup_first(self, query):
+        from jarvis.reply.planner import (
+            plan_query, plan_requires_memory, is_search_memory_step,
+        )
+
+        plan = plan_query(
+            cfg=_cfg(),
+            query=query,
+            dialogue_context="",
+            tools=_TOOL_CATALOG,
+        )
+        print(f"\n  Query: {query!r}")
+        print(f"  Plan: {plan}")
+
+        assert plan, (
+            f"Planner returned an empty plan for {query!r} — expected a "
+            f"multi-step plan starting with a searchMemory directive."
+        )
+        assert plan_requires_memory(plan), (
+            f"Planner did not request memory for personalised query "
+            f"{query!r}. Plan: {plan}. The user's own interests are "
+            f"exactly what rule 2 of the planner prompt lists as a "
+            f"trigger for searchMemory."
+        )
+        assert is_search_memory_step(plan[0]), (
+            f"searchMemory must be the FIRST step so memory enrichment "
+            f"runs before any tool call. Plan: {plan}"
+        )
+
+    @pytest.mark.parametrize(
+        "query",
+        [
+            "what is the capital of France",
+            "who is Britney Spears",
+            "what's 2 plus 2",
+        ],
+        ids=lambda q: q[:40],
+    )
+    def test_general_knowledge_query_does_not_request_memory(self, query):
+        """Negative case: pure general-knowledge queries must NOT trigger
+        a searchMemory directive. Every extra searchMemory is a wasted
+        memory-enrichment LLM call downstream."""
+        from jarvis.reply.planner import plan_query, plan_requires_memory
+
+        plan = plan_query(
+            cfg=_cfg(),
+            query=query,
+            dialogue_context="",
+            tools=_TOOL_CATALOG,
+        )
+        print(f"\n  Query: {query!r}")
+        print(f"  Plan: {plan}")
+
+        assert plan, f"Planner returned empty plan for {query!r}"
+        assert not plan_requires_memory(plan), (
+            f"Planner wrongly requested searchMemory for a general-"
+            f"knowledge query {query!r}. That wastes a memory-enrichment "
+            f"LLM call on every such turn. Plan: {plan}"
+        )