Add Discord-native hybrid front-end for Jarvis (bot + bridge)

Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions
--- a/tests/test_engine_tool_carryover.py
+++ b/tests/test_engine_tool_carryover.py
@@ -0,0 +1,227 @@
+"""End-to-end: tool-call + tool-result messages from one reply must be
+visible to the LLM on the next reply within the hot window, so the model
+can synthesise from prior results rather than re-fetching.
+"""
+
+from unittest.mock import Mock, patch
+
+import pytest
+
+from src.jarvis.memory.conversation import DialogueMemory
+from src.jarvis.reply.engine import run_reply_engine
+
+
+def _mock_cfg():
+    """Return a ``Mock`` config carrying the settings these tests assign.
+
+    The chat model is ``"test-large"`` so the SMALL-model text-tool path is
+    avoided by default; a test may overwrite any attribute on the result.
+    """
+    settings = {
+        # LLM endpoint and models
+        "ollama_base_url": "http://localhost:11434",
+        "ollama_chat_model": "test-large",  # avoid SMALL-model text-tool path
+        "voice_debug": False,
+        # Timeouts (seconds)
+        "llm_tools_timeout_sec": 8.0,
+        "llm_embed_timeout_sec": 10.0,
+        "llm_chat_timeout_sec": 45.0,
+        "llm_digest_timeout_sec": 8.0,
+        # Memory enrichment / digests
+        "memory_enrichment_max_results": 5,
+        "memory_enrichment_source": "diary",
+        "memory_digest_enabled": False,
+        "tool_result_digest_enabled": False,
+        # Location
+        "location_ip_address": None,
+        "location_auto_detect": False,
+        "location_enabled": False,
+        # Agent loop and tool carryover
+        "agentic_max_turns": 8,
+        "tool_search_max_calls": 3,
+        "tool_selection_strategy": "all",
+        "tool_carryover_max_turns": 2,
+        "tool_carryover_per_entry_chars": 1200,
+        "mcps": {},
+        "llm_thinking_enabled": False,
+        "tts_engine": "none",
+        "ollama_embed_model": "test-embed",
+    }
+    cfg = Mock()
+    for attr_name, attr_value in settings.items():
+        setattr(cfg, attr_name, attr_value)
+    return cfg
+
+
+@pytest.mark.unit
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.run_tool_with_retries")
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_tool_carryover_makes_prior_result_visible_to_next_turn(
+    mock_chat, mock_extract, mock_tool, _mock_extract, _mock_plan
+):
+    """Native tool path: reply 1's tool result is replayed to the LLM on reply 2.
+
+    Patched arguments, innermost decorator first: ``mock_chat`` is
+    ``chat_with_messages``, ``mock_extract`` is ``extract_text_from_response``,
+    ``mock_tool`` is ``run_tool_with_retries``, ``_mock_extract`` is
+    ``extract_search_params_for_memory`` and ``_mock_plan`` is ``plan_query``.
+    """
+    # Turn 1: model emits webSearch call, then final text.
+    mock_tool.return_value = Mock(
+        reply_text="Justin Bieber is a Canadian singer.",
+        error_message=None,
+    )
+    mock_chat.side_effect = [
+        # Turn 1a: tool call
+        {"message": {"content": "", "tool_calls": [{
+            "id": "c1", "type": "function",
+            "function": {"name": "webSearch",
+                         "arguments": {"query": "justin bieber"}},
+        }]}},
+        # Turn 1b: final reply
+        {"message": {"content": "He is a Canadian singer."}},
+        # Turn 2a: final reply directly — reuse from prior context
+        {"message": {"content": "His breakout song was Baby."}},
+    ]
+    mock_extract.side_effect = [
+        "",
+        "He is a Canadian singer.",
+        "His breakout song was Baby.",
+    ]
+
+    db = Mock()
+    cfg = _mock_cfg()
+    dm = DialogueMemory()
+
+    run_reply_engine(db=db, cfg=cfg, tts=None,
+                     text="who is justin bieber",
+                     dialogue_memory=dm)
+
+    # Confirm carryover was recorded
+    assert len(dm._tool_turns) == 1
+    stored = dm._tool_turns[0][1]
+    stored_roles = [m.get("role") for m in stored]
+    assert "tool" in stored_roles
+    assert any(m.get("tool_calls") for m in stored)
+
+    # Turn 2: query on the same topic — the turn-2 LLM call should receive
+    # the turn-1 tool messages in its `messages` argument.
+    calls_before_turn2 = mock_chat.call_count
+    run_reply_engine(db=db, cfg=cfg, tts=None,
+                     text="what is his most famous song",
+                     dialogue_memory=dm)
+
+    # Guard against a vacuous pass: if turn 2 never reached the LLM, the last
+    # recorded call would be turn 1b, whose messages may already hold the
+    # tool result.
+    assert mock_chat.call_count > calls_before_turn2, (
+        "turn 2 did not call chat_with_messages"
+    )
+
+    # The last chat_with_messages call now belongs to turn 2. Fall back to an
+    # empty list so a missing `messages` kwarg fails the assertion below with
+    # its descriptive message instead of raising TypeError.
+    turn2_kwargs = mock_chat.call_args_list[-1].kwargs
+    turn2_messages = turn2_kwargs.get("messages") or []
+    roles_in_turn2 = [m.get("role") for m in turn2_messages]
+    assert "tool" in roles_in_turn2, (
+        "Expected prior tool-role message to be injected on turn 2; "
+        f"got roles={roles_in_turn2}"
+    )
+    # The tool message content must be the prior webSearch result
+    tool_contents = [
+        m.get("content") for m in turn2_messages if m.get("role") == "tool"
+    ]
+    assert any("Canadian singer" in (c or "") for c in tool_contents)
+
+
+@pytest.mark.unit
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.run_tool_with_retries")
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_stop_signal_clears_tool_carryover(
+    mock_chat, mock_extract, mock_tool, _mock_extract, _mock_plan
+):
+    """A stop signal on the second reply must wipe recorded tool carryover.
+
+    Reply 1 runs ``webSearch`` and records carryover. On reply 2 the ``stop``
+    tool yields ``STOP_SIGNAL``; the engine is then expected to return
+    ``None`` and leave ``_tool_turns`` empty for the next wake-word turn.
+    """
+    from src.jarvis.tools.builtin.stop import STOP_SIGNAL
+
+    def _tool_call_response(call_id, tool_name, arguments):
+        # Chat response whose only payload is a single native tool call.
+        return {"message": {"content": "", "tool_calls": [{
+            "id": call_id, "type": "function",
+            "function": {"name": tool_name, "arguments": arguments},
+        }]}}
+
+    # Scripted LLM output: reply 1 = tool call + final text, reply 2 = stop.
+    mock_chat.side_effect = [
+        _tool_call_response("c1", "webSearch", {"query": "bieber"}),
+        {"message": {"content": "He is a Canadian singer."}},
+        _tool_call_response("c2", "stop", {}),
+    ]
+    mock_extract.side_effect = ["", "He is a Canadian singer.", ""]
+
+    # Tool results, consumed in order: the search hit, then the stop signal.
+    search_result = Mock(
+        reply_text="Justin Bieber is a Canadian singer.", error_message=None,
+    )
+    stop_result = Mock(reply_text=STOP_SIGNAL, error_message=None)
+    mock_tool.side_effect = [search_result, stop_result]
+
+    memory = DialogueMemory()
+    engine_kwargs = {
+        "db": Mock(),
+        "cfg": _mock_cfg(),
+        "tts": None,
+        "dialogue_memory": memory,
+    }
+
+    run_reply_engine(text="who is justin bieber", **engine_kwargs)
+    assert len(memory._tool_turns) == 1, "turn-1 tool carryover should be recorded"
+
+    reply = run_reply_engine(text="stop", **engine_kwargs)
+    assert reply is None, "stop signal returns None"
+    assert memory._tool_turns == [], (
+        "stop signal must clear carryover so the next wake-word turn is clean"
+    )
+
+
+@pytest.mark.unit
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.run_tool_with_retries")
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_tool_carryover_text_tool_mode(
+    mock_chat, mock_extract, mock_tool, _mock_extract, _mock_plan
+):
+    """Small-model path: tool results come back as role=user with a
+    ``tool_name`` tag. Carryover must pick those up too.
+
+    Patched arguments, innermost decorator first: ``mock_chat`` is
+    ``chat_with_messages``, ``mock_extract`` is ``extract_text_from_response``,
+    ``mock_tool`` is ``run_tool_with_retries``, ``_mock_extract`` is
+    ``extract_search_params_for_memory`` and ``_mock_plan`` is ``plan_query``.
+    """
+    cfg = _mock_cfg()
+    cfg.ollama_chat_model = "gemma4:e2b"  # triggers SMALL/text-tool path
+
+    mock_tool.return_value = Mock(
+        reply_text="Paris is the capital of France.", error_message=None,
+    )
+    fence_call = (
+        "```tool_call\n"
+        '{"name": "webSearch", "arguments": {"query": "paris"}}\n'
+        "```"
+    )
+    mock_chat.side_effect = [
+        # Turn 1a: text-tool call emitted inside a markdown fence
+        {"message": {"content": fence_call}},
+        # Turn 1b: final reply
+        {"message": {"content": "Paris is in France."}},
+        # Turn 2: follow-up reply
+        {"message": {"content": "Its population is about 2.1 million."}},
+    ]
+    mock_extract.side_effect = [
+        fence_call,
+        "Paris is in France.",
+        "Its population is about 2.1 million.",
+    ]
+
+    db = Mock()
+    dm = DialogueMemory()
+
+    run_reply_engine(db=db, cfg=cfg, tts=None,
+                     text="what about paris", dialogue_memory=dm)
+
+    assert len(dm._tool_turns) == 1
+    stored = dm._tool_turns[0][1]
+    roles = [m.get("role") for m in stored]
+    # Text-tool fallback stores tool results as role=user with tool_name.
+    assert "user" in roles
+    assert any(m.get("tool_name") == "webSearch" for m in stored)
+
+    calls_before_turn2 = mock_chat.call_count
+    run_reply_engine(db=db, cfg=cfg, tts=None,
+                     text="tell me more", dialogue_memory=dm)
+
+    # Guard against a vacuous pass: if turn 2 never reached the LLM, the last
+    # recorded call would be turn 1b, whose messages may already hold the
+    # tool result.
+    assert mock_chat.call_count > calls_before_turn2, (
+        "turn 2 did not call chat_with_messages"
+    )
+
+    turn2_messages = mock_chat.call_args_list[-1].kwargs.get("messages") or []
+    # The prior tool payload should appear in the turn-2 messages list —
+    # either as role=tool (native) or role=user with tool_name (text-tool).
+    tool_like = [
+        m for m in turn2_messages
+        if m.get("role") == "tool"
+        or (m.get("role") == "user" and m.get("tool_name"))
+    ]
+    assert tool_like, (
+        f"expected prior text-tool result to be carried over; got roles="
+        f"{[m.get('role') for m in turn2_messages]}"
+    )
+    assert any(
+        "Paris" in (m.get("content") or "") for m in tool_like
+    )