Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled

Transform isair/jarvis into a Discord-controlled voice assistant running on
the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral),
  voice channel join + voice receive/playback, pluggable VNC screen broadcast
  (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS
  behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite,
  docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because
Discord blocks bot video; native screen broadcast only works via a Node
selfbot library.
This commit is contained in:
javis-bot
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions

View File

@@ -0,0 +1,563 @@
"""Engine-level tool carry-over guard.
Field trace (2026-05-03, gemma4:e2b):
Turn 1 user: "how's the weather tomorrow Jarvis?" → no location set →
assistant invokes ``getWeather``, tool returns ``success=False``
("I couldn't auto-detect your location, please tell me a city"),
assistant relays the request.
Turn 2 user: "I'm in London" → small-model router picks ``webSearch``
instead of ``getWeather``, planner falls back to a web search for
"weather in london tomorrow", DDG fails, Wikipedia matches the 2014
film "Edge of Tomorrow", and the assistant parrots the film summary
as the weather answer.
Fix: when the previous assistant turn invoked a tool that reported
``success=False`` on its ``ToolExecutionResult``, union the previous
turn's tool name into the allow-list. The ``tool_failed`` flag stamped
onto each recorded tool result is the truth source. Gating on failure
(rather than recency or query length) means a successful chain followed
by a genuine new short ask ("play some music") correctly does NOT carry
over the prior tool.
The carry-over is an engine-side per-turn overlay: the router cache
stores only the raw router output, so future identical queries are
unaffected.
"""
from unittest.mock import Mock, patch
import pytest
from src.jarvis.memory.conversation import DialogueMemory
from src.jarvis.reply.engine import run_reply_engine
def _mock_cfg():
cfg = Mock()
cfg.ollama_base_url = "http://localhost:11434"
cfg.ollama_chat_model = "test-large"
cfg.voice_debug = False
cfg.llm_tools_timeout_sec = 8.0
cfg.llm_embed_timeout_sec = 10.0
cfg.llm_chat_timeout_sec = 45.0
cfg.llm_digest_timeout_sec = 8.0
cfg.memory_enrichment_max_results = 5
cfg.memory_enrichment_source = "diary"
cfg.memory_digest_enabled = False
cfg.tool_result_digest_enabled = False
cfg.location_ip_address = None
cfg.location_auto_detect = False
cfg.location_enabled = False
cfg.agentic_max_turns = 8
cfg.tool_search_max_calls = 3
cfg.tool_selection_strategy = "all"
cfg.tool_carryover_max_turns = 2
cfg.tool_carryover_per_entry_chars = 1200
cfg.mcps = {}
cfg.llm_thinking_enabled = False
cfg.tts_engine = "none"
cfg.ollama_embed_model = "test-embed"
cfg.db_path = ":memory:"
return cfg
def _tool_names_from_chat_call(call) -> set[str]:
"""Pull function names out of the OpenAI-style tools schema passed
to chat_with_messages.
"""
schema = call.kwargs.get("tools") or []
names: set[str] = set()
for entry in schema:
if not isinstance(entry, dict):
continue
fn = entry.get("function") or {}
nm = fn.get("name")
if isinstance(nm, str):
names.add(nm)
return names
def _failed_tool_turn(tool_name: str, tool_call_id: str = "c1") -> list[dict]:
"""Plant a previous-turn tool turn where the tool was invoked and
reported failure. Mirrors the message shape the engine records for a
native tool call whose ``ToolExecutionResult.success`` was False.
"""
return [
{"role": "assistant", "content": "", "tool_calls": [{
"id": tool_call_id, "type": "function",
"function": {"name": tool_name, "arguments": {}},
}]},
{"role": "tool", "tool_call_id": tool_call_id,
"tool_name": tool_name,
"content": "I couldn't auto-detect your location.",
"tool_failed": True},
]
def _succeeded_tool_turn(tool_name: str, tool_call_id: str = "c1") -> list[dict]:
"""Plant a previous-turn tool turn where the tool succeeded."""
return [
{"role": "assistant", "content": "", "tool_calls": [{
"id": tool_call_id, "type": "function",
"function": {"name": tool_name, "arguments": {"location": "London"}},
}]},
{"role": "tool", "tool_call_id": tool_call_id,
"tool_name": tool_name,
"content": "London: 15°C and partly cloudy.",
"tool_failed": False},
]
def _failed_text_tool_turn(tool_name: str) -> list[dict]:
"""Plant a previous-turn tool turn in the text-tool fallback shape
(small models). Tool error is appended as a ``role=user`` message
tagged with both ``tool_name`` and ``tool_failed=True``.
"""
return [
{"role": "assistant",
"content": (
"```tool_call\n"
'{"name": "' + tool_name + '", "arguments": {}}\n'
"```"
)},
{"role": "user",
"content": (
"[Tool error: " + tool_name + "] I couldn't auto-detect "
"your location."
),
"tool_name": tool_name,
"tool_failed": True},
]
@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile",
return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_followup_carries_over_failed_previous_tool(
mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
_mock_graph, _mock_warm, _mock_fmt,
):
"""Previous turn invoked ``getWeather`` and the tool reported failure;
this turn's router only picked ``webSearch``. The engine must union
``getWeather`` back in so the chat model can re-call it with the
location the user just supplied.
"""
mock_chat.side_effect = [
{"message": {"content": "Weather in London is 15°C."}},
]
mock_extract.side_effect = ["Weather in London is 15°C."]
db = Mock()
cfg = _mock_cfg()
dm = DialogueMemory()
dm.add_message("user", "how's the weather tomorrow Jarvis")
dm.record_tool_turn(_failed_tool_turn("getWeather"))
dm.add_message("assistant", "I do not have a location set.")
with patch(
"src.jarvis.reply.engine.select_tools",
return_value=["webSearch"],
):
run_reply_engine(db=db, cfg=cfg, tts=None,
text="I'm in London", dialogue_memory=dm)
tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
assert "webSearch" in tool_names, (
f"router pick must remain visible; saw {sorted(tool_names)}"
)
assert "getWeather" in tool_names, (
"previous-turn failed tool must be carried over; "
f"saw {sorted(tool_names)}"
)
@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile",
return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_successful_previous_tool_does_not_trigger_carryover(
mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
_mock_graph, _mock_warm, _mock_fmt,
):
"""A successful prior tool call means the chain completed. A genuine
new short ask ("log my breakfast") must NOT inherit the prior tool —
that would noisily widen the allow-list for unrelated turns and
risks small models replaying the previous tool. The router pick
stands on its own.
"""
mock_chat.side_effect = [
{"message": {"content": "Logged."}},
]
mock_extract.side_effect = ["Logged."]
db = Mock()
cfg = _mock_cfg()
dm = DialogueMemory()
dm.add_message("user", "how's the weather in London")
dm.record_tool_turn(_succeeded_tool_turn("getWeather"))
dm.add_message("assistant", "It's 15°C and partly cloudy in London.")
with patch(
"src.jarvis.reply.engine.select_tools",
return_value=["logMeal"],
):
run_reply_engine(db=db, cfg=cfg, tts=None,
text="log my breakfast", dialogue_memory=dm)
tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
assert "logMeal" in tool_names
assert "getWeather" not in tool_names, (
"successful prior tool must not be carried over; "
f"saw {sorted(tool_names)}"
)
@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile",
return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_cold_start_does_not_trigger_carryover(
mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
_mock_graph, _mock_warm, _mock_fmt,
):
"""Empty dialogue memory — the carry-over path must be a no-op."""
mock_chat.side_effect = [
{"message": {"content": "Hello."}},
]
mock_extract.side_effect = ["Hello."]
db = Mock()
cfg = _mock_cfg()
dm = DialogueMemory() # cold start — no prior turns
with patch(
"src.jarvis.reply.engine.select_tools",
return_value=["webSearch"],
):
run_reply_engine(db=db, cfg=cfg, tts=None,
text="hi", dialogue_memory=dm)
tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
assert "webSearch" in tool_names
assert "getWeather" not in tool_names
@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile",
return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_carryover_does_not_pollute_router_cache(
mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
_mock_graph, _mock_warm, _mock_fmt,
):
"""The router cache stores the raw router output. Carry-over is a
per-turn overlay layered on top — it must NOT be written back to the
cache, otherwise every replay of the same query inherits a
contaminated tool list.
"""
mock_chat.side_effect = [
{"message": {"content": "Weather in London is 15°C."}},
]
mock_extract.side_effect = ["Weather in London is 15°C."]
db = Mock()
cfg = _mock_cfg()
dm = DialogueMemory()
dm.add_message("user", "how's the weather tomorrow Jarvis")
dm.record_tool_turn(_failed_tool_turn("getWeather"))
dm.add_message("assistant", "I do not have a location set.")
with patch(
"src.jarvis.reply.engine.select_tools",
return_value=["webSearch"],
):
run_reply_engine(db=db, cfg=cfg, tts=None,
text="I'm in London", dialogue_memory=dm)
cached_router_entries = [
(k, v) for k, v in dm._hot_cache.items() if k.startswith("router:")
]
assert cached_router_entries, "router output should have been cached"
for key, (_ts, value) in cached_router_entries:
assert value == ["webSearch"], (
f"router cache for {key!r} should hold raw router output "
f"['webSearch']; got {value!r}"
)
@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile",
return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_long_followup_still_carries_over_when_previous_failed(
mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
_mock_graph, _mock_warm, _mock_fmt,
):
"""Failure-gated carry-over does NOT depend on query length. A long
follow-up that supplies the missing arg ("Right, sorry — I'm in
Edinburgh, please try the lookup again for tomorrow") must still
carry over the failed tool. The earlier char-length heuristic was
dropped because it false-negatived this shape; the failure flag is
the right signal.
"""
mock_chat.side_effect = [
{"message": {"content": "Edinburgh weather: 12°C."}},
]
mock_extract.side_effect = ["Edinburgh weather: 12°C."]
db = Mock()
cfg = _mock_cfg()
dm = DialogueMemory()
dm.add_message("user", "how's the weather tomorrow Jarvis")
dm.record_tool_turn(_failed_tool_turn("getWeather"))
dm.add_message("assistant", "I do not have a location set.")
long_followup = (
"Right, sorry — I'm in Edinburgh, please try the lookup again for "
"tomorrow morning if you would."
)
assert len(long_followup) > 80
with patch(
"src.jarvis.reply.engine.select_tools",
return_value=["webSearch"],
):
run_reply_engine(db=db, cfg=cfg, tts=None,
text=long_followup, dialogue_memory=dm)
tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
assert "getWeather" in tool_names, (
f"long follow-up to a failed tool must still carry over; "
f"saw {sorted(tool_names)}"
)
@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile",
return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_text_tool_fallback_failure_carries_over(
mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
_mock_graph, _mock_warm, _mock_fmt,
):
"""Small-model path: the previous turn's tool error was stored as a
``role=user`` message tagged with ``tool_name`` and
``tool_failed=True``. The walker must collect the name from this
shape too, not only from native ``assistant.tool_calls`` + ``role=tool``
pairs.
"""
mock_chat.side_effect = [
{"message": {"content": "Weather in Berlin is 9°C."}},
]
mock_extract.side_effect = ["Weather in Berlin is 9°C."]
db = Mock()
cfg = _mock_cfg()
dm = DialogueMemory()
dm.add_message("user", "how's the weather")
dm.record_tool_turn(_failed_text_tool_turn("getWeather"))
dm.add_message("assistant", "I couldn't auto-detect your location.")
with patch(
"src.jarvis.reply.engine.select_tools",
return_value=["webSearch"],
):
run_reply_engine(db=db, cfg=cfg, tts=None,
text="I'm in Berlin", dialogue_memory=dm)
tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
assert "getWeather" in tool_names, (
"text-tool fallback failure shape must be carried over; "
f"saw {sorted(tool_names)}"
)
@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile",
return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_multi_tool_call_only_failed_sibling_carries_over(
mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
_mock_graph, _mock_warm, _mock_fmt,
):
"""When an assistant message carries multiple tool_calls but only
one of them failed, only the failed name must be carried over. The
successful sibling stays the chat model's responsibility through
its own routing.
"""
mock_chat.side_effect = [
{"message": {"content": "Sure."}},
]
mock_extract.side_effect = ["Sure."]
db = Mock()
cfg = _mock_cfg()
dm = DialogueMemory()
dm.add_message("user", "weather and search Pushkin")
dm.record_tool_turn([
{"role": "assistant", "content": "", "tool_calls": [
{"id": "c-w", "type": "function",
"function": {"name": "getWeather", "arguments": {}}},
{"id": "c-s", "type": "function",
"function": {"name": "webSearch", "arguments": {"query": "Pushkin"}}},
]},
# getWeather failed (no location), webSearch succeeded.
{"role": "tool", "tool_call_id": "c-w",
"tool_name": "getWeather",
"content": "I couldn't auto-detect your location.",
"tool_failed": True},
{"role": "tool", "tool_call_id": "c-s",
"tool_name": "webSearch",
"content": "Pushkin was a Russian poet (1799-1837).",
"tool_failed": False},
])
dm.add_message("assistant",
"Pushkin was a Russian poet. I couldn't auto-detect "
"your location for the weather lookup.")
with patch(
"src.jarvis.reply.engine.select_tools",
return_value=["fetchWebPage"],
):
run_reply_engine(db=db, cfg=cfg, tts=None,
text="I'm in Paris", dialogue_memory=dm)
tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
assert "getWeather" in tool_names, (
"failed sibling tool_call must be carried over; "
f"saw {sorted(tool_names)}"
)
assert "webSearch" not in tool_names, (
"successful sibling tool_call must NOT be carried over; "
f"saw {sorted(tool_names)}"
)
@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile",
return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
@patch("src.jarvis.reply.engine.run_tool_with_retries")
def test_planner_direct_exec_stamps_tool_failed(
mock_tool, mock_chat, mock_extract, _mock_extract_mem,
_mock_graph, _mock_warm, _mock_fmt,
):
"""The planner's direct-exec path (text-tool mode + concrete plan
step) appends tool results without going through the chat-model
loop. Verify that path stamps ``tool_failed`` so the next turn's
walker can see prior failures planted by direct-exec.
"""
from src.jarvis.tools.types import ToolExecutionResult
cfg = _mock_cfg()
cfg.ollama_chat_model = "gemma4:e2b" # triggers SMALL/text-tool path
# First reply: planner emits a getWeather step, direct-exec runs the
# tool which returns success=False (no location), then the chat
# model produces a final text reply.
mock_tool.return_value = ToolExecutionResult(
success=False,
reply_text="I couldn't auto-detect your location.",
)
mock_chat.side_effect = [
{"message": {"content": "Tell me which city."}},
]
mock_extract.side_effect = ["Tell me which city."]
db = Mock()
dm = DialogueMemory()
# Concrete plan step the resolver fast-path can parse without an LLM.
with patch(
"src.jarvis.reply.engine.plan_query",
return_value=["getWeather", "Reply to the user."],
), patch(
"src.jarvis.reply.engine.select_tools",
return_value=["getWeather"],
):
run_reply_engine(db=db, cfg=cfg, tts=None,
text="how's the weather",
dialogue_memory=dm)
# The direct-exec path should have recorded a tool turn with the
# failure flag set so a follow-up turn can carry over getWeather.
assert dm._tool_turns, (
"planner direct-exec path must record a tool turn into "
"dialogue memory carryover"
)
stored_msgs = [m for _ts, msgs in dm._tool_turns for m in msgs]
failed_entries = [
m for m in stored_msgs
if m.get("tool_failed") and m.get("tool_name") == "getWeather"
]
assert failed_entries, (
"direct-exec failure must stamp tool_failed=True; "
f"stored messages: {stored_msgs}"
)
@pytest.mark.unit
def test_walker_logs_orphan_assistant_tool_call(caplog):
"""When an assistant tool_call has no matching role=tool result in
the recent window (e.g. truncation, scrub, eviction), the walker
should fail-open and log a diagnostic — never crash, never silently
widen the allow-list with the orphan name.
"""
from src.jarvis.reply.engine import _previous_turn_failed_tool_names
recent = [
{"role": "user", "content": "weather please"},
{"role": "assistant", "content": "", "tool_calls": [
{"id": "c-orphan", "type": "function",
"function": {"name": "getWeather", "arguments": {}}},
]},
# No matching role=tool result for c-orphan.
{"role": "assistant", "content": "I couldn't auto-detect."},
]
names = _previous_turn_failed_tool_names(recent)
# No failed tool result was seen, so nothing carries over even
# though an assistant tool_call exists.
assert names == [], (
f"orphan tool_call must not be carried over; got {names}"
)