Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled

Transform isair/jarvis into a Discord-controlled voice assistant running on
the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral),
  voice channel join + voice receive/playback, pluggable VNC screen broadcast
  (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS
  behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite,
  docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because
Discord blocks bot video; native screen broadcast only works via a Node
selfbot library.
This commit is contained in:
javis-bot
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions

View File

@@ -0,0 +1,519 @@
"""Integration test for the toolSearchTool escape hatch and related loop behaviours.
Scenario: the router picks a narrow initial tool set. Mid-loop the chat model
realises it needs a different tool and invokes ``toolSearchTool``. The engine
dispatches it, merges the returned tool names into the per-turn allow-list,
and the next turn calls the newly-surfaced tool (``getWeather``). The final
content is delivered immediately.
"""
from unittest.mock import patch
import pytest
def _assistant_tool_call(name: str, args: dict, call_id: str = "call_1"):
return {
"message": {
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": call_id,
"type": "function",
"function": {"name": name, "arguments": args},
}
],
}
}
def _assistant_content(text: str):
return {"message": {"role": "assistant", "content": text}}
def test_loop_merges_toolsearchtool_results_into_allowlist(
    mock_config, db, dialogue_memory
):
    """A tool surfaced by ``toolSearchTool`` can be invoked on a later turn.

    The router stub only offers ``webSearch`` and ``stop``. The scripted model
    first calls ``toolSearchTool`` (whose result mentions ``getWeather``),
    then calls ``getWeather``, then answers in plain text. The test checks
    both tools reached the runner in that order and that the final reply
    mentions London.
    """
    from jarvis.reply import engine as engine_mod
    from jarvis.tools.types import ToolExecutionResult

    mock_config.ollama_chat_model = "gpt-oss:20b"  # LARGE → no forced text tools

    # Canned reply text per tool name; any other tool gets a generic result.
    canned_replies = {
        # Surfaces a newly-routed tool that was NOT in the initial pick.
        "toolSearchTool": "getWeather: Report current weather.",
        "getWeather": "London: 12C partly cloudy.",
    }
    invoked_tools: list[tuple[str, dict]] = []

    def fake_tool_runner(db, cfg, tool_name, tool_args, **kwargs):
        # Record every dispatch so the order can be asserted afterwards.
        invoked_tools.append((tool_name, tool_args or {}))
        return ToolExecutionResult(
            success=True,
            reply_text=canned_replies.get(tool_name, "result"),
            error_message=None,
        )

    scripted_turns = iter(
        (
            # Turn 1: model calls toolSearchTool.
            _assistant_tool_call(
                "toolSearchTool", {"query": "current weather in london"}
            ),
            # Turn 2: model uses the newly-surfaced getWeather.
            _assistant_tool_call(
                "getWeather", {"location": "London"}, call_id="call_2"
            ),
            # Turn 3: final reply.
            _assistant_content("It's 12C and partly cloudy in London."),
        )
    )

    def fake_chat(*args, **kwargs):
        # Once the script is exhausted, keep answering with a terminal reply.
        return next(scripted_turns, _assistant_content("Done."))

    tool_patch = patch.object(
        engine_mod, "run_tool_with_retries", side_effect=fake_tool_runner
    )
    chat_patch = patch.object(
        engine_mod, "chat_with_messages", side_effect=fake_chat
    )
    router_patch = patch.object(
        engine_mod, "select_tools", return_value=["webSearch", "stop"]
    )
    memory_patch = patch.object(
        engine_mod,
        "extract_search_params_for_memory",
        return_value={"keywords": []},
    )
    with tool_patch, chat_patch, router_patch, memory_patch:
        reply = engine_mod.run_reply_engine(
            db=db,
            cfg=mock_config,
            tts=None,
            text="how's the weather in london?",
            dialogue_memory=dialogue_memory,
        )

    tool_names = [called_name for called_name, _called_args in invoked_tools]
    assert "toolSearchTool" in tool_names, (
        f"Expected toolSearchTool to be invoked; got {tool_names}"
    )
    assert "getWeather" in tool_names, (
        "Expected getWeather (surfaced mid-loop by toolSearchTool) to be "
        f"invoked on a subsequent turn; got {tool_names}"
    )
    # getWeather must follow toolSearchTool (the allow-list widening
    # happens after the tool result is appended).
    search_position = tool_names.index("toolSearchTool")
    weather_position = tool_names.index("getWeather")
    assert weather_position > search_position
    assert reply and "London" in reply
def test_initial_allowlist_always_includes_toolsearchtool(
    mock_config, db, dialogue_memory
):
    """Even when the router returns no additional tools, the engine must
    always append ``toolSearchTool`` so the escape hatch is reachable.

    The allow-list is observed by wrapping ``generate_tools_json_schema``
    with a spy that records the ``allowed_tools`` argument of every call
    and then delegates to the real generator.
    """
    from jarvis.reply import engine as engine_mod

    mock_config.ollama_chat_model = "gpt-oss:20b"
    captured_allow_lists: list[list[str]] = []

    def fake_chat(*args, **kwargs):
        # Reply with plain content (no tool call). The allow-list is NOT
        # inspected here; it is captured by the schema spy below.
        return _assistant_content("Hello back!")

    # Keep a handle on the real generator before it is patched so the spy
    # can delegate to it and the engine still receives a genuine schema.
    real_generate = engine_mod.generate_tools_json_schema

    def spy_schema(allowed_tools, mcp_tools):
        # Snapshot (copy) the list in case the caller mutates it afterwards.
        captured_allow_lists.append(list(allowed_tools))
        return real_generate(allowed_tools, mcp_tools)

    with patch.object(engine_mod, "chat_with_messages", side_effect=fake_chat), \
            patch.object(engine_mod, "select_tools", return_value=["stop"]), \
            patch.object(
                engine_mod,
                "extract_search_params_for_memory",
                return_value={"keywords": []},
            ), \
            patch.object(
                engine_mod, "generate_tools_json_schema", side_effect=spy_schema
            ):
        engine_mod.run_reply_engine(
            db=db,
            cfg=mock_config,
            tts=None,
            text="hi",
            dialogue_memory=dialogue_memory,
        )

    assert captured_allow_lists, "generate_tools_json_schema was never called"
    # The engine now runs the router before the planner, which builds an
    # auxiliary schema for the planner's tool catalogue (router-narrowed,
    # no escape hatch) before the final chat-model schema. The escape hatch
    # only joins in the chat-model allow-list. Assert it appears somewhere
    # in the captured calls — implementations are free to reuse the same
    # schema generator at multiple call sites.
    assert any("toolSearchTool" in al for al in captured_allow_lists), (
        f"toolSearchTool missing from any allow-list: {captured_allow_lists}"
    )
def test_schema_regenerated_after_toolsearchtool_merge(
    mock_config, db, dialogue_memory
):
    """F1: the tools schema is rebuilt once ``toolSearchTool`` widens the list.

    After the search tool's result mentions ``getWeather``, the ``tools``
    keyword passed to the second chat call must contain a function entry
    named ``getWeather``.
    """
    from jarvis.reply import engine as engine_mod
    from jarvis.tools.types import ToolExecutionResult

    mock_config.ollama_chat_model = "gpt-oss:20b"  # LARGE → native tools

    def fake_tool_runner(db, cfg, tool_name, tool_args, **kwargs):
        # Only the search tool returns something that names another tool.
        if tool_name == "toolSearchTool":
            text = "getWeather: Report current weather."
        else:
            text = "done"
        return ToolExecutionResult(
            success=True, reply_text=text, error_message=None
        )

    scripted_turns = iter(
        (
            _assistant_tool_call(
                "toolSearchTool", {"query": "weather"}, call_id="c1"
            ),
            _assistant_content("All good."),
        )
    )
    captured_tools_params: list = []

    def fake_chat(*args, **kwargs):
        # Record the ``tools`` kwarg of every LLM call, in call order.
        captured_tools_params.append(kwargs.get("tools"))
        return next(scripted_turns, _assistant_content("done"))

    tool_patch = patch.object(
        engine_mod, "run_tool_with_retries", side_effect=fake_tool_runner
    )
    chat_patch = patch.object(
        engine_mod, "chat_with_messages", side_effect=fake_chat
    )
    router_patch = patch.object(
        engine_mod, "select_tools", return_value=["webSearch", "stop"]
    )
    memory_patch = patch.object(
        engine_mod,
        "extract_search_params_for_memory",
        return_value={"keywords": []},
    )
    with tool_patch, chat_patch, router_patch, memory_patch:
        engine_mod.run_reply_engine(
            db=db,
            cfg=mock_config,
            tts=None,
            text="weather?",
            dialogue_memory=dialogue_memory,
        )

    # Two LLM calls: pre-merge and post-merge. The post-merge call must
    # include getWeather in its tools schema.
    assert len(captured_tools_params) >= 2
    post_merge_schema = captured_tools_params[1] or []

    # Collect the function names, skipping any malformed schema entries.
    names = []
    for entry in post_merge_schema:
        if not isinstance(entry, dict):
            continue
        function_spec = entry.get("function")
        if not isinstance(function_spec, dict):
            continue
        tool_name = function_spec.get("name")
        if tool_name:
            names.append(tool_name)

    assert "getWeather" in names, (
        f"Expected getWeather in post-merge tools schema; got {names}"
    )
def test_tool_search_max_calls_cap(mock_config, db, dialogue_memory):
    """F5: the number of ``toolSearchTool`` dispatches per reply is capped.

    With ``tool_search_max_calls`` set to 2 and a scripted model that asks
    for the search tool four times in a row, exactly two of those requests
    may reach the tool runner.
    """
    from jarvis.reply import engine as engine_mod
    from jarvis.tools.types import ToolExecutionResult

    mock_config.ollama_chat_model = "gpt-oss:20b"
    mock_config.tool_search_max_calls = 2

    search_dispatches = 0

    def fake_tool_runner(db, cfg, tool_name, tool_args, **kwargs):
        nonlocal search_dispatches
        if tool_name != "toolSearchTool":
            return ToolExecutionResult(
                success=True, reply_text="ok", error_message=None
            )
        # Count only dispatches that actually reach the runner.
        search_dispatches += 1
        return ToolExecutionResult(
            success=True,
            reply_text="No additional tools found for that description.",
            error_message=None,
        )

    # Model keeps trying toolSearchTool; last turn emits final content.
    scripted_turns = [
        _assistant_tool_call("toolSearchTool", {"query": query}, call_id=f"c{turn}")
        for turn, query in enumerate("abcd", start=1)
    ]
    scripted_turns.append(_assistant_content("All right, giving up."))
    remaining_turns = iter(scripted_turns)

    def fake_chat(*args, **kwargs):
        return next(remaining_turns, _assistant_content("done"))

    tool_patch = patch.object(
        engine_mod, "run_tool_with_retries", side_effect=fake_tool_runner
    )
    chat_patch = patch.object(
        engine_mod, "chat_with_messages", side_effect=fake_chat
    )
    router_patch = patch.object(
        engine_mod, "select_tools", return_value=["webSearch", "stop"]
    )
    memory_patch = patch.object(
        engine_mod,
        "extract_search_params_for_memory",
        return_value={"keywords": []},
    )
    with tool_patch, chat_patch, router_patch, memory_patch:
        engine_mod.run_reply_engine(
            db=db,
            cfg=mock_config,
            tts=None,
            text="hello",
            dialogue_memory=dialogue_memory,
        )

    assert search_dispatches == 2, (
        f"Expected cap to limit dispatch to 2; got "
        f"{search_dispatches}"
    )
def test_validate_tool_args_catches_unknown_keys():
    """Schema validator unit test: an unrecognised argument key is reported.

    This is the exact failure mode the field log hit — ``webSearch`` called
    with ``query``. The error must say "unknown argument" and mention
    ``search_query``.
    """
    from jarvis.reply.engine import _validate_tool_args_against_schema

    wrong_key_args = {"query": "tube strikes today"}
    problem = _validate_tool_args_against_schema(
        "webSearch", wrong_key_args, mcp_tools=None
    )

    assert problem is not None
    assert "unknown argument" in problem.lower()
    assert "search_query" in problem
def test_validate_tool_args_passes_correct_keys():
    """The validator returns ``None`` when ``webSearch`` gets ``search_query``."""
    from jarvis.reply.engine import _validate_tool_args_against_schema

    valid_args = {"search_query": "tube strikes today"}
    problem = _validate_tool_args_against_schema(
        "webSearch", valid_args, mcp_tools=None
    )

    assert problem is None
def test_validate_tool_args_catches_missing_required():
    """Calling ``webSearch`` with no arguments yields a "missing required" error."""
    from jarvis.reply.engine import _validate_tool_args_against_schema

    no_args = {}
    problem = _validate_tool_args_against_schema(
        "webSearch", no_args, mcp_tools=None
    )

    assert problem is not None
    assert "missing required" in problem.lower()
def test_max_turns_produces_digest(mock_config, db, dialogue_memory):
    """Exhausting ``agentic_max_turns`` with tool calls only ships the digest.

    The model stub never returns content, so the loop runs until the turn
    limit (3 here). The engine is then expected to call
    ``digest_loop_for_max_turns`` and return its caveat-prefixed text.
    """
    from jarvis.reply import engine as engine_mod
    from jarvis.tools.types import ToolExecutionResult

    mock_config.ollama_chat_model = "gpt-oss:20b"
    mock_config.agentic_max_turns = 3

    def fake_chat(*args, **kwargs):
        # The model keeps calling toolSearchTool every turn — no content is
        # ever produced, so the loop exhausts max_turns and the digest fires.
        return _assistant_tool_call("toolSearchTool", {"query": "a"}, call_id="c1")

    def fake_tool_runner(db, cfg, tool_name, tool_args, **kwargs):
        return ToolExecutionResult(
            success=True,
            reply_text="No additional tools found.",
            error_message=None,
        )

    captured = {}

    def fake_digest(user_query, loop_messages, cfg):
        # Remember what the engine handed to the digest for later assertions.
        captured.update(user_query=user_query, loop_messages=loop_messages)
        return "Couldn't finish: I was still working through the request."

    chat_patch = patch.object(
        engine_mod, "chat_with_messages", side_effect=fake_chat
    )
    tool_patch = patch.object(
        engine_mod, "run_tool_with_retries", side_effect=fake_tool_runner
    )
    router_patch = patch.object(
        engine_mod, "select_tools", return_value=["toolSearchTool", "stop"]
    )
    memory_patch = patch.object(
        engine_mod,
        "extract_search_params_for_memory",
        return_value={"keywords": []},
    )
    digest_patch = patch.object(
        engine_mod, "digest_loop_for_max_turns", side_effect=fake_digest
    )
    with chat_patch, tool_patch, router_patch, memory_patch, digest_patch:
        reply = engine_mod.run_reply_engine(
            db=db,
            cfg=mock_config,
            tts=None,
            text="do something complicated",
            dialogue_memory=dialogue_memory,
        )

    assert reply == "Couldn't finish: I was still working through the request."
    assert captured.get("user_query"), "digest should receive the user query"
    assert isinstance(captured.get("loop_messages"), list)
def test_max_turns_digest_failure_falls_back_to_generic_error(
    mock_config, db, dialogue_memory
):
    """A failed digest after a pure tool-call loop still yields a reply.

    The digest is stubbed to return ``None`` (e.g. a timeout) and the model
    never produces content, so there is no candidate reply to fall back on.
    The engine must return a non-blank generic error rather than ``None``.
    """
    from jarvis.reply import engine as engine_mod
    from jarvis.tools.types import ToolExecutionResult

    mock_config.ollama_chat_model = "gpt-oss:20b"
    mock_config.agentic_max_turns = 2

    def fake_chat(*args, **kwargs):
        # Pure tool-call loop — no content, so last_candidate_reply stays None.
        return _assistant_tool_call("toolSearchTool", {"query": "a"}, call_id="c1")

    def fake_tool_runner(db, cfg, tool_name, tool_args, **kwargs):
        return ToolExecutionResult(
            success=True,
            reply_text="No additional tools found.",
            error_message=None,
        )

    chat_patch = patch.object(
        engine_mod, "chat_with_messages", side_effect=fake_chat
    )
    tool_patch = patch.object(
        engine_mod, "run_tool_with_retries", side_effect=fake_tool_runner
    )
    router_patch = patch.object(
        engine_mod, "select_tools", return_value=["toolSearchTool", "stop"]
    )
    memory_patch = patch.object(
        engine_mod,
        "extract_search_params_for_memory",
        return_value={"keywords": []},
    )
    digest_patch = patch.object(
        engine_mod, "digest_loop_for_max_turns", return_value=None
    )
    with chat_patch, tool_patch, router_patch, memory_patch, digest_patch:
        reply = engine_mod.run_reply_engine(
            db=db,
            cfg=mock_config,
            tts=None,
            text="do something complicated",
            dialogue_memory=dialogue_memory,
        )

    # Must return some reply (generic error), not None.
    assert reply is not None and reply.strip()
def test_toolsearchtool_empty_result_does_not_register_sentence_as_tool(
    mock_config, db, dialogue_memory, capsys
):
    """Regression: when toolSearchTool surfaces nothing, it returns the
    plain sentence ``"No additional tools found for that description."``
    as ``reply_text``. The engine's line-splitting merger used to treat
    that whole sentence as a tool name and append it to ``allowed_tools``,
    producing the field-log line ``🔧 Discovered 1 tool(s): No additional
    tools found for that description.`` and polluting the allow-list
    with a bogus entry. The parser must reject anything that is not an
    actual tool name from the registry.

    Two symptoms are checked: the log output captured via ``capsys``, and
    the ``tools`` schemas passed to the chat model on every call.
    """
    from jarvis.reply import engine as engine_mod
    from jarvis.tools.types import ToolExecutionResult

    mock_config.ollama_chat_model = "gpt-oss:20b"

    def fake_tool_runner(db, cfg, tool_name, tool_args, **kwargs):
        if tool_name == "toolSearchTool":
            # The empty-result sentence that must NOT be parsed as a tool.
            return ToolExecutionResult(
                success=True,
                reply_text="No additional tools found for that description.",
                error_message=None,
            )
        return ToolExecutionResult(
            success=True, reply_text="ok", error_message=None
        )

    chat_responses = iter(
        [
            _assistant_tool_call(
                "toolSearchTool", {"query": "open youtube"}, call_id="c1"
            ),
            _assistant_content("I could not find a tool for that."),
        ]
    )
    captured_tools_params: list = []

    def fake_chat(*args, **kwargs):
        # Record the ``tools`` kwarg of every LLM call for the schema check.
        captured_tools_params.append(kwargs.get("tools"))
        try:
            return next(chat_responses)
        except StopIteration:
            return _assistant_content("done")

    with patch.object(engine_mod, "run_tool_with_retries", side_effect=fake_tool_runner), \
            patch.object(engine_mod, "chat_with_messages", side_effect=fake_chat), \
            patch.object(engine_mod, "select_tools", return_value=["stop"]), \
            patch.object(
                engine_mod,
                "extract_search_params_for_memory",
                return_value={"keywords": []},
            ):
        engine_mod.run_reply_engine(
            db=db,
            cfg=mock_config,
            tts=None,
            text="open youtube",
            dialogue_memory=dialogue_memory,
        )

    # The user-facing `🔧 Discovered N tool(s):` line is the first
    # symptom of the bug — if the parser accepts the empty-result
    # sentence as a tool name, the log prints it verbatim.
    stdout = capsys.readouterr().out
    assert "No additional tools found for that description" not in stdout or (
        "🔍 No new tools found" in stdout
    ), (
        "Engine's toolSearchTool merger printed the empty-result sentence "
        "as a discovered tool name. Expected `🔍 No new tools found` "
        "instead. Full stdout:\n" + stdout
    )
    assert "🔧 Discovered" not in stdout or (
        "No additional tools found" not in stdout
    ), (
        "Engine logged `🔧 Discovered ... No additional tools found ...` "
        "— the sentence was misclassified as a tool name. Stdout:\n" + stdout
    )

    # Second symptom: allow-list pollution. The sentence must never be
    # offered to the model as a function name. This can only fire if the
    # schema generator emits an entry for the bogus name; when it silently
    # skips unknown names the check is a harmless no-op.
    offered_names = [
        spec["function"].get("name")
        for tools in captured_tools_params
        for spec in (tools or [])
        if isinstance(spec, dict) and isinstance(spec.get("function"), dict)
    ]
    bogus_names = [
        name
        for name in offered_names
        if isinstance(name, str) and "No additional tools found" in name
    ]
    assert not bogus_names, (
        "Empty-result sentence leaked into the tools schema as a tool "
        f"name: {bogus_names}"
    )