Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled

Transform isair/jarvis into a Discord-controlled voice assistant running on
the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral),
  voice channel join + voice receive/playback, pluggable VNC screen broadcast
  (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS
  behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite,
  docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because
Discord blocks bot video; native screen broadcast only works via a Node
selfbot library.
This commit is contained in:
javis-bot
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions

View File

@@ -0,0 +1,137 @@
"""
End-to-end eval — single-turn flow where the user's location lives in the
User branch of the knowledge graph (warm profile). The warm profile is
always-loaded into the system prompt, so the chat model and planner can
ground ``getWeather`` on it without a ``searchMemory`` step.
This stresses the warm-profile-injection path. It complements:
- ``evals/test_followup_supplies_missing_tool_arg.py`` (hot-window
carry-over, two-turn).
- ``evals/test_diary_supplies_missing_tool_arg.py`` (diary recall via
planner-emitted ``searchMemory``).
Run: EVAL_JUDGE_MODEL=gemma4:e2b ./scripts/run_evals.sh graph_supplies_missing_tool_arg
"""
from unittest.mock import patch
import pytest
from conftest import requires_judge_llm
from helpers import (
ToolCallCapture,
assert_not_fallback_reply,
JUDGE_MODEL,
)
# Canned forecast text handed back by the fake tool runner whenever
# ``getWeather`` is called with a non-empty location.
_EDINBURGH_FORECAST = "\n".join(
    (
        "Weather for Edinburgh, UK:",
        "Today: 11°C, partly cloudy. High 13°C, low 7°C.",
        "Tomorrow: 12°C, light rain, high 14°C, low 8°C.",
    )
)
def _make_runner(capture: ToolCallCapture):
    """Build a fake tool runner that logs every call on *capture*.

    The returned callable is used in this file as the ``side_effect`` of a
    patched ``jarvis.reply.engine.run_tool_with_retries``. It never runs a
    real tool:

    - ``getWeather`` with a non-blank ``location`` succeeds with the canned
      Edinburgh forecast, whatever city was requested.
    - ``getWeather`` with a missing or blank ``location`` fails with a
      message asking the user to name a city.
    - Any other tool succeeds with the reply text ``"OK"``.
    """
    # Imported lazily so the project package is only needed when the
    # runner is actually built.
    from jarvis.tools.types import ToolExecutionResult

    def _fake_tool_runner(db, cfg, tool_name, tool_args, **kwargs):
        """Record the call, then return a canned ``ToolExecutionResult``."""
        # A missing/None argument payload is treated as an empty dict.
        args = tool_args or {}
        capture.record(tool_name, args)

        if tool_name != "getWeather":
            return ToolExecutionResult(success=True, reply_text="OK")

        requested_location = (args.get("location") or "").strip()
        if requested_location:
            return ToolExecutionResult(
                success=True,
                reply_text=_EDINBURGH_FORECAST,
            )

        # No usable location: simulate the tool being unable to work out
        # where the user is.
        return ToolExecutionResult(
            success=False,
            reply_text=(
                "I couldn't auto-detect your location. Please "
                "tell me which city to check the weather for."
            ),
        )

    return _fake_tool_runner
@pytest.mark.eval
@requires_judge_llm
class TestGraphSuppliesMissingToolArg:
    """Eval for the warm-profile injection path.

    The (patched) warm profile carries a User-branch fact, "lives in
    Edinburgh". The chat model is expected to pass that city to
    ``getWeather`` as the ``location`` argument on its own, without an
    extra memory search.
    """

    def test_warm_profile_user_fact_grounds_get_weather_call(
        self, mock_config, eval_db, eval_dialogue_memory,
    ):
        """Ask about the weather with no city; expect an Edinburgh lookup."""
        from jarvis.reply.engine import run_reply_engine

        mock_config.ollama_base_url = "http://localhost:11434"
        mock_config.ollama_chat_model = JUDGE_MODEL
        # Geoip is switched off, so the warm profile is the only source a
        # location can come from.
        mock_config.location_enabled = False

        capture = ToolCallCapture()

        # Patch the warm-profile builder to return a fixed User-branch fact
        # instead of seeding the SQLite-backed graph store. This keeps the
        # test independent of graph-mutation listeners and branch-root
        # bootstrapping in the test DB.
        stub_profile = {
            "user": "The user lives in Edinburgh.",
            "directives": "",
        }

        with patch(
            "jarvis.memory.graph_ops.build_warm_profile",
            return_value=stub_profile,
        ):
            with patch(
                "jarvis.reply.engine.run_tool_with_retries",
                side_effect=_make_runner(capture),
            ):
                response = run_reply_engine(
                    db=eval_db,
                    cfg=mock_config,
                    tts=None,
                    text="how's the weather, Jarvis?",
                    dialogue_memory=eval_dialogue_memory,
                )

        # Diagnostic dump so a failing run shows what the model did.
        print(f"\n Graph Supplies Missing Tool Arg ({JUDGE_MODEL}):")
        print(f" Tools called: {capture.tool_names()}")
        for c in capture.calls:
            print(f" - {c['name']}({c['args']})")
        print(f" Response: {(response or '')[:300]}")

        assert_not_fallback_reply(response, context="warm-profile")

        def _names_edinburgh(call):
            """Return True when the call's location argument mentions Edinburgh."""
            return "edinburgh" in (call["args"].get("location") or "").lower()

        weather_calls = [
            call for call in capture.calls if call["name"] == "getWeather"
        ]
        grounded_calls = list(filter(_names_edinburgh, weather_calls))
        assert grounded_calls, (
            "getWeather was not invoked with location='Edinburgh' even "
            "though the warm profile names Edinburgh as the user's home. "
            "The chat model must use always-loaded user facts as tool "
            "arguments without an explicit prompt to do so. "
            f"All getWeather calls: {[c['args'] for c in weather_calls]}. "
            f"Tools observed: {capture.tool_names()}. "
            f"Response: {(response or '')[:400]}"
        )

        # The final reply must be about Edinburgh and must not fall back to
        # Hackney, which the assertion message below calls a hardcoded
        # default.
        reply_lowercase = (response or "").lower()
        assert "edinburgh" in reply_lowercase, (
            "Reply does not mention Edinburgh despite the warm profile "
            f"naming it as the user's location. Response: {(response or '')[:400]}"
        )
        assert "hackney" not in reply_lowercase, (
            "Reply mentions Hackney — the warm profile clearly states "
            "Edinburgh, and geoip is disabled in this test. The model "
            f"leaked a hardcoded default. Response: {(response or '')[:400]}"
        )