Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
This commit is contained in:
275
tests/test_greeting_no_tools.py
Normal file
275
tests/test_greeting_no_tools.py
Normal file
@@ -0,0 +1,275 @@
|
||||
"""
|
||||
Unit tests for greeting and instruction handling in the reply engine.
|
||||
|
||||
Verifies that the model-size-aware prompt system correctly prevents
|
||||
tool calls for greetings and user instructions, while still allowing
|
||||
tools for queries that genuinely require them.
|
||||
|
||||
These tests use a mocked LLM and do not require a real Ollama instance.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _disable_planner():
|
||||
"""These tests verify greeting/instruction routing against a mocked
|
||||
chat LLM. The planner uses its own LLM call (`call_llm_direct`) which
|
||||
is not mocked here, so disable it to keep the test hermetic."""
|
||||
with patch("jarvis.reply.engine.plan_query", return_value=[]):
|
||||
yield
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Data
|
||||
# =============================================================================
|
||||
|
||||
# Greetings in multiple languages - should NOT trigger tools
|
||||
GREETING_TEST_CASES = [
|
||||
pytest.param("hello", False, id="Greeting: hello"),
|
||||
pytest.param("hi there", False, id="Greeting: hi there"),
|
||||
pytest.param("hey", False, id="Greeting: hey"),
|
||||
pytest.param("ni hao", False, id="Greeting: ni hao (Chinese)"),
|
||||
pytest.param("bonjour", False, id="Greeting: bonjour (French)"),
|
||||
pytest.param("hola", False, id="Greeting: hola (Spanish)"),
|
||||
pytest.param("merhaba", False, id="Greeting: merhaba (Turkish)"),
|
||||
pytest.param("ciao", False, id="Greeting: ciao (Italian)"),
|
||||
pytest.param("guten tag", False, id="Greeting: guten tag (German)"),
|
||||
pytest.param("how are you", False, id="Greeting: how are you"),
|
||||
pytest.param("thank you", False, id="Greeting: thank you"),
|
||||
pytest.param("thanks", False, id="Greeting: thanks"),
|
||||
pytest.param("goodbye", False, id="Greeting: goodbye"),
|
||||
pytest.param("good morning", False, id="Greeting: good morning"),
|
||||
pytest.param("good night", False, id="Greeting: good night"),
|
||||
]
|
||||
|
||||
# User instructions about behaviour - should NOT trigger tools
|
||||
USER_INSTRUCTION_TEST_CASES = [
|
||||
pytest.param("always use Celsius when telling me temperatures", False, id="Instruction: use Celsius"),
|
||||
pytest.param("remember to always tell me things in Celsius", False, id="Instruction: remember Celsius"),
|
||||
pytest.param("be more brief in your responses", False, id="Instruction: be more brief"),
|
||||
pytest.param("speak in French from now on", False, id="Instruction: speak in French"),
|
||||
pytest.param("always give me the short version", False, id="Instruction: short version"),
|
||||
pytest.param("don't use emojis in your responses", False, id="Instruction: no emojis"),
|
||||
pytest.param("note that I prefer metric units", False, id="Instruction: prefer metric"),
|
||||
]
|
||||
|
||||
# Queries that SHOULD trigger tools
|
||||
TOOL_REQUIRED_TEST_CASES = [
|
||||
pytest.param("what's the weather", True, id="Tool query: weather"),
|
||||
pytest.param("search for python tutorials", True, id="Tool query: web search"),
|
||||
pytest.param("what's the weather in Tokyo", True, id="Tool query: weather with location"),
|
||||
pytest.param("look up the news today", True, id="Tool query: news search"),
|
||||
pytest.param("what did I eat yesterday", True, id="Tool query: meal recall"),
|
||||
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Helpers
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class ToolCallCapture:
|
||||
"""Captures tool calls made during a test run."""
|
||||
calls: List[Dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
def record(self, name: str, args: Dict[str, Any]):
|
||||
self.calls.append({"name": name, "args": args})
|
||||
|
||||
def has_any_tool(self) -> bool:
|
||||
return len(self.calls) > 0
|
||||
|
||||
def tool_names(self) -> List[str]:
|
||||
return [c["name"] for c in self.calls]
|
||||
|
||||
|
||||
def _mock_llm_response(content: str, tool_calls=None):
|
||||
"""Build a minimal mock Ollama response dict."""
|
||||
message = {"content": content, "role": "assistant"}
|
||||
if tool_calls:
|
||||
message["tool_calls"] = tool_calls
|
||||
return {"message": message}
|
||||
|
||||
|
||||
def _tool_call(name: str, args: Dict[str, Any]):
|
||||
"""Build a mock tool-call entry in OpenAI format."""
|
||||
return {
|
||||
"id": f"call_{name}_001",
|
||||
"function": {"name": name, "arguments": args},
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests
|
||||
# =============================================================================
|
||||
|
||||
class TestGreetingNoTools:
|
||||
"""
|
||||
Verifies that the model-size-aware prompt system does not trigger tool
|
||||
calls for greetings or user instructions when using a mocked LLM.
|
||||
"""
|
||||
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.parametrize("query,should_use_tools", GREETING_TEST_CASES + USER_INSTRUCTION_TEST_CASES)
|
||||
def test_greeting_no_tool_calls(
|
||||
self,
|
||||
query: str,
|
||||
should_use_tools: bool,
|
||||
mock_config,
|
||||
db,
|
||||
dialogue_memory,
|
||||
):
|
||||
"""Greetings and user instructions should not trigger tool calls."""
|
||||
from jarvis.reply.engine import run_reply_engine
|
||||
|
||||
mock_config.ollama_chat_model = "gemma4:e2b"
|
||||
capture = ToolCallCapture()
|
||||
|
||||
def mock_tool_run(db, cfg, tool_name, tool_args, **kwargs): # noqa: F841 (shadows fixture)
|
||||
from jarvis.tools.types import ToolExecutionResult
|
||||
capture.record(tool_name, tool_args or {})
|
||||
return ToolExecutionResult(success=True, reply_text="Tool result")
|
||||
|
||||
def mock_chat(base_url, chat_model, messages, timeout_sec, extra_options=None, tools=None, thinking=False):
|
||||
return _mock_llm_response("Hello! How can I help you today?")
|
||||
|
||||
with patch('jarvis.reply.engine.run_tool_with_retries', side_effect=mock_tool_run), \
|
||||
patch('jarvis.reply.engine.chat_with_messages', side_effect=mock_chat), \
|
||||
patch('jarvis.reply.engine.extract_search_params_for_memory', return_value={"keywords": []}):
|
||||
|
||||
run_reply_engine(
|
||||
db=db, cfg=mock_config, tts=None,
|
||||
text=query, dialogue_memory=dialogue_memory,
|
||||
)
|
||||
|
||||
assert not capture.has_any_tool(), \
|
||||
f"Greeting '{query}' should NOT trigger tools. Called: {capture.tool_names()}"
|
||||
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.parametrize("query,should_use_tools", TOOL_REQUIRED_TEST_CASES)
|
||||
def test_tool_queries_still_work(
|
||||
self,
|
||||
query: str,
|
||||
should_use_tools: bool,
|
||||
mock_config,
|
||||
db,
|
||||
dialogue_memory,
|
||||
):
|
||||
"""Queries that require tools should still trigger them."""
|
||||
from jarvis.reply.engine import run_reply_engine
|
||||
|
||||
mock_config.ollama_chat_model = "gemma4:e2b"
|
||||
capture = ToolCallCapture()
|
||||
|
||||
def mock_tool_run(db, cfg, tool_name, tool_args, **kwargs): # noqa: F841 (shadows fixture)
|
||||
from jarvis.tools.types import ToolExecutionResult
|
||||
capture.record(tool_name, tool_args or {})
|
||||
return ToolExecutionResult(success=True, reply_text="Weather: 20C sunny")
|
||||
|
||||
call_count = 0
|
||||
|
||||
def mock_chat(base_url, chat_model, messages, timeout_sec, extra_options=None, tools=None, thinking=False):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count == 1:
|
||||
if "weather" in query.lower():
|
||||
return _mock_llm_response("", [_tool_call("getWeather", {"location": "here"})])
|
||||
elif "search" in query.lower() or "look up" in query.lower() or "news" in query.lower():
|
||||
return _mock_llm_response("", [_tool_call("webSearch", {"search_query": query})])
|
||||
elif "eat" in query.lower() or "meal" in query.lower():
|
||||
return _mock_llm_response("", [_tool_call("fetchMeals", {})])
|
||||
return _mock_llm_response("Here's the information you requested.")
|
||||
|
||||
with patch('jarvis.reply.engine.run_tool_with_retries', side_effect=mock_tool_run), \
|
||||
patch('jarvis.reply.engine.chat_with_messages', side_effect=mock_chat), \
|
||||
patch('jarvis.reply.engine.extract_search_params_for_memory', return_value={"keywords": []}), \
|
||||
patch('jarvis.reply.engine.select_tools',
|
||||
return_value=["webSearch", "getWeather", "fetchMeals", "stop"]):
|
||||
|
||||
response = run_reply_engine(
|
||||
db=db, cfg=mock_config, tts=None,
|
||||
text=query, dialogue_memory=dialogue_memory,
|
||||
)
|
||||
|
||||
assert capture.has_any_tool(), \
|
||||
f"Query '{query}' SHOULD trigger tools but didn't. Response: {response}"
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_thinking_only_response_continues_loop(
|
||||
self,
|
||||
mock_config,
|
||||
db,
|
||||
dialogue_memory,
|
||||
):
|
||||
"""A thinking-only response (no content, no tool call) should continue the loop, not break it."""
|
||||
from jarvis.reply.engine import run_reply_engine
|
||||
|
||||
mock_config.ollama_chat_model = "gemma4:12b"
|
||||
call_count = 0
|
||||
|
||||
def mock_chat(base_url, chat_model, messages, timeout_sec, extra_options=None, tools=None, thinking=False):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count == 1:
|
||||
# First turn: thinking only, no content, no tool call
|
||||
return {"message": {"content": "", "role": "assistant", "thinking": "Let me think about this..."}}
|
||||
# Second turn: actual response
|
||||
return _mock_llm_response("The answer is 42.")
|
||||
|
||||
with patch('jarvis.reply.engine.chat_with_messages', side_effect=mock_chat), \
|
||||
patch('jarvis.reply.engine.extract_search_params_for_memory', return_value={"keywords": []}):
|
||||
|
||||
response = run_reply_engine(
|
||||
db=db, cfg=mock_config, tts=None,
|
||||
text="what is the meaning of life",
|
||||
dialogue_memory=dialogue_memory,
|
||||
)
|
||||
|
||||
assert call_count == 2, f"Expected 2 LLM calls (thinking + response), got {call_count}"
|
||||
assert response is not None
|
||||
assert "42" in response
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_all_tools_available_regardless_of_profile(
|
||||
self,
|
||||
mock_config,
|
||||
db,
|
||||
dialogue_memory,
|
||||
):
|
||||
"""All builtin tools should be available regardless of which profile is selected."""
|
||||
from jarvis.reply.engine import run_reply_engine
|
||||
|
||||
mock_config.ollama_chat_model = "gemma4:e2b"
|
||||
capture = ToolCallCapture()
|
||||
|
||||
def mock_tool_run(db, cfg, tool_name, tool_args, **kwargs):
|
||||
from jarvis.tools.types import ToolExecutionResult
|
||||
capture.record(tool_name, tool_args or {})
|
||||
return ToolExecutionResult(success=True, reply_text="Logged: pizza")
|
||||
|
||||
call_count = 0
|
||||
|
||||
def mock_chat(base_url, chat_model, messages, timeout_sec, extra_options=None, tools=None, thinking=False):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count == 1:
|
||||
return _mock_llm_response("", [_tool_call("logMeal", {"description": "pizza"})])
|
||||
return _mock_llm_response("Logged your meal!")
|
||||
|
||||
# logMeal was previously restricted to "life" profile only — now all tools are always available
|
||||
with patch('jarvis.reply.engine.run_tool_with_retries', side_effect=mock_tool_run), \
|
||||
patch('jarvis.reply.engine.chat_with_messages', side_effect=mock_chat), \
|
||||
patch('jarvis.reply.engine.extract_search_params_for_memory', return_value={"keywords": []}):
|
||||
|
||||
run_reply_engine(
|
||||
db=db, cfg=mock_config, tts=None,
|
||||
text="log that I had pizza for lunch",
|
||||
dialogue_memory=dialogue_memory,
|
||||
)
|
||||
|
||||
assert capture.has_any_tool(), "logMeal should always be callable"
|
||||
assert "logMeal" in capture.tool_names()
|
||||
Reference in New Issue
Block a user