Files
javis_bot/tests/test_dialogue_memory_tool_carryover.py
javis-bot c4abf63f38
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Transform isair/jarvis into a Discord-controlled voice assistant running on
the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral),
  voice channel join + voice receive/playback, pluggable VNC screen broadcast
  (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS
  behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite,
  docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because
Discord blocks bot video; native screen broadcast only works via a Node
selfbot library.
2026-06-09 14:51:05 +09:00

283 lines
11 KiB
Python

"""Tests for DialogueMemory tool-message carryover across turns.
Behaviour under test: tool-call and tool-result messages generated during
one reply must be retrievable as part of the next reply's initial messages,
so follow-up turns can reuse the prior tool output instead of re-fetching.
Tool rows are conversation-scoped rather than bounded by RECENT_WINDOW_SEC:
they stay available until ``clear_tool_carryover`` is called.
"""
import time
import pytest
from src.jarvis.memory.conversation import DialogueMemory
@pytest.mark.unit
class TestToolCarryover:
    """Carryover of tool-call / tool-result rows in ``DialogueMemory``.

    Every test builds a fresh ``DialogueMemory``, records tool rows through
    ``record_tool_turn`` and then inspects either a public read path
    (``get_recent_turns_with_tools`` / ``get_pending_chunks``) or the private
    ``_tool_turns`` store.
    """

    # ------------------------------------------------------------------
    # Fixture builders (not collected by pytest: names lack ``test_``).
    # ------------------------------------------------------------------

    @staticmethod
    def _call_row(call_id, arguments, name="webSearch"):
        """Build an assistant row carrying one native tool call."""
        function_spec = {"name": name, "arguments": arguments}
        tool_call = {
            "id": call_id,
            "type": "function",
            "function": function_spec,
        }
        return {"role": "assistant", "content": "", "tool_calls": [tool_call]}

    @staticmethod
    def _result_row(call_id, content):
        """Build the tool-result row that answers ``call_id``."""
        return {"role": "tool", "tool_call_id": call_id, "content": content}

    @staticmethod
    def _roles(messages):
        """Return the ``role`` of every message, in order."""
        return [message.get("role") for message in messages]

    @staticmethod
    def _first_tool_row(messages):
        """Return the first message whose role is ``tool``."""
        return next(
            message for message in messages if message.get("role") == "tool"
        )

    @staticmethod
    def _stored_rows(memory):
        """Return the message list of the first stored tool turn."""
        return memory._tool_turns[0][1]

    # ------------------------------------------------------------------
    # Ordering and retrieval
    # ------------------------------------------------------------------

    def test_record_tool_turn_stores_messages(self):
        """A recorded tool turn is returned between the surrounding text rows."""
        memory = DialogueMemory()
        memory.add_message("user", "who is justin bieber")
        memory.record_tool_turn([
            self._call_row("call_1", {"query": "justin bieber"}),
            self._result_row("call_1", "Justin Bieber is a Canadian singer..."),
        ])
        memory.add_message("assistant", "He is a Canadian singer.")

        messages = memory.get_recent_turns_with_tools()

        # Expected order: user, assistant-with-tool_calls, tool, assistant.
        assert self._roles(messages) == ["user", "assistant", "tool", "assistant"]
        call_row, result_row = messages[1], messages[2]
        assert call_row.get("tool_calls")
        assert result_row.get("tool_call_id") == "call_1"
        assert "Canadian singer" in result_row["content"]

    def test_carryover_survives_second_add_message(self):
        """Tool rows keep their position once a later text message is added."""
        memory = DialogueMemory()
        memory.add_message("user", "q1")
        memory.record_tool_turn([
            self._call_row("c1", {"query": "q1"}),
            self._result_row("c1", "r1"),
        ])
        memory.add_message("assistant", "a1")
        # Small pause before the follow-up user message, as in the
        # original scenario.
        time.sleep(0.005)
        memory.add_message("user", "q2")

        messages = memory.get_recent_turns_with_tools()

        expected_roles = ["user", "assistant", "tool", "assistant", "user"]
        assert self._roles(messages) == expected_roles

    # ------------------------------------------------------------------
    # Size limits
    # ------------------------------------------------------------------

    def test_truncates_large_tool_content(self):
        """Oversized tool content is cut down to ``per_entry_chars``."""
        memory = DialogueMemory()
        oversized = "x" * 5000
        memory.add_message("user", "q")
        memory.record_tool_turn([
            self._call_row("c1", {"query": "q"}),
            self._result_row("c1", oversized),
        ])

        messages = memory.get_recent_turns_with_tools(per_entry_chars=1200)

        tool_row = self._first_tool_row(messages)
        # Budget is 1200 characters plus one ellipsis character.
        assert len(tool_row["content"]) <= 1201

    def test_caps_to_max_tool_turns(self):
        """Only the newest ``max_tool_turns`` tool turns are returned."""
        memory = DialogueMemory()
        for turn in range(4):
            memory.add_message("user", f"q{turn}")
            memory.record_tool_turn([
                self._call_row(f"c{turn}", {"q": f"q{turn}"}),
                self._result_row(f"c{turn}", f"r{turn}"),
            ])
            memory.add_message("assistant", f"a{turn}")

        messages = memory.get_recent_turns_with_tools(max_tool_turns=2)

        surviving = [
            message["content"]
            for message in messages
            if message.get("role") == "tool"
        ]
        # The two most recent tool turns are the ones that remain.
        assert surviving == ["r2", "r3"]

    # ------------------------------------------------------------------
    # Lifetime
    # ------------------------------------------------------------------

    def test_clear_tool_carryover_drops_tool_msgs_only(self):
        """``clear_tool_carryover`` removes tool rows but keeps the prose."""
        memory = DialogueMemory()
        memory.add_message("user", "q")
        memory.record_tool_turn([
            self._call_row("c1", {"q": "x"}),
            self._result_row("c1", "r"),
        ])
        memory.add_message("assistant", "a")

        memory.clear_tool_carryover()
        messages = memory.get_recent_turns_with_tools()

        # Tool rows are gone; the user/assistant text is still there.
        assert self._roles(messages) == ["user", "assistant"]

    def test_tool_turns_survive_past_recent_window_age(self):
        """Tool carryover is scoped to the conversation, not to
        RECENT_WINDOW_SEC.

        Prior tool results must stay visible in an ongoing conversation no
        matter how long ago the tool fired; per the original test note, the
        engine clears them on new-conversation entry and on ``stop``.
        """
        memory = DialogueMemory(inactivity_timeout=300.0)
        memory.add_message("user", "q")
        memory.record_tool_turn([
            self._call_row("c1", {"q": "x"}),
            self._result_row("c1", "r"),
        ])

        # Backdate every stored tool turn to just beyond the recent window;
        # the carryover has to outlive that until it is cleared explicitly.
        with memory._lock:
            backdated = []
            for stamp, rows in memory._tool_turns:
                backdated.append(
                    (stamp - (memory.RECENT_WINDOW_SEC + 10), rows)
                )
            memory._tool_turns = backdated

        before_clear = memory.get_recent_turns_with_tools()
        assert any(row.get("role") == "tool" for row in before_clear), (
            "tool carryover must persist beyond RECENT_WINDOW_SEC age"
        )

        memory.clear_tool_carryover()
        after_clear = memory.get_recent_turns_with_tools()
        assert not any(row.get("role") == "tool" for row in after_clear)

    # ------------------------------------------------------------------
    # Secret scrubbing
    # ------------------------------------------------------------------

    def test_tool_call_arguments_are_scrubbed(self):
        """Dict-form tool-call arguments are scrubbed when recorded.

        Arguments can carry secrets (for example an email or token inside a
        search query), so they must not be re-injected verbatim on the next
        turn.
        """
        memory = DialogueMemory()
        memory.record_tool_turn([
            self._call_row(
                "c1",
                {"query": "look up alice@example.com please"},
            ),
            self._result_row("c1", "ok"),
        ])

        recorded_call = self._stored_rows(memory)[0]["tool_calls"][0]
        recorded_args = recorded_call["function"]["arguments"]
        assert "alice@example.com" not in recorded_args["query"]
        assert "[REDACTED_EMAIL]" in recorded_args["query"]

    def test_tool_call_arguments_list_form_is_scrubbed(self):
        """List-form tool-call arguments are scrubbed element by element.

        Some providers / custom tools pass arguments as a list of scalars or
        dicts; a positional secret must not slip through unredacted.
        """
        memory = DialogueMemory()
        positional_args = [
            "alice@example.com",
            {"note": "ping bob@example.com"},
        ]
        memory.record_tool_turn([
            self._call_row("c1", positional_args, name="lookup"),
            self._result_row("c1", "ok"),
        ])

        recorded_call = self._stored_rows(memory)[0]["tool_calls"][0]
        rendered = repr(recorded_call["function"]["arguments"])
        assert "alice@example.com" not in rendered
        assert "bob@example.com" not in rendered
        assert rendered.count("[REDACTED_EMAIL]") >= 2

    def test_tool_call_arguments_string_form_is_scrubbed(self):
        """Arguments serialised as a JSON string are scrubbed as well."""
        memory = DialogueMemory()
        memory.record_tool_turn([
            self._call_row("c1", '{"query": "alice@example.com"}'),
            self._result_row("c1", "ok"),
        ])

        recorded_call = self._stored_rows(memory)[0]["tool_calls"][0]
        recorded_args = recorded_call["function"]["arguments"]
        assert "alice@example.com" not in recorded_args
        assert "[REDACTED_EMAIL]" in recorded_args

    def test_tool_payloads_are_scrubbed_of_secrets(self):
        """Tool-result content is scrubbed before it is persisted.

        Results may contain emails, API tokens or JWTs; those must be removed
        at record time so a follow-up injection cannot leak them, while
        harmless text is kept.
        """
        memory = DialogueMemory()
        memory.add_message("user", "look up the api")
        tainted = "".join([
            "Contact: alice@example.com\n",
            "Bearer token: eyJhbGciOiJIUzI1NiJ9.abc.def\n",
            "Fine content stays.",
        ])
        memory.record_tool_turn([self._result_row("c1", tainted)])

        persisted = self._stored_rows(memory)[0]["content"]
        assert "alice@example.com" not in persisted
        assert "[REDACTED_EMAIL]" in persisted
        assert "eyJhbGciOiJIUzI1NiJ9" not in persisted
        assert "Fine content stays." in persisted

    # ------------------------------------------------------------------
    # Prompt-injection fence and diary input
    # ------------------------------------------------------------------

    def test_truncation_preserves_untrusted_fence_end_marker(self):
        """Truncating a fenced web extract keeps the closing fence marker.

        The closing marker has to be re-appended after truncation so the
        downstream prompt-injection defence fence stays intact.
        """
        memory = DialogueMemory()
        memory.add_message("user", "q")
        fence_open = "<<<BEGIN UNTRUSTED WEB EXTRACT>>>"
        fence_close = "<<<END UNTRUSTED WEB EXTRACT>>>"
        fenced = "\n".join(
            ["Search result:", fence_open, "x" * 5000, fence_close]
        )
        memory.record_tool_turn([self._result_row("c1", fenced)])

        messages = memory.get_recent_turns_with_tools(per_entry_chars=500)

        tool_row = self._first_tool_row(messages)
        assert fence_open in tool_row["content"]
        assert fence_close in tool_row["content"], (
            "closing fence marker must survive truncation"
        )

    def test_get_pending_chunks_excludes_tool_rows(self):
        """Tool messages stay out of the diary summariser input."""
        memory = DialogueMemory()
        memory.add_message("user", "q")
        memory.record_tool_turn([
            self._result_row("c1", "raw web extract with secrets"),
        ])
        memory.add_message("assistant", "a")

        summary_input = " | ".join(memory.get_pending_chunks())

        assert "raw web extract" not in summary_input
        assert "User: q" in summary_input
        assert "Assistant: a" in summary_input