Add Discord-native hybrid front-end for Jarvis (bot + bridge)

Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions
--- a/tests/test_engine_tool_carryover_guard.py
+++ b/tests/test_engine_tool_carryover_guard.py
@@ -0,0 +1,563 @@
+"""Engine-level tool carry-over guard.
+
+Field trace (2026-05-03, gemma4:e2b):
+  Turn 1 user: "how's the weather tomorrow Jarvis?" → no location set →
+    assistant invokes ``getWeather``, tool returns ``success=False``
+    ("I couldn't auto-detect your location, please tell me a city"),
+    assistant relays the request.
+  Turn 2 user: "I'm in London" → small-model router picks ``webSearch``
+    instead of ``getWeather``, planner falls back to a web search for
+    "weather in london tomorrow", DDG fails, Wikipedia matches the 2014
+    film "Edge of Tomorrow", and the assistant parrots the film summary
+    as the weather answer.
+
+Fix: when the previous assistant turn invoked a tool that reported
+``success=False`` on its ``ToolExecutionResult``, union the previous
+turn's tool name into the allow-list. The ``tool_failed`` flag stamped
+onto each recorded tool result is the truth source. Gating on failure
+(rather than recency or query length) means a successful chain followed
+by a genuine new short ask ("play some music") correctly does NOT carry
+over the prior tool.
+
+The carry-over is an engine-side per-turn overlay: the router cache
+stores only the raw router output, so future identical queries are
+unaffected.
+"""
+
+from unittest.mock import Mock, patch
+
+import pytest
+
+from src.jarvis.memory.conversation import DialogueMemory
+from src.jarvis.reply.engine import run_reply_engine
+
+
+def _mock_cfg():
+    cfg = Mock()
+    cfg.ollama_base_url = "http://localhost:11434"
+    cfg.ollama_chat_model = "test-large"
+    cfg.voice_debug = False
+    cfg.llm_tools_timeout_sec = 8.0
+    cfg.llm_embed_timeout_sec = 10.0
+    cfg.llm_chat_timeout_sec = 45.0
+    cfg.llm_digest_timeout_sec = 8.0
+    cfg.memory_enrichment_max_results = 5
+    cfg.memory_enrichment_source = "diary"
+    cfg.memory_digest_enabled = False
+    cfg.tool_result_digest_enabled = False
+    cfg.location_ip_address = None
+    cfg.location_auto_detect = False
+    cfg.location_enabled = False
+    cfg.agentic_max_turns = 8
+    cfg.tool_search_max_calls = 3
+    cfg.tool_selection_strategy = "all"
+    cfg.tool_carryover_max_turns = 2
+    cfg.tool_carryover_per_entry_chars = 1200
+    cfg.mcps = {}
+    cfg.llm_thinking_enabled = False
+    cfg.tts_engine = "none"
+    cfg.ollama_embed_model = "test-embed"
+    cfg.db_path = ":memory:"
+    return cfg
+
+
+def _tool_names_from_chat_call(call) -> set[str]:
+    """Pull function names out of the OpenAI-style tools schema passed
+    to chat_with_messages.
+    """
+    schema = call.kwargs.get("tools") or []
+    names: set[str] = set()
+    for entry in schema:
+        if not isinstance(entry, dict):
+            continue
+        fn = entry.get("function") or {}
+        nm = fn.get("name")
+        if isinstance(nm, str):
+            names.add(nm)
+    return names
+
+
+def _failed_tool_turn(tool_name: str, tool_call_id: str = "c1") -> list[dict]:
+    """Plant a previous-turn tool turn where the tool was invoked and
+    reported failure. Mirrors the message shape the engine records for a
+    native tool call whose ``ToolExecutionResult.success`` was False.
+    """
+    return [
+        {"role": "assistant", "content": "", "tool_calls": [{
+            "id": tool_call_id, "type": "function",
+            "function": {"name": tool_name, "arguments": {}},
+        }]},
+        {"role": "tool", "tool_call_id": tool_call_id,
+         "tool_name": tool_name,
+         "content": "I couldn't auto-detect your location.",
+         "tool_failed": True},
+    ]
+
+
+def _succeeded_tool_turn(tool_name: str, tool_call_id: str = "c1") -> list[dict]:
+    """Plant a previous-turn tool turn where the tool succeeded."""
+    return [
+        {"role": "assistant", "content": "", "tool_calls": [{
+            "id": tool_call_id, "type": "function",
+            "function": {"name": tool_name, "arguments": {"location": "London"}},
+        }]},
+        {"role": "tool", "tool_call_id": tool_call_id,
+         "tool_name": tool_name,
+         "content": "London: 15°C and partly cloudy.",
+         "tool_failed": False},
+    ]
+
+
+def _failed_text_tool_turn(tool_name: str) -> list[dict]:
+    """Plant a previous-turn tool turn in the text-tool fallback shape
+    (small models). Tool error is appended as a ``role=user`` message
+    tagged with both ``tool_name`` and ``tool_failed=True``.
+    """
+    return [
+        {"role": "assistant",
+         "content": (
+             "```tool_call\n"
+             '{"name": "' + tool_name + '", "arguments": {}}\n'
+             "```"
+         )},
+        {"role": "user",
+         "content": (
+             "[Tool error: " + tool_name + "] I couldn't auto-detect "
+             "your location."
+         ),
+         "tool_name": tool_name,
+         "tool_failed": True},
+    ]
+
+
+@pytest.mark.unit
+@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
+@patch("src.jarvis.memory.graph_ops.build_warm_profile",
+       return_value={"user": "", "directives": ""})
+@patch("src.jarvis.memory.graph.GraphMemoryStore")
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_followup_carries_over_failed_previous_tool(
+    mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
+    _mock_graph, _mock_warm, _mock_fmt,
+):
+    """Previous turn invoked ``getWeather`` and the tool reported failure;
+    this turn's router only picked ``webSearch``. The engine must union
+    ``getWeather`` back in so the chat model can re-call it with the
+    location the user just supplied.
+    """
+    mock_chat.side_effect = [
+        {"message": {"content": "Weather in London is 15°C."}},
+    ]
+    mock_extract.side_effect = ["Weather in London is 15°C."]
+
+    db = Mock()
+    cfg = _mock_cfg()
+    dm = DialogueMemory()
+    dm.add_message("user", "how's the weather tomorrow Jarvis")
+    dm.record_tool_turn(_failed_tool_turn("getWeather"))
+    dm.add_message("assistant", "I do not have a location set.")
+
+    with patch(
+        "src.jarvis.reply.engine.select_tools",
+        return_value=["webSearch"],
+    ):
+        run_reply_engine(db=db, cfg=cfg, tts=None,
+                         text="I'm in London", dialogue_memory=dm)
+
+    tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
+    assert "webSearch" in tool_names, (
+        f"router pick must remain visible; saw {sorted(tool_names)}"
+    )
+    assert "getWeather" in tool_names, (
+        "previous-turn failed tool must be carried over; "
+        f"saw {sorted(tool_names)}"
+    )
+
+
+@pytest.mark.unit
+@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
+@patch("src.jarvis.memory.graph_ops.build_warm_profile",
+       return_value={"user": "", "directives": ""})
+@patch("src.jarvis.memory.graph.GraphMemoryStore")
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_successful_previous_tool_does_not_trigger_carryover(
+    mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
+    _mock_graph, _mock_warm, _mock_fmt,
+):
+    """A successful prior tool call means the chain completed. A genuine
+    new short ask ("log my breakfast") must NOT inherit the prior tool —
+    that would noisily widen the allow-list for unrelated turns and
+    risks small models replaying the previous tool. The router pick
+    stands on its own.
+    """
+    mock_chat.side_effect = [
+        {"message": {"content": "Logged."}},
+    ]
+    mock_extract.side_effect = ["Logged."]
+
+    db = Mock()
+    cfg = _mock_cfg()
+    dm = DialogueMemory()
+    dm.add_message("user", "how's the weather in London")
+    dm.record_tool_turn(_succeeded_tool_turn("getWeather"))
+    dm.add_message("assistant", "It's 15°C and partly cloudy in London.")
+
+    with patch(
+        "src.jarvis.reply.engine.select_tools",
+        return_value=["logMeal"],
+    ):
+        run_reply_engine(db=db, cfg=cfg, tts=None,
+                         text="log my breakfast", dialogue_memory=dm)
+
+    tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
+    assert "logMeal" in tool_names
+    assert "getWeather" not in tool_names, (
+        "successful prior tool must not be carried over; "
+        f"saw {sorted(tool_names)}"
+    )
+
+
+@pytest.mark.unit
+@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
+@patch("src.jarvis.memory.graph_ops.build_warm_profile",
+       return_value={"user": "", "directives": ""})
+@patch("src.jarvis.memory.graph.GraphMemoryStore")
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_cold_start_does_not_trigger_carryover(
+    mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
+    _mock_graph, _mock_warm, _mock_fmt,
+):
+    """Empty dialogue memory — the carry-over path must be a no-op."""
+    mock_chat.side_effect = [
+        {"message": {"content": "Hello."}},
+    ]
+    mock_extract.side_effect = ["Hello."]
+
+    db = Mock()
+    cfg = _mock_cfg()
+    dm = DialogueMemory()  # cold start — no prior turns
+
+    with patch(
+        "src.jarvis.reply.engine.select_tools",
+        return_value=["webSearch"],
+    ):
+        run_reply_engine(db=db, cfg=cfg, tts=None,
+                         text="hi", dialogue_memory=dm)
+
+    tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
+    assert "webSearch" in tool_names
+    assert "getWeather" not in tool_names
+
+
+@pytest.mark.unit
+@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
+@patch("src.jarvis.memory.graph_ops.build_warm_profile",
+       return_value={"user": "", "directives": ""})
+@patch("src.jarvis.memory.graph.GraphMemoryStore")
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_carryover_does_not_pollute_router_cache(
+    mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
+    _mock_graph, _mock_warm, _mock_fmt,
+):
+    """The router cache stores the raw router output. Carry-over is a
+    per-turn overlay layered on top — it must NOT be written back to the
+    cache, otherwise every replay of the same query inherits a
+    contaminated tool list.
+    """
+    mock_chat.side_effect = [
+        {"message": {"content": "Weather in London is 15°C."}},
+    ]
+    mock_extract.side_effect = ["Weather in London is 15°C."]
+
+    db = Mock()
+    cfg = _mock_cfg()
+    dm = DialogueMemory()
+    dm.add_message("user", "how's the weather tomorrow Jarvis")
+    dm.record_tool_turn(_failed_tool_turn("getWeather"))
+    dm.add_message("assistant", "I do not have a location set.")
+
+    with patch(
+        "src.jarvis.reply.engine.select_tools",
+        return_value=["webSearch"],
+    ):
+        run_reply_engine(db=db, cfg=cfg, tts=None,
+                         text="I'm in London", dialogue_memory=dm)
+
+    cached_router_entries = [
+        (k, v) for k, v in dm._hot_cache.items() if k.startswith("router:")
+    ]
+    assert cached_router_entries, "router output should have been cached"
+    for key, (_ts, value) in cached_router_entries:
+        assert value == ["webSearch"], (
+            f"router cache for {key!r} should hold raw router output "
+            f"['webSearch']; got {value!r}"
+        )
+
+
+@pytest.mark.unit
+@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
+@patch("src.jarvis.memory.graph_ops.build_warm_profile",
+       return_value={"user": "", "directives": ""})
+@patch("src.jarvis.memory.graph.GraphMemoryStore")
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_long_followup_still_carries_over_when_previous_failed(
+    mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
+    _mock_graph, _mock_warm, _mock_fmt,
+):
+    """Failure-gated carry-over does NOT depend on query length. A long
+    follow-up that supplies the missing arg ("Right, sorry — I'm in
+    Edinburgh, please try the lookup again for tomorrow") must still
+    carry over the failed tool. The earlier char-length heuristic was
+    dropped because it false-negatived this shape; the failure flag is
+    the right signal.
+    """
+    mock_chat.side_effect = [
+        {"message": {"content": "Edinburgh weather: 12°C."}},
+    ]
+    mock_extract.side_effect = ["Edinburgh weather: 12°C."]
+
+    db = Mock()
+    cfg = _mock_cfg()
+    dm = DialogueMemory()
+    dm.add_message("user", "how's the weather tomorrow Jarvis")
+    dm.record_tool_turn(_failed_tool_turn("getWeather"))
+    dm.add_message("assistant", "I do not have a location set.")
+
+    long_followup = (
+        "Right, sorry — I'm in Edinburgh, please try the lookup again for "
+        "tomorrow morning if you would."
+    )
+    assert len(long_followup) > 80
+
+    with patch(
+        "src.jarvis.reply.engine.select_tools",
+        return_value=["webSearch"],
+    ):
+        run_reply_engine(db=db, cfg=cfg, tts=None,
+                         text=long_followup, dialogue_memory=dm)
+
+    tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
+    assert "getWeather" in tool_names, (
+        f"long follow-up to a failed tool must still carry over; "
+        f"saw {sorted(tool_names)}"
+    )
+
+
+@pytest.mark.unit
+@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
+@patch("src.jarvis.memory.graph_ops.build_warm_profile",
+       return_value={"user": "", "directives": ""})
+@patch("src.jarvis.memory.graph.GraphMemoryStore")
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_text_tool_fallback_failure_carries_over(
+    mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
+    _mock_graph, _mock_warm, _mock_fmt,
+):
+    """Small-model path: the previous turn's tool error was stored as a
+    ``role=user`` message tagged with ``tool_name`` and
+    ``tool_failed=True``. The walker must collect the name from this
+    shape too, not only from native ``assistant.tool_calls`` + ``role=tool``
+    pairs.
+    """
+    mock_chat.side_effect = [
+        {"message": {"content": "Weather in Berlin is 9°C."}},
+    ]
+    mock_extract.side_effect = ["Weather in Berlin is 9°C."]
+
+    db = Mock()
+    cfg = _mock_cfg()
+    dm = DialogueMemory()
+    dm.add_message("user", "how's the weather")
+    dm.record_tool_turn(_failed_text_tool_turn("getWeather"))
+    dm.add_message("assistant", "I couldn't auto-detect your location.")
+
+    with patch(
+        "src.jarvis.reply.engine.select_tools",
+        return_value=["webSearch"],
+    ):
+        run_reply_engine(db=db, cfg=cfg, tts=None,
+                         text="I'm in Berlin", dialogue_memory=dm)
+
+    tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
+    assert "getWeather" in tool_names, (
+        "text-tool fallback failure shape must be carried over; "
+        f"saw {sorted(tool_names)}"
+    )
+
+
+@pytest.mark.unit
+@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
+@patch("src.jarvis.memory.graph_ops.build_warm_profile",
+       return_value={"user": "", "directives": ""})
+@patch("src.jarvis.memory.graph.GraphMemoryStore")
+@patch("src.jarvis.reply.engine.plan_query", return_value=[])
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+def test_multi_tool_call_only_failed_sibling_carries_over(
+    mock_chat, mock_extract, _mock_extract_mem, _mock_plan,
+    _mock_graph, _mock_warm, _mock_fmt,
+):
+    """When an assistant message carries multiple tool_calls but only
+    one of them failed, only the failed name must be carried over. The
+    successful sibling stays the chat model's responsibility through
+    its own routing.
+    """
+    mock_chat.side_effect = [
+        {"message": {"content": "Sure."}},
+    ]
+    mock_extract.side_effect = ["Sure."]
+
+    db = Mock()
+    cfg = _mock_cfg()
+    dm = DialogueMemory()
+    dm.add_message("user", "weather and search Pushkin")
+    dm.record_tool_turn([
+        {"role": "assistant", "content": "", "tool_calls": [
+            {"id": "c-w", "type": "function",
+             "function": {"name": "getWeather", "arguments": {}}},
+            {"id": "c-s", "type": "function",
+             "function": {"name": "webSearch", "arguments": {"query": "Pushkin"}}},
+        ]},
+        # getWeather failed (no location), webSearch succeeded.
+        {"role": "tool", "tool_call_id": "c-w",
+         "tool_name": "getWeather",
+         "content": "I couldn't auto-detect your location.",
+         "tool_failed": True},
+        {"role": "tool", "tool_call_id": "c-s",
+         "tool_name": "webSearch",
+         "content": "Pushkin was a Russian poet (1799-1837).",
+         "tool_failed": False},
+    ])
+    dm.add_message("assistant",
+                   "Pushkin was a Russian poet. I couldn't auto-detect "
+                   "your location for the weather lookup.")
+
+    with patch(
+        "src.jarvis.reply.engine.select_tools",
+        return_value=["fetchWebPage"],
+    ):
+        run_reply_engine(db=db, cfg=cfg, tts=None,
+                         text="I'm in Paris", dialogue_memory=dm)
+
+    tool_names = _tool_names_from_chat_call(mock_chat.call_args_list[-1])
+    assert "getWeather" in tool_names, (
+        "failed sibling tool_call must be carried over; "
+        f"saw {sorted(tool_names)}"
+    )
+    assert "webSearch" not in tool_names, (
+        "successful sibling tool_call must NOT be carried over; "
+        f"saw {sorted(tool_names)}"
+    )
+
+
+@pytest.mark.unit
+@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
+@patch("src.jarvis.memory.graph_ops.build_warm_profile",
+       return_value={"user": "", "directives": ""})
+@patch("src.jarvis.memory.graph.GraphMemoryStore")
+@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
+@patch("src.jarvis.reply.engine.extract_text_from_response")
+@patch("src.jarvis.reply.engine.chat_with_messages")
+@patch("src.jarvis.reply.engine.run_tool_with_retries")
+def test_planner_direct_exec_stamps_tool_failed(
+    mock_tool, mock_chat, mock_extract, _mock_extract_mem,
+    _mock_graph, _mock_warm, _mock_fmt,
+):
+    """The planner's direct-exec path (text-tool mode + concrete plan
+    step) appends tool results without going through the chat-model
+    loop. Verify that path stamps ``tool_failed`` so the next turn's
+    walker can see prior failures planted by direct-exec.
+    """
+    from src.jarvis.tools.types import ToolExecutionResult
+
+    cfg = _mock_cfg()
+    cfg.ollama_chat_model = "gemma4:e2b"  # triggers SMALL/text-tool path
+
+    # First reply: planner emits a getWeather step, direct-exec runs the
+    # tool which returns success=False (no location), then the chat
+    # model produces a final text reply.
+    mock_tool.return_value = ToolExecutionResult(
+        success=False,
+        reply_text="I couldn't auto-detect your location.",
+    )
+    mock_chat.side_effect = [
+        {"message": {"content": "Tell me which city."}},
+    ]
+    mock_extract.side_effect = ["Tell me which city."]
+
+    db = Mock()
+    dm = DialogueMemory()
+
+    # Concrete plan step the resolver fast-path can parse without an LLM.
+    with patch(
+        "src.jarvis.reply.engine.plan_query",
+        return_value=["getWeather", "Reply to the user."],
+    ), patch(
+        "src.jarvis.reply.engine.select_tools",
+        return_value=["getWeather"],
+    ):
+        run_reply_engine(db=db, cfg=cfg, tts=None,
+                         text="how's the weather",
+                         dialogue_memory=dm)
+
+    # The direct-exec path should have recorded a tool turn with the
+    # failure flag set so a follow-up turn can carry over getWeather.
+    assert dm._tool_turns, (
+        "planner direct-exec path must record a tool turn into "
+        "dialogue memory carryover"
+    )
+    stored_msgs = [m for _ts, msgs in dm._tool_turns for m in msgs]
+    failed_entries = [
+        m for m in stored_msgs
+        if m.get("tool_failed") and m.get("tool_name") == "getWeather"
+    ]
+    assert failed_entries, (
+        "direct-exec failure must stamp tool_failed=True; "
+        f"stored messages: {stored_msgs}"
+    )
+
+
+@pytest.mark.unit
+def test_walker_logs_orphan_assistant_tool_call(caplog):
+    """When an assistant tool_call has no matching role=tool result in
+    the recent window (e.g. truncation, scrub, eviction), the walker
+    should fail-open and log a diagnostic — never crash, never silently
+    widen the allow-list with the orphan name.
+    """
+    from src.jarvis.reply.engine import _previous_turn_failed_tool_names
+
+    recent = [
+        {"role": "user", "content": "weather please"},
+        {"role": "assistant", "content": "", "tool_calls": [
+            {"id": "c-orphan", "type": "function",
+             "function": {"name": "getWeather", "arguments": {}}},
+        ]},
+        # No matching role=tool result for c-orphan.
+        {"role": "assistant", "content": "I couldn't auto-detect."},
+    ]
+
+    names = _previous_turn_failed_tool_names(recent)
+    # No failed tool result was seen, so nothing carries over even
+    # though an assistant tool_call exists.
+    assert names == [], (
+        f"orphan tool_call must not be carried over; got {names}"
+    )