Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
1397 lines
55 KiB
Python
1397 lines
55 KiB
Python
"""Tests for graph_ops.py — LLM-dependent graph memory operations.
|
|
|
|
All LLM calls are mocked to test the logic independently.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
import types
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
# Make sure a ``requests`` module is importable before graph_ops is loaded
# (graph_ops imports llm, which needs requests).
#
# Prefer the real package: the stub is installed only when ``requests``
# cannot be imported at all. Checking ``sys.modules`` alone would shadow an
# installed-but-not-yet-imported ``requests`` with the stub for the rest of
# the test session.
try:
    import requests  # noqa: F401
except ImportError:
    _requests_stub = types.ModuleType("requests")
    _requests_stub.post = MagicMock()

    _requests_exceptions = types.ModuleType("requests.exceptions")
    _requests_exceptions.Timeout = type("Timeout", (Exception,), {})
    _requests_stub.exceptions = _requests_exceptions

    sys.modules["requests"] = _requests_stub
    # Register the submodule as well so that importing
    # ``requests.exceptions`` resolves against the stub instead of failing
    # because the stub is not a package.
    sys.modules["requests.exceptions"] = _requests_exceptions
|
|
|
|
from src.jarvis.memory.graph import GraphMemoryStore, SPLIT_THRESHOLD
|
|
from src.jarvis.memory.graph import BRANCH_USER, BRANCH_DIRECTIVES, BRANCH_WORLD
|
|
from src.jarvis.memory.graph_ops import (
|
|
extract_graph_memories,
|
|
_llm_pick_best_child,
|
|
find_best_node,
|
|
auto_split_node,
|
|
update_graph_from_dialogue,
|
|
build_warm_profile,
|
|
format_warm_profile_block,
|
|
merge_node_data,
|
|
consolidate_all_populated_nodes,
|
|
MergeResult,
|
|
)
|
|
|
|
|
|
# ── Fixtures ───────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.fixture
def store(tmp_path):
    """Yield a GraphMemoryStore backed by a throwaway database file.

    The database file is created under pytest's ``tmp_path`` and the store
    is closed once the requesting test has finished with it.
    """
    db_file = tmp_path / "test_ops.db"
    graph_store = GraphMemoryStore(str(db_file))
    yield graph_store
    graph_store.close()
|
|
|
|
|
|
@pytest.fixture
def populated_store(store):
    """Return ``store`` after adding three topic nodes directly under root.

    The nodes (Music, Work, Health) are created in that order and give the
    traversal tests something to pick between.
    """
    topics = (
        (
            "Music",
            "Musical preferences and listening habits",
            "Enjoys jazz and lo-fi hip hop",
        ),
        (
            "Work",
            "Professional details and projects",
            "Senior engineer at Acme Corp. Uses Python daily.",
        ),
        (
            "Health",
            "Health, fitness, and dietary information",
            "Runs 3 times a week. Prefers dark roast coffee.",
        ),
    )
    for topic_name, topic_description, topic_data in topics:
        store.create_node(
            name=topic_name,
            description=topic_description,
            data=topic_data,
            parent_id="root",
        )
    return store
|
|
|
|
|
|
# ── extract_graph_memories ─────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
class TestExtractGraphMemories:
    """Cover how conversation summaries become branch-tagged facts.

    Every extracted item is asserted as a ``(branch_id, fact_text)`` tuple.
    The mocked LLM payloads spell branch labels in upper case; the expected
    branch ids are the lower-case ``user`` / ``directives`` / ``world``.
    """

    @staticmethod
    def _extract(summary):
        """Run the extractor with the fake endpoint and model used here."""
        return extract_graph_memories(summary, "http://localhost", "model")

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_extracts_facts(self, mock_llm):
        """Two tagged facts come back in order with lower-cased branches."""
        llm_reply = (
            '[{"branch": "USER", "fact": "Prefers dark roast coffee"},'
            ' {"branch": "WORLD", "fact": "Acme Corp is based in London"}]'
        )
        mock_llm.return_value = llm_reply

        extracted = self._extract("summary text")

        assert len(extracted) == 2
        first, second = extracted[0], extracted[1]
        assert first == ("user", "Prefers dark roast coffee")
        assert second == ("world", "Acme Corp is based in London")

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_classifies_directive_branch(self, mock_llm):
        """A behavioural rule issued by the user is tagged ``directives``.

        The intent is that such rules are kept apart from descriptive user
        facts instead of being summarised together with them.
        """
        llm_reply = (
            '[{"branch": "DIRECTIVES", "fact": "Always answer in British English"}]'
        )
        mock_llm.return_value = llm_reply

        extracted = self._extract("summary")

        assert extracted == [("directives", "Always answer in British English")]

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_returns_empty_when_nothing_worth_storing(self, mock_llm):
        """An empty JSON array from the LLM yields no facts."""
        mock_llm.return_value = "[]"

        extracted = self._extract("just small talk")

        assert extracted == []

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_llm_returning_none(self, mock_llm):
        """A ``None`` reply (LLM failure) yields no facts."""
        mock_llm.return_value = None

        extracted = self._extract("summary")

        assert extracted == []

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_malformed_json(self, mock_llm):
        """A reply containing no JSON at all yields no facts."""
        mock_llm.return_value = "Here are some facts: not valid json"

        extracted = self._extract("summary")

        assert extracted == []

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_json_embedded_in_text(self, mock_llm):
        """A JSON array wrapped in chatter is still picked up."""
        llm_reply = (
            'Sure! Here are the facts:\n'
            '[{"branch": "USER", "fact": "Likes hiking"},'
            ' {"branch": "USER", "fact": "Has a cat named Luna"}]\n'
            'Hope that helps!'
        )
        mock_llm.return_value = llm_reply

        extracted = self._extract("summary")

        assert len(extracted) == 2

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_filters_empty_strings(self, mock_llm):
        """Blank and whitespace-only facts are dropped from the result."""
        llm_reply = (
            '[{"branch": "USER", "fact": "Valid fact"},'
            ' {"branch": "USER", "fact": ""},'
            ' {"branch": "USER", "fact": " "},'
            ' {"branch": "USER", "fact": "Another fact"}]'
        )
        mock_llm.return_value = llm_reply

        extracted = self._extract("summary")

        assert len(extracted) == 2

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_unknown_branch_defaults_to_user(self, mock_llm):
        """An unrecognised branch label falls back to ``user``.

        The fact is kept rather than silently dropped; user-scoped context
        is treated as the safer default for unclassified items.
        """
        llm_reply = '[{"branch": "MISC", "fact": "Some useful fact"}]'
        mock_llm.return_value = llm_reply

        extracted = self._extract("summary")

        assert extracted == [("user", "Some useful fact")]
|
|
|
|
|
# ── _llm_pick_best_child ──────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
class TestLLMPickBestChild:
    """Exercise ``_llm_pick_best_child`` against a mocked LLM reply."""

    @staticmethod
    def _pick(fact, children, chat_model="model", **extra):
        """Call the picker with the fake endpoint used by these tests."""
        return _llm_pick_best_child(
            fact, children, "http://localhost", chat_model, **extra
        )

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_picks_numbered_child(self, mock_llm, populated_store):
        """A bare number selects the child at that 1-based position."""
        mock_llm.return_value = "2"
        candidates = populated_store.get_children("root")

        chosen = self._pick("fact", candidates)

        assert chosen == candidates[1].id

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_returns_none_for_NONE(self, mock_llm, populated_store):
        """The literal reply ``NONE`` means no child fits."""
        mock_llm.return_value = "NONE"
        candidates = populated_store.get_children("root")

        chosen = self._pick("unrelated fact", candidates)

        assert chosen is None

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_returns_none_for_empty_children(self, mock_llm):
        """With no candidates the LLM is not consulted at all."""
        chosen = self._pick("fact", [])

        assert chosen is None
        mock_llm.assert_not_called()

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_returns_none_for_llm_failure(self, mock_llm, populated_store):
        """A ``None`` reply from the LLM results in no pick."""
        mock_llm.return_value = None
        candidates = populated_store.get_children("root")

        chosen = self._pick("fact", candidates)

        assert chosen is None

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_number_in_text(self, mock_llm, populated_store):
        """A number embedded in a sentence is still honoured."""
        mock_llm.return_value = "I think option 1 is the best fit."
        candidates = populated_store.get_children("root")

        chosen = self._pick("fact", candidates)

        assert chosen == candidates[0].id

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_out_of_range_number(self, mock_llm, populated_store):
        """A number beyond the candidate list results in no pick."""
        mock_llm.return_value = "99"
        candidates = populated_store.get_children("root")

        chosen = self._pick("fact", candidates)

        assert chosen is None

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_uses_picker_model_when_provided(self, mock_llm, populated_store):
        """``picker_model`` replaces the chat model for the picker call.

        This lets placement run on the small model without paging in the
        big chat model. Without ``picker_model`` the chat model is used,
        which keeps the call backwards-compatible.
        """
        mock_llm.return_value = "1"
        candidates = populated_store.get_children("root")

        self._pick("fact", candidates, "big-chat", picker_model="small-judge")
        assert mock_llm.call_args.kwargs["chat_model"] == "small-judge"

        self._pick("fact", candidates, "big-chat")
        assert mock_llm.call_args.kwargs["chat_model"] == "big-chat"
|
|
|
|
|
# ── find_best_node ─────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
class TestFindBestNode:
    """Cover ``find_best_node`` and its recent / top / root entry points.

    ``_llm_pick_best_child`` is mocked, so each test scripts the picker's
    answers and checks where the fact ends up.
    """

    @staticmethod
    def _root_child(graph, wanted_name):
        """Return the direct child of root whose name is ``wanted_name``."""
        matches = [
            child for child in graph.get_children("root")
            if child.name == wanted_name
        ]
        return matches[0]

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_matches_recent_node_first(self, mock_pick, populated_store):
        """A match among recently touched nodes ends the search at once."""
        music = self._root_child(populated_store, "Music")
        # Touching the node makes it show up among the recent nodes.
        populated_store.touch_node(music.id)

        # The picker's first (recent-nodes) answer is the Music node.
        mock_pick.return_value = music.id

        target = find_best_node(
            populated_store, "Likes jazz", "http://localhost", "model"
        )

        assert target == music.id
        # One picker call only: the recent-nodes entry point matched.
        assert mock_pick.call_count == 1

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_falls_through_to_top_nodes(self, mock_pick, populated_store):
        """When recent nodes miss, the picker's next answer is used."""
        work = self._root_child(populated_store, "Work")
        # Touch Work repeatedly so it appears among the top nodes.
        for _ in range(5):
            populated_store.touch_node(work.id)

        # Scripted answers: recent -> no match, top -> Work.
        mock_pick.side_effect = [None, work.id]

        target = find_best_node(
            populated_store, "Uses TypeScript", "http://localhost", "model"
        )

        assert target == work.id

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_falls_through_to_root_traversal(self, mock_pick, populated_store):
        """With no shortcut match, traversal from root picks the node."""
        health = self._root_child(populated_store, "Health")

        # Scripted answers: recent -> no match; top is skipped because all
        # of its ids overlap the recent ones; root children -> Health.
        mock_pick.side_effect = [None, health.id]

        target = find_best_node(
            populated_store, "Allergic to peanuts", "http://localhost", "model"
        )

        assert target == health.id

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_writes_to_root_when_nothing_matches(self, mock_pick, populated_store):
        """If the picker declines everywhere, root is the write target."""
        mock_pick.return_value = None

        target = find_best_node(
            populated_store,
            "Completely unrelated fact",
            "http://localhost",
            "model",
        )

        assert target == "root"

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_empty_graph_writes_to_root(self, mock_pick, store):
        """On a fresh store an unpinned fact stops at root.

        Only the seeded branches sit under root. With no branch pin the
        picker is offered those branches; when it declines all of them
        (returns ``None``) the traversal halts at root.
        """
        mock_pick.return_value = None

        target = find_best_node(
            store, "First ever fact", "http://localhost", "model"
        )

        assert target == "root"

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_branch_pin_skips_shortcut_entry_points(self, mock_pick, store):
        """A pinned branch bypasses the recent / top shortcuts entirely.

        The fact may only descend through the pinned branch's subtree.
        With an empty branch that makes the branch root the write target,
        and the picker is never consulted.
        """
        mock_pick.return_value = None

        target = find_best_node(
            store,
            "Likes jazz music",
            "http://localhost",
            "model",
            branch_root_id="user",
        )

        assert target == "user"
        # The User branch has no children yet, so descent ended at the
        # branch root without a single picker call.
        mock_pick.assert_not_called()
|
|
|
|
|
# ── auto_split_node ────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
class TestAutoSplitNode:
    """Cover ``auto_split_node``: success, and each way it can abort."""

    def _make_large_node(self, store, token_count=2000):
        """Create a root child whose data is a long block of filler lines.

        ``token_count // 10`` numbered filler lines are joined with
        newlines. ``test_successful_split`` asserts that the default size
        exceeds ``SPLIT_THRESHOLD``.
        """
        line_total = token_count // 10
        filler = "\n".join(
            f"Fact number {i}: some information here for padding"
            for i in range(line_total)
        )
        return store.create_node(
            name="Large Topic",
            description="A topic with lots of data",
            data=filler,
            parent_id="root",
        )

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_successful_split(self, mock_llm, store):
        """Two valid categories become children; the parent is emptied."""
        big_node = self._make_large_node(store)
        assert big_node.data_token_count > SPLIT_THRESHOLD

        split_plan = {
            "categories": [
                {"name": "Category A", "description": "First category", "facts": ["Fact 1", "Fact 2"]},
                {"name": "Category B", "description": "Second category", "facts": ["Fact 3", "Fact 4"]},
            ],
            "summary": "A topic covering categories A and B",
        }
        mock_llm.return_value = json.dumps(split_plan)

        did_split = auto_split_node(store, big_node.id, "http://localhost", "model")
        assert did_split is True

        # One child per category.
        created = store.get_children(big_node.id)
        assert len(created) == 2
        created_names = {child.name for child in created}
        assert "Category A" in created_names
        assert "Category B" in created_names

        # The parent's data is cleared and its description carries the summary.
        parent_after = store.get_node(big_node.id)
        assert parent_after.data == ""
        assert "categories A and B" in parent_after.description

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_split_aborts_with_fewer_than_2_categories(self, mock_llm, store):
        """A single-category plan is rejected and the data stays put."""
        big_node = self._make_large_node(store)

        split_plan = {
            "categories": [
                {"name": "Only One", "description": "Just one", "facts": ["All the facts"]},
            ],
            "summary": "Everything",
        }
        mock_llm.return_value = json.dumps(split_plan)

        did_split = auto_split_node(store, big_node.id, "http://localhost", "model")
        assert did_split is False

        # The parent still holds its data.
        parent_after = store.get_node(big_node.id)
        assert parent_after.data != ""

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_split_aborts_on_llm_failure(self, mock_llm, store):
        """A ``None`` reply from the LLM aborts the split."""
        big_node = self._make_large_node(store)
        mock_llm.return_value = None

        did_split = auto_split_node(store, big_node.id, "http://localhost", "model")

        assert did_split is False

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_split_aborts_on_malformed_json(self, mock_llm, store):
        """A reply that is not JSON aborts the split."""
        big_node = self._make_large_node(store)
        mock_llm.return_value = "This is not JSON at all"

        did_split = auto_split_node(store, big_node.id, "http://localhost", "model")

        assert did_split is False

    def test_split_skips_below_threshold(self, store):
        """A small node is left alone (no LLM patch is needed here)."""
        small_node = store.create_node(
            name="Small", description="Tiny", data="Short data", parent_id="root"
        )

        did_split = auto_split_node(store, small_node.id, "http://localhost", "model")

        assert did_split is False

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_split_aborts_on_category_missing_facts(self, mock_llm, store):
        """A category with an empty fact list aborts the split."""
        big_node = self._make_large_node(store)
        split_plan = {
            "categories": [
                {"name": "Cat A", "description": "First", "facts": ["Fact 1"]},
                {"name": "Cat B", "description": "Second", "facts": []},
            ],
            "summary": "Summary",
        }
        mock_llm.return_value = json.dumps(split_plan)

        did_split = auto_split_node(store, big_node.id, "http://localhost", "model")

        assert did_split is False
|
|
|
|
|
# ── append_to_node ─────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
class TestAppendToNode:
    """Cover ``GraphMemoryStore.append_to_node`` and its return flag."""

    @staticmethod
    def _new_node(graph, label, data):
        """Create a root child using ``label`` as name and description."""
        return graph.create_node(
            name=label, description=label, data=data, parent_id="root"
        )

    def test_append_to_empty_node(self, store):
        """Appending to an empty node stores the fact as the whole data."""
        target = self._new_node(store, "Test", "")

        over_limit = store.append_to_node(target.id, "First fact")

        refreshed = store.get_node(target.id)
        assert refreshed.data == "First fact"
        assert over_limit is False

    def test_append_to_existing_data(self, store):
        """Existing data is kept and the new fact is added after it."""
        target = self._new_node(store, "Test", "Existing")

        store.append_to_node(target.id, "New fact")

        combined = store.get_node(target.id).data
        assert "Existing" in combined
        assert "New fact" in combined
        assert "\n" in combined  # old and new text are newline-separated

    def test_returns_true_when_threshold_exceeded(self, store):
        """The flag is ``True`` once an append pushes past the threshold."""
        # Start just under the threshold (the sizing assumes ~4 characters
        # per token).
        nearly_full = "x" * (SPLIT_THRESHOLD * 4 - 10)
        target = self._new_node(store, "Big", nearly_full)

        over_limit = store.append_to_node(target.id, "More data that pushes it over")

        assert over_limit is True

    def test_returns_false_for_nonexistent_node(self, store):
        """An unknown node id reports ``False``."""
        over_limit = store.append_to_node("nonexistent", "data")

        assert over_limit is False
|
|
|
|
|
@pytest.mark.unit
class TestNodeContainsFact:
    """Cover ``GraphMemoryStore.node_contains_fact``, the dedupe check."""

    @staticmethod
    def _node_with(graph, data):
        """Create a root child named ``T`` holding ``data``."""
        return graph.create_node(
            name="T", description="T", data=data, parent_id="root"
        )

    def test_returns_false_for_empty_node(self, store):
        """A node without data contains nothing."""
        holder = self._node_with(store, "")

        assert store.node_contains_fact(holder.id, "anything") is False

    def test_returns_false_for_nonexistent_node(self, store):
        """An unknown node id is reported as not containing the fact."""
        assert store.node_contains_fact("nope", "anything") is False

    def test_returns_false_for_empty_fact(self, store):
        """A whitespace-only fact never matches."""
        holder = self._node_with(store, "hello")

        assert store.node_contains_fact(holder.id, " ") is False

    def test_exact_line_match(self, store):
        """Each stored line matches; an absent line does not."""
        holder = self._node_with(store, "Line A\nLine B")

        for present in ("Line A", "Line B"):
            assert store.node_contains_fact(holder.id, present) is True
        assert store.node_contains_fact(holder.id, "Line C") is False

    def test_case_and_whitespace_insensitive(self, store):
        """Case and surrounding whitespace are ignored when comparing."""
        holder = self._node_with(store, "Justin Bieber is Canadian.")

        variants = (
            "justin bieber is canadian.",
            " Justin Bieber is Canadian. ",
        )
        for variant in variants:
            assert store.node_contains_fact(holder.id, variant) is True

    def test_turkish_dotted_i_folds(self, store):
        """Turkish dotted capital I matches its folded lower-case form.

        A plain ``.lower()`` comparison would produce the wrong key here;
        the store is expected to use casefold + NFKC so both spellings
        collide.
        """
        holder = self._node_with(store, "İstanbul is large.")

        assert store.node_contains_fact(holder.id, "i̇stanbul is large.") is True

    def test_german_sharp_s_folds_to_ss(self, store):
        """German sharp s is treated as equal to ``ss``."""
        holder = self._node_with(store, "Straße")

        assert store.node_contains_fact(holder.id, "strasse") is True

    def test_substring_is_not_a_match(self, store):
        """Matching is whole-line equality, so a substring is rejected."""
        holder = self._node_with(store, "Justin Bieber is Canadian.")

        assert store.node_contains_fact(holder.id, "Justin Bieber") is False
|
|
|
|
|
# ── update_graph_from_dialogue (end-to-end) ────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
class TestUpdateGraphFromDialogue:
    """End-to-end coverage of ``update_graph_from_dialogue``."""

    @staticmethod
    def _flush(graph, summary):
        """Run the orchestrator with the fake endpoint and model."""
        return update_graph_from_dialogue(
            store=graph,
            summary=summary,
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_full_flow_extracts_and_stores(self, mock_llm, store):
        """Branch-tagged facts land on their own branch, not on root.

        The fixed branches are seeded when the store is created and are
        empty on a fresh store, so each fact is written straight to its
        branch root node.
        """
        # Extraction is the only LLM call: with childless branches,
        # find_best_node goes directly to the branch root.
        mock_llm.return_value = (
            '[{"branch": "USER", "fact": "Likes jazz music"},'
            ' {"branch": "WORLD", "fact": "Acme Corp is based in London"}]'
        )

        outcome = self._flush(store, "User likes jazz; Acme Corp is in London")

        assert len(outcome.stored) == 2
        assert outcome.skipped == 0
        for stored_fact, node_name in outcome.stored:
            assert isinstance(stored_fact, str) and stored_fact
            assert isinstance(node_name, str) and node_name

        user_branch = store.get_node("user")
        world_branch = store.get_node("world")
        assert user_branch is not None and "jazz" in user_branch.data
        assert world_branch is not None and "Acme" in world_branch.data

        # Branch pinning keeps the facts inside their subtree; nothing may
        # end up on the root node itself.
        root_node = store.get_node("root")
        assert "jazz" not in root_node.data
        assert "Acme" not in root_node.data

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_no_facts_extracted(self, mock_llm, store):
        """An empty extraction stores and skips nothing."""
        mock_llm.return_value = "[]"

        outcome = self._flush(store, "User said hello and asked about the weather")

        assert outcome.stored == []
        assert outcome.skipped == 0

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_extraction_failure_returns_zero(self, mock_llm, store):
        """A failed extraction (``None`` reply) stores and skips nothing."""
        mock_llm.return_value = None

        outcome = self._flush(store, "summary")

        assert outcome.stored == []
        assert outcome.skipped == 0

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_skips_duplicate_facts_on_second_flush(self, mock_llm, store):
        """Re-extracting a known fact must not duplicate it in the graph.

        This mirrors production, where two diary flushes in quick
        succession both extract the same fact from the cumulative daily
        summary. The second flush should leave the graph unchanged.
        """
        repeated_reply = (
            '[{"branch": "WORLD", "fact": "Justin Bieber is a Canadian singer."}]'
        )

        # First flush: the branch root is childless, so extraction is the
        # only LLM call required.
        mock_llm.return_value = repeated_reply
        first_pass = self._flush(store, "User asked about Justin Bieber.")
        assert len(first_pass.stored) == 1
        assert first_pass.skipped == 0

        # Second flush: the same fact is extracted again and deduped.
        mock_llm.return_value = repeated_reply
        second_pass = self._flush(store, "User asked about Justin Bieber.")
        assert second_pass.stored == [], "duplicate fact should not be reported as learned"
        assert second_pass.skipped == 1, "duplicate must be counted so the CLI can still log it"

        world_branch = store.get_node("world")
        assert world_branch.data.count("Justin Bieber") == 1

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_dedupe_handles_non_latin_case_folding(self, mock_llm, store):
        """Dedupe keys fold Turkish dotted I and German sharp s safely.

        ``str.lower`` alone would miss these cases; the store is expected
        to use ``casefold`` + NFKC instead.
        """
        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "İstanbul is the largest city in Turkey."}]'
        )
        self._flush(store, "s")

        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "i̇stanbul is the largest city in turkey."}]'
        )
        turkish_repeat = self._flush(store, "s")
        assert turkish_repeat.stored == [], "Turkish İ/i̇ variants should dedupe"
        assert turkish_repeat.skipped == 1

        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "Straße names are ordered alphabetically."}]'
        )
        self._flush(store, "s")

        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "strasse names are ordered alphabetically."}]'
        )
        german_repeat = self._flush(store, "s")
        assert german_repeat.stored == [], "German ß should casefold to ss for dedupe"
        assert german_repeat.skipped == 1

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_dedupe_on_child_after_split(self, mock_llm, mock_pick, store):
        """Dedupe applies to whichever node the traversal lands on.

        A child of ``world`` already holds the fact and the picker is
        forced to descend into it; re-extracting the same fact must not
        append a second copy.
        """
        music_child = store.create_node(
            name="Music",
            description="Musicians, bands, songs.",
            data="Justin Bieber is a Canadian singer.",
            parent_id="world",
        )

        # Every picker call descends into the Music child.
        mock_pick.return_value = music_child.id
        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "Justin Bieber is a Canadian singer."}]'
        )

        outcome = self._flush(store, "User asked about Justin Bieber.")

        assert outcome.stored == [], "duplicate on a child node should still dedupe"
        assert outcome.skipped == 1
        child_after = store.get_node(music_child.id)
        assert child_after.data.count("Justin Bieber is a Canadian singer.") == 1
|
|
|
|
|
# ── Merge (rewrite-on-write consolidation) ────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
|
|
class TestMergeNodeData:
|
|
"""merge_node_data rewrites a node's data via an LLM consolidation pass."""
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
|
|
def test_rewrites_node_with_consolidated_facts(self, mock_llm, store):
|
|
node = store.create_node(
|
|
name="Test",
|
|
description="d",
|
|
data="User likes coffee.\nUser is from Hackney.\nUser drives a Tesla.",
|
|
parent_id="user",
|
|
)
|
|
new_fact = "User dislikes coffee and prefers cycling over driving."
|
|
mock_llm.return_value = (
|
|
'{"facts": ["' + new_fact + '", "User is from Hackney."]}'
|
|
)
|
|
|
|
result = merge_node_data(
|
|
store=store,
|
|
node_id=node.id,
|
|
new_facts=[new_fact],
|
|
ollama_base_url="http://localhost",
|
|
ollama_chat_model="model",
|
|
)
|
|
|
|
assert result.success is True
|
|
assert result.incorporated_indices == [0]
|
|
refreshed = store.get_node(node.id)
|
|
assert "User dislikes coffee" in refreshed.data
|
|
assert "User likes coffee." not in refreshed.data
|
|
assert "User is from Hackney." in refreshed.data
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
|
|
def test_empty_node_skips_llm(self, mock_llm, store):
|
|
node = store.create_node(name="T", description="d", data="", parent_id="user")
|
|
|
|
result = merge_node_data(
|
|
store=store,
|
|
node_id=node.id,
|
|
new_facts=["any"],
|
|
ollama_base_url="http://localhost",
|
|
ollama_chat_model="model",
|
|
)
|
|
|
|
assert result.success is False
|
|
mock_llm.assert_not_called()
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
|
|
def test_llm_failure_leaves_node_untouched(self, mock_llm, store):
|
|
node = store.create_node(
|
|
name="T", description="d", data="Existing fact.", parent_id="user",
|
|
)
|
|
mock_llm.return_value = None
|
|
|
|
result = merge_node_data(
|
|
store=store,
|
|
node_id=node.id,
|
|
new_facts=["any"],
|
|
ollama_base_url="http://localhost",
|
|
ollama_chat_model="model",
|
|
)
|
|
|
|
assert result.success is False
|
|
assert store.get_node(node.id).data == "Existing fact."
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
|
|
def test_unparseable_response_leaves_node_untouched(self, mock_llm, store):
|
|
node = store.create_node(
|
|
name="T", description="d", data="Existing fact.", parent_id="user",
|
|
)
|
|
mock_llm.return_value = "no json here"
|
|
|
|
result = merge_node_data(
|
|
store=store,
|
|
node_id=node.id,
|
|
new_facts=["any"],
|
|
ollama_base_url="http://localhost",
|
|
ollama_chat_model="model",
|
|
)
|
|
|
|
assert result.success is False
|
|
assert store.get_node(node.id).data == "Existing fact."
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
|
|
def test_empty_rewrite_treated_as_failure(self, mock_llm, store):
|
|
"""A non-empty existing payload should never collapse to nothing.
|
|
Treat empty-list rewrites as suspect and refuse to wipe the node."""
|
|
node = store.create_node(
|
|
name="T", description="d", data="A.\nB.", parent_id="user",
|
|
)
|
|
mock_llm.return_value = '{"facts": []}'
|
|
|
|
result = merge_node_data(
|
|
store=store,
|
|
node_id=node.id,
|
|
new_facts=["C"],
|
|
ollama_base_url="http://localhost",
|
|
ollama_chat_model="model",
|
|
)
|
|
|
|
assert result.success is False
|
|
assert store.get_node(node.id).data == "A.\nB."
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
def test_non_string_facts_filtered(self, mock_llm, store):
    """Numbers, nulls and blank strings in the rewrite are dropped.

    Only the two real string facts end up stored, newline-joined and
    in their original order."""
    target = store.create_node(
        name="T", description="d", data="A.", parent_id="user",
    )
    # Mix valid facts with an int, a null and a whitespace-only entry.
    noisy_reply = '{"facts": ["Kept fact.", 42, null, " ", "Another kept."]}'
    mock_llm.return_value = noisy_reply

    outcome = merge_node_data(
        store=store,
        node_id=target.id,
        new_facts=["x"],
        ollama_base_url="http://localhost",
        ollama_chat_model="model",
    )

    assert outcome.success is True
    stored_data = store.get_node(target.id).data
    assert stored_data == "Kept fact.\nAnother kept."
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
def test_hallucination_guard_rejects_oversized_rewrite(self, mock_llm, store):
    """A rewrite far larger than `existing + new + slack` is rejected.

    Consolidation may shrink a node or hold it steady; a reply that
    balloons the fact count means the model made content up, so the
    merge must fail and the node must stay as it was."""
    original_data = "One existing fact."
    target = store.create_node(
        name="T", description="d", data=original_data, parent_id="user",
    )
    # 1 existing + 1 new + slack(2) = cap of 4. Return 8 facts.
    invented = [f"Invented {i}." for i in range(8)]
    mock_llm.return_value = json.dumps({"facts": invented})

    outcome = merge_node_data(
        store=store,
        node_id=target.id,
        new_facts=["A new fact."],
        ollama_base_url="http://localhost",
        ollama_chat_model="model",
    )

    assert outcome.success is False
    assert store.get_node(target.id).data == original_data
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
def test_incorporated_indices_track_each_new_fact(self, mock_llm, store):
    """Only the new facts that survive the rewrite are reported.

    Two facts go in; the rewrite keeps the first and drops the second,
    so `incorporated_indices` must list index 0 alone. Callers rely on
    this to avoid announcing merged-out facts as 'newly stored'."""
    target = store.create_node(
        name="T", description="d", data="Old A.", parent_id="user",
    )
    submitted = ["Fresh One.", "Fresh Two."]
    # The rewrite retains submitted[0] but not submitted[1].
    mock_llm.return_value = '{"facts": ["Fresh One.", "Old A."]}'

    outcome = merge_node_data(
        store=store,
        node_id=target.id,
        new_facts=submitted,
        ollama_base_url="http://localhost",
        ollama_chat_model="model",
    )

    assert outcome.success is True
    assert outcome.incorporated_indices == [0]
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
def test_empty_new_facts_runs_self_consolidation(self, mock_llm, store):
    """`new_facts=[]` still consolidates the node's existing data.

    This is the migration path for nodes that collected contradictory
    lines before merge-on-write existed: the mocked rewrite resolves
    the contradiction and the node ends up holding the single
    surviving line."""
    contradictory = "User has a need for X.\nUser does not have a need for X."
    target = store.create_node(
        name="T",
        description="d",
        data=contradictory,
        parent_id="user",
    )
    mock_llm.return_value = '{"facts": ["User does not have a need for X."]}'

    outcome = merge_node_data(
        store=store,
        node_id=target.id,
        new_facts=[],
        ollama_base_url="http://localhost",
        ollama_chat_model="model",
    )

    assert outcome.success is True
    # Nothing was submitted, so nothing can be reported as incorporated.
    assert outcome.incorporated_indices == []
    assert store.get_node(target.id).data == "User does not have a need for X."
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
def test_extracts_facts_object_from_markdown_fenced_response(self, mock_llm, store):
    """A facts object wrapped in a ```json code fence is still extracted."""
    target = store.create_node(
        name="T", description="d", data="Old.", parent_id="user",
    )
    fenced_reply = '```json\n{"facts": ["New."]}\n```'
    mock_llm.return_value = fenced_reply

    outcome = merge_node_data(
        store=store,
        node_id=target.id,
        new_facts=["New."],
        ollama_base_url="http://localhost",
        ollama_chat_model="model",
    )

    assert outcome.success is True
    stored_data = store.get_node(target.id).data
    assert "New." in stored_data
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
def test_hallucination_guard_boundary_pins_to_slack_constant(self, mock_llm, store):
    """The guard's cap is `existing + new + _MERGE_GROWTH_SLACK`.

    Pin both sides of the boundary against the named constant so a
    future tweak to the slack can't silently drift the guard: a
    rewrite with exactly `cap` facts is accepted, one with `cap + 1`
    is rejected. Both nodes are built from the same `existing_data`
    so the two halves always share the same computed cap."""
    from src.jarvis.memory.graph_ops import (
        _MERGE_GROWTH_SLACK,
        _split_data_lines,
    )

    def facts_payload(count):
        # JSON reply carrying `count` distinct placeholder facts.
        return '{"facts": [' + ", ".join(f'"L{i}."' for i in range(count)) + "]}"

    existing_data = "E1.\nE2."
    new_facts = ["N1."]
    # Derive `existing_count` via the same helper production uses
    # so the boundary math can't drift if the parsing rule changes.
    existing_count = len(_split_data_lines(existing_data))
    cap = existing_count + len(new_facts) + _MERGE_GROWTH_SLACK

    # At the cap → accepted.
    node = store.create_node(
        name="T", description="d", data=existing_data, parent_id="user",
    )
    mock_llm.return_value = facts_payload(cap)
    result = merge_node_data(
        store=store, node_id=node.id, new_facts=new_facts,
        ollama_base_url="http://localhost", ollama_chat_model="model",
    )
    assert result.success is True

    # One over the cap → rejected. A fresh node is needed because the
    # accepted merge above has already rewritten the first one.
    node2 = store.create_node(
        name="T2", description="d", data=existing_data, parent_id="user",
    )
    mock_llm.return_value = facts_payload(cap + 1)
    result = merge_node_data(
        store=store, node_id=node2.id, new_facts=new_facts,
        ollama_base_url="http://localhost", ollama_chat_model="model",
    )
    assert result.success is False
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
def test_incorporated_indices_tolerant_to_trailing_punctuation(self, mock_llm, store):
    """A dropped trailing full stop must still count as incorporation.

    Picker models often rewrite "X." as "X". If the match were strict,
    `incorporated_indices` would come back empty even though the fact
    landed, and the orchestrator would under-report every batched
    flush as '0 stored'. This pins the tolerant match against that
    exact rephrasing."""
    target = store.create_node(
        name="T", description="d", data="Old.", parent_id="user",
    )
    submitted = ["The user has a dog."]
    # Same fact as submitted[0], minus the final period.
    mock_llm.return_value = '{"facts": ["The user has a dog"]}'

    outcome = merge_node_data(
        store=store,
        node_id=target.id,
        new_facts=submitted,
        ollama_base_url="http://localhost",
        ollama_chat_model="model",
    )

    assert outcome.success is True
    assert outcome.incorporated_indices == [0], (
        "Trailing-period rephrasing must still count as incorporation."
    )
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
def test_prompt_body_matches_parsed_line_count(self, mock_llm, store):
    """The CURRENT facts block in the prompt holds only the parsed lines.

    Blank and whitespace-only lines inside the stored blob must be
    stripped from the prompt body just as `_split_data_lines` strips
    them from the parsed list, so the two signals stay consistent."""
    # Mid-blob blank line + a whitespace-only line. The old
    # `node.data.strip()` path would have left these in the
    # prompt body while the parsed list dropped them.
    messy_blob = "A.\n\n \nB."
    target = store.create_node(
        name="T",
        description="d",
        data=messy_blob,
        parent_id="user",
    )
    mock_llm.return_value = '{"facts": ["A.", "B."]}'

    merge_node_data(
        store=store,
        node_id=target.id,
        new_facts=[],
        ollama_base_url="http://localhost",
        ollama_chat_model="model",
    )

    # Inspect the user message that was handed to the (mocked) LLM.
    prompt_body = mock_llm.call_args.kwargs["user_content"]
    assert "CURRENT facts on the node" in prompt_body
    assert "A.\nB." in prompt_body
    # The dropped blank/whitespace lines must not survive into the prompt.
    assert "A.\n\n" not in prompt_body
    assert " \n" not in prompt_body
|
|
|
|
@patch("src.jarvis.memory.graph_ops.call_llm_direct")
def test_extracts_object_with_braces_inside_fact_strings(self, mock_llm, store):
    """Literal `{` / `}` inside a fact must not break object extraction.

    The reply wraps the JSON object in surrounding prose and the fact
    text itself contains braces — a `[^{}]`-scoped regex would refuse
    to match this, whereas `raw_decode` copes with the nesting."""
    braced_fact = "User uses {placeholder} syntax in templates."
    target = store.create_node(
        name="T", description="d", data="Old.", parent_id="user",
    )
    mock_llm.return_value = (
        'preamble {"facts": ["User uses {placeholder} syntax in templates."]} trailing'
    )

    outcome = merge_node_data(
        store=store,
        node_id=target.id,
        new_facts=[braced_fact],
        ollama_base_url="http://localhost",
        ollama_chat_model="model",
    )

    assert outcome.success is True
    assert "{placeholder}" in store.get_node(target.id).data
|
|
|
|
|
|
@pytest.mark.unit
class TestMergeSystemPromptInvariants:
    """Guard the rule labels the merge system prompt has to carry.

    How a real picker model behaves is the job of the
    merge_consolidation evals; these checks only catch an edit that
    quietly removes a rule from the prompt text. Every rule label is
    referenced by at least one test below."""

    def test_prompt_lists_supersession_rule(self):
        """The prompt names the CONTRADICTION rule."""
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT as prompt

        assert "CONTRADICTION" in prompt

    def test_prompt_lists_dedupe_rule(self):
        """The prompt names the DUPLICATION rule."""
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT as prompt

        assert "DUPLICATION" in prompt

    def test_prompt_lists_consolidation_rule(self):
        """The prompt names the CONSOLIDATION rule."""
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT as prompt

        assert "CONSOLIDATION" in prompt

    def test_prompt_lists_independence_rule(self):
        """The prompt names the INDEPENDENCE rule."""
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT as prompt

        assert "INDEPENDENCE" in prompt

    def test_prompt_lists_pruning_rule(self):
        """The prompt names the PRUNING rule."""
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT as prompt

        assert "PRUNING" in prompt

    def test_prompt_lists_meta_narrative_rule_with_assistant_examples(self):
        """META-NARRATIVE must exist and spell out what it drops.

        The rule was added because a 'The assistant is unable to ...'
        line kept surviving consolidate-all sweeps: no rule covered
        capability denials. The checks are confined to the rule's own
        section (from the META-NARRATIVE label up to the next numbered
        rule) so unrelated text elsewhere in the prompt cannot satisfy
        them. Losing the label or its trigger phrasings fails the test."""
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT as prompt

        assert "META-NARRATIVE" in prompt
        # Everything from the rule label to the end of the prompt.
        from_label = prompt[prompt.index("META-NARRATIVE"):]
        # The section stops at the next numbered rule (e.g. '\n7. ')
        # OR at the response-format trailer ('\nRespond with ...')
        # that follows the rule list. The trailer alternative matters
        # when META-NARRATIVE is the LAST numbered rule: without it
        # the section would swallow the JSON schema text, and the
        # keyword checks could pass on a prompt whose rule no longer
        # mentions those keywords itself.
        boundary = re.search(r"\n\d+\. |\nRespond with\b", from_label)
        if boundary is None:
            section = from_label
        else:
            section = from_label[: boundary.start()]
        # The two shapes the bug report surfaced explicitly must be
        # named in this rule's section, not just somewhere else.
        assert "The assistant" in section
        assert "unable to" in section
        # Counter-protection: the rule must not over-prune real
        # directives, so an exception clause is required in-section.
        assert "directive" in section.lower()
|
|
|
|
|
|
@pytest.mark.unit
class TestConsolidateAllPopulatedNodes:
    """consolidate_all_populated_nodes runs a self-merge pass on every
    populated node. Migration path for the contradiction backlog."""

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_walks_only_populated_nodes(self, mock_llm, store):
        """Nodes with data are consolidated; the empty node is skipped."""
        # Two populated nodes + one empty node + the seeded branch roots.
        store.create_node(
            name="A", description="d",
            data="Line 1.\nContradicts line 1.", parent_id="user",
        )
        store.create_node(
            name="B", description="d",
            data="Line X.\nDuplicate of line X.", parent_id="world",
        )
        store.create_node(name="Empty", description="d", data="", parent_id="user")

        # Two LLM calls expected (one per populated node).
        mock_llm.side_effect = [
            '{"facts": ["Line 1."]}',
            '{"facts": ["Line X."]}',
        ]

        results = list(consolidate_all_populated_nodes(
            store=store,
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        ))

        names = {n for n, _, _ in results}
        assert "A" in names and "B" in names
        assert "Empty" not in names
        assert mock_llm.call_count == 2
        # Each consolidated node shrank from 2 lines to 1.
        for _, before, after in results:
            assert before == 2
            assert after == 1

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_failure_per_node_does_not_abort_the_rest(self, mock_llm, store):
        """One node's failed merge must not stop the sweep or wipe data."""
        node_a = store.create_node(
            name="A", description="d", data="X.", parent_id="user",
        )
        node_b = store.create_node(
            name="B", description="d", data="Y.", parent_id="world",
        )

        # First node's LLM returns junk → fail-open. Second succeeds.
        mock_llm.side_effect = ["garbage", '{"facts": ["Y."]}']

        results = list(consolidate_all_populated_nodes(
            store=store,
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        ))

        assert len(results) == 2
        names = {n for n, _, _ in results}
        assert names == {"A", "B"}
        # Both nodes still have their data. Which node receives the
        # junk reply depends on walk order, so only non-emptiness is
        # asserted here rather than exact contents.
        assert store.get_node(node_a.id).data
        assert store.get_node(node_b.id).data

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_yields_per_node_for_streaming(self, mock_llm, store):
        """The op must be a generator that yields each result as the
        walk progresses — buffering the whole sweep before yielding
        defeats the streaming NDJSON endpoint that wraps it."""
        store.create_node(name="A", description="d", data="A.", parent_id="user")
        store.create_node(name="B", description="d", data="B.", parent_id="world")
        mock_llm.side_effect = ['{"facts": ["A."]}', '{"facts": ["B."]}']

        gen = consolidate_all_populated_nodes(
            store=store,
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        # First call only triggers one LLM hit (the first node), which
        # proves the second node hasn't been processed yet.
        first = next(gen)
        assert mock_llm.call_count == 1
        assert first[0] in {"A", "B"}

        # Draining the generator runs the rest.
        rest = list(gen)
        assert len(rest) == 1
        assert mock_llm.call_count == 2
|
|
|
|
|
|
@pytest.mark.unit
class TestUpdateGraphMerge:
    """Merge behaviour of update_graph_from_dialogue on populated nodes."""

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_contradiction_replaces_old_fact_via_merge(self, mock_llm, store):
        """Regression: a contradicting fact replaces the old line.

        'user does not need a daily check-in' has to supersede the
        earlier 'user has a need for a daily check-in' entry on the
        User branch root through the merge rewrite, not sit next to it."""
        store.update_node(
            "user",
            data="The user has a need for a simple daily check-in system.",
        )

        # Two LLM calls: extraction then merge.
        extraction_reply = (
            '[{"branch": "USER", "fact": "The user does not need a daily check-in system."}]'
        )
        merge_reply = '{"facts": ["The user does not need a daily check-in system."]}'
        mock_llm.side_effect = [extraction_reply, merge_reply]

        outcome = update_graph_from_dialogue(
            store=store,
            summary="User clarified they do not need a check-in.",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert len(outcome.stored) == 1
        root_data = store.get_node("user").data
        assert "does not need" in root_data
        assert "has a need for" not in root_data

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_merge_failure_falls_back_to_append(self, mock_llm, store):
        """An unusable merge reply must not lose the new fact.

        When the merge LLM returns junk, the fact is still written by
        plain append, so a transient failure never drops data."""
        store.update_node("user", data="Existing line.")

        # Extraction succeeds; the merge reply that follows is junk.
        mock_llm.side_effect = [
            '[{"branch": "USER", "fact": "Brand new fact."}]',
            "garbage with no json",
        ]

        outcome = update_graph_from_dialogue(
            store=store,
            summary="s",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert len(outcome.stored) == 1
        root_data = store.get_node("user").data
        for expected_line in ("Existing line.", "Brand new fact."):
            assert expected_line in root_data

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_cold_start_skips_merge_llm_call(self, mock_llm, store):
        """Writing to an empty node costs a single LLM call.

        With no data on the chosen node the merge pass short-circuits
        and the fact lands via plain append, which keeps cold-start
        writes cheap."""
        # Only the extraction call should hit the LLM.
        extraction_reply = '[{"branch": "WORLD", "fact": "Acme Corp is based in London."}]'
        mock_llm.return_value = extraction_reply

        outcome = update_graph_from_dialogue(
            store=store,
            summary="s",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert len(outcome.stored) == 1
        assert "Acme Corp" in store.get_node("world").data
        # Exactly one LLM call: extraction. Empty branch root means the
        # picker is skipped (no children) and the merge step short-
        # circuits before hitting the LLM.
        assert mock_llm.call_count == 1
|
|
|
|
|
|
# ── Warm profile helpers ──────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
class TestBuildWarmProfile:
    """build_warm_profile gathers the User and Directives branches."""

    def test_empty_graph_returns_empty_sections(self, store):
        """A store with no added nodes yields two empty sections."""
        assert build_warm_profile(store) == {"user": "", "directives": ""}

    def test_collects_user_branch_only(self, store):
        """Data under the User branch fills `user` and nothing else."""
        store.create_node(
            name="Identity",
            description="Who the user is",
            data="User's name is Baris.",
            parent_id=BRANCH_USER,
        )

        sections = build_warm_profile(store)

        assert "Baris" in sections["user"]
        assert sections["directives"] == ""

    def test_collects_directives_branch_only(self, store):
        """Data under the Directives branch fills `directives` and nothing else."""
        store.create_node(
            name="Tone",
            description="Reply style",
            data="Always reply briefly.",
            parent_id=BRANCH_DIRECTIVES,
        )

        sections = build_warm_profile(store)

        assert "briefly" in sections["directives"]
        assert sections["user"] == ""

    def test_ignores_world_branch(self, store):
        """Data under the World branch reaches neither section."""
        store.create_node(
            name="News",
            description="External fact",
            data="Paris is the capital of France.",
            parent_id=BRANCH_WORLD,
        )

        sections = build_warm_profile(store)

        assert sections["user"] == ""
        assert sections["directives"] == ""

    def test_respects_char_caps(self, store):
        """Oversized user data is cut to the cap and ends with an ellipsis."""
        oversized = "x" * 5000
        store.create_node(
            name="Long", description="d", data=oversized, parent_id=BRANCH_USER,
        )

        user_section = build_warm_profile(store, user_max_chars=200)["user"]

        assert len(user_section) <= 200
        assert user_section.endswith("…")

    def test_walks_branch_subtree(self, store):
        """Facts on a child and on its grandchild both reach the user section."""
        parent = store.create_node(
            name="Sub", description="child of user",
            data="User lives in Brighton.", parent_id=BRANCH_USER,
        )
        # One level deeper: attached to the child, not to the branch root.
        store.create_node(
            name="Grandchild", description="deeper",
            data="User moved in 2023.", parent_id=parent.id,
        )

        user_section = build_warm_profile(store)["user"]

        assert "Brighton" in user_section
        assert "2023" in user_section
|
|
|
|
|
|
@pytest.mark.unit
class TestFormatWarmProfileBlock:
    """Heading and emptiness behaviour of format_warm_profile_block."""

    def test_empty_profile_returns_empty_string(self):
        """Two empty sections render as an empty string."""
        empty_profile = {"user": "", "directives": ""}
        assert format_warm_profile_block(empty_profile) == ""

    def test_user_only_omits_directives_heading(self):
        """With only user data, just the user heading is rendered."""
        rendered = format_warm_profile_block(
            {"user": "Name is Baris.", "directives": ""}
        )
        assert "INFORMATION THE USER HAS SHARED" in rendered
        assert "STANDING INSTRUCTIONS" not in rendered
        assert "Baris" in rendered

    def test_directives_only_omits_user_heading(self):
        """With only directives, just the directives heading is rendered."""
        rendered = format_warm_profile_block(
            {"user": "", "directives": "Reply briefly."}
        )
        assert "STANDING INSTRUCTIONS" in rendered
        assert "INFORMATION THE USER HAS SHARED" not in rendered
        assert "briefly" in rendered

    def test_both_sections_rendered(self):
        """Both headings appear, with the user section first."""
        rendered = format_warm_profile_block(
            {"user": "Name is Baris.", "directives": "Reply briefly."}
        )
        assert "INFORMATION THE USER HAS SHARED" in rendered
        assert "STANDING INSTRUCTIONS" in rendered
        # User section appears before directives
        user_at = rendered.index("INFORMATION THE USER")
        directives_at = rendered.index("STANDING INSTRUCTIONS")
        assert user_at < directives_at

    def test_whitespace_only_treated_as_empty(self):
        """Sections holding only whitespace count as empty."""
        blank_profile = {"user": " \n", "directives": "\t"}
        assert format_warm_profile_block(blank_profile) == ""
|