Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
This commit is contained in:
349
tests/test_diary_rewrite_sweep.py
Normal file
349
tests/test_diary_rewrite_sweep.py
Normal file
@@ -0,0 +1,349 @@
|
||||
"""Tests for ``rewrite_all_diary_summaries`` — the LLM-driven bulk sweep
|
||||
that walks every row in ``conversation_summaries`` and asks the chat model
|
||||
to remove deflection narration.
|
||||
|
||||
Replaces the regex-based scrub sweep tests in #366. The previous regex
|
||||
approach was English-only and accreted patterns whenever the model invented
|
||||
a new shape. The current sweep delegates the semantic check to the chat
|
||||
model itself, which is language-agnostic and improves automatically as
|
||||
models upgrade.
|
||||
|
||||
The contract under test:
|
||||
1. Walks every row, writes back rewritten text only when it changed.
|
||||
2. Preserves ``ts_utc`` on rewrite — the audit trail must survive cleanup.
|
||||
3. Empty rewrite → keep original, surface ``would_empty: true``.
|
||||
4. LLM failure on a row → row left untouched, sweep continues.
|
||||
5. Per-row write failure → row reported with ``error``, sweep continues.
|
||||
6. Re-embeds rewritten rows when an embed model is configured.
|
||||
7. Event payload contains counts/booleans only, never raw summary text.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from jarvis.memory import conversation as cmod
|
||||
from jarvis.memory.conversation import rewrite_all_diary_summaries
|
||||
from jarvis.memory.db import Database
|
||||
|
||||
|
||||
def _seed(db: Database, rows: list[tuple[str, str, str | None]]) -> None:
    """Upsert one conversation summary per ``(date_utc, summary, topics)`` tuple.

    Rows are written in the order given, each tagged ``source_app="jarvis"``.
    """
    for day, text, tags in rows:
        db.upsert_conversation_summary(
            date_utc=day,
            summary=text,
            topics=tags,
            source_app="jarvis",
        )
|
||||
|
||||
|
||||
class TestRewriteSweepBehaviour:
    """Behavioural checks for ``rewrite_all_diary_summaries``.

    Every test creates a fresh database under ``tmp_path``, replaces
    ``cmod.call_llm_direct`` with a stub, collects the sweep's events into a
    list, and then inspects the events and/or the persisted rows.
    """

    @staticmethod
    def _sweep(db):
        """Run the sweep over ``db`` and return its events as a list."""
        return list(rewrite_all_diary_summaries(
            db, ollama_base_url="http://localhost", ollama_chat_model="test",
        ))

    @staticmethod
    def _reply_with(monkeypatch, reply):
        """Patch ``cmod.call_llm_direct`` with a stub that always returns ``reply``."""
        monkeypatch.setattr(
            cmod, "call_llm_direct", lambda *_args, **_kwargs: reply,
        )

    def test_walks_every_row_and_rewrites_only_dirty_ones(self, tmp_path, monkeypatch):
        """Three rows in, three events out; only the two rows carrying
        assistant narration are rewritten and the clean row is untouched."""
        db = Database(tmp_path / "jarvis.db")
        seeded = [
            ("2026-04-10", "The user asked X. The assistant could not help.", None),
            ("2026-04-15", "The user prefers Celsius.", None),
            ("2026-04-27", "The user asked Y. The assistant did not have info.", None),
        ]
        _seed(db, seeded)

        def fake_call(*args, **kwargs):
            # Stand-in LLM: discard every sentence mentioning "the assistant".
            if len(args) >= 4:
                prompt = args[3]
            else:
                prompt = kwargs.get("user_prompt", "")
            framing = (
                "<<<BEGIN UNTRUSTED WEB EXTRACT>>>",
                "<<<END UNTRUSTED WEB EXTRACT>>>",
            )
            for marker in framing:
                prompt = prompt.replace(marker, "")
            prompt = prompt.replace("Return the cleaned text only.", "").strip()

            survivors = []
            for sentence in prompt.split("."):
                trimmed = sentence.strip()
                if not trimmed:
                    continue
                if "the assistant" in sentence.lower():
                    continue
                survivors.append(trimmed)
            if not survivors:
                return ""
            return ". ".join(survivors) + "."

        monkeypatch.setattr(cmod, "call_llm_direct", fake_call)

        events = self._sweep(db)
        assert len(events) == 3
        rewritten_count = sum(1 for event in events if event["rewritten"])
        assert rewritten_count == 2

        summary_by_date = {}
        for row in db.get_all_conversation_summaries():
            summary_by_date[row["date_utc"]] = row["summary"]
        assert "could not" not in summary_by_date["2026-04-10"].lower()
        assert "did not have" not in summary_by_date["2026-04-27"].lower()
        # The clean row must match its seed exactly.
        assert summary_by_date["2026-04-15"] == "The user prefers Celsius."

    def test_preserves_ts_utc_on_rewrite(self, tmp_path, monkeypatch):
        """Rewriting a row must leave its ``ts_utc`` as it was.

        A cleanup pass should not make old rows look freshly written: the
        original authoring time is what lets users verify diary provenance.
        """
        db = Database(tmp_path / "jarvis.db")
        _seed(db, [
            ("2026-04-10", "User asked X. The assistant could not help.", None),
        ])
        ts_before = db.get_all_conversation_summaries()[0]["ts_utc"]

        # Wait long enough that an overwritten timestamp would visibly differ.
        time.sleep(1.1)

        self._reply_with(monkeypatch, "User asked X.")
        self._sweep(db)

        ts_after = db.get_all_conversation_summaries()[0]["ts_utc"]
        assert ts_after == ts_before, (
            "ts_utc was stomped — audit trail is destroyed by a maintenance pass"
        )

    def test_empty_rewrite_keeps_original_and_surfaces_would_empty(self, tmp_path, monkeypatch):
        """An empty model reply must not wipe the row.

        The event reports ``would_empty`` as true and ``rewritten`` as false,
        and the stored summary still starts with the seeded text. An empty
        diary entry is worse than a slightly leaky one, because retrieval
        treats absence as "no record".
        """
        db = Database(tmp_path / "jarvis.db")
        _seed(db, [
            ("2026-04-10", "The assistant could not help. The assistant offered to search.", None),
        ])

        self._reply_with(monkeypatch, "")
        events = self._sweep(db)

        assert len(events) == 1
        event = events[0]
        assert event["would_empty"] is True
        assert event["rewritten"] is False
        # The row is still present and still carries its original content.
        stored = db.get_all_conversation_summaries()
        assert stored[0]["summary"].startswith("The assistant could not help")

    def test_llm_failure_on_one_row_does_not_stop_sweep(self, tmp_path, monkeypatch):
        """A model error on one row is reported and the sweep carries on.

        The stub raises on its second call only; all three rows must still
        produce an event, exactly one carrying ``error`` and two rewritten.
        """
        db = Database(tmp_path / "jarvis.db")
        _seed(db, [
            ("2026-04-10", "User asked X. The assistant could not help.", None),
            ("2026-04-15", "User asked Y. The assistant could not help.", None),
            ("2026-04-27", "User asked Z. The assistant could not help.", None),
        ])

        attempts = 0

        def flaky(*args, **kwargs):
            nonlocal attempts
            attempts += 1
            if attempts == 2:
                raise RuntimeError("ollama timeout")
            return "User asked something."

        monkeypatch.setattr(cmod, "call_llm_direct", flaky)
        events = self._sweep(db)

        assert len(events) == 3
        failed = [event for event in events if event.get("error")]
        assert len(failed) == 1
        # The remaining two rows were still rewritten.
        succeeded = [event for event in events if event["rewritten"]]
        assert len(succeeded) == 2

    def test_event_payload_contains_no_raw_summary_text(self, tmp_path, monkeypatch):
        """Privacy contract: no event value may contain diary text.

        A unique sentinel is planted in both the seeded summary and the
        model reply, then every value of every event is searched for it.
        """
        db = Database(tmp_path / "jarvis.db")
        sentinel = "thisIsAUniqueSentinelStringThatMustNotLeak"
        _seed(db, [
            ("2026-04-10", f"User said {sentinel}. The assistant could not help.", None),
        ])

        self._reply_with(monkeypatch, f"User said {sentinel}.")
        events = self._sweep(db)

        for ev in events:
            for value in ev.values():
                assert sentinel not in str(value), (
                    f"diary content leaked into event field: {ev}"
                )

    def test_error_field_is_class_name_only_never_message(self, tmp_path, monkeypatch):
        """The ``error`` field must not echo the exception message.

        The stub raises ``ValueError`` with a sentinel in its message; the
        event's ``error`` must equal ``"RewriteFailed"`` and no event value
        may contain the sentinel.

        NOTE(review): the raised class is ``ValueError`` while the expected
        label is ``"RewriteFailed"`` — presumably a fixed label or wrapper
        class name used by the sweep; confirm against the implementation.
        """
        db = Database(tmp_path / "jarvis.db")
        sentinel = "secretDiaryTokenInExceptionMessage"
        _seed(db, [
            ("2026-04-10", f"User said {sentinel}. The assistant could not help.", None),
        ])

        def boom(*args, **kwargs):
            raise ValueError(f"oops {sentinel}")

        monkeypatch.setattr(cmod, "call_llm_direct", boom)
        events = self._sweep(db)

        assert len(events) == 1
        event = events[0]
        assert event["error"] == "RewriteFailed"
        for value in event.values():
            assert sentinel not in str(value)

    def test_unchanged_rewrite_does_not_trigger_writeback(self, tmp_path, monkeypatch):
        """A verbatim reply means nothing to write.

        When the model returns the input unchanged, the event reports
        ``rewritten`` as false and the row's ``ts_utc`` stays as it was.
        """
        db = Database(tmp_path / "jarvis.db")
        _seed(db, [
            ("2026-04-15", "The user prefers Celsius.", None),
        ])
        ts_before = db.get_all_conversation_summaries()[0]["ts_utc"]

        time.sleep(1.1)

        self._reply_with(monkeypatch, "The user prefers Celsius.")
        events = self._sweep(db)

        assert events[0]["rewritten"] is False
        # No write happened, so the timestamp must be unchanged.
        assert db.get_all_conversation_summaries()[0]["ts_utc"] == ts_before

    def test_handles_empty_diary_without_calling_llm(self, tmp_path, monkeypatch):
        """With no rows seeded, the sweep yields nothing and never calls the model."""
        db = Database(tmp_path / "jarvis.db")

        invocations = 0

        def tracker(*args, **kwargs):
            nonlocal invocations
            invocations += 1
            return ""

        monkeypatch.setattr(cmod, "call_llm_direct", tracker)
        events = self._sweep(db)

        assert events == []
        assert invocations == 0

    def test_strips_markdown_fences_from_model_output(self, tmp_path, monkeypatch):
        """A reply wrapped in a multi-line ``` fence is persisted as plain text."""
        db = Database(tmp_path / "jarvis.db")
        _seed(db, [
            ("2026-04-10", "User asked X. The assistant could not help.", None),
        ])

        self._reply_with(monkeypatch, "```\nUser asked X.\n```")
        self._sweep(db)

        persisted = db.get_all_conversation_summaries()[0]["summary"]
        assert persisted == "User asked X."
        assert "```" not in persisted

    def test_strips_single_line_backtick_wrap(self, tmp_path, monkeypatch):
        """Regression: a reply fenced with triple backticks on a single line.

        Per the original note, an earlier regex strip treated that shape as
        one giant opening fence, consumed the whole reply, and tripped the
        empty-rewrite guard. The rewrite must land and be stored unwrapped.
        """
        db = Database(tmp_path / "jarvis.db")
        _seed(db, [
            ("2026-04-10", "User asked X. The assistant could not help.", None),
        ])

        self._reply_with(monkeypatch, "```User asked X.```")
        events = self._sweep(db)

        # The rewrite must be applied rather than dropped by the would_empty guard.
        first = events[0]
        assert first["rewritten"] is True
        assert first["would_empty"] is False
        persisted = db.get_all_conversation_summaries()[0]["summary"]
        assert persisted == "User asked X."

    def test_strips_language_tagged_fences(self, tmp_path, monkeypatch):
        """A fence opened with a language tag (```text) is removed, tag included."""
        db = Database(tmp_path / "jarvis.db")
        _seed(db, [
            ("2026-04-10", "User asked X. The assistant could not help.", None),
        ])

        self._reply_with(monkeypatch, "```text\nUser asked X.\n```")
        self._sweep(db)

        persisted = db.get_all_conversation_summaries()[0]["summary"]
        assert persisted == "User asked X."

    def test_strips_echoed_untrusted_fence_markers(self, tmp_path, monkeypatch):
        """BEGIN/END UNTRUSTED WEB EXTRACT markers echoed by the model are not persisted.

        Per the original note, the diary text is wrapped in these markers
        before being sent to the model, and some models echo them back.
        """
        db = Database(tmp_path / "jarvis.db")
        _seed(db, [
            ("2026-04-10", "User asked X. The assistant could not help.", None),
        ])

        echoed = (
            "<<<BEGIN UNTRUSTED WEB EXTRACT>>>\n"
            "User asked X.\n"
            "<<<END UNTRUSTED WEB EXTRACT>>>"
        )
        self._reply_with(monkeypatch, echoed)
        self._sweep(db)

        persisted = db.get_all_conversation_summaries()[0]["summary"]
        assert persisted == "User asked X."
        assert "BEGIN UNTRUSTED" not in persisted
        assert "END UNTRUSTED" not in persisted

    def test_whitespace_only_difference_is_treated_as_no_change(self, tmp_path, monkeypatch):
        """Idempotence: extra leading/trailing whitespace is not a change.

        The reply differs from the stored summary only by surrounding
        whitespace, so ``rewritten`` must be false and ``ts_utc`` unchanged.
        """
        db = Database(tmp_path / "jarvis.db")
        _seed(db, [
            ("2026-04-15", "The user prefers Celsius.", None),
        ])
        ts_before = db.get_all_conversation_summaries()[0]["ts_utc"]

        time.sleep(1.1)

        self._reply_with(monkeypatch, " The user prefers Celsius. \n")
        events = self._sweep(db)

        assert events[0]["rewritten"] is False
        assert db.get_all_conversation_summaries()[0]["ts_utc"] == ts_before
|
||||
Reference in New Issue
Block a user