Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled

Transform isair/jarvis into a Discord-controlled voice assistant running on
the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral),
  voice channel join + voice receive/playback, pluggable VNC screen broadcast
  (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS
  behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite,
  docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because
Discord blocks bot video; native screen broadcast only works via a Node
selfbot library.
This commit is contained in:
javis-bot
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions

View File

@@ -0,0 +1,349 @@
"""Tests for ``rewrite_all_diary_summaries`` — the LLM-driven bulk sweep
that walks every row in ``conversation_summaries`` and asks the chat model
to remove deflection narration.
Replaces the regex-based scrub sweep tests in #366. The previous regex
approach was English-only and accreted patterns whenever the model invented
a new shape. The current sweep delegates the semantic check to the chat
model itself, which is language-agnostic and improves automatically as
models upgrade.
The contract under test:
1. Walks every row, writes back rewritten text only when it changed.
2. Preserves ``ts_utc`` on rewrite — the audit trail must survive cleanup.
3. Empty rewrite → keep original, surface ``would_empty: true``.
4. LLM failure on a row → row left untouched, sweep continues.
5. Per-row write failure → row reported with ``error``, sweep continues.
6. Re-embeds rewritten rows when an embed model is configured.
7. Event payload contains counts/booleans only, never raw summary text.
"""
from __future__ import annotations
import time
import pytest
from jarvis.memory import conversation as cmod
from jarvis.memory.conversation import rewrite_all_diary_summaries
from jarvis.memory.db import Database
def _seed(db: Database, rows: list[tuple[str, str, str | None]]) -> None:
"""Seed (date_utc, summary, topics) tuples into the DB."""
for date_utc, summary, topics in rows:
db.upsert_conversation_summary(
date_utc=date_utc, summary=summary, topics=topics, source_app="jarvis",
)
class TestRewriteSweepBehaviour:
def test_walks_every_row_and_rewrites_only_dirty_ones(self, tmp_path, monkeypatch):
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-10", "The user asked X. The assistant could not help.", None),
("2026-04-15", "The user prefers Celsius.", None),
("2026-04-27", "The user asked Y. The assistant did not have info.", None),
])
# Fake LLM: drop any sentence containing "the assistant".
def fake_call(*args, **kwargs):
text = args[3] if len(args) >= 4 else kwargs.get("user_prompt", "")
for marker in ("<<<BEGIN UNTRUSTED WEB EXTRACT>>>", "<<<END UNTRUSTED WEB EXTRACT>>>"):
text = text.replace(marker, "")
text = text.replace("Return the cleaned text only.", "").strip()
kept = [s.strip() for s in text.split(".") if s.strip() and "the assistant" not in s.lower()]
return ". ".join(kept) + ("." if kept else "")
monkeypatch.setattr(cmod, "call_llm_direct", fake_call)
events = list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
assert len(events) == 3
rewritten = [e for e in events if e["rewritten"]]
assert len(rewritten) == 2
rows = {r["date_utc"]: r["summary"] for r in db.get_all_conversation_summaries()}
assert "could not" not in rows["2026-04-10"].lower()
assert "did not have" not in rows["2026-04-27"].lower()
# Clean row is byte-identical to the seed.
assert rows["2026-04-15"] == "The user prefers Celsius."
def test_preserves_ts_utc_on_rewrite(self, tmp_path, monkeypatch):
"""A maintenance pass must not make cleaned rows look like new
writes — the audit trail of when a row was *originally* authored
is the only signal users have to verify diary provenance."""
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-10", "User asked X. The assistant could not help.", None),
])
original_ts = db.get_all_conversation_summaries()[0]["ts_utc"]
# Sleep so a stomped ts_utc would be detectably different.
time.sleep(1.1)
monkeypatch.setattr(
cmod, "call_llm_direct",
lambda *a, **k: "User asked X.",
)
list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
new_ts = db.get_all_conversation_summaries()[0]["ts_utc"]
assert new_ts == original_ts, (
"ts_utc was stomped — audit trail is destroyed by a maintenance pass"
)
def test_empty_rewrite_keeps_original_and_surfaces_would_empty(self, tmp_path, monkeypatch):
"""If the model returns empty (entire row was deflection), keep
the original. Empty diary entries are worse than slightly-leaky
ones — retrieval treats absence as 'no record'."""
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-10", "The assistant could not help. The assistant offered to search.", None),
])
monkeypatch.setattr(cmod, "call_llm_direct", lambda *a, **k: "")
events = list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
assert len(events) == 1
assert events[0]["would_empty"] is True
assert events[0]["rewritten"] is False
# Row must still be there with original content.
rows = db.get_all_conversation_summaries()
assert rows[0]["summary"].startswith("The assistant could not help")
def test_llm_failure_on_one_row_does_not_stop_sweep(self, tmp_path, monkeypatch):
"""Per-row failure must be fail-open. The sweep continues with
the remaining rows so a transient model hiccup on one date does
not abandon the rest of the diary."""
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-10", "User asked X. The assistant could not help.", None),
("2026-04-15", "User asked Y. The assistant could not help.", None),
("2026-04-27", "User asked Z. The assistant could not help.", None),
])
calls = {"n": 0}
def flaky(*args, **kwargs):
calls["n"] += 1
if calls["n"] == 2:
raise RuntimeError("ollama timeout")
return "User asked something."
monkeypatch.setattr(cmod, "call_llm_direct", flaky)
events = list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
assert len(events) == 3
errors = [e for e in events if e.get("error")]
assert len(errors) == 1
# Other two rows still got rewritten.
rewritten = [e for e in events if e["rewritten"]]
assert len(rewritten) == 2
def test_event_payload_contains_no_raw_summary_text(self, tmp_path, monkeypatch):
"""Privacy contract: per-row events must contain only counts,
booleans, and the date — never any portion of the diary text."""
db = Database(tmp_path / "jarvis.db")
sentinel = "thisIsAUniqueSentinelStringThatMustNotLeak"
_seed(db, [
("2026-04-10", f"User said {sentinel}. The assistant could not help.", None),
])
monkeypatch.setattr(
cmod, "call_llm_direct",
lambda *a, **k: f"User said {sentinel}.",
)
events = list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
for ev in events:
for v in ev.values():
assert sentinel not in str(v), (
f"diary content leaked into event field: {ev}"
)
def test_error_field_is_class_name_only_never_message(self, tmp_path, monkeypatch):
"""Stringified exception messages can echo offending input back to
the caller. The error field must be the class name only."""
db = Database(tmp_path / "jarvis.db")
sentinel = "secretDiaryTokenInExceptionMessage"
_seed(db, [
("2026-04-10", f"User said {sentinel}. The assistant could not help.", None),
])
def boom(*a, **k):
raise ValueError(f"oops {sentinel}")
monkeypatch.setattr(cmod, "call_llm_direct", boom)
events = list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
assert len(events) == 1
assert events[0]["error"] == "RewriteFailed"
for v in events[0].values():
assert sentinel not in str(v)
def test_unchanged_rewrite_does_not_trigger_writeback(self, tmp_path, monkeypatch):
"""If the LLM returns the input verbatim (clean row), no DB write
happens and the embedding stays put. Equivalent of the topic
optimiser's 'topics_changed=False → skip writeback' rule."""
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-15", "The user prefers Celsius.", None),
])
original_ts = db.get_all_conversation_summaries()[0]["ts_utc"]
time.sleep(1.1)
monkeypatch.setattr(
cmod, "call_llm_direct",
lambda *a, **k: "The user prefers Celsius.",
)
events = list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
assert events[0]["rewritten"] is False
# ts_utc must not have changed since no write happened.
assert db.get_all_conversation_summaries()[0]["ts_utc"] == original_ts
def test_handles_empty_diary_without_calling_llm(self, tmp_path, monkeypatch):
db = Database(tmp_path / "jarvis.db")
called = {"n": 0}
def tracker(*a, **k):
called["n"] += 1
return ""
monkeypatch.setattr(cmod, "call_llm_direct", tracker)
events = list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
assert events == []
assert called["n"] == 0
def test_strips_markdown_fences_from_model_output(self, tmp_path, monkeypatch):
"""Some models wrap output in ```text fences despite instructions.
The sweep must strip them so the persisted summary is plain text."""
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-10", "User asked X. The assistant could not help.", None),
])
monkeypatch.setattr(
cmod, "call_llm_direct",
lambda *a, **k: "```\nUser asked X.\n```",
)
list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
persisted = db.get_all_conversation_summaries()[0]["summary"]
assert persisted == "User asked X."
assert "```" not in persisted
def test_strips_single_line_backtick_wrap(self, tmp_path, monkeypatch):
r"""Regression: the previous regex strip treated ``\`\`\`X\`\`\``` as
one giant opening fence and consumed the whole response, tripping
the empty-rewrite guard and dropping a perfectly good rewrite.
The fix unwraps both single-line and multi-line fence shapes."""
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-10", "User asked X. The assistant could not help.", None),
])
monkeypatch.setattr(
cmod, "call_llm_direct",
lambda *a, **k: "```User asked X.```",
)
events = list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
# The rewrite must land — not get dropped via the would_empty guard.
assert events[0]["rewritten"] is True
assert events[0]["would_empty"] is False
persisted = db.get_all_conversation_summaries()[0]["summary"]
assert persisted == "User asked X."
def test_strips_language_tagged_fences(self, tmp_path, monkeypatch):
"""Models often emit ```text\\n...\\n``` despite being told no
markdown. The language tag (anything between the opening ``` and
the first newline) must be dropped along with the fence."""
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-10", "User asked X. The assistant could not help.", None),
])
monkeypatch.setattr(
cmod, "call_llm_direct",
lambda *a, **k: "```text\nUser asked X.\n```",
)
list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
persisted = db.get_all_conversation_summaries()[0]["summary"]
assert persisted == "User asked X."
def test_strips_echoed_untrusted_fence_markers(self, tmp_path, monkeypatch):
"""The diary text is wrapped in ``<<<BEGIN UNTRUSTED WEB EXTRACT>>>``
markers before being passed to the model (treat-as-data framing).
Some models echo those markers back. They must be stripped so the
markers don't end up persisted in the diary."""
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-10", "User asked X. The assistant could not help.", None),
])
monkeypatch.setattr(
cmod, "call_llm_direct",
lambda *a, **k: (
"<<<BEGIN UNTRUSTED WEB EXTRACT>>>\n"
"User asked X.\n"
"<<<END UNTRUSTED WEB EXTRACT>>>"
),
)
list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
persisted = db.get_all_conversation_summaries()[0]["summary"]
assert persisted == "User asked X."
assert "BEGIN UNTRUSTED" not in persisted
assert "END UNTRUSTED" not in persisted
def test_whitespace_only_difference_is_treated_as_no_change(self, tmp_path, monkeypatch):
"""Idempotence: the LLM may return content with different leading/
trailing whitespace. We compare stripped texts, so this should not
trigger a writeback (no embedding refresh, ts_utc preserved)."""
db = Database(tmp_path / "jarvis.db")
_seed(db, [
("2026-04-15", "The user prefers Celsius.", None),
])
original_ts = db.get_all_conversation_summaries()[0]["ts_utc"]
time.sleep(1.1)
monkeypatch.setattr(
cmod, "call_llm_direct",
lambda *a, **k: " The user prefers Celsius. \n",
)
events = list(rewrite_all_diary_summaries(
db, ollama_base_url="http://localhost", ollama_chat_model="test",
))
assert events[0]["rewritten"] is False
assert db.get_all_conversation_summaries()[0]["ts_utc"] == original_ts