Files
javis_bot/src/jarvis/memory/db.py
javis-bot c4abf63f38
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Transform isair/jarvis into a Discord-controlled voice assistant running on
the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral),
  voice channel join + voice receive/playback, pluggable VNC screen broadcast
  (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS
  behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite,
  docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because
Discord blocks bot video; native screen broadcast only works via a Node
selfbot library.
2026-06-09 14:51:05 +09:00

443 lines
17 KiB
Python

from __future__ import annotations
import sqlite3
import re
from typing import Sequence, Optional
from pathlib import Path
import threading
from datetime import datetime, timezone
from ..debug import debug_log
# Base schema script, run by Database._init_schema via executescript().
# Every CREATE uses IF NOT EXISTS, so the script can be re-run on an existing database.
#   * PRAGMAs: journal_mode is set to WAL and synchronous to NORMAL.
#   * meals: structured meal/nutrition log (marked optional by the SQL comment below).
#   * conversation_summaries: at most one row per (date_utc, source_app) pair.
#   * summaries_fts: external-content FTS5 index over summary/topics (porter
#     tokenizer); the three triggers mirror INSERT / DELETE / UPDATE on
#     conversation_summaries into that index.
_SCHEMA_SQL = """
PRAGMA journal_mode=WAL;
PRAGMA synchronous=NORMAL;
-- Structured meals log (optional feature)
CREATE TABLE IF NOT EXISTS meals (
id INTEGER PRIMARY KEY,
ts_utc TEXT NOT NULL,
source_app TEXT NOT NULL,
description TEXT NOT NULL,
calories_kcal REAL,
protein_g REAL,
carbs_g REAL,
fat_g REAL,
fiber_g REAL,
sugar_g REAL,
sodium_mg REAL,
potassium_mg REAL,
micros_json TEXT,
confidence REAL
);
-- Conversation summaries for diary/memory system
CREATE TABLE IF NOT EXISTS conversation_summaries (
id INTEGER PRIMARY KEY,
date_utc TEXT NOT NULL, -- YYYY-MM-DD format
ts_utc TEXT NOT NULL, -- When summary was created
summary TEXT NOT NULL, -- Concise summary of the day's conversations
topics TEXT, -- Comma-separated list of main topics discussed
source_app TEXT NOT NULL, -- Source app that generated the conversation
UNIQUE(date_utc, source_app)
);
CREATE VIRTUAL TABLE IF NOT EXISTS summaries_fts USING fts5(
summary,
topics,
content='conversation_summaries',
content_rowid='id',
tokenize='porter'
);
-- Triggers for conversation summaries FTS
CREATE TRIGGER IF NOT EXISTS summaries_ai AFTER INSERT ON conversation_summaries BEGIN
INSERT INTO summaries_fts(rowid, summary, topics) VALUES (new.id, new.summary, new.topics);
END;
CREATE TRIGGER IF NOT EXISTS summaries_ad AFTER DELETE ON conversation_summaries BEGIN
INSERT INTO summaries_fts(summaries_fts, rowid, summary, topics) VALUES('delete', old.id, old.summary, old.topics);
END;
CREATE TRIGGER IF NOT EXISTS summaries_au AFTER UPDATE ON conversation_summaries BEGIN
INSERT INTO summaries_fts(summaries_fts, rowid, summary, topics) VALUES('delete', old.id, old.summary, old.topics);
INSERT INTO summaries_fts(rowid, summary, topics) VALUES (new.id, new.summary, new.topics);
END;
"""
# Extra schema script, run by Database._init_schema only when the sqlite-vss
# extension was loaded successfully.
#   * embeddings: vss0 virtual table with one 768-float vector column per row.
#   * summary_vec: maps a conversation_summaries id to its embeddings row id.
# NOTE(review): ON DELETE CASCADE only acts when foreign-key enforcement is on;
# no "PRAGMA foreign_keys" is visible in this file -- confirm it is enabled
# elsewhere if the cascade is relied upon.
_VSS_SCHEMA_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vss0(
id INTEGER PRIMARY KEY,
vec FLOAT[768]
);
CREATE TABLE IF NOT EXISTS summary_vec (
summary_id INTEGER PRIMARY KEY REFERENCES conversation_summaries(id) ON DELETE CASCADE,
emb_id INTEGER NOT NULL REFERENCES embeddings(id)
);
"""
def _normalize_fts_query(raw: str) -> str:
    """Turn free-form text into a query string that is safe to pass to FTS5 ``MATCH``.

    The flexible query builder from ``.fuzzy_search`` is preferred whenever it
    can be imported and returns a non-empty query.  Otherwise the text is split
    into word tokens and each token is emitted as a double-quoted FTS5 string,
    joined by spaces (which FTS5 treats as an implicit AND).

    Args:
        raw: Arbitrary user-supplied search text.

    Returns:
        An FTS5 query string, or ``""`` when ``raw`` contains no word tokens.
    """
    try:
        from .fuzzy_search import generate_flexible_fts_query

        flexible_query = generate_flexible_fts_query(raw)
        if flexible_query:
            return flexible_query
    except ImportError:
        # Optional helper is unavailable: fall through to the basic tokenizer.
        pass
    # ``\w`` is Unicode-aware for str patterns, so non-ASCII words (Korean,
    # accented Latin, ...) survive instead of being silently dropped.
    tokens = re.findall(r"\w+", raw or "")
    # Quoting each token makes FTS5 treat it as a plain term; a bare upper-case
    # AND / OR / NOT would otherwise be parsed as an operator and could make
    # the whole MATCH expression a syntax error.
    return " ".join(f'"{token}"' for token in tokens)
class Database:
def __init__(self, db_path: str, sqlite_vss_path: Optional[str] = None) -> None:
    """Open the SQLite database, choose a vector backend and create the schema.

    Args:
        db_path: Path of the SQLite database file.  Missing parent
            directories are created.
        sqlite_vss_path: Optional path of the sqlite-vss loadable extension.
            When it is not given, or loading it fails, a vector store
            obtained from ``..utils.vector_store.get_best_vector_store`` is
            used instead.
    """
    Path(db_path).parent.mkdir(parents=True, exist_ok=True)
    self.db_path = db_path
    # The connection may be used from several threads; the methods of this
    # class take self._lock around every use of it.
    self.conn = sqlite3.connect(db_path, check_same_thread=False)
    self.conn.row_factory = sqlite3.Row
    self._lock = threading.RLock()
    self.is_vss_enabled = False
    self._python_vector_store = None
    if sqlite_vss_path:
        try:
            self.conn.enable_load_extension(True)
            self.conn.load_extension(sqlite_vss_path)
            self.is_vss_enabled = True
        except Exception as exc:
            # Best effort: a missing or broken extension must not prevent
            # start-up, but the reason is logged so it can be diagnosed.
            debug_log(
                f"sqlite-vss extension could not be loaded ({exc}); using fallback vector store",
                "jarvis",
            )
            self.is_vss_enabled = False
    # If sqlite-vss is not available, use best available vector store (FAISS or Python fallback)
    if not self.is_vss_enabled:
        from ..utils.vector_store import get_best_vector_store

        self._python_vector_store = get_best_vector_store(db_path, dimension=768)
        # Log which vector store implementation is being used
        store_type = type(self._python_vector_store).__name__
        if store_type == "FAISSVectorStore":
            debug_log("Using FAISS vector store for fast search", "jarvis")
        else:
            debug_log("Using Python fallback vector store", "jarvis")
    self._init_schema()
def _init_schema(self) -> None:
    """Run the schema scripts against the connection and commit.

    ``_SCHEMA_SQL`` is always executed; ``_VSS_SCHEMA_SQL`` is executed
    afterwards only when ``self.is_vss_enabled`` is true.
    """
    with self._lock:
        cursor = self.conn.cursor()
        scripts = [_SCHEMA_SQL]
        if self.is_vss_enabled:
            scripts.append(_VSS_SCHEMA_SQL)
        for script in scripts:
            cursor.executescript(script)
        self.conn.commit()
def search_hybrid(self, fts_query: str, query_vec_json: Optional[str], top_k: int = 8) -> list[sqlite3.Row]:
    """Search conversation summaries by keywords and, when possible, by vector similarity.

    The first applicable strategy is used:

    1. Fallback vector store + FTS5, merged in Python (sqlite-vss not
       enabled, a vector store is present, and both a query vector and a
       non-empty FTS query are available).
    2. sqlite-vss + FTS5, merged inside one SQL statement.
    3. FTS5 only (no query vector).
    4. Latest summaries by date (the text produced no FTS query).

    In the two hybrid strategies the score is
    ``0.6 * 1/(1 + distance) + 0.4 * r/(1 + r)`` with ``r = -bm25``.  FTS5's
    ``bm25()`` yields negative numbers where a more negative value means a
    better match, so ``r`` is a non-negative relevance and ``r/(1 + r)`` is
    bounded in ``[0, 1)``.  Feeding the raw bm25 value into ``1/(1 + bm)``
    divides by zero at ``bm == -1`` and changes sign below it.

    Args:
        fts_query: Free-form search text; normalised by ``_normalize_fts_query``.
        query_vec_json: JSON-encoded query embedding, or ``None`` to skip
            vector search.
        top_k: Maximum number of results; coerced with ``int()``.

    Returns:
        Result rows exposing ``id``, ``score``, ``text`` and ``result_type``.
        Strategy 1 returns plain dicts (the score is attached in Python);
        the other strategies return ``sqlite3.Row`` objects.  Hybrid results
        are ordered by descending score; FTS-only results carry the raw
        bm25 value as ``score`` and are ordered ascending (best first);
        the date fallback uses a constant score of ``0.0``.
    """
    top_k = int(top_k)
    with self._lock:
        cur = self.conn.cursor()
        safe_q = _normalize_fts_query(fts_query)
        has_hybrid_inputs = query_vec_json is not None and bool(safe_q)
        # Over-fetch candidates from each source so that the merged ranking
        # still has enough material after the two lists are combined.
        candidate_limit = max(top_k * 3, 50)

        # Use Python vector store if sqlite-vss is not available
        if not self.is_vss_enabled and self._python_vector_store and has_hybrid_inputs:
            import json as _json

            query_vec = _json.loads(query_vec_json)
            # Iterated below as (summary_id, distance) pairs.
            vector_results = self._python_vector_store.search(query_vec, top_k=candidate_limit)
            fts_rows = cur.execute(
                f"""
                SELECT s.id, bm25(summaries_fts) AS bm
                FROM summaries_fts
                JOIN conversation_summaries s ON s.id = summaries_fts.rowid
                WHERE summaries_fts MATCH ?
                ORDER BY bm
                LIMIT {candidate_limit}
                """,
                (safe_q,),
            ).fetchall()

            combined_scores: dict = {}
            # Vector similarity contributes 60% of the score.
            for summary_id, distance in vector_results:
                combined_scores[summary_id] = (1.0 / (1.0 + distance)) * 0.6
            # Keyword relevance contributes the remaining 40%.
            for fts_row in fts_rows:
                relevance = max(0.0, -float(fts_row["bm"]))
                fts_part = (relevance / (1.0 + relevance)) * 0.4
                combined_scores[fts_row["id"]] = combined_scores.get(fts_row["id"], 0.0) + fts_part

            ranked = sorted(combined_scores.items(), key=lambda item: item[1], reverse=True)[:top_k]
            if not ranked:
                return []

            placeholders = ",".join("?" * len(ranked))
            summary_sql = f"""
                SELECT s.id,
                       '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
                       'summary' AS result_type
                FROM conversation_summaries s
                WHERE s.id IN ({placeholders})
            """
            rows = cur.execute(summary_sql, [sid for sid, _ in ranked]).fetchall()
            id_to_score = dict(ranked)
            results = []
            for row in rows:
                result = dict(row)
                result["score"] = id_to_score.get(row["id"], 0.0)
                results.append(result)
            # The IN (...) lookup does not preserve ranking order, so sort again.
            results.sort(key=lambda item: item["score"], reverse=True)
            return results
        elif self.is_vss_enabled and has_hybrid_inputs:
            # Hybrid search: 60% vector similarity (semantic) + 40% FTS (exact terms)
            summary_sql = f"""
                WITH fts_sum AS (
                    SELECT s.id, bm25(summaries_fts) AS bm
                    FROM summaries_fts
                    JOIN conversation_summaries s ON s.id = summaries_fts.rowid
                    WHERE summaries_fts MATCH ?
                    ORDER BY bm LIMIT {candidate_limit}
                ),
                v_sum AS (
                    SELECT sv.summary_id AS id, distance
                    FROM vss_search(embeddings, 'vec', ?)
                    JOIN summary_vec sv ON sv.emb_id = rowid
                    LIMIT {candidate_limit}
                )
                SELECT s.id, (
                    (1.0/(1.0+COALESCE(v_sum.distance, 1))) * 0.6 +
                    (COALESCE(-fts_sum.bm, 0.0) / (1.0 + COALESCE(-fts_sum.bm, 0.0))) * 0.4
                ) AS score,
                '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
                'summary' AS result_type
                FROM conversation_summaries s
                LEFT JOIN v_sum ON v_sum.id = s.id
                LEFT JOIN fts_sum ON fts_sum.id = s.id
                WHERE v_sum.id IS NOT NULL OR fts_sum.id IS NOT NULL
                ORDER BY score DESC
                LIMIT {top_k};
            """
            rows = cur.execute(summary_sql, (safe_q, query_vec_json)).fetchall()
        elif safe_q:
            # FTS-only search over conversation summaries
            summary_sql = f"""
                SELECT s.id, bm25(summaries_fts) AS score,
                '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
                'summary' AS result_type
                FROM summaries_fts
                JOIN conversation_summaries s ON s.id = summaries_fts.rowid
                WHERE summaries_fts MATCH ?
                ORDER BY score
                LIMIT {top_k};
            """
            rows = cur.execute(summary_sql, (safe_q,)).fetchall()
        else:
            # Fallback: latest conversation summaries
            summary_sql = f"""
                SELECT id, 0.0 AS score,
                '[' || date_utc || '] ' || summary || ' (Topics: ' || COALESCE(topics, '') || ')' AS text,
                'summary' AS result_type
                FROM conversation_summaries
                ORDER BY date_utc DESC
                LIMIT {top_k};
            """
            rows = cur.execute(summary_sql).fetchall()
        return rows
@staticmethod
def _pack_vector(vec: Sequence[float]) -> bytes:
    """Serialise ``vec`` as a blob of consecutive native single-precision floats.

    Every element is converted with ``float()`` first.  The resulting bytes
    are what ``upsert_summary_embedding`` writes into the ``embeddings``
    table (presumably the layout sqlite-vss expects -- confirm against the
    extension's documentation).
    """
    from array import array as _float_array

    return _float_array('f', map(float, vec)).tobytes()
# --- Meals API ---
def insert_meal(
    self,
    ts_utc: str,
    source_app: str,
    description: str,
    calories_kcal: Optional[float] = None,
    protein_g: Optional[float] = None,
    carbs_g: Optional[float] = None,
    fat_g: Optional[float] = None,
    fiber_g: Optional[float] = None,
    sugar_g: Optional[float] = None,
    sodium_mg: Optional[float] = None,
    potassium_mg: Optional[float] = None,
    micros_json: Optional[str] = None,
    confidence: Optional[float] = None,
) -> int:
    """Append one row to the ``meals`` table, commit, and return the new row id.

    The three leading arguments are required; every nutrition field,
    ``micros_json`` and ``confidence`` may be ``None`` and is then stored as
    NULL.
    """
    # Same order as the column list in the statement below.
    values = (
        ts_utc,
        source_app,
        description,
        calories_kcal,
        protein_g,
        carbs_g,
        fat_g,
        fiber_g,
        sugar_g,
        sodium_mg,
        potassium_mg,
        micros_json,
        confidence,
    )
    statement = (
        "\n"
        "INSERT INTO meals(ts_utc, source_app, description, calories_kcal, protein_g, carbs_g, fat_g, fiber_g, sugar_g, sodium_mg, potassium_mg, micros_json, confidence)\n"
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n"
    )
    with self._lock:
        cursor = self.conn.cursor()
        cursor.execute(statement, values)
        self.conn.commit()
        new_id = cursor.lastrowid
        return int(new_id)
def get_meals_between(self, ts_utc_min: str, ts_utc_max: str) -> list[sqlite3.Row]:
    """Return the meals whose ``ts_utc`` lies in the inclusive range, oldest first.

    Both bounds are compared by SQLite against the stored ``ts_utc`` value.
    """
    query = (
        "\n"
        "SELECT * FROM meals\n"
        "WHERE ts_utc >= ? AND ts_utc <= ?\n"
        "ORDER BY ts_utc ASC\n"
    )
    bounds = (ts_utc_min, ts_utc_max)
    with self._lock:
        return self.conn.cursor().execute(query, bounds).fetchall()
def delete_meal(self, meal_id: int) -> bool:
    """Delete the meal with id ``meal_id`` and commit.

    Returns:
        ``True`` when a row was removed, ``False`` when no such id existed.
    """
    with self._lock:
        cursor = self.conn.cursor()
        cursor.execute("DELETE FROM meals WHERE id = ?", (meal_id,))
        self.conn.commit()
        removed_rows = cursor.rowcount
        if removed_rows > 0:
            return True
        return False
# --- Conversation Summaries API ---
def upsert_conversation_summary(
    self,
    date_utc: str,  # YYYY-MM-DD format
    summary: str,
    topics: Optional[str] = None,
    source_app: str = "jarvis",
    ts_utc: Optional[str] = None,
) -> int:
    """Insert or update the conversation summary for ``(date_utc, source_app)``.

    An existing row is updated in place, so its ``id`` stays the same and
    anything keyed by that id (for example a stored embedding) still refers
    to it.  ``INSERT OR REPLACE`` is deliberately not used: it removes the
    conflicting row without firing the delete trigger (unless recursive
    triggers are enabled), which leaves stale entries in the
    external-content FTS index, and it hands out a new row id.

    ``ts_utc`` defaults to "now". Maintenance ops that rewrite an
    existing row's content without changing what it represents (e.g.
    the deflection scrub bulk sweep) should pass through the row's
    original ``ts_utc`` so the audit trail is preserved.

    Args:
        date_utc: Day the summary covers, ``YYYY-MM-DD``.
        summary: Summary text.
        topics: Optional comma-separated topic list.
        source_app: Application the conversation came from.
        ts_utc: Creation timestamp to store; current UTC time when ``None``.

    Returns:
        The id of the inserted or updated row.
    """
    if ts_utc is None:
        ts_utc = datetime.now(timezone.utc).isoformat()
    with self._lock:
        cur = self.conn.cursor()
        cur.execute(
            """
            INSERT INTO conversation_summaries(date_utc, ts_utc, summary, topics, source_app)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(date_utc, source_app) DO UPDATE SET
                ts_utc = excluded.ts_utc,
                summary = excluded.summary,
                topics = excluded.topics
            """,
            (date_utc, ts_utc, summary, topics, source_app),
        )
        # lastrowid is not meaningful when the statement took the UPDATE
        # path, so the id is read back through the unique key instead.
        row = cur.execute(
            "SELECT id FROM conversation_summaries WHERE date_utc = ? AND source_app = ?",
            (date_utc, source_app),
        ).fetchone()
        self.conn.commit()
        return int(row[0])
def get_conversation_summary(self, date_utc: str, source_app: str = "jarvis") -> Optional[sqlite3.Row]:
    """Fetch the summary stored for ``date_utc`` and ``source_app``.

    Returns:
        The matching row, or ``None`` when there is none.
    """
    lookup = (
        "\n"
        "SELECT * FROM conversation_summaries\n"
        "WHERE date_utc = ? AND source_app = ?\n"
    )
    with self._lock:
        cursor = self.conn.cursor()
        cursor.execute(lookup, (date_utc, source_app))
        return cursor.fetchone()
def get_recent_conversation_summaries(self, days: int = 7) -> list[sqlite3.Row]:
    """Return summaries dated within the last ``days`` days, newest first.

    The cut-off is the current UTC date minus ``days``; rows whose
    ``date_utc`` equals the cut-off are included.
    """
    from datetime import datetime, timedelta, timezone

    now_utc = datetime.now(timezone.utc)
    oldest_wanted = (now_utc - timedelta(days=days)).date().isoformat()
    query = (
        "\n"
        "SELECT * FROM conversation_summaries\n"
        "WHERE date_utc >= ?\n"
        "ORDER BY date_utc DESC\n"
    )
    with self._lock:
        return self.conn.cursor().execute(query, (oldest_wanted,)).fetchall()
def get_all_conversation_summaries(self) -> list[sqlite3.Row]:
    """Return every conversation summary, ordered by date ascending (oldest first).

    Used for bulk import into graph memory: diary entries are processed
    chronologically so the graph builds up naturally.
    """
    everything_oldest_first = (
        "\n"
        "SELECT * FROM conversation_summaries\n"
        "ORDER BY date_utc ASC\n"
    )
    with self._lock:
        cursor = self.conn.cursor()
        cursor.execute(everything_oldest_first)
        return cursor.fetchall()
def upsert_summary_embedding(self, summary_id: int, vec: Sequence[float]) -> Optional[int]:
    """Store or update the embedding of a conversation summary.

    With sqlite-vss enabled the packed vector is inserted into ``embeddings``
    and ``summary_vec`` is pointed at the new row.  Otherwise the vector is
    handed to the fallback vector store, if one is configured.

    Args:
        summary_id: Id of the conversation summary the vector belongs to.
        vec: Embedding values.

    Returns:
        The new ``embeddings`` row id (sqlite-vss), ``summary_id`` as a
        placeholder (fallback store), or ``None`` when no backend exists.
    """
    if self.is_vss_enabled:
        with self._lock:
            cur = self.conn.cursor()
            try:
                # NOTE(review): every call inserts a fresh embeddings row; a row
                # previously mapped to this summary is not removed here.
                cur.execute(
                    "INSERT INTO embeddings(vec) VALUES (?)",
                    (sqlite3.Binary(self._pack_vector(vec)),),
                )
                emb_id = cur.lastrowid
                cur.execute(
                    "INSERT OR REPLACE INTO summary_vec(summary_id, emb_id) VALUES (?, ?)",
                    (summary_id, emb_id),
                )
            except Exception:
                # Do not leave a half-written pair pending for a later commit.
                self.conn.rollback()
                raise
            self.conn.commit()
            return int(emb_id)
    # Identity check, not truthiness: a store that defines __len__ is falsy
    # while empty, which would make it impossible to add the first vector.
    if self._python_vector_store is not None:
        self._python_vector_store.add_vector(summary_id, list(vec))
        return summary_id  # Return summary_id as a placeholder for emb_id
    return None
def close(self) -> None:
    """Close the database connection while holding the lock.

    Strictly best effort: any exception raised along the way is swallowed,
    so calling this on an already closed connection does not raise.
    """
    try:
        self._lock.acquire()
        try:
            self.conn.close()
        finally:
            self._lock.release()
    except Exception:
        return None