Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer), because Discord blocks bot video and native screen broadcast only works via a Node selfbot library.
This commit is contained in:
442
src/jarvis/memory/db.py
Normal file
442
src/jarvis/memory/db.py
Normal file
@@ -0,0 +1,442 @@
|
||||
from __future__ import annotations
|
||||
import sqlite3
|
||||
import re
|
||||
from typing import Sequence, Optional
|
||||
from pathlib import Path
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from ..debug import debug_log
|
||||
|
||||
_SCHEMA_SQL = """
|
||||
PRAGMA journal_mode=WAL;
|
||||
PRAGMA synchronous=NORMAL;
|
||||
|
||||
-- Structured meals log (optional feature)
|
||||
CREATE TABLE IF NOT EXISTS meals (
|
||||
id INTEGER PRIMARY KEY,
|
||||
ts_utc TEXT NOT NULL,
|
||||
source_app TEXT NOT NULL,
|
||||
description TEXT NOT NULL,
|
||||
calories_kcal REAL,
|
||||
protein_g REAL,
|
||||
carbs_g REAL,
|
||||
fat_g REAL,
|
||||
fiber_g REAL,
|
||||
sugar_g REAL,
|
||||
sodium_mg REAL,
|
||||
potassium_mg REAL,
|
||||
micros_json TEXT,
|
||||
confidence REAL
|
||||
);
|
||||
|
||||
-- Conversation summaries for diary/memory system
|
||||
CREATE TABLE IF NOT EXISTS conversation_summaries (
|
||||
id INTEGER PRIMARY KEY,
|
||||
date_utc TEXT NOT NULL, -- YYYY-MM-DD format
|
||||
ts_utc TEXT NOT NULL, -- When summary was created
|
||||
summary TEXT NOT NULL, -- Concise summary of the day's conversations
|
||||
topics TEXT, -- Comma-separated list of main topics discussed
|
||||
source_app TEXT NOT NULL, -- Source app that generated the conversation
|
||||
UNIQUE(date_utc, source_app)
|
||||
);
|
||||
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS summaries_fts USING fts5(
|
||||
summary,
|
||||
topics,
|
||||
content='conversation_summaries',
|
||||
content_rowid='id',
|
||||
tokenize='porter'
|
||||
);
|
||||
|
||||
-- Triggers for conversation summaries FTS
|
||||
CREATE TRIGGER IF NOT EXISTS summaries_ai AFTER INSERT ON conversation_summaries BEGIN
|
||||
INSERT INTO summaries_fts(rowid, summary, topics) VALUES (new.id, new.summary, new.topics);
|
||||
END;
|
||||
CREATE TRIGGER IF NOT EXISTS summaries_ad AFTER DELETE ON conversation_summaries BEGIN
|
||||
INSERT INTO summaries_fts(summaries_fts, rowid, summary, topics) VALUES('delete', old.id, old.summary, old.topics);
|
||||
END;
|
||||
CREATE TRIGGER IF NOT EXISTS summaries_au AFTER UPDATE ON conversation_summaries BEGIN
|
||||
INSERT INTO summaries_fts(summaries_fts, rowid, summary, topics) VALUES('delete', old.id, old.summary, old.topics);
|
||||
INSERT INTO summaries_fts(rowid, summary, topics) VALUES (new.id, new.summary, new.topics);
|
||||
END;
|
||||
"""
|
||||
|
||||
_VSS_SCHEMA_SQL = """
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vss0(
|
||||
id INTEGER PRIMARY KEY,
|
||||
vec FLOAT[768]
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS summary_vec (
|
||||
summary_id INTEGER PRIMARY KEY REFERENCES conversation_summaries(id) ON DELETE CASCADE,
|
||||
emb_id INTEGER NOT NULL REFERENCES embeddings(id)
|
||||
);
|
||||
"""
|
||||
|
||||
|
||||
def _normalize_fts_query(raw: str) -> str:
|
||||
# Use improved fuzzy search query generation
|
||||
try:
|
||||
from .fuzzy_search import generate_flexible_fts_query
|
||||
flexible_query = generate_flexible_fts_query(raw)
|
||||
if flexible_query:
|
||||
return flexible_query
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Fallback: Extract alphanumeric tokens and join them with spaces (logical AND)
|
||||
tokens = re.findall(r"[A-Za-z0-9_]+", raw)
|
||||
return " ".join(tokens)
|
||||
|
||||
|
||||
class Database:
    """SQLite-backed store for meals, conversation summaries and their embeddings.

    A single connection is shared between threads (``check_same_thread=False``)
    and every SQL access is serialised through ``self._lock``.

    Vector search uses the sqlite-vss extension when it can be loaded;
    otherwise the store returned by ``get_best_vector_store`` is used.
    """

    # Relative weights of the two signals in hybrid search.
    _VECTOR_WEIGHT = 0.6
    _FTS_WEIGHT = 0.4

    def __init__(self, db_path: str, sqlite_vss_path: Optional[str] = None) -> None:
        """Open (creating if needed) the database at ``db_path``.

        Args:
            db_path: Path of the SQLite file; missing parent directories are
                created.
            sqlite_vss_path: Optional path of the sqlite-vss loadable
                extension. If loading fails, the fallback vector store is
                used instead.
        """
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        self._lock = threading.RLock()
        self.is_vss_enabled = False
        self._python_vector_store = None

        if sqlite_vss_path:
            try:
                self.conn.enable_load_extension(True)
                self.conn.load_extension(sqlite_vss_path)
                self.is_vss_enabled = True
            except Exception as exc:
                # Best effort: any failure here simply selects the fallback
                # store below, but leave a trace of why.
                self.is_vss_enabled = False
                debug_log(
                    f"sqlite-vss extension could not be loaded ({exc}); using fallback vector store",
                    "jarvis",
                )

        # If sqlite-vss is not available, use the best available vector store
        # (FAISS or the pure-Python fallback).
        if not self.is_vss_enabled:
            from ..utils.vector_store import get_best_vector_store

            self._python_vector_store = get_best_vector_store(db_path, dimension=768)

            # Log which vector store implementation is being used.
            store_type = type(self._python_vector_store).__name__
            if store_type == "FAISSVectorStore":
                debug_log("Using FAISS vector store for fast search", "jarvis")
            else:
                debug_log("Using Python fallback vector store", "jarvis")

        self._init_schema()

    def _init_schema(self) -> None:
        """Create tables, the FTS index and triggers (plus VSS tables if enabled)."""
        with self._lock:
            cur = self.conn.cursor()
            cur.executescript(_SCHEMA_SQL)
            if self.is_vss_enabled:
                cur.executescript(_VSS_SCHEMA_SQL)
            self.conn.commit()

    @staticmethod
    def _bm25_to_score(bm: Optional[float]) -> float:
        """Map an FTS5 ``bm25()`` value to a relevance score in ``[0, 1)``.

        FTS5 returns bm25 multiplied by -1, so better matches are *more
        negative*. Feeding that straight into ``1 / (1 + bm)`` is
        non-monotonic, can go negative, and divides by zero at exactly -1.0.
        Here the sign is flipped first and the result squashed with
        ``r / (1 + r)``, which grows with relevance and stays bounded.

        Args:
            bm: Raw ``bm25()`` value, or ``None`` when there was no FTS match.

        Returns:
            0.0 for no match, approaching 1.0 for very strong matches.
        """
        if bm is None:
            return 0.0
        relevance = max(0.0, -float(bm))
        return relevance / (1.0 + relevance)

    def search_hybrid(self, fts_query: str, query_vec_json: Optional[str], top_k: int = 8) -> list[sqlite3.Row]:
        """Search conversation summaries by text and, when possible, by vector.

        Strategy, in order of preference:

        1. Hybrid with the Python/FAISS vector store (no sqlite-vss, a store is
           present, a query vector and a non-empty text query are given).
        2. Hybrid inside SQLite using sqlite-vss.
        3. FTS-only when there is a text query but no usable vector search.
        4. Otherwise the most recent summaries.

        Each result exposes ``id``, ``score``, ``text`` and ``result_type``.
        Path 1 returns plain dicts, the others ``sqlite3.Row`` objects; both
        support ``row["key"]`` access. In the hybrid paths a higher ``score``
        is better. In the FTS-only path ``score`` is the raw bm25 value (lower
        is better, rows already ordered best-first), and in the fallback it is
        always 0.0.

        Args:
            fts_query: Free-form text; normalised with ``_normalize_fts_query``.
            query_vec_json: JSON-encoded query embedding, or ``None``.
            top_k: Maximum number of results.

        Returns:
            Up to ``top_k`` results, best first.
        """
        with self._lock:
            cur = self.conn.cursor()
            safe_q = _normalize_fts_query(fts_query)
            can_hybrid = query_vec_json is not None and bool(safe_q)

            if can_hybrid and not self.is_vss_enabled and self._python_vector_store:
                return self._search_with_python_store(cur, safe_q, query_vec_json, top_k)

            if can_hybrid and self.is_vss_enabled:
                return self._search_with_vss(cur, safe_q, query_vec_json, top_k)

            if safe_q:
                # FTS-only search over conversation summaries.
                summary_sql = f"""
                    SELECT s.id, bm25(summaries_fts) AS score,
                           '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
                           'summary' AS result_type
                    FROM summaries_fts
                    JOIN conversation_summaries s ON s.id = summaries_fts.rowid
                    WHERE summaries_fts MATCH ?
                    ORDER BY score
                    LIMIT {int(top_k)};
                """
                return cur.execute(summary_sql, (safe_q,)).fetchall()

            # Fallback: latest conversation summaries.
            summary_sql = f"""
                SELECT id, 0.0 AS score,
                       '[' || date_utc || '] ' || summary || ' (Topics: ' || COALESCE(topics, '') || ')' AS text,
                       'summary' AS result_type
                FROM conversation_summaries
                ORDER BY date_utc DESC
                LIMIT {int(top_k)};
            """
            return cur.execute(summary_sql).fetchall()

    def _search_with_python_store(self, cur, safe_q: str, query_vec_json: str, top_k: int) -> list:
        """Hybrid search that merges vector-store hits with FTS hits in Python.

        Must be called with ``self._lock`` held.
        """
        import json as _json

        query_vec = _json.loads(query_vec_json)

        # Over-fetch candidates from both sources so the merged ranking has
        # enough material: at least 50, or three times the requested size.
        candidate_limit = max(top_k * 3, 50)
        vector_results = self._python_vector_store.search(query_vec, top_k=candidate_limit)

        fts_sql = f"""
            SELECT s.id, bm25(summaries_fts) AS bm
            FROM summaries_fts
            JOIN conversation_summaries s ON s.id = summaries_fts.rowid
            WHERE summaries_fts MATCH ?
            ORDER BY bm
            LIMIT {int(candidate_limit)}
        """
        fts_rows = cur.execute(fts_sql, (safe_q,)).fetchall()

        combined_scores: dict = {}

        # Vector similarity: a smaller distance gives a larger contribution.
        for summary_id, distance in vector_results:
            combined_scores[summary_id] = (1.0 / (1.0 + distance)) * self._VECTOR_WEIGHT

        # Keyword relevance, added on top of any vector contribution.
        for row in fts_rows:
            fts_part = self._bm25_to_score(row["bm"]) * self._FTS_WEIGHT
            combined_scores[row["id"]] = combined_scores.get(row["id"], 0.0) + fts_part

        ranked = sorted(combined_scores.items(), key=lambda item: item[1], reverse=True)[:top_k]
        if not ranked:
            return []

        placeholders = ",".join("?" * len(ranked))
        summary_sql = f"""
            SELECT s.id,
                   '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
                   'summary' AS result_type
            FROM conversation_summaries s
            WHERE s.id IN ({placeholders})
        """
        rows = cur.execute(summary_sql, [sid for sid, _ in ranked]).fetchall()

        # Ids known to the vector store but no longer present in the table are
        # dropped naturally by the IN (...) lookup above.
        score_by_id = dict(ranked)
        results = []
        for row in rows:
            result = dict(row)
            result["score"] = score_by_id.get(row["id"], 0.0)
            results.append(result)

        # The IN (...) query does not preserve ranking order; restore it.
        results.sort(key=lambda item: item["score"], reverse=True)
        return results

    def _search_with_vss(self, cur, safe_q: str, query_vec_json: str, top_k: int) -> list[sqlite3.Row]:
        """Hybrid search computed inside SQLite using sqlite-vss and FTS5.

        Must be called with ``self._lock`` held. The FTS term uses the same
        mapping as ``_bm25_to_score``; rows without an FTS match contribute 0,
        and rows without a vector match are treated as distance 1.
        """
        search_limit = int(max(top_k * 3, 50))
        summary_sql = f"""
            WITH fts_sum AS (
                SELECT s.id, bm25(summaries_fts) AS bm
                FROM summaries_fts
                JOIN conversation_summaries s ON s.id = summaries_fts.rowid
                WHERE summaries_fts MATCH ?
                ORDER BY bm LIMIT {search_limit}
            ),
            v_sum AS (
                SELECT sv.summary_id AS id, distance
                FROM vss_search(embeddings, 'vec', ?)
                JOIN summary_vec sv ON sv.emb_id = rowid
                LIMIT {search_limit}
            )
            SELECT s.id, (
                (1.0/(1.0+COALESCE(v_sum.distance, 1))) * {self._VECTOR_WEIGHT} +
                (1.0 - 1.0/(1.0 + MAX(0.0, -COALESCE(fts_sum.bm, 0.0)))) * {self._FTS_WEIGHT}
            ) AS score,
            '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
            'summary' AS result_type
            FROM conversation_summaries s
            LEFT JOIN v_sum ON v_sum.id = s.id
            LEFT JOIN fts_sum ON fts_sum.id = s.id
            WHERE v_sum.id IS NOT NULL OR fts_sum.id IS NOT NULL
            ORDER BY score DESC
            LIMIT {int(top_k)};
        """
        return cur.execute(summary_sql, (safe_q, query_vec_json)).fetchall()

    @staticmethod
    def _pack_vector(vec: Sequence[float]) -> bytes:
        """Pack ``vec`` as a contiguous blob of native C floats."""
        # SQLite-vss expects a float array; array('f') gives that binary layout.
        import array

        return array.array("f", [float(x) for x in vec]).tobytes()

    # --- Meals API ---
    def insert_meal(
        self,
        ts_utc: str,
        source_app: str,
        description: str,
        calories_kcal: Optional[float] = None,
        protein_g: Optional[float] = None,
        carbs_g: Optional[float] = None,
        fat_g: Optional[float] = None,
        fiber_g: Optional[float] = None,
        sugar_g: Optional[float] = None,
        sodium_mg: Optional[float] = None,
        potassium_mg: Optional[float] = None,
        micros_json: Optional[str] = None,
        confidence: Optional[float] = None,
    ) -> int:
        """Insert one meal row and return its id."""
        with self._lock:
            cur = self.conn.cursor()
            cur.execute(
                """
                INSERT INTO meals(ts_utc, source_app, description, calories_kcal, protein_g, carbs_g, fat_g, fiber_g, sugar_g, sodium_mg, potassium_mg, micros_json, confidence)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    ts_utc,
                    source_app,
                    description,
                    calories_kcal,
                    protein_g,
                    carbs_g,
                    fat_g,
                    fiber_g,
                    sugar_g,
                    sodium_mg,
                    potassium_mg,
                    micros_json,
                    confidence,
                ),
            )
            self.conn.commit()
            return int(cur.lastrowid)

    def get_meals_between(self, ts_utc_min: str, ts_utc_max: str) -> list[sqlite3.Row]:
        """Return meals with ``ts_utc_min <= ts_utc <= ts_utc_max``, oldest first.

        The bounds are compared as text, exactly as stored.
        """
        with self._lock:
            cur = self.conn.cursor()
            return cur.execute(
                """
                SELECT * FROM meals
                WHERE ts_utc >= ? AND ts_utc <= ?
                ORDER BY ts_utc ASC
                """,
                (ts_utc_min, ts_utc_max),
            ).fetchall()

    def delete_meal(self, meal_id: int) -> bool:
        """Delete a meal by id; return True if a row was actually removed."""
        with self._lock:
            cur = self.conn.cursor()
            cur.execute("DELETE FROM meals WHERE id = ?", (meal_id,))
            self.conn.commit()
            return cur.rowcount > 0

    # --- Conversation Summaries API ---
    def upsert_conversation_summary(
        self,
        date_utc: str,  # YYYY-MM-DD format
        summary: str,
        topics: Optional[str] = None,
        source_app: str = "jarvis",
        ts_utc: Optional[str] = None,
    ) -> int:
        """Insert or update the conversation summary for a date and source app.

        ``ts_utc`` defaults to "now". Maintenance ops that rewrite an
        existing row's content without changing what it represents (e.g.
        the deflection scrub bulk sweep) should pass through the row's
        original ``ts_utc`` so the audit trail is preserved.

        An existing row is updated in place, so its ``id`` stays stable and
        the UPDATE trigger keeps the FTS index in sync. ``INSERT OR REPLACE``
        is deliberately avoided: it assigns a new id, which orphans
        embeddings stored under the old one, and with recursive triggers off
        (SQLite's default) its implicit delete does not fire the DELETE
        trigger, leaving stale entries in ``summaries_fts``.

        Requires SQLite 3.24+ for the ``ON CONFLICT ... DO UPDATE`` syntax.

        Returns:
            The id of the inserted or updated row.
        """
        if ts_utc is None:
            ts_utc = datetime.now(timezone.utc).isoformat()
        with self._lock:
            cur = self.conn.cursor()
            cur.execute(
                """
                INSERT INTO conversation_summaries(date_utc, ts_utc, summary, topics, source_app)
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(date_utc, source_app) DO UPDATE SET
                    ts_utc = excluded.ts_utc,
                    summary = excluded.summary,
                    topics = excluded.topics
                """,
                (date_utc, ts_utc, summary, topics, source_app),
            )
            # lastrowid is not meaningful when the statement took the UPDATE
            # path, so look the id up by its unique key instead.
            row = cur.execute(
                "SELECT id FROM conversation_summaries WHERE date_utc = ? AND source_app = ?",
                (date_utc, source_app),
            ).fetchone()
            self.conn.commit()
            return int(row[0])

    def get_conversation_summary(self, date_utc: str, source_app: str = "jarvis") -> Optional[sqlite3.Row]:
        """Get conversation summary for a specific date, or None if absent."""
        with self._lock:
            cur = self.conn.cursor()
            return cur.execute(
                """
                SELECT * FROM conversation_summaries
                WHERE date_utc = ? AND source_app = ?
                """,
                (date_utc, source_app),
            ).fetchone()

    def get_recent_conversation_summaries(self, days: int = 7) -> list[sqlite3.Row]:
        """Get conversation summaries from the last N days, newest first."""
        from datetime import timedelta

        cutoff_date = (datetime.now(timezone.utc) - timedelta(days=days)).date().isoformat()

        with self._lock:
            cur = self.conn.cursor()
            return cur.execute(
                """
                SELECT * FROM conversation_summaries
                WHERE date_utc >= ?
                ORDER BY date_utc DESC
                """,
                (cutoff_date,),
            ).fetchall()

    def get_all_conversation_summaries(self) -> list[sqlite3.Row]:
        """Get all conversation summaries, ordered by date ascending (oldest first).

        Used for bulk import into graph memory - processes diary entries
        chronologically so the graph builds up naturally.
        """
        with self._lock:
            cur = self.conn.cursor()
            return cur.execute(
                """
                SELECT * FROM conversation_summaries
                ORDER BY date_utc ASC
                """,
            ).fetchall()

    def upsert_summary_embedding(self, summary_id: int, vec: Sequence[float]) -> Optional[int]:
        """Store or update the embedding for a conversation summary.

        Returns:
            The embedding row id on the sqlite-vss path, ``summary_id`` itself
            on the Python vector-store path (used as a placeholder), or
            ``None`` when no vector backend is available.
        """
        if self.is_vss_enabled:
            with self._lock:
                cur = self.conn.cursor()
                # NOTE(review): every call inserts a fresh embeddings row, and
                # the row previously mapped to this summary is not removed -
                # confirm whether old rows should be deleted here.
                cur.execute(
                    "INSERT INTO embeddings(vec) VALUES (?)",
                    (sqlite3.Binary(self._pack_vector(vec)),),
                )
                emb_id = cur.lastrowid
                cur.execute(
                    "INSERT OR REPLACE INTO summary_vec(summary_id, emb_id) VALUES (?, ?)",
                    (summary_id, emb_id),
                )
                self.conn.commit()
                return int(emb_id)

        if self._python_vector_store:
            self._python_vector_store.add_vector(summary_id, list(vec))
            return summary_id  # Return summary_id as a placeholder for emb_id

        return None

    def close(self) -> None:
        """Close the connection; failures are logged, never raised."""
        try:
            with self._lock:
                self.conn.close()
        except Exception as exc:
            # Closing is best effort (e.g. during shutdown); do not propagate.
            debug_log(f"Ignoring error while closing database: {exc}", "jarvis")
|
||||
Reference in New Issue
Block a user