Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
443 lines
17 KiB
Python
443 lines
17 KiB
Python
from __future__ import annotations
|
|
import sqlite3
|
|
import re
|
|
from typing import Sequence, Optional
|
|
from pathlib import Path
|
|
import threading
|
|
from datetime import datetime, timezone
|
|
|
|
from ..debug import debug_log
|
|
|
|
_SCHEMA_SQL = """
|
|
PRAGMA journal_mode=WAL;
|
|
PRAGMA synchronous=NORMAL;
|
|
|
|
-- Structured meals log (optional feature)
|
|
CREATE TABLE IF NOT EXISTS meals (
|
|
id INTEGER PRIMARY KEY,
|
|
ts_utc TEXT NOT NULL,
|
|
source_app TEXT NOT NULL,
|
|
description TEXT NOT NULL,
|
|
calories_kcal REAL,
|
|
protein_g REAL,
|
|
carbs_g REAL,
|
|
fat_g REAL,
|
|
fiber_g REAL,
|
|
sugar_g REAL,
|
|
sodium_mg REAL,
|
|
potassium_mg REAL,
|
|
micros_json TEXT,
|
|
confidence REAL
|
|
);
|
|
|
|
-- Conversation summaries for diary/memory system
|
|
CREATE TABLE IF NOT EXISTS conversation_summaries (
|
|
id INTEGER PRIMARY KEY,
|
|
date_utc TEXT NOT NULL, -- YYYY-MM-DD format
|
|
ts_utc TEXT NOT NULL, -- When summary was created
|
|
summary TEXT NOT NULL, -- Concise summary of the day's conversations
|
|
topics TEXT, -- Comma-separated list of main topics discussed
|
|
source_app TEXT NOT NULL, -- Source app that generated the conversation
|
|
UNIQUE(date_utc, source_app)
|
|
);
|
|
|
|
CREATE VIRTUAL TABLE IF NOT EXISTS summaries_fts USING fts5(
|
|
summary,
|
|
topics,
|
|
content='conversation_summaries',
|
|
content_rowid='id',
|
|
tokenize='porter'
|
|
);
|
|
|
|
-- Triggers for conversation summaries FTS
|
|
CREATE TRIGGER IF NOT EXISTS summaries_ai AFTER INSERT ON conversation_summaries BEGIN
|
|
INSERT INTO summaries_fts(rowid, summary, topics) VALUES (new.id, new.summary, new.topics);
|
|
END;
|
|
CREATE TRIGGER IF NOT EXISTS summaries_ad AFTER DELETE ON conversation_summaries BEGIN
|
|
INSERT INTO summaries_fts(summaries_fts, rowid, summary, topics) VALUES('delete', old.id, old.summary, old.topics);
|
|
END;
|
|
CREATE TRIGGER IF NOT EXISTS summaries_au AFTER UPDATE ON conversation_summaries BEGIN
|
|
INSERT INTO summaries_fts(summaries_fts, rowid, summary, topics) VALUES('delete', old.id, old.summary, old.topics);
|
|
INSERT INTO summaries_fts(rowid, summary, topics) VALUES (new.id, new.summary, new.topics);
|
|
END;
|
|
"""
|
|
|
|
_VSS_SCHEMA_SQL = """
|
|
CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vss0(
|
|
id INTEGER PRIMARY KEY,
|
|
vec FLOAT[768]
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS summary_vec (
|
|
summary_id INTEGER PRIMARY KEY REFERENCES conversation_summaries(id) ON DELETE CASCADE,
|
|
emb_id INTEGER NOT NULL REFERENCES embeddings(id)
|
|
);
|
|
"""
|
|
|
|
|
|
def _normalize_fts_query(raw: str) -> str:
|
|
# Use improved fuzzy search query generation
|
|
try:
|
|
from .fuzzy_search import generate_flexible_fts_query
|
|
flexible_query = generate_flexible_fts_query(raw)
|
|
if flexible_query:
|
|
return flexible_query
|
|
except ImportError:
|
|
pass
|
|
|
|
# Fallback: Extract alphanumeric tokens and join them with spaces (logical AND)
|
|
tokens = re.findall(r"[A-Za-z0-9_]+", raw)
|
|
return " ".join(tokens)
|
|
|
|
|
|
class Database:
    """SQLite-backed store for meals, conversation summaries and their embeddings.

    One connection is opened with ``check_same_thread=False`` and shared; the
    SQL issued by this class's methods is serialised through a re-entrant
    lock (``self._lock``). Vector search uses the sqlite-vss extension when it
    can be loaded, otherwise the store returned by ``get_best_vector_store``.
    """

    def __init__(self, db_path: str, sqlite_vss_path: Optional[str] = None) -> None:
        """Open the database at ``db_path`` and make sure the schema exists.

        Args:
            db_path: Path of the SQLite file; its parent directory is created
                if missing.
            sqlite_vss_path: Optional path of the sqlite-vss loadable
                extension. If it is omitted or cannot be loaded, a
                Python-side vector store is used instead.
        """
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path, check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        self._lock = threading.RLock()
        self.is_vss_enabled = False
        self._python_vector_store = None

        if sqlite_vss_path:
            try:
                self.conn.enable_load_extension(True)
                self.conn.load_extension(sqlite_vss_path)
                self.is_vss_enabled = True
            except Exception as exc:
                # Best effort: a missing or incompatible extension must not
                # prevent start-up, but leave a trace of why the fallback
                # store is in use.
                self.is_vss_enabled = False
                debug_log(f"Could not load sqlite-vss extension: {exc}", "jarvis")

        # If sqlite-vss is not available, use the best available vector store
        # (FAISS or the pure-Python fallback).
        if not self.is_vss_enabled:
            from ..utils.vector_store import get_best_vector_store

            self._python_vector_store = get_best_vector_store(db_path, dimension=768)

            # Log which vector store implementation is being used.
            store_type = type(self._python_vector_store).__name__
            if store_type == "FAISSVectorStore":
                debug_log("Using FAISS vector store for fast search", "jarvis")
            else:
                debug_log("Using Python fallback vector store", "jarvis")

        self._init_schema()

    def _init_schema(self) -> None:
        """Run the schema scripts (plus the VSS script when VSS is enabled)."""
        with self._lock:
            cur = self.conn.cursor()
            cur.executescript(_SCHEMA_SQL)
            if self.is_vss_enabled:
                cur.executescript(_VSS_SCHEMA_SQL)
            self.conn.commit()

    # --- Search ---
    def search_hybrid(self, fts_query: str, query_vec_json: Optional[str], top_k: int = 8) -> list[sqlite3.Row]:
        """Search conversation summaries by keywords and, when possible, by vector.

        Strategy, in order of preference:

        1. Python vector store + FTS (no sqlite-vss, vector and query given).
        2. sqlite-vss + FTS in a single SQL statement.
        3. FTS only (a usable query but no vector).
        4. Most recent summaries (no usable query).

        Args:
            fts_query: Raw user text; normalised with ``_normalize_fts_query``.
            query_vec_json: JSON-encoded query embedding, or ``None``.
            top_k: Maximum number of results.

        Returns:
            Rows exposing ``id``, ``score``, ``text`` and ``result_type``.
            Strategy 1 returns plain dicts with those keys; the others return
            ``sqlite3.Row`` objects. In the two hybrid strategies a higher
            score is better; the FTS-only strategy exposes the raw ``bm25``
            value (lower is better) and is already sorted best-first.
        """
        with self._lock:
            cur = self.conn.cursor()
            safe_q = _normalize_fts_query(fts_query)
            can_go_hybrid = query_vec_json is not None and bool(safe_q)

            if can_go_hybrid and not self.is_vss_enabled and self._python_vector_store is not None:
                return self._search_python_hybrid(cur, safe_q, query_vec_json, top_k)
            if can_go_hybrid and self.is_vss_enabled:
                return self._search_vss_hybrid(cur, safe_q, query_vec_json, top_k)
            if safe_q:
                return self._search_fts_only(cur, safe_q, top_k)
            return self._latest_summaries(cur, top_k)

    @staticmethod
    def _bm25_to_unit_score(bm: float) -> float:
        """Map SQLite's ``bm25()`` value onto [0, 1), higher meaning better.

        ``bm25()`` returns a negative number whose magnitude grows with
        relevance, so the sign is flipped before squashing. Feeding the raw
        value to ``1 / (1 + bm)`` would yield negative or unbounded scores and
        divide by zero at ``bm == -1``.
        """
        relevance = max(0.0, -float(bm))
        return relevance / (1.0 + relevance)

    def _search_python_hybrid(self, cur, safe_q: str, query_vec_json: str, top_k: int) -> list:
        """Blend Python vector-store hits (60%) with FTS hits (40%)."""
        import json as _json

        query_vec = _json.loads(query_vec_json)

        # Over-fetch candidates from both sources so the blended ranking has
        # enough material to work with.
        candidate_limit = max(int(top_k) * 3, 50)
        vector_hits = self._python_vector_store.search(query_vec, top_k=candidate_limit)

        fts_sql = f"""
            SELECT s.id, bm25(summaries_fts) AS bm
            FROM summaries_fts
            JOIN conversation_summaries s ON s.id = summaries_fts.rowid
            WHERE summaries_fts MATCH ?
            ORDER BY bm
            LIMIT {candidate_limit}
        """
        fts_rows = cur.execute(fts_sql, (safe_q,)).fetchall()

        blended: dict = {}
        for summary_id, distance in vector_hits:
            blended[summary_id] = (1.0 / (1.0 + distance)) * 0.6
        for fts_row in fts_rows:
            summary_id = fts_row["id"]
            blended[summary_id] = blended.get(summary_id, 0.0) + self._bm25_to_unit_score(fts_row["bm"]) * 0.4

        best = sorted(blended.items(), key=lambda item: item[1], reverse=True)[: int(top_k)]
        if not best:
            return []

        placeholders = ",".join("?" * len(best))
        summary_sql = f"""
            SELECT s.id,
                   '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
                   'summary' AS result_type
            FROM conversation_summaries s
            WHERE s.id IN ({placeholders})
        """
        rows = cur.execute(summary_sql, [summary_id for summary_id, _ in best]).fetchall()

        # Attach the blended score. Ids that the vector store knows but the
        # table no longer contains simply produce no row here.
        score_by_id = dict(best)
        results = []
        for row in rows:
            entry = dict(row)
            entry["score"] = score_by_id.get(row["id"], 0.0)
            results.append(entry)

        # The IN (...) query returns rows in arbitrary order; restore ranking.
        results.sort(key=lambda entry: entry["score"], reverse=True)
        return results

    def _search_vss_hybrid(self, cur, safe_q: str, query_vec_json: str, top_k: int) -> list:
        """Blend sqlite-vss distance (60%) with FTS relevance (40%) in SQL."""
        search_limit = max(int(top_k) * 3, 50)
        # The FTS term mirrors _bm25_to_unit_score: bm25() is negative, so it
        # is negated and squashed into [0, 1). A summary with no FTS hit has a
        # NULL bm and contributes 0 to that term.
        summary_sql = f"""
            WITH fts_sum AS (
                SELECT s.id, bm25(summaries_fts) AS bm
                FROM summaries_fts
                JOIN conversation_summaries s ON s.id = summaries_fts.rowid
                WHERE summaries_fts MATCH ?
                ORDER BY bm LIMIT {search_limit}
            ),
            v_sum AS (
                SELECT sv.summary_id AS id, distance
                FROM vss_search(embeddings, 'vec', ?)
                JOIN summary_vec sv ON sv.emb_id = rowid
                LIMIT {search_limit}
            )
            SELECT s.id, (
                (1.0/(1.0+COALESCE(v_sum.distance, 1))) * 0.6 +
                COALESCE(MAX(0.0, -fts_sum.bm) / (1.0 + MAX(0.0, -fts_sum.bm)), 0.0) * 0.4
            ) AS score,
                   '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
                   'summary' AS result_type
            FROM conversation_summaries s
            LEFT JOIN v_sum ON v_sum.id = s.id
            LEFT JOIN fts_sum ON fts_sum.id = s.id
            WHERE v_sum.id IS NOT NULL OR fts_sum.id IS NOT NULL
            ORDER BY score DESC
            LIMIT {int(top_k)};
        """
        return cur.execute(summary_sql, (safe_q, query_vec_json)).fetchall()

    def _search_fts_only(self, cur, safe_q: str, top_k: int) -> list:
        """Keyword-only search; ``score`` is the raw bm25 value, best first."""
        summary_sql = f"""
            SELECT s.id, bm25(summaries_fts) AS score,
                   '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
                   'summary' AS result_type
            FROM summaries_fts
            JOIN conversation_summaries s ON s.id = summaries_fts.rowid
            WHERE summaries_fts MATCH ?
            ORDER BY score
            LIMIT {int(top_k)};
        """
        return cur.execute(summary_sql, (safe_q,)).fetchall()

    def _latest_summaries(self, cur, top_k: int) -> list:
        """Fallback when there is nothing to search for: newest summaries first."""
        summary_sql = f"""
            SELECT id, 0.0 AS score,
                   '[' || date_utc || '] ' || summary || ' (Topics: ' || COALESCE(topics, '') || ')' AS text,
                   'summary' AS result_type
            FROM conversation_summaries
            ORDER BY date_utc DESC
            LIMIT {int(top_k)};
        """
        return cur.execute(summary_sql).fetchall()

    @staticmethod
    def _pack_vector(vec: Sequence[float]) -> bytes:
        """Pack ``vec`` as native single-precision floats for the ``embeddings`` blob."""
        import array

        return array.array("f", [float(x) for x in vec]).tobytes()

    # --- Meals API ---
    def insert_meal(
        self,
        ts_utc: str,
        source_app: str,
        description: str,
        calories_kcal: Optional[float] = None,
        protein_g: Optional[float] = None,
        carbs_g: Optional[float] = None,
        fat_g: Optional[float] = None,
        fiber_g: Optional[float] = None,
        sugar_g: Optional[float] = None,
        sodium_mg: Optional[float] = None,
        potassium_mg: Optional[float] = None,
        micros_json: Optional[str] = None,
        confidence: Optional[float] = None,
    ) -> int:
        """Insert one meal row and return its id; omitted values are stored as NULL."""
        values = (
            ts_utc,
            source_app,
            description,
            calories_kcal,
            protein_g,
            carbs_g,
            fat_g,
            fiber_g,
            sugar_g,
            sodium_mg,
            potassium_mg,
            micros_json,
            confidence,
        )
        with self._lock:
            cur = self.conn.cursor()
            cur.execute(
                """
                INSERT INTO meals(ts_utc, source_app, description, calories_kcal, protein_g, carbs_g, fat_g, fiber_g, sugar_g, sodium_mg, potassium_mg, micros_json, confidence)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                values,
            )
            self.conn.commit()
            return int(cur.lastrowid)

    def get_meals_between(self, ts_utc_min: str, ts_utc_max: str) -> list[sqlite3.Row]:
        """Return meals with ``ts_utc`` in the inclusive range, oldest first.

        The comparison is done on the stored text values.
        """
        with self._lock:
            cur = self.conn.cursor()
            return cur.execute(
                """
                SELECT * FROM meals
                WHERE ts_utc >= ? AND ts_utc <= ?
                ORDER BY ts_utc ASC
                """,
                (ts_utc_min, ts_utc_max),
            ).fetchall()

    def delete_meal(self, meal_id: int) -> bool:
        """Delete the meal with ``meal_id``; return True if a row was removed."""
        with self._lock:
            cur = self.conn.cursor()
            cur.execute("DELETE FROM meals WHERE id = ?", (meal_id,))
            self.conn.commit()
            return cur.rowcount > 0

    # --- Conversation Summaries API ---
    def upsert_conversation_summary(
        self,
        date_utc: str,  # YYYY-MM-DD format
        summary: str,
        topics: Optional[str] = None,
        source_app: str = "jarvis",
        ts_utc: Optional[str] = None,
    ) -> int:
        """Insert or update the conversation summary for a date and source app.

        ``ts_utc`` defaults to "now". Maintenance ops that rewrite an
        existing row's content without changing what it represents (e.g.
        the deflection scrub bulk sweep) should pass through the row's
        original ``ts_utc`` so the audit trail is preserved.

        An existing row is updated in place rather than replaced. With
        ``INSERT OR REPLACE`` SQLite removes the conflicting row without
        firing the AFTER DELETE trigger (unless recursive triggers are
        enabled), which would leave stale entries in the FTS index and give
        the summary a new id on every rewrite.

        Returns:
            The id of the inserted or updated row.
        """
        if ts_utc is None:
            ts_utc = datetime.now(timezone.utc).isoformat()
        with self._lock:
            cur = self.conn.cursor()
            existing = cur.execute(
                "SELECT id FROM conversation_summaries WHERE date_utc = ? AND source_app = ?",
                (date_utc, source_app),
            ).fetchone()
            if existing is not None:
                summary_id = int(existing[0])
                # UPDATE fires the AFTER UPDATE trigger, which swaps the old
                # FTS entry for the new one.
                cur.execute(
                    """
                    UPDATE conversation_summaries
                    SET ts_utc = ?, summary = ?, topics = ?
                    WHERE id = ?
                    """,
                    (ts_utc, summary, topics, summary_id),
                )
            else:
                cur.execute(
                    """
                    INSERT INTO conversation_summaries(date_utc, ts_utc, summary, topics, source_app)
                    VALUES (?, ?, ?, ?, ?)
                    """,
                    (date_utc, ts_utc, summary, topics, source_app),
                )
                summary_id = int(cur.lastrowid)
            self.conn.commit()
            return summary_id

    def get_conversation_summary(self, date_utc: str, source_app: str = "jarvis") -> Optional[sqlite3.Row]:
        """Return the summary row for ``date_utc``/``source_app``, or None if absent."""
        with self._lock:
            cur = self.conn.cursor()
            return cur.execute(
                """
                SELECT * FROM conversation_summaries
                WHERE date_utc = ? AND source_app = ?
                """,
                (date_utc, source_app),
            ).fetchone()

    def get_recent_conversation_summaries(self, days: int = 7) -> list[sqlite3.Row]:
        """Return summaries dated within the last ``days`` days (UTC), newest first."""
        from datetime import timedelta

        cutoff_date = (datetime.now(timezone.utc) - timedelta(days=days)).date().isoformat()

        with self._lock:
            cur = self.conn.cursor()
            return cur.execute(
                """
                SELECT * FROM conversation_summaries
                WHERE date_utc >= ?
                ORDER BY date_utc DESC
                """,
                (cutoff_date,),
            ).fetchall()

    def get_all_conversation_summaries(self) -> list[sqlite3.Row]:
        """Return every conversation summary, ordered by date ascending (oldest first).

        Used for bulk import into graph memory: diary entries are processed
        chronologically so the graph builds up naturally.
        """
        with self._lock:
            cur = self.conn.cursor()
            return cur.execute(
                """
                SELECT * FROM conversation_summaries
                ORDER BY date_utc ASC
                """,
            ).fetchall()

    def upsert_summary_embedding(self, summary_id: int, vec: Sequence[float]) -> Optional[int]:
        """Store or update the embedding for a conversation summary.

        Returns:
            With sqlite-vss, the id of the new ``embeddings`` row; with the
            Python vector store, ``summary_id`` itself (a placeholder);
            ``None`` when no vector backend is available.
        """
        if self.is_vss_enabled:
            with self._lock:
                cur = self.conn.cursor()
                cur.execute(
                    "INSERT INTO embeddings(vec) VALUES (?)",
                    (sqlite3.Binary(self._pack_vector(vec)),),
                )
                emb_id = cur.lastrowid
                # Re-point the summary at the fresh embedding row.
                cur.execute(
                    "INSERT OR REPLACE INTO summary_vec(summary_id, emb_id) VALUES (?, ?)",
                    (summary_id, emb_id),
                )
                self.conn.commit()
                return int(emb_id)

        # Compare against None rather than relying on truthiness, so a store
        # that is empty (and might report itself as falsy) still accepts its
        # first vector.
        if self._python_vector_store is not None:
            self._python_vector_store.add_vector(summary_id, list(vec))
            return summary_id  # placeholder for emb_id

        return None

    def close(self) -> None:
        """Close the connection; failures are logged instead of raised."""
        try:
            with self._lock:
                self.conn.close()
        except Exception as exc:
            # Closing is best effort (e.g. during shutdown), so never raise.
            debug_log(f"Error while closing database: {exc}", "jarvis")
|