# ============================================================================
# Javis Bot — Docker Compose
#   ollama      : the LLM backend for the jarvis brain
#   ollama-init : one-shot, auto-pulls the chat + embed models on startup
#   javis       : all-in-one container (VNC desktop + Chrome + bridge + bot)
#
# Just bring it up — everything (incl. Ollama models) comes up automatically:
#   docker compose up -d --build
#
# The Discord token can be added LAST: without it the desktop, brain bridge,
# Ollama and models all run; only the bot waits. Then put DISCORD_BOT_TOKEN in
# .env and re-run `docker compose up -d`.
#
# Watch the desktop:  VNC viewer -> localhost:5901  (or browser -> localhost:6080)
# ============================================================================
services:
  # LLM backend served to the other containers on the Compose network.
  ollama:
    image: ollama/ollama:latest
    # GPU passthrough via CDI: the host needs nvidia-container-toolkit.
    # Verified on the RTX 5050 (Blackwell sm_120), where Ollama offloads
    # 100% to the GPU.
    devices:
      - 'nvidia.com/gpu=all'
    # Model store lives in a named volume so it outlives the container.
    volumes:
      - ollama_models:/root/.ollama
    restart: unless-stopped

  # One-shot job: wait for the ollama server, pull the chat + embed models the
  # brain needs, then exit. Idempotent (re-runnable).
  # Exits non-zero if any pull fails, so a broken download is visible in
  # `docker compose ps` / logs instead of being reported as "models ready".
  ollama-init:
    image: ollama/ollama:latest
    depends_on:
      - ollama
    restart: "no"
    environment:
      # Tells the ollama CLI in this container which server to talk to.
      OLLAMA_HOST: http://ollama:11434
      # Same host-side variables (and defaults) the javis service uses, so both
      # agree on which models exist.
      CHAT_MODEL: ${OLLAMA_CHAT_MODEL:-qwen3:8b}
      EMBED_MODEL: ${OLLAMA_EMBED_MODEL:-nomic-embed-text}
    entrypoint: ["/bin/sh", "-c"]
    command:
      # `$$` is Compose's escape for a literal `$`: the variables below are
      # expanded by the shell inside the container, not by Compose.
      # `set -eu` aborts on the first failing command or unset variable; the
      # `until` loop condition is exempt from -e, so polling still works.
      - |
        set -eu;
        echo "[ollama-init] waiting for ollama server...";
        until ollama list >/dev/null 2>&1; do sleep 2; done;
        echo "[ollama-init] pulling $$CHAT_MODEL";
        ollama pull "$$CHAT_MODEL";
        echo "[ollama-init] pulling $$EMBED_MODEL";
        ollama pull "$$EMBED_MODEL";
        echo "[ollama-init] models ready.";

  # All-in-one application container, built from the Dockerfile in this
  # directory.
  javis:
    build: .
    restart: unless-stopped
    depends_on:
      - ollama

    # .env is optional: the stack still starts when the file is absent.
    env_file:
      - path: .env
        required: false
    environment:
      # The brain talks to the ollama service over the Compose network.
      OLLAMA_BASE_URL: http://ollama:11434
      OLLAMA_CHAT_MODEL: ${OLLAMA_CHAT_MODEL:-qwen3:8b}
      OLLAMA_EMBED_MODEL: ${OLLAMA_EMBED_MODEL:-nomic-embed-text}
      # Speech-to-text settings; each can be overridden from the host env/.env.
      WHISPER_MODEL: ${WHISPER_MODEL:-small}
      WHISPER_DEVICE: ${WHISPER_DEVICE:-cuda}
      WHISPER_COMPUTE_TYPE: ${WHISPER_COMPUTE_TYPE:-float16}
      # The bot reaches the bridge inside this same container.
      BRIDGE_URL: http://127.0.0.1:8765

    # GPU: accelerates Whisper STT (and anything else CUDA) in this container.
    # Verified: faster-whisper float16 works on the RTX 5050 (sm_120).
    devices:
      - 'nvidia.com/gpu=all'
    # Chrome needs a larger /dev/shm.
    shm_size: 1gb

    # Host-side ports are overridable. If the HOST already runs VNC on 5901
    # (see docs/vnc-xfce-setup.md), set VNC_PORT=5902 in .env.
    ports:
      # VNC
      - '${VNC_PORT:-5901}:5901'
      # noVNC (open in a browser)
      - '${NOVNC_PORT:-6080}:6080'
      # brain bridge (usually internal-only)
      - '${BRIDGE_PORT:-8765}:8765'

    volumes:
      # jarvis db + memory
      - javis_data:/data
      # cached Whisper models
      - whisper_cache:/root/.cache/huggingface
      # TTS voices
      - piper_voices:/opt/piper-voices

# Named volumes (default driver, no extra options) referenced by the services.
volumes:
  # Mounted at /root/.ollama in the ollama service.
  ollama_models: {}
  # Mounted at /data in the javis service.
  javis_data: {}
  # Mounted at /root/.cache/huggingface in the javis service.
  whisper_cache: {}
  # Mounted at /opt/piper-voices in the javis service.
  piper_voices: {}