# ============================================================================
# Javis Bot — Docker Compose
#   ollama      : the LLM backend for the jarvis brain
#   ollama-init : one-shot, auto-pulls the chat + embed models on startup
#   javis       : all-in-one container (VNC desktop + Chrome + bridge + bot)
#
# Just bring it up — everything (incl. Ollama models) comes up automatically:
#   docker compose up -d --build
#
# The Discord token can be added LAST: without it the desktop, brain bridge,
# Ollama and models all run; only the bot waits. Then put DISCORD_BOT_TOKEN in
# .env and re-run `docker compose up -d`.
#
# Watch the desktop: VNC viewer -> localhost:5901 (or browser -> localhost:6080)
# ============================================================================

services:
  ollama:
    image: ollama/ollama:latest
    restart: unless-stopped
    volumes:
      - ollama_models:/root/.ollama
    # GPU: needs nvidia-container-toolkit on the host (CDI). Verified on the
    # RTX 5050 (Blackwell sm_120) — Ollama offloads 100% to GPU.
    devices:
      - "nvidia.com/gpu=all"

  # Auto-pull the models the brain needs, then exit. Idempotent (re-runnable).
  # Exits non-zero if any pull fails, so a missing model shows up in
  # `docker compose ps -a` instead of being reported as "models ready".
  ollama-init:
    image: ollama/ollama:latest
    depends_on:
      - ollama
    restart: "no"
    environment:
      # The ollama CLI in this container talks to the `ollama` service.
      OLLAMA_HOST: "http://ollama:11434"
      CHAT_MODEL: "${OLLAMA_CHAT_MODEL:-qwen3:8b}"
      EMBED_MODEL: "${OLLAMA_EMBED_MODEL:-nomic-embed-text}"
    entrypoint: ["/bin/sh", "-c"]
    # `$$` escapes Compose interpolation so the shell (not Compose) expands
    # the variable inside the container. Both pulls are always attempted;
    # failures are remembered and reported once at the end.
    command:
      - |
        echo "[ollama-init] waiting for ollama server..."
        until ollama list >/dev/null 2>&1; do sleep 2; done
        failed=0
        echo "[ollama-init] pulling $$CHAT_MODEL"
        ollama pull "$$CHAT_MODEL" || failed=1
        echo "[ollama-init] pulling $$EMBED_MODEL"
        ollama pull "$$EMBED_MODEL" || failed=1
        if [ "$$failed" -ne 0 ]; then
          echo "[ollama-init] ERROR: one or more model pulls failed." >&2
          exit 1
        fi
        echo "[ollama-init] models ready."

  javis:
    build: .
    restart: unless-stopped
    # .env is optional so the stack still comes up before it exists.
    env_file:
      - path: .env
        required: false
    environment:
      # Point the brain at the ollama service and the bot at the in-container bridge.
      OLLAMA_BASE_URL: "http://ollama:11434"
      OLLAMA_CHAT_MODEL: "${OLLAMA_CHAT_MODEL:-qwen3:8b}"
      OLLAMA_EMBED_MODEL: "${OLLAMA_EMBED_MODEL:-nomic-embed-text}"
      WHISPER_MODEL: "${WHISPER_MODEL:-small}"
      WHISPER_DEVICE: "${WHISPER_DEVICE:-cuda}"
      WHISPER_COMPUTE_TYPE: "${WHISPER_COMPUTE_TYPE:-float16}"
      BRIDGE_URL: "http://127.0.0.1:8765"
    depends_on:
      - ollama
    # GPU: accelerates Whisper STT (and anything else CUDA) in this container.
    # Verified: faster-whisper float16 works on the RTX 5050 (sm_120).
    devices:
      - "nvidia.com/gpu=all"
    shm_size: "1gb"  # Chrome needs a larger /dev/shm
    ports:
      # Host ports are overridable. If the HOST already runs VNC on 5901
      # (see docs/vnc-xfce-setup.md), set VNC_PORT=5902 in .env.
      - "${VNC_PORT:-5901}:5901"  # VNC
      - "${NOVNC_PORT:-6080}:6080"  # noVNC (open in a browser)
      - "${BRIDGE_PORT:-8765}:8765"  # brain bridge (usually internal-only)
    volumes:
      - javis_data:/data  # jarvis db + memory
      - whisper_cache:/root/.cache/huggingface  # cached Whisper models
      - piper_voices:/opt/piper-voices  # TTS voices

# Named volumes with default driver settings.
volumes:
  ollama_models:
  javis_data:
  whisper_cache:
  piper_voices: