import { config as loadDotenv } from "dotenv";
import { z } from "zod";

// Load variables from a .env file into process.env before the schema reads them.
loadDotenv();

/**
 * Schema for an optional string setting.
 *
 * String inputs are trimmed first; a blank (empty or whitespace-only) string is
 * treated as "not set" and yields `undefined`. Non-string inputs are passed
 * through untouched to the inner validator.
 */
const emptyToUndefined = z.preprocess((value) => {
  if (typeof value !== "string") {
    return value;
  }
  const trimmed = value.trim();
  return trimmed.length === 0 ? undefined : trimmed;
}, z.string().min(1).optional());

/**
 * Validation schema for the environment variables this module reads.
 *
 * Every key either has a default or is optional, so an empty environment
 * parses successfully.
 *
 * NOTE(review): `z.coerce.number()` converts with `Number(...)`, so a blank
 * value (e.g. `TTS_SDP_RATIO=`) coerces to 0 rather than falling back to the
 * default; for fields whose minimum is 0 that value is accepted silently.
 */
const envSchema = z.object({
  // Local Python / tooling locations.
  LOCAL_AI_VENV_PATH: z.string().min(1).default(".local-ai/.venv"),
  LOCAL_AI_PYTHON: emptyToUndefined,
  AUDIO_SOURCE: emptyToUndefined,
  DOCKER_BIN: emptyToUndefined,

  // TTS_* settings.
  // Enabled unless the value is explicitly "false" (case/whitespace-insensitive);
  // an unset variable therefore means enabled.
  TTS_ENABLED: z
    .string()
    .optional()
    .transform((value) => value?.trim().toLowerCase() !== "false"),
  TTS_IMAGE: z.string().min(1).default("realtime-voice-bot-melotts:v0.1.2"),
  TTS_LANGUAGE: z.string().min(1).default("KR"),
  TTS_SPEAKER: z.string().min(1).default("KR"),
  TTS_DEVICE: z.string().min(1).default("cpu"),
  TTS_SPEED: z.coerce.number().min(0.5).max(2).default(1.18),
  TTS_SDP_RATIO: z.coerce.number().min(0).max(1).default(0.22),
  TTS_NOISE_SCALE: z.coerce.number().min(0).max(2).default(0.55),
  TTS_NOISE_SCALE_W: z.coerce.number().min(0).max(2).default(0.75),
  TTS_CACHE_DIR: z.string().min(1).default(".local-ai/tts-cache"),
  TTS_OUTPUT_DIR: z.string().min(1).default(".local-ai/tts-output"),

  // True only when the value is "true" (case/whitespace-insensitive).
  DEBUG: z
    .string()
    .optional()
    .transform((value) => value?.trim().toLowerCase() === "true"),

  // OLLAMA_* settings and conversation length.
  OLLAMA_BASE_URL: z.string().min(1).default("http://127.0.0.1:11434"),
  OLLAMA_MODEL: z.string().min(1).default("qwen3:8b"),
  OLLAMA_KEEP_ALIVE: z.string().min(1).default("5m"),
  MAX_CONVERSATION_TURNS: z.coerce.number().int().min(1).max(20).default(6),

  // WHISPER_* settings.
  WHISPER_MODEL: z.string().min(1).default("large-v3-turbo"),
  WHISPER_LANGUAGE: z.string().min(1).default("ko"),
  WHISPER_DEVICE: z.enum(["auto", "cuda", "cpu"]).default("auto"),
  WHISPER_COMPUTE_TYPE: z.string().min(1).default("auto"),
  WHISPER_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(2),

  // SEGMENT_* settings (all integers).
  SEGMENT_START_THRESHOLD: z.coerce.number().int().min(100).max(10000).default(900),
  SEGMENT_CONTINUE_THRESHOLD: z.coerce.number().int().min(50).max(10000).default(450),
  SEGMENT_START_FRAMES: z.coerce.number().int().min(1).max(10).default(2),
  SEGMENT_END_FRAMES: z.coerce.number().int().min(4).max(60).default(24),
  SEGMENT_PREROLL_SAMPLES: z.coerce.number().int().min(320).max(16000).default(3200),
  SEGMENT_MIN_SPEECH_SAMPLES: z.coerce.number().int().min(1600).max(64000).default(7200),
  SEGMENT_MAX_SPEECH_SAMPLES: z.coerce.number().int().min(16000).max(320000).default(160000),

  // Parsed the same way as DEBUG so "TRUE" or " true " are not silently
  // treated as false.
  DEBUG_TRANSCRIPTS: z
    .string()
    .optional()
    .transform((value) => value?.trim().toLowerCase() === "true"),
  LOG_LEVEL: z.enum(["debug", "info", "warn", "error"]).default("info"),
});

/** Parsed configuration shape, derived from the output type of `envSchema`. */
export type AppConfig = z.infer<typeof envSchema>;

/**
 * Validate `process.env` against `envSchema` and return the parsed result.
 *
 * @returns The configuration with defaults applied and values coerced.
 * @throws {z.ZodError} If any variable fails validation.
 */
export function loadConfig(): AppConfig {
  return envSchema.parse(process.env);
}