diff --git a/.env.example b/.env.example
index 97e5f56..b0e83fb 100644
--- a/.env.example
+++ b/.env.example
@@ -7,6 +7,10 @@ LOCAL_AI_PYTHON=python
 AUDIO_SOURCE=
 
 DEBUG=false
+OLLAMA_BASE_URL=http://127.0.0.1:11434
+OLLAMA_MODEL=qwen3:8b
+OLLAMA_KEEP_ALIVE=5m
+MAX_CONVERSATION_TURNS=6
 WHISPER_MODEL=large-v3-turbo
 WHISPER_LANGUAGE=ko
 WHISPER_DEVICE=auto
diff --git a/README.md b/README.md
index d9a6a8e..3ba60fa 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # realtime_voice_bot
 
-출력장치로 재생되는 소리를 파일 저장 없이 바로 받아서, 메모리 버퍼에서 발화 구간을 나눈 뒤 `faster-whisper`로 STT 하는 최소 프로토타입입니다.
+출력장치로 재생되는 소리를 파일 저장 없이 바로 받아서 `faster-whisper`로 STT 테스트를 하고, 별도로 `Ollama` LLM CLI 테스트를 할 수 있는 최소 프로토타입입니다.
 
 현재 문서는 **Windows PC에서 실행하는 기준**으로 적었습니다.
 
@@ -11,12 +11,13 @@
 - 메모리 버퍼 기반 간단한 저지연 발화 분리
 - 미리 로드한 `faster-whisper` 워커에 PCM 직접 전달
 - 디스크에 WAV 저장 없이 바로 전사
+- 로컬 `Ollama` LLM CLI 테스트
 
 ## 빠른 시작
 
 ```bat
 bun install
-bun run setup:python
+bun run setup
 copy .env.example .env
 ```
 
@@ -29,7 +30,13 @@ bun run devices
 실행:
 
 ```bat
-bun run start:loopback
+bun run test:stt
+```
+
+LLM 단독 테스트:
+
+```bat
+bun run test:llm
 ```
 
 ## 환경 변수
@@ -42,6 +49,15 @@ bun run start:loopback
   - `false`면 전사 결과만 출력
 - `WHISPER_MODEL`
   - 기본값 `large-v3-turbo`
+- `OLLAMA_BASE_URL`
+  - 기본값 `http://127.0.0.1:11434`
+- `OLLAMA_MODEL`
+  - 기본값 `qwen3:8b`
+- `OLLAMA_KEEP_ALIVE`
+  - 기본값 `5m`
+- `MAX_CONVERSATION_TURNS`
+  - 기본값 `6`
+  - 최근 대화 몇 턴까지 LLM 문맥으로 넘길지 정합니다 (1~20 사이의 정수)
 - `WHISPER_LANGUAGE`
   - 기본값 `ko`
 - `WHISPER_DEVICE`
@@ -59,7 +75,7 @@ bun run start:loopback
 
 ## 메모
 
-- 이 버전은 일단 `STT`만 합니다.
+- 이 버전은 `STT` 테스트와 `LLM` 테스트를 따로 합니다.
 - 최소 지연을 위해 파일 저장은 하지 않습니다.
 - VAD는 현재 모델 기반이 아니라 진폭 기반 단순 분리입니다.
 - Windows에서는 보통 출력 루프백이 가능한 장치나 `Stereo Mix`, 오디오 인터페이스 loopback 채널을 `AUDIO_SOURCE`로 잡아야 합니다.
@@ -70,12 +86,19 @@ bun run start:loopback
 ## Windows 테스트 순서
 
 1. `bun install`
-2. `bun run setup:python`
+2. `bun run setup:stt`
 3. `copy .env.example .env`
 4. `bun run devices`
 5. `.env`에서 `AUDIO_SOURCE=`에 루프백 장치 이름 입력
-6. `bun run start:loopback`
-7. 유튜브, 디스코드 통화, 동영상 같은 소리를 재생해서 전사 로그 확인
+6. `bun run test:stt`
+7. 유튜브, 디스코드 통화, 동영상 같은 소리를 재생해서 전사 확인
+
+## Windows LLM 테스트 순서
+
+1. `bun run setup:llm` (Ollama가 미리 설치되어 있고 `ollama` 명령이 PATH에 있어야 합니다)
+2. `bun run test:llm`
+3. 콘솔에 직접 문장을 입력하고 답변 확인
+4. `/reset` 으로 문맥 초기화, `/exit` 로 종료
 
 ## Windows용 .env 예시
 
@@ -83,6 +106,10 @@ bun run start:loopback
 LOCAL_AI_PYTHON=python
 AUDIO_SOURCE=
 DEBUG=false
+OLLAMA_BASE_URL=http://127.0.0.1:11434
+OLLAMA_MODEL=qwen3:8b
+OLLAMA_KEEP_ALIVE=5m
+MAX_CONVERSATION_TURNS=6
 WHISPER_MODEL=large-v3-turbo
 WHISPER_LANGUAGE=ko
 WHISPER_DEVICE=auto
diff --git a/package.json b/package.json
index 7f80149..2d2978b 100644
--- a/package.json
+++ b/package.json
@@ -4,9 +4,13 @@
   "private": true,
   "type": "module",
   "scripts": {
-    "start:loopback": "bun src/index.ts loopback",
+    "setup": "bun src/setup.ts",
+    "setup:stt": "bun src/setup-python.ts",
+    "setup:llm": "bun src/setup-llm.ts",
+    "setup:python": "bun run setup:stt",
+    "test:stt": "bun src/index.ts test-stt",
+    "test:llm": "bun src/index.ts test-llm",
     "devices": "bun src/index.ts devices",
-    "setup:python": "bun src/setup-python.ts",
     "check": "tsc --noEmit",
     "build": "tsc -p tsconfig.json"
   },
diff --git a/src/config.ts b/src/config.ts
index 496c289..785b54f 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -19,6 +19,10 @@ const envSchema = z.object({
     .string()
     .optional()
     .transform((value) => value?.trim().toLowerCase() === "true"),
+  OLLAMA_BASE_URL: z.string().min(1).default("http://127.0.0.1:11434"),
+  OLLAMA_MODEL: z.string().min(1).default("qwen3:8b"),
+  OLLAMA_KEEP_ALIVE: z.string().min(1).default("5m"),
+  MAX_CONVERSATION_TURNS: z.coerce.number().int().min(1).max(20).default(6),
   WHISPER_MODEL: z.string().min(1).default("large-v3-turbo"),
   WHISPER_LANGUAGE: z.string().min(1).default("ko"),
   WHISPER_DEVICE: z.enum(["auto", "cuda", "cpu"]).default("auto"),
diff --git a/src/index.ts b/src/index.ts
index 08998c6..227545c 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,14 +1,16 @@
 import process from "node:process";
+import { createInterface } from "node:readline";
 
 import { loadConfig } from "./config.js";
 import { Logger } from "./logger.js";
 import { printAudioDevices, spawnLoopbackCapture } from "./audio/capture.js";
 import { RealtimeSegmenter } from "./audio/realtime-segmenter.js";
 import { FasterWhisperSttService } from "./services/faster-whisper-stt.js";
+import { OllamaLlmService } from "./services/ollama-llm.js";
 
-const mode = process.argv[2] ?? "loopback";
+const mode = process.argv[2] ?? "test-stt";
 
-async function runLoopback(): Promise<void> {
+async function runSttTest(): Promise<void> {
   const config = loadConfig();
   const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
   const stt = new FasterWhisperSttService(config, logger);
@@ -104,7 +106,7 @@ async function runLoopback(): Promise<void> {
         }
       }
     } catch (error) {
-      logger.warn("STT failed", error);
+      logger.error("STT failed", error);
     } finally {
       transcribing = false;
       void runNext();
@@ -146,7 +148,11 @@ async function runLoopback(): Promise<void> {
     },
     onSpeechReady: (samples) => {
       emittedSegmentCount += 1;
-      logger.info("Speech segment ready", { index: emittedSegmentCount, samples, ms: Math.round((samples / 16000) * 1000) });
+      logger.info("Speech segment ready", {
+        index: emittedSegmentCount,
+        samples,
+        ms: Math.round((samples / 16000) * 1000),
+      });
     },
     onSegment: (pcm16) => {
       const index = nextSegmentIndex++;
@@ -188,7 +194,7 @@ async function runLoopback(): Promise<void> {
   });
 
   if (config.DEBUG) {
-    console.log("실시간 출력장치 STT를 시작합니다. Ctrl+C 로 종료합니다.");
+    console.log("실시간 출력장치 STT 테스트를 시작합니다. Ctrl+C 로 종료합니다.");
     console.log(`source: ${config.AUDIO_SOURCE ?? "unset"}`);
     console.log(`model: ${config.WHISPER_MODEL}`);
     console.log(`language: ${config.WHISPER_LANGUAGE}`);
@@ -208,16 +214,111 @@ async function runLoopback(): Promise<void> {
   }, 5000).unref();
 }
 
+/**
+ * Runs an interactive console chat against the configured Ollama model.
+ *
+ * Input lines are handled strictly one at a time: a line that arrives while a
+ * reply is still being generated is queued, so turns cannot interleave in the
+ * conversation history. When stdin ends, the in-flight reply and any queued
+ * lines are still answered before the process exits.
+ *
+ * Commands: `/reset` clears the conversation context, `/exit` quits.
+ */
+async function runLlmCli(): Promise<void> {
+  const config = loadConfig();
+  const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
+  const llm = new OllamaLlmService(config, logger);
+
+  await llm.warmup();
+
+  console.log(`LLM CLI 테스트를 시작합니다. model=${config.OLLAMA_MODEL}`);
+  console.log("/exit 로 종료, /reset 으로 대화 초기화");
+
+  const rl = createInterface({
+    input: process.stdin,
+    output: process.stdout,
+    prompt: "you> ",
+  });
+
+  // Set once the interface has closed; a closed interface must not be prompted.
+  let closed = false;
+  // Set by /exit so that lines queued behind it are dropped, not answered.
+  let exitRequested = false;
+  // Tail of the handler chain: each line starts only after the previous one settled.
+  let pending: Promise<void> = Promise.resolve();
+
+  const promptIfOpen = (): void => {
+    if (!closed) {
+      rl.prompt();
+    }
+  };
+
+  const handleLine = async (line: string): Promise<void> => {
+    if (exitRequested) {
+      return;
+    }
+
+    const text = line.trim();
+
+    if (!text) {
+      promptIfOpen();
+      return;
+    }
+
+    if (text === "/exit") {
+      exitRequested = true;
+      rl.close();
+      return;
+    }
+
+    if (text === "/reset") {
+      llm.resetConversation();
+      console.log("assistant> 대화 문맥을 초기화했습니다.");
+      promptIfOpen();
+      return;
+    }
+
+    try {
+      const startedAt = Date.now();
+      const reply = await llm.generateReply(text);
+      logger.info("LLM latency", {
+        llm_ms: Date.now() - startedAt,
+      });
+      console.log(`assistant> ${reply}`);
+    } catch (error) {
+      console.error(error instanceof Error ? error.message : String(error));
+    }
+
+    promptIfOpen();
+  };
+
+  rl.on("line", (line) => {
+    // handleLine reports LLM failures itself, so a failed turn does not break the chain.
+    pending = pending.then(() => handleLine(line));
+  });
+
+  rl.on("close", () => {
+    closed = true;
+    // Let the in-flight reply and any queued input finish before exiting.
+    void pending.finally(() => process.exit(0));
+  });
+
+  rl.prompt();
+}
+
 async function main(): Promise<void> {
   switch (mode) {
     case "devices":
       await printAudioDevices();
       return;
-    case "loopback":
-      await runLoopback();
+    case "test-stt":
+      await runSttTest();
+      return;
+    case "test-llm":
+      await runLlmCli();
       return;
     default:
-      throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: loopback, devices`);
+      throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: test-stt, test-llm, devices`);
   }
 }
 
diff --git a/src/services/ollama-llm.ts b/src/services/ollama-llm.ts
new file mode 100644
index 0000000..a2ac2ee
--- /dev/null
+++ b/src/services/ollama-llm.ts
@@ -0,0 +1,133 @@
+import type { AppConfig } from "../config.js";
+import type { Logger } from "../logger.js";
+
+/** One message in an Ollama chat request. */
+interface OllamaChatMessage {
+  role: "system" | "user" | "assistant";
+  content: string;
+}
+
+/** The subset of the non-streaming chat response body that this service reads. */
+interface OllamaChatResponse {
+  message?: {
+    content?: string;
+  };
+}
+
+const SYSTEM_PROMPT =
+  "너는 한국어로 짧고 자연스럽게 답하는 로컬 음성 비서다. 사용자의 말에 바로 답하고, 군더더기 없는 1~3문장으로 답해라.";
+
+/**
+ * Chat client for an Ollama server that keeps a rolling window of the most
+ * recent conversation turns and replays it with every request.
+ */
+export class OllamaLlmService {
+  /** Past turns as alternating user/assistant messages, oldest first. */
+  private history: OllamaChatMessage[] = [];
+
+  constructor(
+    private readonly config: AppConfig,
+    private readonly logger: Logger,
+  ) {}
+
+  /**
+   * Sends one throwaway request and logs the reply; the exchange is not added
+   * to the conversation history.
+   *
+   * @throws Error if the chat request fails.
+   */
+  async warmup(): Promise<void> {
+    const reply = await this.chat(
+      [
+        { role: "system", content: SYSTEM_PROMPT },
+        { role: "user", content: "준비 상태 확인입니다. 한 단어로만 답하세요." },
+      ],
+    );
+    this.logger.info("LLM warmup finished", { model: this.config.OLLAMA_MODEL, reply });
+  }
+
+  /**
+   * Asks the model to answer `userText` given the system prompt and the
+   * retained history, then records the new turn.
+   *
+   * The history is only updated after a successful reply, so a failed request
+   * leaves the conversation unchanged.
+   *
+   * @param userText - The user's message for this turn.
+   * @returns The model's reply with surrounding whitespace trimmed.
+   * @throws Error if the chat request fails.
+   */
+  async generateReply(userText: string): Promise<string> {
+    const messages: OllamaChatMessage[] = [
+      { role: "system", content: SYSTEM_PROMPT },
+      ...this.history,
+      { role: "user", content: userText },
+    ];
+
+    const reply = await this.chat(messages);
+
+    this.history.push({ role: "user", content: userText });
+    this.history.push({ role: "assistant", content: reply });
+    this.trimHistory();
+
+    return reply;
+  }
+
+  /** Forgets all previous turns. */
+  resetConversation(): void {
+    this.history = [];
+  }
+
+  /** Keeps only the newest `MAX_CONVERSATION_TURNS` user/assistant pairs. */
+  private trimHistory(): void {
+    const maxMessages = this.config.MAX_CONVERSATION_TURNS * 2;
+    if (this.history.length <= maxMessages) {
+      return;
+    }
+    this.history = this.history.slice(-maxMessages);
+  }
+
+  /**
+   * Posts `messages` to the server's `/api/chat` endpoint with streaming
+   * disabled and returns the trimmed reply text.
+   *
+   * @throws Error if the server cannot be reached, responds with a non-2xx
+   *   status, or returns no message content.
+   */
+  private async chat(messages: OllamaChatMessage[]): Promise<string> {
+    // Drop trailing slashes from OLLAMA_BASE_URL so the path is not "//api/chat".
+    const baseUrl = this.config.OLLAMA_BASE_URL.replace(/\/+$/, "");
+    const url = `${baseUrl}/api/chat`;
+
+    const response = await fetch(url, {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+      },
+      body: JSON.stringify({
+        model: this.config.OLLAMA_MODEL,
+        messages,
+        stream: false,
+        think: false,
+        keep_alive: this.config.OLLAMA_KEEP_ALIVE,
+      }),
+    }).catch((error: unknown): never => {
+      // A rejected fetch (for example, nothing listening at the address) does
+      // not say which server was tried, so add the URL to the message.
+      const reason = error instanceof Error ? error.message : String(error);
+      throw new Error(`Ollama 서버에 연결할 수 없습니다 (${url}): ${reason}`);
+    });
+
+    if (!response.ok) {
+      const body = await response.text();
+      throw new Error(`Ollama API ${response.status}: ${body}`);
+    }
+
+    const payload = (await response.json()) as OllamaChatResponse;
+    const content = payload.message?.content?.trim();
+    if (!content) {
+      throw new Error("Ollama 응답에 message.content 가 없습니다.");
+    }
+    return content;
+  }
+}
diff --git a/src/setup-llm.ts b/src/setup-llm.ts
new file mode 100644
index 0000000..624f1f8
--- /dev/null
+++ b/src/setup-llm.ts
@@ -0,0 +1,56 @@
+import process from "node:process";
+import { spawn } from "node:child_process";
+
+import { loadConfig } from "./config.js";
+
+/**
+ * Runs `command` with `args`, sharing this process's stdio, and resolves once
+ * the child exits with code 0.
+ *
+ * @throws Error if the child cannot be spawned, exits with a non-zero code, or
+ *   is terminated by a signal.
+ */
+async function run(command: string, args: string[]): Promise<void> {
+  await new Promise<void>((resolve, reject) => {
+    const child = spawn(command, args, {
+      stdio: "inherit",
+      windowsHide: true,
+      shell: process.platform === "win32",
+    });
+
+    child.on("exit", (code, signal) => {
+      if (code === 0) {
+        resolve();
+        return;
+      }
+      // code is null when a signal ended the child, so name the signal instead
+      // of reporting "exited with code null".
+      const outcome =
+        code === null ? `was terminated by signal ${signal ?? "unknown"}` : `exited with code ${code}`;
+      reject(new Error(`${command} ${args.join(" ")} ${outcome}`));
+    });
+
+    child.on("error", reject);
+  });
+}
+
+/**
+ * Runs `ollama pull` for the configured model and logs progress messages.
+ *
+ * @throws Error if the `ollama` command cannot be started or does not exit
+ *   with code 0.
+ */
+export async function setupLlm(): Promise<void> {
+  const config = loadConfig();
+  console.log(`Ollama 모델 준비: ${config.OLLAMA_MODEL}`);
+  await run("ollama", ["pull", config.OLLAMA_MODEL]);
+  console.log("Ollama LLM 환경 준비 완료");
+}
+
+// Only run automatically when this file is the entry point, not when imported.
+if (import.meta.main) {
+  void setupLlm().catch((error) => {
+    console.error(error instanceof Error ? error.message : String(error));
+    process.exit(1);
+  });
+}
diff --git a/src/setup-python.ts b/src/setup-python.ts
index 1213d25..15b1896 100644
--- a/src/setup-python.ts
+++ b/src/setup-python.ts
@@ -24,7 +24,7 @@ async function run(command: string, args: string[], cwd: string): Promise<void>
   });
 }
 
-async function main(): Promise<void> {
+export async function setupSttPython(): Promise<void> {
   const config = loadConfig();
   const python = await resolveBasePythonCommand(config);
   const venvRoot = path.resolve(process.cwd(), config.LOCAL_AI_VENV_PATH);
@@ -47,7 +47,9 @@ async function main(): Promise<void> {
   console.log("Python STT 환경 준비 완료");
 }
 
-void main().catch((error) => {
-  console.error(error instanceof Error ? error.message : String(error));
-  process.exit(1);
-});
+if (import.meta.main) {
+  void setupSttPython().catch((error) => {
+    console.error(error instanceof Error ? error.message : String(error));
+    process.exit(1);
+  });
+}
diff --git a/src/setup.ts b/src/setup.ts
new file mode 100644
index 0000000..5ec24b5
--- /dev/null
+++ b/src/setup.ts
@@ -0,0 +1,20 @@
+import process from "node:process";
+
+import { setupLlm } from "./setup-llm.js";
+import { setupSttPython } from "./setup-python.js";
+
+/** Runs the STT setup and then the LLM setup, stopping at the first failure. */
+async function main(): Promise<void> {
+  for (const step of [setupSttPython, setupLlm]) {
+    await step();
+  }
+}
+
+// Run only when this file is the entry point; report a failure and exit with 1.
+if (import.meta.main) {
+  void main().catch((failure) => {
+    const message = failure instanceof Error ? failure.message : String(failure);
+    console.error(message);
+    process.exit(1);
+  });
+}