realtime_voice_bot/src/services/ollama-llm.ts

import type { AssistantRuntimeConfig } from "../config.js";
import type { ConversationMemory, UserUtterance } from "./conversation.js";
import type { LlmService } from "./llm.js";

/**
 * System prompt (written in Korean) for the assistant, built by joining the
 * sentences below with single spaces.
 *
 * In English, the sentences tell the model to:
 * - act as a Korean voice assistant running in a Discord voice channel or a
 *   local microphone test;
 * - keep answers short and practical: one sentence by default, never more than two;
 * - keep a natural Korean speaking tone;
 * - treat `speaker_id` / `speaker_name` as speaker-identification fields and
 *   reflect them only when it is natural to do so;
 * - briefly ask again when the input was misheard or unclear;
 * - avoid lists, markdown and code blocks;
 * - give only the final answer without exposing its reasoning.
 */
const ASSISTANT_INSTRUCTIONS = [
  "너는 디스코드 음성 채널 또는 로컬 마이크 테스트에서 동작하는 한국어 음성 비서다.",
  "답변은 짧고 실용적으로 한다.",
  "기본은 한 문장, 길어도 두 문장을 넘기지 않는다.",
  "말투는 자연스러운 한국어로 유지한다.",
  "speaker_id와 speaker_name은 화자 구분용이므로 필요할 때만 자연스럽게 반영한다.",
  "잘 못 들었거나 의미가 불명확하면 짧게 다시 물어본다.",
  "목록, 마크다운, 코드블록은 쓰지 않는다.",
  "생각 과정을 드러내지 말고 최종 답변만 말한다.",
].join(" ");

/**
 * Shape this file assumes for the JSON body of an Ollama `/api/chat` response.
 * Every field is optional because the body is cast from parsed JSON without
 * validation.
 */
interface OllamaChatResponse {
  message?: {
    /** Assistant reply text; this is the only part turned into the spoken answer. */
    content?: string;
    /** Declared but not read in this file; presumably the model's reasoning text — TODO confirm. */
    thinking?: string;
  };
  /** Error text; used as the thrown error message when the HTTP status is not OK. */
  error?: string;
}

/**
 * Shape this file assumes for the JSON body of an Ollama `/api/tags` response,
 * which is used to check that the configured model is present.
 */
interface OllamaTagsResponse {
  models?: Array<{
    /** Model identifier; compared against the configured model name. */
    name?: string;
    /** Alternative identifier; also compared against the configured model name. */
    model?: string;
  }>;
}

/**
 * Cleans raw model output so it is short enough to be spoken aloud.
 *
 * Steps:
 * 1. Remove every closed `<think>…</think>` block (case-insensitive).
 * 2. Collapse each run of whitespace to a single space and trim the ends.
 * 3. If the result is longer than 180 characters, keep at most the first two
 *    sentences (split on `.`, `!`, `?`) and hard-cap the result at 180 characters.
 *
 * @param text Raw reply text produced by the model.
 * @returns The normalized reply. It is empty when `text` held only whitespace
 *          and/or think blocks.
 */
function normalizeReply(text: string): string {
  const maxLength = 180;
  const maxSentences = 2;

  const withoutThinking = text.replace(/<think>[\s\S]*?<\/think>/gi, " ");
  const compact = withoutThinking.replace(/\s+/g, " ").trim();
  if (compact.length <= maxLength) {
    return compact;
  }

  const sentences = compact.match(/[^.!?]+[.!?]?/g);
  if (!sentences || sentences.length === 0) {
    return compact.slice(0, maxLength).trim();
  }

  // Every match after the first still carries the space that separated it from
  // the previous sentence, so trim each piece before re-joining; otherwise the
  // output contains a double space between the two sentences.
  return sentences
    .slice(0, maxSentences)
    .map((sentence) => sentence.trim())
    .join(" ")
    .slice(0, maxLength)
    .trim();
}

/**
 * `LlmService` implementation that talks to an Ollama server over its HTTP API.
 *
 * The base URL, model name, keep-alive value and context size are all read
 * from the injected runtime config.
 */
export class OllamaLlmService implements LlmService {
  constructor(private readonly config: AssistantRuntimeConfig) {}

  /**
   * Verifies that the Ollama server is reachable and that the configured model
   * is listed by `/api/tags`.
   *
   * @throws Error when the server cannot be reached, when it answers with a
   *         non-OK status, or when the configured model is not listed.
   */
  async warmup(): Promise<void> {
    const url = new URL("/api/tags", this.config.OLLAMA_BASE_URL);
    let response: Response;

    try {
      response = await fetch(url);
    } catch {
      throw this.connectionError(
        `모델이 없으면 \`ollama pull ${this.config.OLLAMA_MODEL}\` 를 먼저 실행하세요.`,
      );
    }

    // A body that is not valid JSON is treated as an empty object.
    const body = (await response.json().catch(() => ({}))) as OllamaTagsResponse & { error?: string };
    if (!response.ok) {
      throw new Error(body.error ?? `Ollama 상태 확인 실패: HTTP ${response.status}`);
    }

    const wanted = this.config.OLLAMA_MODEL;
    const wantedTagged = OllamaLlmService.withDefaultTag(wanted);
    const exists = (body.models ?? []).some((entry) =>
      [entry.name, entry.model].some((raw) => {
        const candidate = raw?.trim();
        if (candidate === wanted) {
          return true;
        }
        // Ollama resolves a model name without a tag to ":latest", so a
        // configured "llama3" must match the listed "llama3:latest".
        return candidate !== undefined && OllamaLlmService.withDefaultTag(candidate) === wantedTagged;
      }),
    );

    if (!exists) {
      throw new Error(
        `Ollama 모델 ${this.config.OLLAMA_MODEL} 이 없습니다. \`ollama pull ${this.config.OLLAMA_MODEL}\` 를 먼저 실행해 주세요.`,
      );
    }
  }

  /**
   * Sends the system prompt plus the prompt built from `memory` and
   * `utterance` to `/api/chat` (non-streaming) and returns the normalized reply.
   *
   * @param memory Conversation memory used to build the user prompt.
   * @param utterance The utterance to answer.
   * @returns The normalized reply, or a fixed Korean "please say that again"
   *          phrase when the model produced nothing usable.
   * @throws Error when the server cannot be reached or answers with a non-OK status.
   */
  async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> {
    const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL);
    let response: Response;
    try {
      response = await fetch(url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: this.config.OLLAMA_MODEL,
          messages: [
            {
              role: "system",
              content: ASSISTANT_INSTRUCTIONS,
            },
            {
              role: "user",
              content: memory.buildPrompt(utterance),
            },
          ],
          think: false,
          stream: false,
          keep_alive: this.config.OLLAMA_KEEP_ALIVE,
          options: {
            num_ctx: this.config.OLLAMA_NUM_CTX,
            temperature: 0.4,
            num_predict: 120,
          },
        }),
      });
    } catch {
      throw this.connectionError();
    }

    // A body that is not valid JSON is treated as an empty object.
    const body = (await response.json().catch(() => ({}))) as OllamaChatResponse;

    if (!response.ok) {
      throw new Error(body.error ?? `Ollama request failed with status ${response.status}`);
    }

    // Normalize BEFORE the emptiness check: a reply made only of
    // <think>…</think> blocks normalizes to "" and must hit the fallback
    // phrase instead of being returned as an empty string.
    const reply = normalizeReply(body.message?.content ?? "");
    if (!reply) {
      return "잘 못 들었습니다. 한 번만 다시 말씀해 주세요.";
    }

    return reply;
  }

  /**
   * Builds the error thrown when the HTTP request to Ollama itself fails.
   *
   * @param hint Optional extra sentence appended after a single space.
   */
  private connectionError(hint = ""): Error {
    const base = `Ollama 서버에 연결할 수 없습니다. ${this.config.OLLAMA_BASE_URL} 확인 후 Ollama 앱이 실행 중인지 확인해 주세요. Windows에서는 \`localhost\` 대신 \`http://127.0.0.1:11434\` 를 권장합니다.`;
    return new Error(hint ? `${base} ${hint}` : base);
  }

  /**
   * Appends `:latest` when the model name carries no explicit tag.
   * Only the part after the last `/` is inspected, so a `host:port/` registry
   * prefix is not mistaken for a tag.
   */
  private static withDefaultTag(name: string): string {
    const lastSegment = name.slice(name.lastIndexOf("/") + 1);
    return lastSegment.includes(":") ? name : `${name}:latest`;
  }
}