feat: switch llm to local ollama qwen3

This commit is contained in:
2026-04-30 02:53:00 +09:00
parent 23bc534b23
commit 24aa58fc42
12 changed files with 136 additions and 91 deletions

View File

@@ -24,7 +24,7 @@ import { float32ToPcm16Buffer, int16ArrayToFloat32, Stereo48kToMono16kDownsample
import { ConversationMemory, type UserUtterance } from "../services/conversation.js";
import { ElevenLabsSttService } from "../services/elevenlabs-stt.js";
import { ElevenLabsTtsService, type PreparedSpeechAudio } from "../services/elevenlabs-tts.js";
import { OpenAiLlmService } from "../services/openai-llm.js";
import type { LlmService } from "../services/llm.js";
interface GuildVoiceSessionOptions {
client: Client;
@@ -35,7 +35,7 @@ interface GuildVoiceSessionOptions {
textChannelId?: string;
stt: ElevenLabsSttService;
tts: ElevenLabsTtsService;
llm: OpenAiLlmService;
llm: LlmService;
}
interface SpeechJob {

View File

@@ -14,14 +14,14 @@ import { takeFrame, int16ArrayToFloat32, float32ToPcm16Buffer } from "./pcm.js";
import { ConversationMemory, type UserUtterance } from "../services/conversation.js";
import { ElevenLabsSttService } from "../services/elevenlabs-stt.js";
import { ElevenLabsTtsService, type PreparedSpeechAudio } from "../services/elevenlabs-tts.js";
import { OpenAiLlmService } from "../services/openai-llm.js";
import type { LlmService } from "../services/llm.js";
interface LocalVoiceSessionOptions {
config: AssistantRuntimeConfig;
logger: Logger;
stt: ElevenLabsSttService;
tts: ElevenLabsTtsService;
llm: OpenAiLlmService;
llm: LlmService;
}
interface SpeechJob {

View File

@@ -15,12 +15,14 @@ const envSchema = z.object({
DISCORD_BOT_TOKEN: emptyToUndefined,
DISCORD_APPLICATION_ID: emptyToUndefined,
DISCORD_COMMAND_GUILD_ID: emptyToUndefined,
OPENAI_API_KEY: emptyToUndefined,
OPENAI_MODEL: z.string().min(1).default("gpt-5.4-mini"),
ELEVENLABS_API_KEY: emptyToUndefined,
ELEVENLABS_VOICE_ID: emptyToUndefined,
ELEVENLABS_STT_MODEL: z.string().min(1).default("scribe_v2_realtime"),
ELEVENLABS_TTS_MODEL: z.string().min(1).default("eleven_flash_v2_5"),
OLLAMA_BASE_URL: z.string().min(1).default("http://localhost:11434"),
OLLAMA_MODEL: z.string().min(1).default("qwen3:0.6b"),
OLLAMA_KEEP_ALIVE: z.string().min(1).default("5m"),
OLLAMA_NUM_CTX: z.coerce.number().int().min(512).max(32768).default(4096),
BOT_DEFAULT_LANGUAGE: z.string().min(2).default("ko"),
MAX_CONVERSATION_TURNS: z.coerce.number().int().min(4).max(30).default(12),
LOCAL_AUDIO_SOURCE: emptyToUndefined,
@@ -35,7 +37,6 @@ const envSchema = z.object({
export type AppConfig = z.infer<typeof envSchema>;
export type AssistantRuntimeConfig = AppConfig & {
OPENAI_API_KEY: string;
ELEVENLABS_API_KEY: string;
ELEVENLABS_VOICE_ID: string;
};
@@ -58,7 +59,6 @@ function requirePresent(value: string | undefined, name: string): string {
export function requireAssistantRuntimeConfig(config: AppConfig): AssistantRuntimeConfig {
return {
...config,
OPENAI_API_KEY: requirePresent(config.OPENAI_API_KEY, "OPENAI_API_KEY"),
ELEVENLABS_API_KEY: requirePresent(config.ELEVENLABS_API_KEY, "ELEVENLABS_API_KEY"),
ELEVENLABS_VOICE_ID: requirePresent(config.ELEVENLABS_VOICE_ID, "ELEVENLABS_VOICE_ID"),
};

View File

@@ -17,7 +17,7 @@ import { type DiscordRuntimeConfig } from "./config.js";
import { Logger } from "./logger.js";
import { ElevenLabsSttService } from "./services/elevenlabs-stt.js";
import { ElevenLabsTtsService } from "./services/elevenlabs-tts.js";
import { OpenAiLlmService } from "./services/openai-llm.js";
import { OllamaLlmService } from "./services/ollama-llm.js";
export async function runDiscordBot(config: DiscordRuntimeConfig, logger: Logger): Promise<void> {
const commands = [
@@ -39,7 +39,7 @@ export async function runDiscordBot(config: DiscordRuntimeConfig, logger: Logger
const stt = new ElevenLabsSttService(config);
const tts = new ElevenLabsTtsService(config);
const llm = new OpenAiLlmService(config);
const llm = new OllamaLlmService(config);
const sessions = new Map<string, GuildVoiceSession>();
function getVoiceChannel(interaction: ChatInputCommandInteraction): VoiceBasedChannel | null {

View File

@@ -8,7 +8,7 @@ import { Logger } from "./logger.js";
import { LocalVoiceSession } from "./audio/local-voice-session.js";
import { ElevenLabsSttService } from "./services/elevenlabs-stt.js";
import { ElevenLabsTtsService } from "./services/elevenlabs-tts.js";
import { OpenAiLlmService } from "./services/openai-llm.js";
import { OllamaLlmService } from "./services/ollama-llm.js";
function resolveFfmpegPath(): string {
const ffmpegPath = ffmpegStatic as unknown as string | null;
@@ -74,7 +74,7 @@ export async function printLocalAudioDevices(): Promise<void> {
export async function runLocalAssistant(config: AssistantRuntimeConfig, logger: Logger): Promise<void> {
const stt = new ElevenLabsSttService(config);
const tts = new ElevenLabsTtsService(config);
const llm = new OpenAiLlmService(config);
const llm = new OllamaLlmService(config);
const session = new LocalVoiceSession({
config,
logger,

5
src/services/llm.ts Normal file
View File

@@ -0,0 +1,5 @@
import type { ConversationMemory, UserUtterance } from "./conversation.js";
/**
 * Contract for a language-model backend that turns a user utterance into a
 * reply. Voice sessions depend on this interface rather than on a concrete
 * provider class.
 */
export interface LlmService {
/**
 * Produces the assistant's reply text for `utterance`, using `memory` as the
 * conversation context.
 *
 * @param memory - Conversation state the implementation may build its prompt from.
 * @param utterance - The user utterance to answer.
 * @returns A promise resolving to the reply text.
 */
generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string>;
}

View File

@@ -0,0 +1,85 @@
import type { AssistantRuntimeConfig } from "../config.js";
import type { ConversationMemory, UserUtterance } from "./conversation.js";
import type { LlmService } from "./llm.js";
/**
 * System prompt sent with every chat request. The sentences (in Korean) tell
 * the model to act as a Korean voice assistant for a Discord voice channel or
 * local microphone test, to answer in one or at most two short sentences, to
 * avoid lists/markdown/code blocks, and to output only the final answer.
 *
 * The sentences are kept as separate literals for readability and folded into
 * one space-separated string.
 */
const ASSISTANT_INSTRUCTIONS: string = [
  "너는 디스코드 음성 채널 또는 로컬 마이크 테스트에서 동작하는 한국어 음성 비서다.",
  "답변은 짧고 실용적으로 한다.",
  "기본은 한 문장, 길어도 두 문장을 넘기지 않는다.",
  "말투는 자연스러운 한국어로 유지한다.",
  "speaker_id와 speaker_name은 화자 구분용이므로 필요할 때만 자연스럽게 반영한다.",
  "잘 못 들었거나 의미가 불명확하면 짧게 다시 물어본다.",
  "목록, 마크다운, 코드블록은 쓰지 않는다.",
  "생각 과정을 드러내지 말고 최종 답변만 말한다.",
].reduce((prompt, sentence) => `${prompt} ${sentence}`);
/**
 * The fields of the `/api/chat` JSON response body that this module declares.
 * Every field is optional because the body is cast from untyped JSON and is
 * not validated.
 */
interface OllamaChatResponse {
// Assistant message; `content` is the text used as the reply.
message?: {
content?: string;
// Declared for completeness; not read anywhere in this module.
thinking?: string;
};
// Error description; used as the thrown message when the HTTP status is not OK.
error?: string;
}
/**
 * Cleans raw model output into a short single-line reply.
 *
 * Steps:
 * 1. Remove `<think>…</think>` reasoning blocks, including a block whose
 *    closing tag is missing because generation stopped early.
 * 2. Collapse every run of whitespace into one space and trim.
 * 3. If the result is longer than 180 UTF-16 code units, keep at most the
 *    first two sentences (split on `.`, `!`, `?`) and hard-cap the result at
 *    180 code units.
 *
 * @param text - Raw text produced by the model.
 * @returns The cleaned reply; an empty string when nothing but reasoning or
 *   whitespace was present.
 */
function normalizeReply(text: string): string {
  const maxLength = 180;

  const withoutThinking = text
    // Properly closed reasoning blocks.
    .replace(/<think>[\s\S]*?<\/think>/gi, " ")
    // A reasoning block that was cut off before its closing tag: everything
    // from the opening tag onward is reasoning, not an answer.
    .replace(/<think>[\s\S]*$/i, " ");

  const compact = withoutThinking.replace(/\s+/g, " ").trim();
  if (compact.length <= maxLength) {
    return compact;
  }

  const sentences = compact.match(/[^.!?]+[.!?]?/g);
  if (!sentences || sentences.length === 0) {
    // Only punctuation characters: there is nothing to split on, so just cap.
    return compact.slice(0, maxLength).trim();
  }

  // Each match after the first starts with the space that followed the
  // previous sentence; trim before joining so no double space is reintroduced.
  return sentences
    .slice(0, 2)
    .map((sentence) => sentence.trim())
    .join(" ")
    .slice(0, maxLength)
    .trim();
}
/**
 * `LlmService` implementation that requests replies from an Ollama server
 * through its `/api/chat` endpoint.
 */
export class OllamaLlmService implements LlmService {
  constructor(private readonly config: AssistantRuntimeConfig) {}

  /**
   * Sends the system instructions plus the prompt built from `memory` and
   * `utterance` to Ollama and returns the normalized reply.
   *
   * @param memory - Conversation memory used to build the user prompt.
   * @param utterance - The utterance to answer.
   * @returns The normalized reply, or a fixed Korean "please repeat" message
   *   when the model produced no usable text.
   * @throws Error when the server answers with a non-OK HTTP status. The
   *   message is the server's `error` field if present, otherwise the status.
   *   Rejections from `fetch` itself are not caught here and propagate.
   */
  async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> {
    // "/api/chat" is an absolute path, so it replaces any path component that
    // OLLAMA_BASE_URL may carry; only the origin of the base URL is used.
    const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL);

    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: this.config.OLLAMA_MODEL,
        messages: [
          {
            role: "system",
            content: ASSISTANT_INSTRUCTIONS,
          },
          {
            role: "user",
            content: memory.buildPrompt(utterance),
          },
        ],
        // Request a single, complete JSON response without a reasoning trace.
        think: false,
        stream: false,
        keep_alive: this.config.OLLAMA_KEEP_ALIVE,
        options: {
          num_ctx: this.config.OLLAMA_NUM_CTX,
          temperature: 0.4,
          num_predict: 120,
        },
      }),
    });

    // A body that is not valid JSON becomes `{}` so the status-based error
    // below is still reported instead of a JSON parse failure.
    const body = (await response.json().catch(() => ({}))) as OllamaChatResponse;
    if (!response.ok) {
      throw new Error(body.error ?? `Ollama request failed with status ${response.status}`);
    }

    // Normalize BEFORE testing for emptiness: normalization strips reasoning
    // markup, so content that consisted only of such markup ends up empty and
    // must fall back to the "please repeat" message rather than be returned.
    const reply = normalizeReply(body.message?.content ?? "");
    if (!reply) {
      return "잘 못 들었습니다. 한 번만 다시 말씀해 주세요.";
    }

    return reply;
  }
}

View File

@@ -1,64 +0,0 @@
import OpenAI from "openai";
import type { AssistantRuntimeConfig } from "../config.js";
import type { ConversationMemory, UserUtterance } from "./conversation.js";
/**
 * System instructions passed with every request. The sentences (in Korean)
 * tell the model to act as a Korean voice assistant in a Discord voice
 * channel, to answer in one or at most two short sentences, and to avoid
 * lists, markdown and code blocks.
 *
 * The sentences are kept as separate literals for readability and folded into
 * one space-separated string.
 */
const ASSISTANT_INSTRUCTIONS: string = [
  "너는 디스코드 음성 채널에서 동작하는 한국어 음성 비서다.",
  "답변은 짧고 실용적으로 한다.",
  "기본은 한 문장, 길어도 두 문장을 넘기지 않는다.",
  "말투는 자연스러운 한국어로 유지한다.",
  "speaker_id와 speaker_name은 화자 구분용이므로 필요할 때만 자연스럽게 반영한다.",
  "잘 못 들었거나 의미가 불명확하면 짧게 다시 물어본다.",
  "목록, 마크다운, 코드블록은 쓰지 않는다.",
].reduce((prompt, sentence) => `${prompt} ${sentence}`);
/**
 * Turns model output into a short single-line reply.
 *
 * Whitespace runs are collapsed to one space and the text is trimmed. Text
 * longer than 180 UTF-16 code units is reduced to its first two sentences
 * (split on `.`, `!`, `?`) and then hard-capped at 180 code units.
 *
 * @param text - Raw text produced by the model.
 * @returns The compacted, length-limited reply.
 */
function normalizeReply(text: string): string {
  const limit = 180;
  const singleLine = text.replace(/\s+/g, " ").trim();

  if (singleLine.length > limit) {
    const parts = singleLine.match(/[^.!?]+[.!?]?/g);
    // No sentence-like fragment found: fall back to capping the whole text.
    const candidate =
      parts !== null && parts.length > 0 ? parts.slice(0, 2).join(" ").trim() : singleLine;
    return candidate.slice(0, limit).trim();
  }

  return singleLine;
}
/**
 * Reply generator that calls the OpenAI client's `responses.create` endpoint.
 */
export class OpenAiLlmService {
  private readonly client: OpenAI;

  constructor(private readonly config: AssistantRuntimeConfig) {
    const apiKey = this.config.OPENAI_API_KEY;
    this.client = new OpenAI({ apiKey });
  }

  /**
   * Builds a prompt from `memory` and `utterance`, requests a completion and
   * returns the normalized reply.
   *
   * @param memory - Conversation memory used to build the prompt text.
   * @param utterance - The utterance to answer.
   * @returns The normalized reply, or a fixed Korean "please repeat" message
   *   when the response carries no output text.
   */
  async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> {
    const { OPENAI_MODEL: model } = this.config;
    const prompt = memory.buildPrompt(utterance);

    const response = await this.client.responses.create({
      model,
      instructions: ASSISTANT_INSTRUCTIONS,
      input: [
        {
          role: "user",
          content: [{ type: "input_text", text: prompt }],
        },
      ],
      max_output_tokens: 120,
    });

    const replyText = response.output_text?.trim();
    return replyText ? normalizeReply(replyText) : "잘 못 들었습니다. 한 번만 다시 말씀해 주세요.";
  }
}