From 1a8e8d0a8f4a919206ba2d0b4a464dd18bdd767a Mon Sep 17 00:00:00 2001 From: claude-bot Date: Fri, 1 May 2026 03:38:30 +0900 Subject: [PATCH] Simplify Windows TTS playback path --- src/audio/local-voice-session.ts | 46 ++++++++++++++++++++++++++++++ src/services/tts.ts | 1 + src/services/windows-system-tts.ts | 42 +++++---------------------- 3 files changed, 54 insertions(+), 35 deletions(-) diff --git a/src/audio/local-voice-session.ts b/src/audio/local-voice-session.ts index d15c64f..ab9e2cf 100644 --- a/src/audio/local-voice-session.ts +++ b/src/audio/local-voice-session.ts @@ -511,6 +511,11 @@ export class LocalVoiceSession { } private async playToWindowsDefaultSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise { + if (playback.sourceFilePath) { + await this.playWindowsWaveFile(playback.sourceFilePath, signal); + return; + } + const chunks: Buffer[] = []; await new Promise((resolve, reject) => { @@ -584,6 +589,47 @@ export class LocalVoiceSession { } } + private async playWindowsWaveFile(filePath: string, signal: AbortSignal): Promise { + const psScript = [ + "Add-Type -AssemblyName System;", + `$player = New-Object System.Media.SoundPlayer('${filePath.replace(/'/g, "''")}');`, + "$player.PlaySync();", + ].join(" "); + + const player = spawn("powershell", ["-NoProfile", "-Command", psScript], { + stdio: ["ignore", "ignore", "pipe"], + }); + this.currentPlayer = player; + + player.stderr.on("data", (chunk: Buffer) => { + const text = chunk.toString().trim(); + if (text.length > 0) { + this.options.logger.debug("[powershell-player]", text); + } + }); + + signal.addEventListener( + "abort", + () => { + if (!player.killed) { + player.kill("SIGKILL"); + } + }, + { once: true }, + ); + + const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null]; + this.currentPlayer = null; + + if (signal.aborted) { + return; + } + + if (code !== 0) { + throw new Error(`powershell playback exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`); + } + } + private getFfmpegPath(): string { return requireFfmpegPath(); } diff --git a/src/services/tts.ts b/src/services/tts.ts index 556b3a6..5ee5981 100644 --- a/src/services/tts.ts +++ b/src/services/tts.ts @@ -2,6 +2,7 @@ import type { Readable } from "node:stream"; export interface PreparedSpeechAudio { stream: Readable; + sourceFilePath?: string; dispose: () => void; } diff --git a/src/services/windows-system-tts.ts b/src/services/windows-system-tts.ts index b3b26e7..de08952 100644 --- a/src/services/windows-system-tts.ts +++ b/src/services/windows-system-tts.ts @@ -4,8 +4,6 @@ import { unlink } from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import prism from "prism-media"; - import { resolveFfmpegPath } from "../audio/ffmpeg-path.js"; import type { PreparedSpeechAudio, TtsService } from "./tts.js"; @@ -90,45 +88,19 @@ export class WindowsSystemTtsService implements TtsService { throw error; }); - const input = createReadStream(tempPath); - const ffmpeg = new prism.FFmpeg({ - args: [ - "-analyzeduration", - "0", - "-loglevel", - "0", - "-i", - tempPath, - "-f", - "s16le", - "-ar", - "48000", - "-ac", - "2", - "pipe:1", - ], - }); - - signal?.addEventListener( - "abort", - () => { - input.destroy(); - ffmpeg.destroy(); - void unlink(tempPath).catch(() => null); - }, - { once: true }, - ); - return { - stream: ffmpeg, + stream: createReadStream(tempPath), + sourceFilePath: tempPath, dispose: () => { - input.destroy(); - ffmpeg.destroy(); - void unlink(tempPath).catch(() => null); + this.cleanupTempWave(tempPath); }, }; } + private cleanupTempWave(filePath: string): void { + void unlink(filePath).catch(() => null); + } + async destroy(): Promise { return; }