import { type ChildProcess, spawn } from 'node:child_process';
import { EventEmitter } from 'node:events';
import { type Readable, type Writable } from 'node:stream';

import { type WakeWordConfig, type AudioConfig } from '../config/index.js';
import { type HardwareService } from '../hardware/index.js';
import { createLogger, type Logger } from '../utils/index.js';

/** Events emitted by {@link WakeWordService} and the listener signature for each. */
export interface WakeWordServiceEvents {
  detected: () => void;
  ready: () => void;
  error: (error: Error) => void;
}

/** Delay before respawning the subprocess after an unexpected non-zero exit. */
const RESTART_DELAY_MS = 2000;
/** How often `pause()` checks whether the subprocess reported STREAM_CLOSED. */
const PAUSE_POLL_INTERVAL_MS = 50;
/** Upper bound on how long `pause()` waits for STREAM_CLOSED before resolving anyway. */
const PAUSE_TIMEOUT_MS = 2000;
/** Grace period between sending QUIT and falling back to SIGTERM in `stop()`. */
const QUIT_GRACE_MS = 500;

/**
 * Wake word detection service.
 *
 * Two operating modes, selected by whether a HardwareService is passed
 * to the constructor:
 *
 * 1. **ALSA mode** (no HardwareService)
 *    The Python subprocess opens PyAudio on `audioConfig.captureDevice`
 *    and reads the mic directly. Pause releases the ALSA device so
 *    arecord (the AlsaAudioService) can use it during conversation.
 *
 * 2. **ESP32 mode** (HardwareService provided)
 *    The Python subprocess reads raw S16 mono PCM from stdin. We
 *    subscribe to `hardware.on('audio_up')` and pipe every mic chunk
 *    coming off the UART straight into the Python process. Control
 *    commands (PAUSE/RESUME/RESET/QUIT) go over a separate pipe at
 *    fd 3 because stdin is busy carrying audio.
 *
 * The model is loaded once at startup; pause/resume is cheap and
 * does not reload it.
 *
 * Emits `detected`, `ready` and `error` (see {@link WakeWordServiceEvents}).
 * As with any EventEmitter, an `error` emitted while no `error` listener
 * is registered is thrown.
 */
export class WakeWordService extends EventEmitter {
  private process: ChildProcess | null = null;
  private readonly logger: Logger;
  private _isListening = false;
  private _isPaused = false;
  private _streamClosed = false;
  private readonly usesHardware: boolean;
  /** Set by `stop()`; suppresses the automatic restart until the next spawn. */
  private stopRequested = false;
  /** Pending automatic-restart timer, if any, so it can be cancelled. */
  private restartTimer: ReturnType<typeof setTimeout> | null = null;

  /** Latched forwarder so we can detach it on stop / error. */
  private readonly forwardMicChunk = (chunk: Buffer): void => {
    const stdin = this.process?.stdin;
    if (!stdin || stdin.destroyed) return;
    stdin.write(chunk, (err) => {
      if (err && (err as NodeJS.ErrnoException).code === 'EPIPE') {
        this.logger.warn('Wake word process stdin pipe broken — detaching audio');
        this.detachHardware();
      }
    });
  };

  /**
   * @param wakeWordConfig Script/interpreter paths, model name and detection threshold.
   * @param audioConfig Sample rate and (ALSA mode) capture device.
   * @param hardware When provided, selects ESP32 mode and is the source of `audio_up` chunks.
   */
  constructor(
    private readonly wakeWordConfig: WakeWordConfig,
    private readonly audioConfig: AudioConfig,
    private readonly hardware: HardwareService | null = null,
  ) {
    super();
    this.logger = createLogger('wake-word', 'info');
    this.usesHardware = hardware !== null;
  }

  /** True while the subprocess is running and detection is not paused. */
  get isListening(): boolean {
    return this._isListening && !this._isPaused;
  }

  /**
   * Spawn the wake word subprocess and wire up its pipes.
   *
   * If a subprocess already exists this only resumes it when paused.
   * A pending automatic restart is cancelled because this call supersedes it.
   */
  start(): void {
    if (this.process) {
      if (this._isPaused) this.resume();
      return;
    }

    this.stopRequested = false;
    this.clearRestartTimer();

    this.logger.info(
      {
        mode: this.usesHardware ? 'esp32' : 'alsa',
        model: this.wakeWordConfig.modelName,
        threshold: this.wakeWordConfig.threshold,
      },
      'Starting wake word detection',
    );

    const args = [
      this.wakeWordConfig.scriptPath,
      '--model',
      this.wakeWordConfig.modelName,
      '--threshold',
      String(this.wakeWordConfig.threshold),
      '--sample-rate',
      String(this.audioConfig.sampleRate),
    ];
    if (this.usesHardware) {
      args.push('--input', 'stdin', '--control-fd', '3');
    } else {
      args.push('--input', 'alsa', '--device', this.audioConfig.captureDevice);
    }

    // stdio layout:
    //   0: stdin  — audio in (ESP32 mode) or control (ALSA mode)
    //   1: stdout — DETECTED events
    //   2: stderr — status & log lines
    //   3: extra  — control pipe (ESP32 mode only)
    const stdio: ('pipe' | 'ignore')[] = this.usesHardware
      ? ['pipe', 'pipe', 'pipe', 'pipe']
      : ['pipe', 'pipe', 'pipe'];

    // Handlers below close over `proc` rather than `this.process` so that a
    // late event from an old child can never touch a newer child's state.
    const proc = spawn(this.wakeWordConfig.pythonPath, args, { stdio });
    this.process = proc;
    this._isListening = true;
    this._isPaused = false;

    // A write to a dead child surfaces as an 'error' event on the stream;
    // without a listener that would be thrown as an uncaught exception.
    proc.stdin?.on('error', (err: Error) => {
      this.logger.warn({ err }, 'Wake word stdin stream error');
      this.detachHardware();
    });
    this.controlPipeOf(proc)?.on('error', (err: Error) => {
      this.logger.warn({ err }, 'Wake word control pipe error');
    });

    this.readLines(proc.stdout, (line) => this.handleStdoutLine(line));
    this.readLines(proc.stderr, (line) => this.handleStderrLine(line));

    proc.on('error', (err) => {
      this._isListening = false;
      this.logger.error({ err }, 'Wake word process error');
      this.detachHardware();
      // A child that never got a pid failed to spawn; no 'exit' event will
      // follow, so free the slot here or start() would be a no-op forever.
      if (proc.pid === undefined && this.process === proc) {
        this._isPaused = false;
        this.process = null;
      }
      this.emit('error', new Error(`Wake word process failed: ${err.message}`));
    });

    proc.on('exit', (code) => {
      // Already superseded or cleaned up (stop() fallback, spawn failure).
      if (this.process !== proc) return;

      this._isListening = false;
      this._isPaused = false;
      this.detachHardware();
      this.process = null;

      if (code !== 0 && code !== null && !this.stopRequested) {
        this.logger.warn({ code }, 'Wake word process exited unexpectedly');
        this.scheduleRestart();
      }
    });

    // In ESP32 mode, start piping mic audio from the UART.
    if (this.usesHardware && this.hardware) {
      // off-then-on keeps the subscription single even if it was left attached.
      this.hardware.off('audio_up', this.forwardMicChunk);
      this.hardware.on('audio_up', this.forwardMicChunk);
    }
  }

  /**
   * Pause wake word detection.
   *
   * In ALSA mode we must wait for STREAM_CLOSED so arecord can reclaim
   * the device. In ESP32 mode the audio flow never stops — we just
   * tell the Python process to ignore detections.
   *
   * @returns A promise that resolves once the stream is reported closed,
   *   the subprocess is gone, or the wait times out. It never rejects.
   */
  pause(): Promise<void> {
    if (!this.process || this._isPaused) return Promise.resolve();

    this._isPaused = true;
    this._streamClosed = false;
    this.writeControl('PAUSE');

    if (this.usesHardware) {
      // No physical device to release — resolve immediately.
      return Promise.resolve();
    }

    return new Promise<void>((resolve) => {
      const finish = (): void => {
        clearInterval(poll);
        clearTimeout(deadline);
        resolve();
      };
      const poll = setInterval(() => {
        if (this._streamClosed || !this.process) finish();
      }, PAUSE_POLL_INTERVAL_MS);
      const deadline = setTimeout(() => {
        this.logger.warn('Timed out waiting for wake word audio stream to close');
        finish();
      }, PAUSE_TIMEOUT_MS);
    });
  }

  /** Resume detection after `pause()`; no-op when not running or not paused. */
  resume(): void {
    if (!this.process || !this._isPaused) return;
    this._isPaused = false;
    this.writeControl('RESUME');
    this.logger.info('🎤 Resuming wake word listening...');
  }

  /**
   * Stop the subprocess and tear the service down.
   *
   * Sends QUIT, falls back to SIGTERM if the child is still alive after a
   * short grace period, cancels any pending automatic restart, and removes
   * every listener registered on this emitter.
   */
  stop(): void {
    this.stopRequested = true;
    this.clearRestartTimer();

    const proc = this.process;
    if (proc) {
      this.writeControl('QUIT');
      this.detachHardware();

      const killTimer = setTimeout(() => {
        if (proc.exitCode === null && proc.signalCode === null) {
          proc.kill('SIGTERM');
        }
        if (this.process === proc) this.process = null;
      }, QUIT_GRACE_MS);
      // A clean exit within the grace period makes the fallback unnecessary.
      proc.once('exit', () => clearTimeout(killTimer));
    }

    this._isListening = false;
    this._isPaused = false;
    this.removeAllListeners();
  }

  // ──────────────────────────────────────────────────────────
  // Internals
  // ──────────────────────────────────────────────────────────

  /**
   * Deliver a stream's output to `onLine` one trimmed, non-empty line at a time.
   *
   * 'data' chunks are not aligned to line boundaries, so a trailing partial
   * line is buffered until its newline (or end of stream) arrives.
   */
  private readLines(stream: Readable | null, onLine: (line: string) => void): void {
    if (!stream) return;
    // Decode in the stream so multi-byte characters split across chunks survive.
    stream.setEncoding('utf8');

    let pending = '';
    stream.on('data', (chunk: string) => {
      pending += chunk;
      const lines = pending.split('\n');
      pending = lines.pop() ?? '';
      for (const raw of lines) {
        const line = raw.trim();
        if (line) onLine(line);
      }
    });
    stream.on('end', () => {
      const tail = pending.trim();
      pending = '';
      if (tail) onLine(tail);
    });
  }

  /** Handle one stdout line: `DETECTED` fires the event, anything else is debug-logged. */
  private handleStdoutLine(line: string): void {
    if (line === 'DETECTED') {
      this.logger.info('🎙️ Wake word detected!');
      this.emit('detected');
    } else {
      this.logger.debug({ line }, 'Wake word stdout');
    }
  }

  /** Handle one stderr line: known status tokens update state, the rest is logged. */
  private handleStderrLine(msg: string): void {
    if (msg === 'READY') {
      this.logger.info('🎤 Wake word engine ready — listening...');
      this.emit('ready');
    } else if (msg === 'PAUSED') {
      this._streamClosed = false;
      this.logger.debug('Wake word paused');
    } else if (msg === 'STREAM_CLOSED') {
      this._streamClosed = true;
      this.logger.debug('Wake word audio stream closed');
    } else if (msg === 'RESUMED') {
      this.logger.debug('Wake word resumed');
    } else if (msg === 'STREAM_REOPENED') {
      this.logger.debug('Wake word audio stream reopened');
    } else if (msg.startsWith('Loading wake word model')) {
      this.logger.info('⏳ Loading wake word model...');
    } else if (msg.startsWith('Wake word model loaded')) {
      this.logger.info('✅ Wake word model loaded');
    } else if (
      msg.startsWith('Matched device') ||
      msg.startsWith('Using device') ||
      msg.startsWith('Listening')
    ) {
      this.logger.info(`🔊 ${msg}`);
    } else {
      this.logger.warn('Wake word stderr: %s', msg);
    }
  }

  /** Arrange a single delayed `start()` unless `stop()` is called first. */
  private scheduleRestart(): void {
    this.clearRestartTimer();
    this.restartTimer = setTimeout(() => {
      this.restartTimer = null;
      if (this.stopRequested) return;
      this.logger.info('Restarting wake word detection...');
      this.start();
    }, RESTART_DELAY_MS);
  }

  /** Cancel the pending automatic restart, if there is one. */
  private clearRestartTimer(): void {
    if (this.restartTimer !== null) {
      clearTimeout(this.restartTimer);
      this.restartTimer = null;
    }
  }

  /** Return the child's fd 3 pipe when it exists and is writable, else null. */
  private controlPipeOf(proc: ChildProcess): Writable | null {
    const pipe = proc.stdio[3];
    return pipe && 'write' in pipe ? pipe : null;
  }

  /**
   * Write a text control command. In ALSA mode that goes to stdin;
   * in ESP32 mode stdin carries audio so commands travel over the
   * extra pipe at fd 3 (process.stdio[3]).
   *
   * Silently does nothing when there is no subprocess or the target
   * pipe is missing or already destroyed.
   */
  private writeControl(cmd: string): void {
    if (!this.process) return;
    const line = `${cmd}\n`;

    const target = this.usesHardware ? this.controlPipeOf(this.process) : this.process.stdin;
    if (target && !target.destroyed) {
      target.write(line);
    }
  }

  /** Stop forwarding `audio_up` chunks to the subprocess (ESP32 mode only). */
  private detachHardware(): void {
    if (this.usesHardware && this.hardware) {
      this.hardware.off('audio_up', this.forwardMicChunk);
    }
  }
}