269 lines
8.7 KiB
TypeScript
269 lines
8.7 KiB
TypeScript
import { ChildProcess, spawn } from 'node:child_process';
|
|
import { EventEmitter } from 'node:events';
|
|
import { type WakeWordConfig, type AudioConfig } from '../config/index.js';
|
|
import { type HardwareService } from '../hardware/index.js';
|
|
import { createLogger, type Logger } from '../utils/index.js';
|
|
|
|
/**
 * Event map for {@link WakeWordService}: each key is an event name the
 * service emits, each value is the listener signature for that event.
 */
export interface WakeWordServiceEvents {
  /** Emitted with a wrapped `Error` when the detector child process reports an error. */
  error: (cause: Error) => void;

  /** Emitted when the detector process reports `READY` on stderr. */
  ready: () => void;

  /** Emitted when the detector process prints `DETECTED` on stdout. */
  detected: () => void;
}
|
|
|
|
/** Maximum time `pause()` waits for the detector to report STREAM_CLOSED (ALSA mode). */
const STREAM_CLOSE_TIMEOUT_MS = 2000;

/** Polling period used by `pause()` while waiting for STREAM_CLOSED. */
const STREAM_CLOSE_POLL_MS = 50;

/** Delay before a crashed detector process is respawned. */
const RESTART_DELAY_MS = 2000;

/** Grace period between sending QUIT and falling back to SIGTERM in `stop()`. */
const QUIT_GRACE_MS = 500;

/** Incremental splitter for newline-delimited text arriving in arbitrary chunks. */
interface LineSplitter {
  /** Feed the next chunk; every line completed by it is delivered to the callback. */
  push(chunk: Buffer | string): void;
  /** Deliver whatever is still buffered (an unterminated last line), if non-blank. */
  flush(): void;
}

/**
 * Create a {@link LineSplitter}.
 *
 * Pipe `data` events are not aligned to line boundaries, so a token such as
 * `DETECTED` may arrive split across two chunks. The splitter keeps the
 * trailing partial line until its terminating `\n` shows up.
 *
 * @param onLine Called once per complete line, trimmed; blank lines are skipped.
 */
function createLineSplitter(onLine: (line: string) => void): LineSplitter {
  let pending = '';

  const deliver = (raw: string): void => {
    const line = raw.trim();
    if (line) onLine(line);
  };

  return {
    push(chunk: Buffer | string): void {
      pending += chunk.toString();
      let newlineAt = pending.indexOf('\n');
      while (newlineAt !== -1) {
        const raw = pending.slice(0, newlineAt);
        // Advance the buffer before delivering so a throwing callback cannot
        // cause the same line to be delivered twice.
        pending = pending.slice(newlineAt + 1);
        deliver(raw);
        newlineAt = pending.indexOf('\n');
      }
    },
    flush(): void {
      const rest = pending;
      pending = '';
      deliver(rest);
    },
  };
}

/**
 * Wake word detection service.
 *
 * Two operating modes, selected by whether a HardwareService is passed
 * to the constructor:
 *
 *   1. **ALSA mode** (no HardwareService)
 *      The Python subprocess opens PyAudio on `audioConfig.captureDevice`
 *      and reads the mic directly. Pause releases the ALSA device so
 *      arecord (the AlsaAudioService) can use it during conversation.
 *
 *   2. **ESP32 mode** (HardwareService provided)
 *      The Python subprocess reads raw S16 mono PCM from stdin. We
 *      subscribe to `hardware.on('audio_up')` and pipe every mic chunk
 *      coming off the UART straight into the Python process. Control
 *      commands (PAUSE/RESUME/RESET/QUIT) go over a separate pipe at
 *      fd 3 because stdin is busy carrying audio.
 *
 * The model is loaded once at startup; pause/resume is cheap and
 * does not reload it.
 *
 * Events emitted: `detected`, `ready`, `error` (see WakeWordServiceEvents).
 *
 * Lifecycle notes:
 *   - A child that exits with a non-zero code is respawned after
 *     RESTART_DELAY_MS, unless `stop()` was called.
 *   - `stop()` removes every listener registered on this service.
 */
export class WakeWordService extends EventEmitter {
  private process: ChildProcess | null = null;
  private readonly logger: Logger;
  private _isListening = false;
  private _isPaused = false;
  private _streamClosed = false;
  private readonly usesHardware: boolean;

  /** Pending auto-restart timer, if a crashed child is waiting to be respawned. */
  private restartTimer: ReturnType<typeof setTimeout> | null = null;

  /** Set by `stop()`; suppresses auto-restart until the next real spawn. */
  private stopRequested = false;

  /** Latched forwarder so we can detach it on stop / error. */
  private readonly forwardMicChunk = (chunk: Buffer): void => {
    const stdin = this.process?.stdin;
    if (!stdin || stdin.destroyed) return;
    stdin.write(chunk, (err) => {
      if (err && (err as NodeJS.ErrnoException).code === 'EPIPE') {
        this.logger.warn('Wake word process stdin pipe broken — detaching audio');
        this.detachHardware();
      }
    });
  };

  /**
   * @param wakeWordConfig Detector settings (python path, script path, model, threshold).
   * @param audioConfig    Audio settings (sample rate, ALSA capture device).
   * @param hardware       When provided, selects ESP32 mode; `null` selects ALSA mode.
   */
  constructor(
    private readonly wakeWordConfig: WakeWordConfig,
    private readonly audioConfig: AudioConfig,
    private readonly hardware: HardwareService | null = null,
  ) {
    super();
    this.logger = createLogger('wake-word', 'info');
    this.usesHardware = hardware !== null;
  }

  /** True while a detector process is running and not paused. */
  get isListening(): boolean {
    return this._isListening && !this._isPaused;
  }

  /**
   * Spawn the detector process and wire up its pipes.
   *
   * If a process already exists this only resumes it when paused.
   */
  start(): void {
    if (this.process) {
      if (this._isPaused) this.resume();
      return;
    }

    // A manual start supersedes any pending auto-restart.
    this.clearRestartTimer();
    this.stopRequested = false;

    this.logger.info(
      {
        mode: this.usesHardware ? 'esp32' : 'alsa',
        model: this.wakeWordConfig.modelName,
        threshold: this.wakeWordConfig.threshold,
      },
      'Starting wake word detection',
    );

    const args = [
      this.wakeWordConfig.scriptPath,
      '--model', this.wakeWordConfig.modelName,
      '--threshold', String(this.wakeWordConfig.threshold),
      '--sample-rate', String(this.audioConfig.sampleRate),
    ];

    if (this.usesHardware) {
      args.push('--input', 'stdin', '--control-fd', '3');
    } else {
      args.push('--input', 'alsa', '--device', this.audioConfig.captureDevice);
    }

    // stdio layout:
    //   0: stdin  — audio in (ESP32 mode) or control (ALSA mode)
    //   1: stdout — DETECTED events
    //   2: stderr — status & log lines
    //   3: extra  — control pipe (ESP32 mode only)
    const stdio: ('pipe' | 'ignore')[] = this.usesHardware
      ? ['pipe', 'pipe', 'pipe', 'pipe']
      : ['pipe', 'pipe', 'pipe'];

    // Keep a local reference: handlers below must act on THIS child only,
    // never on whatever `this.process` points to by the time they fire.
    const child = spawn(this.wakeWordConfig.pythonPath, args, { stdio });
    this.process = child;

    this._isListening = true;
    this._isPaused = false;

    // Chunks are not line-aligned, so reassemble lines before matching tokens.
    const stdoutLines = createLineSplitter((line) => this.handleStdoutLine(line));
    child.stdout?.setEncoding('utf8');
    child.stdout?.on('data', stdoutLines.push);
    child.stdout?.on('end', stdoutLines.flush);

    const stderrLines = createLineSplitter((line) => this.handleStderrLine(line));
    child.stderr?.setEncoding('utf8');
    child.stderr?.on('data', stderrLines.push);
    child.stderr?.on('end', stderrLines.flush);

    // A stream 'error' without a listener is thrown as an uncaught exception;
    // a dying child must not be able to take the host process down.
    this.guardPipe(child.stdin, 'stdin');
    if (this.usesHardware) this.guardPipe(child.stdio[3], 'control');

    child.on('error', (err) => {
      this.logger.error({ err }, 'Wake word process error');
      if (this.process === child) {
        this._isListening = false;
        this.detachHardware();
        if (child.pid === undefined) {
          // Spawn itself failed: 'exit' may never fire, so release the slot
          // here or a later start() would believe a process is still running.
          this._isPaused = false;
          this.process = null;
        }
      }
      this.emitError(new Error(`Wake word process failed: ${err.message}`));
    });

    child.on('exit', (code) => {
      // Ignore late exits of a child that has already been replaced/released.
      if (this.process !== child) return;

      this._isListening = false;
      this._isPaused = false;
      this.detachHardware();
      this.process = null;

      if (code !== 0 && code !== null) {
        if (this.stopRequested) {
          this.logger.debug({ code }, 'Wake word process exited after stop');
          return;
        }
        this.logger.warn({ code }, 'Wake word process exited unexpectedly');
        this.scheduleRestart();
      }
    });

    // In ESP32 mode, start piping mic audio from the UART.
    if (this.usesHardware && this.hardware) {
      this.hardware.on('audio_up', this.forwardMicChunk);
    }
  }

  /**
   * Pause wake word detection.
   *
   * In ALSA mode we must wait for STREAM_CLOSED so arecord can reclaim
   * the device. In ESP32 mode the audio flow never stops — we just
   * tell the Python process to ignore detections.
   *
   * @returns A promise that resolves once the stream is reported closed,
   *          the process is gone, or STREAM_CLOSE_TIMEOUT_MS has elapsed.
   *          It never rejects.
   */
  pause(): Promise<void> {
    if (!this.process || this._isPaused) return Promise.resolve();

    this._isPaused = true;
    this._streamClosed = false;

    this.writeControl('PAUSE');

    if (this.usesHardware) {
      // No physical device to release — resolve immediately.
      return Promise.resolve();
    }

    return new Promise((resolve) => {
      // Clear BOTH timers on completion so neither outlives the promise.
      const finish = (): void => {
        clearInterval(poll);
        clearTimeout(deadline);
        resolve();
      };

      const poll = setInterval(() => {
        if (this._streamClosed || !this.process) finish();
      }, STREAM_CLOSE_POLL_MS);

      const deadline = setTimeout(() => {
        this.logger.warn('Timed out waiting for wake word audio stream to close');
        finish();
      }, STREAM_CLOSE_TIMEOUT_MS);
    });
  }

  /** Resume detection after `pause()`. No-op when not running or not paused. */
  resume(): void {
    if (!this.process || !this._isPaused) return;

    this._isPaused = false;
    this.writeControl('RESUME');
    this.logger.info('🎤 Resuming wake word listening...');
  }

  /**
   * Stop the detector: send QUIT, then SIGTERM after QUIT_GRACE_MS if the
   * child is still alive. Cancels any pending auto-restart and removes all
   * listeners registered on this service.
   */
  stop(): void {
    this.stopRequested = true;
    this.clearRestartTimer();

    const child = this.process;
    if (child) {
      this.writeControl('QUIT');
      this.detachHardware();

      // Act on the captured child only, so a process spawned by a later
      // start() can never be killed by this timer.
      const killTimer = setTimeout(() => {
        if (child.exitCode === null && child.signalCode === null) {
          child.kill('SIGTERM');
        }
        if (this.process === child) this.process = null;
      }, QUIT_GRACE_MS);
      child.once('exit', () => clearTimeout(killTimer));
    }

    this._isListening = false;
    this._isPaused = false;
    this.removeAllListeners();
  }

  // ──────────────────────────────────────────────────────────
  // Internals
  // ──────────────────────────────────────────────────────────

  /** Handle one complete, trimmed stdout line from the detector. */
  private handleStdoutLine(line: string): void {
    if (line === 'DETECTED') {
      this.logger.info('🎙️ Wake word detected!');
      this.emit('detected');
    } else {
      this.logger.debug({ line }, 'Wake word stdout');
    }
  }

  /** Handle one complete, trimmed stderr status/log line from the detector. */
  private handleStderrLine(msg: string): void {
    switch (msg) {
      case 'READY':
        this.logger.info('🎤 Wake word engine ready — listening...');
        this.emit('ready');
        return;
      case 'PAUSED':
        this._streamClosed = false;
        this.logger.debug('Wake word paused');
        return;
      case 'STREAM_CLOSED':
        this._streamClosed = true;
        this.logger.debug('Wake word audio stream closed');
        return;
      case 'RESUMED':
        this.logger.debug('Wake word resumed');
        return;
      case 'STREAM_REOPENED':
        this.logger.debug('Wake word audio stream reopened');
        return;
      default:
        break;
    }

    if (msg.startsWith('Loading wake word model')) {
      this.logger.info('⏳ Loading wake word model...');
    } else if (msg.startsWith('Wake word model loaded')) {
      this.logger.info('✅ Wake word model loaded');
    } else if (
      msg.startsWith('Matched device') ||
      msg.startsWith('Using device') ||
      msg.startsWith('Listening')
    ) {
      this.logger.info(`🔊 ${msg}`);
    } else {
      this.logger.warn('Wake word stderr: %s', msg);
    }
  }

  /**
   * Write a text control command. In ALSA mode that goes to stdin;
   * in ESP32 mode stdin carries audio so commands travel over the
   * extra pipe at fd 3 (process.stdio[3]).
   */
  private writeControl(cmd: string): void {
    const child = this.process;
    if (!child) return;

    const target = this.usesHardware ? child.stdio[3] : child.stdin;
    // `'write' in target` narrows the Readable | Writable union to a Writable.
    if (target && 'write' in target && !target.destroyed) {
      target.write(`${cmd}\n`);
    }
  }

  /** Attach an 'error' listener so a broken pipe is logged instead of thrown. */
  private guardPipe(pipe: NodeJS.EventEmitter | null | undefined, name: string): void {
    pipe?.on('error', (err: Error) => {
      this.logger.warn({ err, pipe: name }, 'Wake word pipe error');
    });
  }

  /** Emit 'error' only when someone listens; an unhandled 'error' emit throws. */
  private emitError(error: Error): void {
    if (this.listenerCount('error') > 0) {
      this.emit('error', error);
    }
  }

  /** Arm the auto-restart timer, replacing any timer that is already pending. */
  private scheduleRestart(): void {
    this.clearRestartTimer();
    this.restartTimer = setTimeout(() => {
      this.restartTimer = null;
      this.logger.info('Restarting wake word detection...');
      this.start();
    }, RESTART_DELAY_MS);
  }

  /** Cancel the pending auto-restart, if any. */
  private clearRestartTimer(): void {
    if (this.restartTimer !== null) {
      clearTimeout(this.restartTimer);
      this.restartTimer = null;
    }
  }

  /** Stop forwarding UART mic chunks to the child (ESP32 mode only). */
  private detachHardware(): void {
    if (this.usesHardware && this.hardware) {
      this.hardware.off('audio_up', this.forwardMicChunk);
    }
  }
}
|