From 02705ea8b51e07d8fefa436e137e1baa4b435169 Mon Sep 17 00:00:00 2001
From: ordinarthur
Date: Thu, 9 Apr 2026 13:19:51 +0200
Subject: [PATCH] wake word ok !

---
Review notes (this section is not part of the commit message):
- Line structure and UTF-8 text were restored from a whitespace-collapsed
  copy. Indentation was rebuilt as 2-space and is a best guess for the
  context lines of the three modified files; use
  `git apply --ignore-whitespace` if a hunk is rejected.
- scripts/test_wakeword.ts was revised during review (shebang, child-process
  and stream error handling, line-buffered output parsing, flag validation,
  flush-before-exit, idempotent shutdown). The blob id on its `index` line
  was not regenerated.

 apps/robot-client/scripts/test_wakeword.ts    | 199 ++++++++++++++++++
 .../src/config/hardware.config.ts             |   2 +-
 .../src/services/orchestrator.service.ts      |   4 +-
 .../src/services/wake-word.service.ts         |  12 +-
 4 files changed, 209 insertions(+), 8 deletions(-)
 create mode 100644 apps/robot-client/scripts/test_wakeword.ts

diff --git a/apps/robot-client/scripts/test_wakeword.ts b/apps/robot-client/scripts/test_wakeword.ts
new file mode 100644
index 0000000..f92cfd8
--- /dev/null
+++ b/apps/robot-client/scripts/test_wakeword.ts
@@ -0,0 +1,199 @@
+#!/usr/bin/env -S npx tsx
+/**
+ * Test wake word detection using live ESP32 audio.
+ *
+ * Usage:
+ *   npx tsx scripts/test_wakeword.ts [--threshold 0.5] [--record out.raw]
+ *
+ * Connects to the ESP32 via serial, reads AUDIO_UP frames, and pipes
+ * the raw PCM into the wake_word.py subprocess. Prints detections live.
+ *
+ *   --record  Also dump raw PCM to a file so you can replay it later:
+ *     python3 scripts/wake_word.py --model hey_jarvis --input stdin < out.raw
+ */
+import { SerialPort } from 'serialport';
+import { spawn, type ChildProcess } from 'node:child_process';
+import { createWriteStream, type WriteStream } from 'node:fs';
+import { createInterface } from 'node:readline';
+import { Writable } from 'node:stream';
+import { parseArgs } from 'node:util';
+import { FrameDecoder, MsgType, encodeFrame } from '../src/hardware/protocol.js';
+
+const { values } = parseArgs({
+  options: {
+    threshold: { type: 'string', default: '0.5' },
+    record: { type: 'string' },
+    model: { type: 'string', default: 'hey_jarvis' },
+    python: { type: 'string', default: process.env.WAKEWORD_PYTHON_PATH || 'python3' },
+    port: { type: 'string', default: '/dev/serial0' },
+    baud: { type: 'string', default: '921600' },
+  },
+});
+
+const threshold = values.threshold!;
+const model = values.model!;
+const pythonPath = values.python!;
+const serialPath = values.port!;
+const baudRate = Number.parseInt(values.baud!, 10);
+
+// Reject unusable numeric flags up front instead of failing later inside
+// serialport or the Python engine with a less obvious error.
+if (Number.isNaN(Number.parseFloat(threshold))) {
+  console.error(`Invalid --threshold "${threshold}" (expected a number)`);
+  process.exit(1);
+}
+if (!Number.isInteger(baudRate) || baudRate <= 0) {
+  console.error(`Invalid --baud "${values.baud}" (expected a positive integer)`);
+  process.exit(1);
+}
+
+let recordStream: WriteStream | null = null;
+if (values.record) {
+  recordStream = createWriteStream(values.record);
+  // Without a listener an unwritable path would surface as an uncaught 'error' event.
+  recordStream.on('error', (err) => {
+    console.error(`Failed to write ${values.record}:`, err.message);
+    process.exit(1);
+  });
+  console.log(`📝 Recording raw PCM to ${values.record}`);
+}
+
+let shuttingDown = false;
+let heartbeat: NodeJS.Timeout | undefined;
+
+/** Flush the optional recording to disk, then terminate with `exitCode`. */
+function exitAfterFlush(exitCode: number): void {
+  if (!recordStream || recordStream.destroyed) {
+    process.exit(exitCode);
+  }
+  // process.exit() discards pending async writes, so wait for end()'s callback.
+  recordStream.end(() => {
+    console.log(`📝 Recording saved`);
+    process.exit(exitCode);
+  });
+}
+
+// ── Spawn Python wake word process ──
+const pyArgs = [
+  './scripts/wake_word.py',
+  '--model', model,
+  '--threshold', threshold,
+  '--sample-rate', '16000',
+  '--input', 'stdin',
+  '--control-fd', '3',
+];
+
+console.log(`🐍 Spawning: ${pythonPath} ${pyArgs.join(' ')}`);
+console.log(`🎤 Threshold: ${threshold} | Model: ${model}`);
+console.log(`🔌 Serial: ${serialPath} @ ${baudRate}\n`);
+
+// The fourth stdio entry opens fd 3 as a pipe, matching `--control-fd 3` above.
+const py: ChildProcess = spawn(pythonPath, pyArgs, {
+  stdio: ['pipe', 'pipe', 'pipe', 'pipe'],
+});
+
+// Spawn failures (e.g. interpreter not found) arrive as 'error', not 'exit'.
+py.on('error', (err) => {
+  console.error(`\n❌ Failed to start ${pythonPath}:`, err.message);
+  process.exit(1);
+});
+
+// A write racing with Python's exit raises EPIPE on stdin; without a listener
+// that would be an uncaught 'error' event. The 'exit' handler ends the run.
+py.stdin?.on('error', (err) => {
+  if (!shuttingDown) console.error('Python stdin error:', err.message);
+});
+
+// Read line-by-line: a raw 'data' chunk may end in the middle of a line,
+// which would make the exact match on "DETECTED" miss.
+if (py.stdout) {
+  createInterface({ input: py.stdout }).on('line', (line) => {
+    if (line.trim() === 'DETECTED') {
+      console.log(`\n🟢 DETECTED at ${new Date().toLocaleTimeString()}\n`);
+    }
+  });
+}
+
+if (py.stderr) {
+  createInterface({ input: py.stderr }).on('line', (line) => {
+    const msg = line.trim();
+    if (msg === '') return;
+    if (msg === 'READY') {
+      console.log('✅ Wake word engine ready — say "Hey Jarvis"!\n');
+    } else if (msg.startsWith('Loading')) {
+      console.log(`⏳ ${msg}`);
+    } else if (msg.startsWith('Wake word model loaded')) {
+      console.log(`✅ ${msg}`);
+    } else if (!msg.includes('onnxruntime') && !msg.includes('UserWarning') && !msg.includes('warnings.warn')) {
+      console.log(` [py] ${msg}`);
+    }
+  });
+}
+
+py.on('exit', (code, signal) => {
+  const how = code === null ? `signal ${signal}` : `code ${code}`;
+  console.log(`\n❌ Python process exited with ${how}`);
+  exitAfterFlush(code ?? 1);
+});
+
+// ── Open serial and forward AUDIO_UP to Python stdin ──
+let audioChunks = 0;
+
+const decoder = new FrameDecoder((frame) => {
+  if (frame.type !== MsgType.AUDIO_UP) return;
+  audioChunks++;
+  if (py.stdin?.writable) {
+    py.stdin.write(frame.payload);
+  }
+  if (recordStream?.writable) {
+    recordStream.write(frame.payload);
+  }
+  // Progress indicator every ~1s (assuming ~100ms chunks)
+  if (audioChunks % 10 === 0) {
+    process.stdout.write(`\r🎧 Audio chunks: ${audioChunks} `);
+  }
+});
+
+const serial = new SerialPort({ path: serialPath, baudRate, autoOpen: false });
+
+serial.on('data', (chunk: Buffer) => decoder.feed(chunk));
+serial.on('error', (err) => {
+  console.error('Serial error:', err.message);
+  process.exit(1);
+});
+
+serial.open((err) => {
+  if (err) {
+    console.error(`Failed to open ${serialPath}:`, err.message);
+    process.exit(1);
+  }
+  console.log(`🔌 Serial port open: ${serialPath}`);
+
+  // Send heartbeat so ESP32 stays active
+  heartbeat = setInterval(() => {
+    if (serial.isOpen) serial.write(encodeFrame(MsgType.STATUS));
+  }, 1000);
+});
+
+// ── Graceful shutdown ──
+function cleanup(): void {
+  // A second Ctrl-C must not queue another QUIT / kill timer.
+  if (shuttingDown) return;
+  shuttingDown = true;
+  console.log('\n\nShutting down...');
+  clearInterval(heartbeat);
+
+  const control = py.stdio[3];
+  if (control instanceof Writable && !control.destroyed) {
+    control.write('QUIT\n');
+  }
+  // Give Python a moment to act on QUIT, then force the issue.
+  setTimeout(() => {
+    py.kill('SIGTERM');
+    if (serial.isOpen) serial.close();
+    exitAfterFlush(0);
+  }, 500);
+}
+
+process.on('SIGINT', cleanup);
+process.on('SIGTERM', cleanup);
diff --git a/apps/robot-client/src/config/hardware.config.ts b/apps/robot-client/src/config/hardware.config.ts
index e0c330a..3fea598 100644
--- a/apps/robot-client/src/config/hardware.config.ts
+++ b/apps/robot-client/src/config/hardware.config.ts
@@ -74,7 +74,7 @@ export function loadHardwareConfig(): HardwareConfig {
       pythonPath: process.env.WAKEWORD_PYTHON_PATH || 'python3',
       scriptPath: process.env.WAKEWORD_SCRIPT_PATH || './scripts/wake_word.py',
       modelName: process.env.WAKEWORD_MODEL || 'hey_ti_pote',
-      threshold: parseFloat(process.env.WAKEWORD_THRESHOLD || '0.5'),
+      threshold: parseFloat(process.env.WAKEWORD_THRESHOLD || '0.75'),
     },
     serial: {
       // The ESP32 is now the mic/speaker front-end — serial link is
diff --git a/apps/robot-client/src/services/orchestrator.service.ts b/apps/robot-client/src/services/orchestrator.service.ts
index c06e00f..4aabbe5 100644
--- a/apps/robot-client/src/services/orchestrator.service.ts
+++ b/apps/robot-client/src/services/orchestrator.service.ts
@@ -268,8 +268,8 @@ export class OrchestratorService extends EventEmitter {
       }
     }
 
-    // After playback, continue listening for more speech (continuous conversation)
-    this.continueListening();
+    // After playback, return to idle and wait for a new wake word
+    this.returnToIdle();
   }
 
   /**
diff --git a/apps/robot-client/src/services/wake-word.service.ts b/apps/robot-client/src/services/wake-word.service.ts
index 7d2b30a..74ee7b1 100644
--- a/apps/robot-client/src/services/wake-word.service.ts
+++ b/apps/robot-client/src/services/wake-word.service.ts
@@ -42,10 +42,12 @@ export class WakeWordService extends EventEmitter {
   /** Latched forwarder so we can detach it on stop / error. */
   private readonly forwardMicChunk = (chunk: Buffer): void => {
     if (!this.process || !this.process.stdin || this.process.stdin.destroyed) return;
-    // Node gracefully buffers writes if the pipe is full; we don't
-    // apply back-pressure here because dropping wake-word audio would
-    // just hurt detection accuracy for a few tens of ms.
-    this.process.stdin.write(chunk);
+    this.process.stdin.write(chunk, (err) => {
+      if (err && (err as NodeJS.ErrnoException).code === 'EPIPE') {
+        this.logger.warn('Wake word process stdin pipe broken — detaching audio');
+        this.detachHardware();
+      }
+    });
   };
 
   constructor(
@@ -142,7 +144,7 @@ export class WakeWordService extends EventEmitter {
         } else if (msg.startsWith('Matched device') || msg.startsWith('Using device') || msg.startsWith('Listening')) {
           this.logger.info(`🔊 ${msg}`);
         } else {
-          this.logger.warn({ msg }, 'Wake word stderr');
+          this.logger.warn('Wake word stderr: %s', msg);
         }
       }
     });