/**
 * Ti-Pote — End-to-end audio loopback test.
 *
 * What it proves: the whole Pi ↔ ESP32 ↔ mic/speaker chain works,
 * without bringing the cloud/wake-word/orchestrator into the picture.
 *
 * What it does:
 *   1. Opens the serial link to the ESP32.
 *   2. Captures `CAPTURE_MS` (default 5000) of mic audio via
 *      AUDIO_UP frames into a single in-memory buffer.
 *   3. Pauses briefly.
 *   4. Streams that buffer back to the ESP32 as AUDIO_DOWN frames
 *      and waits for the speaker to finish playing.
 *
 * Expected result: you say "allô allô" during step 2 and hear your
 * own voice played back on the robot's speaker a moment later.
 *
 * Run with:
 *   HARDWARE_SERIAL_PORT=/dev/serial0 pnpm --filter @ti-pote/robot-client audio:loopback
 *
 * Optional env:
 *   CAPTURE_MS            — capture duration in ms (default 5000)
 *   HARDWARE_SERIAL_PORT  — serial device path (default /dev/serial0)
 *   HARDWARE_SERIAL_BAUD  — serial baud rate (default 921600)
 *   DEBUG                 — any non-empty value: log the head of the first
 *                           frames and dump the raw capture to DUMP_PATH
 *   DUMP_PATH             — where DEBUG writes the raw capture
 *                           (default /tmp/tipote-capture.raw)
 *   SKIP_PLAYBACK         — any non-empty value: stop after the capture step
 */

import { writeFileSync } from 'node:fs';
import { HardwareService, Emotion } from '../src/hardware/index.js';
import { Esp32AudioService } from '../src/services/audio.service.js';

const path = process.env.HARDWARE_SERIAL_PORT ?? '/dev/serial0';
const baudRate = parseInt(process.env.HARDWARE_SERIAL_BAUD ?? '921600', 10);
const captureMs = parseInt(process.env.CAPTURE_MS ?? '5000', 10);
const debug = !!process.env.DEBUG;
const dumpPath = process.env.DUMP_PATH ?? '/tmp/tipote-capture.raw';
const skipPlayback = !!process.env.SKIP_PLAYBACK;

// Audio format used for the duration maths and handed to the playback service.
const SAMPLE_RATE = 16000;
const BYTES_PER_SAMPLE = 2;

// Number of AUDIO_UP frames already printed by the DEBUG listener.
let debugFramesSeen = 0;

/**
 * Resolves after `ms` milliseconds.
 *
 * @param ms - delay in milliseconds
 */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, ms));
}

/**
 * Runs the loopback: connect, ping, capture `captureMs` of AUDIO_UP data,
 * report basic statistics, then (unless SKIP_PLAYBACK is set) play the
 * capture back through the ESP32 speaker. The link is always disconnected
 * on the way out.
 *
 * @throws Error when CAPTURE_MS or HARDWARE_SERIAL_BAUD is not a positive integer
 */
async function main(): Promise<void> {
  // A non-numeric value parses to NaN; without this check the capture sleep
  // would return almost immediately and the "no audio" hint would blame wiring.
  if (!Number.isFinite(baudRate) || baudRate <= 0) {
    throw new Error(
      `invalid HARDWARE_SERIAL_BAUD "${process.env.HARDWARE_SERIAL_BAUD ?? ''}": expected a positive integer`,
    );
  }
  if (!Number.isFinite(captureMs) || captureMs <= 0) {
    throw new Error(
      `invalid CAPTURE_MS "${process.env.CAPTURE_MS ?? ''}": expected a positive integer`,
    );
  }

  const hw = new HardwareService({ path, baudRate, heartbeatIntervalMs: 1000 });

  hw.on('log', (line) => console.log(`[firmware] ${line}`));
  hw.on('error', (err) => console.error(`[firmware error] ${err.message}`));

  if (debug) {
    hw.on('audio_up', (chunk) => {
      // Print up to the first 8 int16 samples of the first 3 frames
      // so we can see whether the wire carries zeros or real data.
      if (debugFramesSeen < 3) {
        const head: number[] = [];
        const limit = Math.min(chunk.length, 16);
        // `i + 1 < limit` stops at the last complete sample, so a short
        // odd-length frame cannot make readInt16LE read past the end.
        for (let i = 0; i + 1 < limit; i += 2) {
          head.push(chunk.readInt16LE(i));
        }
        console.log(`[debug] frame ${debugFramesSeen} len=${chunk.length} head=${head.join(',')}`);
        debugFramesSeen++;
      }
    });
  }

  console.log(`→ opening ${path} @ ${baudRate} baud`);
  await hw.connect();

  try {
    const rtt = await hw.ping(Buffer.from('loopback'));
    console.log(`→ ping round-trip: ${rtt.toFixed(1)} ms`);

    hw.sendEmotion(Emotion.SURPRISED);

    // ── 1. Capture ────────────────────────────────────────────────
    const chunks: Buffer[] = [];
    let bytesCaptured = 0;
    const collect = (chunk: Buffer): void => {
      chunks.push(chunk);
      bytesCaptured += chunk.length;
    };
    hw.on('audio_up', collect);

    console.log(`🎙️ Recording ${captureMs} ms — say something!`);
    await sleep(captureMs);
    hw.off('audio_up', collect);

    const capture = Buffer.concat(chunks);
    // Count whole samples only; a trailing odd byte is not a sample.
    const samples = Math.floor(capture.length / BYTES_PER_SAMPLE);
    const durationMs = (samples / SAMPLE_RATE) * 1000;
    console.log(
      `✅ captured ${capture.length} bytes (${samples} samples, ${durationMs.toFixed(0)} ms)` +
        ` across ${chunks.length} frames`,
    );

    if (capture.length === 0) {
      console.error(
        '❌ no audio received from the ESP32. Check the I2S wiring ' +
          '(BCLK=32, LRCLK=33, DIN=34) and that the firmware got past `audio: I2S ready`.',
      );
      return;
    }

    // Quick RMS sanity check so we catch "mic muted" / "disconnected" early.
    const rms = computeRms(capture);
    console.log(` RMS level: ${rms.toFixed(0)} (silence ≈ 10, speech ≳ 500)`);

    if (debug) {
      // Dump the raw capture so we can replay it offline:
      //   aplay -r 16000 -f S16_LE -c 1 /tmp/tipote-capture.raw
      writeFileSync(dumpPath, capture);
      console.log(`[debug] raw capture written to ${dumpPath} (${capture.length} bytes)`);

      const allZero = capture.every((b) => b === 0);
      console.log(`[debug] capture.allZero=${allZero}`);

      // Also print some distinct int16 values we saw, to spot patterns.
      const seen = new Set<number>();
      for (let i = 0; i < capture.length - 1 && seen.size < 10; i += 2) {
        seen.add(capture.readInt16LE(i));
      }
      console.log(`[debug] first distinct samples: ${[...seen].join(',')}`);
    }

    if (skipPlayback) {
      console.log('SKIP_PLAYBACK set — not sending AUDIO_DOWN');
      return;
    }

    // ── 2. Playback ───────────────────────────────────────────────
    await sleep(500);

    const audio = new Esp32AudioService(
      {
        backend: 'esp32',
        captureDevice: 'default',
        playbackDevice: 'default',
        sampleRate: SAMPLE_RATE,
        bitDepth: 16,
        channels: 1,
        chunkDurationMs: 20,
      },
      hw,
    );

    hw.sendEmotion(Emotion.HAPPY);
    console.log('🔊 Playing back on the ESP32 speaker...');
    await audio.play(capture);
    console.log('✅ playback done');
  } finally {
    // Nested try/finally: if resetting the emotion throws (e.g. the link is
    // already gone), the serial port must still be released.
    try {
      hw.sendEmotion(Emotion.NEUTRAL);
      // NOTE(review): presumably gives the last frame time to leave before
      // the port closes — confirm against HardwareService.
      await sleep(200);
    } finally {
      await hw.disconnect();
    }
  }
}

/**
 * Root-mean-square level of a buffer of signed 16-bit little-endian samples.
 *
 * @param buf - raw PCM bytes; a trailing odd byte is ignored
 * @returns the RMS amplitude, or 0 when the buffer holds no complete sample
 */
function computeRms(buf: Buffer): number {
  // Floor so the divisor matches the number of samples actually read.
  const samples = Math.floor(buf.length / 2);
  if (samples === 0) return 0;

  let sumSquares = 0;
  for (let i = 0; i < samples; i++) {
    const s = buf.readInt16LE(i * 2);
    sumSquares += s * s;
  }
  return Math.sqrt(sumSquares / samples);
}

main().catch((err) => {
  console.error('loopback failed:', err);
  process.exit(1);
});