ti-pote/apps/robot-client/scripts/audio-loopback.ts
2026-04-09 02:47:53 +02:00

172 lines
5.7 KiB
TypeScript

/**
* Ti-Pote — End-to-end audio loopback test.
*
* What it proves: the whole Pi ↔ ESP32 ↔ mic/speaker chain works,
* without bringing the cloud/wake-word/orchestrator into the picture.
*
* What it does:
* 1. Opens the serial link to the ESP32.
* 2. Captures `CAPTURE_MS` (default 5000) of mic audio via
* AUDIO_UP frames into a single in-memory buffer.
* 3. Pauses briefly.
* 4. Streams that buffer back to the ESP32 as AUDIO_DOWN frames
* and waits for the speaker to finish playing.
*
* Expected result: you say "allô allô" during step 2 and hear your
* own voice played back on the robot's speaker a moment later.
*
* Run with:
* HARDWARE_SERIAL_PORT=/dev/serial0 pnpm --filter @ti-pote/robot-client audio:loopback
*
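* Optional env:
* CAPTURE_MS — capture duration in ms (default 5000)
* HARDWARE_SERIAL_PORT / HARDWARE_SERIAL_BAUD — serial device and baud rate
* (defaults: /dev/serial0 and 921600)
* DEBUG — when set, logs the head of the first frames and dumps the raw capture
* DUMP_PATH — where DEBUG writes the raw capture (default /tmp/tipote-capture.raw)
* SKIP_PLAYBACK — when set, stops after the capture step (no AUDIO_DOWN is sent)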
*/
import { writeFileSync } from 'node:fs';
import { HardwareService, Emotion } from '../src/hardware/index.js';
import { Esp32AudioService } from '../src/services/audio.service.js';
/**
 * Reads a strictly positive integer from the environment variable `name`.
 *
 * Returns `fallback` when the variable is unset. When it is set but does not
 * parse to a positive integer (empty string, garbage, zero, negative), a
 * warning is printed and `fallback` is used instead: a NaN or zero duration
 * would otherwise make the capture timer fire immediately and produce a
 * misleading "no audio received" diagnosis.
 */
function parsePositiveIntEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined) return fallback;
  const value = parseInt(raw, 10);
  if (Number.isNaN(value) || value <= 0) {
    console.warn(`[config] ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
    return fallback;
  }
  return value;
}

// Serial link settings.
const path = process.env.HARDWARE_SERIAL_PORT ?? '/dev/serial0';
const baudRate = parsePositiveIntEnv('HARDWARE_SERIAL_BAUD', 921600);
// How long to record from the mic, in milliseconds.
const captureMs = parsePositiveIntEnv('CAPTURE_MS', 5000);
// Any non-empty DEBUG value enables frame logging and the raw capture dump.
const debug = !!process.env.DEBUG;
const dumpPath = process.env.DUMP_PATH ?? '/tmp/tipote-capture.raw';
// Any non-empty SKIP_PLAYBACK value stops the script after the capture step.
const skipPlayback = !!process.env.SKIP_PLAYBACK;
// Capture format used for duration math and playback: 16 kHz, 2 bytes per sample.
const SAMPLE_RATE = 16000;
const BYTES_PER_SAMPLE = 2;
// Number of frames already printed by the debug listener.
let debugFramesSeen = 0;
/** Returns a promise that resolves once a timer of `ms` milliseconds fires. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Runs the capture → playback loopback against the ESP32.
 *
 * Sequence: connect over serial and ping, record `captureMs` ms of
 * `audio_up` chunks into memory, report size and RMS level, optionally dump
 * debug information, then play the capture back through `Esp32AudioService`.
 *
 * When no audio at all is received the process exit code is set to 1 so
 * shell callers can detect the failure. The serial link is always closed on
 * the way out, including when an earlier step threw.
 */
async function main(): Promise<void> {
  const hw = new HardwareService({ path, baudRate, heartbeatIntervalMs: 1000 });
  hw.on('log', (line) => console.log(`[firmware] ${line}`));
  hw.on('error', (err) => console.error(`[firmware error] ${err.message}`));
  if (debug) {
    hw.on('audio_up', (chunk) => {
      // Print first 8 int16 samples of the first few frames
      // so we can see whether the wire carries zeros or real data.
      if (debugFramesSeen < 3) {
        const head: number[] = [];
        // `i + 1 < limit` keeps every 2-byte read inside the chunk, even
        // when a short chunk has an odd length.
        const limit = Math.min(chunk.length, 16);
        for (let i = 0; i + 1 < limit; i += 2) {
          head.push(chunk.readInt16LE(i));
        }
        console.log(`[debug] frame ${debugFramesSeen} len=${chunk.length} head=${head.join(',')}`);
        debugFramesSeen++;
      }
    });
  }
  console.log(`→ opening ${path} @ ${baudRate} baud`);
  await hw.connect();
  try {
    const rtt = await hw.ping(Buffer.from('loopback'));
    console.log(`→ ping round-trip: ${rtt.toFixed(1)} ms`);
    hw.sendEmotion(Emotion.SURPRISED);

    // ── 1. Capture ────────────────────────────────────────────────
    const chunks: Buffer[] = [];
    let bytesCaptured = 0;
    const collect = (chunk: Buffer): void => {
      chunks.push(chunk);
      bytesCaptured += chunk.length;
    };
    hw.on('audio_up', collect);
    console.log(`🎙️ Recording ${captureMs} ms — say something!`);
    await sleep(captureMs);
    hw.off('audio_up', collect);

    const capture = Buffer.concat(chunks);
    // Count whole samples only; a trailing odd byte is not a sample.
    const samples = Math.floor(capture.length / BYTES_PER_SAMPLE);
    const durationMs = (samples / SAMPLE_RATE) * 1000;
    console.log(
      `✅ captured ${capture.length} bytes (${samples} samples, ${durationMs.toFixed(0)} ms)` +
        ` across ${chunks.length} frames`,
    );
    if (capture.length === 0) {
      console.error(
        '❌ no audio received from the ESP32. Check the I2S wiring ' +
          '(BCLK=32, LRCLK=33, DIN=34) and that the firmware got past `audio: I2S ready`.',
      );
      // Report the failure through the exit status while still letting the
      // `finally` block below close the serial link.
      process.exitCode = 1;
      return;
    }

    // Quick RMS sanity check so we catch "mic muted" / "disconnected" early.
    const rms = computeRms(capture);
    console.log(` RMS level: ${rms.toFixed(0)} (silence ≈ 10, speech ≳ 500)`);

    if (debug) {
      // Dump the raw capture so we can replay it offline:
      // aplay -r 16000 -f S16_LE -c 1 /tmp/tipote-capture.raw
      writeFileSync(dumpPath, capture);
      console.log(`[debug] raw capture written to ${dumpPath} (${capture.length} bytes)`);
      const allZero = capture.every((b) => b === 0);
      console.log(`[debug] capture.allZero=${allZero}`);
      // Also print some distinct int16 values we saw, to spot patterns.
      const seen = new Set<number>();
      for (let i = 0; i < capture.length - 1 && seen.size < 10; i += 2) {
        seen.add(capture.readInt16LE(i));
      }
      console.log(`[debug] first distinct samples: ${[...seen].join(',')}`);
    }

    if (skipPlayback) {
      console.log('SKIP_PLAYBACK set — not sending AUDIO_DOWN');
      return;
    }

    // ── 2. Playback ───────────────────────────────────────────────
    await sleep(500);
    const audio = new Esp32AudioService(
      {
        backend: 'esp32',
        captureDevice: 'default',
        playbackDevice: 'default',
        sampleRate: SAMPLE_RATE,
        bitDepth: 16,
        channels: 1,
        chunkDurationMs: 20,
      },
      hw,
    );
    hw.sendEmotion(Emotion.HAPPY);
    console.log('🔊 Playing back on the ESP32 speaker...');
    await audio.play(capture);
    console.log('✅ playback done');
  } finally {
    try {
      hw.sendEmotion(Emotion.NEUTRAL);
      // Short pause before closing the port (presumably lets the last frame
      // go out on the wire — confirm against HardwareService).
      await sleep(200);
    } finally {
      // Always release the serial port, even if resetting the emotion threw.
      await hw.disconnect();
    }
  }
}
/**
 * Computes the root-mean-square level of a buffer of little-endian signed
 * 16-bit samples.
 *
 * Only whole samples are considered: a trailing odd byte is ignored and does
 * not count toward the divisor.
 *
 * @param buf Raw bytes, read two at a time with `readInt16LE`.
 * @returns The RMS of the samples, or 0 when the buffer holds no full sample.
 */
function computeRms(buf: Buffer): number {
  const sampleCount = Math.floor(buf.length / 2);
  if (sampleCount === 0) return 0;
  let sumSquares = 0;
  for (let i = 0; i < sampleCount; i++) {
    const s = buf.readInt16LE(i * 2);
    sumSquares += s * s;
  }
  return Math.sqrt(sumSquares / sampleCount);
}
// Script entry point: run the loopback; if it rejects, log the reason and
// exit with status 1.
main().catch((failure: unknown) => {
  console.error('loopback failed:', failure);
  process.exit(1);
});