/**
 * Ti-Pote — End-to-end audio loopback test.
 *
 * What it proves: the whole Pi ↔ ESP32 ↔ mic/speaker chain works,
 * without bringing the cloud/wake-word/orchestrator into the picture.
 *
 * What it does:
 *   1. Opens the serial link to the ESP32.
 *   2. Captures `CAPTURE_MS` (default 5000) of mic audio via
 *      AUDIO_UP frames into a single in-memory buffer.
 *   3. Pauses briefly.
 *   4. Streams that buffer back to the ESP32 as AUDIO_DOWN frames
 *      and waits for the speaker to finish playing.
 *
 * Expected result: you say "allô allô" during step 2 and hear your
 * own voice played back on the robot's speaker a moment later.
 *
 * Run with:
 *   HARDWARE_SERIAL_PORT=/dev/serial0 pnpm --filter @ti-pote/robot-client audio:loopback
 *
 * Optional env:
 *   CAPTURE_MS            — capture duration in ms (default 5000)
 *   HARDWARE_SERIAL_PORT  — serial device path (default /dev/serial0)
 *   HARDWARE_SERIAL_BAUD  — serial baud rate (default 921600)
 *   DEBUG                 — any non-empty value prints the first frames and
 *                           dumps the raw capture to DUMP_PATH
 *   DUMP_PATH             — where the debug dump is written
 *                           (default /tmp/tipote-capture.raw)
 *   SKIP_PLAYBACK         — any non-empty value stops after the capture step
 */

import { writeFileSync } from 'node:fs';
import { HardwareService, Emotion } from '../src/hardware/index.js';
import { Esp32AudioService } from '../src/services/audio.service.js';

/**
 * Reads a positive integer setting from the environment.
 *
 * An unset or blank variable yields `fallback`. Anything that `parseInt`
 * cannot turn into a finite value greater than zero is rejected, so a typo
 * such as `CAPTURE_MS=abc` fails loudly instead of turning into `NaN`.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is unset or blank.
 * @returns The parsed integer, or `fallback`.
 * @throws Error when the variable is set but is not a positive integer.
 */
function readPositiveIntEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === '') return fallback;

  // parseInt (rather than Number) keeps accepting every value the script
  // accepted before, e.g. "5000ms" still parses as 5000.
  const value = parseInt(raw, 10);
  if (!Number.isFinite(value) || value <= 0) {
    throw new Error(`${name} must be a positive integer, got "${raw}"`);
  }
  return value;
}

// Serial link settings.
const path = process.env.HARDWARE_SERIAL_PORT ?? '/dev/serial0';
const baudRate = readPositiveIntEnv('HARDWARE_SERIAL_BAUD', 921600);

// How long the capture step listens for audio, in milliseconds.
const captureMs = readPositiveIntEnv('CAPTURE_MS', 5000);

// Flags are on for ANY non-empty value, including "0" and "false".
const debug = !!process.env.DEBUG;
const dumpPath = process.env.DUMP_PATH ?? '/tmp/tipote-capture.raw';
const skipPlayback = !!process.env.SKIP_PLAYBACK;

// Capture format: 16 kHz, 2 bytes per sample.
const SAMPLE_RATE = 16000;
const BYTES_PER_SAMPLE = 2;

// Number of frames already printed by the debug listener.
let debugFramesSeen = 0;

/**
 * Waits for a fixed delay.
 *
 * @param ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Runs the loopback test: connect, capture, sanity-check, play back.
 *
 * Steps, in order:
 *  - connect to the ESP32 and measure a ping round-trip;
 *  - collect `audio_up` chunks for `captureMs` milliseconds;
 *  - report size and RMS level, and in debug mode dump the raw capture;
 *  - unless `skipPlayback` is set, play the capture via `Esp32AudioService`.
 *
 * The link is always disconnected on the way out, whichever step fails.
 * When no audio at all was captured the process exit code is set to 1 so
 * callers and CI can tell the run failed.
 */
async function main(): Promise<void> {
  const hw = new HardwareService({ path, baudRate, heartbeatIntervalMs: 1000 });
  hw.on('log', (line) => console.log(`[firmware] ${line}`));
  hw.on('error', (err) => console.error(`[firmware error] ${err.message}`));
  if (debug) {
    hw.on('audio_up', (chunk) => {
      // Print first 8 int16 samples of the first few frames
      // so we can see whether the wire carries zeros or real data.
      if (debugFramesSeen < 3) {
        const head: number[] = [];
        const previewBytes = Math.min(chunk.length, 16);
        // `i + 1 < previewBytes` only reads complete samples; an odd-length
        // frame shorter than 16 bytes would otherwise make readInt16LE throw.
        for (let i = 0; i + 1 < previewBytes; i += 2) {
          head.push(chunk.readInt16LE(i));
        }
        console.log(`[debug] frame ${debugFramesSeen} len=${chunk.length} head=${head.join(',')}`);
        debugFramesSeen++;
      }
    });
  }

  console.log(`→ opening ${path} @ ${baudRate} baud`);
  await hw.connect();

  try {
    const rtt = await hw.ping(Buffer.from('loopback'));
    console.log(`→ ping round-trip: ${rtt.toFixed(1)} ms`);

    hw.sendEmotion(Emotion.SURPRISED);

    // ── 1. Capture ────────────────────────────────────────────────
    const chunks: Buffer[] = [];
    let bytesCaptured = 0;

    const collect = (chunk: Buffer): void => {
      chunks.push(chunk);
      bytesCaptured += chunk.length;
    };
    hw.on('audio_up', collect);

    console.log(`🎙️ Recording ${captureMs} ms — say something!`);
    await sleep(captureMs);

    hw.off('audio_up', collect);
    const capture = Buffer.concat(chunks);
    // Count whole samples only, so a trailing odd byte does not produce a
    // fractional sample count in the report.
    const samples = Math.floor(capture.length / BYTES_PER_SAMPLE);
    const durationMs = (samples / SAMPLE_RATE) * 1000;
    console.log(
      `✅ captured ${capture.length} bytes (${samples} samples, ${durationMs.toFixed(0)} ms)` +
        ` across ${chunks.length} frames`,
    );

    if (capture.length === 0) {
      console.error(
        '❌ no audio received from the ESP32. Check the I2S wiring ' +
          '(BCLK=32, LRCLK=33, DIN=34) and that the firmware got past `audio: I2S ready`.',
      );
      // This is a failed run: report it through the exit status as well.
      process.exitCode = 1;
      return;
    }

    // Quick RMS sanity check so we catch "mic muted" / "disconnected" early.
    const rms = computeRms(capture);
    console.log(` RMS level: ${rms.toFixed(0)} (silence ≈ 10, speech ≳ 500)`);

    if (debug) {
      // Dump the raw capture so we can replay it offline:
      //   aplay -r 16000 -f S16_LE -c 1 /tmp/tipote-capture.raw
      writeFileSync(dumpPath, capture);
      console.log(`[debug] raw capture written to ${dumpPath} (${capture.length} bytes)`);

      const allZero = capture.every((b) => b === 0);
      console.log(`[debug] capture.allZero=${allZero}`);

      // Also print some distinct int16 values we saw, to spot patterns.
      const seen = new Set<number>();
      for (let i = 0; i < capture.length - 1 && seen.size < 10; i += 2) {
        seen.add(capture.readInt16LE(i));
      }
      console.log(`[debug] first distinct samples: ${[...seen].join(',')}`);
    }

    if (skipPlayback) {
      console.log('SKIP_PLAYBACK set — not sending AUDIO_DOWN');
      return;
    }

    // ── 2. Playback ───────────────────────────────────────────────
    await sleep(500);

    const audio = new Esp32AudioService(
      {
        backend: 'esp32',
        captureDevice: 'default',
        playbackDevice: 'default',
        sampleRate: SAMPLE_RATE,
        bitDepth: 16,
        channels: 1,
        chunkDurationMs: 20,
      },
      hw,
    );

    hw.sendEmotion(Emotion.HAPPY);
    console.log('🔊 Playing back on the ESP32 speaker...');
    await audio.play(capture);
    console.log('✅ playback done');
  } finally {
    // Nested finally: disconnect must run even if resetting the emotion
    // throws, otherwise the serial link would be left open.
    try {
      hw.sendEmotion(Emotion.NEUTRAL);
      await sleep(200);
    } finally {
      await hw.disconnect();
    }
  }
}

/**
 * Computes the root-mean-square level of a buffer of signed 16-bit
 * little-endian samples.
 *
 * Only complete samples are counted: a trailing odd byte is ignored both
 * when summing and when averaging, so it cannot skew the result.
 *
 * @param buf - Raw sample bytes, read with `readInt16LE`.
 * @returns The RMS of the samples, or 0 when `buf` holds no complete sample.
 */
function computeRms(buf: Buffer): number {
  const sampleCount = Math.floor(buf.length / 2);
  if (sampleCount === 0) return 0;

  let sumSquares = 0;
  for (let index = 0; index < sampleCount; index++) {
    const sample = buf.readInt16LE(index * 2);
    sumSquares += sample * sample;
  }
  return Math.sqrt(sumSquares / sampleCount);
}

// Script entry point: run the loopback test and turn any rejection into a
// logged error and a non-zero exit status.
main().catch((err: unknown) => {
  console.error('loopback failed:', err);
  process.exit(1);
});