ok script esp
This commit is contained in:
parent
b29653e3aa
commit
c19d9a7cf4
@ -12,7 +12,11 @@
|
|||||||
"format": "prettier --write \"src/**/*.ts\"",
|
"format": "prettier --write \"src/**/*.ts\"",
|
||||||
"test": "vitest run",
|
"test": "vitest run",
|
||||||
"test:watch": "vitest",
|
"test:watch": "vitest",
|
||||||
"hw:demo": "tsx scripts/hardware-demo.ts"
|
"hw:demo": "pnpm exec tsx scripts/hardware-demo.ts",
|
||||||
|
"audio:loopback": "pnpm exec tsx scripts/audio-loopback.ts",
|
||||||
|
"audio:beep": "pnpm exec tsx scripts/audio-beep.ts",
|
||||||
|
"esp:record": "pnpm exec tsx ../robot-hardware/scripts/esp-record.ts",
|
||||||
|
"esp:play": "pnpm exec tsx ../robot-hardware/scripts/esp-play.ts"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"socket.io-client": "^4.8.3",
|
"socket.io-client": "^4.8.3",
|
||||||
|
|||||||
99
apps/robot-client/scripts/audio-beep.ts
Normal file
99
apps/robot-client/scripts/audio-beep.ts
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
/**
|
||||||
|
* Ti-Pote — Pure tone speaker test.
|
||||||
|
*
|
||||||
|
* Generates a 440 Hz sine wave at ~70% of full scale and streams it
|
||||||
|
* to the ESP32 speaker via AUDIO_DOWN frames, then a second beep at
|
||||||
|
* 880 Hz. Completely independent of the microphone — if this does
|
||||||
|
* not produce audible sound, the problem is downstream of the ESP32
|
||||||
|
* on the speaker path (MAX98357A wiring, SD pin, VIN, speaker leads).
|
||||||
|
*
|
||||||
|
* Run with:
|
||||||
|
* HARDWARE_SERIAL_PORT=/dev/serial0 pnpm --filter @ti-pote/robot-client audio:beep
|
||||||
|
*
|
||||||
|
* Optional env:
|
||||||
|
* BEEP_MS — length of each beep in ms (default 1500)
|
||||||
|
* BEEP_FREQ — primary frequency in Hz (default 440)
|
||||||
|
* BEEP_AMP — amplitude 0.0..1.0 (default 0.7)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { HardwareService, Emotion } from '../src/hardware/index.js';
|
||||||
|
import { Esp32AudioService } from '../src/services/audio.service.js';
|
||||||
|
|
||||||
|
// Serial link settings; both can be overridden through the environment.
const path = process.env.HARDWARE_SERIAL_PORT ?? '/dev/serial0';
const baudRate = Number.parseInt(process.env.HARDWARE_SERIAL_BAUD ?? '921600', 10);

// Tone parameters (BEEP_MS / BEEP_FREQ / BEEP_AMP, see the header comment).
const beepMs = Number.parseInt(process.env.BEEP_MS ?? '1500', 10);
const beepFreq = Number.parseInt(process.env.BEEP_FREQ ?? '440', 10);
const beepAmp = Number.parseFloat(process.env.BEEP_AMP ?? '0.7');

// Sample rate, in Hz, of the PCM generated and played by this script.
const SAMPLE_RATE = 16000;
|
||||||
|
|
||||||
|
/**
 * Synthesizes a mono sine tone as signed 16-bit little-endian PCM at
 * SAMPLE_RATE.
 *
 * A linear attack/release ramp (nominally 5 ms each) is applied so the
 * speaker doesn't click. When the tone is too short to hold both ramps, each
 * ramp is shortened to half the tone so the envelope stays continuous instead
 * of jumping where the two ramps would overlap.
 *
 * @param freqHz     Tone frequency in Hz.
 * @param durationMs Tone length in milliseconds (0 yields an empty buffer).
 * @param amplitude  Peak level as a fraction of full scale, clamped to 0..1.
 * @returns A buffer of `floor(SAMPLE_RATE * durationMs / 1000)` samples.
 * @throws RangeError when `durationMs` is negative or not a finite number.
 */
function generateSine(freqHz: number, durationMs: number, amplitude: number): Buffer {
  if (!Number.isFinite(durationMs) || durationMs < 0) {
    // Fail with a readable message instead of an opaque Buffer.alloc error.
    throw new RangeError(`generateSine: invalid duration ${durationMs} ms`);
  }

  const sampleCount = Math.floor((SAMPLE_RATE * durationMs) / 1000);
  const buf = Buffer.alloc(sampleCount * 2);
  const amp = Math.max(0, Math.min(1, amplitude)) * 32767;
  const twoPiF = (2 * Math.PI * freqHz) / SAMPLE_RATE;

  // 5 ms linear attack/release, capped at half the tone so the ramps never
  // overlap on very short beeps.
  const rampSamples = Math.min(
    Math.floor((SAMPLE_RATE * 5) / 1000),
    Math.floor(sampleCount / 2),
  );

  for (let i = 0; i < sampleCount; i++) {
    let env = 1;
    if (i < rampSamples) env = i / rampSamples;
    else if (i > sampleCount - rampSamples) env = (sampleCount - i) / rampSamples;

    const s = Math.round(Math.sin(i * twoPiF) * amp * env);
    // Clamp defensively to the int16 range before writing.
    buf.writeInt16LE(Math.max(-32768, Math.min(32767, s)), i * 2);
  }
  return buf;
}
|
||||||
|
|
||||||
|
/** Resolves after roughly `ms` milliseconds; used to pace the test steps. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Runs the speaker test: opens the serial link, pings the firmware, then
 * plays two generated tones (`beepFreq` and twice that frequency) through
 * `Esp32AudioService.play`, 400 ms apart.
 *
 * Whatever happens after the link is open, the emotion is reset to NEUTRAL on
 * a best-effort basis and the link is always disconnected.
 */
async function main(): Promise<void> {
  const hw = new HardwareService({ path, baudRate, heartbeatIntervalMs: 1000 });
  hw.on('log', (line) => console.log(`[firmware] ${line}`));
  hw.on('error', (err) => console.error(`[firmware error] ${err.message}`));

  console.log(`→ opening ${path} @ ${baudRate} baud`);
  await hw.connect();

  try {
    // Round-trip check before sending any audio.
    const rtt = await hw.ping(Buffer.from('beep'));
    console.log(`→ ping round-trip: ${rtt.toFixed(1)} ms`);

    const audio = new Esp32AudioService(
      {
        backend: 'esp32',
        captureDevice: 'default',
        playbackDevice: 'default',
        sampleRate: SAMPLE_RATE,
        bitDepth: 16,
        channels: 1,
        chunkDurationMs: 20,
      },
      hw,
    );

    hw.sendEmotion(Emotion.HAPPY);

    console.log(`🔊 Beep 1: ${beepFreq} Hz · ${beepMs} ms · amp=${beepAmp}`);
    const tone1 = generateSine(beepFreq, beepMs, beepAmp);
    await audio.play(tone1);

    await sleep(400);

    console.log(`🔊 Beep 2: ${beepFreq * 2} Hz · ${beepMs} ms · amp=${beepAmp}`);
    const tone2 = generateSine(beepFreq * 2, beepMs, beepAmp);
    await audio.play(tone2);

    console.log('✅ done — did you hear two beeps?');
  } finally {
    // Best-effort reset: a failure here (e.g. the link already dropped) must
    // neither skip the disconnect below nor replace the original error.
    try {
      hw.sendEmotion(Emotion.NEUTRAL);
      // NOTE(review): presumably leaves time for the frame to go out — confirm.
      await sleep(200);
    } catch (err) {
      console.error('[cleanup] could not reset emotion:', err);
    }
    await hw.disconnect();
  }
}
|
||||||
|
|
||||||
|
/** Logs a fatal error from the beep test and exits with status 1. */
function reportBeepFailure(reason: unknown): never {
  console.error('beep failed:', reason);
  process.exit(1);
}

// Script entry point.
main().catch(reportBeepFailure);
|
||||||
171
apps/robot-client/scripts/audio-loopback.ts
Normal file
171
apps/robot-client/scripts/audio-loopback.ts
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
/**
|
||||||
|
* Ti-Pote — End-to-end audio loopback test.
|
||||||
|
*
|
||||||
|
* What it proves: the whole Pi ↔ ESP32 ↔ mic/speaker chain works,
|
||||||
|
* without bringing the cloud/wake-word/orchestrator into the picture.
|
||||||
|
*
|
||||||
|
* What it does:
|
||||||
|
* 1. Opens the serial link to the ESP32.
|
||||||
|
* 2. Captures `CAPTURE_MS` (default 5000) of mic audio via
|
||||||
|
* AUDIO_UP frames into a single in-memory buffer.
|
||||||
|
* 3. Pauses briefly.
|
||||||
|
* 4. Streams that buffer back to the ESP32 as AUDIO_DOWN frames
|
||||||
|
* and waits for the speaker to finish playing.
|
||||||
|
*
|
||||||
|
* Expected result: you say "allô allô" during step 2 and hear your
|
||||||
|
* own voice played back on the robot's speaker a moment later.
|
||||||
|
*
|
||||||
|
* Run with:
|
||||||
|
* HARDWARE_SERIAL_PORT=/dev/serial0 pnpm --filter @ti-pote/robot-client audio:loopback
|
||||||
|
*
|
||||||
|
* Optional env:
|
||||||
|
* CAPTURE_MS — capture duration in ms (default 5000)
|
||||||
|
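* HARDWARE_SERIAL_PORT / HARDWARE_SERIAL_BAUD — serial device and baud rate
* (defaults /dev/serial0 and 921600)
* DEBUG — when set, logs the head of the first frames and dumps the raw capture
* DUMP_PATH — file the DEBUG dump is written to (default /tmp/tipote-capture.raw)
* SKIP_PLAYBACK — when set, stops after the capture step (no AUDIO_DOWN sent)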
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { writeFileSync } from 'node:fs';
|
||||||
|
import { HardwareService, Emotion } from '../src/hardware/index.js';
|
||||||
|
import { Esp32AudioService } from '../src/services/audio.service.js';
|
||||||
|
|
||||||
|
// Serial link settings; both can be overridden through the environment.
const path = process.env.HARDWARE_SERIAL_PORT ?? '/dev/serial0';
const baudRate = Number.parseInt(process.env.HARDWARE_SERIAL_BAUD ?? '921600', 10);

// How long to record, in milliseconds.
const captureMs = Number.parseInt(process.env.CAPTURE_MS ?? '5000', 10);

// Diagnostic switches: any non-empty value turns them on.
const debug = Boolean(process.env.DEBUG);
const skipPlayback = Boolean(process.env.SKIP_PLAYBACK);

// Where the raw capture is written when `debug` is on.
const dumpPath = process.env.DUMP_PATH ?? '/tmp/tipote-capture.raw';

// PCM format of the audio handled by this script: 16 kHz, 2 bytes per sample.
const SAMPLE_RATE = 16000;
const BYTES_PER_SAMPLE = 2;

// Number of frames already printed by the debug listener.
let debugFramesSeen = 0;
|
||||||
|
|
||||||
|
/** Resolves after roughly `ms` milliseconds; used to time capture and pauses. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Runs the loopback test: opens the serial link, collects `audio_up` chunks
 * for `captureMs` milliseconds, reports size and RMS level, then (unless
 * SKIP_PLAYBACK is set) plays the capture back through
 * `Esp32AudioService.play`.
 *
 * With DEBUG set it also prints the head of the first three frames and writes
 * the raw capture to `dumpPath`.
 *
 * If no audio at all is received the process exit code is set to 1, so the
 * failure is visible to whatever launched the script. Whatever happens after
 * the link is open, the emotion is reset to NEUTRAL on a best-effort basis
 * and the link is always disconnected.
 */
async function main(): Promise<void> {
  const hw = new HardwareService({ path, baudRate, heartbeatIntervalMs: 1000 });
  hw.on('log', (line) => console.log(`[firmware] ${line}`));
  hw.on('error', (err) => console.error(`[firmware error] ${err.message}`));
  if (debug) {
    hw.on('audio_up', (chunk) => {
      // Print first 8 int16 samples of the first few frames
      // so we can see whether the wire carries zeros or real data.
      if (debugFramesSeen < 3) {
        const head: number[] = [];
        const limit = Math.min(chunk.length, 16);
        // `i + 1 < limit` keeps a trailing odd byte from being read as a
        // (non-existent) 16-bit sample, which would throw a RangeError.
        for (let i = 0; i + 1 < limit; i += 2) {
          head.push(chunk.readInt16LE(i));
        }
        console.log(`[debug] frame ${debugFramesSeen} len=${chunk.length} head=${head.join(',')}`);
        debugFramesSeen++;
      }
    });
  }

  console.log(`→ opening ${path} @ ${baudRate} baud`);
  await hw.connect();

  try {
    const rtt = await hw.ping(Buffer.from('loopback'));
    console.log(`→ ping round-trip: ${rtt.toFixed(1)} ms`);

    hw.sendEmotion(Emotion.SURPRISED);

    // ── 1. Capture ────────────────────────────────────────────────
    const chunks: Buffer[] = [];
    let bytesCaptured = 0;

    const collect = (chunk: Buffer): void => {
      chunks.push(chunk);
      bytesCaptured += chunk.length;
    };
    hw.on('audio_up', collect);

    console.log(`🎙️ Recording ${captureMs} ms — say something!`);
    await sleep(captureMs);

    hw.off('audio_up', collect);
    const capture = Buffer.concat(chunks);
    // Whole samples only: a stray trailing byte is not half a sample.
    const samples = Math.floor(capture.length / BYTES_PER_SAMPLE);
    const durationMs = (samples / SAMPLE_RATE) * 1000;
    console.log(
      `✅ captured ${capture.length} bytes (${samples} samples, ${durationMs.toFixed(0)} ms)` +
        ` across ${chunks.length} frames`,
    );

    if (capture.length === 0) {
      console.error(
        '❌ no audio received from the ESP32. Check the I2S wiring ' +
          '(BCLK=32, LRCLK=33, DIN=34) and that the firmware got past `audio: I2S ready`.',
      );
      // The test failed: make sure the process does not exit with status 0.
      process.exitCode = 1;
      return;
    }

    // Quick RMS sanity check so we catch "mic muted" / "disconnected" early.
    const rms = computeRms(capture);
    console.log(` RMS level: ${rms.toFixed(0)} (silence ≈ 10, speech ≳ 500)`);

    if (debug) {
      // Dump the raw capture so we can replay it offline:
      //   aplay -r 16000 -f S16_LE -c 1 /tmp/tipote-capture.raw
      writeFileSync(dumpPath, capture);
      console.log(`[debug] raw capture written to ${dumpPath} (${capture.length} bytes)`);

      const allZero = capture.every((b) => b === 0);
      console.log(`[debug] capture.allZero=${allZero}`);

      // Also print some distinct int16 values we saw, to spot patterns.
      const seen = new Set<number>();
      for (let i = 0; i < capture.length - 1 && seen.size < 10; i += 2) {
        seen.add(capture.readInt16LE(i));
      }
      console.log(`[debug] first distinct samples: ${[...seen].join(',')}`);
    }

    if (skipPlayback) {
      console.log('SKIP_PLAYBACK set — not sending AUDIO_DOWN');
      return;
    }

    // ── 2. Playback ───────────────────────────────────────────────
    await sleep(500);

    const audio = new Esp32AudioService(
      {
        backend: 'esp32',
        captureDevice: 'default',
        playbackDevice: 'default',
        sampleRate: SAMPLE_RATE,
        bitDepth: 16,
        channels: 1,
        chunkDurationMs: 20,
      },
      hw,
    );

    hw.sendEmotion(Emotion.HAPPY);
    console.log('🔊 Playing back on the ESP32 speaker...');
    await audio.play(capture);
    console.log('✅ playback done');
  } finally {
    // Best-effort reset: a failure here (e.g. the link already dropped) must
    // neither skip the disconnect below nor replace the original error.
    try {
      hw.sendEmotion(Emotion.NEUTRAL);
      // NOTE(review): presumably leaves time for the frame to go out — confirm.
      await sleep(200);
    } catch (err) {
      console.error('[cleanup] could not reset emotion:', err);
    }
    await hw.disconnect();
  }
}
|
||||||
|
|
||||||
|
/**
 * Root-mean-square level of a buffer of signed 16-bit little-endian samples.
 *
 * Only whole samples are counted: a trailing odd byte is ignored both when
 * reading and when averaging, so it cannot skew the result.
 *
 * @param buf Raw PCM bytes.
 * @returns The RMS of the sample values, or 0 when `buf` holds no full sample.
 */
function computeRms(buf: Buffer): number {
  const samples = Math.floor(buf.length / 2);
  if (samples === 0) return 0;

  let sumSquares = 0;
  for (let i = 0; i < samples; i++) {
    const s = buf.readInt16LE(i * 2);
    sumSquares += s * s;
  }
  return Math.sqrt(sumSquares / samples);
}
|
||||||
|
|
||||||
|
/** Logs a fatal error from the loopback test and exits with status 1. */
function reportLoopbackFailure(reason: unknown): never {
  console.error('loopback failed:', reason);
  process.exit(1);
}

// Script entry point.
main().catch(reportLoopbackFailure);
|
||||||
@ -2,94 +2,175 @@
|
|||||||
"""
|
"""
|
||||||
Ti-Pote Wake Word Detection Script.
|
Ti-Pote Wake Word Detection Script.
|
||||||
|
|
||||||
Runs OpenWakeWord model continuously, listening on the specified ALSA device.
|
Runs OpenWakeWord continuously and prints "DETECTED" to stdout when
|
||||||
Prints "DETECTED" to stdout when the wake word is heard.
|
the wake word is heard.
|
||||||
|
|
||||||
Supports PAUSE/RESUME commands on stdin to temporarily stop/start listening
|
Two input modes:
|
||||||
without reloading the model. When paused, the audio stream is closed so other
|
|
||||||
processes (arecord) can use the device.
|
|
||||||
|
|
||||||
Usage:
|
1. --input alsa (default, legacy)
|
||||||
python3 wake_word.py --model hey_jarvis --threshold 0.5 --device default --sample-rate 16000
|
Opens an ALSA capture device via PyAudio. PAUSE/RESUME/QUIT
|
||||||
|
commands are read from stdin.
|
||||||
|
|
||||||
Requirements:
|
2. --input stdin
|
||||||
pip install openwakeword pyaudio numpy
|
Reads raw S16 mono PCM audio from stdin (fd 0). This is used when
|
||||||
|
the Raspberry Pi is just an orchestrator and the microphone lives
|
||||||
|
on the ESP32 — the Node client forwards AUDIO_UP frames into this
|
||||||
|
script's stdin. Control commands are read from a separate file
|
||||||
|
descriptor specified by --control-fd (default: 3).
|
||||||
|
|
||||||
|
Control commands (one per line, uppercase):
|
||||||
|
PAUSE — stop emitting DETECTED events (audio keeps flowing so
|
||||||
|
we don't overflow the pipe, but predictions are ignored).
|
||||||
|
RESUME — resume emitting and reset the model buffer.
|
||||||
|
RESET — reset the model buffer without touching the pause flag.
|
||||||
|
QUIT — exit cleanly.
|
||||||
|
|
||||||
|
Usage (ALSA):
|
||||||
|
python3 wake_word.py --model hey_jarvis --device default
|
||||||
|
|
||||||
|
Usage (stdin / ESP32 backend):
|
||||||
|
python3 wake_word.py --model hey_jarvis --input stdin --control-fd 3
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import select
|
import sys
|
||||||
import threading
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
def main():
|
CHUNK_SAMPLES = 1280 # ≈ 80 ms @ 16 kHz (OpenWakeWord's preferred size)
|
||||||
parser = argparse.ArgumentParser(description='Ti-Pote Wake Word Detection')
|
|
||||||
parser.add_argument('--model', type=str, default='hey_jarvis',
|
|
||||||
help='Wake word model name (default: hey_jarvis as placeholder)')
|
|
||||||
parser.add_argument('--threshold', type=float, default=0.5,
|
|
||||||
help='Detection threshold (0.0-1.0)')
|
|
||||||
parser.add_argument('--device', type=str, default='default',
|
|
||||||
help='ALSA audio capture device')
|
|
||||||
parser.add_argument('--sample-rate', type=int, default=16000,
|
|
||||||
help='Audio sample rate in Hz')
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
|
|
||||||
|
def load_model(model_name: str):
|
||||||
try:
|
try:
|
||||||
from openwakeword.model import Model
|
from openwakeword.model import Model
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("ERROR: openwakeword not installed. Run: pip install openwakeword", file=sys.stderr)
|
print("ERROR: openwakeword not installed. Run: pip install openwakeword",
|
||||||
|
file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
try:
|
|
||||||
import pyaudio
|
|
||||||
except ImportError:
|
|
||||||
print("ERROR: pyaudio not installed. Run: pip install pyaudio", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# ── Load the wake word model (one time only) ──
|
|
||||||
|
|
||||||
print(f"Loading wake word model: {args.model}...", file=sys.stderr)
|
|
||||||
|
|
||||||
import openwakeword
|
import openwakeword
|
||||||
pretrained_paths = openwakeword.get_pretrained_model_paths()
|
pretrained = openwakeword.get_pretrained_model_paths()
|
||||||
model_path = None
|
model_path = next(
|
||||||
for p in pretrained_paths:
|
(p for p in pretrained if os.path.basename(p).startswith(model_name)),
|
||||||
basename = os.path.basename(p)
|
None,
|
||||||
if basename.startswith(args.model):
|
)
|
||||||
model_path = p
|
|
||||||
break
|
|
||||||
|
|
||||||
if model_path is None:
|
if model_path is None:
|
||||||
if os.path.isfile(args.model):
|
if os.path.isfile(model_name):
|
||||||
model_path = args.model
|
model_path = model_name
|
||||||
else:
|
else:
|
||||||
print(f"ERROR: model '{args.model}' not found in pretrained models", file=sys.stderr)
|
print(f"ERROR: model '{model_name}' not found", file=sys.stderr)
|
||||||
print(f"Available models:", file=sys.stderr)
|
for p in pretrained:
|
||||||
for p in pretrained_paths:
|
|
||||||
print(f" - {os.path.basename(p)}", file=sys.stderr)
|
print(f" - {os.path.basename(p)}", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(f"Resolved model path: {model_path}", file=sys.stderr)
|
print(f"Loading wake word model: {model_name}...", file=sys.stderr)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
oww_model = Model(wakeword_model_paths=[model_path])
|
return Model(wakeword_model_paths=[model_path])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"ERROR loading model '{args.model}': {e}", file=sys.stderr)
|
print(f"ERROR loading model '{model_name}': {e}", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(f"Wake word model loaded: {args.model}", file=sys.stderr)
|
|
||||||
print(f"Threshold: {args.threshold}", file=sys.stderr)
|
|
||||||
print(f"Listening on device: {args.device}", file=sys.stderr)
|
|
||||||
|
|
||||||
# ── Initialize PyAudio ──
|
class State:
    """Flags shared by the audio loop and the control-command thread.

    Both threads read and update these attributes; `lock` is the mutex they
    take around those accesses.
    """

    def __init__(self):
        # Mutex protecting the flags below.
        self.lock = threading.Lock()
        # Cleared by a QUIT command or a termination signal to stop the loops.
        self.running = True
        # While True, detections are not emitted.
        self.paused = False
        # Set to ask the audio loop to reset the model before its next chunk.
        self.reset_requested = False
|
||||||
|
|
||||||
|
|
||||||
|
def start_control_reader(state: State, fd: int):
    """Start a daemon thread that applies control commands read from `fd`.

    Commands are matched case-insensitively, one per line:
      PAUSE  - set `state.paused` (no-op when already paused)
      RESUME - clear `state.paused` and request a model reset (no-op when
               not paused)
      RESET  - request a model reset
      QUIT   - clear `state.running` and stop reading
    Anything else is ignored. The thread also stops on EOF, on a read error,
    or once `state.running` is False.

    If `fd` cannot be opened, an error is printed to stderr and no thread is
    started.
    """
    try:
        channel = os.fdopen(fd, 'r', buffering=1)
    except OSError as e:
        print(f"ERROR opening control fd {fd}: {e}", file=sys.stderr)
        return

    def pump():
        while state.running:
            try:
                raw = channel.readline()
            except Exception:
                break
            if not raw:
                # Empty string means the writer closed the channel.
                break

            command = raw.strip().upper()
            with state.lock:
                if command == 'QUIT':
                    state.running = False
                    break
                if command == 'RESET':
                    state.reset_requested = True
                elif command == 'PAUSE':
                    if not state.paused:
                        state.paused = True
                        print("PAUSED", file=sys.stderr, flush=True)
                elif command == 'RESUME':
                    if state.paused:
                        state.paused = False
                        state.reset_requested = True
                        print("RESUMED", file=sys.stderr, flush=True)

    threading.Thread(target=pump, daemon=True).start()
|
||||||
|
|
||||||
|
|
||||||
|
def run_predict_loop(oww_model, read_chunk, state: State, threshold: float):
    """Feed audio chunks to the model and report wake-word detections.

    Prints "READY" to stderr, then repeats until `state.running` is False or
    `read_chunk()` returns None:
      1. reset the model if a reset was requested,
      2. read one chunk and run `oww_model.predict` on it as int16 samples,
      3. unless paused, print "DETECTED" to stdout and reset the model when
         the latest score of any entry in `oww_model.prediction_buffer`
         exceeds `threshold`.

    Chunks are still read and predicted while paused; only the detection
    check is skipped. KeyboardInterrupt ends the loop quietly.
    """
    print("READY", file=sys.stderr, flush=True)
    try:
        while state.running:
            # Honour a pending RESET/RESUME before consuming more audio.
            with state.lock:
                if state.reset_requested:
                    oww_model.reset()
                    state.reset_requested = False

            audio_data = read_chunk()
            if audio_data is None:
                # EOF / error; exit cleanly
                break

            oww_model.predict(np.frombuffer(audio_data, dtype=np.int16))

            with state.lock:
                muted = state.paused
            if muted:
                # Keep draining but don't emit detections.
                continue

            triggered = any(
                len(scores) > 0 and scores[-1] > threshold
                for scores in oww_model.prediction_buffer.values()
            )
            if triggered:
                print("DETECTED", flush=True)
                oww_model.reset()
    except KeyboardInterrupt:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────
|
||||||
|
# ALSA input (legacy backend)
|
||||||
|
# ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def run_alsa_mode(args, oww_model, state: State):
|
||||||
|
import re
|
||||||
|
try:
|
||||||
|
import pyaudio
|
||||||
|
except ImportError:
|
||||||
|
print("ERROR: pyaudio not installed. Run: pip install pyaudio",
|
||||||
|
file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
pa = pyaudio.PyAudio()
|
pa = pyaudio.PyAudio()
|
||||||
|
|
||||||
# Find the device index
|
|
||||||
import re
|
|
||||||
device_index = None
|
device_index = None
|
||||||
if args.device != 'default':
|
if args.device != 'default':
|
||||||
try:
|
try:
|
||||||
@ -97,14 +178,14 @@ def main():
|
|||||||
info = pa.get_device_info_by_index(idx)
|
info = pa.get_device_info_by_index(idx)
|
||||||
if info.get('maxInputChannels', 0) > 0:
|
if info.get('maxInputChannels', 0) > 0:
|
||||||
device_index = idx
|
device_index = idx
|
||||||
print(f"Using device by index: [{idx}] {info['name']}", file=sys.stderr)
|
print(f"Using device by index: [{idx}] {info['name']}",
|
||||||
|
file=sys.stderr)
|
||||||
except (ValueError, IOError):
|
except (ValueError, IOError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if device_index is None:
|
if device_index is None:
|
||||||
hw_match = re.search(r'(\d+),(\d+)', args.device)
|
hw_match = re.search(r'(\d+),(\d+)', args.device)
|
||||||
hw_pattern = f"hw:{hw_match.group(1)},{hw_match.group(2)}" if hw_match else None
|
hw_pattern = f"hw:{hw_match.group(1)},{hw_match.group(2)}" if hw_match else None
|
||||||
|
|
||||||
for i in range(pa.get_device_count()):
|
for i in range(pa.get_device_count()):
|
||||||
info = pa.get_device_info_by_index(i)
|
info = pa.get_device_info_by_index(i)
|
||||||
if info.get('maxInputChannels', 0) <= 0:
|
if info.get('maxInputChannels', 0) <= 0:
|
||||||
@ -115,133 +196,134 @@ def main():
|
|||||||
print(f"Matched device: [{i}] {name}", file=sys.stderr)
|
print(f"Matched device: [{i}] {name}", file=sys.stderr)
|
||||||
break
|
break
|
||||||
|
|
||||||
if device_index is None:
|
stream = {'handle': None}
|
||||||
print(f"WARNING: Device '{args.device}' not found, listing available inputs:", file=sys.stderr)
|
|
||||||
for i in range(pa.get_device_count()):
|
|
||||||
info = pa.get_device_info_by_index(i)
|
|
||||||
if info.get('maxInputChannels', 0) > 0:
|
|
||||||
print(f" [{i}] {info['name']}", file=sys.stderr)
|
|
||||||
print("Falling back to default device", file=sys.stderr)
|
|
||||||
|
|
||||||
# ── Audio stream helpers ──
|
|
||||||
|
|
||||||
chunk_size = 1280 # ~80ms at 16kHz (OpenWakeWord expects this)
|
|
||||||
stream = None
|
|
||||||
|
|
||||||
def open_stream():
|
def open_stream():
|
||||||
nonlocal stream
|
stream['handle'] = pa.open(
|
||||||
stream = pa.open(
|
|
||||||
format=pyaudio.paInt16,
|
format=pyaudio.paInt16,
|
||||||
channels=1,
|
channels=1,
|
||||||
rate=args.sample_rate,
|
rate=args.sample_rate,
|
||||||
input=True,
|
input=True,
|
||||||
frames_per_buffer=chunk_size,
|
frames_per_buffer=CHUNK_SAMPLES,
|
||||||
input_device_index=device_index,
|
input_device_index=device_index,
|
||||||
)
|
)
|
||||||
|
|
||||||
def close_stream():
|
def close_stream():
|
||||||
nonlocal stream
|
h = stream['handle']
|
||||||
if stream is not None:
|
if h is not None:
|
||||||
try:
|
try:
|
||||||
stream.stop_stream()
|
h.stop_stream()
|
||||||
stream.close()
|
h.close()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
stream = None
|
stream['handle'] = None
|
||||||
|
|
||||||
# ── Stdin command reader (PAUSE / RESUME) ──
|
def read_chunk():
|
||||||
|
with state.lock:
|
||||||
paused = False
|
is_paused = state.paused
|
||||||
running = True
|
# In ALSA mode, pausing means physically releasing the device.
|
||||||
lock = threading.Lock()
|
if is_paused:
|
||||||
|
if stream['handle'] is not None:
|
||||||
def stdin_reader():
|
close_stream()
|
||||||
nonlocal paused, running
|
print("STREAM_CLOSED", file=sys.stderr, flush=True)
|
||||||
while running:
|
time.sleep(0.1)
|
||||||
try:
|
return b'\x00' * (CHUNK_SAMPLES * 2) # dummy silence; won't be predicted
|
||||||
line = sys.stdin.readline()
|
if stream['handle'] is None:
|
||||||
if not line: # EOF
|
open_stream()
|
||||||
running = False
|
oww_model.reset()
|
||||||
break
|
print("STREAM_REOPENED", file=sys.stderr, flush=True)
|
||||||
cmd = line.strip().upper()
|
try:
|
||||||
with lock:
|
return stream['handle'].read(CHUNK_SAMPLES, exception_on_overflow=False)
|
||||||
if cmd == 'PAUSE':
|
except Exception as e:
|
||||||
if not paused:
|
print(f"Audio read error: {e}", file=sys.stderr)
|
||||||
paused = True
|
close_stream()
|
||||||
print("PAUSED", file=sys.stderr, flush=True)
|
time.sleep(0.5)
|
||||||
elif cmd == 'RESUME':
|
return b'\x00' * (CHUNK_SAMPLES * 2)
|
||||||
if paused:
|
|
||||||
paused = False
|
|
||||||
print("RESUMED", file=sys.stderr, flush=True)
|
|
||||||
elif cmd == 'QUIT':
|
|
||||||
running = False
|
|
||||||
break
|
|
||||||
except Exception:
|
|
||||||
break
|
|
||||||
|
|
||||||
stdin_thread = threading.Thread(target=stdin_reader, daemon=True)
|
|
||||||
stdin_thread.start()
|
|
||||||
|
|
||||||
# ── Signal handling ──
|
|
||||||
|
|
||||||
def handle_signal(sig, frame):
|
|
||||||
nonlocal running
|
|
||||||
running = False
|
|
||||||
signal.signal(signal.SIGTERM, handle_signal)
|
|
||||||
signal.signal(signal.SIGINT, handle_signal)
|
|
||||||
|
|
||||||
# ── Main loop ──
|
|
||||||
|
|
||||||
open_stream()
|
open_stream()
|
||||||
print("READY", file=sys.stderr, flush=True)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while running:
|
run_predict_loop(oww_model, read_chunk, state, args.threshold)
|
||||||
with lock:
|
|
||||||
is_paused = paused
|
|
||||||
|
|
||||||
if is_paused:
|
|
||||||
# Close the audio stream so arecord can use the device
|
|
||||||
if stream is not None:
|
|
||||||
close_stream()
|
|
||||||
print("STREAM_CLOSED", file=sys.stderr, flush=True)
|
|
||||||
# Wait a bit before checking again
|
|
||||||
import time
|
|
||||||
time.sleep(0.1)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Reopen stream if it was closed (after resume)
|
|
||||||
if stream is None:
|
|
||||||
open_stream()
|
|
||||||
oww_model.reset()
|
|
||||||
print("STREAM_REOPENED", file=sys.stderr, flush=True)
|
|
||||||
|
|
||||||
try:
|
|
||||||
audio_data = stream.read(chunk_size, exception_on_overflow=False)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Audio read error: {e}", file=sys.stderr)
|
|
||||||
close_stream()
|
|
||||||
import time
|
|
||||||
time.sleep(0.5)
|
|
||||||
continue
|
|
||||||
|
|
||||||
audio_array = np.frombuffer(audio_data, dtype=np.int16)
|
|
||||||
|
|
||||||
oww_model.predict(audio_array)
|
|
||||||
|
|
||||||
for model_name, score in oww_model.prediction_buffer.items():
|
|
||||||
if len(score) > 0 and score[-1] > args.threshold:
|
|
||||||
print("DETECTED", flush=True)
|
|
||||||
oww_model.reset()
|
|
||||||
break
|
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
pass
|
|
||||||
finally:
|
finally:
|
||||||
close_stream()
|
close_stream()
|
||||||
pa.terminate()
|
pa.terminate()
|
||||||
print("Wake word detection stopped", file=sys.stderr)
|
print("Wake word detection stopped", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────
|
||||||
|
# Stdin input (ESP32 backend)
|
||||||
|
# ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def run_stdin_mode(args, oww_model, state: State):
    """Run detection on raw PCM read from stdin (fd 0).

    The input is treated as 16-bit signed little-endian mono audio. Bytes are
    accumulated until a full chunk of CHUNK_SAMPLES samples is available, then
    handed to `run_predict_loop`. EOF, a read error, or `state.running`
    turning False ends the loop. "Wake word detection stopped" is always
    printed to stderr on the way out.
    """
    print("Listening on stdin for raw S16LE mono PCM", file=sys.stderr)
    frame_size = CHUNK_SAMPLES * 2  # two bytes per sample
    source = sys.stdin.buffer
    pending = bytearray()

    def read_chunk():
        # Top up `pending` until it holds one full frame, or give up.
        while state.running and len(pending) < frame_size:
            try:
                received = source.read(frame_size - len(pending))
            except Exception as e:
                print(f"stdin read error: {e}", file=sys.stderr)
                return None
            if not received:
                # EOF on stdin.
                return None
            pending.extend(received)

        if len(pending) < frame_size:
            # Stopped before a full frame arrived.
            return None

        frame = bytes(pending[:frame_size])
        del pending[:frame_size]
        return frame

    try:
        run_predict_loop(oww_model, read_chunk, state, args.threshold)
    finally:
        print("Wake word detection stopped", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────
|
||||||
|
# Entrypoint
|
||||||
|
# ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main():
    """Parse the command line, load the model and run the selected input mode.

    `--control-fd` defaults per input mode: 0 (stdin) with `--input alsa`, and
    3 with `--input stdin`, where fd 0 carries the audio stream. Asking for
    control commands on fd 0 in stdin mode is rejected, because the control
    reader and the audio reader would compete for the same descriptor.

    SIGTERM and SIGINT clear `state.running` so the loops can wind down.
    """
    parser = argparse.ArgumentParser(description='Ti-Pote Wake Word Detection')
    parser.add_argument('--model', type=str, default='hey_jarvis')
    parser.add_argument('--threshold', type=float, default=0.5)
    parser.add_argument('--input', type=str, choices=['alsa', 'stdin'], default='alsa',
                        help="Audio source. 'alsa' opens PyAudio, 'stdin' reads from fd 0.")
    parser.add_argument('--device', type=str, default='default',
                        help='ALSA audio capture device (only used with --input alsa).')
    parser.add_argument('--control-fd', type=int, default=None,
                        help='File descriptor to read control commands from. '
                             'Defaults to 0 (stdin) with --input alsa and to 3 '
                             'with --input stdin.')
    parser.add_argument('--sample-rate', type=int, default=16000)
    args = parser.parse_args()

    if args.control_fd is None:
        # Pick the default that matches the input mode.
        args.control_fd = 3 if args.input == 'stdin' else 0
    elif args.input == 'stdin' and args.control_fd == 0:
        parser.error("--control-fd 0 cannot be used with --input stdin: "
                     "fd 0 carries the audio stream")

    state = State()

    def handle_signal(_sig, _frame):
        state.running = False

    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    oww_model = load_model(args.model)
    print(f"Wake word model loaded: {args.model}", file=sys.stderr)
    print(f"Threshold: {args.threshold}", file=sys.stderr)

    start_control_reader(state, args.control_fd)

    if args.input == 'stdin':
        run_stdin_mode(args, oww_model, state)
    else:
        print(f"Listening on device: {args.device}", file=sys.stderr)
        run_alsa_mode(args, oww_model, state)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|||||||
@ -1,8 +1,11 @@
|
|||||||
export interface AudioConfig {
|
export interface AudioConfig {
|
||||||
/** ALSA device for capture (e.g., 'plughw:1,0' or 'default') */
|
/** Which audio backend to use: 'esp32' (default) or 'alsa' (legacy). */
|
||||||
|
backend: 'esp32' | 'alsa';
|
||||||
|
|
||||||
|
/** ALSA device for capture (only used when backend='alsa'). */
|
||||||
captureDevice: string;
|
captureDevice: string;
|
||||||
|
|
||||||
/** ALSA device for playback (e.g., 'plughw:0,0' or 'default') */
|
/** ALSA device for playback (only used when backend='alsa'). */
|
||||||
playbackDevice: string;
|
playbackDevice: string;
|
||||||
|
|
||||||
/** Sample rate in Hz */
|
/** Sample rate in Hz */
|
||||||
@ -53,8 +56,13 @@ export interface HardwareConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function loadHardwareConfig(): HardwareConfig {
|
export function loadHardwareConfig(): HardwareConfig {
|
||||||
|
const backend = (process.env.AUDIO_BACKEND || 'esp32').toLowerCase() as
|
||||||
|
| 'esp32'
|
||||||
|
| 'alsa';
|
||||||
|
|
||||||
return {
|
return {
|
||||||
audio: {
|
audio: {
|
||||||
|
backend,
|
||||||
captureDevice: process.env.AUDIO_CAPTURE_DEVICE || 'default',
|
captureDevice: process.env.AUDIO_CAPTURE_DEVICE || 'default',
|
||||||
playbackDevice: process.env.AUDIO_PLAYBACK_DEVICE || 'default',
|
playbackDevice: process.env.AUDIO_PLAYBACK_DEVICE || 'default',
|
||||||
sampleRate: parseInt(process.env.AUDIO_SAMPLE_RATE || '16000', 10),
|
sampleRate: parseInt(process.env.AUDIO_SAMPLE_RATE || '16000', 10),
|
||||||
@ -69,8 +77,15 @@ export function loadHardwareConfig(): HardwareConfig {
|
|||||||
threshold: parseFloat(process.env.WAKEWORD_THRESHOLD || '0.5'),
|
threshold: parseFloat(process.env.WAKEWORD_THRESHOLD || '0.5'),
|
||||||
},
|
},
|
||||||
serial: {
|
serial: {
|
||||||
enabled: (process.env.HARDWARE_SERIAL_ENABLED || 'false').toLowerCase() === 'true',
|
// The ESP32 is now the mic/speaker front-end — serial link is
|
||||||
path: process.env.HARDWARE_SERIAL_PORT || '/dev/ttyUSB0',
|
// enabled by default. Set HARDWARE_SERIAL_ENABLED=false only
|
||||||
|
// when intentionally falling back to the ALSA backend.
|
||||||
|
enabled:
|
||||||
|
(process.env.HARDWARE_SERIAL_ENABLED || (backend === 'esp32' ? 'true' : 'false'))
|
||||||
|
.toLowerCase() === 'true',
|
||||||
|
// Default to /dev/serial0 (the Pi's hardware UART once the
|
||||||
|
// console has been freed via raspi-config).
|
||||||
|
path: process.env.HARDWARE_SERIAL_PORT || '/dev/serial0',
|
||||||
baudRate: parseInt(process.env.HARDWARE_SERIAL_BAUD || '921600', 10),
|
baudRate: parseInt(process.env.HARDWARE_SERIAL_BAUD || '921600', 10),
|
||||||
heartbeatIntervalMs: parseInt(process.env.HARDWARE_HEARTBEAT_MS || '1000', 10),
|
heartbeatIntervalMs: parseInt(process.env.HARDWARE_HEARTBEAT_MS || '1000', 10),
|
||||||
},
|
},
|
||||||
|
|||||||
@ -27,8 +27,17 @@ export interface HardwareServiceEvents {
|
|||||||
log: (message: string) => void;
|
log: (message: string) => void;
|
||||||
frame: (frame: DecodedFrame) => void;
|
frame: (frame: DecodedFrame) => void;
|
||||||
ack: (payload: Buffer) => void;
|
ack: (payload: Buffer) => void;
|
||||||
|
/** Emitted for each AUDIO_UP frame received from the ESP32 (raw S16 mono PCM). */
|
||||||
|
audio_up: (chunk: Buffer) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Max bytes we put in a single AUDIO_DOWN frame. Must stay below
|
||||||
|
* MAX_PAYLOAD_SIZE (1024) and should map to a whole number of
|
||||||
|
* 20 ms @ 16 kHz chunks: 640 bytes = 20 ms, 320 samples.
|
||||||
|
*/
|
||||||
|
const AUDIO_DOWN_CHUNK_BYTES = 640;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HardwareService — the robot-client's only direct link to the ESP32.
|
* HardwareService — the robot-client's only direct link to the ESP32.
|
||||||
*
|
*
|
||||||
@ -136,6 +145,42 @@ export class HardwareService extends EventEmitter {
|
|||||||
this.writeFrame(MsgType.DISPLAY_CLEAR);
|
this.writeFrame(MsgType.DISPLAY_CLEAR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Stream a PCM S16 mono 16 kHz buffer to the ESP32 speaker.
 *
 * The buffer is cut into consecutive slices of at most
 * `AUDIO_DOWN_CHUNK_BYTES` and each slice is written as its own
 * AUDIO_DOWN frame, so every frame stays within the UART protocol's
 * MAX_PAYLOAD_SIZE. An empty buffer produces no frames.
 *
 * If the serial port is not open the whole buffer is dropped with a
 * warning; nothing is queued for later.
 *
 * Back-pressure note: `SerialPort.write` buffers in user-space, so
 * this call is best-effort. For long TTS playbacks, await
 * `drainAudioDown()` between chunks or space the calls out with a
 * `setTimeout` to avoid unbounded growth.
 *
 * @param chunk Raw PCM bytes to send.
 */
sendAudioDown(chunk: Buffer): void {
  const portReady = this.port?.isOpen;
  if (!portReady) {
    this.log.warn('Dropping AUDIO_DOWN — serial port not open');
    return;
  }

  let cursor = 0;
  while (cursor < chunk.length) {
    const end = cursor + AUDIO_DOWN_CHUNK_BYTES;
    // subarray() clamps `end` to the buffer length, so the final frame
    // may be shorter than a full chunk.
    this.writeFrame(MsgType.AUDIO_DOWN, chunk.subarray(cursor, end));
    cursor = end;
  }
}
|
||||||
|
|
||||||
|
/**
 * Wait until the serial port reports that its pending output has
 * drained. Useful between large AUDIO_DOWN bursts to keep latency
 * bounded.
 *
 * @returns A promise that resolves once `port.drain` calls back
 *   without an error, and rejects with the error it reports. Resolves
 *   immediately when the port is missing or not open.
 */
drainAudioDown(): Promise<void> {
  return new Promise<void>((done, fail) => {
    const port = this.port;
    if (!port?.isOpen) {
      // Nothing to wait for.
      done();
      return;
    }
    port.drain((err) => {
      if (err) {
        fail(err);
      } else {
        done();
      }
    });
  });
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Round-trip PING → PONG used for bring-up and latency checks.
|
* Round-trip PING → PONG used for bring-up and latency checks.
|
||||||
* Resolves with the measured RTT in ms.
|
* Resolves with the measured RTT in ms.
|
||||||
@ -187,6 +232,9 @@ export class HardwareService extends EventEmitter {
|
|||||||
case MsgType.ERROR:
|
case MsgType.ERROR:
|
||||||
this.log.error({ payload: frame.payload.toString('utf8') }, 'firmware error');
|
this.log.error({ payload: frame.payload.toString('utf8') }, 'firmware error');
|
||||||
return;
|
return;
|
||||||
|
case MsgType.AUDIO_UP:
|
||||||
|
this.emit('audio_up', frame.payload);
|
||||||
|
return;
|
||||||
default:
|
default:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
import { loadRobotConfig, loadHardwareConfig } from './config/index.js';
|
import { loadRobotConfig, loadHardwareConfig } from './config/index.js';
|
||||||
import { CloudSocket } from './transport/index.js';
|
import { CloudSocket } from './transport/index.js';
|
||||||
import {
|
import {
|
||||||
AudioService,
|
createAudioService,
|
||||||
WakeWordService,
|
WakeWordService,
|
||||||
KeyboardTriggerService,
|
KeyboardTriggerService,
|
||||||
HealthService,
|
HealthService,
|
||||||
@ -72,15 +72,16 @@ async function main(): Promise<void> {
|
|||||||
const resolvedConfig = { ...robotConfig, deviceId, deviceToken };
|
const resolvedConfig = { ...robotConfig, deviceId, deviceToken };
|
||||||
|
|
||||||
const cloudSocket = new CloudSocket(resolvedConfig as Required<typeof resolvedConfig>);
|
const cloudSocket = new CloudSocket(resolvedConfig as Required<typeof resolvedConfig>);
|
||||||
const audioService = new AudioService(hardwareConfig.audio);
|
|
||||||
const healthService = new HealthService(cloudSocket);
|
const healthService = new HealthService(cloudSocket);
|
||||||
|
|
||||||
// ── Optional: hardware bridge (ESP32 firmware) ──
|
// ── Hardware bridge (ESP32 firmware) ──
|
||||||
// The serial link is opt-in via HARDWARE_SERIAL_ENABLED=true. We
|
// With AUDIO_BACKEND=esp32 the ESP32 owns the mic AND the speaker,
|
||||||
// treat failures here as non-fatal: even without a face, the
|
// so the serial link is mandatory. With AUDIO_BACKEND=alsa we can
|
||||||
// robot can still converse with the cloud.
|
// still run without it (face will be missing, but audio works).
|
||||||
|
|
||||||
|
const audioBackend = hardwareConfig.audio.backend;
|
||||||
let hardwareService: HardwareService | null = null;
|
let hardwareService: HardwareService | null = null;
|
||||||
|
|
||||||
if (hardwareConfig.serial.enabled) {
|
if (hardwareConfig.serial.enabled) {
|
||||||
hardwareService = new HardwareService({
|
hardwareService = new HardwareService({
|
||||||
path: hardwareConfig.serial.path,
|
path: hardwareConfig.serial.path,
|
||||||
@ -93,19 +94,40 @@ async function main(): Promise<void> {
|
|||||||
hardwareService.sendEmotion(Emotion.HAPPY);
|
hardwareService.sendEmotion(Emotion.HAPPY);
|
||||||
logger.info('Hardware bridge connected');
|
logger.info('Hardware bridge connected');
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
if (audioBackend === 'esp32') {
|
||||||
|
logger.fatal(
|
||||||
|
{ err, path: hardwareConfig.serial.path },
|
||||||
|
'Hardware bridge required for AUDIO_BACKEND=esp32 — check the UART wiring or set AUDIO_BACKEND=alsa',
|
||||||
|
);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
logger.warn({ err }, 'Hardware bridge unavailable — continuing without face');
|
logger.warn({ err }, 'Hardware bridge unavailable — continuing without face');
|
||||||
hardwareService = null;
|
hardwareService = null;
|
||||||
}
|
}
|
||||||
|
} else if (audioBackend === 'esp32') {
|
||||||
|
logger.fatal(
|
||||||
|
'AUDIO_BACKEND=esp32 requires HARDWARE_SERIAL_ENABLED=true. Either enable the serial link or switch to AUDIO_BACKEND=alsa.',
|
||||||
|
);
|
||||||
|
process.exit(1);
|
||||||
} else {
|
} else {
|
||||||
logger.info('Hardware bridge disabled (set HARDWARE_SERIAL_ENABLED=true to enable)');
|
logger.info('Hardware bridge disabled (set HARDWARE_SERIAL_ENABLED=true to enable)');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Audio service — pick a backend now that we know whether the
|
||||||
|
// hardware bridge is alive.
|
||||||
|
const audioService = createAudioService(hardwareConfig.audio, hardwareService);
|
||||||
|
logger.info({ backend: audioBackend }, 'Audio service initialised');
|
||||||
|
|
||||||
// Choose trigger based on TRIGGER_MODE
|
// Choose trigger based on TRIGGER_MODE
|
||||||
let trigger: ITriggerService;
|
let trigger: ITriggerService;
|
||||||
|
|
||||||
if (resolvedConfig.triggerMode === 'wakeword') {
|
if (resolvedConfig.triggerMode === 'wakeword') {
|
||||||
logger.info('Trigger: wake word (OpenWakeWord)');
|
logger.info('Trigger: wake word (OpenWakeWord)');
|
||||||
trigger = new WakeWordService(hardwareConfig.wakeWord, hardwareConfig.audio);
|
trigger = new WakeWordService(
|
||||||
|
hardwareConfig.wakeWord,
|
||||||
|
hardwareConfig.audio,
|
||||||
|
audioBackend === 'esp32' ? hardwareService : null,
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
logger.info('Trigger: keyboard (press Enter to talk)');
|
logger.info('Trigger: keyboard (press Enter to talk)');
|
||||||
trigger = new KeyboardTriggerService();
|
trigger = new KeyboardTriggerService();
|
||||||
|
|||||||
@ -1,30 +1,48 @@
|
|||||||
import { ChildProcess, spawn } from 'node:child_process';
|
import { ChildProcess, spawn } from 'node:child_process';
|
||||||
import { EventEmitter } from 'node:events';
|
import { EventEmitter } from 'node:events';
|
||||||
import { type AudioConfig } from '../config/index.js';
|
import { type AudioConfig } from '../config/index.js';
|
||||||
|
import { type HardwareService } from '../hardware/index.js';
|
||||||
import { createLogger, type Logger } from '../utils/index.js';
|
import { createLogger, type Logger } from '../utils/index.js';
|
||||||
|
|
||||||
export interface AudioServiceEvents {
|
export interface AudioServiceEvents {
|
||||||
/** Emitted when a raw PCM audio chunk is captured from the microphone */
|
/** Emitted when a raw PCM audio chunk is captured from the microphone. */
|
||||||
audio_chunk: (chunk: Buffer) => void;
|
audio_chunk: (chunk: Buffer) => void;
|
||||||
/** Emitted when playback of a response finishes */
|
/** Emitted when playback of a response finishes. */
|
||||||
playback_done: () => void;
|
playback_done: () => void;
|
||||||
/** Emitted on audio errors */
|
/** Emitted on audio errors. */
|
||||||
error: (error: Error) => void;
|
error: (error: Error) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Audio service for Raspberry Pi.
|
* Common audio interface used by the orchestrator, wake word service,
|
||||||
|
* and test scripts. Two backends implement it:
|
||||||
*
|
*
|
||||||
* Uses ALSA tools (arecord/aplay) via child processes.
|
* - `AlsaAudioService` — arecord/aplay child processes, for dev on a
|
||||||
* Works with any ALSA-compatible audio device:
|
* machine with a USB mic or when the Pi owns the I2S mic/speaker
|
||||||
* - I2S (INMP441 mic, MAX98357 amp) connected directly to Pi GPIO
|
* directly. Selected with `AUDIO_BACKEND=alsa`.
|
||||||
* - USB audio devices
|
|
||||||
* - Default system audio
|
|
||||||
*
|
*
|
||||||
* Audio format: PCM signed 16-bit little-endian, mono, 16kHz
|
* - `Esp32AudioService` — mic and speaker live on the ESP32; audio
|
||||||
|
* flows over UART via `HardwareService`. Selected with
|
||||||
|
* `AUDIO_BACKEND=esp32` (the default in production).
|
||||||
*/
|
*/
|
||||||
export class AudioService extends EventEmitter {
|
export abstract class AudioService extends EventEmitter {
  // ── Capture (microphone) ──

  /** True while a capture session is active. */
  abstract get isCapturing(): boolean;

  /** Begin a capture session. */
  abstract startCapture(): void;

  /** End the current capture session, if any. */
  abstract stopCapture(): void;

  // ── Playback (speaker) ──

  /** True while a buffer is being played. */
  abstract get isPlaying(): boolean;

  /**
   * Play an audio buffer on the speaker.
   *
   * @param audioBuffer Audio data to play.
   * @returns A promise that settles when the backend is done with the buffer.
   */
  abstract play(audioBuffer: Buffer): Promise<void>;

  /** Interrupt the playback currently in progress, if any. */
  abstract stopPlayback(): void;

  // ── Lifecycle ──

  /** Release the backend's resources. */
  abstract destroy(): Promise<void>;
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
// ALSA backend — kept for dev on laptops and for Pi setups where
|
||||||
|
// the mic/speaker hang off ALSA directly (USB sound card, HAT…).
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export class AlsaAudioService extends AudioService {
|
||||||
private captureProcess: ChildProcess | null = null;
|
private captureProcess: ChildProcess | null = null;
|
||||||
|
private playProcess: ChildProcess | null = null;
|
||||||
private readonly logger: Logger;
|
private readonly logger: Logger;
|
||||||
private _isCapturing = false;
|
private _isCapturing = false;
|
||||||
private _isPlaying = false;
|
private _isPlaying = false;
|
||||||
@ -32,7 +50,7 @@ export class AudioService extends EventEmitter {
|
|||||||
|
|
||||||
constructor(private readonly config: AudioConfig) {
|
constructor(private readonly config: AudioConfig) {
|
||||||
super();
|
super();
|
||||||
this.logger = createLogger('audio', 'info');
|
this.logger = createLogger('audio:alsa', 'info');
|
||||||
}
|
}
|
||||||
|
|
||||||
get isCapturing(): boolean {
|
get isCapturing(): boolean {
|
||||||
@ -43,10 +61,6 @@ export class AudioService extends EventEmitter {
|
|||||||
return this._isPlaying;
|
return this._isPlaying;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Start capturing audio from the microphone.
|
|
||||||
* Emits 'audio_chunk' events with raw PCM buffers.
|
|
||||||
*/
|
|
||||||
startCapture(): void {
|
startCapture(): void {
|
||||||
if (this._isCapturing) {
|
if (this._isCapturing) {
|
||||||
this.logger.warn('Already capturing audio');
|
this.logger.warn('Already capturing audio');
|
||||||
@ -58,13 +72,6 @@ export class AudioService extends EventEmitter {
|
|||||||
'Starting audio capture',
|
'Starting audio capture',
|
||||||
);
|
);
|
||||||
|
|
||||||
// arecord outputs raw PCM to stdout
|
|
||||||
// -D: ALSA device
|
|
||||||
// -f: format (S16_LE = signed 16-bit little-endian)
|
|
||||||
// -r: sample rate
|
|
||||||
// -c: channels
|
|
||||||
// -t: type (raw = no header)
|
|
||||||
// --buffer-size: in frames, controls latency
|
|
||||||
const bufferFrames = Math.floor(this.config.sampleRate * (this.config.chunkDurationMs / 1000));
|
const bufferFrames = Math.floor(this.config.sampleRate * (this.config.chunkDurationMs / 1000));
|
||||||
|
|
||||||
this.captureProcess = spawn('arecord', [
|
this.captureProcess = spawn('arecord', [
|
||||||
@ -112,9 +119,6 @@ export class AudioService extends EventEmitter {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Stop capturing audio from the microphone.
|
|
||||||
*/
|
|
||||||
stopCapture(): void {
|
stopCapture(): void {
|
||||||
if (!this.captureProcess) return;
|
if (!this.captureProcess) return;
|
||||||
|
|
||||||
@ -125,12 +129,6 @@ export class AudioService extends EventEmitter {
|
|||||||
this._isCapturing = false;
|
this._isCapturing = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Play audio through the speaker.
|
|
||||||
* Accepts either raw PCM or WAV (with RIFF header) data.
|
|
||||||
*
|
|
||||||
* @returns Promise that resolves when playback is complete
|
|
||||||
*/
|
|
||||||
async play(audioBuffer: Buffer): Promise<void> {
|
async play(audioBuffer: Buffer): Promise<void> {
|
||||||
if (this._isPlaying) {
|
if (this._isPlaying) {
|
||||||
this.logger.warn('Already playing audio, queueing...');
|
this.logger.warn('Already playing audio, queueing...');
|
||||||
@ -152,24 +150,26 @@ export class AudioService extends EventEmitter {
|
|||||||
'-',
|
'-',
|
||||||
];
|
];
|
||||||
|
|
||||||
const playProcess = spawn('aplay', args, {
|
this.playProcess = spawn('aplay', args, {
|
||||||
stdio: ['pipe', 'ignore', 'pipe'],
|
stdio: ['pipe', 'ignore', 'pipe'],
|
||||||
});
|
});
|
||||||
|
|
||||||
playProcess.stderr?.on('data', (data: Buffer) => {
|
this.playProcess.stderr?.on('data', (data: Buffer) => {
|
||||||
const msg = data.toString().trim();
|
const msg = data.toString().trim();
|
||||||
if (msg && !msg.startsWith('Playing') && !msg.startsWith('Warning')) {
|
if (msg && !msg.startsWith('Playing') && !msg.startsWith('Warning')) {
|
||||||
this.logger.error({ msg }, 'aplay stderr');
|
this.logger.error({ msg }, 'aplay stderr');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
playProcess.on('error', (err) => {
|
this.playProcess.on('error', (err) => {
|
||||||
this._isPlaying = false;
|
this._isPlaying = false;
|
||||||
|
this.playProcess = null;
|
||||||
reject(new Error(`Audio playback failed: ${err.message}`));
|
reject(new Error(`Audio playback failed: ${err.message}`));
|
||||||
});
|
});
|
||||||
|
|
||||||
playProcess.on('exit', (code) => {
|
this.playProcess.on('exit', (code) => {
|
||||||
this._isPlaying = false;
|
this._isPlaying = false;
|
||||||
|
this.playProcess = null;
|
||||||
if (code === 0 || code === null) {
|
if (code === 0 || code === null) {
|
||||||
this.emit('playback_done');
|
this.emit('playback_done');
|
||||||
resolve();
|
resolve();
|
||||||
@ -178,26 +178,194 @@ export class AudioService extends EventEmitter {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Write audio data to aplay's stdin and close it
|
this.playProcess.stdin?.write(audioBuffer);
|
||||||
playProcess.stdin?.write(audioBuffer);
|
this.playProcess.stdin?.end();
|
||||||
playProcess.stdin?.end();
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Stop any currently playing audio.
|
|
||||||
*/
|
|
||||||
stopPlayback(): void {
|
stopPlayback(): void {
|
||||||
// aplay is spawned per-play, so we can't easily stop it here
|
if (this.playProcess) {
|
||||||
// For interrupt support, we'd track the play process
|
this.playProcess.kill('SIGTERM');
|
||||||
|
this.playProcess = null;
|
||||||
|
}
|
||||||
this._isPlaying = false;
|
this._isPlaying = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Clean up resources.
|
|
||||||
*/
|
|
||||||
async destroy(): Promise<void> {
|
async destroy(): Promise<void> {
|
||||||
this.stopCapture();
|
this.stopCapture();
|
||||||
|
this.stopPlayback();
|
||||||
this.removeAllListeners();
|
this.removeAllListeners();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
// ESP32 backend — the mic and speaker live on the firmware side and
|
||||||
|
// audio flows over the UART link owned by HardwareService.
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bytes-per-chunk written to the ESP32 per AUDIO_DOWN frame. Must
|
||||||
|
* match `AUDIO_DOWN_CHUNK_BYTES` in HardwareService. 640 bytes =
|
||||||
|
* 20 ms of 16 kHz S16 mono audio.
|
||||||
|
*/
|
||||||
|
const ESP32_CHUNK_BYTES = 640;
|
||||||
|
|
||||||
|
/** Milliseconds we wait between two AUDIO_DOWN frames during playback. */
|
||||||
|
const ESP32_PACING_MS = 18;
|
||||||
|
|
||||||
|
/**
 * AudioService backend for a microphone and speaker attached to the
 * ESP32. Captured audio arrives as `audio_up` events on the
 * HardwareService and is re-emitted as `audio_chunk`; playback is sent
 * out through `HardwareService.sendAudioDown`.
 */
export class Esp32AudioService extends AudioService {
  private readonly logger: Logger;
  private _isCapturing = false;
  private _isPlaying = false;
  /** Set by `stopPlayback()`; checked before each chunk is sent in `play()`. */
  private _playbackAbort = false;

  /**
   * Stable function reference so the exact same listener can be passed
   * to both `hardware.on` and `hardware.off`.
   */
  private readonly forwardAudioUp = (chunk: Buffer): void => {
    if (this._isCapturing) {
      this.emit('audio_chunk', chunk);
    }
  };

  /**
   * @param _config Accepted for signature parity with the other backend; not used here.
   * @param hardware Serial bridge that carries the audio frames.
   */
  constructor(
    _config: AudioConfig,
    private readonly hardware: HardwareService,
  ) {
    super();
    void _config;
    this.logger = createLogger('audio:esp32', 'info');
  }

  get isCapturing(): boolean {
    return this._isCapturing;
  }

  get isPlaying(): boolean {
    return this._isPlaying;
  }

  /** Start forwarding `audio_up` frames as `audio_chunk` events. No-op (with a warning) if already capturing. */
  startCapture(): void {
    if (this._isCapturing) {
      this.logger.warn('Already capturing audio');
      return;
    }

    this.logger.info('Subscribing to ESP32 AUDIO_UP stream');
    this._isCapturing = true;
    // One listener per capture session; stopCapture() detaches it.
    this.hardware.on('audio_up', this.forwardAudioUp);
  }

  /** Stop forwarding microphone frames. No-op if not capturing. */
  stopCapture(): void {
    if (!this._isCapturing) {
      return;
    }

    this.logger.info('Unsubscribing from ESP32 AUDIO_UP stream');
    this._isCapturing = false;
    this.hardware.off('audio_up', this.forwardAudioUp);
  }

  /**
   * Play a PCM S16 mono 16 kHz buffer on the ESP32 speaker. A WAV
   * header, if present, is stripped first because the firmware expects
   * raw PCM.
   *
   * The buffer is sent in `ESP32_CHUNK_BYTES` slices with a pause of
   * `ESP32_PACING_MS` after each one, so the Node serial buffer and the
   * ESP32 speaker DMA stay roughly in sync. Without pacing the whole
   * buffer would be queued at once and the robot would still be
   * "speaking" long after the orchestrator thinks it is done.
   *
   * If a playback is already running the new buffer is ignored and the
   * promise resolves immediately. `playback_done` is emitted when the
   * loop ends, including after an abort via `stopPlayback()`, and also
   * right away for an empty buffer.
   *
   * @param audioBuffer Raw PCM or WAV data.
   */
  async play(audioBuffer: Buffer): Promise<void> {
    if (this._isPlaying) {
      this.logger.warn('Already playing audio — ignoring new buffer');
      return;
    }

    const samples = stripWavHeader(audioBuffer);
    if (samples.length === 0) {
      this.emit('playback_done');
      return;
    }

    this._isPlaying = true;
    this._playbackAbort = false;

    try {
      let cursor = 0;
      while (cursor < samples.length && !this._playbackAbort) {
        const frame = samples.subarray(cursor, cursor + ESP32_CHUNK_BYTES);
        this.hardware.sendAudioDown(frame);
        if (ESP32_PACING_MS > 0) {
          await delay(ESP32_PACING_MS);
        }
        cursor += ESP32_CHUNK_BYTES;
      }

      // Let the TX buffer drain so we don't race on destroy. A drain
      // failure is logged but does not fail the playback.
      try {
        await this.hardware.drainAudioDown();
      } catch (err) {
        this.logger.warn({ err }, 'drain after playback failed');
      }

      this.emit('playback_done');
    } finally {
      this._isPlaying = false;
      this._playbackAbort = false;
    }
  }

  /** Ask the running `play()` loop to stop before its next chunk. No-op if idle. */
  stopPlayback(): void {
    if (!this._isPlaying) {
      return;
    }

    this.logger.info('Aborting playback');
    this._playbackAbort = true;
  }

  /** Stop capture and playback, then drop every listener registered on this service. */
  async destroy(): Promise<void> {
    this.stopCapture();
    this.stopPlayback();
    this.removeAllListeners();
  }
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
// Helpers
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Promise-based wrapper around `setTimeout`.
 *
 * @param ms Timeout in milliseconds passed to `setTimeout`.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Return the raw PCM payload of a buffer that may be wrapped in a
 * RIFF/WAVE container. The ESP32 I2S driver wants raw S16 mono PCM,
 * nothing else.
 *
 * Buffers that do not start with the `RIFF….WAVE` magic are returned
 * unchanged. For WAVE buffers the chunk list is walked to find the
 * `data` chunk instead of assuming a fixed 44-byte header, so that:
 *
 * - extra chunks before `data` (`LIST`, `fact`, an extended `fmt `)
 *   are skipped rather than played as audio;
 * - chunks that follow `data` are not appended to the audio;
 * - a header-only file yields an empty buffer instead of the header.
 *
 * A declared `data` size of 0, or one larger than the bytes actually
 * present (as written by streaming encoders that do not know the final
 * length), is treated as "everything up to the end of the buffer".
 *
 * If no `data` chunk can be located the function falls back to the
 * previous behaviour: drop the first 44 bytes when the buffer is longer
 * than that, otherwise return it as-is.
 *
 * @param buf Raw PCM or WAV data.
 * @returns A view on `buf` (no copy) containing only the PCM samples.
 */
function stripWavHeader(buf: Buffer): Buffer {
  const RIFF_PREAMBLE_BYTES = 12; // 'RIFF' + uint32 size + 'WAVE'
  const CHUNK_HEADER_BYTES = 8; // 4-char id + uint32 LE body size
  const LEGACY_HEADER_BYTES = 44; // canonical PCM header, used as fallback

  const isWave =
    buf.length >= RIFF_PREAMBLE_BYTES &&
    buf.toString('ascii', 0, 4) === 'RIFF' &&
    buf.toString('ascii', 8, 12) === 'WAVE';
  if (!isWave) {
    return buf;
  }

  let offset = RIFF_PREAMBLE_BYTES;
  while (offset + CHUNK_HEADER_BYTES <= buf.length) {
    const chunkId = buf.toString('ascii', offset, offset + 4);
    const declaredSize = buf.readUInt32LE(offset + 4);
    const bodyStart = offset + CHUNK_HEADER_BYTES;

    if (chunkId === 'data') {
      const available = buf.length - bodyStart;
      const usable = declaredSize === 0 || declaredSize > available ? available : declaredSize;
      return buf.subarray(bodyStart, bodyStart + usable);
    }

    // RIFF chunk bodies are padded to an even number of bytes.
    offset = bodyStart + declaredSize + (declaredSize % 2);
  }

  // Malformed container: keep the historical fixed-offset behaviour.
  return buf.length > LEGACY_HEADER_BYTES ? buf.subarray(LEGACY_HEADER_BYTES) : buf;
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
// Factory
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export type AudioBackend = 'alsa' | 'esp32';
|
||||||
|
|
||||||
|
/**
 * Build the AudioService implementation selected by `config.backend`.
 *
 * - `'alsa'`  → `AlsaAudioService` (arecord/aplay path, handy for laptop
 *   development without an ESP32 wired in). `hardware` is not used.
 * - `'esp32'` → `Esp32AudioService`; this is also what a missing
 *   `backend` value resolves to.
 *
 * @param config Audio configuration, including the backend selector.
 * @param hardware Connected hardware bridge, or `null` when there is none.
 * @returns The backend instance.
 * @throws Error when the esp32 backend is selected but `hardware` is
 *   `null`, or when the backend value is not recognised.
 */
export function createAudioService(
  config: AudioConfig,
  hardware: HardwareService | null,
): AudioService {
  const backend = (config.backend ?? 'esp32') as AudioBackend;

  switch (backend) {
    case 'alsa':
      return new AlsaAudioService(config);

    case 'esp32':
      if (!hardware) {
        throw new Error(
          'AUDIO_BACKEND=esp32 requires a connected HardwareService — ' +
            'set HARDWARE_SERIAL_ENABLED=true and make sure the ESP32 is reachable, ' +
            'or switch to AUDIO_BACKEND=alsa for local development.',
        );
      }
      return new Esp32AudioService(config, hardware);

    default:
      // Reachable at runtime only if an unchecked value slipped past the type.
      throw new Error(`Unknown AUDIO_BACKEND: ${backend}`);
  }
}
|
||||||
|
|||||||
@ -1,4 +1,10 @@
|
|||||||
export { AudioService } from './audio.service.js';
|
export {
|
||||||
|
AudioService,
|
||||||
|
AlsaAudioService,
|
||||||
|
Esp32AudioService,
|
||||||
|
createAudioService,
|
||||||
|
type AudioBackend,
|
||||||
|
} from './audio.service.js';
|
||||||
export { WakeWordService } from './wake-word.service.js';
|
export { WakeWordService } from './wake-word.service.js';
|
||||||
export { KeyboardTriggerService } from './keyboard-trigger.service.js';
|
export { KeyboardTriggerService } from './keyboard-trigger.service.js';
|
||||||
export { HealthService } from './health.service.js';
|
export { HealthService } from './health.service.js';
|
||||||
|
|||||||
@ -1,24 +1,35 @@
|
|||||||
import { ChildProcess, spawn } from 'node:child_process';
|
import { ChildProcess, spawn } from 'node:child_process';
|
||||||
import { EventEmitter } from 'node:events';
|
import { EventEmitter } from 'node:events';
|
||||||
import { type WakeWordConfig, type AudioConfig } from '../config/index.js';
|
import { type WakeWordConfig, type AudioConfig } from '../config/index.js';
|
||||||
|
import { type HardwareService } from '../hardware/index.js';
|
||||||
import { createLogger, type Logger } from '../utils/index.js';
|
import { createLogger, type Logger } from '../utils/index.js';
|
||||||
|
|
||||||
export interface WakeWordServiceEvents {
|
export interface WakeWordServiceEvents {
|
||||||
/** Emitted when the wake word is detected */
|
|
||||||
detected: () => void;
|
detected: () => void;
|
||||||
/** Emitted when the engine is ready */
|
|
||||||
ready: () => void;
|
ready: () => void;
|
||||||
/** Emitted on errors */
|
|
||||||
error: (error: Error) => void;
|
error: (error: Error) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wake word detection service.
|
* Wake word detection service.
|
||||||
*
|
*
|
||||||
* Runs OpenWakeWord as a **long-lived** Python subprocess.
|
* Two operating modes, selected by whether a HardwareService is passed
|
||||||
* The model is loaded once at startup; pause/resume is handled via
|
* to the constructor:
|
||||||
* PAUSE/RESUME commands on stdin, so the audio device is released
|
*
|
||||||
* while arecord is capturing, then reclaimed when listening resumes.
|
* 1. **ALSA mode** (no HardwareService)
|
||||||
|
* The Python subprocess opens PyAudio on `audioConfig.captureDevice`
|
||||||
|
* and reads the mic directly. Pause releases the ALSA device so
|
||||||
|
* arecord (the AlsaAudioService) can use it during conversation.
|
||||||
|
*
|
||||||
|
* 2. **ESP32 mode** (HardwareService provided)
|
||||||
|
* The Python subprocess reads raw S16 mono PCM from stdin. We
|
||||||
|
* subscribe to `hardware.on('audio_up')` and pipe every mic chunk
|
||||||
|
* coming off the UART straight into the Python process. Control
|
||||||
|
* commands (PAUSE/RESUME/RESET/QUIT) go over a separate pipe at
|
||||||
|
* fd 3 because stdin is busy carrying audio.
|
||||||
|
*
|
||||||
|
* The model is loaded once at startup; pause/resume is cheap and
|
||||||
|
* does not reload it.
|
||||||
*/
|
*/
|
||||||
export class WakeWordService extends EventEmitter {
|
export class WakeWordService extends EventEmitter {
|
||||||
private process: ChildProcess | null = null;
|
private process: ChildProcess | null = null;
|
||||||
@ -26,51 +37,73 @@ export class WakeWordService extends EventEmitter {
|
|||||||
private _isListening = false;
|
private _isListening = false;
|
||||||
private _isPaused = false;
|
private _isPaused = false;
|
||||||
private _streamClosed = false;
|
private _streamClosed = false;
|
||||||
|
private readonly usesHardware: boolean;
|
||||||
|
|
||||||
|
/** Latched forwarder so we can detach it on stop / error. */
|
||||||
|
private readonly forwardMicChunk = (chunk: Buffer): void => {
|
||||||
|
if (!this.process || !this.process.stdin || this.process.stdin.destroyed) return;
|
||||||
|
// Node gracefully buffers writes if the pipe is full; we don't
|
||||||
|
// apply back-pressure here because dropping wake-word audio would
|
||||||
|
// just hurt detection accuracy for a few tens of ms.
|
||||||
|
this.process.stdin.write(chunk);
|
||||||
|
};
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly wakeWordConfig: WakeWordConfig,
|
private readonly wakeWordConfig: WakeWordConfig,
|
||||||
private readonly audioConfig: AudioConfig,
|
private readonly audioConfig: AudioConfig,
|
||||||
|
private readonly hardware: HardwareService | null = null,
|
||||||
) {
|
) {
|
||||||
super();
|
super();
|
||||||
this.logger = createLogger('wake-word', 'info');
|
this.logger = createLogger('wake-word', 'info');
|
||||||
|
this.usesHardware = hardware !== null;
|
||||||
}
|
}
|
||||||
|
|
||||||
get isListening(): boolean {
|
get isListening(): boolean {
|
||||||
return this._isListening && !this._isPaused;
|
return this._isListening && !this._isPaused;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Start the wake word Python subprocess.
|
|
||||||
* The model is loaded once; subsequent pause/resume cycles are fast.
|
|
||||||
*/
|
|
||||||
start(): void {
|
start(): void {
|
||||||
if (this.process) {
|
if (this.process) {
|
||||||
// Process already running — just resume if paused
|
if (this._isPaused) this.resume();
|
||||||
if (this._isPaused) {
|
|
||||||
this.resume();
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.logger.info(
|
this.logger.info(
|
||||||
{ model: this.wakeWordConfig.modelName, threshold: this.wakeWordConfig.threshold },
|
{
|
||||||
|
mode: this.usesHardware ? 'esp32' : 'alsa',
|
||||||
|
model: this.wakeWordConfig.modelName,
|
||||||
|
threshold: this.wakeWordConfig.threshold,
|
||||||
|
},
|
||||||
'Starting wake word detection',
|
'Starting wake word detection',
|
||||||
);
|
);
|
||||||
|
|
||||||
this.process = spawn(this.wakeWordConfig.pythonPath, [
|
const args = [
|
||||||
this.wakeWordConfig.scriptPath,
|
this.wakeWordConfig.scriptPath,
|
||||||
'--model', this.wakeWordConfig.modelName,
|
'--model', this.wakeWordConfig.modelName,
|
||||||
'--threshold', String(this.wakeWordConfig.threshold),
|
'--threshold', String(this.wakeWordConfig.threshold),
|
||||||
'--device', this.audioConfig.captureDevice,
|
|
||||||
'--sample-rate', String(this.audioConfig.sampleRate),
|
'--sample-rate', String(this.audioConfig.sampleRate),
|
||||||
], {
|
];
|
||||||
stdio: ['pipe', 'pipe', 'pipe'],
|
|
||||||
});
|
if (this.usesHardware) {
|
||||||
|
args.push('--input', 'stdin', '--control-fd', '3');
|
||||||
|
} else {
|
||||||
|
args.push('--input', 'alsa', '--device', this.audioConfig.captureDevice);
|
||||||
|
}
|
||||||
|
|
||||||
|
// stdio layout:
|
||||||
|
// 0: stdin — audio in (ESP32 mode) or control (ALSA mode)
|
||||||
|
// 1: stdout — DETECTED events
|
||||||
|
// 2: stderr — status & log lines
|
||||||
|
// 3: extra — control pipe (ESP32 mode only)
|
||||||
|
const stdio: ('pipe' | 'ignore')[] = this.usesHardware
|
||||||
|
? ['pipe', 'pipe', 'pipe', 'pipe']
|
||||||
|
: ['pipe', 'pipe', 'pipe'];
|
||||||
|
|
||||||
|
this.process = spawn(this.wakeWordConfig.pythonPath, args, { stdio });
|
||||||
|
|
||||||
this._isListening = true;
|
this._isListening = true;
|
||||||
this._isPaused = false;
|
this._isPaused = false;
|
||||||
|
|
||||||
// ── stdout: DETECTED events ──
|
|
||||||
this.process.stdout?.on('data', (data: Buffer) => {
|
this.process.stdout?.on('data', (data: Buffer) => {
|
||||||
const lines = data.toString().trim().split('\n');
|
const lines = data.toString().trim().split('\n');
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
@ -83,7 +116,6 @@ export class WakeWordService extends EventEmitter {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// ── stderr: status messages ──
|
|
||||||
this.process.stderr?.on('data', (data: Buffer) => {
|
this.process.stderr?.on('data', (data: Buffer) => {
|
||||||
const lines = data.toString().trim().split('\n');
|
const lines = data.toString().trim().split('\n');
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
@ -107,10 +139,9 @@ export class WakeWordService extends EventEmitter {
|
|||||||
this.logger.info('⏳ Loading wake word model...');
|
this.logger.info('⏳ Loading wake word model...');
|
||||||
} else if (msg.startsWith('Wake word model loaded')) {
|
} else if (msg.startsWith('Wake word model loaded')) {
|
||||||
this.logger.info('✅ Wake word model loaded');
|
this.logger.info('✅ Wake word model loaded');
|
||||||
} else if (msg.startsWith('Matched device') || msg.startsWith('Using device')) {
|
} else if (msg.startsWith('Matched device') || msg.startsWith('Using device') || msg.startsWith('Listening')) {
|
||||||
this.logger.info(`🔊 ${msg}`);
|
this.logger.info(`🔊 ${msg}`);
|
||||||
} else {
|
} else {
|
||||||
// Log unknown stderr messages at warn level to catch errors
|
|
||||||
this.logger.warn({ msg }, 'Wake word stderr');
|
this.logger.warn({ msg }, 'Wake word stderr');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -119,29 +150,36 @@ export class WakeWordService extends EventEmitter {
|
|||||||
this.process.on('error', (err) => {
|
this.process.on('error', (err) => {
|
||||||
this._isListening = false;
|
this._isListening = false;
|
||||||
this.logger.error({ err }, 'Wake word process error');
|
this.logger.error({ err }, 'Wake word process error');
|
||||||
|
this.detachHardware();
|
||||||
this.emit('error', new Error(`Wake word process failed: ${err.message}`));
|
this.emit('error', new Error(`Wake word process failed: ${err.message}`));
|
||||||
});
|
});
|
||||||
|
|
||||||
this.process.on('exit', (code) => {
|
this.process.on('exit', (code) => {
|
||||||
this._isListening = false;
|
this._isListening = false;
|
||||||
this._isPaused = false;
|
this._isPaused = false;
|
||||||
|
this.detachHardware();
|
||||||
this.process = null;
|
this.process = null;
|
||||||
if (code !== 0 && code !== null) {
|
if (code !== 0 && code !== null) {
|
||||||
this.logger.warn({ code }, 'Wake word process exited unexpectedly');
|
this.logger.warn({ code }, 'Wake word process exited unexpectedly');
|
||||||
// Auto-restart after a short delay
|
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
this.logger.info('Restarting wake word detection...');
|
this.logger.info('Restarting wake word detection...');
|
||||||
this.start();
|
this.start();
|
||||||
}, 2000);
|
}, 2000);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// In ESP32 mode, start piping mic audio from the UART.
|
||||||
|
if (this.usesHardware && this.hardware) {
|
||||||
|
this.hardware.on('audio_up', this.forwardMicChunk);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pause wake word detection.
|
* Pause wake word detection.
|
||||||
* Sends PAUSE command to Python subprocess which closes the audio stream,
|
*
|
||||||
* freeing the device for arecord. Returns a promise that resolves when
|
* In ALSA mode we must wait for STREAM_CLOSED so arecord can reclaim
|
||||||
* the audio stream is confirmed closed.
|
* the device. In ESP32 mode the audio flow never stops — we just
|
||||||
|
* tell the Python process to ignore detections.
|
||||||
*/
|
*/
|
||||||
pause(): Promise<void> {
|
pause(): Promise<void> {
|
||||||
if (!this.process || this._isPaused) return Promise.resolve();
|
if (!this.process || this._isPaused) return Promise.resolve();
|
||||||
@ -149,9 +187,13 @@ export class WakeWordService extends EventEmitter {
|
|||||||
this._isPaused = true;
|
this._isPaused = true;
|
||||||
this._streamClosed = false;
|
this._streamClosed = false;
|
||||||
|
|
||||||
this.process.stdin?.write('PAUSE\n');
|
this.writeControl('PAUSE');
|
||||||
|
|
||||||
|
if (this.usesHardware) {
|
||||||
|
// No physical device to release — resolve immediately.
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
|
||||||
// Wait for the stream to be closed (so arecord can use the device)
|
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
const checkInterval = setInterval(() => {
|
const checkInterval = setInterval(() => {
|
||||||
if (this._streamClosed || !this.process) {
|
if (this._streamClosed || !this.process) {
|
||||||
@ -160,7 +202,6 @@ export class WakeWordService extends EventEmitter {
|
|||||||
}
|
}
|
||||||
}, 50);
|
}, 50);
|
||||||
|
|
||||||
// Safety timeout
|
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
clearInterval(checkInterval);
|
clearInterval(checkInterval);
|
||||||
resolve();
|
resolve();
|
||||||
@ -168,25 +209,18 @@ export class WakeWordService extends EventEmitter {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Resume wake word detection after pause.
|
|
||||||
* The Python subprocess reopens the audio stream (fast, no model reload).
|
|
||||||
*/
|
|
||||||
resume(): void {
|
resume(): void {
|
||||||
if (!this.process || !this._isPaused) return;
|
if (!this.process || !this._isPaused) return;
|
||||||
|
|
||||||
this._isPaused = false;
|
this._isPaused = false;
|
||||||
this.process.stdin?.write('RESUME\n');
|
this.writeControl('RESUME');
|
||||||
this.logger.info('🎤 Resuming wake word listening...');
|
this.logger.info('🎤 Resuming wake word listening...');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Stop wake word detection permanently.
|
|
||||||
*/
|
|
||||||
stop(): void {
|
stop(): void {
|
||||||
if (this.process) {
|
if (this.process) {
|
||||||
this.process.stdin?.write('QUIT\n');
|
this.writeControl('QUIT');
|
||||||
// Give it a moment to exit cleanly, then force kill
|
this.detachHardware();
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
if (this.process) {
|
if (this.process) {
|
||||||
this.process.kill('SIGTERM');
|
this.process.kill('SIGTERM');
|
||||||
@ -198,4 +232,35 @@ export class WakeWordService extends EventEmitter {
|
|||||||
this._isPaused = false;
|
this._isPaused = false;
|
||||||
this.removeAllListeners();
|
this.removeAllListeners();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ──────────────────────────────────────────────────────────
|
||||||
|
// Internals
|
||||||
|
// ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Send one newline-terminated text command to the subprocess.
 *
 * ALSA mode: the command is written to stdin.
 * ESP32 mode: stdin carries audio, so the command is written to the
 * extra pipe at fd 3 (process.stdio[3]) instead.
 *
 * Does nothing when there is no subprocess, or when the control pipe
 * is missing or already destroyed.
 */
private writeControl(cmd: string): void {
  const child = this.process;
  if (!child) return;

  const payload = `${cmd}\n`;

  if (!this.usesHardware) {
    child.stdin?.write(payload);
    return;
  }

  // stdio[3] is our control pipe — a Node Writable (net.Socket) stream.
  const controlPipe = child.stdio[3] as unknown as
    | (NodeJS.WritableStream & { destroyed?: boolean })
    | null;
  if (!controlPipe || controlPipe.destroyed) return;
  controlPipe.write(payload);
}
|
||||||
|
|
||||||
|
/**
 * Unregister the `audio_up` listener from the hardware service.
 * No-op when the service was constructed without hardware.
 */
private detachHardware(): void {
  if (!this.usesHardware || !this.hardware) return;
  this.hardware.off('audio_up', this.forwardMicChunk);
}
|
||||||
}
|
}
|
||||||
|
|||||||
7
apps/robot-hardware/lib/Audio/library.json
Normal file
7
apps/robot-hardware/lib/Audio/library.json
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"name": "Audio",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"description": "Ti-Pote audio I/O — INMP441 mic + MAX98357A speaker on a single full-duplex I2S bus.",
|
||||||
|
"frameworks": "arduino",
|
||||||
|
"platforms": "espressif32"
|
||||||
|
}
|
||||||
151
apps/robot-hardware/lib/Audio/src/Audio.cpp
Normal file
151
apps/robot-hardware/lib/Audio/src/Audio.cpp
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
#include "Audio.h"
|
||||||
|
#include <driver/i2s.h>
|
||||||
|
|
||||||
|
namespace tipote {
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
// Shared I2S bus pin assignment — see the header for rationale.
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
static constexpr int PIN_BCLK = 32; // shared: mic SCK + speaker BCLK
|
||||||
|
static constexpr int PIN_LRCLK = 33; // shared: mic WS + speaker LRC
|
||||||
|
static constexpr int PIN_MIC_DIN = 34; // INMP441 SD → ESP32 data-in
|
||||||
|
static constexpr int PIN_SPK_DOUT = 22; // MAX98357A DIN ← ESP32 data-out
|
||||||
|
|
||||||
|
// DMA buffers — 4 × 256 × 8 bytes (stereo 32-bit) ≈ 8 KB each for
|
||||||
|
// RX and TX. That's ~64 ms of audio each way at 16 kHz, plenty of
|
||||||
|
// room to absorb UART jitter.
|
||||||
|
static constexpr int DMA_COUNT = 4;
|
||||||
|
static constexpr int DMA_LEN = 256;
|
||||||
|
|
||||||
|
bool Audio::begin() {
|
||||||
|
// ───── Single I2S port, full duplex, 32-bit stereo slots ─────
|
||||||
|
//
|
||||||
|
// The INMP441 requires 32-bit slots; the MAX98357A happily reads
|
||||||
|
// the 32-bit frames we emit. With a shared bus we get one set of
|
||||||
|
// BCLK/WS for both sides — exactly like the Pi setup that worked.
|
||||||
|
i2s_config_t cfg = {};
|
||||||
|
cfg.mode = static_cast<i2s_mode_t>(I2S_MODE_MASTER |
|
||||||
|
I2S_MODE_RX |
|
||||||
|
I2S_MODE_TX);
|
||||||
|
cfg.sample_rate = SAMPLE_RATE;
|
||||||
|
cfg.bits_per_sample = I2S_BITS_PER_SAMPLE_32BIT;
|
||||||
|
cfg.channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT; // stereo frames
|
||||||
|
cfg.communication_format = I2S_COMM_FORMAT_STAND_I2S;
|
||||||
|
cfg.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1;
|
||||||
|
cfg.dma_buf_count = DMA_COUNT;
|
||||||
|
cfg.dma_buf_len = DMA_LEN;
|
||||||
|
cfg.use_apll = false;
|
||||||
|
cfg.tx_desc_auto_clear = true;
|
||||||
|
cfg.fixed_mclk = 0;
|
||||||
|
|
||||||
|
if (i2s_driver_install(I2S_NUM_0, &cfg, 0, nullptr) != ESP_OK) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
i2s_pin_config_t pins = {};
|
||||||
|
pins.bck_io_num = PIN_BCLK;
|
||||||
|
pins.ws_io_num = PIN_LRCLK;
|
||||||
|
pins.data_out_num = PIN_SPK_DOUT;
|
||||||
|
pins.data_in_num = PIN_MIC_DIN;
|
||||||
|
if (i2s_set_pin(I2S_NUM_0, &pins) != ESP_OK) {
|
||||||
|
i2s_driver_uninstall(I2S_NUM_0);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
i2s_zero_dma_buffer(I2S_NUM_0);
|
||||||
|
micStarted_ = true;
|
||||||
|
spkStarted_ = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Audio::readMicChunk(uint8_t* out, size_t outCapacity) {
|
||||||
|
if (!micStarted_ || outCapacity < 2) return 0;
|
||||||
|
|
||||||
|
// Stereo read: each "sample pair" is L + R, each 32-bit = 8 bytes.
|
||||||
|
// Cap at 320 pairs = 20 ms @ 16 kHz mono per call.
|
||||||
|
constexpr size_t MAX_PAIRS = 320;
|
||||||
|
int32_t raw[MAX_PAIRS * 2];
|
||||||
|
|
||||||
|
size_t wantPairs = outCapacity / 2; // 2 bytes out per mono sample
|
||||||
|
if (wantPairs > MAX_PAIRS) wantPairs = MAX_PAIRS;
|
||||||
|
|
||||||
|
size_t bytesRead = 0;
|
||||||
|
const esp_err_t err = i2s_read(
|
||||||
|
I2S_NUM_0,
|
||||||
|
reinterpret_cast<void*>(raw),
|
||||||
|
wantPairs * 2 * sizeof(int32_t),
|
||||||
|
&bytesRead,
|
||||||
|
0 // non-blocking
|
||||||
|
);
|
||||||
|
if (err != ESP_OK || bytesRead == 0) return 0;
|
||||||
|
|
||||||
|
const size_t pairs = bytesRead / (2 * sizeof(int32_t));
|
||||||
|
int16_t* dst = reinterpret_cast<int16_t*>(out);
|
||||||
|
|
||||||
|
int32_t lMin = INT32_MAX, lMax = INT32_MIN;
|
||||||
|
int32_t rMin = INT32_MAX, rMax = INT32_MIN;
|
||||||
|
int16_t s16Min = INT16_MAX, s16Max = INT16_MIN;
|
||||||
|
|
||||||
|
const bool pickRight = (micChannel_ == MicChannel::Right);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < pairs; ++i) {
|
||||||
|
const int32_t L = raw[2 * i];
|
||||||
|
const int32_t R = raw[2 * i + 1];
|
||||||
|
if (L < lMin) lMin = L;
|
||||||
|
if (L > lMax) lMax = L;
|
||||||
|
if (R < rMin) rMin = R;
|
||||||
|
if (R > rMax) rMax = R;
|
||||||
|
|
||||||
|
// INMP441 is 24-bit left-justified in a 32-bit slot, so the
|
||||||
|
// useful range lives in bits 31..8. A >> 14 gives a comfortable
|
||||||
|
// speech level; bump to >> 11 if the result is too quiet.
|
||||||
|
const int32_t src = pickRight ? R : L;
|
||||||
|
int32_t s = src >> 14;
|
||||||
|
if (s > INT16_MAX) s = INT16_MAX;
|
||||||
|
if (s < INT16_MIN) s = INT16_MIN;
|
||||||
|
const int16_t s16 = static_cast<int16_t>(s);
|
||||||
|
if (s16 < s16Min) s16Min = s16;
|
||||||
|
if (s16 > s16Max) s16Max = s16;
|
||||||
|
dst[i] = s16;
|
||||||
|
}
|
||||||
|
|
||||||
|
lastStats_ = {lMin, lMax, rMin, rMax, s16Min, s16Max, pairs};
|
||||||
|
return pairs * 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Audio::writeSpeakerChunk(const uint8_t* data, size_t len) {
|
||||||
|
if (!spkStarted_ || len == 0) return 0;
|
||||||
|
|
||||||
|
// The UART brings us S16 mono PCM. The I2S bus is running as
|
||||||
|
// 32-bit stereo, so we expand each 16-bit sample to a stereo
|
||||||
|
// pair of 32-bit words. 320 input samples → 2560 output bytes.
|
||||||
|
constexpr size_t MAX_IN_SAMPLES = 320;
|
||||||
|
const size_t inSamples = (len / 2 > MAX_IN_SAMPLES) ? MAX_IN_SAMPLES : len / 2;
|
||||||
|
|
||||||
|
int32_t stereo[MAX_IN_SAMPLES * 2];
|
||||||
|
const int16_t* src = reinterpret_cast<const int16_t*>(data);
|
||||||
|
for (size_t i = 0; i < inSamples; ++i) {
|
||||||
|
// Shift up to place the sample in the upper 16 bits of the
|
||||||
|
// 32-bit slot (matches what the MAX98357A expects).
|
||||||
|
const int32_t s32 = static_cast<int32_t>(src[i]) << 16;
|
||||||
|
stereo[2 * i] = s32; // left
|
||||||
|
stereo[2 * i + 1] = s32; // right duplicated
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t bytesWritten = 0;
|
||||||
|
i2s_write(I2S_NUM_0, stereo, inSamples * 2 * sizeof(int32_t),
|
||||||
|
&bytesWritten, pdMS_TO_TICKS(50));
|
||||||
|
|
||||||
|
// Report bytes accepted in *caller units* (S16 mono) so the
|
||||||
|
// outside world doesn't need to know about our internal format.
|
||||||
|
const size_t pairsWritten = bytesWritten / (2 * sizeof(int32_t));
|
||||||
|
return pairsWritten * 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Audio::flushSpeaker() {
|
||||||
|
if (spkStarted_) {
|
||||||
|
i2s_zero_dma_buffer(I2S_NUM_0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace tipote
|
||||||
84
apps/robot-hardware/lib/Audio/src/Audio.h
Normal file
84
apps/robot-hardware/lib/Audio/src/Audio.h
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
// Ti-Pote — Audio I/O via a single full-duplex I2S bus.
|
||||||
|
//
|
||||||
|
// I2S_NUM_0 is configured as MASTER in RX+TX mode. BCLK and WS are
|
||||||
|
// shared between the INMP441 microphone (RX) and the MAX98357A
|
||||||
|
// amplifier (TX), which is the standard I2S bus layout — exactly
|
||||||
|
// what was working on the Raspberry Pi side.
|
||||||
|
//
|
||||||
|
// Pin map (single shared I2S bus):
|
||||||
|
// BCLK = GPIO 32 shared mic SCK + speaker BCLK
|
||||||
|
// LRCLK / WS = GPIO 33 shared mic WS + speaker LRC
|
||||||
|
// Mic data in = GPIO 34 INMP441 SD (input-only pin, perfect)
|
||||||
|
// Speaker DOUT = GPIO 22 MAX98357A DIN
|
||||||
|
//
|
||||||
|
// Mic L/R stays tied to GND → talks on the LEFT slot of the I2S frame.
|
||||||
|
//
|
||||||
|
// Format exchanged with the Pi on the UART:
|
||||||
|
// PCM signed 16-bit little-endian, mono, 16 kHz.
|
||||||
|
//
|
||||||
|
// Internally the bus runs at 32-bit stereo slots (INMP441 requires it).
|
||||||
|
// readMicChunk() converts the 32-bit left slot down to S16 mono.
|
||||||
|
// writeSpeakerChunk() expands S16 mono to 32-bit stereo frames before
|
||||||
|
// handing them to i2s_write().
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Arduino.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
namespace tipote {
|
||||||
|
|
||||||
|
class Audio {
|
||||||
|
public:
|
||||||
|
static constexpr int SAMPLE_RATE = 16000;
|
||||||
|
static constexpr int CHANNELS = 1;
|
||||||
|
static constexpr int BYTES_PER_SAMPLE = 2; // S16
|
||||||
|
|
||||||
|
// Initialise the shared full-duplex I2S port (I2S_NUM_0). Call exactly once from setup().
|
||||||
|
bool begin();
|
||||||
|
|
||||||
|
// Pull whatever the mic DMA has ready. Writes S16 mono little-endian
|
||||||
|
// bytes into `out`, up to `outCapacity` bytes, and returns the number
|
||||||
|
// of bytes actually written (always even, possibly zero).
|
||||||
|
//
|
||||||
|
// Non-blocking (timeout = 0).
|
||||||
|
size_t readMicChunk(uint8_t* out, size_t outCapacity);
|
||||||
|
|
||||||
|
// Push S16 mono little-endian PCM to the speaker DMA. Blocks up to
|
||||||
|
// ~50 ms waiting for room. Returns bytes actually accepted.
|
||||||
|
size_t writeSpeakerChunk(const uint8_t* data, size_t len);
|
||||||
|
|
||||||
|
// Drop anything pending in the speaker DMA. Used on shutdown / reset.
|
||||||
|
void flushSpeaker();
|
||||||
|
|
||||||
|
// ─── Debug / bring-up ────────────────────────────────────────
|
||||||
|
//
|
||||||
|
// Stats updated on every readMicChunk() call, covering *this last
|
||||||
|
// batch only*. Handy to confirm the mic is actually clocking data
|
||||||
|
// into the ESP32 without blowing up the main audio path.
|
||||||
|
struct MicStats {
|
||||||
|
int32_t leftRawMin; // raw int32 sample on left I2S slot
|
||||||
|
int32_t leftRawMax;
|
||||||
|
int32_t rightRawMin; // raw int32 sample on right I2S slot
|
||||||
|
int32_t rightRawMax;
|
||||||
|
int16_t s16Min; // post-shift S16 sample (output channel)
|
||||||
|
int16_t s16Max;
|
||||||
|
size_t samples; // sample pairs in the batch
|
||||||
|
};
|
||||||
|
const MicStats& lastMicStats() const { return lastStats_; }
|
||||||
|
|
||||||
|
// Which I2S slot to route into the S16 output. Flip at runtime if
|
||||||
|
// the mic's L/R pin doesn't land where we expect.
|
||||||
|
enum class MicChannel { Left, Right };
|
||||||
|
void setMicChannel(MicChannel ch) { micChannel_ = ch; }
|
||||||
|
MicChannel micChannel() const { return micChannel_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool micStarted_ = false;
|
||||||
|
bool spkStarted_ = false;
|
||||||
|
MicChannel micChannel_ = MicChannel::Left;
|
||||||
|
MicStats lastStats_ = {0, 0, 0, 0, 0, 0, 0};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace tipote
|
||||||
@ -30,6 +30,11 @@ build_flags =
|
|||||||
-DHW_SERIAL_BAUD=921600
|
-DHW_SERIAL_BAUD=921600
|
||||||
; Idle timeout before the eyes fall back to the default animation (ms)
|
; Idle timeout before the eyes fall back to the default animation (ms)
|
||||||
-DHW_HEARTBEAT_TIMEOUT_MS=5000
|
-DHW_HEARTBEAT_TIMEOUT_MS=5000
|
||||||
|
; Hardware UART2 pins used to talk to the Raspberry Pi.
|
||||||
|
; The OLED eyes already claim GPIO 16/17 (UART2 default pins),
|
||||||
|
; so Serial2 is remapped to these two free pins instead.
|
||||||
|
-DHW_UART_RX_PIN=27
|
||||||
|
-DHW_UART_TX_PIN=13
|
||||||
build_unflags =
|
build_unflags =
|
||||||
-std=gnu++11
|
-std=gnu++11
|
||||||
|
|
||||||
|
|||||||
219
apps/robot-hardware/scripts/esp-play.ts
Normal file
219
apps/robot-hardware/scripts/esp-play.ts
Normal file
@ -0,0 +1,219 @@
|
|||||||
|
/**
|
||||||
|
* Ti-Pote — Play a PCM/WAV file on the ESP32 speaker over USB.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* pnpm esp:play <file.wav|file.raw>
|
||||||
|
*
|
||||||
|
* Accepts either:
|
||||||
|
* - raw S16 LE mono 16 kHz PCM
|
||||||
|
* - WAV file with a 44-byte RIFF header (header is stripped)
* - any other format (m4a, mp3, aiff, …), converted first with the macOS `afconvert` tool
|
||||||
|
*
|
||||||
|
* Default port: auto-detected, override with ESP_PORT=/dev/cu.usbserial-XXX
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { execFileSync } from 'node:child_process';
|
||||||
|
import { existsSync, mkdtempSync, readFileSync, readdirSync, rmSync } from 'node:fs';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { join, extname } from 'node:path';
|
||||||
|
import { SerialPort } from 'serialport';
|
||||||
|
|
||||||
|
const SAMPLE_RATE = 16000;
|
||||||
|
|
||||||
|
/**
 * Resolve the serial device to talk to.
 *
 * The ESP_PORT environment variable wins when set to a non-empty value.
 * Otherwise /dev is scanned for the first entry whose name starts with
 * one of the known USB-serial prefixes.
 *
 * @returns absolute device path
 * @throws Error when ESP_PORT is unset and no matching device exists
 */
function findDefaultPort(): string {
  const override = process.env.ESP_PORT;
  if (override) return override;

  const knownPrefixes = ['cu.usbserial', 'cu.SLAB_', 'cu.wchusbserial'];
  const matches = readdirSync('/dev').filter((name) =>
    knownPrefixes.some((prefix) => name.startsWith(prefix)),
  );

  const [first] = matches;
  if (first === undefined) {
    throw new Error(
      'No ESP32 serial port detected. Plug the board in, or set ESP_PORT=/dev/cu.usbserial-XXX',
    );
  }
  return `/dev/${first}`;
}
|
||||||
|
|
||||||
|
/**
 * Return the raw PCM payload of an audio buffer.
 *
 * Buffers that do not start with a RIFF/WAVE signature are assumed to be
 * raw PCM already and are returned unchanged.
 *
 * For WAV input the RIFF chunk list is walked to locate the `data`
 * chunk, rather than assuming a fixed 44-byte header. Files may carry
 * extra chunks before `data` (LIST, fact, filler, an extended `fmt `)
 * and metadata chunks after it; neither must be played as audio.
 *
 * @param buf file contents
 * @returns a view (no copy) onto the PCM bytes
 */
function stripWav(buf: Buffer): Buffer {
  const CANONICAL_HEADER_BYTES = 44;
  const RIFF_PREAMBLE_BYTES = 12; // 'RIFF' + u32 size + 'WAVE'
  const CHUNK_HEADER_BYTES = 8; // 4-byte id + u32 little-endian size

  const isRiffWave =
    buf.length >= RIFF_PREAMBLE_BYTES &&
    buf.toString('ascii', 0, 4) === 'RIFF' &&
    buf.toString('ascii', 8, 12) === 'WAVE';
  if (!isRiffWave) return buf;

  let offset = RIFF_PREAMBLE_BYTES;
  while (offset + CHUNK_HEADER_BYTES <= buf.length) {
    const chunkId = buf.toString('ascii', offset, offset + 4);
    const chunkSize = buf.readUInt32LE(offset + 4);
    const bodyStart = offset + CHUNK_HEADER_BYTES;

    if (chunkId === 'data') {
      // A zero or oversized declared length is what streaming writers
      // leave behind when they never patch the header; in that case
      // take everything up to the end of the file.
      const declaredEnd = bodyStart + chunkSize;
      const bodyEnd = chunkSize === 0 || declaredEnd > buf.length ? buf.length : declaredEnd;
      return buf.subarray(bodyStart, bodyEnd);
    }

    // Chunks are word-aligned: an odd-sized body is followed by a pad byte.
    offset = bodyStart + chunkSize + (chunkSize & 1);
  }

  // No `data` chunk found (malformed file): keep the legacy behaviour of
  // dropping a canonical 44-byte header when there is one to drop.
  return buf.length > CANONICAL_HEADER_BYTES ? buf.subarray(CANONICAL_HEADER_BYTES) : buf;
}
|
||||||
|
|
||||||
|
/**
 * Transcode an audio file to a 16 kHz, mono, signed 16-bit little-endian
 * WAV by shelling out to the macOS `afconvert` tool.
 *
 * The result is written as `converted.wav` inside a freshly created
 * temporary directory. On failure that directory is removed before the
 * error is thrown; on success the caller owns it and must invoke the
 * returned `cleanup`.
 *
 * @param inputPath path of the file to convert
 * @returns the converted file's path and a function that deletes the temp directory
 * @throws Error prefixed with "afconvert failed:" when the tool cannot run or exits non-zero
 */
function convertToEsp32Wav(inputPath: string): { wavPath: string; cleanup: () => void } {
  const workDir = mkdtempSync(join(tmpdir(), 'tipote-'));
  const removeWorkDir = (): void => rmSync(workDir, { recursive: true, force: true });
  const wavPath = join(workDir, 'converted.wav');

  console.log(`→ converting ${inputPath} → 16 kHz mono S16LE WAV`);

  const afconvertArgs = [
    '-f', 'WAVE',
    '-d', 'LEI16@16000',
    '-c', '1',
    inputPath,
    wavPath,
  ];

  try {
    execFileSync('afconvert', afconvertArgs, { stdio: 'inherit' });
  } catch (err) {
    removeWorkDir();
    throw new Error(`afconvert failed: ${(err as Error).message}`);
  }

  return { wavPath, cleanup: removeWorkDir };
}
|
||||||
|
|
||||||
|
/**
 * Entry point: load an audio file, convert it if needed, and stream it
 * to the ESP32 over the serial port.
 *
 * Protocol as driven from this side:
 *   1. wait for a `READY` line from the firmware (5 s timeout);
 *   2. send `PLAY <byteCount>\n`;
 *   3. stream the PCM in 1024-byte bursts, paced slightly faster than
 *      real time;
 *   4. wait for an `OK` line (timeout = clip duration + 8 s).
 * An `ERR <msg>` line from the firmware aborts the run at any stage.
 *
 * The temporary conversion directory and the serial port are released
 * in a `finally` block so that no failure path leaks them.
 *
 * @throws Error on missing/invalid input, serial failures, firmware
 *   errors, or timeouts
 */
async function main(): Promise<void> {
  const inPath = process.argv[2];
  if (!inPath) {
    console.error('Usage: esp-play.ts <file> (wav, raw, m4a, mp3, …)');
    process.exit(1);
  }
  if (!existsSync(inPath)) {
    throw new Error(`file not found: ${inPath}`);
  }

  // Convert anything that isn't already a .wav or raw PCM blob. This
  // covers m4a / mp3 / ogg / aiff / opus / flac via the built-in
  // macOS `afconvert` tool.
  const ext = extname(inPath).toLowerCase();
  const needsConversion = ext !== '.wav' && ext !== '.raw' && ext !== '.pcm';

  let cleanup: () => void = () => {};
  let port: SerialPort | null = null;
  let finishTimer: ReturnType<typeof setTimeout> | undefined;
  let readyTimer: ReturnType<typeof setTimeout> | undefined;

  try {
    let loadPath = inPath;
    if (needsConversion) {
      const converted = convertToEsp32Wav(inPath);
      loadPath = converted.wavPath;
      cleanup = converted.cleanup;
    }

    const raw = readFileSync(loadPath);
    const pcm = stripWav(raw);
    const samples = pcm.length / 2;
    const durationMs = (samples / SAMPLE_RATE) * 1000;
    console.log(
      `→ loaded ${loadPath}: ${pcm.length} bytes (${samples} samples, ${durationMs.toFixed(0)} ms)`,
    );

    if (pcm.length === 0) {
      throw new Error('empty PCM buffer');
    }
    if (pcm.length % 2 !== 0) {
      throw new Error(
        'PCM size must be a multiple of 2 (S16 mono). The source file is probably not 16-bit or not mono. If you passed a raw file, convert it first.',
      );
    }

    const path = findDefaultPort();
    console.log(`→ opening ${path} @ 921600 baud`);

    const serial = new SerialPort({ path, baudRate: 921600, autoOpen: false });
    port = serial;
    await new Promise<void>((resolve, reject) => {
      serial.open((err) => (err ? reject(err) : resolve()));
    });

    let ready = false;
    const readyWaiters: Array<() => void> = [];

    // Settles when the firmware reports the outcome of the whole
    // transfer: resolves on `OK`, rejects on `ERR …`, a port error, or
    // the overall timeout.
    const finished = new Promise<void>((resolve, reject) => {
      finishTimer = setTimeout(
        () => reject(new Error(`timeout waiting for OK after ${durationMs + 8000} ms`)),
        durationMs + 8000,
      );
      let lineBuf = '';
      serial.on('data', (data: Buffer) => {
        lineBuf += data.toString('utf8');
        let idx: number;
        while ((idx = lineBuf.indexOf('\n')) >= 0) {
          const line = lineBuf.slice(0, idx).replace(/\r$/, '').trim();
          lineBuf = lineBuf.slice(idx + 1);
          if (!line) continue;
          if (line === 'OK') {
            clearTimeout(finishTimer);
            resolve();
            return;
          }
          if (line === 'READY') {
            ready = true;
            while (readyWaiters.length) readyWaiters.shift()!();
            continue;
          }
          if (line.startsWith('ERR ')) {
            clearTimeout(finishTimer);
            reject(new Error(`firmware error: ${line.slice(4)}`));
            return;
          }
          if (line.startsWith('LOG ')) console.log(`[esp] ${line.slice(4)}`);
          else console.log(`[esp] ${line}`);
        }
      });
      serial.on('error', reject);
    });

    // `finished` can reject long before it is awaited below. Mirror its
    // failure into a never-resolving promise that the intermediate waits
    // race against, so an early firmware error aborts promptly instead
    // of surfacing as an unhandled rejection.
    const aborted = new Promise<never>((_resolve, reject) => {
      finished.catch(reject);
    });
    aborted.catch(() => {
      /* reported through the races and the final await */
    });

    // Wait for READY so we don't send PLAY into the bootloader.
    const becameReady = new Promise<void>((resolve, reject) => {
      if (ready) return resolve();
      readyTimer = setTimeout(
        () => reject(new Error('timeout waiting for READY from firmware')),
        5000,
      );
      readyWaiters.push(() => {
        clearTimeout(readyTimer);
        resolve();
      });
    });
    await Promise.race([becameReady, aborted]);
    await new Promise((r) => setTimeout(r, 50));

    console.log(`→ PLAY ${pcm.length} bytes`);
    serial.write(`PLAY ${pcm.length}\n`);

    // Stream the payload paced at roughly the I2S consumption rate so
    // the ESP32 RX buffer stays roughly constant in size regardless of
    // file length. I2S consumes 16 kHz × 2 bytes/sample = 32 KB/s of
    // S16 mono, so a 1024-byte burst is 32 ms of audio.
    //
    // We send slightly faster than that (30 ms pause instead of 32) so
    // the DMA never runs dry. The excess slowly fills the ~16 KB RX
    // buffer on the firmware; even for a 10 s file we stay well under it.
    const CHUNK = 1024;
    const PAUSE_MS = 30;
    for (let off = 0; off < pcm.length; off += CHUNK) {
      const slice = pcm.subarray(off, off + CHUNK);
      await new Promise<void>((resolve, reject) => {
        serial.write(slice, (err) => (err ? reject(err) : resolve()));
      });
      await new Promise<void>((resolve) => serial.drain(() => resolve()));
      if (off + CHUNK < pcm.length) {
        // Racing the pacing sleep lets a firmware error stop the stream
        // at the next burst boundary.
        await Promise.race([new Promise<void>((r) => setTimeout(r, PAUSE_MS)), aborted]);
      }
    }

    await finished;
  } finally {
    if (finishTimer !== undefined) clearTimeout(finishTimer);
    if (readyTimer !== undefined) clearTimeout(readyTimer);
    const openPort = port;
    if (openPort && openPort.isOpen) {
      await new Promise<void>((resolve) => openPort.close(() => resolve()));
    }
    cleanup();
  }

  console.log('✅ playback done');
}
|
||||||
|
|
||||||
|
// Run the script; any failure is printed and turned into exit code 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
|
||||||
190
apps/robot-hardware/scripts/esp-record.ts
Normal file
190
apps/robot-hardware/scripts/esp-record.ts
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
/**
|
||||||
|
* Ti-Pote — Record audio from the ESP32 over USB.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* pnpm --filter @ti-pote/robot-client exec tsx \
|
||||||
|
* ../robot-hardware/scripts/esp-record.ts <file.wav> [duration_ms]
|
||||||
|
*
|
||||||
|
* Or with the shortcut from robot-hardware:
|
||||||
|
* pnpm esp:record out.wav 3000
|
||||||
|
*
|
||||||
|
* Defaults:
|
||||||
|
* duration_ms = 3000
|
||||||
|
* port = auto-detected (first /dev/cu.usbserial*, /dev/cu.SLAB_* or /dev/cu.wchusbserial*)
|
||||||
|
* can be overridden with ESP_PORT=/dev/cu.usbserial-XXX
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { readdirSync, writeFileSync } from 'node:fs';
|
||||||
|
import { SerialPort } from 'serialport';
|
||||||
|
|
||||||
|
const SAMPLE_RATE = 16000;
|
||||||
|
const BYTES_PER_SAMPLE = 2;
|
||||||
|
|
||||||
|
/**
 * Pick the serial device for the ESP32.
 *
 * Returns ESP_PORT verbatim when that environment variable is set to a
 * non-empty value; otherwise returns the first /dev entry whose name
 * begins with a known USB-serial prefix.
 *
 * @returns absolute device path
 * @throws Error when no override is given and no device matches
 */
function findDefaultPort(): string {
  const fromEnv = process.env.ESP_PORT;
  if (fromEnv) return fromEnv;

  const looksLikeEsp = (entry: string): boolean =>
    entry.startsWith('cu.usbserial') ||
    entry.startsWith('cu.SLAB_') ||
    entry.startsWith('cu.wchusbserial');

  const device = readdirSync('/dev').find(looksLikeEsp);
  if (device === undefined) {
    throw new Error(
      'No ESP32 serial port detected. Plug the board in, or set ESP_PORT=/dev/cu.usbserial-XXX',
    );
  }
  return `/dev/${device}`;
}
|
||||||
|
|
||||||
|
/**
 * Builds the 44-byte RIFF/WAVE header for 16-bit mono PCM.
 *
 * @param pcmBytes   Size in bytes of the PCM payload that follows the header.
 * @param sampleRate Sample rate in Hz.
 * @returns A 44-byte buffer: RIFF descriptor, `fmt ` chunk, `data` chunk header.
 */
function wavHeader(pcmBytes: number, sampleRate: number): Buffer {
  // RIFF descriptor: total size is the remaining 36 header bytes + payload.
  const riff = Buffer.alloc(12);
  riff.write('RIFF', 0);
  riff.writeUInt32LE(36 + pcmBytes, 4);
  riff.write('WAVE', 8);

  // Format chunk: PCM (1), one channel, 16 bits per sample.
  const fmt = Buffer.alloc(24);
  fmt.write('fmt ', 0);
  fmt.writeUInt32LE(16, 4); // fmt chunk payload size
  fmt.writeUInt16LE(1, 8); // audio format: PCM
  fmt.writeUInt16LE(1, 10); // channel count: mono
  fmt.writeUInt32LE(sampleRate, 12);
  fmt.writeUInt32LE(sampleRate * 2, 16); // byte rate = rate * 2 bytes per frame
  fmt.writeUInt16LE(2, 20); // block align: 2 bytes per frame
  fmt.writeUInt16LE(16, 22); // bits per sample

  // Data chunk header; the samples themselves are appended by the caller.
  const data = Buffer.alloc(8);
  data.write('data', 0);
  data.writeUInt32LE(pcmBytes, 4);

  return Buffer.concat([riff, fmt, data]);
}
|
||||||
|
|
||||||
|
/**
 * Records audio from the ESP32 and writes it to disk.
 *
 * Flow: parse CLI arguments, open the serial port at 921600 baud, wait
 * for the firmware's `READY` line, send `REC <ms>`, collect the raw PCM
 * announced by `BEGIN <bytes>` until `END`, print a size/RMS summary and
 * write the result. A path ending in `.wav` gets a WAV header prepended;
 * any other path receives the raw PCM bytes.
 *
 * Exits the process with status 1 on a missing output path or an invalid
 * duration. Rejects on port errors, firmware `ERR` lines, a malformed
 * `BEGIN` line, or when either timeout (READY: 5 s, audio:
 * duration + 5 s) elapses. The port is closed on both success and failure.
 */
async function main(): Promise<void> {
  const outPath = process.argv[2];
  if (!outPath) {
    console.error('Usage: esp-record.ts <file.wav> [duration_ms]');
    process.exit(1);
  }

  // Validate up front: a NaN duration would otherwise make the audio
  // timeout fire almost immediately and send "REC NaN" to the firmware.
  const durationArg = process.argv[3] ?? '3000';
  const durationMs = Number.parseInt(durationArg, 10);
  if (!Number.isInteger(durationMs) || durationMs <= 0) {
    console.error(`Invalid duration_ms "${durationArg}": expected a positive integer`);
    process.exit(1);
  }

  const path = findDefaultPort();
  console.log(`→ opening ${path} @ 921600 baud`);

  const port = new SerialPort({ path, baudRate: 921600, autoOpen: false });
  await new Promise<void>((resolve, reject) => {
    port.open((err) => (err ? reject(err) : resolve()));
  });

  let pcm: Buffer;
  try {
    // ── line-based state machine; switches to raw mode after BEGIN ──
    let phase: 'idle' | 'streaming' = 'idle';
    let remaining = 0;
    const chunks: Buffer[] = [];
    let lineBuf = '';
    let ready = false;
    const readyWaiters: Array<() => void> = [];

    const finished = new Promise<Buffer>((resolve, reject) => {
      const timeout = setTimeout(
        () => reject(new Error(`timeout waiting for audio after ${durationMs + 5000} ms`)),
        durationMs + 5000,
      );

      port.on('data', (data: Buffer) => {
        let offset = 0;
        while (offset < data.length) {
          if (phase === 'streaming') {
            // Raw PCM: consume exactly the number of bytes BEGIN announced.
            const take = Math.min(remaining, data.length - offset);
            chunks.push(data.subarray(offset, offset + take));
            offset += take;
            remaining -= take;
            if (remaining === 0) {
              phase = 'idle';
              lineBuf = '';
            }
            continue;
          }

          // Text mode: accumulate until a newline completes the line.
          const nl = data.indexOf(0x0a, offset);
          if (nl === -1) {
            lineBuf += data.subarray(offset).toString('utf8');
            break;
          }
          lineBuf += data.subarray(offset, nl).toString('utf8');
          offset = nl + 1;
          const line = lineBuf.trim();
          lineBuf = '';
          if (!line) continue;

          if (line.startsWith('BEGIN ')) {
            const announced = Number.parseInt(line.slice(6), 10);
            if (!Number.isInteger(announced) || announced < 0) {
              // A NaN length would leave the parser stuck in streaming mode.
              clearTimeout(timeout);
              reject(new Error(`malformed BEGIN line from firmware: ${line}`));
              continue;
            }
            remaining = announced;
            if (announced > 0) phase = 'streaming';
            console.log(`→ BEGIN ${remaining} bytes`);
          } else if (line === 'END') {
            clearTimeout(timeout);
            resolve(Buffer.concat(chunks));
          } else if (line === 'READY') {
            ready = true;
            for (const wake of readyWaiters.splice(0)) wake();
          } else if (line.startsWith('LOG ')) {
            console.log(`[esp] ${line.slice(4)}`);
          } else if (line.startsWith('ERR ')) {
            clearTimeout(timeout);
            reject(new Error(`firmware error: ${line.slice(4)}`));
          } else {
            console.log(`[esp] ${line}`);
          }
        }
      });

      port.on('error', reject);
    });

    // The ESP32 resets on port open (DTR/RTS). Wait until it prints
    // READY so we don't send commands into the bootloader.
    const firmwareReady = new Promise<void>((resolve, reject) => {
      if (ready) return resolve();
      const timer = setTimeout(
        () => reject(new Error('timeout waiting for READY from firmware')),
        5000,
      );
      readyWaiters.push(() => {
        clearTimeout(timer);
        resolve();
      });
    });
    // Racing against `finished` surfaces a port error or firmware ERR that
    // arrives before READY, instead of leaving that rejection unhandled.
    await Promise.race([firmwareReady, finished]);
    await new Promise((r) => setTimeout(r, 50));

    console.log(`→ REC ${durationMs} ms — speak now!`);
    port.write(`REC ${durationMs}\n`);

    pcm = await finished;
  } finally {
    // Release the device whether the capture succeeded or failed.
    await new Promise<void>((resolve) => port.close(() => resolve()));
  }

  // Basic RMS sanity check over complete 16-bit samples.
  const samples = Math.floor(pcm.length / BYTES_PER_SAMPLE);
  let sumSq = 0;
  for (let i = 0; i < pcm.length - 1; i += 2) {
    const s = pcm.readInt16LE(i);
    sumSq += s * s;
  }
  // Guard the empty capture so the summary shows 0 instead of NaN.
  const rms = samples > 0 ? Math.sqrt(sumSq / samples) : 0;
  const capturedMs = ((samples / SAMPLE_RATE) * 1000).toFixed(0);
  console.log(
    `✅ captured ${pcm.length} bytes (${samples} samples, ${capturedMs} ms) RMS=${rms.toFixed(0)}`,
  );

  if (outPath.toLowerCase().endsWith('.wav')) {
    writeFileSync(outPath, Buffer.concat([wavHeader(pcm.length, SAMPLE_RATE), pcm]));
  } else {
    writeFileSync(outPath, pcm);
  }
  console.log(`→ wrote ${outPath}`);
}
|
||||||
|
|
||||||
|
// Script entry point: run main() and, if it rejects, print the failure
// and terminate with a non-zero exit status.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
|
||||||
@ -1,147 +1,281 @@
|
|||||||
// Ti-Pote — Robot Hardware firmware (ESP32)
|
// Ti-Pote — Minimal audio bring-up firmware (ESP32-WROOM-32)
|
||||||
//
|
//
|
||||||
// Responsibilities for v0:
|
// GOAL: prove the I2S audio chain (INMP441 + MAX98357A) end to end
|
||||||
// - Listen on UART0 (the USB-connected serial port while the ESP32
|
// with nothing else in the loop — no Pi, no OLED, no protocol frames.
|
||||||
// is plugged into Arthur's laptop; on the real robot this will
|
// The ESP32 is plugged into a computer via USB and the host runs
|
||||||
// eventually be Serial2 wired to the Raspberry Pi).
|
// two tiny scripts:
|
||||||
// - Decode incoming binary frames (see include/protocol_types.h).
|
|
||||||
// - Dispatch commands to the Eyes renderer.
|
|
||||||
// - Reply to PING with PONG.
|
|
||||||
// - Fall back to a sleepy animation if no heartbeat is received
|
|
||||||
// for HW_HEARTBEAT_TIMEOUT_MS (set in platformio.ini).
|
|
||||||
//
|
//
|
||||||
// Intentionally NOT yet implemented (Phase 2):
|
//     scripts/esp-record.ts <file.wav|file.raw> [duration_ms]
|
||||||
// - I2S audio up/down streaming
|
//     scripts/esp-play.ts   <file.raw>
|
||||||
// - Servo / LED commands
|
|
||||||
//
|
//
|
||||||
// The hook points for those are marked with TODO(phase2).
|
// Protocol over USB Serial (921600 baud, line-based for commands,
|
||||||
|
// raw bytes for audio):
|
||||||
|
//
|
||||||
|
// host → esp32
|
||||||
|
// "PING\n" ping
|
||||||
|
// "REC <ms>\n" start recording for <ms> milliseconds
|
||||||
|
// "PLAY <bytes>\n" next <bytes> bytes on the wire are raw
|
||||||
|
// S16 LE mono 16 kHz PCM, play them
|
||||||
|
//
|
||||||
|
// esp32 → host
|
||||||
|
// "READY\n" once at boot
|
||||||
|
// "PONG\n" reply to PING
|
||||||
|
// "LOG <text>\n" human-readable log line
|
||||||
|
// "ERR <text>\n" error message
|
||||||
|
// "BEGIN <bytes>\n" start of a REC response
|
||||||
|
// "<raw bytes>" raw PCM (S16 LE mono 16 kHz)
|
||||||
|
// "END\n" end of a REC response
|
||||||
|
// "OK\n" command completed
|
||||||
|
//
|
||||||
|
// Wiring (shared I2S bus on I2S_NUM_0):
|
||||||
|
// BCLK = GPIO 32 (mic SCK + speaker BCLK)
|
||||||
|
// LRCLK = GPIO 33 (mic WS + speaker LRC)
|
||||||
|
// MIC = GPIO 34 (INMP441 SD → ESP32 data-in, input-only pin)
|
||||||
|
// SPK = GPIO 22 (ESP32 data-out → MAX98357A DIN)
|
||||||
|
|
||||||
#include <Arduino.h>
|
#include <Arduino.h>
|
||||||
#include "Protocol.h"
|
#include <driver/i2s.h>
|
||||||
#include "Eyes.h"
|
#include <string.h>
|
||||||
|
|
||||||
#ifndef HW_SERIAL_BAUD
|
// ──────────────────────────────────────────────────────────
|
||||||
#define HW_SERIAL_BAUD 921600
|
// Audio config
|
||||||
#endif
|
// ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
#ifndef HW_HEARTBEAT_TIMEOUT_MS
|
static constexpr int SAMPLE_RATE = 16000;
|
||||||
#define HW_HEARTBEAT_TIMEOUT_MS 5000
|
static constexpr int PIN_BCLK = 32;
|
||||||
#endif
|
static constexpr int PIN_LRCLK = 33;
|
||||||
|
static constexpr int PIN_MIC_DIN = 34;
|
||||||
|
static constexpr int PIN_SPK_DOUT = 22;
|
||||||
|
|
||||||
// The communication stream. When the ESP32 is plugged into a
|
static constexpr int DMA_COUNT = 4;
|
||||||
// computer, UART0 (Serial) is the USB-CDC port, which is exactly
|
static constexpr int DMA_LEN = 256;
|
||||||
// what the robot-client will talk to during development. Later,
|
|
||||||
// for the Pi wiring, change this to Serial2 and call
|
|
||||||
// `Serial2.begin(HW_SERIAL_BAUD, SERIAL_8N1, RX_PIN, TX_PIN)`.
|
|
||||||
#define HW_COMM Serial
|
|
||||||
|
|
||||||
using namespace tipote;
|
// Staging buffers — keep them outside of functions so we don't eat
|
||||||
|
// stack on every tick.
|
||||||
|
static constexpr size_t OUT_S16_SAMPLES = 320; // 20 ms of S16 mono
|
||||||
|
static int32_t g_rawStereo[OUT_S16_SAMPLES * 2];
|
||||||
|
static int16_t g_micMono [OUT_S16_SAMPLES];
|
||||||
|
static int32_t g_spkStereo[OUT_S16_SAMPLES * 2];
|
||||||
|
static uint8_t g_spkInBuf [OUT_S16_SAMPLES * 2]; // 640 bytes of S16 mono
|
||||||
|
|
||||||
static Eyes eyes;
|
// ──────────────────────────────────────────────────────────
|
||||||
static FrameDecoder decoder;
|
// Line buffer for incoming text commands.
|
||||||
|
// ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
static uint32_t lastHeartbeatMs = 0;
|
static char g_line[64];
|
||||||
static bool idleMode = false;
|
static size_t g_lineLen = 0;
|
||||||
|
|
||||||
// Forward decl
|
// Emits one human-readable log line to the host: "LOG <msg>" followed by
// the line terminator that Serial.println() appends.
static void sendLog(const char* msg) {
  static const char kPrefix[] = "LOG ";
  Serial.print(kPrefix);
  Serial.println(msg);
}
|
||||||
|
|
||||||
|
// Emits one error line to the host: "ERR <msg>" followed by the line
// terminator that Serial.println() appends.
static void sendErr(const char* msg) {
  static const char kPrefix[] = "ERR ";
  Serial.print(kPrefix);
  Serial.println(msg);
}
|
||||||
|
|
||||||
|
// ──────────────────────────────────────────────────────────
|
||||||
|
// I2S init — single port, full duplex, shared BCLK/WS.
|
||||||
|
// ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
static bool audioBegin() {
|
||||||
|
i2s_config_t cfg = {};
|
||||||
|
cfg.mode = static_cast<i2s_mode_t>(I2S_MODE_MASTER |
|
||||||
|
I2S_MODE_RX |
|
||||||
|
I2S_MODE_TX);
|
||||||
|
cfg.sample_rate = SAMPLE_RATE;
|
||||||
|
cfg.bits_per_sample = I2S_BITS_PER_SAMPLE_32BIT;
|
||||||
|
cfg.channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT;
|
||||||
|
cfg.communication_format = I2S_COMM_FORMAT_STAND_I2S;
|
||||||
|
cfg.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1;
|
||||||
|
cfg.dma_buf_count = DMA_COUNT;
|
||||||
|
cfg.dma_buf_len = DMA_LEN;
|
||||||
|
cfg.use_apll = false;
|
||||||
|
cfg.tx_desc_auto_clear = true;
|
||||||
|
cfg.fixed_mclk = 0;
|
||||||
|
|
||||||
|
if (i2s_driver_install(I2S_NUM_0, &cfg, 0, nullptr) != ESP_OK) return false;
|
||||||
|
|
||||||
|
i2s_pin_config_t pins = {};
|
||||||
|
pins.bck_io_num = PIN_BCLK;
|
||||||
|
pins.ws_io_num = PIN_LRCLK;
|
||||||
|
pins.data_out_num = PIN_SPK_DOUT;
|
||||||
|
pins.data_in_num = PIN_MIC_DIN;
|
||||||
|
if (i2s_set_pin(I2S_NUM_0, &pins) != ESP_OK) {
|
||||||
|
i2s_driver_uninstall(I2S_NUM_0);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
i2s_zero_dma_buffer(I2S_NUM_0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert one batch of stereo 32-bit mic samples to S16 mono by
|
||||||
|
// taking the left slot and shifting the 24-bit-aligned data down.
|
||||||
|
// Returns the number of S16 samples written into `out`.
|
||||||
|
static size_t micReadMono(int16_t* out, size_t maxSamples) {
|
||||||
|
size_t wantPairs = maxSamples;
|
||||||
|
if (wantPairs > OUT_S16_SAMPLES) wantPairs = OUT_S16_SAMPLES;
|
||||||
|
|
||||||
|
size_t bytesRead = 0;
|
||||||
|
const esp_err_t err = i2s_read(
|
||||||
|
I2S_NUM_0,
|
||||||
|
g_rawStereo,
|
||||||
|
wantPairs * 2 * sizeof(int32_t),
|
||||||
|
&bytesRead,
|
||||||
|
portMAX_DELAY // block — we're in a dedicated REC loop
|
||||||
|
);
|
||||||
|
if (err != ESP_OK || bytesRead == 0) return 0;
|
||||||
|
|
||||||
|
const size_t pairs = bytesRead / (2 * sizeof(int32_t));
|
||||||
|
for (size_t i = 0; i < pairs; ++i) {
|
||||||
|
int32_t L = g_rawStereo[2 * i];
|
||||||
|
int32_t s = L >> 14;
|
||||||
|
if (s > INT16_MAX) s = INT16_MAX;
|
||||||
|
if (s < INT16_MIN) s = INT16_MIN;
|
||||||
|
out[i] = static_cast<int16_t>(s);
|
||||||
|
}
|
||||||
|
return pairs;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write one batch of S16 mono PCM to the speaker by duplicating each
|
||||||
|
// sample into both stereo slots and shifting into the high half of
|
||||||
|
// the 32-bit word (what the MAX98357A expects on a shared bus).
|
||||||
|
static void spkWriteMono(const int16_t* samples, size_t count) {
|
||||||
|
if (count == 0) return;
|
||||||
|
if (count > OUT_S16_SAMPLES) count = OUT_S16_SAMPLES;
|
||||||
|
for (size_t i = 0; i < count; ++i) {
|
||||||
|
const int32_t s32 = static_cast<int32_t>(samples[i]) << 16;
|
||||||
|
g_spkStereo[2 * i] = s32;
|
||||||
|
g_spkStereo[2 * i + 1] = s32;
|
||||||
|
}
|
||||||
|
size_t bytesWritten = 0;
|
||||||
|
i2s_write(I2S_NUM_0, g_spkStereo, count * 2 * sizeof(int32_t),
|
||||||
|
&bytesWritten, portMAX_DELAY);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ──────────────────────────────────────────────────────────
|
||||||
|
// Command handlers
|
||||||
|
// ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
static void handleRec(uint32_t durationMs) {
|
||||||
|
const uint32_t totalSamples = (SAMPLE_RATE * durationMs) / 1000;
|
||||||
|
const uint32_t totalBytes = totalSamples * sizeof(int16_t);
|
||||||
|
|
||||||
|
Serial.print("BEGIN ");
|
||||||
|
Serial.println(totalBytes);
|
||||||
|
|
||||||
|
// Flush whatever old noise is in the mic DMA first.
|
||||||
|
i2s_zero_dma_buffer(I2S_NUM_0);
|
||||||
|
|
||||||
|
uint32_t sent = 0;
|
||||||
|
while (sent < totalSamples) {
|
||||||
|
size_t want = totalSamples - sent;
|
||||||
|
if (want > OUT_S16_SAMPLES) want = OUT_S16_SAMPLES;
|
||||||
|
const size_t got = micReadMono(g_micMono, want);
|
||||||
|
if (got == 0) continue;
|
||||||
|
Serial.write(reinterpret_cast<const uint8_t*>(g_micMono),
|
||||||
|
got * sizeof(int16_t));
|
||||||
|
sent += got;
|
||||||
|
}
|
||||||
|
|
||||||
|
Serial.println();
|
||||||
|
Serial.println("END");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void handlePlay(uint32_t totalBytes) {
|
||||||
|
// Drain any pending crap from the speaker DMA so we don't start
|
||||||
|
// with a pop.
|
||||||
|
i2s_zero_dma_buffer(I2S_NUM_0);
|
||||||
|
|
||||||
|
// Give Serial.readBytes a generous timeout so a jittery host
|
||||||
|
// doesn't abort us mid-playback.
|
||||||
|
Serial.setTimeout(2000);
|
||||||
|
|
||||||
|
uint32_t remaining = totalBytes;
|
||||||
|
while (remaining > 0) {
|
||||||
|
size_t want = remaining;
|
||||||
|
if (want > sizeof(g_spkInBuf)) want = sizeof(g_spkInBuf);
|
||||||
|
// Force an even count so we always have complete S16 samples.
|
||||||
|
if (want & 1) want -= 1;
|
||||||
|
if (want == 0) want = 2;
|
||||||
|
|
||||||
|
const size_t got = Serial.readBytes(g_spkInBuf, want);
|
||||||
|
if (got == 0) {
|
||||||
|
sendErr("PLAY read timeout");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const size_t samples = got / sizeof(int16_t);
|
||||||
|
spkWriteMono(reinterpret_cast<const int16_t*>(g_spkInBuf), samples);
|
||||||
|
remaining -= got;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Let the last frames actually reach the speaker, then clear.
|
||||||
|
delay(50);
|
||||||
|
i2s_zero_dma_buffer(I2S_NUM_0);
|
||||||
|
Serial.println("OK");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void handleLine(const char* line) {
|
||||||
|
if (strcmp(line, "PING") == 0) {
|
||||||
|
Serial.println("PONG");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (strncmp(line, "REC ", 4) == 0) {
|
||||||
|
const long ms = atol(line + 4);
|
||||||
|
if (ms <= 0 || ms > 60000) { sendErr("REC bad duration"); return; }
|
||||||
|
handleRec(static_cast<uint32_t>(ms));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (strncmp(line, "PLAY ", 5) == 0) {
|
||||||
|
const long bytes = atol(line + 5);
|
||||||
|
if (bytes <= 0 || bytes > 16 * 1024 * 1024) {
|
||||||
|
sendErr("PLAY bad size");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
handlePlay(static_cast<uint32_t>(bytes));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sendErr("unknown command");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ──────────────────────────────────────────────────────────
|
||||||
|
// Arduino entry points
|
||||||
|
// ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
void setup() {
|
void setup() {
|
||||||
HW_COMM.begin(HW_SERIAL_BAUD);
|
// Bump the UART RX buffer WAY above the 256-byte default so we
|
||||||
// Give the host a beat to open the port after auto-reset.
|
// can absorb a full PLAY payload (up to a few tens of KB) without
|
||||||
|
// losing bytes if the host floods us.
|
||||||
|
Serial.setRxBufferSize(16 * 1024);
|
||||||
|
Serial.begin(921600);
|
||||||
delay(50);
|
delay(50);
|
||||||
|
|
||||||
eyes.begin();
|
if (!audioBegin()) {
|
||||||
|
sendErr("I2S init failed");
|
||||||
|
} else {
|
||||||
|
sendLog("I2S ready");
|
||||||
|
}
|
||||||
|
|
||||||
decoder.onFrame(handleFrame);
|
Serial.println("READY");
|
||||||
|
|
||||||
lastHeartbeatMs = millis();
|
|
||||||
logLine("robot-hardware ready");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void loop() {
|
void loop() {
|
||||||
// Drain whatever the host has sent since the last tick.
|
while (Serial.available() > 0) {
|
||||||
while (HW_COMM.available() > 0) {
|
const int c = Serial.read();
|
||||||
int b = HW_COMM.read();
|
if (c < 0) break;
|
||||||
if (b < 0) break;
|
if (c == '\r') continue;
|
||||||
decoder.feed(static_cast<uint8_t>(b));
|
if (c == '\n') {
|
||||||
}
|
g_line[g_lineLen] = 0;
|
||||||
|
if (g_lineLen > 0) handleLine(g_line);
|
||||||
// Heartbeat watchdog: if we haven't heard from the host in a
|
g_lineLen = 0;
|
||||||
// while, slip into a sleepy animation so the robot doesn't
|
continue;
|
||||||
// look frozen. Any incoming frame resets this.
|
}
|
||||||
const uint32_t now = millis();
|
if (g_lineLen < sizeof(g_line) - 1) {
|
||||||
if (!idleMode && (now - lastHeartbeatMs) > HW_HEARTBEAT_TIMEOUT_MS) {
|
g_line[g_lineLen++] = static_cast<char>(c);
|
||||||
idleMode = true;
|
} else {
|
||||||
eyes.show(Emotion::SLEEPY);
|
g_lineLen = 0;
|
||||||
|
sendErr("line overflow");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------
|
|
||||||
// Frame dispatcher
|
|
||||||
// ---------------------------------------------------------------
|
|
||||||
|
|
||||||
static void handleFrame(const Frame& frame, void* /*userData*/) {
|
|
||||||
lastHeartbeatMs = millis();
|
|
||||||
if (idleMode) {
|
|
||||||
idleMode = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (frame.type) {
|
|
||||||
case MsgType::DISPLAY_EMOTION: {
|
|
||||||
if (frame.length < 1) {
|
|
||||||
logLine("DISPLAY_EMOTION: empty payload");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const uint8_t code = frame.payload[0];
|
|
||||||
if (code >= static_cast<uint8_t>(Emotion::COUNT)) {
|
|
||||||
logLine("DISPLAY_EMOTION: out-of-range code");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
eyes.show(static_cast<Emotion>(code));
|
|
||||||
|
|
||||||
// ACK back so the host knows it was applied.
|
|
||||||
uint8_t ackPayload[1] = {code};
|
|
||||||
FrameEncoder::writeTo(HW_COMM, MsgType::ACK, ackPayload, 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
case MsgType::DISPLAY_CLEAR: {
|
|
||||||
eyes.clear();
|
|
||||||
FrameEncoder::writeTo(HW_COMM, MsgType::ACK);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
case MsgType::PING: {
|
|
||||||
// Echo the payload back as PONG. Useful for latency
|
|
||||||
// measurements and proving the link is symmetric.
|
|
||||||
FrameEncoder::writeTo(HW_COMM, MsgType::PONG,
|
|
||||||
frame.payload, frame.length);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
case MsgType::STATUS: {
|
|
||||||
// Heartbeat from host — lastHeartbeatMs was already
|
|
||||||
// bumped above. Nothing else to do for v0.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(phase2): AUDIO_UP / AUDIO_DOWN / SERVO_CMD / LED_CMD
|
|
||||||
default:
|
|
||||||
logLine("unknown frame type");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------
|
|
||||||
// Diagnostic logging — wraps text in a LOG frame so the host
|
|
||||||
// can parse it without getting confused by free text on the wire.
|
|
||||||
// ---------------------------------------------------------------
|
|
||||||
|
|
||||||
static void logLine(const char* line) {
|
|
||||||
const size_t len = strnlen(line, MAX_PAYLOAD_SIZE);
|
|
||||||
FrameEncoder::writeTo(HW_COMM, MsgType::LOG,
|
|
||||||
reinterpret_cast<const uint8_t*>(line),
|
|
||||||
static_cast<uint16_t>(len));
|
|
||||||
}
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user