ok script esp

2026-04-09 02:47:53 +02:00 · 2026-04-09 02:47:53 +02:00 · c19d9a7cf4
commit c19d9a7cf4
parent b29653e3aa
17 changed files with 1860 additions and 390 deletions
--- a/apps/robot-client/package.json
+++ b/apps/robot-client/package.json
@ -12,7 +12,11 @@
    "format": "prettier --write \"src/**/*.ts\"",
    "test": "vitest run",
    "test:watch": "vitest",
-    "hw:demo": "tsx scripts/hardware-demo.ts"
+    "hw:demo": "pnpm exec tsx scripts/hardware-demo.ts",
    "audio:loopback": "pnpm exec tsx scripts/audio-loopback.ts",
    "audio:beep": "pnpm exec tsx scripts/audio-beep.ts",
    "esp:record": "pnpm exec tsx ../robot-hardware/scripts/esp-record.ts",
    "esp:play": "pnpm exec tsx ../robot-hardware/scripts/esp-play.ts"
  },
  "dependencies": {
    "socket.io-client": "^4.8.3",
--- a/apps/robot-client/scripts/audio-beep.ts
+++ b/apps/robot-client/scripts/audio-beep.ts
@ -0,0 +1,99 @@
 /**
 * Ti-Pote — Pure tone speaker test.
 *
 * Generates a 440 Hz sine wave at ~70% of full scale and streams it
 * to the ESP32 speaker via AUDIO_DOWN frames, then a second beep at
 * 880 Hz. Completely independent of the microphone — if this does
 * not produce audible sound, the problem is downstream of the ESP32
 * on the speaker path (MAX98357A wiring, SD pin, VIN, speaker leads).
 *
 * Run with:
 *   HARDWARE_SERIAL_PORT=/dev/serial0 pnpm --filter @ti-pote/robot-client audio:beep
 *
 * Optional env:
 *   BEEP_MS     — length of each beep in ms (default 1500)
 *   BEEP_FREQ   — primary frequency in Hz (default 440)
 *   BEEP_AMP    — amplitude 0.0..1.0 (default 0.7)
 */
 import { HardwareService, Emotion } from '../src/hardware/index.js';
 import { Esp32AudioService } from '../src/services/audio.service.js';
 // Serial link to the ESP32 — both values can be overridden from the environment.
 const path = process.env.HARDWARE_SERIAL_PORT ?? '/dev/serial0';
 const baudRate = Number.parseInt(process.env.HARDWARE_SERIAL_BAUD ?? '921600', 10);
 // Tone parameters: duration of each beep (ms), base frequency (Hz)
 // and amplitude as a fraction of full scale.
 const beepMs = Number.parseInt(process.env.BEEP_MS ?? '1500', 10);
 const beepFreq = Number.parseInt(process.env.BEEP_FREQ ?? '440', 10);
 const beepAmp = Number.parseFloat(process.env.BEEP_AMP ?? '0.7');
 const SAMPLE_RATE = 16000;
 /**
 * Synthesise a mono sine tone as 16-bit signed little-endian PCM
 * sampled at `SAMPLE_RATE`.
 *
 * A 5 ms linear attack and release is applied so the speaker does not
 * click. For tones shorter than the two ramps combined the envelope
 * degrades to a triangle, so the tone still ends near zero.
 *
 * @param freqHz     Tone frequency in Hz.
 * @param durationMs Tone length in milliseconds (>= 0).
 * @param amplitude  Peak level as a fraction of full scale; clamped to 0..1.
 * @returns Buffer of `floor(SAMPLE_RATE * durationMs / 1000)` samples, 2 bytes each.
 * @throws RangeError when an argument is not a finite number or the
 *         duration is negative (e.g. an unparsable environment value).
 */
 function generateSine(freqHz: number, durationMs: number, amplitude: number): Buffer {
  if (
    !Number.isFinite(freqHz) ||
    !Number.isFinite(durationMs) ||
    !Number.isFinite(amplitude) ||
    durationMs < 0
  ) {
    throw new RangeError(
      `generateSine: invalid arguments (freqHz=${freqHz}, durationMs=${durationMs}, amplitude=${amplitude})`,
    );
  }
  const sampleCount = Math.floor((SAMPLE_RATE * durationMs) / 1000);
  const buf = Buffer.alloc(sampleCount * 2);
  const amp = Math.max(0, Math.min(1, amplitude)) * 32767;
  const twoPiF = (2 * Math.PI * freqHz) / SAMPLE_RATE;
  // 5 ms linear attack/release so the speaker doesn't click.
  const rampSamples = Math.floor((SAMPLE_RATE * 5) / 1000);
  for (let i = 0; i < sampleCount; i++) {
    // Taking the minimum of both ramps keeps the release active even
    // when the tone is so short that attack and release overlap.
    const attack = i / rampSamples;
    const release = (sampleCount - i) / rampSamples;
    const env = Math.min(1, attack, release);
    const s = Math.round(Math.sin(i * twoPiF) * amp * env);
    buf.writeInt16LE(Math.max(-32768, Math.min(32767, s)), i * 2);
  }
  return buf;
 }
 /** Resolve after roughly `ms` milliseconds without blocking the event loop. */
 async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
 }
 /**
 * Open the serial link, check it with a ping, then play two tones
 * (the base frequency and twice that frequency) on the ESP32 speaker.
 * The emotion is reset and the link is closed in `finally`, whether or
 * not playback succeeded.
 */
 async function main(): Promise<void> {
  const hw = new HardwareService({ path, baudRate, heartbeatIntervalMs: 1000 });
  hw.on('log', (line) => {
    console.log(`[firmware] ${line}`);
  });
  hw.on('error', (err) => {
    console.error(`[firmware error] ${err.message}`);
  });
  console.log(`→ opening ${path} @ ${baudRate} baud`);
  await hw.connect();
  try {
    const roundTripMs = await hw.ping(Buffer.from('beep'));
    console.log(`→ ping round-trip: ${roundTripMs.toFixed(1)} ms`);
    const speaker = new Esp32AudioService(
      {
        backend: 'esp32',
        captureDevice: 'default',
        playbackDevice: 'default',
        sampleRate: SAMPLE_RATE,
        bitDepth: 16,
        channels: 1,
        chunkDurationMs: 20,
      },
      hw,
    );
    // Generate one tone and wait until the audio service has played it.
    const playTone = async (freqHz: number): Promise<void> => {
      await speaker.play(generateSine(freqHz, beepMs, beepAmp));
    };
    hw.sendEmotion(Emotion.HAPPY);
    console.log(`🔊 Beep 1: ${beepFreq} Hz · ${beepMs} ms · amp=${beepAmp}`);
    await playTone(beepFreq);
    // Short gap so the two beeps are heard as separate sounds.
    await sleep(400);
    console.log(`🔊 Beep 2: ${beepFreq * 2} Hz · ${beepMs} ms · amp=${beepAmp}`);
    await playTone(beepFreq * 2);
    console.log('✅ done — did you hear two beeps?');
  } finally {
    hw.sendEmotion(Emotion.NEUTRAL);
    await sleep(200);
    await hw.disconnect();
  }
 }
 // Entry point: report any failure and exit with a non-zero status.
 void main().catch((reason) => {
  console.error('beep failed:', reason);
  process.exit(1);
 });
--- a/apps/robot-client/scripts/audio-loopback.ts
+++ b/apps/robot-client/scripts/audio-loopback.ts
@ -0,0 +1,171 @@
 /**
 * Ti-Pote — End-to-end audio loopback test.
 *
 * What it proves: the whole Pi ↔ ESP32 ↔ mic/speaker chain works,
 * without bringing the cloud/wake-word/orchestrator into the picture.
 *
 * What it does:
 *   1. Opens the serial link to the ESP32.
 *   2. Captures `CAPTURE_MS` (default 5000) of mic audio via
 *      AUDIO_UP frames into a single in-memory buffer.
 *   3. Pauses briefly.
 *   4. Streams that buffer back to the ESP32 as AUDIO_DOWN frames
 *      and waits for the speaker to finish playing.
 *
 * Expected result: you say "allô allô" during step 2 and hear your
 * own voice played back on the robot's speaker a moment later.
 *
 * Run with:
 *   HARDWARE_SERIAL_PORT=/dev/serial0 pnpm --filter @ti-pote/robot-client audio:loopback
 *
 * Optional env:
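 *   CAPTURE_MS        — capture duration in ms (default 5000)
 *   DEBUG             — when set, log the first frames and dump the raw capture
 *   DUMP_PATH         — where DEBUG writes the raw capture (default /tmp/tipote-capture.raw)
 *   SKIP_PLAYBACK     — when set, stop after the capture (no AUDIO_DOWN is sent)
 *   HARDWARE_SERIAL_PORT / HARDWARE_SERIAL_BAUD — serial device and baud rate
 *                       (defaults /dev/serial0 and 921600)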
 */
 import { writeFileSync } from 'node:fs';
 import { HardwareService, Emotion } from '../src/hardware/index.js';
 import { Esp32AudioService } from '../src/services/audio.service.js';
 // Serial link to the ESP32 — both values can be overridden from the environment.
 const path = process.env.HARDWARE_SERIAL_PORT ?? '/dev/serial0';
 const baudRate = Number.parseInt(process.env.HARDWARE_SERIAL_BAUD ?? '921600', 10);
 // How long to record before playing the capture back.
 const captureMs = Number.parseInt(process.env.CAPTURE_MS ?? '5000', 10);
 // Any non-empty DEBUG value enables frame dumps and the raw capture file.
 const debug = Boolean(process.env.DEBUG);
 const dumpPath = process.env.DUMP_PATH ?? '/tmp/tipote-capture.raw';
 // Any non-empty SKIP_PLAYBACK value stops the test after the capture step.
 const skipPlayback = Boolean(process.env.SKIP_PLAYBACK);
 // Audio format used for the duration maths: 16 kHz, 2 bytes per sample.
 const SAMPLE_RATE = 16000;
 const BYTES_PER_SAMPLE = 2;
 // Number of frames already printed by the DEBUG listener.
 let debugFramesSeen = 0;
 /** Resolve after roughly `ms` milliseconds without blocking the event loop. */
 async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
 }
 /**
 * Run the loopback test: record `captureMs` of AUDIO_UP audio from the
 * ESP32, print basic statistics, then (unless SKIP_PLAYBACK is set)
 * stream the recording back to the speaker.
 *
 * The emotion is reset and the link is closed in `finally` on every
 * path. When no audio at all is received, `process.exitCode` is set
 * to 1 so the failure is visible to the calling shell.
 */
 async function main(): Promise<void> {
  const hw = new HardwareService({ path, baudRate, heartbeatIntervalMs: 1000 });
  hw.on('log', (line) => console.log(`[firmware] ${line}`));
  hw.on('error', (err) => console.error(`[firmware error] ${err.message}`));
  if (debug) {
    hw.on('audio_up', (chunk) => {
      // Print up to 8 int16 samples from the start of the first few
      // frames so we can see whether the wire carries zeros or real data.
      if (debugFramesSeen < 3) {
        const head: number[] = [];
        // `i + 1 < limit` keeps every 2-byte read in bounds: on an
        // odd-length frame readInt16LE would otherwise throw inside
        // this event handler.
        const limit = Math.min(chunk.length, 16);
        for (let i = 0; i + 1 < limit; i += 2) {
          head.push(chunk.readInt16LE(i));
        }
        console.log(`[debug] frame ${debugFramesSeen} len=${chunk.length} head=${head.join(',')}`);
        debugFramesSeen++;
      }
    });
  }
  console.log(`→ opening ${path} @ ${baudRate} baud`);
  await hw.connect();
  try {
    const rtt = await hw.ping(Buffer.from('loopback'));
    console.log(`→ ping round-trip: ${rtt.toFixed(1)} ms`);
    hw.sendEmotion(Emotion.SURPRISED);
    // ── 1. Capture ────────────────────────────────────────────────
    const chunks: Buffer[] = [];
    let bytesCaptured = 0;
    const collect = (chunk: Buffer): void => {
      chunks.push(chunk);
      bytesCaptured += chunk.length;
    };
    hw.on('audio_up', collect);
    console.log(`🎙️  Recording ${captureMs} ms — say something!`);
    await sleep(captureMs);
    hw.off('audio_up', collect);
    const capture = Buffer.concat(chunks);
    // Whole samples only: a dangling byte must not show up as "x.5 samples".
    const samples = Math.floor(capture.length / BYTES_PER_SAMPLE);
    const durationMs = (samples / SAMPLE_RATE) * 1000;
    console.log(
      `✅ captured ${capture.length} bytes (${samples} samples, ${durationMs.toFixed(0)} ms)` +
        ` across ${chunks.length} frames`,
    );
    if (capture.length === 0) {
      console.error(
        '❌ no audio received from the ESP32. Check the I2S wiring ' +
          '(BCLK=32, LRCLK=33, DIN=34) and that the firmware got past `audio: I2S ready`.',
      );
      // The test failed: report it through the exit status while still
      // letting `finally` shut the link down cleanly.
      process.exitCode = 1;
      return;
    }
    // Quick RMS sanity check so we catch "mic muted" / "disconnected" early.
    const rms = computeRms(capture);
    console.log(`   RMS level: ${rms.toFixed(0)} (silence ≈ 10, speech ≳ 500)`);
    if (debug) {
      // Dump the raw capture so we can replay it offline:
      //   aplay -r 16000 -f S16_LE -c 1 /tmp/tipote-capture.raw
      writeFileSync(dumpPath, capture);
      console.log(`[debug] raw capture written to ${dumpPath} (${capture.length} bytes)`);
      const allZero = capture.every((b) => b === 0);
      console.log(`[debug] capture.allZero=${allZero}`);
      // Also print some distinct int16 values we saw, to spot patterns.
      const seen = new Set<number>();
      for (let i = 0; i < capture.length - 1 && seen.size < 10; i += 2) {
        seen.add(capture.readInt16LE(i));
      }
      console.log(`[debug] first distinct samples: ${[...seen].join(',')}`);
    }
    if (skipPlayback) {
      console.log('SKIP_PLAYBACK set — not sending AUDIO_DOWN');
      return;
    }
    // ── 2. Playback ───────────────────────────────────────────────
    await sleep(500);
    const audio = new Esp32AudioService(
      {
        backend: 'esp32',
        captureDevice: 'default',
        playbackDevice: 'default',
        sampleRate: SAMPLE_RATE,
        bitDepth: 16,
        channels: 1,
        chunkDurationMs: 20,
      },
      hw,
    );
    hw.sendEmotion(Emotion.HAPPY);
    console.log('🔊 Playing back on the ESP32 speaker...');
    await audio.play(capture);
    console.log('✅ playback done');
  } finally {
    hw.sendEmotion(Emotion.NEUTRAL);
    await sleep(200);
    await hw.disconnect();
  }
 }
 /**
 * Root-mean-square level of a buffer of 16-bit signed little-endian
 * PCM samples.
 *
 * Only whole samples are counted: a trailing odd byte is ignored and
 * does not enter the divisor.
 *
 * @param buf Raw PCM bytes.
 * @returns The RMS of the samples, or 0 when the buffer holds no complete sample.
 */
 function computeRms(buf: Buffer): number {
  const sampleCount = Math.floor(buf.length / 2);
  if (sampleCount === 0) return 0;
  let sumSquares = 0;
  for (let i = 0; i < sampleCount; i++) {
    const s = buf.readInt16LE(i * 2);
    sumSquares += s * s;
  }
  return Math.sqrt(sumSquares / sampleCount);
 }
 // Entry point: report any failure and exit with a non-zero status.
 void main().catch((reason) => {
  console.error('loopback failed:', reason);
  process.exit(1);
 });
--- a/apps/robot-client/scripts/wake_word.py
+++ b/apps/robot-client/scripts/wake_word.py
@ -2,94 +2,175 @@
 """
 Ti-Pote Wake Word Detection Script.
-Runs OpenWakeWord model continuously, listening on the specified ALSA device.
+Runs OpenWakeWord continuously and prints "DETECTED" to stdout when
-Prints "DETECTED" to stdout when the wake word is heard.
+the wake word is heard.
-Supports PAUSE/RESUME commands on stdin to temporarily stop/start listening
+Two input modes:
 without reloading the model. When paused, the audio stream is closed so other
 processes (arecord) can use the device.
-Usage:
+1. --input alsa  (default, legacy)
-    python3 wake_word.py --model hey_jarvis --threshold 0.5 --device default --sample-rate 16000
+   Opens an ALSA capture device via PyAudio. PAUSE/RESUME/QUIT
   commands are read from stdin.
-Requirements:
+2. --input stdin
-    pip install openwakeword pyaudio numpy
+   Reads raw S16 mono PCM audio from stdin (fd 0). This is used when
   the Raspberry Pi is just an orchestrator and the microphone lives
   on the ESP32 — the Node client forwards AUDIO_UP frames into this
   script's stdin. Control commands are read from a separate file
   descriptor specified by --control-fd (default: 3).
 Control commands (one per line, uppercase):
   PAUSE   — stop emitting DETECTED events (audio keeps flowing so
             we don't overflow the pipe, but predictions are ignored).
   RESUME  — resume emitting and reset the model buffer.
   RESET   — reset the model buffer without touching the pause flag.
   QUIT    — exit cleanly.
 Usage (ALSA):
    python3 wake_word.py --model hey_jarvis --device default
 Usage (stdin / ESP32 backend):
    python3 wake_word.py --model hey_jarvis --input stdin --control-fd 3
 """
 import argparse
 import sys
 import os
 import signal
-import select
+import sys
 import threading
 import time
 import numpy as np
-def main():
+CHUNK_SAMPLES = 1280  # ≈ 80 ms @ 16 kHz (OpenWakeWord's preferred size)
    parser = argparse.ArgumentParser(description='Ti-Pote Wake Word Detection')
    parser.add_argument('--model', type=str, default='hey_jarvis',
                        help='Wake word model name (default: hey_jarvis as placeholder)')
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='Detection threshold (0.0-1.0)')
    parser.add_argument('--device', type=str, default='default',
                        help='ALSA audio capture device')
    parser.add_argument('--sample-rate', type=int, default=16000,
                        help='Audio sample rate in Hz')
    args = parser.parse_args()
 def load_model(model_name: str):
    try:
        from openwakeword.model import Model
    except ImportError:
-        print("ERROR: openwakeword not installed. Run: pip install openwakeword", file=sys.stderr)
+        print("ERROR: openwakeword not installed. Run: pip install openwakeword",
              file=sys.stderr)
        sys.exit(1)
    try:
        import pyaudio
    except ImportError:
        print("ERROR: pyaudio not installed. Run: pip install pyaudio", file=sys.stderr)
        sys.exit(1)
    # ── Load the wake word model (one time only) ──
    print(f"Loading wake word model: {args.model}...", file=sys.stderr)
    import openwakeword
-    pretrained_paths = openwakeword.get_pretrained_model_paths()
+    pretrained = openwakeword.get_pretrained_model_paths()
-    model_path = None
+    model_path = next(
-    for p in pretrained_paths:
+        (p for p in pretrained if os.path.basename(p).startswith(model_name)),
-        basename = os.path.basename(p)
+        None,
-        if basename.startswith(args.model):
+    )
            model_path = p
            break
    if model_path is None:
-        if os.path.isfile(args.model):
+        if os.path.isfile(model_name):
-            model_path = args.model
+            model_path = model_name
        else:
-            print(f"ERROR: model '{args.model}' not found in pretrained models", file=sys.stderr)
+            print(f"ERROR: model '{model_name}' not found", file=sys.stderr)
-            print(f"Available models:", file=sys.stderr)
+            for p in pretrained:
            for p in pretrained_paths:
                print(f"  - {os.path.basename(p)}", file=sys.stderr)
            sys.exit(1)
-    print(f"Resolved model path: {model_path}", file=sys.stderr)
+    print(f"Loading wake word model: {model_name}...", file=sys.stderr)
    try:
-        oww_model = Model(wakeword_model_paths=[model_path])
+        return Model(wakeword_model_paths=[model_path])
    except Exception as e:
-        print(f"ERROR loading model '{args.model}': {e}", file=sys.stderr)
+        print(f"ERROR loading model '{model_name}': {e}", file=sys.stderr)
        sys.exit(1)
    print(f"Wake word model loaded: {args.model}", file=sys.stderr)
    print(f"Threshold: {args.threshold}", file=sys.stderr)
    print(f"Listening on device: {args.device}", file=sys.stderr)
-    # ── Initialize PyAudio ──
+class State:
    """Shared mutable state between the audio and control threads."""
    def __init__(self):
        self.paused = False
        self.running = True
        self.reset_requested = False
        self.lock = threading.Lock()
 def start_control_reader(state: State, fd: int):
    """
    Start a daemon thread that reads one command per line from `fd`
    and updates `state` accordingly (PAUSE / RESUME / RESET / QUIT).

    If `fd` cannot be opened, an error is printed to stderr and no
    thread is started. The thread stops on EOF, on a read error, on
    QUIT, or once `state.running` is False.
    """
    try:
        control = os.fdopen(fd, 'r', buffering=1)
    except OSError as e:
        print(f"ERROR opening control fd {fd}: {e}", file=sys.stderr)
        return
    def reader():
        while state.running:
            try:
                raw = control.readline()
            except Exception:
                return
            if not raw:
                # Empty read means the writer closed its end.
                return
            command = raw.strip().upper()
            with state.lock:
                if command == 'QUIT':
                    state.running = False
                    return
                if command == 'RESET':
                    state.reset_requested = True
                elif command == 'PAUSE':
                    if not state.paused:
                        state.paused = True
                        print("PAUSED", file=sys.stderr, flush=True)
                elif command == 'RESUME':
                    if state.paused:
                        state.paused = False
                        # Ask the predict loop to reset the model buffer.
                        state.reset_requested = True
                        print("RESUMED", file=sys.stderr, flush=True)
    threading.Thread(target=reader, daemon=True).start()
 def run_predict_loop(oww_model, read_chunk, state: State, threshold: float):
    """
    Main detection loop shared by both input modes.

    Each iteration applies a pending model reset, pulls one chunk from
    `read_chunk()`, feeds it to the model and, unless paused, prints
    "DETECTED" to stdout when the latest score of any model exceeds
    `threshold`. The loop ends when `read_chunk()` returns None, when
    `state.running` becomes False, or on KeyboardInterrupt.
    """
    print("READY", file=sys.stderr, flush=True)
    try:
        while state.running:
            # Honour a reset requested by the control thread.
            with state.lock:
                if state.reset_requested:
                    oww_model.reset()
                    state.reset_requested = False
            pcm = read_chunk()
            if pcm is None:
                # EOF / error; exit cleanly
                break
            oww_model.predict(np.frombuffer(pcm, dtype=np.int16))
            with state.lock:
                muted = state.paused
            if muted:
                # Keep draining but don't emit detections.
                continue
            triggered = any(
                len(scores) > 0 and scores[-1] > threshold
                for scores in oww_model.prediction_buffer.values()
            )
            if triggered:
                print("DETECTED", flush=True)
                oww_model.reset()
    except KeyboardInterrupt:
        pass
 # ─────────────────────────────────────────────────────────────────
 # ALSA input (legacy backend)
 # ─────────────────────────────────────────────────────────────────
 def run_alsa_mode(args, oww_model, state: State):
    import re
    try:
        import pyaudio
    except ImportError:
        print("ERROR: pyaudio not installed. Run: pip install pyaudio",
              file=sys.stderr)
        sys.exit(1)
    pa = pyaudio.PyAudio()
    # Find the device index
    import re
    device_index = None
    if args.device != 'default':
        try:
@ -97,14 +178,14 @@ def main():
            info = pa.get_device_info_by_index(idx)
            if info.get('maxInputChannels', 0) > 0:
                device_index = idx
-                print(f"Using device by index: [{idx}] {info['name']}", file=sys.stderr)
+                print(f"Using device by index: [{idx}] {info['name']}",
                      file=sys.stderr)
        except (ValueError, IOError):
            pass
        if device_index is None:
            hw_match = re.search(r'(\d+),(\d+)', args.device)
            hw_pattern = f"hw:{hw_match.group(1)},{hw_match.group(2)}" if hw_match else None
            for i in range(pa.get_device_count()):
                info = pa.get_device_info_by_index(i)
                if info.get('maxInputChannels', 0) <= 0:
@ -115,133 +196,134 @@ def main():
                    print(f"Matched device: [{i}] {name}", file=sys.stderr)
                    break
-        if device_index is None:
+    stream = {'handle': None}
            print(f"WARNING: Device '{args.device}' not found, listing available inputs:", file=sys.stderr)
            for i in range(pa.get_device_count()):
                info = pa.get_device_info_by_index(i)
                if info.get('maxInputChannels', 0) > 0:
                    print(f"  [{i}] {info['name']}", file=sys.stderr)
            print("Falling back to default device", file=sys.stderr)
    # ── Audio stream helpers ──
    chunk_size = 1280  # ~80ms at 16kHz (OpenWakeWord expects this)
    stream = None
    def open_stream():
-        nonlocal stream
+        stream['handle'] = pa.open(
        stream = pa.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=args.sample_rate,
            input=True,
-            frames_per_buffer=chunk_size,
+            frames_per_buffer=CHUNK_SAMPLES,
            input_device_index=device_index,
        )
    def close_stream():
-        nonlocal stream
+        h = stream['handle']
-        if stream is not None:
+        if h is not None:
            try:
-                stream.stop_stream()
+                h.stop_stream()
-                stream.close()
+                h.close()
            except Exception:
                pass
-            stream = None
+            stream['handle'] = None
-    # ── Stdin command reader (PAUSE / RESUME) ──
+    def read_chunk():
-
+        with state.lock:
-    paused = False
+            is_paused = state.paused
-    running = True
+        # In ALSA mode, pausing means physically releasing the device.
-    lock = threading.Lock()
+        if is_paused:
-
+            if stream['handle'] is not None:
-    def stdin_reader():
+                close_stream()
-        nonlocal paused, running
+                print("STREAM_CLOSED", file=sys.stderr, flush=True)
-        while running:
+            time.sleep(0.1)
-            try:
+            return b'\x00' * (CHUNK_SAMPLES * 2)  # dummy silence; won't be predicted
-                line = sys.stdin.readline()
+        if stream['handle'] is None:
-                if not line:  # EOF
+            open_stream()
-                    running = False
+            oww_model.reset()
-                    break
+            print("STREAM_REOPENED", file=sys.stderr, flush=True)
-                cmd = line.strip().upper()
+        try:
-                with lock:
+            return stream['handle'].read(CHUNK_SAMPLES, exception_on_overflow=False)
-                    if cmd == 'PAUSE':
+        except Exception as e:
-                        if not paused:
+            print(f"Audio read error: {e}", file=sys.stderr)
-                            paused = True
+            close_stream()
-                            print("PAUSED", file=sys.stderr, flush=True)
+            time.sleep(0.5)
-                    elif cmd == 'RESUME':
+            return b'\x00' * (CHUNK_SAMPLES * 2)
                        if paused:
                            paused = False
                            print("RESUMED", file=sys.stderr, flush=True)
                    elif cmd == 'QUIT':
                        running = False
                        break
            except Exception:
                break
    stdin_thread = threading.Thread(target=stdin_reader, daemon=True)
    stdin_thread.start()
    # ── Signal handling ──
    def handle_signal(sig, frame):
        nonlocal running
        running = False
    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)
    # ── Main loop ──
    open_stream()
    print("READY", file=sys.stderr, flush=True)
    try:
-        while running:
+        run_predict_loop(oww_model, read_chunk, state, args.threshold)
            with lock:
                is_paused = paused
            if is_paused:
                # Close the audio stream so arecord can use the device
                if stream is not None:
                    close_stream()
                    print("STREAM_CLOSED", file=sys.stderr, flush=True)
                # Wait a bit before checking again
                import time
                time.sleep(0.1)
                continue
            # Reopen stream if it was closed (after resume)
            if stream is None:
                open_stream()
                oww_model.reset()
                print("STREAM_REOPENED", file=sys.stderr, flush=True)
            try:
                audio_data = stream.read(chunk_size, exception_on_overflow=False)
            except Exception as e:
                print(f"Audio read error: {e}", file=sys.stderr)
                close_stream()
                import time
                time.sleep(0.5)
                continue
            audio_array = np.frombuffer(audio_data, dtype=np.int16)
            oww_model.predict(audio_array)
            for model_name, score in oww_model.prediction_buffer.items():
                if len(score) > 0 and score[-1] > args.threshold:
                    print("DETECTED", flush=True)
                    oww_model.reset()
                    break
    except KeyboardInterrupt:
        pass
    finally:
        close_stream()
        pa.terminate()
        print("Wake word detection stopped", file=sys.stderr)
 # ─────────────────────────────────────────────────────────────────
 # Stdin input (ESP32 backend)
 # ─────────────────────────────────────────────────────────────────
 def run_stdin_mode(args, oww_model, state: State):
    """
    Run detection on raw 16-bit signed little-endian mono PCM read from
    stdin (fd 0), expected at `args.sample_rate`.

    Bytes are accumulated until a full CHUNK_SAMPLES chunk is available,
    then handed to the shared predict loop. EOF or a read error on
    stdin ends the loop.
    """
    print("Listening on stdin for raw S16LE mono PCM", file=sys.stderr)
    chunk_bytes = CHUNK_SAMPLES * 2
    source = sys.stdin.buffer
    pending = bytearray()
    def read_chunk():
        # Top up `pending` until it holds one full chunk or we must stop.
        while state.running and len(pending) < chunk_bytes:
            missing = chunk_bytes - len(pending)
            try:
                data = source.read(missing)
            except Exception as e:
                print(f"stdin read error: {e}", file=sys.stderr)
                return None
            if not data:
                # EOF on the audio pipe.
                return None
            pending.extend(data)
        if len(pending) < chunk_bytes:
            # Stopped before a full chunk arrived.
            return None
        chunk = bytes(pending[:chunk_bytes])
        del pending[:chunk_bytes]
        return chunk
    try:
        run_predict_loop(oww_model, read_chunk, state, args.threshold)
    finally:
        print("Wake word detection stopped", file=sys.stderr)
 # ─────────────────────────────────────────────────────────────────
 # Entrypoint
 # ─────────────────────────────────────────────────────────────────
 def main():
    """
    Parse the command line, load the wake word model, start the control
    reader and run the selected input mode until it returns.

    When --control-fd is omitted it is chosen from the input mode: 0
    (stdin) with --input alsa, 3 with --input stdin. In stdin mode fd 0
    carries the audio stream, so using it for control commands as well
    is rejected with a usage error.
    """
    parser = argparse.ArgumentParser(description='Ti-Pote Wake Word Detection')
    parser.add_argument('--model', type=str, default='hey_jarvis')
    parser.add_argument('--threshold', type=float, default=0.5)
    parser.add_argument('--input', type=str, choices=['alsa', 'stdin'], default='alsa',
                        help="Audio source. 'alsa' opens PyAudio, 'stdin' reads from fd 0.")
    parser.add_argument('--device', type=str, default='default',
                        help='ALSA audio capture device (only used with --input alsa).')
    parser.add_argument('--control-fd', type=int, default=None,
                        help='File descriptor to read control commands from. '
                             'Defaults to 0 (stdin) with --input alsa and to 3 '
                             'with --input stdin.')
    parser.add_argument('--sample-rate', type=int, default=16000)
    args = parser.parse_args()
    if args.control_fd is None:
        args.control_fd = 3 if args.input == 'stdin' else 0
    elif args.input == 'stdin' and args.control_fd == 0:
        # Two readers on fd 0 would steal each other's bytes.
        parser.error('--control-fd 0 cannot be combined with --input stdin: '
                     'fd 0 carries the audio stream')
    state = State()
    def handle_signal(_sig, _frame):
        # Only flag the shutdown; the loops poll state.running.
        state.running = False
    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)
    oww_model = load_model(args.model)
    print(f"Wake word model loaded: {args.model}", file=sys.stderr)
    print(f"Threshold: {args.threshold}", file=sys.stderr)
    start_control_reader(state, args.control_fd)
    if args.input == 'stdin':
        run_stdin_mode(args, oww_model, state)
    else:
        print(f"Listening on device: {args.device}", file=sys.stderr)
        run_alsa_mode(args, oww_model, state)
 # Run the detector only when executed as a script, not when imported.
 if __name__ == '__main__':
    main()
--- a/apps/robot-client/src/config/hardware.config.ts
+++ b/apps/robot-client/src/config/hardware.config.ts
@ -1,8 +1,11 @@
 export interface AudioConfig {
-  /** ALSA device for capture (e.g., 'plughw:1,0' or 'default') */
+  /** Which audio backend to use: 'esp32' (default) or 'alsa' (legacy). */
  backend: 'esp32' | 'alsa';
  /** ALSA device for capture (only used when backend='alsa'). */
  captureDevice: string;
-  /** ALSA device for playback (e.g., 'plughw:0,0' or 'default') */
+  /** ALSA device for playback (only used when backend='alsa'). */
  playbackDevice: string;
  /** Sample rate in Hz */
@ -53,8 +56,13 @@ export interface HardwareConfig {
 }
 export function loadHardwareConfig(): HardwareConfig {
  const backend = (process.env.AUDIO_BACKEND || 'esp32').toLowerCase() as
    | 'esp32'
    | 'alsa';
  return {
    audio: {
      backend,
      captureDevice: process.env.AUDIO_CAPTURE_DEVICE || 'default',
      playbackDevice: process.env.AUDIO_PLAYBACK_DEVICE || 'default',
      sampleRate: parseInt(process.env.AUDIO_SAMPLE_RATE || '16000', 10),
@ -69,8 +77,15 @@ export function loadHardwareConfig(): HardwareConfig {
      threshold: parseFloat(process.env.WAKEWORD_THRESHOLD || '0.5'),
    },
    serial: {
-      enabled: (process.env.HARDWARE_SERIAL_ENABLED || 'false').toLowerCase() === 'true',
+      // The ESP32 is now the mic/speaker front-end — serial link is
-      path: process.env.HARDWARE_SERIAL_PORT || '/dev/ttyUSB0',
+      // enabled by default. Set HARDWARE_SERIAL_ENABLED=false only
      // when intentionally falling back to the ALSA backend.
      enabled:
        (process.env.HARDWARE_SERIAL_ENABLED || (backend === 'esp32' ? 'true' : 'false'))
          .toLowerCase() === 'true',
      // Default to /dev/serial0 (the Pi's hardware UART once the
      // console has been freed via raspi-config).
      path: process.env.HARDWARE_SERIAL_PORT || '/dev/serial0',
      baudRate: parseInt(process.env.HARDWARE_SERIAL_BAUD || '921600', 10),
      heartbeatIntervalMs: parseInt(process.env.HARDWARE_HEARTBEAT_MS || '1000', 10),
    },
--- a/apps/robot-client/src/hardware/hardware.service.ts
+++ b/apps/robot-client/src/hardware/hardware.service.ts
@ -27,8 +27,17 @@ export interface HardwareServiceEvents {
  log: (message: string) => void;
  frame: (frame: DecodedFrame) => void;
  ack: (payload: Buffer) => void;
  /** Emitted for each AUDIO_UP frame received from the ESP32 (raw S16 mono PCM). */
  audio_up: (chunk: Buffer) => void;
 }
 /**
 * Max bytes we put in a single AUDIO_DOWN frame. Must stay below
 * MAX_PAYLOAD_SIZE (1024) and should map to a whole number of
 * 20 ms @ 16 kHz chunks: 640 bytes = 20 ms, 320 samples.
 */
 const AUDIO_DOWN_CHUNK_BYTES = 640;
 /**
 * HardwareService — the robot-client's only direct link to the ESP32.
 *
@ -136,6 +145,42 @@ export class HardwareService extends EventEmitter {
    this.writeFrame(MsgType.DISPLAY_CLEAR);
  }
  /**
   * Send a PCM S16 mono 16 kHz buffer to the ESP32 speaker as one or
   * more AUDIO_DOWN frames. The buffer is automatically split into
   * chunks of `AUDIO_DOWN_CHUNK_BYTES` so each frame fits within the
   * UART protocol's MAX_PAYLOAD_SIZE.
   *
   * Back-pressure note: `SerialPort.write` buffers in user-space, so
   * this method is best-effort. For long TTS playbacks, call
   * `drainAudioDown()` between chunks or space them with a `setTimeout`
   * to avoid unbounded growth.
   */
  sendAudioDown(chunk: Buffer): void {
    // Without an open port there is nowhere to send audio: warn and drop it.
    const portIsOpen = this.port?.isOpen ?? false;
    if (!portIsOpen) {
      this.log.warn('Dropping AUDIO_DOWN — serial port not open');
      return;
    }
    // Emit consecutive slices of at most AUDIO_DOWN_CHUNK_BYTES each;
    // `subarray` clamps the final slice to the end of the buffer.
    let start = 0;
    while (start < chunk.length) {
      const end = start + AUDIO_DOWN_CHUNK_BYTES;
      this.writeFrame(MsgType.AUDIO_DOWN, chunk.subarray(start, end));
      start = end;
    }
  }
  /**
   * Wait for the kernel-side serial buffer to drain. Useful between
   * large AUDIO_DOWN bursts to keep latency bounded.
   */
  drainAudioDown(): Promise<void> {
    const port = this.port;
    // Nothing to wait for when the port is missing or closed.
    if (!port?.isOpen) {
      return Promise.resolve();
    }
    // Adapt the callback-style drain() to a promise.
    return new Promise<void>((resolve, reject) => {
      port.drain((err) => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });
  }
  /**
   * Round-trip PING → PONG used for bring-up and latency checks.
   * Resolves with the measured RTT in ms.
@ -187,6 +232,9 @@ export class HardwareService extends EventEmitter {
      case MsgType.ERROR:
        this.log.error({ payload: frame.payload.toString('utf8') }, 'firmware error');
        return;
      case MsgType.AUDIO_UP:
        this.emit('audio_up', frame.payload);
        return;
      default:
        return;
    }
--- a/apps/robot-client/src/main.ts
+++ b/apps/robot-client/src/main.ts
@ -1,7 +1,7 @@
 import { loadRobotConfig, loadHardwareConfig } from './config/index.js';
 import { CloudSocket } from './transport/index.js';
 import {
-  AudioService,
+  createAudioService,
  WakeWordService,
  KeyboardTriggerService,
  HealthService,
@ -72,15 +72,16 @@ async function main(): Promise<void> {
  const resolvedConfig = { ...robotConfig, deviceId, deviceToken };
  const cloudSocket = new CloudSocket(resolvedConfig as Required<typeof resolvedConfig>);
  const audioService = new AudioService(hardwareConfig.audio);
  const healthService = new HealthService(cloudSocket);
-  // ── Optional: hardware bridge (ESP32 firmware) ──
+  // ── Hardware bridge (ESP32 firmware) ──
-  // The serial link is opt-in via HARDWARE_SERIAL_ENABLED=true. We
+  // With AUDIO_BACKEND=esp32 the ESP32 owns the mic AND the speaker,
-  // treat failures here as non-fatal: even without a face, the
+  // so the serial link is mandatory. With AUDIO_BACKEND=alsa we can
-  // robot can still converse with the cloud.
+  // still run without it (face will be missing, but audio works).
  const audioBackend = hardwareConfig.audio.backend;
  let hardwareService: HardwareService | null = null;
  if (hardwareConfig.serial.enabled) {
    hardwareService = new HardwareService({
      path: hardwareConfig.serial.path,
@ -93,19 +94,40 @@ async function main(): Promise<void> {
      hardwareService.sendEmotion(Emotion.HAPPY);
      logger.info('Hardware bridge connected');
    } catch (err) {
      if (audioBackend === 'esp32') {
        logger.fatal(
          { err, path: hardwareConfig.serial.path },
          'Hardware bridge required for AUDIO_BACKEND=esp32 — check the UART wiring or set AUDIO_BACKEND=alsa',
        );
        process.exit(1);
      }
      logger.warn({ err }, 'Hardware bridge unavailable — continuing without face');
      hardwareService = null;
    }
  } else if (audioBackend === 'esp32') {
    logger.fatal(
      'AUDIO_BACKEND=esp32 requires HARDWARE_SERIAL_ENABLED=true. Either enable the serial link or switch to AUDIO_BACKEND=alsa.',
    );
    process.exit(1);
  } else {
    logger.info('Hardware bridge disabled (set HARDWARE_SERIAL_ENABLED=true to enable)');
  }
  // Audio service — pick a backend now that we know whether the
  // hardware bridge is alive.
  const audioService = createAudioService(hardwareConfig.audio, hardwareService);
  logger.info({ backend: audioBackend }, 'Audio service initialised');
  // Choose trigger based on TRIGGER_MODE
  let trigger: ITriggerService;
  if (resolvedConfig.triggerMode === 'wakeword') {
    logger.info('Trigger: wake word (OpenWakeWord)');
-    trigger = new WakeWordService(hardwareConfig.wakeWord, hardwareConfig.audio);
+    trigger = new WakeWordService(
      hardwareConfig.wakeWord,
      hardwareConfig.audio,
      audioBackend === 'esp32' ? hardwareService : null,
    );
  } else {
    logger.info('Trigger: keyboard (press Enter to talk)');
    trigger = new KeyboardTriggerService();
--- a/apps/robot-client/src/services/audio.service.ts
+++ b/apps/robot-client/src/services/audio.service.ts
@ -1,30 +1,48 @@
 import { ChildProcess, spawn } from 'node:child_process';
 import { EventEmitter } from 'node:events';
 import { type AudioConfig } from '../config/index.js';
 import { type HardwareService } from '../hardware/index.js';
 import { createLogger, type Logger } from '../utils/index.js';
 export interface AudioServiceEvents {
-  /** Emitted when a raw PCM audio chunk is captured from the microphone */
+  /** Emitted when a raw PCM audio chunk is captured from the microphone. */
  audio_chunk: (chunk: Buffer) => void;
-  /** Emitted when playback of a response finishes */
+  /** Emitted when playback of a response finishes. */
  playback_done: () => void;
-  /** Emitted on audio errors */
+  /** Emitted on audio errors. */
  error: (error: Error) => void;
 }
 /**
- * Audio service for Raspberry Pi.
+ * Common audio interface used by the orchestrator, wake word service,
 * and test scripts. Two backends implement it:
 *
- * Uses ALSA tools (arecord/aplay) via child processes.
+ *   - `AlsaAudioService` — arecord/aplay child processes, for dev on a
- * Works with any ALSA-compatible audio device:
+ *     machine with a USB mic or when the Pi owns the I2S mic/speaker
- * - I2S (INMP441 mic, MAX98357 amp) connected directly to Pi GPIO
+ *     directly. Selected with `AUDIO_BACKEND=alsa`.
 * - USB audio devices
 * - Default system audio
 *
- * Audio format: PCM signed 16-bit little-endian, mono, 16kHz
+ *   - `Esp32AudioService` — mic and speaker live on the ESP32; audio
 *     flows over UART via `HardwareService`. Selected with
 *     `AUDIO_BACKEND=esp32` (the default in production).
 */
-export class AudioService extends EventEmitter {
+export abstract class AudioService extends EventEmitter {
  /** Whether a capture session is currently active. */
  abstract get isCapturing(): boolean;
  /** Whether a `play()` call is currently in progress. */
  abstract get isPlaying(): boolean;
  /** Begin capturing microphone audio. */
  abstract startCapture(): void;
  /** End the current capture session, if any. */
  abstract stopCapture(): void;
  /**
   * Play an audio buffer through the speaker.
   *
   * @param audioBuffer Audio data to play.
   * @returns A promise that settles once playback has finished.
   */
  abstract play(audioBuffer: Buffer): Promise<void>;
  /** Interrupt any playback that is in progress. */
  abstract stopPlayback(): void;
  /** Release everything the backend holds; call once on shutdown. */
  abstract destroy(): Promise<void>;
 }
 // ─────────────────────────────────────────────────────────────────
 // ALSA backend — kept for dev on laptops and for Pi setups where
 // the mic/speaker hang off ALSA directly (USB sound card, HAT…).
 // ─────────────────────────────────────────────────────────────────
 export class AlsaAudioService extends AudioService {
  private captureProcess: ChildProcess | null = null;
  private playProcess: ChildProcess | null = null;
  private readonly logger: Logger;
  private _isCapturing = false;
  private _isPlaying = false;
@ -32,7 +50,7 @@ export class AudioService extends EventEmitter {
  constructor(private readonly config: AudioConfig) {
    super();
-    this.logger = createLogger('audio', 'info');
+    this.logger = createLogger('audio:alsa', 'info');
  }
  get isCapturing(): boolean {
@ -43,10 +61,6 @@ export class AudioService extends EventEmitter {
    return this._isPlaying;
  }
  /**
   * Start capturing audio from the microphone.
   * Emits 'audio_chunk' events with raw PCM buffers.
   */
  startCapture(): void {
    if (this._isCapturing) {
      this.logger.warn('Already capturing audio');
@ -58,13 +72,6 @@ export class AudioService extends EventEmitter {
      'Starting audio capture',
    );
    // arecord outputs raw PCM to stdout
    // -D: ALSA device
    // -f: format (S16_LE = signed 16-bit little-endian)
    // -r: sample rate
    // -c: channels
    // -t: type (raw = no header)
    // --buffer-size: in frames, controls latency
    const bufferFrames = Math.floor(this.config.sampleRate * (this.config.chunkDurationMs / 1000));
    this.captureProcess = spawn('arecord', [
@ -112,9 +119,6 @@ export class AudioService extends EventEmitter {
    });
  }
  /**
   * Stop capturing audio from the microphone.
   */
  stopCapture(): void {
    if (!this.captureProcess) return;
@ -125,12 +129,6 @@ export class AudioService extends EventEmitter {
    this._isCapturing = false;
  }
  /**
   * Play audio through the speaker.
   * Accepts either raw PCM or WAV (with RIFF header) data.
   *
   * @returns Promise that resolves when playback is complete
   */
  async play(audioBuffer: Buffer): Promise<void> {
    if (this._isPlaying) {
      this.logger.warn('Already playing audio, queueing...');
@ -152,24 +150,26 @@ export class AudioService extends EventEmitter {
            '-',
          ];
-      const playProcess = spawn('aplay', args, {
+      this.playProcess = spawn('aplay', args, {
        stdio: ['pipe', 'ignore', 'pipe'],
      });
-      playProcess.stderr?.on('data', (data: Buffer) => {
+      this.playProcess.stderr?.on('data', (data: Buffer) => {
        const msg = data.toString().trim();
        if (msg && !msg.startsWith('Playing') && !msg.startsWith('Warning')) {
          this.logger.error({ msg }, 'aplay stderr');
        }
      });
-      playProcess.on('error', (err) => {
+      this.playProcess.on('error', (err) => {
        this._isPlaying = false;
        this.playProcess = null;
        reject(new Error(`Audio playback failed: ${err.message}`));
      });
-      playProcess.on('exit', (code) => {
+      this.playProcess.on('exit', (code) => {
        this._isPlaying = false;
        this.playProcess = null;
        if (code === 0 || code === null) {
          this.emit('playback_done');
          resolve();
@ -178,26 +178,194 @@ export class AudioService extends EventEmitter {
        }
      });
-      // Write audio data to aplay's stdin and close it
+      this.playProcess.stdin?.write(audioBuffer);
-      playProcess.stdin?.write(audioBuffer);
+      this.playProcess.stdin?.end();
      playProcess.stdin?.end();
    });
  }
  /**
   * Stop any currently playing audio.
   */
  stopPlayback(): void {
-    // aplay is spawned per-play, so we can't easily stop it here
+    if (this.playProcess) {
-    // For interrupt support, we'd track the play process
+      this.playProcess.kill('SIGTERM');
      this.playProcess = null;
    }
    this._isPlaying = false;
  }
  /**
   * Clean up resources.
   */
  async destroy(): Promise<void> {
    // Halt microphone capture (returns immediately when no capture
    // process exists).
    this.stopCapture();
    // Terminate any running playback process and clear the playing flag.
    this.stopPlayback();
    // Drop every subscriber registered on this emitter.
    this.removeAllListeners();
  }
 }
 // ─────────────────────────────────────────────────────────────────
 // ESP32 backend — the mic and speaker live on the firmware side and
 // audio flows over the UART link owned by HardwareService.
 // ─────────────────────────────────────────────────────────────────
 /**
 * Bytes-per-chunk written to the ESP32 per AUDIO_DOWN frame. Must
 * match `AUDIO_DOWN_CHUNK_BYTES` in HardwareService. 640 bytes =
 * 20 ms of 16 kHz S16 mono audio.
 */
 const ESP32_CHUNK_BYTES = 640;
 /** Milliseconds we wait between two AUDIO_DOWN frames during playback. */
 const ESP32_PACING_MS = 18;
 export class Esp32AudioService extends AudioService {
  private readonly logger: Logger;
  private _isCapturing = false;
  private _isPlaying = false;
  /** Set by `stopPlayback()`; polled by the `play()` loop before each frame. */
  private _playbackAbort = false;
  /**
   * Stable function reference so the exact same listener can be added in
   * `startCapture()` and removed in `stopCapture()`.
   */
  private readonly forwardAudioUp = (chunk: Buffer): void => {
    if (this._isCapturing) {
      this.emit('audio_chunk', chunk);
    }
  };
  /**
   * @param _config  Accepted for signature parity with the other backend;
   *                 not read by this class.
   * @param hardware Serial bridge that carries AUDIO_UP / AUDIO_DOWN frames.
   */
  constructor(
    _config: AudioConfig,
    private readonly hardware: HardwareService,
  ) {
    super();
    void _config;
    this.logger = createLogger('audio:esp32', 'info');
  }
  get isCapturing(): boolean {
    return this._isCapturing;
  }
  get isPlaying(): boolean {
    return this._isPlaying;
  }
  /** Subscribe to the bridge's `audio_up` events and re-emit them as `audio_chunk`. */
  startCapture(): void {
    if (!this._isCapturing) {
      this.logger.info('Subscribing to ESP32 AUDIO_UP stream');
      this._isCapturing = true;
      // One listener per capture session; detached again in stopCapture().
      this.hardware.on('audio_up', this.forwardAudioUp);
      return;
    }
    this.logger.warn('Already capturing audio');
  }
  /** Detach from the bridge's `audio_up` events. No-op when not capturing. */
  stopCapture(): void {
    if (this._isCapturing) {
      this.logger.info('Unsubscribing from ESP32 AUDIO_UP stream');
      this._isCapturing = false;
      this.hardware.off('audio_up', this.forwardAudioUp);
    }
  }
  /**
   * Stream a buffer to the ESP32 speaker.
   *
   * A WAV container, if present, is removed by `stripWavHeader()` first.
   * The remaining bytes are sent in `ESP32_CHUNK_BYTES` slices with a
   * pause of `ESP32_PACING_MS` after each one, so the data is not pushed
   * into the serial buffer all at once. A call made while another
   * playback is running is ignored. `playback_done` is emitted when the
   * loop ends, including when it was cut short by `stopPlayback()`.
   */
  async play(audioBuffer: Buffer): Promise<void> {
    if (this._isPlaying) {
      this.logger.warn('Already playing audio — ignoring new buffer');
      return;
    }
    const pcm = stripWavHeader(audioBuffer);
    if (pcm.length === 0) {
      // Nothing to send; still notify listeners that playback is over.
      this.emit('playback_done');
      return;
    }
    this._isPlaying = true;
    this._playbackAbort = false;
    try {
      let cursor = 0;
      while (cursor < pcm.length && !this._playbackAbort) {
        const next = cursor + ESP32_CHUNK_BYTES;
        this.hardware.sendAudioDown(pcm.subarray(cursor, next));
        if (ESP32_PACING_MS > 0) {
          await delay(ESP32_PACING_MS);
        }
        cursor = next;
      }
      // Wait for pending serial writes; a drain failure is logged, not thrown.
      try {
        await this.hardware.drainAudioDown();
      } catch (err) {
        this.logger.warn({ err }, 'drain after playback failed');
      }
      this.emit('playback_done');
    } finally {
      this._isPlaying = false;
      this._playbackAbort = false;
    }
  }
  /** Ask the running `play()` loop to stop before its next frame. */
  stopPlayback(): void {
    if (this._isPlaying) {
      this.logger.info('Aborting playback');
      this._playbackAbort = true;
    }
  }
  /** Stop capture, request a playback abort, and drop all listeners. */
  async destroy(): Promise<void> {
    this.stopCapture();
    this.stopPlayback();
    this.removeAllListeners();
  }
 }
 // ─────────────────────────────────────────────────────────────────
 // Helpers
 // ─────────────────────────────────────────────────────────────────
 function delay(ms: number): Promise<void> {
  // Promise-wrapped setTimeout: resolves (with no value) after `ms` milliseconds.
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
 }
 /**
 * Extract the raw PCM payload from a RIFF/WAVE buffer.
 *
 * The RIFF chunk list is walked until the `data` chunk is found, so
 * headers longer than the canonical 44 bytes (extended `fmt `, `LIST`,
 * `fact`, …) are skipped correctly and chunks that follow `data` are
 * not returned as audio. A declared data size of 0, or one that runs
 * past the end of the buffer (as written by streaming encoders), is
 * treated as "everything up to the end of the buffer".
 *
 * @param buf Audio as received: either raw PCM or a WAV file image.
 * @returns `buf` itself when it does not start with a RIFF/WAVE
 *   signature; otherwise a view (no copy) over the PCM bytes, which
 *   may be empty.
 */
 function stripWavHeader(buf: Buffer): Buffer {
  const RIFF_PREAMBLE_BYTES = 12; // 'RIFF' + u32 size + 'WAVE'
  const CHUNK_HEADER_BYTES = 8; // fourcc + u32le payload size
  const CANONICAL_HEADER_BYTES = 44;
  const looksLikeWav =
    buf.length >= RIFF_PREAMBLE_BYTES &&
    buf.toString('ascii', 0, 4) === 'RIFF' &&
    buf.toString('ascii', 8, 12) === 'WAVE';
  if (!looksLikeWav) {
    return buf;
  }
  let offset = RIFF_PREAMBLE_BYTES;
  while (offset + CHUNK_HEADER_BYTES <= buf.length) {
    const chunkId = buf.toString('ascii', offset, offset + 4);
    const chunkSize = buf.readUInt32LE(offset + 4);
    const payloadStart = offset + CHUNK_HEADER_BYTES;
    if (chunkId === 'data') {
      const declaredEnd = payloadStart + chunkSize;
      const unknownLength = chunkSize === 0 || declaredEnd > buf.length;
      return buf.subarray(payloadStart, unknownLength ? buf.length : declaredEnd);
    }
    // RIFF chunks are word-aligned: an odd-sized payload has one pad byte.
    offset = payloadStart + chunkSize + (chunkSize % 2);
  }
  // RIFF/WAVE signature but no parsable `data` chunk: fall back to the
  // canonical header length, and never hand header bytes back as audio.
  return buf.subarray(Math.min(CANONICAL_HEADER_BYTES, buf.length));
 }
 // ─────────────────────────────────────────────────────────────────
 // Factory
 // ─────────────────────────────────────────────────────────────────
 export type AudioBackend = 'alsa' | 'esp32';
 /**
 * Create the right AudioService for the current backend. The default
 * is `esp32`; set `AUDIO_BACKEND=alsa` to fall back to the legacy
 * arecord/aplay path (useful for laptop dev without an ESP32 wired in).
 */
 export function createAudioService(
  config: AudioConfig,
  hardware: HardwareService | null,
 ): AudioService {
  // An unset backend falls back to the ESP32 path.
  const backend = (config.backend ?? 'esp32') as AudioBackend;
  switch (backend) {
    case 'alsa':
      return new AlsaAudioService(config);
    case 'esp32': {
      if (hardware) {
        return new Esp32AudioService(config, hardware);
      }
      // The ESP32 backend cannot work without the serial bridge.
      throw new Error(
        'AUDIO_BACKEND=esp32 requires a connected HardwareService — ' +
          'set HARDWARE_SERIAL_ENABLED=true and make sure the ESP32 is reachable, ' +
          'or switch to AUDIO_BACKEND=alsa for local development.',
      );
    }
    default:
      // Reached only when the config carries a value outside AudioBackend.
      throw new Error(`Unknown AUDIO_BACKEND: ${backend}`);
  }
 }
--- a/apps/robot-client/src/services/index.ts
+++ b/apps/robot-client/src/services/index.ts
@ -1,4 +1,10 @@
-export { AudioService } from './audio.service.js';
+export {
  AudioService,
  AlsaAudioService,
  Esp32AudioService,
  createAudioService,
  type AudioBackend,
 } from './audio.service.js';
 export { WakeWordService } from './wake-word.service.js';
 export { KeyboardTriggerService } from './keyboard-trigger.service.js';
 export { HealthService } from './health.service.js';
--- a/apps/robot-client/src/services/wake-word.service.ts
+++ b/apps/robot-client/src/services/wake-word.service.ts
@ -1,24 +1,35 @@
 import { ChildProcess, spawn } from 'node:child_process';
 import { EventEmitter } from 'node:events';
 import { type WakeWordConfig, type AudioConfig } from '../config/index.js';
 import { type HardwareService } from '../hardware/index.js';
 import { createLogger, type Logger } from '../utils/index.js';
 export interface WakeWordServiceEvents {
  /** Emitted when the wake word is detected */
  detected: () => void;
  /** Emitted when the engine is ready */
  ready: () => void;
  /** Emitted on errors */
  error: (error: Error) => void;
 }
 /**
 * Wake word detection service.
 *
- * Runs OpenWakeWord as a **long-lived** Python subprocess.
+ * Two operating modes, selected by whether a HardwareService is passed
- * The model is loaded once at startup; pause/resume is handled via
+ * to the constructor:
- * PAUSE/RESUME commands on stdin, so the audio device is released
+ *
- * while arecord is capturing, then reclaimed when listening resumes.
+ * 1. **ALSA mode** (no HardwareService)
 *    The Python subprocess opens PyAudio on `audioConfig.captureDevice`
 *    and reads the mic directly. Pause releases the ALSA device so
 *    arecord (the AlsaAudioService) can use it during conversation.
 *
 * 2. **ESP32 mode** (HardwareService provided)
 *    The Python subprocess reads raw S16 mono PCM from stdin. We
 *    subscribe to `hardware.on('audio_up')` and pipe every mic chunk
 *    coming off the UART straight into the Python process. Control
 *    commands (PAUSE/RESUME/RESET/QUIT) go over a separate pipe at
 *    fd 3 because stdin is busy carrying audio.
 *
 * The model is loaded once at startup; pause/resume is cheap and
 * does not reload it.
 */
 export class WakeWordService extends EventEmitter {
  private process: ChildProcess | null = null;
@ -26,51 +37,73 @@ export class WakeWordService extends EventEmitter {
  private _isListening = false;
  private _isPaused = false;
  private _streamClosed = false;
  private readonly usesHardware: boolean;
  /** Latched forwarder so we can detach it on stop / error. */
  private readonly forwardMicChunk = (chunk: Buffer): void => {
    const sink = this.process?.stdin;
    // No child process, no stdin pipe, or a torn-down pipe: drop the chunk.
    if (!sink || sink.destroyed) return;
    // The return value of write() is deliberately ignored: no
    // back-pressure is applied to the wake-word audio feed.
    sink.write(chunk);
  };
  /**
   * @param wakeWordConfig Interpreter path, script path, model name and
   *   detection threshold used when spawning the detector process.
   * @param audioConfig    Supplies the capture device and sample rate
   *   passed to the detector process.
   * @param hardware       Serial bridge to the ESP32, or `null` (the
   *   default) to run without it.
   */
  constructor(
    private readonly wakeWordConfig: WakeWordConfig,
    private readonly audioConfig: AudioConfig,
    private readonly hardware: HardwareService | null = null,
  ) {
    super();
    this.logger = createLogger('wake-word', 'info');
    // The operating mode is decided solely by whether a bridge was supplied.
    this.usesHardware = hardware !== null;
  }
  get isListening(): boolean {
    // A paused detector never counts as listening, even if it is running.
    if (this._isPaused) return false;
    return this._isListening;
  }
  /**
   * Start the wake word Python subprocess.
   * The model is loaded once; subsequent pause/resume cycles are fast.
   */
  start(): void {
    if (this.process) {
-      // Process already running — just resume if paused
+      if (this._isPaused) this.resume();
      if (this._isPaused) {
        this.resume();
      }
      return;
    }
    this.logger.info(
-      { model: this.wakeWordConfig.modelName, threshold: this.wakeWordConfig.threshold },
+      {
        mode: this.usesHardware ? 'esp32' : 'alsa',
        model: this.wakeWordConfig.modelName,
        threshold: this.wakeWordConfig.threshold,
      },
      'Starting wake word detection',
    );
-    this.process = spawn(this.wakeWordConfig.pythonPath, [
+    const args = [
      this.wakeWordConfig.scriptPath,
      '--model', this.wakeWordConfig.modelName,
      '--threshold', String(this.wakeWordConfig.threshold),
      '--device', this.audioConfig.captureDevice,
      '--sample-rate', String(this.audioConfig.sampleRate),
-    ], {
+    ];
-      stdio: ['pipe', 'pipe', 'pipe'],
+
-    });
+    if (this.usesHardware) {
      args.push('--input', 'stdin', '--control-fd', '3');
    } else {
      args.push('--input', 'alsa', '--device', this.audioConfig.captureDevice);
    }
    // stdio layout:
    //   0: stdin  — audio in (ESP32 mode) or control (ALSA mode)
    //   1: stdout — DETECTED events
    //   2: stderr — status & log lines
    //   3: extra  — control pipe (ESP32 mode only)
    const stdio: ('pipe' | 'ignore')[] = this.usesHardware
      ? ['pipe', 'pipe', 'pipe', 'pipe']
      : ['pipe', 'pipe', 'pipe'];
    this.process = spawn(this.wakeWordConfig.pythonPath, args, { stdio });
    this._isListening = true;
    this._isPaused = false;
    // ── stdout: DETECTED events ──
    this.process.stdout?.on('data', (data: Buffer) => {
      const lines = data.toString().trim().split('\n');
      for (const line of lines) {
@ -83,7 +116,6 @@ export class WakeWordService extends EventEmitter {
      }
    });
    // ── stderr: status messages ──
    this.process.stderr?.on('data', (data: Buffer) => {
      const lines = data.toString().trim().split('\n');
      for (const line of lines) {
@ -107,10 +139,9 @@ export class WakeWordService extends EventEmitter {
          this.logger.info('⏳ Loading wake word model...');
        } else if (msg.startsWith('Wake word model loaded')) {
          this.logger.info('✅ Wake word model loaded');
-        } else if (msg.startsWith('Matched device') || msg.startsWith('Using device')) {
+        } else if (msg.startsWith('Matched device') || msg.startsWith('Using device') || msg.startsWith('Listening')) {
          this.logger.info(`🔊 ${msg}`);
        } else {
          // Log unknown stderr messages at warn level to catch errors
          this.logger.warn({ msg }, 'Wake word stderr');
        }
      }
@ -119,29 +150,36 @@ export class WakeWordService extends EventEmitter {
    this.process.on('error', (err) => {
      this._isListening = false;
      this.logger.error({ err }, 'Wake word process error');
      this.detachHardware();
      this.emit('error', new Error(`Wake word process failed: ${err.message}`));
    });
    this.process.on('exit', (code) => {
      this._isListening = false;
      this._isPaused = false;
      this.detachHardware();
      this.process = null;
      if (code !== 0 && code !== null) {
        this.logger.warn({ code }, 'Wake word process exited unexpectedly');
        // Auto-restart after a short delay
        setTimeout(() => {
          this.logger.info('Restarting wake word detection...');
          this.start();
        }, 2000);
      }
    });
    // In ESP32 mode, start piping mic audio from the UART.
    if (this.usesHardware && this.hardware) {
      this.hardware.on('audio_up', this.forwardMicChunk);
    }
  }
  /**
   * Pause wake word detection.
-   * Sends PAUSE command to Python subprocess which closes the audio stream,
+   *
-   * freeing the device for arecord. Returns a promise that resolves when
+   * In ALSA mode we must wait for STREAM_CLOSED so arecord can reclaim
-   * the audio stream is confirmed closed.
+   * the device. In ESP32 mode the audio flow never stops — we just
   * tell the Python process to ignore detections.
   */
  pause(): Promise<void> {
    if (!this.process || this._isPaused) return Promise.resolve();
@ -149,9 +187,13 @@ export class WakeWordService extends EventEmitter {
    this._isPaused = true;
    this._streamClosed = false;
-    this.process.stdin?.write('PAUSE\n');
+    this.writeControl('PAUSE');
    if (this.usesHardware) {
      // No physical device to release — resolve immediately.
      return Promise.resolve();
    }
    // Wait for the stream to be closed (so arecord can use the device)
    return new Promise((resolve) => {
      const checkInterval = setInterval(() => {
        if (this._streamClosed || !this.process) {
@ -160,7 +202,6 @@ export class WakeWordService extends EventEmitter {
        }
      }, 50);
      // Safety timeout
      setTimeout(() => {
        clearInterval(checkInterval);
        resolve();
@ -168,25 +209,18 @@ export class WakeWordService extends EventEmitter {
    });
  }
  /**
   * Resume wake word detection after pause.
   * The Python subprocess reopens the audio stream (fast, no model reload).
   */
  resume(): void {
    if (!this.process || !this._isPaused) return;
    this._isPaused = false;
-    this.process.stdin?.write('RESUME\n');
+    this.writeControl('RESUME');
    this.logger.info('🎤 Resuming wake word listening...');
  }
  /**
   * Stop wake word detection permanently.
   */
  stop(): void {
    if (this.process) {
-      this.process.stdin?.write('QUIT\n');
+      this.writeControl('QUIT');
-      // Give it a moment to exit cleanly, then force kill
+      this.detachHardware();
      setTimeout(() => {
        if (this.process) {
          this.process.kill('SIGTERM');
@ -198,4 +232,35 @@ export class WakeWordService extends EventEmitter {
    this._isPaused = false;
    this.removeAllListeners();
  }
  // ──────────────────────────────────────────────────────────
  // Internals
  // ──────────────────────────────────────────────────────────
  /**
   * Write a text control command. In ALSA mode that goes to stdin;
   * in ESP32 mode stdin carries audio so commands travel over the
   * extra pipe at fd 3 (process.stdio[3]).
   */
  private writeControl(cmd: string): void {
    const child = this.process;
    if (!child) return;
    const line = `${cmd}\n`;
    if (!this.usesHardware) {
      // ALSA mode: stdin is free, so it doubles as the control channel.
      child.stdin?.write(line);
      return;
    }
    // ESP32 mode: stdin carries audio, so commands use the extra pipe at
    // fd 3. `stdio[3]` is typed as readable-or-writable-or-absent; narrow
    // it at runtime instead of forcing the type with a double assertion.
    const control = child.stdio[3];
    if (control && 'write' in control && !control.destroyed) {
      control.write(line);
    }
  }
  private detachHardware(): void {
    // Only ESP32 mode ever attaches the mic forwarder.
    if (!this.usesHardware || !this.hardware) return;
    this.hardware.off('audio_up', this.forwardMicChunk);
  }
 }
--- a/apps/robot-hardware/lib/Audio/library.json
+++ b/apps/robot-hardware/lib/Audio/library.json
@ -0,0 +1,7 @@
 {
  "name": "Audio",
  "version": "0.1.0",
  "description": "Ti-Pote audio I/O — INMP441 mic + MAX98357A speaker via two I2S peripherals.",
  "frameworks": "arduino",
  "platforms": "espressif32"
 }
--- a/apps/robot-hardware/lib/Audio/src/Audio.cpp
+++ b/apps/robot-hardware/lib/Audio/src/Audio.cpp
@ -0,0 +1,151 @@
 #include "Audio.h"
 #include <driver/i2s.h>
 namespace tipote {
 // ─────────────────────────────────────────────────────────────────
 // Shared I2S bus pin assignment — see the header for rationale.
 // ─────────────────────────────────────────────────────────────────
 static constexpr int PIN_BCLK     = 32;   // shared: mic SCK + speaker BCLK
 static constexpr int PIN_LRCLK    = 33;   // shared: mic WS  + speaker LRC
 static constexpr int PIN_MIC_DIN  = 34;   // INMP441 SD   → ESP32 data-in
 static constexpr int PIN_SPK_DOUT = 22;   // MAX98357A DIN ← ESP32 data-out
 // DMA buffers — 4 × 256 × 8 bytes (stereo 32-bit) ≈ 8 KB each for
 // RX and TX. That's ~64 ms of audio each way at 16 kHz, plenty of
 // room to absorb UART jitter.
 static constexpr int DMA_COUNT = 4;
 static constexpr int DMA_LEN   = 256;
 bool Audio::begin() {
    // ───── Single I2S port, full duplex, 32-bit stereo slots ─────
    //
    // The INMP441 requires 32-bit slots; the MAX98357A happily reads
    // the 32-bit frames we emit. With a shared bus we get one set of
    // BCLK/WS for both sides — exactly like the Pi setup that worked.
    i2s_config_t cfg = {};
    cfg.mode                 = static_cast<i2s_mode_t>(I2S_MODE_MASTER |
                                                       I2S_MODE_RX |
                                                       I2S_MODE_TX);
    cfg.sample_rate          = SAMPLE_RATE;
    cfg.bits_per_sample      = I2S_BITS_PER_SAMPLE_32BIT;
    cfg.channel_format       = I2S_CHANNEL_FMT_RIGHT_LEFT;  // stereo frames
    cfg.communication_format = I2S_COMM_FORMAT_STAND_I2S;
    cfg.intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1;
    cfg.dma_buf_count        = DMA_COUNT;
    cfg.dma_buf_len          = DMA_LEN;
    cfg.use_apll             = false;
    cfg.tx_desc_auto_clear   = true;
    cfg.fixed_mclk           = 0;
    if (i2s_driver_install(I2S_NUM_0, &cfg, 0, nullptr) != ESP_OK) {
        return false;
    }
    i2s_pin_config_t pins = {};
    pins.bck_io_num   = PIN_BCLK;
    pins.ws_io_num    = PIN_LRCLK;
    pins.data_out_num = PIN_SPK_DOUT;
    pins.data_in_num  = PIN_MIC_DIN;
    if (i2s_set_pin(I2S_NUM_0, &pins) != ESP_OK) {
        i2s_driver_uninstall(I2S_NUM_0);
        return false;
    }
    i2s_zero_dma_buffer(I2S_NUM_0);
    micStarted_ = true;
    spkStarted_ = true;
    return true;
 }
 size_t Audio::readMicChunk(uint8_t* out, size_t outCapacity) {
    // Reads whatever the I2S RX DMA has ready (timeout 0, never blocks)
    // and converts it to S16 mono little-endian bytes in `out`.
    // Returns the number of bytes written: always even, possibly zero.
    if (!micStarted_ || out == nullptr || outCapacity < 2) return 0;

    // Each bus frame is an L + R pair of 32-bit slots (8 bytes).
    // Cap at 320 pairs = 20 ms @ 16 kHz mono per call.
    constexpr size_t MAX_PAIRS = 320;
    int32_t raw[MAX_PAIRS * 2];

    size_t wantPairs = outCapacity / 2;  // 2 bytes out per mono sample
    if (wantPairs > MAX_PAIRS) wantPairs = MAX_PAIRS;

    size_t bytesRead = 0;
    const esp_err_t err = i2s_read(
        I2S_NUM_0,
        raw,
        wantPairs * 2 * sizeof(int32_t),
        &bytesRead,
        0  // non-blocking
    );
    if (err != ESP_OK || bytesRead == 0) return 0;

    const size_t pairs = bytesRead / (2 * sizeof(int32_t));

    // Min/max of both raw slots and of the converted output, published
    // through lastStats_ for diagnostics.
    int32_t lMin = INT32_MAX, lMax = INT32_MIN;
    int32_t rMin = INT32_MAX, rMax = INT32_MIN;
    int16_t s16Min = INT16_MAX, s16Max = INT16_MIN;
    const bool pickRight = (micChannel_ == MicChannel::Right);

    for (size_t i = 0; i < pairs; ++i) {
        const int32_t L = raw[2 * i];
        const int32_t R = raw[2 * i + 1];
        if (L < lMin) lMin = L;
        if (L > lMax) lMax = L;
        if (R < rMin) rMin = R;
        if (R > rMax) rMax = R;

        // INMP441 is 24-bit left-justified in a 32-bit slot, so the
        // useful range lives in bits 31..8. A >> 14 gives a comfortable
        // speech level; bump to >> 11 if the result is too quiet.
        // NOTE: relies on >> of a negative value being an arithmetic
        // shift, which is what GCC implements.
        const int32_t src = pickRight ? R : L;
        int32_t s = src >> 14;
        if (s > INT16_MAX) s = INT16_MAX;
        if (s < INT16_MIN) s = INT16_MIN;
        const int16_t s16 = static_cast<int16_t>(s);
        if (s16 < s16Min) s16Min = s16;
        if (s16 > s16Max) s16Max = s16;

        // Store byte-by-byte, low byte first. This keeps the output
        // little-endian by construction and avoids writing through an
        // int16_t* that aliases a possibly unaligned uint8_t buffer.
        const uint16_t bits = static_cast<uint16_t>(s16);
        out[2 * i]     = static_cast<uint8_t>(bits & 0xFFu);
        out[2 * i + 1] = static_cast<uint8_t>(bits >> 8);
    }

    lastStats_ = {lMin, lMax, rMin, rMax, s16Min, s16Max, pairs};
    return pairs * 2;
 }
 size_t Audio::writeSpeakerChunk(const uint8_t* data, size_t len) {
    if (!spkStarted_ || len == 0) return 0;
    // The UART brings us S16 mono PCM. The I2S bus is running as
    // 32-bit stereo, so we expand each 16-bit sample to a stereo
    // pair of 32-bit words. 320 input samples → 2560 output bytes.
    constexpr size_t MAX_IN_SAMPLES = 320;
    const size_t inSamples = (len / 2 > MAX_IN_SAMPLES) ? MAX_IN_SAMPLES : len / 2;
    int32_t stereo[MAX_IN_SAMPLES * 2];
    const int16_t* src = reinterpret_cast<const int16_t*>(data);
    for (size_t i = 0; i < inSamples; ++i) {
        // Shift up to place the sample in the upper 16 bits of the
        // 32-bit slot (matches what the MAX98357A expects).
        const int32_t s32 = static_cast<int32_t>(src[i]) << 16;
        stereo[2 * i]     = s32;  // left
        stereo[2 * i + 1] = s32;  // right duplicated
    }
    size_t bytesWritten = 0;
    i2s_write(I2S_NUM_0, stereo, inSamples * 2 * sizeof(int32_t),
              &bytesWritten, pdMS_TO_TICKS(50));
    // Report bytes accepted in *caller units* (S16 mono) so the
    // outside world doesn't need to know about our internal format.
    const size_t pairsWritten = bytesWritten / (2 * sizeof(int32_t));
    return pairsWritten * 2;
 }
 void Audio::flushSpeaker() {
    // Nothing to clear until begin() has brought the bus up.
    if (!spkStarted_) return;
    // Zero the I2S DMA buffers, discarding any audio still queued.
    i2s_zero_dma_buffer(I2S_NUM_0);
 }
 }  // namespace tipote
--- a/apps/robot-hardware/lib/Audio/src/Audio.h
+++ b/apps/robot-hardware/lib/Audio/src/Audio.h
@ -0,0 +1,84 @@
 // Ti-Pote — Audio I/O via a single full-duplex I2S bus.
 //
 // I2S_NUM_0 is configured as MASTER in RX+TX mode. BCLK and WS are
 // shared between the INMP441 microphone (RX) and the MAX98357A
 // amplifier (TX), which is the standard I2S bus layout — exactly
 // what was working on the Raspberry Pi side.
 //
 // Pin map (single shared I2S bus):
 //   BCLK         = GPIO 32   shared mic SCK + speaker BCLK
 //   LRCLK / WS   = GPIO 33   shared mic WS  + speaker LRC
 //   Mic data in  = GPIO 34   INMP441 SD (input-only pin, perfect)
 //   Speaker DOUT = GPIO 22   MAX98357A DIN
 //
 // Mic L/R stays tied to GND → talks on the LEFT slot of the I2S frame.
 //
 // Format exchanged with the Pi on the UART:
 //   PCM signed 16-bit little-endian, mono, 16 kHz.
 //
 // Internally the bus runs at 32-bit stereo slots (INMP441 requires it).
 // readMicChunk() converts the 32-bit left slot down to S16 mono.
 // writeSpeakerChunk() expands S16 mono to 32-bit stereo frames before
 // handing them to i2s_write().
 #pragma once
 #include <Arduino.h>
 #include <stdint.h>
 #include <stddef.h>
 namespace tipote {
 // Audio I/O front-end: exchanges S16 mono PCM with the caller and hides
 // the 32-bit stereo format used on the I2S bus.
 class Audio {
 public:
    // Format of the PCM exchanged with the caller (not the bus format).
    static constexpr int       SAMPLE_RATE      = 16000;
    static constexpr int       CHANNELS         = 1;
    static constexpr int       BYTES_PER_SAMPLE = 2;  // S16
    // Initialise the shared full-duplex I2S bus (I2S_NUM_0, RX + TX, as
    // described in the file header). Safe to call exactly once from
    // setup(). Returns a success flag (presumably false when the driver
    // could not be set up — confirm against Audio.cpp).
    bool begin();
    // Pull whatever the mic DMA has ready. Writes S16 mono little-endian
    // bytes into `out`, up to `outCapacity` bytes, and returns the number
    // of bytes actually written (always even, possibly zero).
    //
    // Non-blocking (timeout = 0).
    size_t readMicChunk(uint8_t* out, size_t outCapacity);
    // Push S16 mono little-endian PCM to the speaker DMA. Blocks up to
    // ~50 ms waiting for room. Returns bytes actually accepted, which can
    // be less than `len`: at most 320 samples (640 bytes) are consumed
    // per call, and nothing is accepted before the speaker is started.
    size_t writeSpeakerChunk(const uint8_t* data, size_t len);
    // Drop anything pending in the speaker DMA (zeroes the DMA buffers).
    // Used on shutdown / reset. No-op before the speaker is started.
    void flushSpeaker();
    // ─── Debug / bring-up ────────────────────────────────────────
    //
    // Stats updated on every readMicChunk() call, covering *this last
    // batch only*. Handy to confirm the mic is actually clocking data
    // into the ESP32 without blowing up the main audio path.
    struct MicStats {
        int32_t leftRawMin;     // raw int32 sample on left I2S slot
        int32_t leftRawMax;
        int32_t rightRawMin;    // raw int32 sample on right I2S slot
        int32_t rightRawMax;
        int16_t s16Min;         // post-shift S16 sample (output channel)
        int16_t s16Max;
        size_t  samples;        // sample pairs in the batch
    };
    // Read-only view of the stats captured by the most recent
    // readMicChunk() call.
    const MicStats& lastMicStats() const { return lastStats_; }
    // Which I2S slot to route into the S16 output. Flip at runtime if
    // the mic's L/R pin doesn't land where we expect.
    enum class MicChannel { Left, Right };
    void setMicChannel(MicChannel ch) { micChannel_ = ch; }
    MicChannel micChannel() const { return micChannel_; }
 private:
    // Start-up flags; spkStarted_ gates writeSpeakerChunk() and
    // flushSpeaker(). Presumably both are set by begin() — confirm.
    bool        micStarted_ = false;
    bool        spkStarted_ = false;
    // Slot currently routed to the S16 output (defaults to Left).
    MicChannel  micChannel_ = MicChannel::Left;
    // Stats of the last mic batch; all zero until the first read.
    MicStats    lastStats_  = {0, 0, 0, 0, 0, 0, 0};
 };
 }  // namespace tipote
--- a/apps/robot-hardware/platformio.ini
+++ b/apps/robot-hardware/platformio.ini
@ -30,6 +30,11 @@ build_flags =
    -DHW_SERIAL_BAUD=921600
    ; Idle timeout before the eyes fall back to the default animation (ms)
    -DHW_HEARTBEAT_TIMEOUT_MS=5000
    ; Hardware UART2 pins used to talk to the Raspberry Pi.
    ; The OLED eyes already claim GPIO 16/17 (UART2 default pins),
    ; so Serial2 is remapped to these two free pins instead.
    -DHW_UART_RX_PIN=27
    -DHW_UART_TX_PIN=13
 build_unflags =
    -std=gnu++11
--- a/apps/robot-hardware/scripts/esp-play.ts
+++ b/apps/robot-hardware/scripts/esp-play.ts
@ -0,0 +1,219 @@
 /**
 * Ti-Pote — Play a PCM/WAV file on the ESP32 speaker over USB.
 *
 * Usage:
 *   pnpm esp:play <file.wav|file.raw>
 *
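 * Accepts:
 *   - raw S16 LE mono 16 kHz PCM (.raw / .pcm)
 *   - WAV file with a 44-byte RIFF header (header is stripped)
 *   - any other format macOS `afconvert` can decode (m4a, mp3, …);
 *     it is converted to 16 kHz mono S16 LE WAV first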
 *
 * Default port: auto-detected, override with ESP_PORT=/dev/cu.usbserial-XXX
 */
 import { execFileSync } from 'node:child_process';
 import { existsSync, mkdtempSync, readFileSync, readdirSync, rmSync } from 'node:fs';
 import { tmpdir } from 'node:os';
 import { join, extname } from 'node:path';
 import { SerialPort } from 'serialport';
 const SAMPLE_RATE = 16000;
 /**
 * Pick the serial device to talk to.
 *
 * `ESP_PORT` wins when it is set. Otherwise the first entry of `/dev`
 * whose name starts with a known USB-serial prefix is used.
 *
 * @returns Absolute device path.
 * @throws Error when `ESP_PORT` is unset and no matching device exists.
 */
 function findDefaultPort(): string {
  const override = process.env.ESP_PORT;
  if (override) return override;
  const knownPrefixes = ['cu.usbserial', 'cu.SLAB_', 'cu.wchusbserial'];
  const firstMatch = readdirSync('/dev').find((entry) =>
    knownPrefixes.some((prefix) => entry.startsWith(prefix)),
  );
  if (firstMatch === undefined) {
    throw new Error(
      'No ESP32 serial port detected. Plug the board in, or set ESP_PORT=/dev/cu.usbserial-XXX',
    );
  }
  return `/dev/${firstMatch}`;
 }
 /**
 * Return the PCM payload of a WAV file, or the buffer itself when it is
 * not a RIFF/WAVE container (raw PCM is passed through untouched).
 *
 * The RIFF chunk list is walked to locate the `data` chunk instead of
 * assuming a fixed 44-byte header: encoders may emit extra chunks before
 * `data` (filler/padding, `LIST`, extended `fmt `) or after it, and those
 * bytes must not be played as audio. The payload is clamped to the buffer
 * so streaming writers that leave an oversized size field still work.
 *
 * @param buf Whole file contents.
 * @returns A view (no copy) over the PCM bytes.
 */
 function stripWav(buf: Buffer): Buffer {
  const isRiffWave =
    buf.length >= 12 &&
    buf.toString('ascii', 0, 4) === 'RIFF' &&
    buf.toString('ascii', 8, 12) === 'WAVE';
  if (!isRiffWave) return buf;
  // Chunks start right after "RIFF<size>WAVE": 4-byte id, 4-byte LE size,
  // payload, then one pad byte when the size is odd.
  let offset = 12;
  while (offset + 8 <= buf.length) {
    const id = buf.toString('ascii', offset, offset + 4);
    const size = buf.readUInt32LE(offset + 4);
    const payloadStart = offset + 8;
    if (id === 'data') {
      return buf.subarray(payloadStart, Math.min(payloadStart + size, buf.length));
    }
    offset = payloadStart + size + (size & 1);
  }
  // No `data` chunk found (malformed file): keep the legacy behaviour of
  // dropping a canonical 44-byte header.
  return buf.length > 44 ? buf.subarray(44) : buf;
 }
 /**
 * Transcode an audio file to S16 LE mono 16 kHz WAV by shelling out to
 * the macOS `afconvert` tool.
 *
 * The output is written as `converted.wav` inside a fresh temp directory.
 * That directory is removed here if the conversion fails; on success the
 * caller owns it and must invoke the returned `cleanup`.
 *
 * @param inputPath File to convert.
 * @returns The path of the converted file plus a function deleting the
 *          temp directory.
 * @throws Error prefixed with "afconvert failed:" when the tool cannot be
 *         run or exits with an error.
 */
 function convertToEsp32Wav(inputPath: string): { wavPath: string; cleanup: () => void } {
  const workDir = mkdtempSync(join(tmpdir(), 'tipote-'));
  const removeWorkDir = (): void => rmSync(workDir, { recursive: true, force: true });
  const wavPath = join(workDir, 'converted.wav');
  console.log(`→ converting ${inputPath} → 16 kHz mono S16LE WAV`);
  // WAVE container, little-endian signed 16-bit at 16 kHz, one channel.
  const args = ['-f', 'WAVE', '-d', 'LEI16@16000', '-c', '1', inputPath, wavPath];
  try {
    execFileSync('afconvert', args, { stdio: 'inherit' });
  } catch (err) {
    removeWorkDir();
    throw new Error(`afconvert failed: ${(err as Error).message}`);
  }
  return { wavPath, cleanup: removeWorkDir };
 }
 /**
 * Entry point: load the input file (converting it first when it is not
 * .wav/.raw/.pcm), open the serial port, wait for the firmware's READY,
 * send `PLAY <bytes>`, stream the PCM and wait for the final `OK`.
 *
 * The temp directory created by the conversion and the serial port are
 * released on every exit path, including failures.
 *
 * @throws Error on a missing/invalid input file, when no port is found,
 *         on READY/OK timeouts, or when the firmware answers `ERR …`.
 */
 async function main(): Promise<void> {
  const inPath = process.argv[2];
  if (!inPath) {
    console.error('Usage: esp-play.ts <file>  (wav, raw, m4a, mp3, …)');
    process.exit(1);
  }
  if (!existsSync(inPath)) {
    throw new Error(`file not found: ${inPath}`);
  }
  // Convert anything that isn't already a .wav or raw PCM blob. This
  // covers m4a / mp3 / ogg / aiff / opus / flac via the built-in
  // macOS `afconvert` tool.
  const ext = extname(inPath).toLowerCase();
  const needsConversion = ext !== '.wav' && ext !== '.raw' && ext !== '.pcm';
  let cleanup: () => void = () => {};
  let port: SerialPort | undefined;
  try {
    let loadPath = inPath;
    if (needsConversion) {
      const converted = convertToEsp32Wav(inPath);
      loadPath = converted.wavPath;
      cleanup = converted.cleanup;
    }
    const raw = readFileSync(loadPath);
    const pcm = stripWav(raw);
    const samples = pcm.length / 2;
    const durationMs = (samples / SAMPLE_RATE) * 1000;
    console.log(
      `→ loaded ${loadPath}: ${pcm.length} bytes (${samples} samples, ${durationMs.toFixed(0)} ms)`,
    );
    if (pcm.length === 0) {
      throw new Error('empty PCM buffer');
    }
    if (pcm.length % 2 !== 0) {
      throw new Error(
        'PCM size must be a multiple of 2 (S16 mono). The source file is probably not 16-bit or not mono. If you passed a raw file, convert it first.',
      );
    }
    const path = findDefaultPort();
    console.log(`→ opening ${path} @ 921600 baud`);
    const serial = new SerialPort({ path, baudRate: 921600, autoOpen: false });
    port = serial;
    await new Promise<void>((resolve, reject) => {
      serial.open((err) => (err ? reject(err) : resolve()));
    });
    let ready = false;
    const readyWaiters: Array<() => void> = [];
    const finished = new Promise<void>((resolve, reject) => {
      const timeout = setTimeout(
        () => reject(new Error(`timeout waiting for OK after ${durationMs + 8000} ms`)),
        durationMs + 8000,
      );
      let lineBuf = '';
      serial.on('data', (data: Buffer) => {
        lineBuf += data.toString('utf8');
        let idx: number;
        while ((idx = lineBuf.indexOf('\n')) >= 0) {
          const line = lineBuf.slice(0, idx).replace(/\r$/, '').trim();
          lineBuf = lineBuf.slice(idx + 1);
          if (!line) continue;
          if (line === 'OK') {
            clearTimeout(timeout);
            resolve();
            return;
          }
          if (line === 'READY') {
            ready = true;
            while (readyWaiters.length) readyWaiters.shift()!();
            continue;
          }
          if (line.startsWith('ERR ')) {
            clearTimeout(timeout);
            reject(new Error(`firmware error: ${line.slice(4)}`));
            return;
          }
          if (line.startsWith('LOG ')) console.log(`[esp] ${line.slice(4)}`);
          else console.log(`[esp] ${line}`);
        }
      });
      serial.on('error', reject);
    });
    // `finished` is only awaited after streaming. Observe a rejection
    // right away so it is never reported as unhandled and so the
    // streaming loop can stop early; `await finished` below still throws.
    const link = { failed: false };
    finished.catch(() => {
      link.failed = true;
    });
    // Wait for READY so we don't send PLAY into the bootloader.
    await new Promise<void>((resolve, reject) => {
      if (ready) return resolve();
      const timer = setTimeout(
        () => reject(new Error('timeout waiting for READY from firmware')),
        5000,
      );
      readyWaiters.push(() => {
        clearTimeout(timer);
        resolve();
      });
    });
    await new Promise((r) => setTimeout(r, 50));
    console.log(`→ PLAY ${pcm.length} bytes`);
    serial.write(`PLAY ${pcm.length}\n`);
    // Stream the payload paced at the I2S consumption rate: 16 kHz ×
    // 2 bytes/sample = 32 bytes of S16 mono per millisecond. Each chunk
    // has a wall-clock deadline derived from its byte offset, so the
    // time spent in write()/drain() and timer jitter do not accumulate.
    // We run LEAD_MS ahead of real time, which keeps about 4 KB queued
    // on the firmware side (well under its 16 KB RX buffer) whatever
    // the file length, so the DMA neither runs dry nor overflows.
    const CHUNK = 1024;
    const BYTES_PER_MS = (SAMPLE_RATE * 2) / 1000;
    const LEAD_MS = 128;
    const startedAt = Date.now();
    for (let off = 0; off < pcm.length && !link.failed; off += CHUNK) {
      const waitMs = startedAt + off / BYTES_PER_MS - LEAD_MS - Date.now();
      if (waitMs > 0) {
        await new Promise((r) => setTimeout(r, waitMs));
      }
      const slice = pcm.subarray(off, off + CHUNK);
      await new Promise<void>((resolve, reject) => {
        serial.write(slice, (err) => (err ? reject(err) : resolve()));
      });
      await new Promise<void>((resolve) => serial.drain(() => resolve()));
    }
    await finished;
  } finally {
    // Release the port and the temp dir on success and failure alike.
    const opened = port;
    if (opened?.isOpen) {
      await new Promise<void>((resolve) => opened.close(() => resolve()));
    }
    cleanup();
  }
  console.log('✅ playback done');
 }
 // Script runner: report any failure from main() and exit non-zero.
 main().catch((failure) => {
  console.error(failure);
  process.exit(1);
 });
--- a/apps/robot-hardware/scripts/esp-record.ts
+++ b/apps/robot-hardware/scripts/esp-record.ts
@ -0,0 +1,190 @@
 /**
 * Ti-Pote — Record audio from the ESP32 over USB.
 *
 * Usage:
 *   pnpm --filter @ti-pote/robot-client exec tsx \
 *     ../robot-hardware/scripts/esp-record.ts <file.wav> [duration_ms]
 *
 * Or with the `esp:record` shortcut declared in robot-client's package.json:
 *   pnpm esp:record out.wav 3000
 *
 * Defaults:
 *   duration_ms = 3000
 *   port        = auto-detected (first /dev/cu.usbserial-* or /dev/cu.SLAB_*)
 *                 can be overridden with ESP_PORT=/dev/cu.usbserial-XXX
 */
 import { readdirSync, writeFileSync } from 'node:fs';
 import { SerialPort } from 'serialport';
 const SAMPLE_RATE = 16000;
 const BYTES_PER_SAMPLE = 2;
 /**
 * Resolve the serial device used to reach the ESP32.
 *
 * An explicit `ESP_PORT` environment variable takes precedence; without
 * it, `/dev` is scanned and the first USB-serial looking entry is used.
 *
 * @returns Absolute device path.
 * @throws Error when `ESP_PORT` is unset and nothing in `/dev` matches.
 */
 function findDefaultPort(): string {
  const fromEnv = process.env.ESP_PORT;
  if (fromEnv) return fromEnv;
  const usbSerialName = /^cu\.(?:usbserial|SLAB_|wchusbserial)/;
  for (const entry of readdirSync('/dev')) {
    if (usbSerialName.test(entry)) return `/dev/${entry}`;
  }
  throw new Error(
    'No ESP32 serial port detected. Plug the board in, or set ESP_PORT=/dev/cu.usbserial-XXX',
  );
 }
 /**
 * Build the canonical 44-byte RIFF/WAVE header for 16-bit mono PCM.
 *
 * @param pcmBytes Size of the PCM payload that will follow the header.
 * @param sampleRate Sample rate in Hz.
 * @returns A new 44-byte buffer holding the header.
 */
 function wavHeader(pcmBytes: number, sampleRate: number): Buffer {
  const out = Buffer.alloc(44);
  let cursor = 0;
  const tag = (text: string): void => {
    out.write(text, cursor);
    cursor += 4;
  };
  const u32 = (value: number): void => {
    out.writeUInt32LE(value, cursor);
    cursor += 4;
  };
  const u16 = (value: number): void => {
    out.writeUInt16LE(value, cursor);
    cursor += 2;
  };
  tag('RIFF');
  u32(36 + pcmBytes); // RIFF size = everything after this field
  tag('WAVE');
  tag('fmt ');
  u32(16); // fmt chunk size
  u16(1); // PCM
  u16(1); // mono
  u32(sampleRate);
  u32(sampleRate * 2); // byte rate
  u16(2); // block align
  u16(16); // bits per sample
  tag('data');
  u32(pcmBytes);
  return out;
 }
 /**
 * Entry point: open the serial port, wait for the firmware's READY, send
 * `REC <ms>`, collect the `BEGIN <bytes>` … raw PCM … `END` response and
 * write it to the output file (with a WAV header when the name ends in
 * `.wav`, raw otherwise).
 *
 * The duration is validated before anything is sent, and the serial port
 * is closed on every exit path.
 *
 * @throws Error on an invalid duration, when no port is found, on
 *         READY/audio timeouts, on a malformed BEGIN line, or when the
 *         firmware answers `ERR …`.
 */
 async function main(): Promise<void> {
  const outPath = process.argv[2];
  if (!outPath) {
    console.error('Usage: esp-record.ts <file.wav> [duration_ms]');
    process.exit(1);
  }
  // Same bounds as the firmware's REC handler (1..60000 ms). Checking
  // here avoids NaN timers and a round-trip that can only end in ERR.
  const MAX_DURATION_MS = 60000;
  const durationMs = parseInt(process.argv[3] ?? '3000', 10);
  if (!Number.isInteger(durationMs) || durationMs <= 0 || durationMs > MAX_DURATION_MS) {
    throw new Error(
      `invalid duration_ms "${process.argv[3]}": expected an integer between 1 and ${MAX_DURATION_MS}`,
    );
  }
  const path = findDefaultPort();
  console.log(`→ opening ${path} @ 921600 baud`);
  const port = new SerialPort({ path, baudRate: 921600, autoOpen: false });
  await new Promise<void>((resolve, reject) => {
    port.open((err) => (err ? reject(err) : resolve()));
  });
  const closePort = async (): Promise<void> => {
    if (!port.isOpen) return;
    await new Promise<void>((resolve) => port.close(() => resolve()));
  };
  try {
    // ── simple line-based state machine for stdout text ───────────
    let phase: 'idle' | 'streaming' = 'idle';
    let remaining = 0;
    const chunks: Buffer[] = [];
    let lineBuf = '';
    let ready = false;
    const readyWaiters: Array<() => void> = [];
    const finished = new Promise<Buffer>((resolve, reject) => {
      const timeout = setTimeout(
        () => reject(new Error(`timeout waiting for audio after ${durationMs + 5000} ms`)),
        durationMs + 5000,
      );
      port.on('data', (data: Buffer) => {
        let offset = 0;
        while (offset < data.length) {
          if (phase === 'streaming') {
            // Binary mode: the next `remaining` bytes are raw PCM.
            const take = Math.min(remaining, data.length - offset);
            chunks.push(data.subarray(offset, offset + take));
            offset += take;
            remaining -= take;
            if (remaining === 0) {
              phase = 'idle';
              lineBuf = '';
            }
            continue;
          }
          // text mode: accumulate until newline
          const nl = data.indexOf(0x0a, offset);
          if (nl === -1) {
            lineBuf += data.subarray(offset).toString('utf8');
            break;
          }
          lineBuf += data.subarray(offset, nl).toString('utf8');
          offset = nl + 1;
          const line = lineBuf.replace(/\r$/, '').trim();
          lineBuf = '';
          if (!line) continue;
          if (line.startsWith('BEGIN ')) {
            const announced = parseInt(line.slice(6), 10);
            if (!Number.isInteger(announced) || announced < 0) {
              // A NaN byte count would wedge the streaming branch.
              clearTimeout(timeout);
              reject(new Error(`malformed BEGIN line from firmware: ${line}`));
              return;
            }
            remaining = announced;
            // Nothing to stream when the firmware announces 0 bytes.
            if (remaining > 0) phase = 'streaming';
            console.log(`→ BEGIN ${remaining} bytes`);
          } else if (line === 'END') {
            clearTimeout(timeout);
            const pcm = Buffer.concat(chunks);
            resolve(pcm);
          } else if (line === 'READY') {
            ready = true;
            while (readyWaiters.length) readyWaiters.shift()!();
          } else if (line.startsWith('LOG ')) {
            console.log(`[esp] ${line.slice(4)}`);
          } else if (line.startsWith('ERR ')) {
            clearTimeout(timeout);
            reject(new Error(`firmware error: ${line.slice(4)}`));
          } else {
            console.log(`[esp] ${line}`);
          }
        }
      });
      port.on('error', reject);
    });
    // `finished` is awaited only after REC is sent; observe a rejection
    // now so an early ERR/timeout is never reported as unhandled. The
    // `await finished` below still throws it.
    finished.catch(() => {});
    // The ESP32 resets on port open (DTR/RTS). Wait until it prints
    // READY so we don't send commands into the bootloader.
    await new Promise<void>((resolve, reject) => {
      if (ready) return resolve();
      const timer = setTimeout(
        () => reject(new Error('timeout waiting for READY from firmware')),
        5000,
      );
      readyWaiters.push(() => {
        clearTimeout(timer);
        resolve();
      });
    });
    await new Promise((r) => setTimeout(r, 50));
    console.log(`→ REC ${durationMs} ms — speak now!`);
    port.write(`REC ${durationMs}\n`);
    const pcm = await finished;
    await closePort();
    // Basic RMS sanity check.
    let sumSq = 0;
    const samples = Math.floor(pcm.length / BYTES_PER_SAMPLE);
    for (let i = 0; i < pcm.length - 1; i += 2) {
      const s = pcm.readInt16LE(i);
      sumSq += s * s;
    }
    // Guard the empty capture so the report shows 0 rather than NaN.
    const rms = samples > 0 ? Math.sqrt(sumSq / samples) : 0;
    console.log(
      `✅ captured ${pcm.length} bytes (${samples} samples, ${(
        (samples / SAMPLE_RATE) *
        1000
      ).toFixed(0)} ms)   RMS=${rms.toFixed(0)}`,
    );
    if (outPath.toLowerCase().endsWith('.wav')) {
      writeFileSync(outPath, Buffer.concat([wavHeader(pcm.length, SAMPLE_RATE), pcm]));
    } else {
      writeFileSync(outPath, pcm);
    }
    console.log(`→ wrote ${outPath}`);
  } finally {
    // No-op on the success path (already closed above).
    await closePort();
  }
 }
 // Script runner: report any failure from main() and exit non-zero.
 main().catch((failure) => {
  console.error(failure);
  process.exit(1);
 });
--- a/apps/robot-hardware/src/main.cpp
+++ b/apps/robot-hardware/src/main.cpp
@ -1,147 +1,281 @@
-// Ti-Pote — Robot Hardware firmware (ESP32)
+// Ti-Pote — Minimal audio bring-up firmware (ESP32-WROOM-32)
 //
-// Responsibilities for v0:
+// GOAL: prove the I2S audio chain (INMP441 + MAX98357A) end to end
-//   - Listen on UART0 (the USB-connected serial port while the ESP32
+// with nothing else in the loop — no Pi, no OLED, no protocol frames.
-//     is plugged into Arthur's laptop; on the real robot this will
+// The ESP32 is plugged into a computer via USB and the host runs
-//     eventually be Serial2 wired to the Raspberry Pi).
+// two tiny scripts:
 //   - Decode incoming binary frames (see include/protocol_types.h).
 //   - Dispatch commands to the Eyes renderer.
 //   - Reply to PING with PONG.
 //   - Fall back to a sleepy animation if no heartbeat is received
 //     for HW_HEARTBEAT_TIMEOUT_MS (set in platformio.ini).
 //
-// Intentionally NOT yet implemented (Phase 2):
+//   scripts/esp-record.ts <file.wav|file.raw> [duration_ms]
-//   - I2S audio up/down streaming
+//   scripts/esp-play.ts   <file>
 //   - Servo / LED commands
 //
-// The hook points for those are marked with TODO(phase2).
+// Protocol over USB Serial (921600 baud, line-based for commands,
 // raw bytes for audio):
 //
 //   host → esp32
 //     "PING\n"              ping
 //     "REC <ms>\n"           start recording for <ms> milliseconds
 //     "PLAY <bytes>\n"       next <bytes> bytes on the wire are raw
 //                            S16 LE mono 16 kHz PCM, play them
 //
 //   esp32 → host
 //     "READY\n"              once at boot
 //     "PONG\n"               reply to PING
 //     "LOG <text>\n"         human-readable log line
 //     "ERR <text>\n"         error message
 //     "BEGIN <bytes>\n"      start of a REC response
 //     "<raw bytes>"          raw PCM (S16 LE mono 16 kHz)
 //     "END\n"                end of a REC response
 //     "OK\n"                 command completed
 //
 // Wiring (shared I2S bus on I2S_NUM_0):
 //   BCLK  = GPIO 32   (mic SCK + speaker BCLK)
 //   LRCLK = GPIO 33   (mic WS  + speaker LRC)
 //   MIC   = GPIO 34   (INMP441 SD → ESP32 data-in, input-only pin)
 //   SPK   = GPIO 22   (ESP32 data-out → MAX98357A DIN)
 #include <Arduino.h>
-#include "Protocol.h"
+#include <driver/i2s.h>
-#include "Eyes.h"
+#include <string.h>
-#ifndef HW_SERIAL_BAUD
+// ──────────────────────────────────────────────────────────
-#define HW_SERIAL_BAUD 921600
+// Audio config
-#endif
+// ──────────────────────────────────────────────────────────
-#ifndef HW_HEARTBEAT_TIMEOUT_MS
+static constexpr int SAMPLE_RATE      = 16000;
-#define HW_HEARTBEAT_TIMEOUT_MS 5000
+static constexpr int PIN_BCLK         = 32;
-#endif
+static constexpr int PIN_LRCLK        = 33;
 static constexpr int PIN_MIC_DIN      = 34;
 static constexpr int PIN_SPK_DOUT     = 22;
-// The communication stream. When the ESP32 is plugged into a
+static constexpr int DMA_COUNT        = 4;
-// computer, UART0 (Serial) is the USB-CDC port, which is exactly
+static constexpr int DMA_LEN          = 256;
 // what the robot-client will talk to during development. Later,
 // for the Pi wiring, change this to Serial2 and call
 // `Serial2.begin(HW_SERIAL_BAUD, SERIAL_8N1, RX_PIN, TX_PIN)`.
 #define HW_COMM Serial
-using namespace tipote;
+// Staging buffers — keep them outside of functions so we don't eat
 // stack on every tick.
 static constexpr size_t OUT_S16_SAMPLES = 320;  // 20 ms of S16 mono
 static int32_t g_rawStereo[OUT_S16_SAMPLES * 2];
 static int16_t g_micMono  [OUT_S16_SAMPLES];
 static int32_t g_spkStereo[OUT_S16_SAMPLES * 2];
 static uint8_t g_spkInBuf [OUT_S16_SAMPLES * 2];  // 640 bytes of S16 mono
-static Eyes         eyes;
+// ──────────────────────────────────────────────────────────
-static FrameDecoder decoder;
+// Line buffer for incoming text commands.
 // ──────────────────────────────────────────────────────────
-static uint32_t     lastHeartbeatMs = 0;
+static char     g_line[64];
-static bool         idleMode        = false;
+static size_t   g_lineLen = 0;
-// Forward decl
+static void sendLog(const char* msg) {
-static void handleFrame(const Frame& frame, void* userData);
+    Serial.print("LOG ");
-static void logLine(const char* line);
+    Serial.println(msg);
 }
 // Emit one protocol error line to the host: "ERR " followed by `msg`
 // and the line terminator added by Serial.println().
 static void sendErr(const char* msg) {
    Serial.print("ERR ");
    Serial.println(msg);
 }
 // ──────────────────────────────────────────────────────────
 // I2S init — single port, full duplex, shared BCLK/WS.
 // ──────────────────────────────────────────────────────────
 static bool audioBegin() {
    i2s_config_t cfg = {};
    cfg.mode                 = static_cast<i2s_mode_t>(I2S_MODE_MASTER |
                                                       I2S_MODE_RX |
                                                       I2S_MODE_TX);
    cfg.sample_rate          = SAMPLE_RATE;
    cfg.bits_per_sample      = I2S_BITS_PER_SAMPLE_32BIT;
    cfg.channel_format       = I2S_CHANNEL_FMT_RIGHT_LEFT;
    cfg.communication_format = I2S_COMM_FORMAT_STAND_I2S;
    cfg.intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1;
    cfg.dma_buf_count        = DMA_COUNT;
    cfg.dma_buf_len          = DMA_LEN;
    cfg.use_apll             = false;
    cfg.tx_desc_auto_clear   = true;
    cfg.fixed_mclk           = 0;
    if (i2s_driver_install(I2S_NUM_0, &cfg, 0, nullptr) != ESP_OK) return false;
    i2s_pin_config_t pins = {};
    pins.bck_io_num   = PIN_BCLK;
    pins.ws_io_num    = PIN_LRCLK;
    pins.data_out_num = PIN_SPK_DOUT;
    pins.data_in_num  = PIN_MIC_DIN;
    if (i2s_set_pin(I2S_NUM_0, &pins) != ESP_OK) {
        i2s_driver_uninstall(I2S_NUM_0);
        return false;
    }
    i2s_zero_dma_buffer(I2S_NUM_0);
    return true;
 }
 // Convert one batch of stereo 32-bit mic samples to S16 mono by
 // taking the left slot and shifting the 24-bit-aligned data down.
 // Returns the number of S16 samples written into `out`.
 static size_t micReadMono(int16_t* out, size_t maxSamples) {
    size_t wantPairs = maxSamples;
    if (wantPairs > OUT_S16_SAMPLES) wantPairs = OUT_S16_SAMPLES;
    size_t bytesRead = 0;
    const esp_err_t err = i2s_read(
        I2S_NUM_0,
        g_rawStereo,
        wantPairs * 2 * sizeof(int32_t),
        &bytesRead,
        portMAX_DELAY  // block — we're in a dedicated REC loop
    );
    if (err != ESP_OK || bytesRead == 0) return 0;
    const size_t pairs = bytesRead / (2 * sizeof(int32_t));
    for (size_t i = 0; i < pairs; ++i) {
        int32_t L = g_rawStereo[2 * i];
        int32_t s = L >> 14;
        if (s >  INT16_MAX) s =  INT16_MAX;
        if (s <  INT16_MIN) s =  INT16_MIN;
        out[i] = static_cast<int16_t>(s);
    }
    return pairs;
 }
 // Play one batch of S16 mono PCM: every sample is moved into the high
 // half of a 32-bit word and written to both stereo slots (what the
 // MAX98357A expects on a shared bus), then handed to i2s_write(), which
 // blocks until the DMA has taken it.
 //
 // samples : S16 mono input.
 // count   : number of samples; anything beyond OUT_S16_SAMPLES is ignored.
 static void spkWriteMono(const int16_t* samples, size_t count) {
    if (count == 0) return;
    const size_t n = (count > OUT_S16_SAMPLES) ? OUT_S16_SAMPLES : count;
    int32_t* slot = g_spkStereo;
    for (const int16_t* in = samples; in != samples + n; ++in) {
        const int32_t widened = static_cast<int32_t>(*in) << 16;
        *slot++ = widened;  // left
        *slot++ = widened;  // right
    }
    size_t bytesWritten = 0;
    i2s_write(I2S_NUM_0, g_spkStereo, n * 2 * sizeof(int32_t),
              &bytesWritten, portMAX_DELAY);
 }
 // ──────────────────────────────────────────────────────────
 // Command handlers
 // ──────────────────────────────────────────────────────────
 // REC handler: announce the payload size with "BEGIN <bytes>", stream
 // durationMs worth of S16 mono mic samples as raw bytes, then finish
 // with an empty line and "END".
 static void handleRec(uint32_t durationMs) {
    const uint32_t totalSamples = (SAMPLE_RATE * durationMs) / 1000;
    const uint32_t totalBytes   = totalSamples * sizeof(int16_t);
    Serial.print("BEGIN ");
    Serial.println(totalBytes);
    // Flush whatever old noise is in the mic DMA first.
    i2s_zero_dma_buffer(I2S_NUM_0);
    for (uint32_t sent = 0; sent < totalSamples;) {
        const uint32_t left = totalSamples - sent;
        const size_t want = (left > OUT_S16_SAMPLES) ? OUT_S16_SAMPLES : left;
        const size_t got = micReadMono(g_micMono, want);
        if (got > 0) {
            Serial.write(reinterpret_cast<const uint8_t*>(g_micMono),
                         got * sizeof(int16_t));
            sent += got;
        }
        // got == 0: nothing captured this round, just try again.
    }
    Serial.println();
    Serial.println("END");
 }
 // PLAY handler: consume exactly `totalBytes` bytes of raw S16 LE mono
 // PCM from the serial port and play them, then reply "OK". Replies
 // "ERR PLAY read timeout" and aborts if the host stops sending for
 // longer than the 2 s read timeout.
 //
 // Reads never ask for more than the bytes still owed, so the counter
 // cannot wrap and no byte of a following command is swallowed. When a
 // read ends on an odd byte count, the dangling byte is kept at the
 // front of the staging buffer and completed by the next read, so the
 // stream stays sample-aligned. A final unpaired byte (odd totalBytes)
 // is dropped.
 static void handlePlay(uint32_t totalBytes) {
    // Drain any pending crap from the speaker DMA so we don't start
    // with a pop.
    i2s_zero_dma_buffer(I2S_NUM_0);
    // Give Serial.readBytes a generous timeout so a jittery host
    // doesn't abort us mid-playback.
    Serial.setTimeout(2000);
    uint32_t remaining = totalBytes;
    size_t carried = 0;  // 0 or 1: half a sample waiting at g_spkInBuf[0]
    while (remaining > 0) {
        size_t want = sizeof(g_spkInBuf) - carried;
        if (want > remaining) want = remaining;
        const size_t got = Serial.readBytes(g_spkInBuf + carried, want);
        if (got == 0) {
            sendErr("PLAY read timeout");
            return;
        }
        remaining -= got;
        const size_t avail = carried + got;
        const size_t samples = avail / sizeof(int16_t);
        spkWriteMono(reinterpret_cast<const int16_t*>(g_spkInBuf), samples);
        // Keep an unpaired trailing byte for the next round.
        carried = avail & 1;
        if (carried) g_spkInBuf[0] = g_spkInBuf[avail - 1];
    }
    // Let the last frames actually reach the speaker, then clear.
    delay(50);
    i2s_zero_dma_buffer(I2S_NUM_0);
    Serial.println("OK");
 }
 // Dispatch one complete text command line (no line terminator):
 //   "PING"          → replies "PONG"
 //   "REC <ms>"      → handleRec(), <ms> must be in 1..60000
 //   "PLAY <bytes>"  → handlePlay(), <bytes> must be in 1..16 MiB
 // Anything else, or an out-of-range argument, is answered with ERR.
 static void handleLine(const char* line) {
    if (strcmp(line, "PING") == 0) {
        Serial.println("PONG");
        return;
    }
    const bool isRec  = strncmp(line, "REC ", 4) == 0;
    const bool isPlay = !isRec && strncmp(line, "PLAY ", 5) == 0;
    if (isRec) {
        const long ms = atol(line + 4);
        if (ms > 0 && ms <= 60000) {
            handleRec(static_cast<uint32_t>(ms));
        } else {
            sendErr("REC bad duration");
        }
        return;
    }
    if (isPlay) {
        const long bytes = atol(line + 5);
        if (bytes > 0 && bytes <= 16 * 1024 * 1024) {
            handlePlay(static_cast<uint32_t>(bytes));
        } else {
            sendErr("PLAY bad size");
        }
        return;
    }
    sendErr("unknown command");
 }
 // ──────────────────────────────────────────────────────────
 // Arduino entry points
 // ──────────────────────────────────────────────────────────
 void setup() {
-    HW_COMM.begin(HW_SERIAL_BAUD);
+    // Bump the UART RX buffer WAY above the 256-byte default so we
-    // Give the host a beat to open the port after auto-reset.
+    // can absorb a full PLAY payload (up to a few tens of KB) without
    // losing bytes if the host floods us.
    Serial.setRxBufferSize(16 * 1024);
    Serial.begin(921600);
    delay(50);
-    eyes.begin();
+    if (!audioBegin()) {
        sendErr("I2S init failed");
    } else {
        sendLog("I2S ready");
    }
-    decoder.onFrame(handleFrame);
+    Serial.println("READY");
    lastHeartbeatMs = millis();
    logLine("robot-hardware ready");
 }
 void loop() {
-    // Drain whatever the host has sent since the last tick.
+    while (Serial.available() > 0) {
-    while (HW_COMM.available() > 0) {
+        const int c = Serial.read();
-        int b = HW_COMM.read();
+        if (c < 0) break;
-        if (b < 0) break;
+        if (c == '\r') continue;
-        decoder.feed(static_cast<uint8_t>(b));
+        if (c == '\n') {
-    }
+            g_line[g_lineLen] = 0;
-
+            if (g_lineLen > 0) handleLine(g_line);
-    // Heartbeat watchdog: if we haven't heard from the host in a
+            g_lineLen = 0;
-    // while, slip into a sleepy animation so the robot doesn't
+            continue;
-    // look frozen. Any incoming frame resets this.
+        }
-    const uint32_t now = millis();
+        if (g_lineLen < sizeof(g_line) - 1) {
-    if (!idleMode && (now - lastHeartbeatMs) > HW_HEARTBEAT_TIMEOUT_MS) {
+            g_line[g_lineLen++] = static_cast<char>(c);
-        idleMode = true;
+        } else {
-        eyes.show(Emotion::SLEEPY);
+            g_lineLen = 0;
            sendErr("line overflow");
        }
    }
 }
 // ---------------------------------------------------------------
 // Frame dispatcher
 // ---------------------------------------------------------------
 // Handle one decoded frame from the host. Every frame refreshes the
 // heartbeat timestamp and clears idle mode before being dispatched on
 // its message type.
 static void handleFrame(const Frame& frame, void* /*userData*/) {
    lastHeartbeatMs = millis();
    idleMode = false;
    switch (frame.type) {
        case MsgType::DISPLAY_EMOTION: {
            if (frame.length < 1) {
                logLine("DISPLAY_EMOTION: empty payload");
                break;
            }
            const uint8_t emotionCode = frame.payload[0];
            const bool known = emotionCode < static_cast<uint8_t>(Emotion::COUNT);
            if (!known) {
                logLine("DISPLAY_EMOTION: out-of-range code");
                break;
            }
            eyes.show(static_cast<Emotion>(emotionCode));
            // ACK back (echoing the code) so the host knows it was applied.
            uint8_t ack[1] = {emotionCode};
            FrameEncoder::writeTo(HW_COMM, MsgType::ACK, ack, 1);
            break;
        }
        case MsgType::DISPLAY_CLEAR:
            eyes.clear();
            FrameEncoder::writeTo(HW_COMM, MsgType::ACK);
            break;
        case MsgType::PING:
            // Echo the payload back as PONG. Useful for latency
            // measurements and proving the link is symmetric.
            FrameEncoder::writeTo(HW_COMM, MsgType::PONG,
                                  frame.payload, frame.length);
            break;
        case MsgType::STATUS:
            // Heartbeat from host — the timestamp was already bumped
            // on entry. Nothing else to do for v0.
            break;
        // TODO(phase2): AUDIO_UP / AUDIO_DOWN / SERVO_CMD / LED_CMD
        default:
            logLine("unknown frame type");
            break;
    }
 }
 // ---------------------------------------------------------------
 // Diagnostic logging — wraps text in a LOG frame so the host
 // can parse it without getting confused by free text on the wire.
 // ---------------------------------------------------------------
 // Send `line` to the host as the payload of a LOG frame. Text longer
 // than MAX_PAYLOAD_SIZE bytes is truncated to that length.
 static void logLine(const char* line) {
    const uint16_t payloadLen =
        static_cast<uint16_t>(strnlen(line, MAX_PAYLOAD_SIZE));
    const uint8_t* payload = reinterpret_cast<const uint8_t*>(line);
    FrameEncoder::writeTo(HW_COMM, MsgType::LOG, payload, payloadLen);
 }