ca marche fluide !

2026-03-27 15:50:41 +01:00 · 2026-03-27 15:50:41 +01:00 · 787a5805b7
commit 787a5805b7
parent 4baabf3727
7 changed files with 113 additions and 31 deletions
--- a/apps/backend/src/adapters/inbound/websocket/robot.gateway.ts
+++ b/apps/backend/src/adapters/inbound/websocket/robot.gateway.ts
@ -130,6 +130,10 @@ export class RobotGateway implements OnGatewayConnection, OnGatewayDisconnect, I
    this.connectedDevices.get(deviceId)?.emit('audio_chunk', { data: base64 });
  }

+  sendResponseText(deviceId: string, text: string, audioBase64?: string) {
+    this.connectedDevices.get(deviceId)?.emit('response_text', { text, audio: audioBase64 });
+  }
+
  sendStatus(deviceId: string, state: RobotState) {
    this.connectedDevices.get(deviceId)?.emit('status', { state });
  }
--- a/apps/backend/src/core/ports/outbound/device-gateway.port.ts
+++ b/apps/backend/src/core/ports/outbound/device-gateway.port.ts
@ -1,5 +1,6 @@
 export interface IDeviceGatewayPort {
  sendAudioChunk(deviceId: string, chunk: Buffer): void;
+  sendResponseText(deviceId: string, text: string, audioBase64?: string): void;
  sendStatus(deviceId: string, state: 'listening' | 'thinking' | 'speaking' | 'idle'): void;
  sendNotification(deviceId: string, payload: Record<string, unknown>): void;
  isDeviceConnected(deviceId: string): boolean;
--- a/apps/backend/src/core/services/conversation.service.ts
+++ b/apps/backend/src/core/services/conversation.service.ts
@ -109,12 +109,14 @@ export class ConversationService implements IConversationPort {
      if (responseText) {
        this.deviceGateway.sendStatus(deviceId, 'speaking');

-        const audioBuffer = await this.ttsPort.synthesize(responseText);
-        this.logger.debug(`TTS complete: ${audioBuffer.length} bytes`);
-        this.deviceGateway.sendAudioChunk(deviceId, audioBuffer);
-      }
+        const pcm = await this.ttsPort.synthesize(responseText);
+        const wav = this.pcmToWav(pcm, 16000);
+        const audioBase64 = wav.toString('base64');

-      this.deviceGateway.sendStatus(deviceId, 'idle');
+        this.deviceGateway.sendResponseText(deviceId, responseText, audioBase64);
+      } else {
+        this.deviceGateway.sendStatus(deviceId, 'idle');
+      }
    } catch (error) {
      this.logger.error(`Error processing conversation for ${deviceId}:`, error);
      this.deviceGateway.sendStatus(deviceId, 'idle');
@ -123,6 +125,32 @@ export class ConversationService implements IConversationPort {
    return finalText;
  }

+  private pcmToWav(pcm: Buffer, sampleRate: number): Buffer {
+    const numChannels = 1;
+    const bitsPerSample = 16;
+    const byteRate = sampleRate * numChannels * (bitsPerSample / 8);
+    const blockAlign = numChannels * (bitsPerSample / 8);
+    const dataSize = pcm.length;
+    const headerSize = 44;
+
+    const header = Buffer.alloc(headerSize);
+    header.write('RIFF', 0);
+    header.writeUInt32LE(dataSize + headerSize - 8, 4);
+    header.write('WAVE', 8);
+    header.write('fmt ', 12);
+    header.writeUInt32LE(16, 16); // subchunk1 size
+    header.writeUInt16LE(1, 20); // PCM format
+    header.writeUInt16LE(numChannels, 22);
+    header.writeUInt32LE(sampleRate, 24);
+    header.writeUInt32LE(byteRate, 28);
+    header.writeUInt16LE(blockAlign, 32);
+    header.writeUInt16LE(bitsPerSample, 34);
+    header.write('data', 36);
+    header.writeUInt32LE(dataSize, 40);
+
+    return Buffer.concat([header, pcm]);
+  }
+
  interrupt(deviceId: string): void {
    const session = this.activeSessions.get(deviceId);
    if (!session) return;
--- a/apps/simulator/src/App.tsx
+++ b/apps/simulator/src/App.tsx
@ -37,7 +37,14 @@ function App() {
    emit('speech_end');
  }, [emit]);

-  const { recording, start: startMic, stop: stopMic } = useMicrophone({ onAudioChunk, onSpeechEnd });
+  const { recording, start: startMic, stop: stopMic, silentStop } = useMicrophone({ onAudioChunk, onSpeechEnd });
+
+  // Stop mic when Ti-Pote starts thinking/speaking
+  useEffect(() => {
+    if (recording && (state === 'thinking' || state === 'speaking')) {
+      silentStop();
+    }
+  }, [state, recording, silentStop]);

  // Auto-restart listening when Ti-Pote finishes speaking
  useEffect(() => {
@ -45,8 +52,11 @@ function App() {
    prevStateRef.current = state;

    if (conversationActive && state === 'idle' && (prevState === 'speaking' || prevState === 'thinking')) {
-      emit('wake_word_detected');
-      startMic();
+      const timer = setTimeout(() => {
+        emit('wake_word_detected');
+        startMic();
+      }, 500);
+      return () => clearTimeout(timer);
    }
  }, [state, conversationActive, emit, startMic]);

--- a/apps/simulator/src/hooks/useAudioPlayer.ts
+++ b/apps/simulator/src/hooks/useAudioPlayer.ts
@ -4,10 +4,12 @@ const SAMPLE_RATE = 16000;

 export function useAudioPlayer() {
  const contextRef = useRef<AudioContext | null>(null);
+  const nextStartTimeRef = useRef(0);

  const getContext = useCallback(() => {
    if (!contextRef.current || contextRef.current.state === 'closed') {
      contextRef.current = new AudioContext({ sampleRate: SAMPLE_RATE });
+      nextStartTimeRef.current = 0;
    }
    if (contextRef.current.state === 'suspended') {
      contextRef.current.resume();
@ -28,6 +30,8 @@ export function useAudioPlayer() {

      // Ensure even byte count for Int16
      const evenLength = bytes.length - (bytes.length % 2);
+      if (evenLength < 2) return;
+
      const int16 = new Int16Array(bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + evenLength));

      // Int16 PCM → Float32
@ -42,17 +46,26 @@ export function useAudioPlayer() {
      const source = ctx.createBufferSource();
      source.buffer = buffer;
      source.connect(ctx.destination);
-      source.start();
+
+      // Schedule seamlessly after the previous chunk
+      const now = ctx.currentTime;
+      const startTime = Math.max(now, nextStartTimeRef.current);
+      source.start(startTime);
+      nextStartTimeRef.current = startTime + buffer.duration;
    },
    [getContext],
  );

-  const flush = useCallback(() => {}, []);
+  const flush = useCallback(() => {
+    // Reset scheduling for next conversation turn
+    nextStartTimeRef.current = 0;
+  }, []);

  const stop = useCallback(() => {
    if (contextRef.current) {
      contextRef.current.close();
      contextRef.current = null;
+      nextStartTimeRef.current = 0;
    }
  }, []);

--- a/apps/simulator/src/hooks/useMicrophone.ts
+++ b/apps/simulator/src/hooks/useMicrophone.ts
@ -3,9 +3,7 @@ import { useRef, useState, useCallback } from 'react';
 interface UseMicrophoneOptions {
  onAudioChunk: (chunk: ArrayBuffer, sampleRate: number) => void;
  onSpeechEnd: () => void;
-  /** Silence duration in ms before triggering speech end (default: 1500) */
  silenceTimeout?: number;
-  /** RMS threshold below which audio is considered silence (default: 0.01) */
  silenceThreshold?: number;
 }

@ -39,7 +37,7 @@ export function useMicrophone({
    contextRef.current = null;
    streamRef.current = null;
    hasSpeechRef.current = false;
-    stoppedRef.current = false;
+    stoppedRef.current = true;
    setRecording(false);
  }, [clearSilenceTimer]);

@ -65,14 +63,12 @@ export function useMicrophone({

        const float32 = e.inputBuffer.getChannelData(0);

-        // Calculate RMS volume
        let sum = 0;
        for (let i = 0; i < float32.length; i++) {
          sum += float32[i] * float32[i];
        }
        const rms = Math.sqrt(sum / float32.length);

-        // Convert and send audio
        const int16 = new Int16Array(float32.length);
        for (let i = 0; i < float32.length; i++) {
          const s = Math.max(-1, Math.min(1, float32[i]));
@ -80,12 +76,10 @@ export function useMicrophone({
        }
        onAudioChunk(int16.buffer, context.sampleRate);

-        // VAD logic
        if (rms > silenceThreshold) {
          hasSpeechRef.current = true;
          clearSilenceTimer();
        } else if (hasSpeechRef.current && !silenceTimerRef.current) {
-          // Speech detected before, now silence — start countdown
          silenceTimerRef.current = setTimeout(() => {
            if (stoppedRef.current) return;
            stoppedRef.current = true;
@ -103,12 +97,18 @@ export function useMicrophone({
    }
  }, [onAudioChunk, onSpeechEnd, silenceTimeout, silenceThreshold, clearSilenceTimer, cleanup]);

+  // Stop and emit speech_end
  const stop = useCallback(() => {
    if (stoppedRef.current) return;
-    stoppedRef.current = true;
    cleanup();
    onSpeechEnd();
  }, [onSpeechEnd, cleanup]);

-  return { recording, start, stop };
+  // Stop silently — no speech_end emitted
+  const silentStop = useCallback(() => {
+    if (stoppedRef.current) return;
+    cleanup();
+  }, [cleanup]);
+
+  return { recording, start, stop, silentStop };
 }
--- a/apps/simulator/src/hooks/useSocket.ts
+++ b/apps/simulator/src/hooks/useSocket.ts
@ -1,6 +1,5 @@
 import { useRef, useState, useCallback } from 'react';
 import { io, Socket } from 'socket.io-client';
-import { useAudioPlayer } from './useAudioPlayer';

 export type RobotState = 'disconnected' | 'idle' | 'listening' | 'thinking' | 'speaking';

@ -16,12 +15,35 @@ export function useSocket() {
  const [state, setState] = useState<RobotState>('disconnected');
  const [connected, setConnected] = useState(false);
  const [logs, setLogs] = useState<LogEntry[]>([]);
-  const audioPlayer = useAudioPlayer();

  const addLog = useCallback((direction: LogEntry['direction'], event: string, data?: string) => {
    setLogs((prev) => [...prev.slice(-200), { timestamp: new Date(), direction, event, data }]);
  }, []);

+  const audioRef = useRef<HTMLAudioElement | null>(null);
+
+  const playAudio = useCallback((audioBase64: string) => {
+    if (audioRef.current) {
+      audioRef.current.pause();
+      audioRef.current = null;
+    }
+
+    const audio = new Audio(`data:audio/wav;base64,${audioBase64}`);
+    audioRef.current = audio;
+
+    audio.onended = () => {
+      audioRef.current = null;
+      setState('idle');
+    };
+
+    audio.onerror = () => {
+      audioRef.current = null;
+      setState('idle');
+    };
+
+    audio.play();
+  }, []);
+
  const connect = useCallback(
    (serverUrl: string, deviceToken: string) => {
      if (socketRef.current) {
@ -52,31 +74,35 @@ export function useSocket() {
      socket.on('status', (payload: { state: RobotState }) => {
        setState(payload.state);
        addLog('in', 'status', payload.state);
-        if (payload.state === 'idle') {
-          audioPlayer.flush();
+      });
+
+      socket.on('response_text', (payload: { text: string; audio?: string }) => {
+        addLog('in', 'response_text', payload.text);
+        if (payload.audio) {
+          playAudio(payload.audio);
+        } else {
+          setState('idle');
        }
      });

      socket.on('audio_chunk', (payload: { data: string }) => {
-        addLog('in', 'audio_chunk', `${payload.data?.length ?? 0} chars (base64)`);
-        if (payload.data) {
-          audioPlayer.playChunk(payload.data);
-        }
+        addLog('in', 'audio_chunk', `${payload.data?.length ?? 0} chars`);
      });

      socket.on('notification', (payload: Record<string, unknown>) => {
        addLog('in', 'notification', JSON.stringify(payload));
      });

-      socket.on('response_start', () => addLog('in', 'response_start'));
-      socket.on('response_end', () => addLog('in', 'response_end'));
-
      socketRef.current = socket;
    },
-    [addLog],
+    [addLog, playAudio],
  );

  const disconnect = useCallback(() => {
+    if (audioRef.current) {
+      audioRef.current.pause();
+      audioRef.current = null;
+    }
    socketRef.current?.disconnect();
    socketRef.current = null;
    setConnected(false);