Ça marche de façon fluide !

This commit is contained in:
ordinarthur 2026-03-27 15:50:41 +01:00
parent 4baabf3727
commit 787a5805b7
7 changed files with 113 additions and 31 deletions

View File

@ -130,6 +130,10 @@ export class RobotGateway implements OnGatewayConnection, OnGatewayDisconnect, I
this.connectedDevices.get(deviceId)?.emit('audio_chunk', { data: base64 });
}
/**
 * Emits a `response_text` event carrying the reply text and, optionally,
 * base64-encoded audio to the entry registered for `deviceId`.
 *
 * Does nothing when `connectedDevices` has no entry for that id.
 *
 * @param deviceId    Key looked up in `connectedDevices`.
 * @param text        Reply text, sent as `text` in the payload.
 * @param audioBase64 Optional audio, sent as `audio` in the payload
 *                    (`undefined` when omitted).
 */
sendResponseText(deviceId: string, text: string, audioBase64?: string) {
  const target = this.connectedDevices.get(deviceId);
  target?.emit('response_text', { text, audio: audioBase64 });
}
/**
 * Emits a `status` event with the given robot state to the entry registered
 * for `deviceId`. Does nothing when `connectedDevices` has no such entry.
 *
 * @param deviceId Key looked up in `connectedDevices`.
 * @param state    State forwarded as `{ state }` in the payload.
 */
sendStatus(deviceId: string, state: RobotState) {
  const target = this.connectedDevices.get(deviceId);
  target?.emit('status', { state });
}

View File

@ -1,5 +1,6 @@
export interface IDeviceGatewayPort {
sendAudioChunk(deviceId: string, chunk: Buffer): void;
sendResponseText(deviceId: string, text: string, audioBase64?: string): void;
sendStatus(deviceId: string, state: 'listening' | 'thinking' | 'speaking' | 'idle'): void;
sendNotification(deviceId: string, payload: Record<string, unknown>): void;
isDeviceConnected(deviceId: string): boolean;

View File

@ -109,12 +109,14 @@ export class ConversationService implements IConversationPort {
if (responseText) {
this.deviceGateway.sendStatus(deviceId, 'speaking');
const audioBuffer = await this.ttsPort.synthesize(responseText);
this.logger.debug(`TTS complete: ${audioBuffer.length} bytes`);
this.deviceGateway.sendAudioChunk(deviceId, audioBuffer);
}
const pcm = await this.ttsPort.synthesize(responseText);
const wav = this.pcmToWav(pcm, 16000);
const audioBase64 = wav.toString('base64');
this.deviceGateway.sendStatus(deviceId, 'idle');
this.deviceGateway.sendResponseText(deviceId, responseText, audioBase64);
} else {
this.deviceGateway.sendStatus(deviceId, 'idle');
}
} catch (error) {
this.logger.error(`Error processing conversation for ${deviceId}:`, error);
this.deviceGateway.sendStatus(deviceId, 'idle');
@ -123,6 +125,32 @@ export class ConversationService implements IConversationPort {
return finalText;
}
/**
 * Wraps raw PCM bytes in a WAV container by prepending a 44-byte RIFF header.
 *
 * The header always declares format tag 1 (PCM), one channel and 16 bits per
 * sample; only the sample rate and the payload size vary.
 * NOTE(review): assumes `pcm` really is 16-bit mono audio — confirm against
 * the TTS port's output format.
 *
 * @param pcm        Raw audio payload, copied unchanged after the header.
 * @param sampleRate Sample rate written into the header, in Hz.
 * @returns A new buffer containing the header followed by `pcm`.
 */
private pcmToWav(pcm: Buffer, sampleRate: number): Buffer {
  const HEADER_BYTES = 44;
  const channels = 1;
  const bitDepth = 16;
  const bytesPerFrame = channels * (bitDepth / 8);
  const payloadBytes = pcm.length;

  // Header fields in file order; each entry is written at its byte offset.
  const fields: ReadonlyArray<
    | { at: number; tag: string }
    | { at: number; u16: number }
    | { at: number; u32: number }
  > = [
    { at: 0, tag: 'RIFF' },
    { at: 4, u32: payloadBytes + HEADER_BYTES - 8 }, // file size minus the 8-byte RIFF preamble
    { at: 8, tag: 'WAVE' },
    { at: 12, tag: 'fmt ' },
    { at: 16, u32: 16 }, // size of the fmt sub-chunk
    { at: 20, u16: 1 }, // audio format: PCM
    { at: 22, u16: channels },
    { at: 24, u32: sampleRate },
    { at: 28, u32: sampleRate * channels * (bitDepth / 8) }, // byte rate
    { at: 32, u16: bytesPerFrame }, // block align
    { at: 34, u16: bitDepth },
    { at: 36, tag: 'data' },
    { at: 40, u32: payloadBytes },
  ];

  const wavHeader = Buffer.alloc(HEADER_BYTES);
  for (const field of fields) {
    if ('tag' in field) {
      wavHeader.write(field.tag, field.at);
    } else if ('u16' in field) {
      wavHeader.writeUInt16LE(field.u16, field.at);
    } else {
      wavHeader.writeUInt32LE(field.u32, field.at);
    }
  }

  return Buffer.concat([wavHeader, pcm]);
}
interrupt(deviceId: string): void {
const session = this.activeSessions.get(deviceId);
if (!session) return;

View File

@ -37,7 +37,14 @@ function App() {
emit('speech_end');
}, [emit]);
const { recording, start: startMic, stop: stopMic } = useMicrophone({ onAudioChunk, onSpeechEnd });
const { recording, start: startMic, stop: stopMic, silentStop } = useMicrophone({ onAudioChunk, onSpeechEnd });
// While the robot is thinking or speaking, shut the microphone down quietly
// (via silentStop rather than stop).
useEffect(() => {
  const robotBusy = state === 'thinking' || state === 'speaking';
  if (!recording || !robotBusy) return;
  silentStop();
}, [state, recording, silentStop]);
// Auto-restart listening when Ti-Pote finishes speaking
useEffect(() => {
@ -45,8 +52,11 @@ function App() {
prevStateRef.current = state;
if (conversationActive && state === 'idle' && (prevState === 'speaking' || prevState === 'thinking')) {
emit('wake_word_detected');
startMic();
const timer = setTimeout(() => {
emit('wake_word_detected');
startMic();
}, 500);
return () => clearTimeout(timer);
}
}, [state, conversationActive, emit, startMic]);

View File

@ -4,10 +4,12 @@ const SAMPLE_RATE = 16000;
export function useAudioPlayer() {
const contextRef = useRef<AudioContext | null>(null);
const nextStartTimeRef = useRef(0);
const getContext = useCallback(() => {
if (!contextRef.current || contextRef.current.state === 'closed') {
contextRef.current = new AudioContext({ sampleRate: SAMPLE_RATE });
nextStartTimeRef.current = 0;
}
if (contextRef.current.state === 'suspended') {
contextRef.current.resume();
@ -28,6 +30,8 @@ export function useAudioPlayer() {
// Ensure even byte count for Int16
const evenLength = bytes.length - (bytes.length % 2);
if (evenLength < 2) return;
const int16 = new Int16Array(bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + evenLength));
// Int16 PCM → Float32
@ -42,17 +46,26 @@ export function useAudioPlayer() {
const source = ctx.createBufferSource();
source.buffer = buffer;
source.connect(ctx.destination);
source.start();
// Schedule seamlessly after the previous chunk
const now = ctx.currentTime;
const startTime = Math.max(now, nextStartTimeRef.current);
source.start(startTime);
nextStartTimeRef.current = startTime + buffer.duration;
},
[getContext],
);
const flush = useCallback(() => {}, []);
const flush = useCallback(() => {
// Reset scheduling for next conversation turn
nextStartTimeRef.current = 0;
}, []);
// Tears down playback: closes the current AudioContext (if there is one),
// forgets it, and resets the chunk scheduling cursor to 0.
const stop = useCallback(() => {
  const activeContext = contextRef.current;
  if (!activeContext) return;

  activeContext.close();
  contextRef.current = null;
  nextStartTimeRef.current = 0;
}, []);

View File

@ -3,9 +3,7 @@ import { useRef, useState, useCallback } from 'react';
interface UseMicrophoneOptions {
onAudioChunk: (chunk: ArrayBuffer, sampleRate: number) => void;
onSpeechEnd: () => void;
/** Silence duration in ms before triggering speech end (default: 1500) */
silenceTimeout?: number;
/** RMS threshold below which audio is considered silence (default: 0.01) */
silenceThreshold?: number;
}
@ -39,7 +37,7 @@ export function useMicrophone({
contextRef.current = null;
streamRef.current = null;
hasSpeechRef.current = false;
stoppedRef.current = false;
stoppedRef.current = true;
setRecording(false);
}, [clearSilenceTimer]);
@ -65,14 +63,12 @@ export function useMicrophone({
const float32 = e.inputBuffer.getChannelData(0);
// Calculate RMS volume
let sum = 0;
for (let i = 0; i < float32.length; i++) {
sum += float32[i] * float32[i];
}
const rms = Math.sqrt(sum / float32.length);
// Convert and send audio
const int16 = new Int16Array(float32.length);
for (let i = 0; i < float32.length; i++) {
const s = Math.max(-1, Math.min(1, float32[i]));
@ -80,12 +76,10 @@ export function useMicrophone({
}
onAudioChunk(int16.buffer, context.sampleRate);
// VAD logic
if (rms > silenceThreshold) {
hasSpeechRef.current = true;
clearSilenceTimer();
} else if (hasSpeechRef.current && !silenceTimerRef.current) {
// Speech detected before, now silence — start countdown
silenceTimerRef.current = setTimeout(() => {
if (stoppedRef.current) return;
stoppedRef.current = true;
@ -103,12 +97,18 @@ export function useMicrophone({
}
}, [onAudioChunk, onSpeechEnd, silenceTimeout, silenceThreshold, clearSilenceTimer, cleanup]);
// Explicit stop: unless already stopped, mark the mic as stopped, run cleanup,
// then notify the caller through onSpeechEnd.
const stop = useCallback(() => {
  if (!stoppedRef.current) {
    stoppedRef.current = true;
    cleanup();
    onSpeechEnd();
  }
}, [onSpeechEnd, cleanup]);
return { recording, start, stop };
// Quiet stop: runs cleanup when the mic is not already stopped, and never
// calls onSpeechEnd.
const silentStop = useCallback(() => {
  if (!stoppedRef.current) {
    cleanup();
  }
}, [cleanup]);
return { recording, start, stop, silentStop };
}

View File

@ -1,6 +1,5 @@
import { useRef, useState, useCallback } from 'react';
import { io, Socket } from 'socket.io-client';
import { useAudioPlayer } from './useAudioPlayer';
export type RobotState = 'disconnected' | 'idle' | 'listening' | 'thinking' | 'speaking';
@ -16,12 +15,35 @@ export function useSocket() {
const [state, setState] = useState<RobotState>('disconnected');
const [connected, setConnected] = useState(false);
const [logs, setLogs] = useState<LogEntry[]>([]);
const audioPlayer = useAudioPlayer();
const addLog = useCallback((direction: LogEntry['direction'], event: string, data?: string) => {
setLogs((prev) => [...prev.slice(-200), { timestamp: new Date(), direction, event, data }]);
}, []);
const audioRef = useRef<HTMLAudioElement | null>(null);
/**
 * Plays a base64-encoded WAV clip through an `Audio` element and sets the
 * state to `'idle'` once the clip is over.
 *
 * Any clip that is still referenced is paused and replaced. The state is reset
 * to `'idle'` when the clip ends, when the element reports an error, or when
 * `play()` rejects (for example if the browser blocks playback) — the last
 * case would otherwise leave the state stuck and surface as an unhandled
 * promise rejection.
 *
 * @param audioBase64 WAV file contents, base64-encoded (no data-URI prefix).
 */
const playAudio = useCallback((audioBase64: string) => {
  const previous = audioRef.current;
  if (previous) {
    // Detach handlers before pausing so a late event from the superseded clip
    // cannot clear the ref or reset the state on behalf of the new clip.
    previous.onended = null;
    previous.onerror = null;
    previous.pause();
    audioRef.current = null;
  }

  const audio = new Audio(`data:audio/wav;base64,${audioBase64}`);
  audioRef.current = audio;

  // Shared completion path; ignored if this clip is no longer the current one
  // (replaced by a newer clip or cleared elsewhere).
  const finish = () => {
    if (audioRef.current !== audio) return;
    audioRef.current = null;
    setState('idle');
  };
  audio.onended = finish;
  audio.onerror = finish;

  audio.play().catch((err: unknown) => {
    // A rejection for a clip that was already replaced or cleared is expected
    // (pausing a pending play() aborts it) and needs no handling.
    if (audioRef.current !== audio) return;
    console.warn('Audio playback could not start:', err);
    finish();
  });
}, []);
const connect = useCallback(
(serverUrl: string, deviceToken: string) => {
if (socketRef.current) {
@ -52,31 +74,35 @@ export function useSocket() {
socket.on('status', (payload: { state: RobotState }) => {
setState(payload.state);
addLog('in', 'status', payload.state);
if (payload.state === 'idle') {
audioPlayer.flush();
});
socket.on('response_text', (payload: { text: string; audio?: string }) => {
addLog('in', 'response_text', payload.text);
if (payload.audio) {
playAudio(payload.audio);
} else {
setState('idle');
}
});
socket.on('audio_chunk', (payload: { data: string }) => {
addLog('in', 'audio_chunk', `${payload.data?.length ?? 0} chars (base64)`);
if (payload.data) {
audioPlayer.playChunk(payload.data);
}
addLog('in', 'audio_chunk', `${payload.data?.length ?? 0} chars`);
});
socket.on('notification', (payload: Record<string, unknown>) => {
addLog('in', 'notification', JSON.stringify(payload));
});
socket.on('response_start', () => addLog('in', 'response_start'));
socket.on('response_end', () => addLog('in', 'response_end'));
socketRef.current = socket;
},
[addLog],
[addLog, playAudio],
);
const disconnect = useCallback(() => {
if (audioRef.current) {
audioRef.current.pause();
audioRef.current = null;
}
socketRef.current?.disconnect();
socketRef.current = null;
setConnected(false);