ca marche fluide !
This commit is contained in:
parent
4baabf3727
commit
787a5805b7
@ -130,6 +130,10 @@ export class RobotGateway implements OnGatewayConnection, OnGatewayDisconnect, I
|
||||
this.connectedDevices.get(deviceId)?.emit('audio_chunk', { data: base64 });
|
||||
}
|
||||
|
||||
sendResponseText(deviceId: string, text: string, audioBase64?: string) {
|
||||
this.connectedDevices.get(deviceId)?.emit('response_text', { text, audio: audioBase64 });
|
||||
}
|
||||
|
||||
sendStatus(deviceId: string, state: RobotState) {
|
||||
this.connectedDevices.get(deviceId)?.emit('status', { state });
|
||||
}
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
export interface IDeviceGatewayPort {
|
||||
sendAudioChunk(deviceId: string, chunk: Buffer): void;
|
||||
sendResponseText(deviceId: string, text: string, audioBase64?: string): void;
|
||||
sendStatus(deviceId: string, state: 'listening' | 'thinking' | 'speaking' | 'idle'): void;
|
||||
sendNotification(deviceId: string, payload: Record<string, unknown>): void;
|
||||
isDeviceConnected(deviceId: string): boolean;
|
||||
|
||||
@ -109,12 +109,14 @@ export class ConversationService implements IConversationPort {
|
||||
if (responseText) {
|
||||
this.deviceGateway.sendStatus(deviceId, 'speaking');
|
||||
|
||||
const audioBuffer = await this.ttsPort.synthesize(responseText);
|
||||
this.logger.debug(`TTS complete: ${audioBuffer.length} bytes`);
|
||||
this.deviceGateway.sendAudioChunk(deviceId, audioBuffer);
|
||||
}
|
||||
const pcm = await this.ttsPort.synthesize(responseText);
|
||||
const wav = this.pcmToWav(pcm, 16000);
|
||||
const audioBase64 = wav.toString('base64');
|
||||
|
||||
this.deviceGateway.sendStatus(deviceId, 'idle');
|
||||
this.deviceGateway.sendResponseText(deviceId, responseText, audioBase64);
|
||||
} else {
|
||||
this.deviceGateway.sendStatus(deviceId, 'idle');
|
||||
}
|
||||
} catch (error) {
|
||||
this.logger.error(`Error processing conversation for ${deviceId}:`, error);
|
||||
this.deviceGateway.sendStatus(deviceId, 'idle');
|
||||
@ -123,6 +125,32 @@ export class ConversationService implements IConversationPort {
|
||||
return finalText;
|
||||
}
|
||||
|
||||
private pcmToWav(pcm: Buffer, sampleRate: number): Buffer {
|
||||
const numChannels = 1;
|
||||
const bitsPerSample = 16;
|
||||
const byteRate = sampleRate * numChannels * (bitsPerSample / 8);
|
||||
const blockAlign = numChannels * (bitsPerSample / 8);
|
||||
const dataSize = pcm.length;
|
||||
const headerSize = 44;
|
||||
|
||||
const header = Buffer.alloc(headerSize);
|
||||
header.write('RIFF', 0);
|
||||
header.writeUInt32LE(dataSize + headerSize - 8, 4);
|
||||
header.write('WAVE', 8);
|
||||
header.write('fmt ', 12);
|
||||
header.writeUInt32LE(16, 16); // subchunk1 size
|
||||
header.writeUInt16LE(1, 20); // PCM format
|
||||
header.writeUInt16LE(numChannels, 22);
|
||||
header.writeUInt32LE(sampleRate, 24);
|
||||
header.writeUInt32LE(byteRate, 28);
|
||||
header.writeUInt16LE(blockAlign, 32);
|
||||
header.writeUInt16LE(bitsPerSample, 34);
|
||||
header.write('data', 36);
|
||||
header.writeUInt32LE(dataSize, 40);
|
||||
|
||||
return Buffer.concat([header, pcm]);
|
||||
}
|
||||
|
||||
interrupt(deviceId: string): void {
|
||||
const session = this.activeSessions.get(deviceId);
|
||||
if (!session) return;
|
||||
|
||||
@ -37,7 +37,14 @@ function App() {
|
||||
emit('speech_end');
|
||||
}, [emit]);
|
||||
|
||||
const { recording, start: startMic, stop: stopMic } = useMicrophone({ onAudioChunk, onSpeechEnd });
|
||||
const { recording, start: startMic, stop: stopMic, silentStop } = useMicrophone({ onAudioChunk, onSpeechEnd });
|
||||
|
||||
// Stop mic when Ti-Pote starts thinking/speaking
|
||||
useEffect(() => {
|
||||
if (recording && (state === 'thinking' || state === 'speaking')) {
|
||||
silentStop();
|
||||
}
|
||||
}, [state, recording, silentStop]);
|
||||
|
||||
// Auto-restart listening when Ti-Pote finishes speaking
|
||||
useEffect(() => {
|
||||
@ -45,8 +52,11 @@ function App() {
|
||||
prevStateRef.current = state;
|
||||
|
||||
if (conversationActive && state === 'idle' && (prevState === 'speaking' || prevState === 'thinking')) {
|
||||
emit('wake_word_detected');
|
||||
startMic();
|
||||
const timer = setTimeout(() => {
|
||||
emit('wake_word_detected');
|
||||
startMic();
|
||||
}, 500);
|
||||
return () => clearTimeout(timer);
|
||||
}
|
||||
}, [state, conversationActive, emit, startMic]);
|
||||
|
||||
|
||||
@ -4,10 +4,12 @@ const SAMPLE_RATE = 16000;
|
||||
|
||||
export function useAudioPlayer() {
|
||||
const contextRef = useRef<AudioContext | null>(null);
|
||||
const nextStartTimeRef = useRef(0);
|
||||
|
||||
const getContext = useCallback(() => {
|
||||
if (!contextRef.current || contextRef.current.state === 'closed') {
|
||||
contextRef.current = new AudioContext({ sampleRate: SAMPLE_RATE });
|
||||
nextStartTimeRef.current = 0;
|
||||
}
|
||||
if (contextRef.current.state === 'suspended') {
|
||||
contextRef.current.resume();
|
||||
@ -28,6 +30,8 @@ export function useAudioPlayer() {
|
||||
|
||||
// Ensure even byte count for Int16
|
||||
const evenLength = bytes.length - (bytes.length % 2);
|
||||
if (evenLength < 2) return;
|
||||
|
||||
const int16 = new Int16Array(bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + evenLength));
|
||||
|
||||
// Int16 PCM → Float32
|
||||
@ -42,17 +46,26 @@ export function useAudioPlayer() {
|
||||
const source = ctx.createBufferSource();
|
||||
source.buffer = buffer;
|
||||
source.connect(ctx.destination);
|
||||
source.start();
|
||||
|
||||
// Schedule seamlessly after the previous chunk
|
||||
const now = ctx.currentTime;
|
||||
const startTime = Math.max(now, nextStartTimeRef.current);
|
||||
source.start(startTime);
|
||||
nextStartTimeRef.current = startTime + buffer.duration;
|
||||
},
|
||||
[getContext],
|
||||
);
|
||||
|
||||
const flush = useCallback(() => {}, []);
|
||||
const flush = useCallback(() => {
|
||||
// Reset scheduling for next conversation turn
|
||||
nextStartTimeRef.current = 0;
|
||||
}, []);
|
||||
|
||||
const stop = useCallback(() => {
|
||||
if (contextRef.current) {
|
||||
contextRef.current.close();
|
||||
contextRef.current = null;
|
||||
nextStartTimeRef.current = 0;
|
||||
}
|
||||
}, []);
|
||||
|
||||
|
||||
@ -3,9 +3,7 @@ import { useRef, useState, useCallback } from 'react';
|
||||
interface UseMicrophoneOptions {
|
||||
onAudioChunk: (chunk: ArrayBuffer, sampleRate: number) => void;
|
||||
onSpeechEnd: () => void;
|
||||
/** Silence duration in ms before triggering speech end (default: 1500) */
|
||||
silenceTimeout?: number;
|
||||
/** RMS threshold below which audio is considered silence (default: 0.01) */
|
||||
silenceThreshold?: number;
|
||||
}
|
||||
|
||||
@ -39,7 +37,7 @@ export function useMicrophone({
|
||||
contextRef.current = null;
|
||||
streamRef.current = null;
|
||||
hasSpeechRef.current = false;
|
||||
stoppedRef.current = false;
|
||||
stoppedRef.current = true;
|
||||
setRecording(false);
|
||||
}, [clearSilenceTimer]);
|
||||
|
||||
@ -65,14 +63,12 @@ export function useMicrophone({
|
||||
|
||||
const float32 = e.inputBuffer.getChannelData(0);
|
||||
|
||||
// Calculate RMS volume
|
||||
let sum = 0;
|
||||
for (let i = 0; i < float32.length; i++) {
|
||||
sum += float32[i] * float32[i];
|
||||
}
|
||||
const rms = Math.sqrt(sum / float32.length);
|
||||
|
||||
// Convert and send audio
|
||||
const int16 = new Int16Array(float32.length);
|
||||
for (let i = 0; i < float32.length; i++) {
|
||||
const s = Math.max(-1, Math.min(1, float32[i]));
|
||||
@ -80,12 +76,10 @@ export function useMicrophone({
|
||||
}
|
||||
onAudioChunk(int16.buffer, context.sampleRate);
|
||||
|
||||
// VAD logic
|
||||
if (rms > silenceThreshold) {
|
||||
hasSpeechRef.current = true;
|
||||
clearSilenceTimer();
|
||||
} else if (hasSpeechRef.current && !silenceTimerRef.current) {
|
||||
// Speech detected before, now silence — start countdown
|
||||
silenceTimerRef.current = setTimeout(() => {
|
||||
if (stoppedRef.current) return;
|
||||
stoppedRef.current = true;
|
||||
@ -103,12 +97,18 @@ export function useMicrophone({
|
||||
}
|
||||
}, [onAudioChunk, onSpeechEnd, silenceTimeout, silenceThreshold, clearSilenceTimer, cleanup]);
|
||||
|
||||
// Stop and emit speech_end
|
||||
const stop = useCallback(() => {
|
||||
if (stoppedRef.current) return;
|
||||
stoppedRef.current = true;
|
||||
cleanup();
|
||||
onSpeechEnd();
|
||||
}, [onSpeechEnd, cleanup]);
|
||||
|
||||
return { recording, start, stop };
|
||||
// Stop silently — no speech_end emitted
|
||||
const silentStop = useCallback(() => {
|
||||
if (stoppedRef.current) return;
|
||||
cleanup();
|
||||
}, [cleanup]);
|
||||
|
||||
return { recording, start, stop, silentStop };
|
||||
}
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
import { useRef, useState, useCallback } from 'react';
|
||||
import { io, Socket } from 'socket.io-client';
|
||||
import { useAudioPlayer } from './useAudioPlayer';
|
||||
|
||||
export type RobotState = 'disconnected' | 'idle' | 'listening' | 'thinking' | 'speaking';
|
||||
|
||||
@ -16,12 +15,35 @@ export function useSocket() {
|
||||
const [state, setState] = useState<RobotState>('disconnected');
|
||||
const [connected, setConnected] = useState(false);
|
||||
const [logs, setLogs] = useState<LogEntry[]>([]);
|
||||
const audioPlayer = useAudioPlayer();
|
||||
|
||||
const addLog = useCallback((direction: LogEntry['direction'], event: string, data?: string) => {
|
||||
setLogs((prev) => [...prev.slice(-200), { timestamp: new Date(), direction, event, data }]);
|
||||
}, []);
|
||||
|
||||
const audioRef = useRef<HTMLAudioElement | null>(null);
|
||||
|
||||
const playAudio = useCallback((audioBase64: string) => {
|
||||
if (audioRef.current) {
|
||||
audioRef.current.pause();
|
||||
audioRef.current = null;
|
||||
}
|
||||
|
||||
const audio = new Audio(`data:audio/wav;base64,${audioBase64}`);
|
||||
audioRef.current = audio;
|
||||
|
||||
audio.onended = () => {
|
||||
audioRef.current = null;
|
||||
setState('idle');
|
||||
};
|
||||
|
||||
audio.onerror = () => {
|
||||
audioRef.current = null;
|
||||
setState('idle');
|
||||
};
|
||||
|
||||
audio.play();
|
||||
}, []);
|
||||
|
||||
const connect = useCallback(
|
||||
(serverUrl: string, deviceToken: string) => {
|
||||
if (socketRef.current) {
|
||||
@ -52,31 +74,35 @@ export function useSocket() {
|
||||
socket.on('status', (payload: { state: RobotState }) => {
|
||||
setState(payload.state);
|
||||
addLog('in', 'status', payload.state);
|
||||
if (payload.state === 'idle') {
|
||||
audioPlayer.flush();
|
||||
});
|
||||
|
||||
socket.on('response_text', (payload: { text: string; audio?: string }) => {
|
||||
addLog('in', 'response_text', payload.text);
|
||||
if (payload.audio) {
|
||||
playAudio(payload.audio);
|
||||
} else {
|
||||
setState('idle');
|
||||
}
|
||||
});
|
||||
|
||||
socket.on('audio_chunk', (payload: { data: string }) => {
|
||||
addLog('in', 'audio_chunk', `${payload.data?.length ?? 0} chars (base64)`);
|
||||
if (payload.data) {
|
||||
audioPlayer.playChunk(payload.data);
|
||||
}
|
||||
addLog('in', 'audio_chunk', `${payload.data?.length ?? 0} chars`);
|
||||
});
|
||||
|
||||
socket.on('notification', (payload: Record<string, unknown>) => {
|
||||
addLog('in', 'notification', JSON.stringify(payload));
|
||||
});
|
||||
|
||||
socket.on('response_start', () => addLog('in', 'response_start'));
|
||||
socket.on('response_end', () => addLog('in', 'response_end'));
|
||||
|
||||
socketRef.current = socket;
|
||||
},
|
||||
[addLog],
|
||||
[addLog, playAudio],
|
||||
);
|
||||
|
||||
const disconnect = useCallback(() => {
|
||||
if (audioRef.current) {
|
||||
audioRef.current.pause();
|
||||
audioRef.current = null;
|
||||
}
|
||||
socketRef.current?.disconnect();
|
||||
socketRef.current = null;
|
||||
setConnected(false);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user