diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 0000000..5327dbb
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,8 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(wc -l /Users/arthurbarre/dev/perso/ti-pote/apps/robot-client/src/**/*.ts)",
+      "Bash(ssh tipote@192.168.1.124 \"ps aux | grep node | grep -v grep; echo '---LOGS---'; cat /tmp/tipote.log\")"
+    ]
+  }
+}
diff --git a/apps/backend/src/adapters/inbound/websocket/robot.gateway.ts b/apps/backend/src/adapters/inbound/websocket/robot.gateway.ts
index cfaa163..2546385 100644
--- a/apps/backend/src/adapters/inbound/websocket/robot.gateway.ts
+++ b/apps/backend/src/adapters/inbound/websocket/robot.gateway.ts
@@ -121,8 +121,26 @@ export class RobotGateway implements OnGatewayConnection, OnGatewayDisconnect, I
     @ConnectedSocket() client: AuthenticatedSocket,
     @MessageBody() message: AudioChunkMessage,
   ) {
-    this.conversationPort.processAudioChunk(client.data.deviceId, message.data, message.sampleRate);
+    const chunk = message.data;
+    // Debug: log the first three chunks, then every 100th, to diagnose STT
+    // issues without flooding the log.
+    this.audioLogCount++;
+    if (this.audioLogCount <= 3 || this.audioLogCount % 100 === 0) {
+      this.logger.debug(
+        `Audio chunk #${this.audioLogCount}: type=${typeof chunk}, isBuffer=${Buffer.isBuffer(chunk)}, ` +
+          `constructor=${chunk?.constructor?.name}, length=${chunk?.length ?? chunk?.byteLength ?? 'N/A'}, ` +
+          `sampleRate=${message.sampleRate}`,
+      );
+    }
+    this.conversationPort.processAudioChunk(client.data.deviceId, chunk, message.sampleRate);
   }
+
+  /**
+   * Number of audio chunks handled by this gateway instance; only used to
+   * throttle the debug log in the audio chunk handler. It is a single
+   * per-instance counter, not keyed by deviceId.
+   */
+  private audioLogCount = 0;
 
   @SubscribeMessage('speech_end')
   async handleSpeechEnd(@ConnectedSocket() client: AuthenticatedSocket) {
diff --git a/apps/backend/src/core/services/conversation.service.ts b/apps/backend/src/core/services/conversation.service.ts
index a5a5b73..5344564 100644
--- a/apps/backend/src/core/services/conversation.service.ts
+++ b/apps/backend/src/core/services/conversation.service.ts
@@ -97,7 +97,7 @@ export class ConversationService implements IConversationPort {
     this.activeSessions.set(deviceId, session);
 
     const sttStream = await this.sttPort.openStream((result: TranscriptionResult) => {
-      this.logger.debug(
+      this.logger.log(
         `STT [${deviceId}]: "${result.text}" (final: ${result.isFinal}, confidence: ${result.confidence})`,
       );
 
@@ -156,6 +156,9 @@ export class ConversationService implements IConversationPort {
     this.logger.log(`Final transcription for ${deviceId}: "${finalText}"`);
 
     if (!finalText) {
+      this.logger.warn(`No transcription for ${deviceId} — returning to idle`);
+      this.deviceGateway.sendStatus(deviceId, 'idle');
+      this.activeSessions.delete(deviceId);
       return null;
     }
 
diff --git a/apps/robot-client/scripts/wake_word.py b/apps/robot-client/scripts/wake_word.py
index bf0a74b..7f7e90b 100755
--- a/apps/robot-client/scripts/wake_word.py
+++ b/apps/robot-client/scripts/wake_word.py
@@ -147,11 +147,17 @@ def run_predict_loop(oww_model, read_chunk, state: State, threshold: float):
                 # Keep draining but don't emit detections.
                 continue
 
-            for _, score in oww_model.prediction_buffer.items():
-                if len(score) > 0 and score[-1] > threshold:
-                    print("DETECTED", flush=True)
-                    oww_model.reset()
-                    break
+            for name, score in oww_model.prediction_buffer.items():
+                if len(score) > 0:
+                    s = score[-1]
+                    # Debug aid: report any score above 0.05 on stderr so
+                    # near-misses are visible; the DETECTED marker stays on stdout.
+                    if s > 0.05:
+                        print(f"SCORE: {name}={s:.3f}", file=sys.stderr, flush=True)
+                    if s > threshold:
+                        print("DETECTED", flush=True)
+                        oww_model.reset()
+                        break
     except KeyboardInterrupt:
         pass
 
diff --git a/apps/robot-client/src/services/orchestrator.service.ts b/apps/robot-client/src/services/orchestrator.service.ts
index 15ea840..b041fce 100644
--- a/apps/robot-client/src/services/orchestrator.service.ts
+++ b/apps/robot-client/src/services/orchestrator.service.ts
@@ -25,7 +25,7 @@ export class OrchestratorService extends EventEmitter {
   /** Timer for Voice Activity Detection (silence timeout) */
   private silenceTimer: ReturnType<typeof setTimeout> | null = null;
   private readonly silenceTimeoutMs = 2000; // 2s of silence = speech end
-  private readonly initialGracePeriodMs = 3000; // 3s grace period before silence detection kicks in
+  private readonly initialGracePeriodMs = 5000; // 5s grace period before silence detection kicks in
 
   /** Track when the last audio chunk was received */
   private lastAudioChunkTime = 0;
@@ -243,6 +243,14 @@ export class OrchestratorService extends EventEmitter {
         }
         this.audioBuffer = [];
       }
+
+      // Reset speech detection: the wake word phrase ("Hey Jarvis") itself
+      // triggers hasDetectedSpeech=true, and the natural pause after saying it
+      // fires the 2s silence timeout before the user can ask their question.
+      // Restart the clock from NOW so the user gets the full grace period.
+      this.hasDetectedSpeech = false;
+      this.lastAudioChunkTime = Date.now();
+      this.conversationStartTime = Date.now();
     }
 
     // The cloud drives the state machine for thinking → speaking → idle
@@ -269,8 +277,10 @@ export class OrchestratorService extends EventEmitter {
       }
     }
 
-    // After playback, return to idle and wait for a new wake word
-    this.returnToIdle();
+    // After playback, keep the conversation going — listen for follow-up
+    // without requiring a new wake word. The conversation ends naturally
+    // when silence exceeds the grace period (no speech detected).
+    this.continueListening();
   }
 
   /**
diff --git a/apps/robot-client/src/services/wifi.service.ts b/apps/robot-client/src/services/wifi.service.ts
index a5b1fe6..d88841f 100644
--- a/apps/robot-client/src/services/wifi.service.ts
+++ b/apps/robot-client/src/services/wifi.service.ts
@@ -37,8 +37,11 @@ export class WifiService {
     }
 
     try {
-      const { stdout } = await execAsync('nmcli -t -f TYPE,STATE device | grep wifi');
-      return stdout.includes('connected') && !stdout.includes('disconnected');
+      const { stdout } = await execAsync('nmcli -t -f TYPE,STATE device');
+      // Match the TYPE:STATE pair exactly so only the main wifi device counts:
+      // "wifi-p2p:..." and "wifi:disconnected" lines are ignored. Each line is
+      // trimmed to tolerate trailing whitespace or CRLF line endings.
+      return stdout.split('\n').some((line) => line.trim() === 'wifi:connected');
     } catch {
       return false;
     }