add voxtral

This commit is contained in:
ordinarthur 2026-04-14 02:31:30 +02:00
parent c3b7e018fb
commit 2be22da2ff
9 changed files with 204 additions and 34 deletions

View File

@ -22,13 +22,15 @@
"migration:revert": "pnpm typeorm migration:revert -d src/config/typeorm.config.ts" "migration:revert": "pnpm typeorm migration:revert -d src/config/typeorm.config.ts"
}, },
"dependencies": { "dependencies": {
"@ai-sdk/mistral": "^3.0.30",
"@anthropic-ai/sdk": "^0.80.0", "@anthropic-ai/sdk": "^0.80.0",
"@deepgram/sdk": "^5.0.0", "@deepgram/sdk": "^5.0.0",
"@mastra/core": "^1.17.0", "@mastra/core": "^1.17.0",
"@mistralai/mistralai": "^2.2.0",
"@nestjs/common": "^11.1.17", "@nestjs/common": "^11.1.17",
"@nestjs/config": "^4.0.3", "@nestjs/config": "^4.0.3",
"@nestjs/event-emitter": "^3.0.0",
"@nestjs/core": "^11.1.17", "@nestjs/core": "^11.1.17",
"@nestjs/event-emitter": "^3.0.0",
"@nestjs/jwt": "^11.0.2", "@nestjs/jwt": "^11.0.2",
"@nestjs/passport": "^11.0.5", "@nestjs/passport": "^11.0.5",
"@nestjs/platform-express": "^11.1.17", "@nestjs/platform-express": "^11.1.17",

View File

@ -121,20 +121,8 @@ export class RobotGateway implements OnGatewayConnection, OnGatewayDisconnect, I
@ConnectedSocket() client: AuthenticatedSocket, @ConnectedSocket() client: AuthenticatedSocket,
@MessageBody() message: AudioChunkMessage, @MessageBody() message: AudioChunkMessage,
) { ) {
const chunk = message.data; this.conversationPort.processAudioChunk(client.data.deviceId, message.data, message.sampleRate);
// Debug: log audio chunk info to diagnose STT issues
if (!this._audioLogCount) this._audioLogCount = 0;
this._audioLogCount++;
if (this._audioLogCount <= 3 || this._audioLogCount % 100 === 0) {
this.logger.debug(
`Audio chunk #${this._audioLogCount}: type=${typeof chunk}, isBuffer=${Buffer.isBuffer(chunk)}, ` +
`constructor=${chunk?.constructor?.name}, length=${chunk?.length ?? chunk?.byteLength ?? 'N/A'}, ` +
`sampleRate=${message.sampleRate}`,
);
} }
this.conversationPort.processAudioChunk(client.data.deviceId, chunk, message.sampleRate);
}
private _audioLogCount = 0;
@SubscribeMessage('speech_end') @SubscribeMessage('speech_end')
async handleSpeechEnd(@ConnectedSocket() client: AuthenticatedSocket) { async handleSpeechEnd(@ConnectedSocket() client: AuthenticatedSocket) {

View File

@ -16,11 +16,8 @@ export class AnthropicAdapter implements ILLMPort {
private readonly model: string; private readonly model: string;
constructor(private readonly configService: ConfigService) { constructor(private readonly configService: ConfigService) {
const apiKey = this.configService.get<string>('ANTHROPIC_API_KEY'); const apiKey = this.configService.get<string>('ANTHROPIC_API_KEY', '');
if (!apiKey) { this.client = new Anthropic({ apiKey: apiKey || 'unused' });
throw new Error('ANTHROPIC_API_KEY is not set');
}
this.client = new Anthropic({ apiKey });
this.model = this.configService.get<string>('ANTHROPIC_MODEL', 'claude-sonnet-4-20250514'); this.model = this.configService.get<string>('ANTHROPIC_MODEL', 'claude-sonnet-4-20250514');
} }

View File

@ -16,11 +16,8 @@ export class OpenAIAdapter implements ILLMPort {
private readonly model: string; private readonly model: string;
constructor(private readonly configService: ConfigService) { constructor(private readonly configService: ConfigService) {
const apiKey = this.configService.get<string>('OPENAI_API_KEY'); const apiKey = this.configService.get<string>('OPENAI_API_KEY', '');
if (!apiKey) { this.client = new OpenAI({ apiKey: apiKey || 'unused' });
throw new Error('OPENAI_API_KEY is not set');
}
this.client = new OpenAI({ apiKey });
this.model = this.configService.get<string>('OPENAI_MODEL', 'gpt-4o'); this.model = this.configService.get<string>('OPENAI_MODEL', 'gpt-4o');
} }

View File

@ -10,11 +10,8 @@ export class ElevenLabsAdapter implements ITTSPort {
private readonly defaultVoiceId: string; private readonly defaultVoiceId: string;
constructor(private readonly configService: ConfigService) { constructor(private readonly configService: ConfigService) {
const apiKey = this.configService.get<string>('ELEVENLABS_API_KEY'); const apiKey = this.configService.get<string>('ELEVENLABS_API_KEY', '');
if (!apiKey) { this.client = new ElevenLabsClient({ apiKey: apiKey || 'unused' });
throw new Error('ELEVENLABS_API_KEY is not set');
}
this.client = new ElevenLabsClient({ apiKey });
this.defaultVoiceId = this.configService.get<string>('ELEVENLABS_VOICE_ID', 'pFZP5JQG7iQjIQuC4Bku'); this.defaultVoiceId = this.configService.get<string>('ELEVENLABS_VOICE_ID', 'pFZP5JQG7iQjIQuC4Bku');
} }

View File

@ -0,0 +1,122 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { Mistral } from '@mistralai/mistralai';
import { ITTSPort } from '../../../core/ports/outbound/tts.port';
/**
 * Text-to-speech adapter backed by the Mistral audio speech API.
 *
 * Requests WAV audio from the API and converts it to raw 16 kHz, mono,
 * signed 16-bit little-endian PCM before handing it back to the caller.
 */
@Injectable()
export class MistralTTSAdapter implements ITTSPort {
  private readonly logger = new Logger(MistralTTSAdapter.name);
  private readonly client: Mistral;
  private readonly voiceId: string;
  private readonly model: string;

  /**
   * @throws Error when `MISTRAL_API_KEY` is missing or empty.
   */
  constructor(private readonly configService: ConfigService) {
    const apiKey = this.configService.get<string>('MISTRAL_API_KEY');
    if (!apiKey) {
      throw new Error('MISTRAL_API_KEY is not set');
    }
    this.client = new Mistral({ apiKey });
    this.voiceId = this.configService.get<string>('MISTRAL_TTS_VOICE', 'fr_marie_neutral');
    this.model = this.configService.get<string>('MISTRAL_TTS_MODEL', 'voxtral-mini-tts-2603');
  }

  /**
   * Synthesizes `text` and returns raw 16 kHz mono S16LE PCM.
   *
   * @param text  Text to speak.
   * @param voice Optional voice id; falls back to the configured default
   *              when omitted or empty.
   * @returns PCM samples without any WAV container.
   * @throws Error when the returned audio is not a 16-bit PCM WAV file.
   */
  async synthesize(text: string, voice?: string): Promise<Buffer> {
    const result = await this.client.audio.speech.complete({
      model: this.model,
      input: text,
      responseFormat: 'wav',
      stream: false,
      voiceId: voice || this.voiceId,
    });
    // The response carries the WAV file as a base64 string.
    const wavBuffer = Buffer.from(result.audioData, 'base64');
    // Strip the WAV container and normalise to 16 kHz mono.
    return this.extractPcmFromWav(wavBuffer);
  }

  /**
   * Non-incremental "streaming": performs one full synthesis and delivers
   * the whole PCM buffer to `onChunk` as a single chunk.
   */
  async synthesizeStream(
    text: string,
    voice?: string,
    onChunk?: (chunk: Buffer) => void,
  ): Promise<void> {
    const pcm = await this.synthesize(text, voice);
    onChunk?.(pcm);
  }

  /**
   * Extracts the audio payload of a WAV buffer and converts it to
   * 16 kHz mono S16LE PCM (downmixing and resampling when required).
   *
   * @throws Error when the buffer is not RIFF/WAVE, lacks a `fmt ` or
   *         `data` chunk, or does not contain 16-bit integer PCM.
   */
  private extractPcmFromWav(wav: Buffer): Buffer {
    const { audioFormat, numChannels, sampleRate, bitsPerSample, data } = this.parseWav(wav);

    this.logger.debug(
      `WAV: ${sampleRate}Hz, ${bitsPerSample}bit, ${numChannels}ch, ${data.length} bytes PCM`,
    );

    // 1 = integer PCM, 0xFFFE = WAVE_FORMAT_EXTENSIBLE. Everything below reads
    // samples as Int16, so any other layout would be decoded as noise.
    const isIntegerPcm = audioFormat === 1 || audioFormat === 0xfffe;
    if (!isIntegerPcm || bitsPerSample !== 16) {
      throw new Error(
        `Unsupported WAV encoding (format=${audioFormat}, ${bitsPerSample}-bit); expected 16-bit PCM`,
      );
    }
    if (sampleRate <= 0 || numChannels <= 0) {
      throw new Error(`Invalid WAV header (sampleRate=${sampleRate}, channels=${numChannels})`);
    }

    let pcm =
      numChannels === 1
        ? // Drop a dangling odd byte so only whole samples remain.
          data.subarray(0, data.length - (data.length % 2))
        : this.downmixToMono(data, numChannels);

    if (sampleRate !== 16000) {
      pcm = this.resample(pcm, sampleRate, 16000);
    }
    return pcm;
  }

  /**
   * Walks the RIFF chunk list and returns the format fields together with a
   * view onto the `data` chunk payload.
   */
  private parseWav(wav: Buffer): {
    audioFormat: number;
    numChannels: number;
    sampleRate: number;
    bitsPerSample: number;
    data: Buffer;
  } {
    const riffHeaderSize = 12;
    const chunkHeaderSize = 8;

    if (
      wav.length < riffHeaderSize ||
      wav.toString('ascii', 0, 4) !== 'RIFF' ||
      wav.toString('ascii', 8, 12) !== 'WAVE'
    ) {
      throw new Error('Invalid WAV data: missing RIFF/WAVE header');
    }

    let format:
      | { audioFormat: number; numChannels: number; sampleRate: number; bitsPerSample: number }
      | undefined;
    let data: Buffer | undefined;

    let offset = riffHeaderSize;
    while (offset + chunkHeaderSize <= wav.length) {
      const chunkId = wav.toString('ascii', offset, offset + 4);
      const chunkSize = wav.readUInt32LE(offset + 4);
      const bodyStart = offset + chunkHeaderSize;

      if (chunkId === 'fmt ' && chunkSize >= 16 && bodyStart + 16 <= wav.length) {
        format = {
          audioFormat: wav.readUInt16LE(bodyStart),
          numChannels: wav.readUInt16LE(bodyStart + 2),
          sampleRate: wav.readUInt32LE(bodyStart + 4),
          bitsPerSample: wav.readUInt16LE(bodyStart + 14),
        };
      } else if (chunkId === 'data') {
        // Encoders that write the header before the audio length is known leave
        // a placeholder size (0 or 0xFFFFFFFF); in that case take the rest of
        // the buffer. Otherwise honour the declared size so trailing chunks
        // are not mistaken for audio.
        const bodyEnd = chunkSize === 0 ? wav.length : Math.min(bodyStart + chunkSize, wav.length);
        data = wav.subarray(bodyStart, bodyEnd);
        break;
      }

      // RIFF chunks are word-aligned: an odd-sized body is followed by a pad byte.
      offset = bodyStart + chunkSize + (chunkSize % 2);
    }

    if (format === undefined) {
      throw new Error("Invalid WAV data: 'fmt ' chunk not found before audio data");
    }
    if (data === undefined) {
      throw new Error("Invalid WAV data: 'data' chunk not found");
    }
    return { ...format, data };
  }

  /**
   * Averages every interleaved frame of 16-bit samples into one mono sample.
   * Trailing bytes that do not form a complete frame are ignored.
   */
  private downmixToMono(pcm: Buffer, numChannels: number): Buffer {
    const frameBytes = numChannels * 2;
    const frameCount = Math.floor(pcm.length / frameBytes);
    const mono = Buffer.alloc(frameCount * 2);
    for (let frame = 0; frame < frameCount; frame++) {
      const base = frame * frameBytes;
      let sum = 0;
      for (let channel = 0; channel < numChannels; channel++) {
        sum += pcm.readInt16LE(base + channel * 2);
      }
      mono.writeInt16LE(Math.round(sum / numChannels), frame * 2);
    }
    return mono;
  }

  /**
   * Simple linear-interpolation resampler for 16-bit mono PCM.
   * No anti-aliasing filter is applied.
   */
  private resample(pcm: Buffer, fromRate: number, toRate: number): Buffer {
    const ratio = fromRate / toRate;
    // Whole samples only; a dangling odd byte is ignored.
    const srcSamples = Math.floor(pcm.length / 2);
    if (srcSamples === 0) {
      return Buffer.alloc(0);
    }
    const lastIdx = srcSamples - 1;
    const dstSamples = Math.floor(srcSamples / ratio);
    const out = Buffer.alloc(dstSamples * 2);
    for (let i = 0; i < dstSamples; i++) {
      const srcPos = i * ratio;
      const srcIdx = Math.floor(srcPos);
      const frac = srcPos - srcIdx;
      // Clamp both taps so the final output samples never read past the input.
      const s0 = pcm.readInt16LE(Math.min(srcIdx, lastIdx) * 2);
      const s1 = pcm.readInt16LE(Math.min(srcIdx + 1, lastIdx) * 2);
      const sample = Math.round(s0 + frac * (s1 - s0));
      out.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
    }
    this.logger.debug(
      `Resampled ${fromRate}->${toRate}Hz: ${srcSamples}->${dstSamples} samples`,
    );
    return out;
  }
}

View File

@ -32,6 +32,7 @@ import { DeepgramAdapter } from './adapters/outbound/stt/deepgram.adapter';
import { AnthropicAdapter } from './adapters/outbound/llm/anthropic.adapter'; import { AnthropicAdapter } from './adapters/outbound/llm/anthropic.adapter';
import { OpenAIAdapter } from './adapters/outbound/llm/openai.adapter'; import { OpenAIAdapter } from './adapters/outbound/llm/openai.adapter';
import { ElevenLabsAdapter } from './adapters/outbound/tts/elevenlabs.adapter'; import { ElevenLabsAdapter } from './adapters/outbound/tts/elevenlabs.adapter';
import { MistralTTSAdapter } from './adapters/outbound/tts/mistral.adapter';
import { RedisAdapter } from './adapters/outbound/cache/redis.adapter'; import { RedisAdapter } from './adapters/outbound/cache/redis.adapter';
import { CONVERSATION_PORT } from './core/ports/inbound/conversation.port'; import { CONVERSATION_PORT } from './core/ports/inbound/conversation.port';
import { HEALTH_TELEMETRY_PORT } from './core/ports/inbound/health-telemetry.port'; import { HEALTH_TELEMETRY_PORT } from './core/ports/inbound/health-telemetry.port';
@ -108,7 +109,14 @@ import { CACHE_PORT } from './core/ports/outbound/cache.port';
}, },
{ {
provide: TTS_PORT, provide: TTS_PORT,
useClass: ElevenLabsAdapter, inject: [ConfigService],
useFactory: (configService: ConfigService) => {
const provider = configService.get<string>('TTS_PROVIDER', 'elevenlabs');
if (provider === 'mistral') {
return new MistralTTSAdapter(configService);
}
return new ElevenLabsAdapter(configService);
},
}, },
{ {
provide: CACHE_PORT, provide: CACHE_PORT,

View File

@ -45,10 +45,18 @@ export class ConversationService implements IConversationPort {
private readonly configService: ConfigService, private readonly configService: ConfigService,
) { ) {
const provider = this.configService.get<string>('LLM_PROVIDER', 'anthropic'); const provider = this.configService.get<string>('LLM_PROVIDER', 'anthropic');
const model = let model: string;
provider === 'openai' switch (provider) {
? `openai/${this.configService.get<string>('OPENAI_MODEL', 'gpt-4o')}` case 'openai':
: `anthropic/${this.configService.get<string>('ANTHROPIC_MODEL', 'claude-sonnet-4-20250514')}`; model = `openai/${this.configService.get<string>('OPENAI_MODEL', 'gpt-4o')}`;
break;
case 'mistral':
model = `mistral/${this.configService.get<string>('MISTRAL_MODEL', 'ministral-3b-latest')}`;
break;
default:
model = `anthropic/${this.configService.get<string>('ANTHROPIC_MODEL', 'claude-sonnet-4-20250514')}`;
break;
}
this.agent = new Agent({ this.agent = new Agent({
id: 'ti-pote', id: 'ti-pote',

51
pnpm-lock.yaml generated
View File

@ -10,6 +10,9 @@ importers:
apps/backend: apps/backend:
dependencies: dependencies:
'@ai-sdk/mistral':
specifier: ^3.0.30
version: 3.0.30(zod@4.3.6)
'@anthropic-ai/sdk': '@anthropic-ai/sdk':
specifier: ^0.80.0 specifier: ^0.80.0
version: 0.80.0(zod@4.3.6) version: 0.80.0(zod@4.3.6)
@ -19,6 +22,9 @@ importers:
'@mastra/core': '@mastra/core':
specifier: ^1.17.0 specifier: ^1.17.0
version: 1.17.0(@standard-community/standard-json@0.3.5(@standard-schema/spec@1.1.0)(@types/json-schema@7.0.15)(quansync@0.2.11)(zod-to-json-schema@3.25.2(zod@4.3.6))(zod@4.3.6))(@standard-community/standard-openapi@0.2.9(@standard-community/standard-json@0.3.5(@standard-schema/spec@1.1.0)(@types/json-schema@7.0.15)(quansync@0.2.11)(zod-to-json-schema@3.25.2(zod@4.3.6))(zod@4.3.6))(@standard-schema/spec@1.1.0)(openapi-types@12.1.3)(zod@4.3.6))(@types/json-schema@7.0.15)(openapi-types@12.1.3)(zod@4.3.6) version: 1.17.0(@standard-community/standard-json@0.3.5(@standard-schema/spec@1.1.0)(@types/json-schema@7.0.15)(quansync@0.2.11)(zod-to-json-schema@3.25.2(zod@4.3.6))(zod@4.3.6))(@standard-community/standard-openapi@0.2.9(@standard-community/standard-json@0.3.5(@standard-schema/spec@1.1.0)(@types/json-schema@7.0.15)(quansync@0.2.11)(zod-to-json-schema@3.25.2(zod@4.3.6))(zod@4.3.6))(@standard-schema/spec@1.1.0)(openapi-types@12.1.3)(zod@4.3.6))(@types/json-schema@7.0.15)(openapi-types@12.1.3)(zod@4.3.6)
'@mistralai/mistralai':
specifier: ^2.2.0
version: 2.2.0
'@nestjs/common': '@nestjs/common':
specifier: ^11.1.17 specifier: ^11.1.17
version: 11.1.17(class-transformer@0.5.1)(class-validator@0.15.1)(reflect-metadata@0.2.2)(rxjs@7.8.2) version: 11.1.17(class-transformer@0.5.1)(class-validator@0.15.1)(reflect-metadata@0.2.2)(rxjs@7.8.2)
@ -284,6 +290,12 @@ packages:
resolution: {integrity: sha512-VTDuRS5V0ATbJ/LkaQlisMnTAeYKXAK6scMguVBstf+KIBQ7HIuKhiXLv+G/hvejkV+THoXzoNifInAkU81P1g==} resolution: {integrity: sha512-VTDuRS5V0ATbJ/LkaQlisMnTAeYKXAK6scMguVBstf+KIBQ7HIuKhiXLv+G/hvejkV+THoXzoNifInAkU81P1g==}
engines: {node: '>=18'} engines: {node: '>=18'}
'@ai-sdk/mistral@3.0.30':
resolution: {integrity: sha512-+j4IXRSk9E661cFSafmIr+XHOzwjFagawwzMOlSqwL6U4Sq4PCFLDF+oHbX5NUqNjUL7FD1zi/9lBIfa41pUvw==}
engines: {node: '>=18'}
peerDependencies:
zod: ^3.25.76 || ^4.1.8
'@ai-sdk/provider-utils@2.2.8': '@ai-sdk/provider-utils@2.2.8':
resolution: {integrity: sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA==} resolution: {integrity: sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA==}
engines: {node: '>=18'} engines: {node: '>=18'}
@ -302,6 +314,12 @@ packages:
peerDependencies: peerDependencies:
zod: ^3.25.76 || ^4.1.8 zod: ^3.25.76 || ^4.1.8
'@ai-sdk/provider-utils@4.0.23':
resolution: {integrity: sha512-z8GlDaCmRSDlqkMF2f4/RFgWxdarvIbyuk+m6WXT1LYgsnGiXRJGTD2Z1+SDl3LqtFuRtGX1aghYvQLoHL/9pg==}
engines: {node: '>=18'}
peerDependencies:
zod: ^3.25.76 || ^4.1.8
'@ai-sdk/provider@1.1.3': '@ai-sdk/provider@1.1.3':
resolution: {integrity: sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg==} resolution: {integrity: sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg==}
engines: {node: '>=18'} engines: {node: '>=18'}
@ -318,6 +336,10 @@ packages:
resolution: {integrity: sha512-2Xmoq6DBJqmSl80U6V9z5jJSJP7ehaJJQMy2iFUqTay06wdCqTnPVBBQbtEL8RCChenL+q5DC5H5WzU3vV3v8w==} resolution: {integrity: sha512-2Xmoq6DBJqmSl80U6V9z5jJSJP7ehaJJQMy2iFUqTay06wdCqTnPVBBQbtEL8RCChenL+q5DC5H5WzU3vV3v8w==}
engines: {node: '>=18'} engines: {node: '>=18'}
'@ai-sdk/provider@3.0.8':
resolution: {integrity: sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ==}
engines: {node: '>=18'}
'@ai-sdk/ui-utils@1.2.11': '@ai-sdk/ui-utils@1.2.11':
resolution: {integrity: sha512-3zcwCc8ezzFlwp3ZD15wAPjf2Au4s3vAbKsXQVyhxODHcmu0iyPO2Eua6D/vicq/AUm/BAo60r97O6HU+EI0+w==} resolution: {integrity: sha512-3zcwCc8ezzFlwp3ZD15wAPjf2Au4s3vAbKsXQVyhxODHcmu0iyPO2Eua6D/vicq/AUm/BAo60r97O6HU+EI0+w==}
engines: {node: '>=18'} engines: {node: '>=18'}
@ -1205,6 +1227,9 @@ packages:
peerDependencies: peerDependencies:
zod: ^3.25.0 || ^4.0.0 zod: ^3.25.0 || ^4.0.0
'@mistralai/mistralai@2.2.0':
resolution: {integrity: sha512-JQUGIXjFWnw/J9LpTSf/ZXwVW3Sh8FBAcfTo5QvAHqkl4CfSiIwnjRJhMoAFcP6ncCe84YPU1ncDGX+p3OXnfg==}
'@modelcontextprotocol/sdk@1.28.0': '@modelcontextprotocol/sdk@1.28.0':
resolution: {integrity: sha512-gmloF+i+flI8ouQK7MWW4mOwuMh4RePBuPFAEPC6+pdqyWOUMDOixb6qZ69owLJpz6XmyllCouc4t8YWO+E2Nw==} resolution: {integrity: sha512-gmloF+i+flI8ouQK7MWW4mOwuMh4RePBuPFAEPC6+pdqyWOUMDOixb6qZ69owLJpz6XmyllCouc4t8YWO+E2Nw==}
engines: {node: '>=18'} engines: {node: '>=18'}
@ -5573,6 +5598,12 @@ snapshots:
transitivePeerDependencies: transitivePeerDependencies:
- supports-color - supports-color
'@ai-sdk/mistral@3.0.30(zod@4.3.6)':
dependencies:
'@ai-sdk/provider': 3.0.8
'@ai-sdk/provider-utils': 4.0.23(zod@4.3.6)
zod: 4.3.6
'@ai-sdk/provider-utils@2.2.8(zod@4.3.6)': '@ai-sdk/provider-utils@2.2.8(zod@4.3.6)':
dependencies: dependencies:
'@ai-sdk/provider': 1.1.3 '@ai-sdk/provider': 1.1.3
@ -5594,6 +5625,13 @@ snapshots:
eventsource-parser: 3.0.6 eventsource-parser: 3.0.6
zod: 4.3.6 zod: 4.3.6
'@ai-sdk/provider-utils@4.0.23(zod@4.3.6)':
dependencies:
'@ai-sdk/provider': 3.0.8
'@standard-schema/spec': 1.1.0
eventsource-parser: 3.0.6
zod: 4.3.6
'@ai-sdk/provider@1.1.3': '@ai-sdk/provider@1.1.3':
dependencies: dependencies:
json-schema: 0.4.0 json-schema: 0.4.0
@ -5610,6 +5648,10 @@ snapshots:
dependencies: dependencies:
json-schema: 0.4.0 json-schema: 0.4.0
'@ai-sdk/provider@3.0.8':
dependencies:
json-schema: 0.4.0
'@ai-sdk/ui-utils@1.2.11(zod@4.3.6)': '@ai-sdk/ui-utils@1.2.11(zod@4.3.6)':
dependencies: dependencies:
'@ai-sdk/provider': 1.1.3 '@ai-sdk/provider': 1.1.3
@ -6529,6 +6571,15 @@ snapshots:
zod-from-json-schema-v3: zod-from-json-schema@0.0.5 zod-from-json-schema-v3: zod-from-json-schema@0.0.5
zod-to-json-schema: 3.25.2(zod@4.3.6) zod-to-json-schema: 3.25.2(zod@4.3.6)
'@mistralai/mistralai@2.2.0':
dependencies:
ws: 8.20.0
zod: 4.3.6
zod-to-json-schema: 3.25.2(zod@4.3.6)
transitivePeerDependencies:
- bufferutil
- utf-8-validate
'@modelcontextprotocol/sdk@1.28.0(zod@4.3.6)': '@modelcontextprotocol/sdk@1.28.0(zod@4.3.6)':
dependencies: dependencies:
'@hono/node-server': 1.19.11(hono@4.12.9) '@hono/node-server': 1.19.11(hono@4.12.9)