Overhaul of the backend AI module to produce better recipes, better images, more reliably, and cheaper. New src/ai/ module: - prompts.ts: long 'Chef Antoine' system prompt (~1500 tokens) with explicit originality rules, technical precision requirements, vocal transcription handling, and 3 few-shot style examples. Long enough to benefit from OpenAI's automatic prompt caching (-50% on cached portion from the 2nd call onward). - recipe-generator.ts: uses Structured Outputs (json_schema strict). Rich schema: titre, description, origine_inspiration, ingredients with quantity/notes/complement flag, numbered etapes with per-step duration, conseils array, accord_boisson. No more JSON.parse crashes. - image-generator.ts: switched from dall-e-3 to gpt-image-1 (medium quality by default). Much better photographic realism. Dedicated magazine-style prompt (editorial food photography, 45-deg overhead, natural light, stoneware). Slugify preserves extended Latin chars (cote-de-boeuf not c-te-de-b-uf). - transcriber.ts: migrated from whisper-1 to gpt-4o-mini-transcribe (50% cheaper, better on French). Includes a context prompt to bias toward culinary vocabulary. - cost.ts: centralized pricing table + helpers. Every OpenAI call now emits a structured log with model, durationMs, costUsd, usage, and cacheHit flag. Plugin refactor: - plugins/ai.ts now delegates to src/ai/* and only keeps the Fastify decoration glue + storage fallback for audio. - OpenAI client configured with maxRetries=3, timeout=60s. - Image generation runs in parallel with the recipe flatten/serialize step (minor speedup, ~0.5s). - flattenRecipe() converts the rich structured recipe into the legacy flat RecipeData shape (for Prisma columns) while preserving the structured form in recipeData.structured. Routes: - recipes.ts stores the structured JSON in generatedRecipe (instead of the lossy flattened form), enabling future frontends to render rich recipes with per-ingredient notes and step timers.
Env vars: - OPENAI_TRANSCRIBE_MODEL, OPENAI_IMAGE_MODEL, OPENAI_IMAGE_QUALITY, OPENAI_IMAGE_SIZE, OPENAI_MAX_RETRIES, OPENAI_TIMEOUT_MS Cost per recipe (estimated): - Before: ~$0.044 (whisper $0.003 + 4o-mini $0.0004 + dall-e-3 $0.04) - After : ~$0.018 (4o-mini-transcribe $0.0015 + 4o-mini $0.0004 + gpt-image-1 medium $0.0165), ~-59%. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
100 lines
2.7 KiB
TypeScript
100 lines
2.7 KiB
TypeScript
import type { OpenAI } from 'openai';
|
|
import type { FastifyBaseLogger } from 'fastify';
|
|
import * as fs from 'node:fs';
|
|
import * as path from 'node:path';
|
|
import * as os from 'node:os';
|
|
import { computeTranscribeCost, type CallLog } from './cost';
|
|
|
|
// Overridable via env. `||` (not `??`) is intentional: an empty env var also
// falls back to the cheaper default model.
const TRANSCRIBE_MODEL = process.env.OPENAI_TRANSCRIBE_MODEL || 'gpt-4o-mini-transcribe';
|
|
|
|
/** Input parameters for a transcription call. */
export interface TranscribeOptions {
  /** Path to the local audio file to transcribe. */
  audioPath: string;
  /** Optional user id, attached to the cost log for attribution. */
  userId?: string;
  /** ISO-639-1 language hint (default: 'fr') to improve accuracy. */
  language?: string;
}
|
|
|
|
/** Result of a transcription call: the transcript plus its cost/usage log. */
export interface TranscribeResult {
  /** Trimmed transcript text (guaranteed non-empty by `transcribeAudio`). */
  text: string;
  /** Structured log entry (model, duration, estimated cost, usage). */
  log: CallLog;
}
|
|
|
|
/** Handle to a downloaded temp file and its best-effort deleter. */
interface TempFile {
  /** Absolute path of the temp file on local disk. */
  path: string;
  /** Deletes the temp file; swallows errors (safe to call more than once). */
  cleanup: () => void;
}
|
|
|
|
export async function downloadToTemp(
|
|
url: string,
|
|
extension = '.mp3'
|
|
): Promise<TempFile> {
|
|
const response = await fetch(url);
|
|
if (!response.ok) {
|
|
throw new Error(`Échec téléchargement audio: ${response.statusText}`);
|
|
}
|
|
const buffer = Buffer.from(await response.arrayBuffer());
|
|
const tempFilePath = path.join(os.tmpdir(), `freedge-audio-${Date.now()}${extension}`);
|
|
fs.writeFileSync(tempFilePath, buffer);
|
|
return {
|
|
path: tempFilePath,
|
|
cleanup: () => {
|
|
try {
|
|
fs.unlinkSync(tempFilePath);
|
|
} catch {
|
|
/* ignore */
|
|
}
|
|
},
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Transcrit un fichier audio local en texte.
|
|
*
|
|
* Utilise `gpt-4o-mini-transcribe` par défaut (50 % moins cher que whisper-1
|
|
* et meilleur sur les langues romanes).
|
|
*/
|
|
export async function transcribeAudio(
|
|
openai: OpenAI,
|
|
logger: FastifyBaseLogger,
|
|
options: TranscribeOptions
|
|
): Promise<TranscribeResult> {
|
|
const start = Date.now();
|
|
|
|
const stat = fs.statSync(options.audioPath);
|
|
if (stat.size === 0) {
|
|
throw new Error('Fichier audio vide');
|
|
}
|
|
|
|
const transcription = await openai.audio.transcriptions.create({
|
|
file: fs.createReadStream(options.audioPath),
|
|
model: TRANSCRIBE_MODEL,
|
|
language: options.language || 'fr',
|
|
// Prompt court pour aider Whisper à comprendre le contexte
|
|
prompt:
|
|
"Énumération d'ingrédients culinaires en français. Vocabulaire de cuisine.",
|
|
});
|
|
|
|
const text = transcription.text.trim();
|
|
if (!text) {
|
|
throw new Error('Transcription vide');
|
|
}
|
|
|
|
// Estimation de la durée audio à partir de la taille (rough mais suffisant
|
|
// pour le tracking de coût). 1 minute MP3 ≈ 1 MB à 128 kbps.
|
|
const estimatedMinutes = Math.max(0.1, stat.size / (1024 * 1024));
|
|
const cost = computeTranscribeCost(TRANSCRIBE_MODEL, estimatedMinutes);
|
|
|
|
const log: CallLog = {
|
|
userId: options.userId,
|
|
operation: 'transcription',
|
|
model: TRANSCRIBE_MODEL,
|
|
durationMs: Date.now() - start,
|
|
costUsd: cost,
|
|
usage: { audioMinutes: estimatedMinutes },
|
|
};
|
|
|
|
logger.info(log, 'openai_audio_transcribed');
|
|
|
|
return { text, log };
|
|
}
|