From 985119bb0355b30f083ada98404584c567910712 Mon Sep 17 00:00:00 2001 From: admin Date: Wed, 10 Jun 2026 22:03:11 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20=C3=9Cbersetzungs-Retry=20+=20robuster?= =?UTF-8?q?=20Translate-Step=20+=20Nachhol-Endpoints?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - callClaude: Retry mit Backoff bei Überlast/Rate-Limit/Netzfehler (429/500/503/529) — solche transienten Fehler sind die wahrscheinliche Ursache der fehlenden SV-Übersetzung - Translate-Step pro Pair gekapselt: ein Fehler reißt nicht mehr den ganzen Lauf ab, Fehlversuche werden gezählt (pipeline_progress.translateFailures) - translatePair als wiederverwendbarer Helfer extrahiert - POST /api/pipeline/picture/:id/translate-fill: fehlende Übersetzungen (Sätze + Antwort-Wörter) eines Bildes nachholen Co-Authored-By: Claude Fable 5 --- src/lib/pipeline.js | 59 ++++++++++++++++++++++++------------------ src/lib/translate.js | 48 ++++++++++++++++++++++++---------- src/routes/pipeline.js | 17 +++++++++++- 3 files changed, 84 insertions(+), 40 deletions(-) diff --git a/src/lib/pipeline.js b/src/lib/pipeline.js index dd28a54..4a0e3f1 100644 --- a/src/lib/pipeline.js +++ b/src/lib/pipeline.js @@ -139,33 +139,19 @@ async function runPicture(pictureId) { } catch (err) { await setFailed(pictureId, 'pairs', err); return; } // ── Step 2: Übersetzen (pro Pair, füllt nur fehlende Sprachen) ────────────── + // Pro Pair gekapselt: scheitert eine Übersetzung (z.B. transienter API-Fehler trotz + // Retry), verliert das nicht den ganzen Lauf — der Rest läuft weiter, Fehlende werden + // gezählt und können später über „Übersetzungen nachholen" ergänzt werden. 
const pairs = await loadPairs(pictureId); progress.pairsTotal = pairs.length; - try { + progress.translateFailures = 0; + await setStep(pictureId, 'translate', progress); + for (const p of pairs) { + try { await translatePair(p); } + catch (err) { progress.translateFailures++; console.error(`Translate-Fehler bei Pair ${p.id}:`, err.message); } + progress.translatedPairs++; await setStep(pictureId, 'translate', progress); - for (const p of pairs) { - let questionRow = null; - if (p.question_id) { - questionRow = (await query( - `SELECT sentence_de, sentence_en, sentence_sv FROM questions WHERE id = $1`, - [p.question_id])).rows[0] || null; - await fillMissingRow('questions', p.question_id, ['sentence']); - } - if (p.answer_type === 'word') { - if (p.positive_statement_id) - await translateWordGroup(p.positive_statement_id, 'statement_positive_words', questionRow, false); - if (p.negative_statement_id) - await translateWordGroup(p.negative_statement_id, 'statement_negative_words', questionRow, false); - } else { - if ((p.answer_type === 'text' || p.answer_type === 'question') && p.positive_statement_id) - await fillMissingRow('statements', p.positive_statement_id, ['positive_sentence']); - if (p.answer_type === 'question' && p.negative_statement_id) - await fillMissingRow('statements', p.negative_statement_id, ['negative_sentence']); - } - progress.translatedPairs++; - await setStep(pictureId, 'translate', progress); - } - } catch (err) { await setFailed(pictureId, 'translate', err); return; } + } // ── Step 3: Audio für alle Sätze + Wörter des Bildes in allen Sprachen ────── try { @@ -214,6 +200,29 @@ async function runPicture(pictureId) { } catch (err) { await setFailed(pictureId, 'finish', err); return; } } +// Übersetzt die fehlenden Sprachen EINES Pairs (Frage + Sätze bzw. Wort-Gruppen). +// overwrite=true übersetzt auch bereits gefüllte Zielsprachen neu. 
+async function translatePair(p, overwrite = false) { + let questionRow = null; + if (p.question_id) { + questionRow = (await query( + `SELECT sentence_de, sentence_en, sentence_sv FROM questions WHERE id = $1`, + [p.question_id])).rows[0] || null; + await fillMissingRow('questions', p.question_id, ['sentence'], { overwrite }); + } + if (p.answer_type === 'word') { + if (p.positive_statement_id) + await translateWordGroup(p.positive_statement_id, 'statement_positive_words', questionRow, overwrite); + if (p.negative_statement_id) + await translateWordGroup(p.negative_statement_id, 'statement_negative_words', questionRow, overwrite); + } else { + if ((p.answer_type === 'text' || p.answer_type === 'question') && p.positive_statement_id) + await fillMissingRow('statements', p.positive_statement_id, ['positive_sentence'], { overwrite }); + if (p.answer_type === 'question' && p.negative_statement_id) + await fillMissingRow('statements', p.negative_statement_id, ['negative_sentence'], { overwrite }); + } +} + // Alle 3 Sprachen in den genutzten Feldern des Pairs gefüllt? 
(Spiegel des Review-Checks) async function isPairComplete(p) { if (p.question_id) { @@ -334,4 +343,4 @@ async function generateWithBackoff(u) { } } -module.exports = { enqueue, resumePending, loadPairs, collectAudioUnits, generateWithBackoff }; +module.exports = { enqueue, resumePending, loadPairs, collectAudioUnits, generateWithBackoff, translatePair }; diff --git a/src/lib/translate.js b/src/lib/translate.js index 2a34596..c4aecf7 100644 --- a/src/lib/translate.js +++ b/src/lib/translate.js @@ -57,20 +57,40 @@ function detokenize(translated, tokens, labelsFromClaude) { async function callClaude({ system, user, maxTokens = 2000 }) { const apiKey = process.env.ANTHROPIC_API_KEY; if (!apiKey) { const e = new Error('ANTHROPIC_API_KEY nicht konfiguriert'); e.status = 500; throw e; } - const res = await fetch(ANTHROPIC_API_URL, { - method: 'POST', - headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' }, - body: JSON.stringify({ - model: TRANSLATE_MODEL, max_tokens: maxTokens, system, - messages: [{ role: 'user', content: user }], - }), - }); - if (!res.ok) { const err = await res.json().catch(() => ({})); const e = new Error(err.error?.message || `Claude API ${res.status}`); e.status = res.status; throw e; } - const data = await res.json(); - let raw = data.content[0].text.trim(); - const md = raw.match(/```(?:json)?\s*([\s\S]+?)\s*```/); - if (md) raw = md[1]; - return JSON.parse(raw); + + // Retry bei Überlast/Rate-Limit/Netzfehler — der große Pipeline-Durchlauf macht + // viele Calls hintereinander; ohne Retry brach früher eine Sprache komplett weg. 
+ const RETRYABLE = new Set([429, 500, 503, 529]); + const delays = [1000, 4000, 12000]; + let lastErr; + for (let attempt = 0; attempt <= delays.length; attempt++) { + try { + const res = await fetch(ANTHROPIC_API_URL, { + method: 'POST', + headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' }, + body: JSON.stringify({ + model: TRANSLATE_MODEL, max_tokens: maxTokens, system, + messages: [{ role: 'user', content: user }], + }), + }); + if (!res.ok) { + const err = await res.json().catch(() => ({})); + const e = new Error(err.error?.message || `Claude API ${res.status}`); e.status = res.status; throw e; + } + const data = await res.json(); + let raw = data.content[0].text.trim(); + const md = raw.match(/```(?:json)?\s*([\s\S]+?)\s*```/); + if (md) raw = md[1]; + return JSON.parse(raw); + } catch (err) { + lastErr = err; + // Netzfehler (kein status) oder retrybare HTTP-Codes → erneut versuchen + const retryable = err.status == null || RETRYABLE.has(err.status); + if (!retryable || attempt === delays.length) throw err; + await new Promise(r => setTimeout(r, delays[attempt])); + } + } + throw lastErr; } // Übersetzt einen Text inkl. Placeholder-Schutz. 
diff --git a/src/routes/pipeline.js b/src/routes/pipeline.js index 9de4225..72ce913 100644 --- a/src/routes/pipeline.js +++ b/src/routes/pipeline.js @@ -3,7 +3,7 @@ const router = require('express').Router(); const { query } = require('../db'); const { LANGS } = require('../lib/translate'); const { loadPairContext, computeReadiness, loadPairContent } = require('../lib/pairContent'); -const { enqueue, loadPairs, collectAudioUnits, generateWithBackoff } = require('../lib/pipeline'); +const { enqueue, loadPairs, collectAudioUnits, generateWithBackoff, translatePair } = require('../lib/pipeline'); const { PLACEHOLDER_RE } = require('../lib/placeholders'); // ── Objekt-Wort-Erkennung in Sätzen (für die manuelle Zuweisung beim Review) ── @@ -207,6 +207,21 @@ router.post('/assign-object', async (req, res, next) => { } catch (err) { next(err); } }); +// POST /api/pipeline/picture/:id/translate-fill — fehlende Übersetzungen dieses Bildes nachholen +// (z.B. wenn bei einem früheren Lauf eine Sprache durch einen API-Fehler weggebrochen ist) +router.post('/picture/:id/translate-fill', async (req, res, next) => { + try { + const pairs = await loadPairs(req.params.id); + if (!pairs.length) return res.status(400).json({ error: 'Bild hat keine Pairs' }); + const result = { translated: 0, failed: 0, errors: [] }; + for (const p of pairs) { + try { await translatePair(p); result.translated++; } + catch (err) { result.failed++; result.errors.push({ pair_id: p.id, error: err.message }); } + } + res.json(result); + } catch (err) { next(err); } +}); + // POST /api/pipeline/picture/:id/audio-fill — fehlende Audios dieses Bildes nachgenerieren router.post('/picture/:id/audio-fill', async (req, res, next) => { try {