Files
snakkimo-API/src/routes/pipeline.js
admin f5b69a9213 feat: ElevenLabs-Voice-Liste + Fehlerdetails in Audio-Batch-Ergebnissen
- GET /api/tts-settings/voices/available listet die Account-Stimmen
  (Grundlage für Voice-Auswahl im CMT statt Freitext-IDs)
- Audio-Batch/-Fill-Fehler enthalten jetzt das ElevenLabs-Detail
  (z.B. voice_not_found) statt nur 'ElevenLabs error'

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 21:00:27 +02:00

351 lines
16 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Content-Pipeline: Freigeben (release) → Auto-Verarbeitung → Review-Bundle → Bild-Publish.
const router = require('express').Router();
const { query } = require('../db');
const { LANGS } = require('../lib/translate');
const { loadPairContext, computeReadiness, loadPairContent } = require('../lib/pairContent');
const { enqueue, loadPairs, collectAudioUnits, generateWithBackoff, translatePair } = require('../lib/pipeline');
const { describeError } = require('./audios');
const { PLACEHOLDER_RE } = require('../lib/placeholders');
// ── Object-word detection in sentences (for manual assignment during review) ──
/**
 * Looks for `title` as a standalone word — optionally followed by up to two
 * inflection letters — in the parts of `sentence` that lie outside existing
 * placeholders.
 *
 * @param {string} sentence - Sentence text, possibly containing {{…}} placeholders.
 * @param {string} title - Word to look for; surrounding whitespace is ignored.
 * @returns {string|null} The text as it appears in the sentence (word plus any
 *   matched ending), or null when an input is empty or nothing matches.
 */
function findWordInSentence(sentence, title) {
  if (!sentence) return null;
  const needle = (title || '').trim();
  if (!needle) return null;

  // Escape regex metacharacters so the title is matched literally.
  const escapedNeedle = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The lookbehind/lookahead reject matches that are embedded in a longer word.
  const wordPattern = new RegExp(`(?<![A-Za-zÄÖÜäöüßÅÄÖåäö])(${escapedNeedle}[a-zäöüßåö]{0,2})(?![A-Za-zÄÖÜäöüßÅÄÖåäö])`, 'iu');

  // split() interleaves capture-group values with the plain text. PLACEHOLDER_RE is
  // expected to carry three groups (label, type, uuid), so text sits at every 4th index.
  const pieces = sentence.split(PLACEHOLDER_RE);
  let offset = 0;
  while (offset < pieces.length) {
    const found = (pieces[offset] || '').match(wordPattern);
    if (found) return found[1];
    offset += 4;
  }
  return null;
}
/**
 * Replaces the first standalone occurrence of `title` (outside of {{…}} placeholders)
 * with an object placeholder of the form {{match.o:objectId}}.
 *
 * @param {string} sentence - Sentence text, possibly containing placeholders.
 * @param {string} title - Word to wrap; matched via findWordInSentence.
 * @param {string} objectId - ID written into the placeholder.
 * @returns {string|null} The rewritten sentence, or null when the word does not occur.
 */
function wrapWordAsObject(sentence, title, objectId) {
  const hit = findWordInSentence(sentence, title);
  if (!hit) return null;

  // Locate the hit with the same word boundaries findWordInSentence applies. A plain
  // substring search would wrap the word inside a longer one that happens to come
  // first (e.g. "Hund" inside "Hundertfüßler").
  const escapedHit = hit.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const standaloneHit = new RegExp(
    `(?<![A-Za-zÄÖÜäöüßÅÄÖåäö])${escapedHit}(?![A-Za-zÄÖÜäöüßÅÄÖåäö])`, 'iu');

  // Rebuild the sentence token by token so existing placeholders are never touched.
  let replaced = false;
  const parts = sentence.split(/(\{\{[^}]+\}\})/g).map((token) => {
    if (replaced || token.startsWith('{{')) return token;
    const found = standaloneHit.exec(token);
    if (!found) return token;
    replaced = true;
    // found[0] keeps the sentence's own casing for the placeholder label.
    const actual = found[0];
    const before = token.slice(0, found.index);
    const after = token.slice(found.index + actual.length);
    return `${before}{{${actual}.o:${objectId}}}${after}`;
  });
  return replaced ? parts.join('') : null;
}
// POST /api/pipeline/release/:pictureId — hands a picture over to the pipeline.
// Responses: 404 unknown picture · 409 a run is already queued/running ·
// 400 picture has no objects · 202 with the updated picture row once queued.
router.post('/release/:pictureId', async (req, res, next) => {
  try {
    const { pictureId } = req.params;
    const pr = await query(`SELECT id, pipeline_status FROM pictures WHERE id = $1`, [pictureId]);
    if (!pr.rows.length) return res.status(404).json({ error: 'Bild nicht gefunden' });
    if (['queued', 'running'].includes(pr.rows[0].pipeline_status))
      return res.status(409).json({ error: 'Pipeline läuft bereits für dieses Bild' });

    const oc = await query(
      `SELECT count(*) AS c FROM object_pictures WHERE picture_id = $1`, [pictureId]);
    if (!Number.parseInt(oc.rows[0].c, 10))
      return res.status(400).json({ error: 'Bild hat keine Objekte — erst Objekte anlegen' });

    // The status guard is repeated inside the UPDATE so that two concurrent release
    // requests cannot both pass the check above and enqueue the picture twice.
    const upd = await query(
      `UPDATE pictures SET pipeline_status='queued', pipeline_error=NULL, pipeline_step=NULL,
pipeline_started_at=NULL, pipeline_finished_at=NULL
WHERE id=$1 AND (pipeline_status IS NULL OR pipeline_status NOT IN ('queued', 'running'))
RETURNING *`, [pictureId]);
    if (!upd.rows.length)
      return res.status(409).json({ error: 'Pipeline läuft bereits für dieses Bild' });

    enqueue(pictureId);
    res.status(202).json(upd.rows[0]);
  } catch (err) { next(err); }
});
// POST /api/pipeline/retry/:pictureId — re-queues a run that ended in status 'failed'.
// Status check and re-queue happen in a single UPDATE; when no row is affected the
// request is answered with 409, otherwise the picture is enqueued and returned with 202.
router.post('/retry/:pictureId', async (req, res, next) => {
  try {
    const { pictureId } = req.params;
    const { rows } = await query(
      `UPDATE pictures SET pipeline_status='queued', pipeline_error=NULL
WHERE id=$1 AND pipeline_status='failed' RETURNING *`, [pictureId]);
    const [requeued] = rows;
    if (!requeued) {
      return res.status(409).json({ error: 'Bild ist nicht im Status "failed"' });
    }
    enqueue(pictureId);
    res.status(202).json(requeued);
  } catch (err) {
    next(err);
  }
});
// GET /api/pipeline/overview — polling endpoint listing every picture whose
// pipeline_status is neither 'none' nor 'published', including its object count,
// ordered by pipeline start (newest first, unstarted last) and then by last update.
router.get('/overview', async (req, res, next) => {
  try {
    const overviewSql =
      `SELECT p.id, p.design, p.picture_link, p.blurhash, p.status,
p.pipeline_status, p.pipeline_step, p.pipeline_progress, p.pipeline_error,
p.pipeline_started_at, p.pipeline_finished_at,
(SELECT count(*) FROM object_pictures op WHERE op.picture_id = p.id) AS object_count
FROM pictures p
WHERE p.pipeline_status NOT IN ('none', 'published')
ORDER BY p.pipeline_started_at DESC NULLS LAST, p.updated_at DESC`;
    const { rows: pictures } = await query(overviewSql);
    res.json(pictures);
  } catch (err) {
    next(err);
  }
});
// GET /api/pipeline/picture/:id/bundle — complete review payload for the publish page.
// Responds with { picture, objects }. Every non-blocked object carries its words and
// its non-blocked pairs; every pair carries its content, the audio readiness per
// language and the placeholder candidates found in its German sentences.
router.get('/picture/:id/bundle', async (req, res, next) => {
  try {
    const pictureId = req.params.id;
    const pr = await query(`SELECT * FROM pictures WHERE id = $1`, [pictureId]);
    if (!pr.rows.length) return res.status(404).json({ error: 'Bild nicht gefunden' });
    const picture = pr.rows[0];

    const objects = (await query(
      `SELECT o.id, o.status, o.selections
FROM object_pictures op JOIN objects o ON o.id = op.object_id
WHERE op.picture_id = $1 AND o.status <> 'blocked'
ORDER BY o.created_at`, [pictureId])).rows;

    for (const obj of objects) {
      obj.words = (await query(
        `SELECT w.id, w.titel_de, w.titel_en, w.titel_sv, w.status
FROM object_words ow JOIN words w ON w.id = ow.word_id
WHERE ow.object_id = $1`, [obj.id])).rows;
      obj.pairs = (await query(
        `SELECT p.id, p.answer_type, p.status, p.difficulty_level,
p.question_id, p.positive_statement_id, p.negative_statement_id
FROM object_pairs op JOIN pairs p ON p.id = op.pair_id
WHERE op.object_id = $1 AND p.status <> 'blocked'
ORDER BY p.created_at`, [obj.id])).rows;
    }

    // Object-word candidates: all words of all objects of this picture.
    const objectWordCandidates = objects.flatMap(o =>
      o.words.filter(w => (w.titel_de || '').trim())
        .map(w => ({ object_id: o.id, word_id: w.id, titel_de: w.titel_de })));

    // Load the readiness context once per language for all pairs instead of once per
    // pair and language — the same batched call the publish handler makes. Pairs that
    // are linked to several objects are passed only once.
    const uniquePairs = [...new Map(
      objects.flatMap(o => o.pairs).map(p => [p.id, p])).values()];
    const contextByLang = new Map();
    if (uniquePairs.length) {
      for (const lang of LANGS) contextByLang.set(lang, await loadPairContext(uniquePairs, lang));
    }

    for (const obj of objects) {
      for (const p of obj.pairs) {
        p.content = await loadPairContent(p);

        // Audio coverage per pair × language (drives the speaker indicators).
        p.audio = {};
        for (const lang of LANGS) {
          const r = computeReadiness(p, contextByLang.get(lang), lang,
            { skipPicturePublished: true, skipStatusChecks: true });
          p.audio[lang] = { ready: r.ready, missing: r.missing };
        }

        // Placeholder candidates: object words that occur in a German sentence outside
        // of any placeholder, offered for manual assignment during review.
        p.candidates = [];
        const fields = [
          ['questions', p.question_id, 'sentence', p.content.question?.sentence_de],
          ['statements', p.positive_statement_id, 'positive_sentence', p.content.positive?.sentence?.positive_sentence_de],
          ['statements', p.negative_statement_id, 'negative_sentence', p.content.negative?.sentence?.negative_sentence_de],
        ];
        for (const [table, id, field, sentence] of fields) {
          if (!id || !(sentence || '').trim()) continue;
          for (const cand of objectWordCandidates) {
            const hit = findWordInSentence(sentence, cand.titel_de);
            if (hit) p.candidates.push({
              source_table: table, source_id: id, source_field: field,
              object_id: cand.object_id, word_id: cand.word_id, label: hit,
            });
          }
        }

        // The foreign keys were only needed to build the payload; keep them out of it.
        delete p.question_id; delete p.positive_statement_id; delete p.negative_statement_id;
      }
    }
    res.json({ picture, objects });
  } catch (err) { next(err); }
});
// POST /api/pipeline/assign-object — marks a word in a sentence as an object reference.
// Body: { source_table, source_id, source_field, object_id, word_id }
// For every language in LANGS the word's title in that language is wrapped as
// {{word.o:objectId}} in the matching sentence column. Responds with the languages
// that were updated and those where the word was not found.
router.post('/assign-object', async (req, res, next) => {
  try {
    const { source_table, source_id, source_field, object_id, word_id } = req.body || {};

    // Table and column names are interpolated into SQL below, so they must come from
    // this whitelist. A Map lookup (unlike a plain-object lookup) neither resolves
    // inherited keys such as "constructor" nor coerces non-string values to a key.
    const FIELDS = new Map([
      ['questions', ['sentence']],
      ['statements', ['positive_sentence', 'negative_sentence']],
    ]);
    if (!FIELDS.get(source_table)?.includes(source_field))
      return res.status(400).json({ error: 'Ungültige source_table/source_field' });
    if (!source_id || !object_id || !word_id)
      return res.status(400).json({ error: 'source_id, object_id und word_id sind Pflicht' });

    const obj = await query(`SELECT id FROM objects WHERE id = $1`, [object_id]);
    if (!obj.rows.length) return res.status(404).json({ error: 'Objekt nicht gefunden' });
    const wr = await query(`SELECT titel_de, titel_en, titel_sv FROM words WHERE id = $1`, [word_id]);
    if (!wr.rows.length) return res.status(404).json({ error: 'Wort nicht gefunden' });
    const word = wr.rows[0];

    const cols = LANGS.map(l => `${source_field}_${l}`);
    const rr = await query(`SELECT ${cols.join(', ')} FROM ${source_table} WHERE id = $1`, [source_id]);
    if (!rr.rows.length) return res.status(404).json({ error: 'Satz-Zeile nicht gefunden' });
    const row = rr.rows[0];

    // Wrap the word per language; a language is skipped when its sentence does not
    // contain the word's title for that language.
    const updates = {};
    const updated = [];
    const skipped = [];
    for (const l of LANGS) {
      const column = `${source_field}_${l}`;
      const wrapped = wrapWordAsObject(row[column] || '', word[`titel_${l}`], object_id);
      if (wrapped) { updates[column] = wrapped; updated.push(l); }
      else skipped.push(l);
    }

    if (updated.length) {
      const cells = Object.keys(updates);
      await query(
        `UPDATE ${source_table} SET ${cells.map((c, i) => `${c} = $${i + 1}`).join(', ')} WHERE id = $${cells.length + 1}`,
        [...cells.map(c => updates[c]), source_id]);
    }
    res.json({ updated_langs: updated, skipped_langs: skipped });
  } catch (err) { next(err); }
});
// POST /api/pipeline/picture/:id/translate-fill — catches up on missing translations
// of this picture (e.g. when a language dropped out of an earlier run due to an API error).
// Every pair is attempted; failures are collected per pair instead of aborting the request.
router.post('/picture/:id/translate-fill', async (req, res, next) => {
  try {
    const pairs = await loadPairs(req.params.id);
    if (!pairs.length) {
      return res.status(400).json({ error: 'Bild hat keine Pairs' });
    }

    let translated = 0;
    const errors = [];
    for (const pair of pairs) {
      try {
        await translatePair(pair);
        translated += 1;
      } catch (err) {
        errors.push({ pair_id: pair.id, error: err.message });
      }
    }
    res.json({ translated, failed: errors.length, errors });
  } catch (err) {
    next(err);
  }
});
// POST /api/pipeline/picture/:id/audio-fill — generates the audios this picture is
// still missing. Only units without audio are attempted; each failure is reported
// with its source (table/field/language) and the detail from describeError.
router.post('/picture/:id/audio-fill', async (req, res, next) => {
  try {
    const pictureId = req.params.id;
    const pairs = await loadPairs(pictureId);
    if (!pairs.length) {
      return res.status(400).json({ error: 'Bild hat keine Pairs' });
    }

    const allUnits = await collectAudioUnits(pictureId, pairs);
    const pending = allUnits.filter(unit => !unit.hasAudio);

    let generated = 0;
    const errors = [];
    for (const unit of pending) {
      try {
        await generateWithBackoff(unit);
        generated += 1;
      } catch (err) {
        errors.push({
          source: `${unit.source_table}/${unit.source_field}/${unit.language}`,
          error: describeError(err),
        });
      }
    }
    res.json({ generated, failed: errors.length, errors });
  } catch (err) {
    next(err);
  }
});
// GET /api/pipeline/settings — returns { pairs_per_object }, falling back to 5 when
// the setting is absent or does not parse as an integer.
router.get('/settings', async (req, res, next) => {
  try {
    const { rows } = await query(`SELECT value FROM app_settings WHERE key = 'pipeline.pairs_per_object'`);
    const stored = parseInt(rows[0]?.value);
    const pairs_per_object = Number.isNaN(stored) ? 5 : stored;
    res.json({ pairs_per_object });
  } catch (err) {
    next(err);
  }
});
// PUT /api/pipeline/settings — Body { pairs_per_object }
// Stores the value clamped to 1..20. A missing, non-numeric or zero value falls back
// to the default of 5. Responds with the value that was stored.
router.put('/settings', async (req, res, next) => {
  try {
    // Optional chaining: a request without a body gets the default instead of a TypeError.
    const requested = Number.parseInt(req.body?.pairs_per_object, 10);
    const n = Math.min(Math.max(requested || 5, 1), 20);
    await query(
      `INSERT INTO app_settings (key, value) VALUES ('pipeline.pairs_per_object', $1::jsonb)
ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value`, [JSON.stringify(n)]);
    res.json({ pairs_per_object: n });
  } catch (err) { next(err); }
});
// POST /api/pipeline/picture/:id/publish — Body { excluded_pair_ids: [] }
// Blocks the excluded pairs, validates the remaining ones (readiness in every language)
// and then publishes in cascade: questions, statements, pairs, words, objects, picture.
// Responses: 404 unknown picture · 400 no publishable pairs left ·
// 409 with the list of pair/language combinations that are not ready · 200 with counts.
// NOTE(review): the updates below are separate statements, not one transaction, so a
// failure midway leaves a partial state (excluded pairs stay blocked) — confirm whether
// the db module offers a transaction helper.
router.post('/picture/:id/publish', async (req, res, next) => {
  try {
    const pictureId = req.params.id;
    const excluded = Array.isArray(req.body?.excluded_pair_ids) ? req.body.excluded_pair_ids : [];
    const pr = await query(`SELECT id, status, pipeline_status FROM pictures WHERE id = $1`, [pictureId]);
    if (!pr.rows.length) return res.status(404).json({ error: 'Bild nicht gefunden' });

    // 1) Block the excluded pairs. The sub-select restricts this to pairs that belong
    //    to this picture, so IDs of unrelated pairs in the request body are ignored.
    let blockedPairs = 0;
    if (excluded.length) {
      const blocked = await query(
        `UPDATE pairs SET status='blocked', blocked_at=NOW(), blocked_topic='Im Publish-Review ausgeschlossen'
WHERE id = ANY($1)
AND id IN (
SELECT op.pair_id FROM object_pairs op
JOIN object_pictures pic ON pic.object_id = op.object_id
WHERE pic.picture_id = $2)
RETURNING id`, [excluded, pictureId]);
      blockedPairs = blocked.rows.length;
    }

    // 2) Load the remaining pairs of the picture.
    const pairs = (await query(
      `SELECT DISTINCT p.id, p.answer_type, p.status, p.question_id,
p.positive_statement_id, p.negative_statement_id
FROM object_pairs op
JOIN object_pictures pic ON pic.object_id = op.object_id
JOIN pairs p ON p.id = op.pair_id
WHERE pic.picture_id = $1 AND p.status <> 'blocked'`, [pictureId])).rows;
    if (!pairs.length) return res.status(400).json({ error: 'Keine veröffentlichbaren Pairs übrig' });

    // 3) Readiness per pair × language. Picture and status checks are skipped because
    //    those are published as part of this very request.
    const notReady = [];
    for (const lang of LANGS) {
      const ctx = await loadPairContext(pairs, lang);
      for (const p of pairs) {
        const r = computeReadiness(p, ctx, lang, { skipPicturePublished: true, skipStatusChecks: true });
        if (!r.ready) notReady.push({ pair_id: p.id, lang, missing: r.missing });
      }
    }
    if (notReady.length)
      return res.status(409).json({ error: 'Noch nicht veröffentlichbar', notReady });

    // 4) Publish in cascade. COALESCE keeps an already existing publish timestamp.
    const now = new Date().toISOString();
    const questionIds = [...new Set(pairs.map(p => p.question_id).filter(Boolean))];
    const stmtIds = [...new Set(pairs.flatMap(p => [p.positive_statement_id, p.negative_statement_id]).filter(Boolean))];
    const pairIds = pairs.map(p => p.id);
    if (questionIds.length)
      await query(`UPDATE questions SET status='published', published_at=COALESCE(published_at,$2)
WHERE id = ANY($1)`, [questionIds, now]);
    if (stmtIds.length)
      await query(`UPDATE statements SET status='published', published_at=COALESCE(published_at,$2)
WHERE id = ANY($1)`, [stmtIds, now]);
    await query(`UPDATE pairs SET status='published', published_at=COALESCE(published_at,$2)
WHERE id = ANY($1)`, [pairIds, now]);

    // Linked words: only 'generated' → 'published'. 'translated' words are left alone
    // for the image generation in the ServerMonitor flow; 'published' would skip that step.
    let publishedWords = 0;
    if (stmtIds.length) {
      const w = await query(
        `UPDATE words SET status='published', published_at=COALESCE(published_at,$2)
WHERE status='generated' AND id IN (
SELECT word_id FROM statement_positive_words WHERE statement_id = ANY($1)
UNION SELECT word_id FROM statement_negative_words WHERE statement_id = ANY($1)
) RETURNING id`, [stmtIds, now]);
      publishedWords = w.rows.length;
    }

    await query(
      `UPDATE objects SET status='published', published_at=COALESCE(published_at,$2)
WHERE id IN (SELECT object_id FROM object_pictures WHERE picture_id = $1)
AND status <> 'blocked'`, [pictureId, now]);
    await query(
      `UPDATE pictures SET status='published', published_timestamp=COALESCE(published_timestamp,$2),
pipeline_status='published'
WHERE id=$1`, [pictureId, now]);

    res.json({
      published_pairs: pairIds.length,
      // Number of pairs actually blocked by this request.
      blocked_pairs: blockedPairs,
      published_words: publishedWords,
      picture_id: pictureId,
    });
  } catch (err) { next(err); }
});
// Export the router that carries the pipeline routes registered in this file.
module.exports = router;