feat: automatische Content-Pipeline (release → pairs → übersetzen → audio → ready)

- pictures.pipeline_* Spalten + app_settings Tabelle (Migration)
- lib/placeholders.js: Placeholder-Auflösung; TTS spricht keine UUIDs mehr
- lib/pairContent.js: geteilte Pair-Logik (Readiness mit Skip-Optionen)
- lib/generatePairs.js: Claude-Generierung (konfigurierbare Anzahl, nur
  Nomen/Adjektive bei word-Pairs) + serverseitige Persistenz inkl. object_pairs
- lib/pipeline.js: In-Process-Runner, idempotente Schritte, Boot-Resume
- routes/pipeline.js: release/retry/overview/bundle/settings + Bild-Publish
  (kaskadiert Fragen/Statements/Pairs/Wörter/Objekte/Bild)

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 20:52:11 +02:00
parent 29a260e351
commit 6af2428df5
10 changed files with 946 additions and 151 deletions

View File

@@ -3,6 +3,7 @@ const { v4: uuidv4 } = require('uuid');
const { query } = require('../db');
const { uploadFile, deleteFile, keyFromUrl } = require('../s3');
const { voiceForLanguage } = require('../voices');
const { resolvePlaceholdersToLabels } = require('../lib/placeholders');
const ELEVENLABS_BASE = 'https://api.elevenlabs.io/v1';
const ALLOWED_STATUSES = ['generated', 'published', 'blocked'];
@@ -40,6 +41,10 @@ async function generateAndStore({ text, voice_id, language, model_id, speed, sta
const apiKey = process.env.ELEVENLABS_API_KEY;
if (!apiKey) { const e = new Error('ELEVENLABS_API_KEY not configured'); e.status = 500; throw e; }
// Placeholder ({{label.w:uuid}}) in Sprechtext auflösen — UUIDs dürfen nie vertont werden.
text = resolvePlaceholdersToLabels(text);
if (!text) { const e = new Error('text is empty after placeholder resolution'); e.status = 400; throw e; }
const cfg = await getTtsSettings(language);
const voice = voice_id || cfg.voice_id;
const m = model_id || cfg.model_id;
@@ -311,3 +316,5 @@ router.delete('/:id', async (req, res, next) => {
});
module.exports = router;
// Für lib/pipeline.js (Audio-Generierung außerhalb des HTTP-Kontexts)
module.exports.generateAndStore = generateAndStore;

View File

@@ -1,6 +1,7 @@
const router = require('express').Router();
const { query } = require('../db');
const { fillMissingRow, translateWords, maybeAutoTranslated } = require('../lib/translate');
const { fillMissingRow } = require('../lib/translate');
const { loadPairContext, computeReadiness, loadPairContent, translateWordGroup } = require('../lib/pairContent');
const STATUSES = ['draft', 'reviewed', 'blocked', 'published'];
const ANSWER_TYPES = new Set(['yes_no', 'text', 'question', 'word']);
@@ -18,88 +19,6 @@ const STATUS_TIMESTAMP = {
const LANGS = ['de', 'en', 'sv'];
/**
 * Collects, for a set of pairs, the lookup maps needed to judge their readiness
 * in one language: questions, statements, picture linkage and existing audios.
 *
 * @param {Array<object>} pairs - Pair rows; reads `id`, `question_id`,
 *   `positive_statement_id` and `negative_statement_id`.
 * @param {string} lang - Language code; must be one of LANGS because it is
 *   spliced into column names.
 * @returns {Promise<{questionsMap: object, statementsMap: object, pictureMap: object, audioMap: object}>}
 *   `questionsMap`/`statementsMap` are keyed by row id, `pictureMap` by pair id,
 *   `audioMap` by `"<source_table>|<source_id>|<source_field>"`.
 * @throws {Error} with `status = 400` when `lang` is not a supported language.
 */
async function loadPairContext(pairs, lang) {
  // `lang` ends up inside column identifiers, which cannot be bound as query
  // parameters — so it has to be whitelisted before any SQL text is built.
  if (!LANGS.includes(lang)) {
    const e = new Error(`unsupported language: ${lang}`);
    e.status = 400;
    throw e;
  }

  const questionIds = [...new Set(pairs.map(p => p.question_id).filter(Boolean))];
  const statementIds = [...new Set([
    ...pairs.map(p => p.positive_statement_id),
    ...pairs.map(p => p.negative_statement_id),
  ].filter(Boolean))];
  const pairIds = pairs.map(p => p.id);

  const questionsMap = {};
  const statementsMap = {};
  const pictureMap = {};
  const audioMap = {};

  if (questionIds.length) {
    const r = await query(
      `SELECT id, status, sentence_${lang} AS sentence FROM questions WHERE id = ANY($1)`, [questionIds]);
    r.rows.forEach(q => { questionsMap[q.id] = q; });
  }

  if (statementIds.length) {
    const r = await query(
      `SELECT id, status, positive_sentence_${lang} AS positive, negative_sentence_${lang} AS negative
       FROM statements WHERE id = ANY($1)`, [statementIds]);
    r.rows.forEach(s => { statementsMap[s.id] = s; });
  }

  if (pairIds.length) {
    // One row per pair that reaches at least one picture through its objects.
    const r = await query(
      `SELECT op.pair_id,
              bool_or(true) AS has_picture,
              bool_or(pp.status = 'published') AS has_published_picture
       FROM object_pairs op
       JOIN object_pictures pic ON pic.object_id = op.object_id
       JOIN pictures pp ON pp.id = pic.picture_id
       WHERE op.pair_id = ANY($1)
       GROUP BY op.pair_id`, [pairIds]);
    r.rows.forEach(row => { pictureMap[row.pair_id] = row; });

    // Audios of the referenced questions/statements; blocked audios do not count.
    const ids = [...questionIds, ...statementIds];
    if (ids.length) {
      const a = await query(
        `SELECT source_table, source_id, source_field FROM audios
         WHERE source_id = ANY($1) AND language = $2 AND status <> 'blocked'`, [ids, lang]);
      a.rows.forEach(x => { audioMap[`${x.source_table}|${x.source_id}|${x.source_field}`] = true; });
    }
  }

  return { questionsMap, statementsMap, pictureMap, audioMap };
}
/**
 * Works out what a pair still lacks before it can be published in one language.
 *
 * @param {object} p - Pair row; reads `id`, `question_id`, `positive_statement_id`,
 *   `negative_statement_id`.
 * @param {object} ctx - Lookup maps `questionsMap`, `statementsMap`, `pictureMap`, `audioMap`.
 * @param {string} lang - Language code, used only inside the "text missing" messages.
 * @returns {{missing: string[], missingCount: number, ready: boolean}}
 */
function computeReadiness(p, ctx, lang) {
  const missing = [];
  const isBlank = (text) => !(text || '').trim();
  const lookup = (map, id) => (id ? map[id] : null);
  // Shared tail of every sentence check: release status first, then audio.
  const checkReleaseAndAudio = (row, audioKey, statusGap, audioGap) => {
    if (row.status !== 'published') missing.push(statusGap);
    if (!ctx.audioMap[audioKey]) missing.push(audioGap);
  };

  // Picture: must exist, and at least one linked picture must be published.
  const picture = ctx.pictureMap[p.id];
  if (!picture?.has_picture) {
    missing.push('Bild fehlt');
  } else if (!picture.has_published_picture) {
    missing.push('Bild nicht veröffentlicht');
  }

  // Question: without text nothing else about it is reported.
  const question = lookup(ctx.questionsMap, p.question_id);
  if (question) {
    if (isBlank(question.sentence)) {
      missing.push(`Frage-Text (${lang}) fehlt`);
    } else {
      checkReleaseAndAudio(
        question, `questions|${p.question_id}|sentence`,
        'Frage nicht freigegeben', 'Audio Frage fehlt');
    }
  }

  // Positive statement: same rule as the question.
  const positive = lookup(ctx.statementsMap, p.positive_statement_id);
  if (positive) {
    if (isBlank(positive.positive)) {
      missing.push(`Positiv-Satz (${lang}) fehlt`);
    } else {
      checkReleaseAndAudio(
        positive, `statements|${p.positive_statement_id}|positive_sentence`,
        'Positiv-Satz nicht freigegeben', 'Audio Positiv fehlt');
    }
  }

  // Negative statement: only checked when it actually has text.
  const negative = lookup(ctx.statementsMap, p.negative_statement_id);
  if (negative && !isBlank(negative.negative)) {
    checkReleaseAndAudio(
      negative, `statements|${p.negative_statement_id}|negative_sentence`,
      'Negativ-Satz nicht freigegeben', 'Audio Negativ fehlt');
  }

  return { missing, missingCount: missing.length, ready: missing.length === 0 };
}
// GET /api/pairs/publishability?lang=sv — Pairs mit Readiness, sortierbar nach "am wenigsten fehlt"
router.get('/publishability', async (req, res, next) => {
try {
@@ -215,72 +134,6 @@ router.patch('/:id', async (req, res, next) => {
} catch (err) { next(err); }
});
/**
 * Loads the full three-language content bundle of a pair (question, statements,
 * linked words) — consumed by the review modal in the frontend.
 *
 * @param {object} p - Pair row; reads `answer_type`, `question_id`,
 *   `positive_statement_id`, `negative_statement_id`.
 * @returns {Promise<{answer_type: *, question: ?object, positive: ?object, negative: ?object}>}
 *   `positive`/`negative` are `{ sentence, words, answer }` or null when the pair has no such statement.
 */
async function loadPairContent(p) {
  const columnsFor = (prefix) => ['de', 'en', 'sv'].map((l) => `${prefix}_${l}`).join(', ');
  const firstRow = async (sql, params) => (await query(sql, params)).rows[0];

  // Statement sentence columns plus the words attached through `linkTable`.
  const fetchStatement = async (statementId, sentencePrefix, linkTable) => {
    if (!statementId) return null;
    const sentence = (await firstRow(
      `SELECT ${columnsFor(sentencePrefix)}, answer FROM statements WHERE id = $1`, [statementId])) || {};
    const { rows: words } = await query(
      `SELECT w.id, w.titel_de, w.titel_en, w.titel_sv
       FROM ${linkTable} lw JOIN words w ON w.id = lw.word_id
       WHERE lw.statement_id = $1
       ORDER BY w.titel_de`, [statementId]);
    return { sentence, words, answer: sentence.answer ?? null };
  };

  let question = null;
  if (p.question_id) {
    question = (await firstRow(
      `SELECT ${columnsFor('sentence')} FROM questions WHERE id = $1`, [p.question_id])) || null;
  }
  const positive = await fetchStatement(p.positive_statement_id, 'positive_sentence', 'statement_positive_words');
  const negative = await fetchStatement(p.negative_statement_id, 'negative_sentence', 'statement_negative_words');

  return { answer_type: p.answer_type, question, positive, negative };
}
/**
 * Translates the words attached to one statement as a group (one translateWords
 * call per target language), passing the question sentence as disambiguation context.
 *
 * @param {*} statementId - Statement whose linked words are translated.
 * @param {string} linkTable - Link table joining statements to words (spliced into the SQL).
 * @param {?object} questionRow - `{ sentence_de, sentence_en, sentence_sv }` or null.
 * @param {boolean} overwrite - When truthy, already filled target languages are re-translated.
 * @returns {Promise<number>} Number of word fields that were actually updated.
 */
async function translateWordGroup(statementId, linkTable, questionRow, overwrite) {
  const titleOf = (word, lang) => (word[`titel_${lang}`] || '').trim();

  const { rows: words } = await query(
    `SELECT w.id, w.titel_de, w.titel_en, w.titel_sv
     FROM ${linkTable} lw JOIN words w ON w.id = lw.word_id
     WHERE lw.statement_id = $1`, [statementId]);
  if (words.length === 0) return 0;

  // Source language: the first language in which at least one word has text.
  const src = LANGS.find((lang) => words.some((word) => titleOf(word, lang)));
  if (!src) return 0;

  let context = '';
  if (questionRow) {
    context = questionRow[`sentence_${src}`]
      || questionRow.sentence_de
      || questionRow.sentence_en
      || questionRow.sentence_sv
      || '';
  }

  let updated = 0;
  for (const to of LANGS.filter((lang) => lang !== src)) {
    // Words that have source text and whose target is empty (or every such word on overwrite).
    const pending = [];
    for (const word of words) {
      const text = titleOf(word, src);
      if (text && (overwrite || !titleOf(word, to))) pending.push({ id: word.id, text });
    }
    if (pending.length === 0) continue;

    const translations = await translateWords({ words: pending, from: src, to, context });
    for (const { id } of pending) {
      const translated = translations[id];
      if (!translated) continue;
      await query(`UPDATE words SET titel_${to} = $1 WHERE id = $2`, [translated, id]);
      await maybeAutoTranslated(id);
      updated += 1;
    }
  }
  return updated;
}
// POST /api/pairs/:id/translate — übersetzt fehlende Sätze/Wörter dieses Pairs in die
// fehlenden Sprachen (de/en/sv). Body `{ overwrite: true }` übersetzt auch bereits gefüllte
// Zielsprachen neu (Quellsprache bleibt unangetastet). Liefert das aktualisierte Inhalts-Bündel.

207
src/routes/pipeline.js Normal file
View File

@@ -0,0 +1,207 @@
// Content-Pipeline: Freigeben (release) → Auto-Verarbeitung → Review-Bundle → Bild-Publish.
const router = require('express').Router();
const { query } = require('../db');
const { LANGS } = require('../lib/translate');
const { loadPairContext, computeReadiness, loadPairContent } = require('../lib/pairContent');
const { enqueue } = require('../lib/pipeline');
// POST /api/pipeline/release/:pictureId — hands a picture over to the pipeline.
// Responses: 404 unknown picture · 409 a run is already queued/running ·
// 400 picture has no objects · 202 with the updated picture row.
router.post('/release/:pictureId', async (req, res, next) => {
  try {
    const { pictureId } = req.params;
    const alreadyActive = { error: 'Pipeline läuft bereits für dieses Bild' };

    const pr = await query(`SELECT id, pipeline_status FROM pictures WHERE id = $1`, [pictureId]);
    if (!pr.rows.length) return res.status(404).json({ error: 'Bild nicht gefunden' });
    if (['queued', 'running'].includes(pr.rows[0].pipeline_status))
      return res.status(409).json(alreadyActive);

    const oc = await query(
      `SELECT count(*) AS c FROM object_pictures WHERE picture_id = $1`, [pictureId]);
    if (!(Number.parseInt(oc.rows[0].c, 10) > 0))
      return res.status(400).json({ error: 'Bild hat keine Objekte — erst Objekte anlegen' });

    // The status is re-checked inside the UPDATE itself: two release requests arriving
    // together could both pass the SELECT above, but only one can flip the row, so the
    // picture is enqueued once.
    const upd = await query(
      `UPDATE pictures SET pipeline_status='queued', pipeline_error=NULL, pipeline_step=NULL,
              pipeline_started_at=NULL, pipeline_finished_at=NULL
       WHERE id=$1 AND (pipeline_status IS NULL OR pipeline_status NOT IN ('queued', 'running'))
       RETURNING *`, [pictureId]);
    if (!upd.rows.length) return res.status(409).json(alreadyActive);

    enqueue(pictureId);
    res.status(202).json(upd.rows[0]);
  } catch (err) { next(err); }
});
// POST /api/pipeline/retry/:pictureId — restarts a failed run.
// The UPDATE only matches rows in status 'failed'; no match is answered with 409.
router.post('/retry/:pictureId', async (req, res, next) => {
  try {
    const { pictureId } = req.params;
    const { rows } = await query(
      `UPDATE pictures SET pipeline_status='queued', pipeline_error=NULL
       WHERE id=$1 AND pipeline_status='failed' RETURNING *`, [pictureId]);
    const [requeued] = rows;
    if (!requeued) {
      res.status(409).json({ error: 'Bild ist nicht im Status "failed"' });
      return;
    }
    enqueue(pictureId);
    res.status(202).json(requeued);
  } catch (err) {
    next(err);
  }
});
// GET /api/pipeline/overview — every picture currently in the pipeline (polling endpoint).
// Lists pictures whose pipeline_status is neither 'none' nor 'published', most recently
// started first, each with the number of linked objects.
router.get('/overview', async (req, res, next) => {
  try {
    const { rows: inPipeline } = await query(
      `SELECT p.id, p.design, p.picture_link, p.blurhash, p.status,
              p.pipeline_status, p.pipeline_step, p.pipeline_progress, p.pipeline_error,
              p.pipeline_started_at, p.pipeline_finished_at,
              (SELECT count(*) FROM object_pictures op WHERE op.picture_id = p.id) AS object_count
       FROM pictures p
       WHERE p.pipeline_status NOT IN ('none', 'published')
       ORDER BY p.pipeline_started_at DESC NULLS LAST, p.updated_at DESC`);
    res.json(inPipeline);
  } catch (err) {
    next(err);
  }
});
// GET /api/pipeline/picture/:id/bundle — complete review payload for the publish page.
// Returns { picture, objects }; each non-blocked object carries its words and its
// non-blocked pairs, each pair with its content bundle and a per-language audio
// readiness entry (`audio[lang] = { ready, missing }`).
router.get('/picture/:id/bundle', async (req, res, next) => {
  try {
    const pictureId = req.params.id;
    const pr = await query(`SELECT * FROM pictures WHERE id = $1`, [pictureId]);
    if (!pr.rows.length) return res.status(404).json({ error: 'Bild nicht gefunden' });
    const picture = pr.rows[0];

    const objects = (await query(
      `SELECT o.id, o.status, o.selections
       FROM object_pictures op JOIN objects o ON o.id = op.object_id
       WHERE op.picture_id = $1 AND o.status <> 'blocked'
       ORDER BY o.created_at`, [pictureId])).rows;

    for (const obj of objects) {
      obj.words = (await query(
        `SELECT w.id, w.titel_de, w.titel_en, w.titel_sv, w.status
         FROM object_words ow JOIN words w ON w.id = ow.word_id
         WHERE ow.object_id = $1`, [obj.id])).rows;

      const pairs = (await query(
        `SELECT p.id, p.answer_type, p.status, p.difficulty_level,
                p.question_id, p.positive_statement_id, p.negative_statement_id
         FROM object_pairs op JOIN pairs p ON p.id = op.pair_id
         WHERE op.object_id = $1 AND p.status <> 'blocked'
         ORDER BY p.created_at`, [obj.id])).rows;

      for (const p of pairs) {
        p.content = await loadPairContent(p);
        p.audio = {};
      }

      // Audio coverage per pair × language (for the 🔊 indicators). The context is
      // loaded once per language for all pairs of the object instead of once per
      // pair — the same batching the publish handler uses.
      if (pairs.length) {
        for (const lang of LANGS) {
          const ctx = await loadPairContext(pairs, lang);
          for (const p of pairs) {
            const r = computeReadiness(p, ctx, lang, { skipPicturePublished: true, skipStatusChecks: true });
            p.audio[lang] = { ready: r.ready, missing: r.missing };
          }
        }
      }

      // The foreign keys were only needed for the lookups above; keep them out of the payload.
      obj.pairs = pairs.map(
        ({ question_id, positive_statement_id, negative_statement_id, ...visible }) => visible);
    }

    res.json({ picture, objects });
  } catch (err) { next(err); }
});
// GET /api/pipeline/settings — returns { pairs_per_object }.
// Falls back to 5 when the setting row is absent or its value is not an integer.
router.get('/settings', async (req, res, next) => {
  try {
    const r = await query(`SELECT value FROM app_settings WHERE key = 'pipeline.pairs_per_object'`);
    // Explicit radix: the value is always read as decimal.
    const n = Number.parseInt(r.rows[0]?.value, 10);
    res.json({ pairs_per_object: Number.isNaN(n) ? 5 : n });
  } catch (err) { next(err); }
});
// PUT /api/pipeline/settings — body { pairs_per_object }.
// Stores the value clamped to 1..20; a missing or non-numeric value is stored as 5.
// Responds with the value that was actually stored.
router.put('/settings', async (req, res, next) => {
  try {
    // `req.body?.` — a request without a parsed body falls back to the default instead
    // of throwing. NaN is tested explicitly so that an explicit 0 is clamped to 1 like
    // any other too-small number rather than being mistaken for "not provided".
    const requested = Number.parseInt(req.body?.pairs_per_object, 10);
    const n = Number.isNaN(requested) ? 5 : Math.min(Math.max(requested, 1), 20);
    await query(
      `INSERT INTO app_settings (key, value) VALUES ('pipeline.pairs_per_object', $1::jsonb)
       ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value`, [JSON.stringify(n)]);
    res.json({ pairs_per_object: n });
  } catch (err) { next(err); }
});
// POST /api/pipeline/picture/:id/publish — body { excluded_pair_ids: [] }
// 1) blocks the excluded pairs (only those attached to this picture),
// 2) loads the remaining non-blocked pairs of the picture,
// 3) validates them for every language (picture/status checks skipped),
// 4) publishes in cascade: questions, statements, pairs, words, objects, picture.
// Responses: 404 unknown picture · 400 no publishable pairs left ·
// 409 with `notReady` when validation fails · 200 with the publish counters.
// Step 1 runs before validation, so exclusions persist even when the request ends in 400/409.
// NOTE(review): the statements below are issued as independent queries; a failure midway
// leaves a partially published state. Wrapping them in one transaction needs a
// dedicated-connection API from ../db that is not visible here — confirm and add.
router.post('/picture/:id/publish', async (req, res, next) => {
  try {
    const pictureId = req.params.id;
    const excluded = Array.isArray(req.body?.excluded_pair_ids) ? req.body.excluded_pair_ids : [];

    const pr = await query(`SELECT id, status, pipeline_status FROM pictures WHERE id = $1`, [pictureId]);
    if (!pr.rows.length) return res.status(404).json({ error: 'Bild nicht gefunden' });

    // 1) Block excluded pairs. The ids come from the request body, so they are restricted
    //    to pairs reachable from this picture — otherwise any pair could be blocked here.
    let blockedPairs = 0;
    if (excluded.length) {
      const blocked = await query(
        `UPDATE pairs SET status='blocked', blocked_at=NOW(), blocked_topic='Im Publish-Review ausgeschlossen'
         WHERE id = ANY($1)
           AND id IN (
             SELECT op.pair_id
             FROM object_pairs op
             JOIN object_pictures pic ON pic.object_id = op.object_id
             WHERE pic.picture_id = $2
           )
         RETURNING id`, [excluded, pictureId]);
      blockedPairs = blocked.rows.length;
    }

    // 2) Load the remaining pairs of the picture
    const pairs = (await query(
      `SELECT DISTINCT p.id, p.answer_type, p.status, p.question_id,
              p.positive_statement_id, p.negative_statement_id
       FROM object_pairs op
       JOIN object_pictures pic ON pic.object_id = op.object_id
       JOIN pairs p ON p.id = op.pair_id
       WHERE pic.picture_id = $1 AND p.status <> 'blocked'`, [pictureId])).rows;
    if (!pairs.length) return res.status(400).json({ error: 'Keine veröffentlichbaren Pairs übrig' });

    // 3) Readiness per pair × language (picture/status checks are skipped — those
    //    entities are published by this very request)
    const notReady = [];
    for (const lang of LANGS) {
      const ctx = await loadPairContext(pairs, lang);
      for (const p of pairs) {
        const r = computeReadiness(p, ctx, lang, { skipPicturePublished: true, skipStatusChecks: true });
        if (!r.ready) notReady.push({ pair_id: p.id, lang, missing: r.missing });
      }
    }
    if (notReady.length)
      return res.status(409).json({ error: 'Noch nicht veröffentlichbar', notReady });

    // 4) Publish in cascade. COALESCE keeps an already existing publish timestamp.
    const now = new Date().toISOString();
    const questionIds = [...new Set(pairs.map(p => p.question_id).filter(Boolean))];
    const stmtIds = [...new Set(pairs.flatMap(p => [p.positive_statement_id, p.negative_statement_id]).filter(Boolean))];
    const pairIds = pairs.map(p => p.id);

    if (questionIds.length)
      await query(`UPDATE questions SET status='published', published_at=COALESCE(published_at,$2)
                   WHERE id = ANY($1)`, [questionIds, now]);
    if (stmtIds.length)
      await query(`UPDATE statements SET status='published', published_at=COALESCE(published_at,$2)
                   WHERE id = ANY($1)`, [stmtIds, now]);
    await query(`UPDATE pairs SET status='published', published_at=COALESCE(published_at,$2)
                 WHERE id = ANY($1)`, [pairIds, now]);

    // Linked words: only 'generated' → 'published' ('translated' is left alone for the
    // picture generation in the ServerMonitor flow; 'published' would skip that step)
    let publishedWords = 0;
    if (stmtIds.length) {
      const w = await query(
        `UPDATE words SET status='published', published_at=COALESCE(published_at,$2)
         WHERE status='generated' AND id IN (
           SELECT word_id FROM statement_positive_words WHERE statement_id = ANY($1)
           UNION SELECT word_id FROM statement_negative_words WHERE statement_id = ANY($1)
         ) RETURNING id`, [stmtIds, now]);
      publishedWords = w.rows.length;
    }

    await query(
      `UPDATE objects SET status='published', published_at=COALESCE(published_at,$2)
       WHERE id IN (SELECT object_id FROM object_pictures WHERE picture_id = $1)
         AND status <> 'blocked'`, [pictureId, now]);
    await query(
      `UPDATE pictures SET status='published', published_timestamp=COALESCE(published_timestamp,$2),
              pipeline_status='published'
       WHERE id=$1`, [pictureId, now]);

    res.json({
      published_pairs: pairIds.length,
      // Rows actually blocked by this request, not the number of ids that were sent.
      blocked_pairs: blockedPairs,
      published_words: publishedWords,
      picture_id: pictureId,
    });
  } catch (err) { next(err); }
});
module.exports = router;