feat: reviewed-Status für Bilder, Auto-Trigger, Übersetzungen, Vertonbarkeits-Regel
- pictures: reviewed-Status (Constraint + ALLOWED_STATUSES + Auto-Trigger beim Object-Linking) - objects: STATUSES um reviewed erweitert; Auto-Trigger draft→reviewed wenn Pair verlinkt - pairs/statements/questions: STATUSES um reviewed (Phase-1-Lücke) - pairs: POST /:id/review kaskadiert Pair+Frage+Statements (verlangt alle 3 Sprachen) - words: Auto requested→translated wenn alle titel_* gefüllt (POST+PATCH) - audios computeUnits: nur vertonbar wenn ALLE 3 Sprachen pro Feld gefüllt - claude: translate-text/translate-row/translate-missing mit Placeholder-Schutz (⟦PHn:label⟧-Tokenisierung, Label übersetzt, UUID erhalten); translation-coverage Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,96 @@
|
||||
const router = require('express').Router();
|
||||
const { query } = require('../db');
|
||||
|
||||
// Endpoint of the Anthropic Messages API that callClaude() posts to.
const ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages';
|
||||
// Model identifier sent as `model` in the request body built by callClaude().
const ANTHROPIC_MODEL = 'claude-haiku-4-5-20251001';
|
||||
// Supported language codes. They are also used as column suffixes when the
// translation routes build names such as `titel_de` or `sentence_sv`.
const LANGS = ['de', 'en', 'sv'];
|
||||
|
||||
// Human-readable language names interpolated into the translation prompt.
const LANG_LABEL = { de: 'Deutsch', en: 'English', sv: 'Svenska' };
|
||||
|
||||
// ── Placeholder-Schutz ────────────────────────────────────────────────────────
|
||||
// Format im Quelltext: {{label.w:uuid}} oder {{label.o:uuid}}
|
||||
// Matches placeholders of the form {{label.w:uuid}} or {{label.o:uuid}}.
// Capture groups: 1 = label (no '.', '{' or '}'), 2 = type ('w' or 'o'),
// 3 = a 36-character id made of lowercase hex digits and dashes.
const PLACEHOLDER_RE = /\{\{([^.{}]+)\.(w|o):([0-9a-f-]{36})\}\}/g;
|
||||
|
||||
// Prepares a sentence for Claude: every {{label.type:uuid}} placeholder is
// swapped for a ⟦PHn:label⟧ token. The token syntax is deliberately exotic so
// that the model is unlikely to alter it by accident.
//
// Returns { tokenized, tokens } where `tokens` lists, in order of appearance,
// { key: 'PHn', uuid, type, sourceLabel } for each placeholder found.
function tokenize(text) {
  const tokens = [];

  const toToken = (match, label, type, uuid) => {
    // The running index doubles as the token number.
    const key = `PH${tokens.length}`;
    tokens.push({ key, uuid, type, sourceLabel: label });

    // Characters that delimit the token syntax are blanked out of the label.
    const shownLabel = String(label).replace(/[⟦⟧:]/g, ' ').trim();
    return `⟦${key}:${shownLabel}⟧`;
  };

  return { tokenized: text.replace(PLACEHOLDER_RE, toToken), tokens };
}
|
||||
|
||||
// Reverse of tokenization: turns ⟦PHn:...⟧ tokens in Claude's answer back into
// {{label.type:uuid}} placeholders.
//
// translated        – text returned by the model, containing ⟦PHn:...⟧ tokens
// tokens            – entries of the shape { key, uuid, type, sourceLabel }
// labelsFromClaude  – optional map such as { PH0: 'apple', ... } taken from the
//                     JSON response; may be null/undefined
//
// Returns { text, missingTokens }. `missingTokens` lists the keys of tokens the
// model dropped; their placeholders are appended to the end of the text so no
// reference is lost.
function detokenize(translated, tokens, labelsFromClaude) {
  let out = translated;
  const missingTokens = [];

  for (const t of tokens) {
    // Only accept a translated label that keeps the placeholder parseable:
    // it must be a non-empty string without '.', '{' or '}'. Anything else
    // falls back to the label from the source text.
    const candidate = labelsFromClaude?.[t.key];
    const usable = typeof candidate === 'string'
      && candidate.trim() !== ''
      && !/[.{}]/.test(candidate);
    const label = usable ? candidate.trim() : t.sourceLabel;
    const placeholder = `{{${label}.${t.type}:${t.uuid}}}`;

    // The text inside the token may be anything (the model inflects it), so
    // match on the key only.
    const re = new RegExp(`⟦${t.key}:[^⟧]*⟧`, 'g');
    let replaced = false;
    // A replacer function is used so '$' sequences in the label stay literal.
    out = out.replace(re, () => { replaced = true; return placeholder; });

    if (!replaced) {
      // Emergency path: the token did not come back. Append the placeholder
      // and report the key, so callers can tell the sentence needs review.
      out += ` ${placeholder}`;
      missingTokens.push(t.key);
    }
  }

  return { text: out, missingTokens };
}
|
||||
|
||||
// Sends one system + user prompt to the Anthropic Messages API and returns the
// model's answer parsed as JSON.
//
// system    – system prompt
// user      – content of the single user message
// maxTokens – value sent as `max_tokens` (default 2000)
//
// Throws an Error carrying a numeric `status`:
//   500        – ANTHROPIC_API_KEY is not set
//   res.status – the API answered with a non-2xx status
//   502        – the API answered 2xx but the body has no text block or the
//                text is not valid JSON
async function callClaude({ system, user, maxTokens = 2000 }) {
  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    const e = new Error('ANTHROPIC_API_KEY nicht konfiguriert');
    e.status = 500;
    throw e;
  }

  const res = await fetch(ANTHROPIC_API_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' },
    body: JSON.stringify({
      model: ANTHROPIC_MODEL, max_tokens: maxTokens, system,
      messages: [{ role: 'user', content: user }],
    }),
  });

  if (!res.ok) {
    // The error body is optional; fall back to a generic message without it.
    const err = await res.json().catch(() => ({}));
    const e = new Error(err.error?.message || `Claude API ${res.status}`);
    e.status = res.status;
    throw e;
  }

  const data = await res.json();

  // Use the first content block that actually carries text instead of assuming
  // content[0] exists and is a text block.
  const blocks = Array.isArray(data?.content) ? data.content : [];
  const textBlock = blocks.find((b) => typeof b?.text === 'string');
  if (!textBlock) {
    const e = new Error('Claude API: Antwort enthält keinen Text');
    e.status = 502;
    throw e;
  }

  // The model sometimes wraps its JSON in a Markdown code fence despite the
  // instructions; unwrap it before parsing.
  let raw = textBlock.text.trim();
  const md = raw.match(/```(?:json)?\s*([\s\S]+?)\s*```/);
  if (md) raw = md[1];

  try {
    return JSON.parse(raw);
  } catch (cause) {
    const e = new Error('Claude API: Antwort ist kein gültiges JSON', { cause });
    e.status = 502;
    throw e;
  }
}
|
||||
|
||||
// Translates one text while protecting {{label.type:uuid}} placeholders.
//
// text – source text; null, undefined, empty or whitespace-only yields ''
// from – source language code (its label is looked up in LANG_LABEL, with the
//        raw code used as a fallback)
// to   – target language code
//
// Returns the translated text with placeholders restored.
// Throws an Error with `status` 400 when `text` is not a string and with
// `status` 502 when the model's JSON answer lacks a string `translated`;
// errors from callClaude() propagate unchanged.
async function translateText({ text, from, to }) {
  if (text == null) return '';
  if (typeof text !== 'string') {
    const e = new Error('text muss ein String sein');
    e.status = 400;
    throw e;
  }
  if (!text.trim()) return '';

  const { tokenized, tokens } = tokenize(text);
  const system = 'Du bist ein professioneller Übersetzer. Antworte AUSSCHLIESSLICH mit gültigem JSON, ohne Markdown, ohne Erklärungen.';
  const user = `Übersetze diesen Text von ${LANG_LABEL[from] || from} nach ${LANG_LABEL[to] || to}.\n\n` +
    `WICHTIG: Tokens der Form ⟦PHn:wort⟧ sind Platzhalter. Übersetze NUR das Wort innerhalb des Tokens, ` +
    `behalte das Token-Format exakt bei (⟦PHn:übersetztesWort⟧). Passe die Beugung des Wortes an den umgebenden Satz an ` +
    `(Mehrzahl/Kasus). Die Token-Reihenfolge im Satz darfst du frei wählen wie es natürlich klingt.\n\n` +
    `Quelltext:\n${tokenized}\n\n` +
    `Antwort-Format:\n{"translated":"...","labels":{${tokens.map(t => `"${t.key}":"<übersetztes Wort>"`).join(',')}}}`;

  const data = await callClaude({ system, user });

  // The answer is model output: it may be valid JSON of the wrong shape
  // (null, array, string, ...). Report that as an upstream failure.
  if (data === null || typeof data !== 'object' || typeof data.translated !== 'string') {
    const e = new Error('Ungültiges JSON: translated fehlt');
    e.status = 502;
    throw e;
  }

  const { text: detok } = detokenize(data.translated, tokens, data.labels || {});
  return detok;
}
|
||||
|
||||
// ── Automatic status for words (mirrors the trigger in words.js) ─────────────
// Promotes a word from 'requested' to 'translated' once titel_de, titel_en and
// titel_sv are all non-empty. Does nothing when the word does not exist, a
// title is missing, or the status is anything other than 'requested'.
async function maybeAutoTranslated(wordId) {
  const lookup = await query(`SELECT titel_de, titel_en, titel_sv, status FROM words WHERE id = $1`, [wordId]);
  const word = lookup.rows[0];
  if (!word) return;

  const allTitlesPresent = Boolean(word.titel_de && word.titel_en && word.titel_sv);
  if (!allTitlesPresent || word.status !== 'requested') return;

  await query(`UPDATE words SET status='translated' WHERE id=$1`, [wordId]);
}
|
||||
|
||||
|
||||
// POST /api/claude/generate-pairs
|
||||
// Body: { imageUrl, objects: [{id, words: [{titel_de, titel_en}], selections: [{points:[{x,y}]}]}, ...], selectedObjectId }
|
||||
@@ -132,4 +222,154 @@ router.post('/generate-words', async (req, res, next) => {
|
||||
}
|
||||
});
|
||||
|
||||
// ── Translation configuration per table ──────────────────────────────────────
// Maps a table name to the base names of its translatable columns; the actual
// columns are `<field>_<lang>` for every entry of LANGS.
//
// The map has no prototype on purpose: it is indexed with `source_table` from
// the request body, and on a plain object keys such as "constructor" or
// "__proto__" would resolve to inherited members and pass the `!cfg` check.
const TRANSLATE_CONFIG = Object.assign(Object.create(null), {
  words: { fields: ['titel'] },
  questions: { fields: ['sentence'] },
  statements: { fields: ['positive_sentence', 'negative_sentence'] },
});
|
||||
|
||||
// POST /api/claude/translate-text — generic translation primitive.
// Body: { text, from, to } where `from` and `to` are distinct entries of LANGS.
// Responds with { translated }. Validation failures answer 400; errors that
// carry a `status` are returned as JSON with that status, everything else is
// passed to the Express error handler.
router.post('/translate-text', async (req, res, next) => {
  try {
    const { text, from, to } = req.body ?? {};
    if (!text) return res.status(400).json({ error: 'text fehlt' });
    // A truthy non-string (number, object, array) would otherwise crash on
    // `.trim()` further down and surface as a 500.
    if (typeof text !== 'string') return res.status(400).json({ error: 'text muss ein String sein' });
    if (!LANGS.includes(from)) return res.status(400).json({ error: `from muss eine von: ${LANGS.join(', ')} sein` });
    if (!LANGS.includes(to)) return res.status(400).json({ error: `to muss eine von: ${LANGS.join(', ')} sein` });
    if (from === to) return res.status(400).json({ error: 'from und to müssen unterschiedlich sein' });

    const translated = await translateText({ text, from, to });
    res.json({ translated });
  } catch (err) {
    if (err.status) return res.status(err.status).json({ error: err.message });
    next(err);
  }
});
|
||||
|
||||
// POST /api/claude/translate-row — translate one concrete row and store the result.
// Body: { source_table, source_id, to, from? }
//   source_table – a key of TRANSLATE_CONFIG
//   source_id    – id of the row
//   to           – target language (entry of LANGS)
//   from         – optional source language; when omitted, the first language
//                  in LANGS order (other than `to`) with any filled field is used
// Every configured field with text in `from` is translated and written to its
// `<field>_<to>` column, overwriting what was there. Responds with the stored
// row plus `translated_fields`.
router.post('/translate-row', async (req, res, next) => {
  try {
    const { source_table, source_id, to } = req.body ?? {};
    let { from } = req.body ?? {};

    // Own-property lookup: a plain index would accept inherited keys such as
    // "constructor" and crash later instead of answering 400.
    const cfg = typeof source_table === 'string' && Object.hasOwn(TRANSLATE_CONFIG, source_table)
      ? TRANSLATE_CONFIG[source_table]
      : undefined;
    if (!cfg) return res.status(400).json({ error: `source_table muss eine von: ${Object.keys(TRANSLATE_CONFIG).join(', ')} sein` });
    if (!source_id) return res.status(400).json({ error: 'source_id fehlt' });
    if (!LANGS.includes(to)) return res.status(400).json({ error: `to muss eine von: ${LANGS.join(', ')} sein` });
    if (from && !LANGS.includes(from)) return res.status(400).json({ error: `from muss eine von: ${LANGS.join(', ')} sein` });

    // Load the row. Table and column names are interpolated, but they only
    // ever come from TRANSLATE_CONFIG and LANGS, never from the request.
    const cols = cfg.fields.flatMap(f => LANGS.map(l => `${f}_${l}`));
    const r = await query(`SELECT ${cols.join(', ')} FROM ${source_table} WHERE id = $1`, [source_id]);
    if (!r.rows.length) return res.status(404).json({ error: 'source row not found' });
    const row = r.rows[0];
    const textOf = (field, lang) => (row[`${field}_${lang}`] || '').trim();

    // Source language: explicit, otherwise the first language ≠ `to` that has
    // content in any configured field (same rule as translate-missing).
    if (!from) {
      from = LANGS.find(lang => lang !== to && cfg.fields.some(field => textOf(field, lang)));
      if (!from) return res.status(400).json({ error: 'Keine Quellsprache mit Inhalt gefunden' });
    }
    if (from === to) return res.status(400).json({ error: 'from und to müssen unterschiedlich sein' });

    // Translate field by field, skipping fields that are empty in the source.
    const updates = {};
    for (const field of cfg.fields) {
      const src = textOf(field, from);
      if (!src) continue;
      updates[`${field}_${to}`] = await translateText({ text: src, from, to });
    }
    if (!Object.keys(updates).length)
      return res.status(400).json({ error: `Keine Quell-Texte in ${from} vorhanden` });

    const fields = Object.keys(updates);
    const setClauses = fields.map((f, i) => `${f} = $${i + 1}`).join(', ');
    const values = [...fields.map(f => updates[f]), source_id];
    const upd = await query(
      `UPDATE ${source_table} SET ${setClauses} WHERE id = $${fields.length + 1} RETURNING *`,
      values
    );

    let saved = upd.rows[0];
    if (source_table === 'words') {
      await maybeAutoTranslated(source_id);
      // The status may have just changed; re-read so the response does not
      // report the value from before the automatic transition.
      const fresh = await query(`SELECT * FROM ${source_table} WHERE id = $1`, [source_id]);
      saved = fresh.rows[0] ?? saved;
    }

    res.json({ ...saved, translated_fields: fields });
  } catch (err) {
    if (err.status) return res.status(err.status).json({ error: err.message });
    next(err);
  }
});
|
||||
|
||||
// POST /api/claude/translate-missing — fill in `to` for rows of one table where
// a field is empty in `to` but filled in a usable source language.
// Body: { source_table, to, from? }
//   source_table – a key of TRANSLATE_CONFIG
//   to           – target language (entry of LANGS)
//   from         – optional source language; when omitted, each row uses the
//                  first language in LANGS order that can fill a missing field
// Processes at most 200 rows per call. Responds with
// { translated, failed, errors: [{ id, error }] }; a failure on one row does
// not abort the batch.
router.post('/translate-missing', async (req, res, next) => {
  try {
    const { source_table, to, from } = req.body ?? {};

    // Own-property lookup: a plain index would accept inherited keys such as
    // "constructor" and crash later instead of answering 400.
    const cfg = typeof source_table === 'string' && Object.hasOwn(TRANSLATE_CONFIG, source_table)
      ? TRANSLATE_CONFIG[source_table]
      : undefined;
    if (!cfg) return res.status(400).json({ error: `source_table muss eine von: ${Object.keys(TRANSLATE_CONFIG).join(', ')} sein` });
    if (!LANGS.includes(to)) return res.status(400).json({ error: `to muss eine von: ${LANGS.join(', ')} sein` });
    if (from && !LANGS.includes(from)) return res.status(400).json({ error: `from muss eine von: ${LANGS.join(', ')} sein` });
    if (from === to) return res.status(400).json({ error: 'from und to müssen unterschiedlich sein' });

    const sourceLangs = from ? [from] : LANGS.filter(l => l !== to);
    const cols = cfg.fields.flatMap(f => LANGS.map(l => `${f}_${l}`));

    // A row qualifies when, for the SAME field, the target is empty and one of
    // the allowed source languages is filled. Pairing the two per field keeps
    // rows that can never be translated from occupying the LIMIT window on
    // every call. Identifiers come from TRANSLATE_CONFIG and LANGS only.
    const isEmpty = col => `(${col} IS NULL OR ${col} = '')`;
    const isFilled = col => `(${col} IS NOT NULL AND ${col} <> '')`;
    const translatable = cfg.fields
      .map(f => `(${isEmpty(`${f}_${to}`)} AND (${sourceLangs.map(l => isFilled(`${f}_${l}`)).join(' OR ')}))`)
      .join(' OR ');
    const rows = (await query(
      `SELECT id, ${cols.join(', ')} FROM ${source_table}
       WHERE ${translatable}
       LIMIT 200`
    )).rows;

    const results = { translated: 0, failed: 0, errors: [] };
    for (const row of rows) {
      try {
        const textOf = (field, lang) => (row[`${field}_${lang}`] || '').trim();

        // Source language for this row: the first allowed language that has
        // text for at least one field still missing in `to`.
        const f = sourceLangs.find(lang =>
          cfg.fields.some(field => !textOf(field, to) && textOf(field, lang)));
        if (!f) { results.failed++; results.errors.push({ id: row.id, error: 'keine Quellsprache' }); continue; }

        const updates = {};
        for (const field of cfg.fields) {
          const tgt = textOf(field, to);
          const src = textOf(field, f);
          // Never overwrite an existing translation; skip empty sources.
          if (tgt || !src) continue;
          updates[`${field}_${to}`] = await translateText({ text: src, from: f, to });
        }

        if (Object.keys(updates).length) {
          const fields = Object.keys(updates);
          const setClauses = fields.map((c, i) => `${c} = $${i + 1}`).join(', ');
          await query(`UPDATE ${source_table} SET ${setClauses} WHERE id = $${fields.length + 1}`,
            [...fields.map(c => updates[c]), row.id]);
          if (source_table === 'words') await maybeAutoTranslated(row.id);
          results.translated++;
        }
      } catch (err) {
        // Best effort: record the failure and continue with the next row.
        results.failed++;
        results.errors.push({ id: row.id, error: err.message });
      }
    }
    res.json(results);
  } catch (err) { next(err); }
});
|
||||
|
||||
// GET /api/claude/translation-coverage — how many rows per table × language have text.
// Responds with { coverage: [{ source_table, language, total, have, missing }] },
// one entry per table in TRANSLATE_CONFIG and language in LANGS.
router.get('/translation-coverage', async (req, res, next) => {
  try {
    const coverage = [];
    for (const [table, cfg] of Object.entries(TRANSLATE_CONFIG)) {
      // A row counts as "present in a language" when the FIRST configured
      // field is filled (for statements: the positive sentence; the negative
      // one is treated as optional).
      const mainField = cfg.fields[0];

      // One statement per table: the total and all per-language counts come
      // from the same snapshot, so `missing` cannot go negative under
      // concurrent writes, and the total is not re-queried per language.
      // COUNT(NULLIF(col, '')) counts rows where col is neither NULL nor ''.
      const haveCols = LANGS.map(lang => `COUNT(NULLIF(${mainField}_${lang}, ''))::int AS have_${lang}`);
      const counts = (await query(
        `SELECT COUNT(*)::int AS total, ${haveCols.join(', ')} FROM ${table}`
      )).rows[0];

      for (const lang of LANGS) {
        const have = counts[`have_${lang}`];
        coverage.push({ source_table: table, language: lang, total: counts.total, have, missing: counts.total - have });
      }
    }
    res.json({ coverage });
  } catch (err) { next(err); }
});
|
||||
|
||||
module.exports = router;
|
||||
|
||||
Reference in New Issue
Block a user