diff --git a/src/lib/translate.js b/src/lib/translate.js
new file mode 100644
index 0000000..b9fdc30
--- /dev/null
+++ b/src/lib/translate.js
@@ -0,0 +1,183 @@
+// Shared translation core (Claude + placeholder protection).
+// Used by routes/claude.js (endpoints) and routes/pairs.js (per-pair translation).
+const { query } = require('../db');
+
+const ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages';
+const ANTHROPIC_MODEL = 'claude-haiku-4-5-20251001';
+const LANGS = ['de', 'en', 'sv'];
+const LANG_LABEL = { de: 'Deutsch', en: 'English', sv: 'Svenska' };
+
+// Translatable fields per table (columns are named `${field}_${lang}`).
+const TRANSLATE_CONFIG = {
+  words: { fields: ['titel'] },
+  questions: { fields: ['sentence'] },
+  statements: { fields: ['positive_sentence', 'negative_sentence'] },
+};
+
+// ── Placeholder protection ────────────────────────────────────────────────────
+// Format in the source text: {{label.w:uuid}} or {{label.o:uuid}}
+const PLACEHOLDER_RE = /\{\{([^.{}]+)\.(w|o):([0-9a-f-]{36})\}\}/g;
+
+// Prepares a sentence for Claude: every placeholder is replaced by a ⟦PHn:label⟧ token.
+// The token format is deliberately exotic so that Claude does not alter it by accident.
+// Returns { tokenized, tokens }; each token is { key, uuid, type, sourceLabel }.
+function tokenize(text) {
+  const tokens = [];
+  let i = 0;
+  const tokenized = text.replace(PLACEHOLDER_RE, (_, label, type, uuid) => {
+    // Keep the token delimiters out of the label so it cannot break the token syntax.
+    const safeLabel = String(label).replace(/[⟦⟧:]/g, ' ').trim();
+    const key = `PH${i++}`;
+    tokens.push({ key, uuid, type, sourceLabel: label });
+    return `⟦${key}:${safeLabel}⟧`;
+  });
+  return { tokenized, tokens };
+}
+
+// Characters that would break the {{label.type:uuid}} syntax (see PLACEHOLDER_RE)
+// or smuggle a ⟦…⟧ token into a label.
+const LABEL_UNSAFE_RE = /[.{}⟦⟧]/g;
+
+// Picks the label to write back: Claude's translation when it is a usable string,
+// otherwise the label taken from the source text.
+function pickLabel(candidate, fallback) {
+  if (typeof candidate !== 'string') return fallback;
+  const cleaned = candidate.replace(LABEL_UNSAFE_RE, ' ').replace(/\s+/g, ' ').trim();
+  return cleaned || fallback;
+}
+
+// Reverse step: turns Claude's answer back into {{label.type:uuid}} placeholders.
+// `labelsFromClaude` is the `labels` object of the JSON response, e.g. { PH0: 'apple', ... }.
+// Returns { text, missingTokens }; missingTokens lists the keys that did not come back in
+// the translated text (their placeholders are appended so that no reference is lost).
+function detokenize(translated, tokens, labelsFromClaude) {
+  let out = translated;
+  const missingTokens = [];
+  for (const t of tokens) {
+    const label = pickLabel(labelsFromClaude && labelsFromClaude[t.key], t.sourceLabel);
+    const placeholder = `{{${label}.${t.type}:${t.uuid}}}`;
+    // The token may come back as ⟦PH0:anything⟧ — match on the key only.
+    const re = new RegExp(`⟦${t.key}:[^⟧]*⟧`, 'g');
+    let replaced = false;
+    out = out.replace(re, () => { replaced = true; return placeholder; });
+    if (!replaced) {
+      // Fallback: the token did not come back → append it and report it as missing.
+      out += ` ${placeholder}`;
+      missingTokens.push(t.key);
+    }
+  }
+  return { text: out, missingTokens };
+}
+
+// Sends one user prompt to the Anthropic Messages API and returns the JSON parsed from
+// the first text block of the response (a ``` / ```json fence around it is stripped).
+// Throws an Error carrying `status`: 500 without API key, the HTTP status on API errors,
+// 502 when the response holds no text block or the text is not valid JSON.
+async function callClaude({ system, user, maxTokens = 2000 }) {
+  const apiKey = process.env.ANTHROPIC_API_KEY;
+  if (!apiKey) { const e = new Error('ANTHROPIC_API_KEY nicht konfiguriert'); e.status = 500; throw e; }
+  const res = await fetch(ANTHROPIC_API_URL, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' },
+    body: JSON.stringify({
+      model: ANTHROPIC_MODEL, max_tokens: maxTokens, system,
+      messages: [{ role: 'user', content: user }],
+    }),
+  });
+  if (!res.ok) { const err = await res.json().catch(() => ({})); const e = new Error(err.error?.message || `Claude API ${res.status}`); e.status = res.status; throw e; }
+  const data = await res.json();
+  // Do not index `content` blindly: it may be missing, empty, or not start with a text block.
+  const block = Array.isArray(data?.content) ? data.content.find(b => typeof b?.text === 'string') : null;
+  if (!block) { const e = new Error('Claude API: Antwort ohne Text'); e.status = 502; throw e; }
+  let raw = block.text.trim();
+  const md = raw.match(/```(?:json)?\s*([\s\S]+?)\s*```/);
+  if (md) raw = md[1];
+  try {
+    return JSON.parse(raw);
+  } catch (cause) {
+    const e = new Error('Claude API: Antwort ist kein gültiges JSON', { cause });
+    e.status = 502;
+    throw e;
+  }
+}
+
+// Translates one text with placeholder protection (tokenize → Claude → detokenize).
+async function translateText({ text, from, to }) {
+  if (!text || !text.trim()) return '';
+  const { tokenized, tokens } = tokenize(text);
+  const system = 'Du bist ein professioneller Übersetzer. Antworte AUSSCHLIESSLICH mit gültigem JSON, ohne Markdown, ohne Erklärungen.';
+  const user = `Übersetze diesen Text von ${LANG_LABEL[from] || from} nach ${LANG_LABEL[to] || to}.\n\n` +
+    `WICHTIG: Tokens der Form ⟦PHn:wort⟧ sind Platzhalter. Übersetze NUR das Wort innerhalb des Tokens, ` +
+    `behalte das Token-Format exakt bei (⟦PHn:übersetztesWort⟧). Passe die Beugung des Wortes an den umgebenden Satz an ` +
+    `(Mehrzahl/Kasus). Die Token-Reihenfolge im Satz darfst du frei wählen wie es natürlich klingt.\n\n` +
+    `Quelltext:\n${tokenized}\n\n` +
+    `Antwort-Format:\n{"translated":"...","labels":{${tokens.map(t => `"${t.key}":"<übersetztes Wort>"`).join(',')}}}`;
+
+  const data = await callClaude({ system, user });
+  if (typeof data?.translated !== 'string') throw new Error('Ungültiges JSON: translated fehlt');
+  const { text: detok } = detokenize(data.translated, tokens, data.labels || {});
+  return detok;
+}
+
+// ── Auto status for words (mirrors the trigger in words.js) ───────────────────
+// Moves a word from 'requested' to 'translated' once all three titles are set.
+async function maybeAutoTranslated(wordId) {
+  const r = await query(`SELECT titel_de, titel_en, titel_sv, status FROM words WHERE id = $1`, [wordId]);
+  const w = r.rows[0];
+  if (!w) return;
+  if (w.titel_de && w.titel_en && w.titel_sv && w.status === 'requested')
+    await query(`UPDATE words SET status='translated' WHERE id=$1`, [wordId]);
+}
+
+// Fills every empty target language of the given fields for one row.
+// The source language is chosen per field (first non-empty one in LANGS order)
+// unless `from` is passed explicitly.
+// `fields` must be a subset of TRANSLATE_CONFIG[table].fields (empty/omitted → all of them);
+// table and field names are interpolated into the SQL text, so anything else is rejected.
+// Returns { translatedFields: [...] } with the written column names; [] when the row
+// does not exist or nothing had to be translated.
+async function fillMissingRow(table, id, fields, { from } = {}) {
+  // Own-property check: keys such as 'constructor' must not resolve via the prototype.
+  const cfg = Object.hasOwn(TRANSLATE_CONFIG, table) ? TRANSLATE_CONFIG[table] : null;
+  if (!cfg) throw new Error(`Unbekannte Tabelle: ${table}`);
+  const useFields = Array.isArray(fields) && fields.length ? fields : cfg.fields;
+  const unknown = useFields.filter(f => !cfg.fields.includes(f));
+  if (unknown.length) throw new Error(`Unbekannte Felder für ${table}: ${unknown.join(', ')}`);
+  const cols = useFields.flatMap(f => LANGS.map(l => `${f}_${l}`));
+  const r = await query(`SELECT ${cols.join(', ')} FROM ${table} WHERE id = $1`, [id]);
+  if (!r.rows.length) return { translatedFields: [] };
+  const row = r.rows[0];
+
+  const updates = {};
+  for (const field of useFields) {
+    // Determine the source language for this field.
+    let src = from;
+    if (!src) {
+      for (const l of LANGS) if ((row[`${field}_${l}`] || '').trim()) { src = l; break; }
+    }
+    if (!src) continue; // no source text in any language → nothing to do
+    const srcText = (row[`${field}_${src}`] || '').trim();
+    if (!srcText) continue;
+    for (const to of LANGS) {
+      if (to === src) continue;
+      if ((row[`${field}_${to}`] || '').trim()) continue; // target already filled
+      const translated = await translateText({ text: srcText, from: src, to });
+      // An empty translation is not a result — do not write or report the cell.
+      if (translated.trim()) updates[`${field}_${to}`] = translated;
+    }
+  }
+
+  const cells = Object.keys(updates);
+  if (!cells.length) return { translatedFields: [] };
+  const setClauses = cells.map((c, i) => `${c} = $${i + 1}`).join(', ');
+  await query(`UPDATE ${table} SET ${setClauses} WHERE id = $${cells.length + 1}`,
+    [...cells.map(c => updates[c]), id]);
+  if (table === 'words') await maybeAutoTranslated(id);
+  return { translatedFields: cells };
+}
+
+module.exports = {
+  LANGS, LANG_LABEL, TRANSLATE_CONFIG,
+  tokenize, detokenize, callClaude,
+  translateText, maybeAutoTranslated, fillMissingRow,
+};
diff --git a/src/routes/claude.js b/src/routes/claude.js
index e93b2ff..24a5617 100644
--- a/src/routes/claude.js
+++ b/src/routes/claude.js
@@ -1,95 +1,11 @@
 const router = require('express').Router();
 const { query } = require('../db');
+const {
+  LANGS, TRANSLATE_CONFIG, translateText, maybeAutoTranslated,
+} = require('../lib/translate');
 
 const ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages';
 const ANTHROPIC_MODEL = 'claude-haiku-4-5-20251001';
-const LANGS = ['de', 'en', 'sv'];
-
-const LANG_LABEL = { de: 'Deutsch', en: 'English', sv: 'Svenska' };
-
-// ── 
Placeholder-Schutz ──────────────────────────────────────────────────────── -// Format im Quelltext: {{label.w:uuid}} oder {{label.o:uuid}} -const PLACEHOLDER_RE = /\{\{([^.{}]+)\.(w|o):([0-9a-f-]{36})\}\}/g; - -// Sätze für Claude vorbereiten: jedes Placeholder durch ⟦PHn:label⟧-Token ersetzen. -// Token-Format ist absichtlich exotisch, damit Claude es nicht versehentlich ändert. -function tokenize(text) { - const tokens = []; - let i = 0; - const tokenized = text.replace(PLACEHOLDER_RE, (_, label, type, uuid) => { - const safeLabel = String(label).replace(/[⟦⟧:]/g, ' ').trim(); - const key = `PH${i++}`; - tokens.push({ key, uuid, type, sourceLabel: label }); - return `⟦${key}:${safeLabel}⟧`; - }); - return { tokenized, tokens }; -} - -// Rückbau: aus Claude-Antwort wieder {{label.type:uuid}} machen. -// Erwartet `labels: { PH0: 'apple', ... }` aus dem JSON-Response. -function detokenize(translated, tokens, labelsFromClaude) { - let out = translated; - const seen = new Set(); - for (const t of tokens) { - const label = (labelsFromClaude && labelsFromClaude[t.key]) || t.sourceLabel; - // Token-Form im Text kann ⟦PH0:irgendwas⟧ sein — wir matchen über die Key - const re = new RegExp(`⟦${t.key}:[^⟧]*⟧`, 'g'); - let replaced = false; - out = out.replace(re, () => { replaced = true; seen.add(t.key); return `{{${label}.${t.type}:${t.uuid}}}`; }); - if (!replaced) { - // Notfall: Token nicht zurückgekommen → an Ende hängen, damit nichts verloren geht - out += ` {{${label}.${t.type}:${t.uuid}}}`; - seen.add(t.key); - } - } - return { text: out, missingTokens: tokens.filter(t => !seen.has(t.key)).map(t => t.key) }; -} - -async function callClaude({ system, user, maxTokens = 2000 }) { - const apiKey = process.env.ANTHROPIC_API_KEY; - if (!apiKey) { const e = new Error('ANTHROPIC_API_KEY nicht konfiguriert'); e.status = 500; throw e; } - const res = await fetch(ANTHROPIC_API_URL, { - method: 'POST', - headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 
'anthropic-version': '2023-06-01' }, - body: JSON.stringify({ - model: ANTHROPIC_MODEL, max_tokens: maxTokens, system, - messages: [{ role: 'user', content: user }], - }), - }); - if (!res.ok) { const err = await res.json().catch(() => ({})); const e = new Error(err.error?.message || `Claude API ${res.status}`); e.status = res.status; throw e; } - const data = await res.json(); - let raw = data.content[0].text.trim(); - const md = raw.match(/```(?:json)?\s*([\s\S]+?)\s*```/); - if (md) raw = md[1]; - return JSON.parse(raw); -} - -// Übersetzt einen Text inkl. Placeholder-Schutz. -async function translateText({ text, from, to }) { - if (!text || !text.trim()) return ''; - const { tokenized, tokens } = tokenize(text); - const system = 'Du bist ein professioneller Übersetzer. Antworte AUSSCHLIESSLICH mit gültigem JSON, ohne Markdown, ohne Erklärungen.'; - const user = `Übersetze diesen Text von ${LANG_LABEL[from] || from} nach ${LANG_LABEL[to] || to}.\n\n` + - `WICHTIG: Tokens der Form ⟦PHn:wort⟧ sind Platzhalter. Übersetze NUR das Wort innerhalb des Tokens, ` + - `behalte das Token-Format exakt bei (⟦PHn:übersetztesWort⟧). Passe die Beugung des Wortes an den umgebenden Satz an ` + - `(Mehrzahl/Kasus). 
Die Token-Reihenfolge im Satz darfst du frei wählen wie es natürlich klingt.\n\n` + - `Quelltext:\n${tokenized}\n\n` + - `Antwort-Format:\n{"translated":"...","labels":{${tokens.map(t => `"${t.key}":"<übersetztes Wort>"`).join(',')}}}`; - - const data = await callClaude({ system, user }); - if (typeof data.translated !== 'string') throw new Error('Ungültiges JSON: translated fehlt'); - const { text: detok } = detokenize(data.translated, tokens, data.labels || {}); - return detok; -} - -// ── Auto-Status für Wörter (Spiegel zum Trigger in words.js) ────────────────── -async function maybeAutoTranslated(wordId) { - const r = await query(`SELECT titel_de, titel_en, titel_sv, status FROM words WHERE id = $1`, [wordId]); - const w = r.rows[0]; - if (!w) return; - if (w.titel_de && w.titel_en && w.titel_sv && w.status === 'requested') - await query(`UPDATE words SET status='translated' WHERE id=$1`, [wordId]); -} // POST /api/claude/generate-pairs @@ -222,13 +138,6 @@ router.post('/generate-words', async (req, res, next) => { } }); -// ── Übersetzungs-Konfiguration pro Tabelle ──────────────────────────────────── -const TRANSLATE_CONFIG = { - words: { fields: ['titel'] }, - questions: { fields: ['sentence'] }, - statements: { fields: ['positive_sentence', 'negative_sentence'] }, -}; - // POST /api/claude/translate-text — generischer Übersetzungs-Primitive router.post('/translate-text', async (req, res, next) => { try { diff --git a/src/routes/pairs.js b/src/routes/pairs.js index 2e9ef4d..24cf656 100644 --- a/src/routes/pairs.js +++ b/src/routes/pairs.js @@ -1,5 +1,6 @@ const router = require('express').Router(); const { query } = require('../db'); +const { fillMissingRow } = require('../lib/translate'); const STATUSES = ['draft', 'reviewed', 'blocked', 'published']; const ANSWER_TYPES = new Set(['yes_no', 'text', 'question', 'word']); @@ -214,18 +215,111 @@ router.patch('/:id', async (req, res, next) => { } catch (err) { next(err); } }); +// Lädt das vollständige 
3-sprachige Inhalts-Bündel eines Pairs (Frage, Statements, Wörter)
+// — for the frontend review modal. (EN: loads the pair's complete trilingual content bundle: question, statements, words.)
+async function loadPairContent(p) {
+  const langCols = (prefix) => `${prefix}_de, ${prefix}_en, ${prefix}_sv`; // the three language columns of one field
+  const content = { answer_type: p.answer_type, question: null, positive: null, negative: null };
+
+  if (p.question_id) {
+    content.question = (await query(
+      `SELECT ${langCols('sentence')} FROM questions WHERE id = $1`, [p.question_id])).rows[0] || null;
+  }
+
+  async function loadStatement(stmtId, sentencePrefix, linkTable) { // one side as { sentence, words }; null without a statement id
+    if (!stmtId) return null;
+    const s = (await query(
+      `SELECT ${langCols(sentencePrefix)} FROM statements WHERE id = $1`, [stmtId])).rows[0] || {}; // missing row → empty object
+    const words = (await query(
+      `SELECT w.id, w.titel_de, w.titel_en, w.titel_sv
+         FROM ${linkTable} lw JOIN words w ON w.id = lw.word_id
+        WHERE lw.statement_id = $1
+        ORDER BY w.titel_de`, [stmtId])).rows;
+    return { sentence: s, words };
+  }
+
+  content.positive = await loadStatement(p.positive_statement_id, 'positive_sentence', 'statement_positive_words');
+  content.negative = await loadStatement(p.negative_statement_id, 'negative_sentence', 'statement_negative_words');
+  return content;
+}
+
+// POST /api/pairs/:id/translate — translates all still-missing sentences/words of this pair
+// into the missing languages (de/en/sv). Responds with the updated content bundle for the modal. 
+router.post('/:id/translate', async (req, res, next) => {
+  try {
+    const pr = await query(
+      `SELECT id, answer_type, question_id, positive_statement_id, negative_statement_id
+         FROM pairs WHERE id = $1`, [req.params.id]);
+    if (!pr.rows.length) return res.status(404).json({ error: 'Not found' });
+    const p = pr.rows[0];
+
+    const result = { translated: 0, failed: 0, errors: [] };
+    const run = async (label, fn) => { // runs one fill job; a failure is recorded in `result` instead of aborting the request
+      try {
+        const { translatedFields } = await fn();
+        result.translated += translatedFields.length;
+      } catch (err) {
+        result.failed++;
+        result.errors.push({ item: label, error: err.message });
+      }
+    };
+
+    // Question (yes_no / question / word)
+    if (p.question_id)
+      await run('Frage', () => fillMissingRow('questions', p.question_id, ['sentence']));
+
+    // Positive side: linked words for answer_type 'word', the sentence for 'text' / 'question'
+    if (p.answer_type === 'word' && p.positive_statement_id) {
+      const ids = (await query(
+        `SELECT word_id FROM statement_positive_words WHERE statement_id = $1`, [p.positive_statement_id])).rows;
+      for (const { word_id } of ids)
+        await run(`Positiv-Wort ${word_id}`, () => fillMissingRow('words', word_id, ['titel']));
+    } else if ((p.answer_type === 'text' || p.answer_type === 'question') && p.positive_statement_id) {
+      await run('Positiv-Satz', () => fillMissingRow('statements', p.positive_statement_id, ['positive_sentence']));
+    }
+
+    // Negative side: linked words for answer_type 'word', the sentence only for 'question'
+    if (p.answer_type === 'word' && p.negative_statement_id) {
+      const ids = (await query(
+        `SELECT word_id FROM statement_negative_words WHERE statement_id = $1`, [p.negative_statement_id])).rows;
+      for (const { word_id } of ids)
+        await run(`Negativ-Wort ${word_id}`, () => fillMissingRow('words', word_id, ['titel']));
+    } else if (p.answer_type === 'question' && p.negative_statement_id) {
+      // fillMissingRow skips fields that have no source text in any language
+      await run('Negativ-Satz', () => fillMissingRow('statements', p.negative_statement_id, ['negative_sentence']));
+    }
+
+    result.content = await loadPairContent(p);
+ 
res.json(result); + } catch (err) { next(err); } +}); + // POST /api/pairs/:id/review — setzt Pair + verlinkte Frage + Statements auf 'reviewed' // Voraussetzung: alle 3 Sprachen sind in den verwendeten Feldern gefüllt. router.post('/:id/review', async (req, res, next) => { try { const pr = await query( - `SELECT id, question_id, positive_statement_id, negative_statement_id, status + `SELECT id, answer_type, question_id, positive_statement_id, negative_statement_id, status FROM pairs WHERE id = $1`, [req.params.id]); if (!pr.rows.length) return res.status(404).json({ error: 'Not found' }); const p = pr.rows[0]; - // Vollständigkeit der 3 Sprachen prüfen + // Prüft, ob alle einem Statement zugeordneten Wörter in allen 3 Sprachen übersetzt sind. + async function missingWordTranslations(statementId, linkTable, label) { + const r = await query( + `SELECT w.titel_de, w.titel_en, w.titel_sv + FROM ${linkTable} lw JOIN words w ON w.id = lw.word_id + WHERE lw.statement_id = $1`, [statementId]); + const out = []; + for (const w of r.rows) + for (const l of LANGS) + if (!(w[`titel_${l}`] || '').trim()) out.push(`${label} ${l}`); + return [...new Set(out)]; + } + + // Vollständigkeit der 3 Sprachen prüfen — je nach answer_type werden andere Felder genutzt. 
const missing = []; + // Frage: bei yes_no / question / word vorhanden if (p.question_id) { const q = (await query( `SELECT sentence_de, sentence_en, sentence_sv FROM questions WHERE id = $1`, [p.question_id])).rows[0]; @@ -233,19 +327,29 @@ router.post('/:id/review', async (req, res, next) => { for (const l of LANGS) if (!(q[`sentence_${l}`] || '').trim()) missing.push(`Frage ${l}`); } if (p.positive_statement_id) { - const s = (await query( - `SELECT positive_sentence_de, positive_sentence_en, positive_sentence_sv FROM statements WHERE id = $1`, - [p.positive_statement_id])).rows[0]; - if (!s) return res.status(409).json({ error: 'Positiv-Statement fehlt' }); - for (const l of LANGS) if (!(s[`positive_sentence_${l}`] || '').trim()) missing.push(`Positiv ${l}`); + if (p.answer_type === 'word') { + // word-Typ: Inhalt steckt in verlinkten Wörtern, nicht im Satz + missing.push(...await missingWordTranslations(p.positive_statement_id, 'statement_positive_words', 'Positiv-Wort')); + } else if (p.answer_type === 'text' || p.answer_type === 'question') { + const s = (await query( + `SELECT positive_sentence_de, positive_sentence_en, positive_sentence_sv FROM statements WHERE id = $1`, + [p.positive_statement_id])).rows[0]; + if (!s) return res.status(409).json({ error: 'Positiv-Statement fehlt' }); + for (const l of LANGS) if (!(s[`positive_sentence_${l}`] || '').trim()) missing.push(`Positiv ${l}`); + } + // yes_no: Positiv-Statement trägt nur die Ja/Nein-Antwort, kein Satz → nichts zu prüfen } if (p.negative_statement_id) { - const s = (await query( - `SELECT negative_sentence_de, negative_sentence_en, negative_sentence_sv FROM statements WHERE id = $1`, - [p.negative_statement_id])).rows[0]; - // Negativ nur prüfen wenn überhaupt in mindestens einer Sprache befüllt - const hasAny = s && LANGS.some(l => (s[`negative_sentence_${l}`] || '').trim()); - if (hasAny) for (const l of LANGS) if (!(s[`negative_sentence_${l}`] || '').trim()) missing.push(`Negativ ${l}`); + if 
(p.answer_type === 'word') { + missing.push(...await missingWordTranslations(p.negative_statement_id, 'statement_negative_words', 'Negativ-Wort')); + } else if (p.answer_type === 'question') { + const s = (await query( + `SELECT negative_sentence_de, negative_sentence_en, negative_sentence_sv FROM statements WHERE id = $1`, + [p.negative_statement_id])).rows[0]; + // Negativ nur prüfen wenn überhaupt in mindestens einer Sprache befüllt + const hasAny = s && LANGS.some(l => (s[`negative_sentence_${l}`] || '').trim()); + if (hasAny) for (const l of LANGS) if (!(s[`negative_sentence_${l}`] || '').trim()) missing.push(`Negativ ${l}`); + } } if (missing.length) @@ -257,6 +361,20 @@ router.post('/:id/review', async (req, res, next) => { const stmtIds = [p.positive_statement_id, p.negative_statement_id].filter(Boolean); if (stmtIds.length) await query(`UPDATE statements SET status='reviewed' WHERE id = ANY($1) AND status='draft'`, [stmtIds]); + + // Verlinkte Objekte + deren Bilder ebenfalls auf 'reviewed' heben (idempotent). + await query( + `UPDATE objects SET status='reviewed' + WHERE id IN (SELECT object_id FROM object_pairs WHERE pair_id = $1) + AND status = 'draft'`, [p.id]); + await query( + `UPDATE pictures SET status='reviewed' + WHERE id IN ( + SELECT op2.picture_id FROM object_pictures op2 + WHERE op2.object_id IN (SELECT object_id FROM object_pairs WHERE pair_id = $1) + ) + AND status = 'uploaded'`, [p.id]); + const upd = await query( `UPDATE pairs SET status='reviewed' WHERE id=$1 RETURNING *`, [p.id]); res.json(upd.rows[0]);