feat: bessere Übersetzungsqualität (Sonnet + Wörter mit Kontext)
- Übersetzungs-Modell auf Sonnet (env TRANSLATE_MODEL, Default claude-sonnet-4-5).
- Neue translateWords(): übersetzt die Wörter eines word-Pairs gemeinsam in
einem Call, mit der Frage als Kontext → korrekte Bedeutung mehrdeutiger
Wörter (z.B. 'Ranke' → 'ranka' statt 'klänge'), konsistente Gruppe.
- POST /pairs/:id/translate nutzt translateWordGroup für word-Typ und nimmt
{ overwrite:true } entgegen, um falsche bestehende Zielsprachen neu zu
übersetzen (Quellsprache bleibt unangetastet); fillMissingRow erhält
overwrite-Flag.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,9 @@
|
|||||||
const { query } = require('../db');
|
const { query } = require('../db');
|
||||||
|
|
||||||
const ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages';
|
const ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages';
|
||||||
const ANTHROPIC_MODEL = 'claude-haiku-4-5-20251001';
|
// Übersetzungen laufen auf Sonnet (bessere Qualität, v.a. Schwedisch/Mehrdeutigkeit).
|
||||||
|
// Per Env überschreibbar, falls ein anderer Modell-Snapshot gewünscht ist.
|
||||||
|
const TRANSLATE_MODEL = process.env.TRANSLATE_MODEL || 'claude-sonnet-4-5';
|
||||||
const LANGS = ['de', 'en', 'sv'];
|
const LANGS = ['de', 'en', 'sv'];
|
||||||
const LANG_LABEL = { de: 'Deutsch', en: 'English', sv: 'Svenska' };
|
const LANG_LABEL = { de: 'Deutsch', en: 'English', sv: 'Svenska' };
|
||||||
|
|
||||||
@@ -59,7 +61,7 @@ async function callClaude({ system, user, maxTokens = 2000 }) {
|
|||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' },
|
headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: ANTHROPIC_MODEL, max_tokens: maxTokens, system,
|
model: TRANSLATE_MODEL, max_tokens: maxTokens, system,
|
||||||
messages: [{ role: 'user', content: user }],
|
messages: [{ role: 'user', content: user }],
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
@@ -102,7 +104,7 @@ async function maybeAutoTranslated(wordId) {
|
|||||||
// Quellsprache wird pro Feld automatisch gewählt (erste gefüllte ≠ Ziel),
|
// Quellsprache wird pro Feld automatisch gewählt (erste gefüllte ≠ Ziel),
|
||||||
// sofern `from` nicht explizit übergeben wird.
|
// sofern `from` nicht explizit übergeben wird.
|
||||||
// Gibt { translatedFields: [...] } zurück; leere Hüllen-Zeilen ⇒ [].
|
// Gibt { translatedFields: [...] } zurück; leere Hüllen-Zeilen ⇒ [].
|
||||||
async function fillMissingRow(table, id, fields, { from } = {}) {
|
async function fillMissingRow(table, id, fields, { from, overwrite = false } = {}) {
|
||||||
const cfg = TRANSLATE_CONFIG[table];
|
const cfg = TRANSLATE_CONFIG[table];
|
||||||
if (!cfg) throw new Error(`Unbekannte Tabelle: ${table}`);
|
if (!cfg) throw new Error(`Unbekannte Tabelle: ${table}`);
|
||||||
const useFields = fields && fields.length ? fields : cfg.fields;
|
const useFields = fields && fields.length ? fields : cfg.fields;
|
||||||
@@ -123,7 +125,7 @@ async function fillMissingRow(table, id, fields, { from } = {}) {
|
|||||||
if (!srcText) continue;
|
if (!srcText) continue;
|
||||||
for (const to of LANGS) {
|
for (const to of LANGS) {
|
||||||
if (to === src) continue;
|
if (to === src) continue;
|
||||||
if ((row[`${field}_${to}`] || '').trim()) continue; // Ziel schon gefüllt
|
if (!overwrite && (row[`${field}_${to}`] || '').trim()) continue; // Ziel schon gefüllt
|
||||||
updates[`${field}_${to}`] = await translateText({ text: srcText, from: src, to });
|
updates[`${field}_${to}`] = await translateText({ text: srcText, from: src, to });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -137,8 +139,50 @@ async function fillMissingRow(table, id, fields, { from } = {}) {
|
|||||||
return { translatedFields: cells };
|
return { translatedFields: cells };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Translates several words in ONE Claude call, optionally with a context sentence
 * (e.g. the question the words are answer options for). Translating the group
 * together keeps the terminology consistent and lets the model pick the right
 * meaning for ambiguous words.
 *
 * Best-effort by design: if the batch call throws, or the reply does not contain
 * a `translations` array of exactly the expected length, every word is translated
 * individually via `translateText`; words whose single translation throws are
 * left out of the result instead of failing the whole group.
 *
 * @param {object} args
 * @param {Array<{id: (string|number), text: string}>} args.words - Words to translate;
 *   entries without a non-blank string `text` are ignored.
 * @param {string} args.from - Source language code (label looked up in LANG_LABEL).
 * @param {string} args.to - Target language code (label looked up in LANG_LABEL).
 * @param {string} [args.context] - Optional question text used for disambiguation.
 * @returns {Promise<Object<string, string>>} Plain object mapping word id → translated
 *   word, containing only the words that were translated successfully.
 */
async function translateWords({ words, from, to, context }) {
  // Ignore entries without usable text instead of throwing on non-string values.
  const items = (words || []).filter((w) => typeof w?.text === 'string' && w.text.trim() !== '');
  if (items.length === 0) return {};

  const question = typeof context === 'string' ? context.trim() : '';
  const ctxLine = question
    ? `Kontext: Diese Wörter sind Antwortoptionen auf die Frage „${question}". Übersetze sie fachlich korrekt und passend zu diesem Kontext.\n\n`
    : '';
  const system = 'Du bist ein professioneller Übersetzer. Antworte AUSSCHLIESSLICH mit gültigem JSON, ohne Markdown, ohne Erklärungen.';
  // The format description must match what is parsed below (an object with a
  // `translations` array) — asking for a bare array would contradict the example.
  const user =
    `Übersetze die folgenden Wörter von ${LANG_LABEL[from] || from} nach ${LANG_LABEL[to] || to}.\n\n` +
    ctxLine +
    `Wähle bei mehrdeutigen Wörtern die im Kontext fachlich korrekte Bedeutung.\n\n` +
    `Wörter (JSON-Array):\n${JSON.stringify(items.map((w) => w.text))}\n\n` +
    `Antwort-Format: ein JSON-Objekt mit dem Schlüssel "translations", dessen Array die Übersetzungen in EXAKT gleicher Reihenfolge und Länge enthält:\n` +
    `{"translations":[${items.map(() => '"…"').join(',')}]}`;

  let arr = null;
  try {
    const data = await callClaude({ system, user });
    if (Array.isArray(data?.translations)) arr = data.translations;
  } catch {
    // Deliberate: a failed batch call is not fatal, the per-word fallback below takes over.
    arr = null;
  }

  // Fallback: no/invalid array or length mismatch → translate word by word.
  if (!arr || arr.length !== items.length) {
    const single = {};
    for (const w of items) {
      try {
        single[w.id] = await translateText({ text: w.text, from, to });
      } catch {
        // Best-effort: a word that cannot be translated is simply omitted.
      }
    }
    return single;
  }

  const out = {};
  items.forEach((w, i) => {
    const raw = arr[i];
    // Only accept real text; objects/booleans/null from a malformed reply must not
    // end up as "[object Object]" or "true" in the database.
    let t = '';
    if (typeof raw === 'string') t = raw.trim();
    else if (typeof raw === 'number' && Number.isFinite(raw)) t = String(raw);
    if (t) out[w.id] = t;
  });
  return out;
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
LANGS, LANG_LABEL, TRANSLATE_CONFIG,
|
LANGS, LANG_LABEL, TRANSLATE_CONFIG,
|
||||||
tokenize, detokenize, callClaude,
|
tokenize, detokenize, callClaude,
|
||||||
translateText, maybeAutoTranslated, fillMissingRow,
|
translateText, translateWords, maybeAutoTranslated, fillMissingRow,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
const router = require('express').Router();
|
const router = require('express').Router();
|
||||||
const { query } = require('../db');
|
const { query } = require('../db');
|
||||||
const { fillMissingRow } = require('../lib/translate');
|
const { fillMissingRow, translateWords, maybeAutoTranslated } = require('../lib/translate');
|
||||||
|
|
||||||
const STATUSES = ['draft', 'reviewed', 'blocked', 'published'];
|
const STATUSES = ['draft', 'reviewed', 'blocked', 'published'];
|
||||||
const ANSWER_TYPES = new Set(['yes_no', 'text', 'question', 'word']);
|
const ANSWER_TYPES = new Set(['yes_no', 'text', 'question', 'word']);
|
||||||
@@ -243,10 +243,50 @@ async function loadPairContent(p) {
|
|||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
// POST /api/pairs/:id/translate — übersetzt alle noch fehlenden Sätze/Wörter dieses Pairs
|
// Übersetzt die einem Statement zugeordneten Wörter gemeinsam (ein Claude-Call je Zielsprache),
|
||||||
// in die fehlenden Sprachen (de/en/sv). Liefert das aktualisierte Inhalts-Bündel fürs Modal.
|
// mit der Frage als Kontext zur Disambiguierung. `questionRow` = { sentence_de/en/sv } | null.
|
||||||
|
// Gibt die Anzahl tatsächlich aktualisierter Wort-Felder zurück.
|
||||||
|
/**
 * Translates the words linked to a statement as a group (one `translateWords`
 * call per source/target language combination), passing the question as context
 * so ambiguous words are disambiguated.
 *
 * The source language is chosen per word: the first language in LANGS order in
 * which that word has text. When all words share the same source language this
 * results in exactly one call per target language; words that only have text in
 * a different language are translated from that language instead of being
 * skipped. The source text itself is never modified.
 *
 * @param {string|number} statementId - Statement whose linked words are translated.
 * @param {string} linkTable - Link table name; must be `statement_positive_words`
 *   or `statement_negative_words` (it is interpolated into SQL, hence the allowlist).
 * @param {?{sentence_de?: string, sentence_en?: string, sentence_sv?: string}} questionRow
 *   Question sentences used as context, or null for no context.
 * @param {boolean} overwrite - When true, already filled target languages are
 *   re-translated as well.
 * @returns {Promise<number>} Number of word fields that were actually updated.
 * @throws {Error} If `linkTable` is not one of the known link tables.
 */
async function translateWordGroup(statementId, linkTable, questionRow, overwrite) {
  // Table names cannot be bound as query parameters, so validate before interpolating.
  const allowedLinkTables = ['statement_positive_words', 'statement_negative_words'];
  if (!allowedLinkTables.includes(linkTable)) {
    throw new Error(`Unbekannte Link-Tabelle: ${linkTable}`);
  }

  const { rows } = await query(
    `SELECT w.id, w.titel_de, w.titel_en, w.titel_sv
       FROM ${linkTable} lw JOIN words w ON w.id = lw.word_id
      WHERE lw.statement_id = $1`, [statementId]);
  if (rows.length === 0) return 0;

  const textOf = (w, lang) => (w[`titel_${lang}`] || '').trim();
  // Per-word source language: first language with text, undefined if the word is empty.
  const sourceOf = (w) => LANGS.find((lang) => textOf(w, lang));

  let count = 0;
  for (const src of LANGS) {
    const group = rows.filter((w) => sourceOf(w) === src);
    if (group.length === 0) continue;

    // Prefer the question in the source language, otherwise any available sentence.
    const context = questionRow
      ? (questionRow[`sentence_${src}`] || questionRow.sentence_de || questionRow.sentence_en || questionRow.sentence_sv || '')
      : '';

    for (const to of LANGS) {
      if (to === src) continue;
      const need = group
        .filter((w) => overwrite || !textOf(w, to))
        .map((w) => ({ id: w.id, text: textOf(w, src) }));
      if (need.length === 0) continue;

      const map = await translateWords({ words: need, from: src, to, context });
      for (const { id } of need) {
        const t = map[id];
        if (!t) continue; // not translated successfully → leave the field untouched
        // `to` comes from LANGS, so the column name is safe to interpolate.
        await query(`UPDATE words SET titel_${to} = $1 WHERE id = $2`, [t, id]);
        await maybeAutoTranslated(id);
        count++;
      }
    }
  }
  return count;
}
|
||||||
|
|
||||||
|
// POST /api/pairs/:id/translate — übersetzt fehlende Sätze/Wörter dieses Pairs in die
|
||||||
|
// fehlenden Sprachen (de/en/sv). Body `{ overwrite: true }` übersetzt auch bereits gefüllte
|
||||||
|
// Zielsprachen neu (Quellsprache bleibt unangetastet). Liefert das aktualisierte Inhalts-Bündel.
|
||||||
router.post('/:id/translate', async (req, res, next) => {
|
router.post('/:id/translate', async (req, res, next) => {
|
||||||
try {
|
try {
|
||||||
|
const overwrite = req.body?.overwrite === true;
|
||||||
const pr = await query(
|
const pr = await query(
|
||||||
`SELECT id, answer_type, question_id, positive_statement_id, negative_statement_id
|
`SELECT id, answer_type, question_id, positive_statement_id, negative_statement_id
|
||||||
FROM pairs WHERE id = $1`, [req.params.id]);
|
FROM pairs WHERE id = $1`, [req.params.id]);
|
||||||
@@ -255,38 +295,31 @@ router.post('/:id/translate', async (req, res, next) => {
|
|||||||
|
|
||||||
const result = { translated: 0, failed: 0, errors: [] };
|
const result = { translated: 0, failed: 0, errors: [] };
|
||||||
const run = async (label, fn) => {
|
const run = async (label, fn) => {
|
||||||
try {
|
try { result.translated += await fn(); }
|
||||||
const { translatedFields } = await fn();
|
catch (err) { result.failed++; result.errors.push({ item: label, error: err.message }); }
|
||||||
result.translated += translatedFields.length;
|
|
||||||
} catch (err) {
|
|
||||||
result.failed++;
|
|
||||||
result.errors.push({ item: label, error: err.message });
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
const fields = (fn) => async () => (await fn()).translatedFields.length;
|
||||||
|
|
||||||
// Frage (yes_no / question / word)
|
// Frage (yes_no / question / word)
|
||||||
if (p.question_id)
|
let questionRow = null;
|
||||||
await run('Frage', () => fillMissingRow('questions', p.question_id, ['sentence']));
|
if (p.question_id) {
|
||||||
|
questionRow = (await query(
|
||||||
|
`SELECT sentence_de, sentence_en, sentence_sv FROM questions WHERE id = $1`, [p.question_id])).rows[0] || null;
|
||||||
|
await run('Frage', fields(() => fillMissingRow('questions', p.question_id, ['sentence'], { overwrite })));
|
||||||
|
}
|
||||||
|
|
||||||
// Positiv-Seite
|
// Positiv-Seite
|
||||||
if (p.answer_type === 'word' && p.positive_statement_id) {
|
if (p.answer_type === 'word' && p.positive_statement_id) {
|
||||||
const ids = (await query(
|
await run('Positiv-Wörter', () => translateWordGroup(p.positive_statement_id, 'statement_positive_words', questionRow, overwrite));
|
||||||
`SELECT word_id FROM statement_positive_words WHERE statement_id = $1`, [p.positive_statement_id])).rows;
|
|
||||||
for (const { word_id } of ids)
|
|
||||||
await run(`Positiv-Wort ${word_id}`, () => fillMissingRow('words', word_id, ['titel']));
|
|
||||||
} else if ((p.answer_type === 'text' || p.answer_type === 'question') && p.positive_statement_id) {
|
} else if ((p.answer_type === 'text' || p.answer_type === 'question') && p.positive_statement_id) {
|
||||||
await run('Positiv-Satz', () => fillMissingRow('statements', p.positive_statement_id, ['positive_sentence']));
|
await run('Positiv-Satz', fields(() => fillMissingRow('statements', p.positive_statement_id, ['positive_sentence'], { overwrite })));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Negativ-Seite
|
// Negativ-Seite
|
||||||
if (p.answer_type === 'word' && p.negative_statement_id) {
|
if (p.answer_type === 'word' && p.negative_statement_id) {
|
||||||
const ids = (await query(
|
await run('Negativ-Wörter', () => translateWordGroup(p.negative_statement_id, 'statement_negative_words', questionRow, overwrite));
|
||||||
`SELECT word_id FROM statement_negative_words WHERE statement_id = $1`, [p.negative_statement_id])).rows;
|
|
||||||
for (const { word_id } of ids)
|
|
||||||
await run(`Negativ-Wort ${word_id}`, () => fillMissingRow('words', word_id, ['titel']));
|
|
||||||
} else if (p.answer_type === 'question' && p.negative_statement_id) {
|
} else if (p.answer_type === 'question' && p.negative_statement_id) {
|
||||||
// fillMissingRow überspringt leere Felder ohne Quelle automatisch
|
await run('Negativ-Satz', fields(() => fillMissingRow('statements', p.negative_statement_id, ['negative_sentence'], { overwrite })));
|
||||||
await run('Negativ-Satz', () => fillMissingRow('statements', p.negative_statement_id, ['negative_sentence']));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
result.content = await loadPairContent(p);
|
result.content = await loadPairContent(p);
|
||||||
|
|||||||
Reference in New Issue
Block a user