feat: Placeholder in der Auto-Generierung + Token-Leak-Fix
- Pair-Generierung markiert Nomen per [surface|lemma]-Markup und löst sie zu
{{label.o:objectId}} / {{label.w:wordId}} auf (Words werden auto-erstellt)
- Pipeline übersetzt + vertont Placeholder-Wörter aus den Sätzen mit
- translateText halluziniert keine ⟦PHn⟧-Tokens mehr (kein Token-Prompt ohne
Tokens, defensives Strippen); TTS/Review lösen geleakte Tokens auf
- POST /api/pipeline/repair-tokens repariert bestehende Sätze + Audios
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
// d.h. ein Resume nach Crash/Redeploy überspringt bereits Erledigtes.
|
||||
const { query } = require('../db');
|
||||
const { LANGS, fillMissingRow } = require('./translate');
|
||||
const { PLACEHOLDER_RE } = require('./placeholders');
|
||||
const { translateWordGroup } = require('./pairContent');
|
||||
const { generatePairsForObject, persistPair } = require('./generatePairs');
|
||||
const { reviewPicturePairs } = require('./reviewPairs');
|
||||
@@ -86,6 +87,31 @@ async function loadPairs(pictureId) {
|
||||
ORDER BY p.id`, [pictureId])).rows;
|
||||
}
|
||||
|
||||
/**
 * Collects the word IDs referenced by `{{label.w:<id>}}` placeholders in the
 * question and statement sentences that belong to the given pairs.
 *
 * These words are created during generation (nouns inside a sentence) and are
 * not attached via statement_words/object_words, so the translation and audio
 * steps have to pick them up from the sentence text itself.
 *
 * @param {Array<object>} pairs - Pair rows; only `question_id`,
 *   `positive_statement_id` and `negative_statement_id` are read. A missing or
 *   non-array value is treated as "no pairs".
 * @returns {Promise<Set<string>>} Distinct word IDs, in order of first
 *   appearance (question sentences first, then statement sentences).
 */
async function collectPlaceholderWordIds(pairs) {
  // Tolerate null/undefined input and null entries instead of throwing on `.map`.
  const pairRows = Array.isArray(pairs) ? pairs.filter(Boolean) : [];
  const distinct = values => [...new Set(values.filter(Boolean))];

  const questionIds = distinct(pairRows.map(p => p.question_id));
  const stmtIds = distinct(
    pairRows.flatMap(p => [p.positive_statement_id, p.negative_statement_id]));

  // The two lookups are independent, so run them concurrently; an empty ID list
  // skips its query entirely.
  const noRows = { rows: [] };
  const [questions, statements] = await Promise.all([
    questionIds.length
      ? query(
        `SELECT sentence_de, sentence_en, sentence_sv FROM questions WHERE id = ANY($1)`, [questionIds])
      : noRows,
    stmtIds.length
      ? query(
        `SELECT positive_sentence_de, positive_sentence_en, positive_sentence_sv,
                negative_sentence_de, negative_sentence_en, negative_sentence_sv
           FROM statements WHERE id = ANY($1)`, [stmtIds])
      : noRows,
  ]);

  const ids = new Set();
  // Scan in a fixed order (questions, then statements) so the resulting Set
  // order does not depend on which query finished first.
  for (const result of [questions, statements]) {
    for (const row of result.rows) {
      for (const text of Object.values(row)) {
        // Columns may be NULL; coerce to a string before matching.
        for (const m of String(text || '').matchAll(PLACEHOLDER_RE)) {
          // m[2] is the placeholder kind, m[3] the referenced ID; only words ('w') count.
          if (m[2] === 'w') ids.add(m[3]);
        }
      }
    }
  }
  return ids;
}
|
||||
|
||||
async function runPicture(pictureId) {
|
||||
// Claim — nur Bilder, die in der Pipeline sind
|
||||
const claim = await query(
|
||||
@@ -153,6 +179,13 @@ async function runPicture(pictureId) {
|
||||
progress.translatedPairs++;
|
||||
await setStep(pictureId, 'translate', progress);
|
||||
}
|
||||
// Nomen-Wörter aus Satz-Placeholdern ({{label.w:id}}) mitübersetzen
|
||||
try {
|
||||
for (const wid of await collectPlaceholderWordIds(pairs)) {
|
||||
try { await fillMissingRow('words', wid, ['titel']); }
|
||||
catch (err) { progress.translateFailures++; console.error(`Translate-Fehler bei Wort ${wid}:`, err.message); }
|
||||
}
|
||||
} catch (err) { console.error(`Placeholder-Wörter sammeln fehlgeschlagen:`, err.message); }
|
||||
|
||||
// ── Step 2.5: KI-Review — alle Pairs + Bild an Sonnet zum Korrekturlesen ────
|
||||
// (Rechtschreibung, Übersetzungs-Konsistenz, Plausibilität zum Bild). Korrekturen
|
||||
@@ -297,6 +330,8 @@ async function collectAudioUnits(pictureId, pairs) {
|
||||
JOIN object_pictures op ON op.object_id = ow.object_id
|
||||
WHERE op.picture_id = $1`, [pictureId]);
|
||||
ow.rows.forEach(x => wordIds.add(x.word_id));
|
||||
// + Nomen-Wörter aus Satz-Placeholdern ({{label.w:id}})
|
||||
(await collectPlaceholderWordIds(pairs)).forEach(id => wordIds.add(id));
|
||||
|
||||
const sources = [];
|
||||
if (questionIds.length) {
|
||||
|
||||
Reference in New Issue
Block a user