feat: Placeholder in der Auto-Generierung + Token-Leak-Fix
- Pair-Generierung markiert Nomen per [surface|lemma]-Markup und löst sie zu
{{label.o:objectId}} / {{label.w:wordId}} auf (Words werden auto-erstellt)
- Pipeline übersetzt + vertont Placeholder-Wörter aus den Sätzen mit
- translateText halluziniert keine ⟦PHn⟧-Tokens mehr (kein Token-Prompt ohne
Tokens, defensives Strippen); TTS/Review lösen geleakte Tokens auf
- POST /api/pipeline/repair-tokens repariert bestehende Sätze + Audios
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,8 @@ const { LANGS } = require('../lib/translate');
|
||||
const { loadPairContext, computeReadiness, loadPairContent } = require('../lib/pairContent');
|
||||
const { enqueue, loadPairs, collectAudioUnits, generateWithBackoff, translatePair } = require('../lib/pipeline');
|
||||
const { describeError } = require('./audios');
|
||||
const { PLACEHOLDER_RE } = require('../lib/placeholders');
|
||||
const { PLACEHOLDER_RE, TOKEN_RE, stripLeakedTokens } = require('../lib/placeholders');
|
||||
const { invalidateAudio } = require('../lib/reviewPairs');
|
||||
|
||||
// ── Objekt-Wort-Erkennung in Sätzen (für die manuelle Zuweisung beim Review) ──
|
||||
|
||||
@@ -241,6 +242,74 @@ router.post('/picture/:id/audio-fill', async (req, res, next) => {
|
||||
} catch (err) { next(err); }
|
||||
});
|
||||
|
||||
// POST /api/pipeline/repair-tokens — data repair: removes leaked ⟦PHn:…⟧ tokens
// (hallucinated by Claude during translation, before the fix) from all sentences.
// Affected audios are deleted and regenerated right away from the repaired text.
//
// Responds with JSON: { cells_fixed, audios_regenerated, audios_failed, details }.
// Each `details` entry names the table/id/column that was touched; `audio_error`
// is set on an entry when its audio could not be regenerated.
router.post('/repair-tokens', async (req, res, next) => {
  try {
    // TOKEN_RE is a shared, imported regex. `.test()` is stateful on /g or /y
    // regexes, so lastIndex is reset before every check.
    const hasToken = (value) => {
      TOKEN_RE.lastIndex = 0;
      return TOKEN_RE.test(value || '');
    };

    // Table and column names cannot be bound as query parameters, so any value
    // that is interpolated into SQL as an identifier must look like a plain one.
    const isSafeIdentifier = (name) =>
      typeof name === 'string' && /^[A-Za-z_][A-Za-z0-9_]*$/.test(name);

    const result = { cells_fixed: 0, audios_regenerated: 0, audios_failed: 0, details: [] };

    // Regenerates one audio unit and books the outcome on `result` / `detail`.
    // A TTS failure is recorded, not thrown, so the remaining repairs still run.
    const regenerateAudio = async (unit, detail) => {
      try {
        await generateWithBackoff(unit);
        result.audios_regenerated++;
      } catch (err) {
        result.audios_failed++;
        detail.audio_error = describeError(err);
      }
    };

    const targets = [
      { table: 'questions', fields: ['sentence'] },
      { table: 'statements', fields: ['positive_sentence', 'negative_sentence'] },
      { table: 'words', fields: ['titel'] },
    ];

    // 1) Clean text cells, then delete and regenerate the audios that belong to them.
    for (const { table, fields } of targets) {
      const cols = fields.flatMap((field) => LANGS.map((lang) => `${field}_${lang}`));
      const found = await query(
        `SELECT id, ${cols.join(', ')} FROM ${table}
         WHERE ${cols.map((c) => `${c} LIKE '%⟦PH%'`).join(' OR ')}`);

      for (const row of found.rows) {
        for (const field of fields) {
          for (const lang of LANGS) {
            const col = `${field}_${lang}`;
            if (!hasToken(row[col])) continue;

            // Stripping a token can leave doubled spaces behind; collapse them.
            const fixed = stripLeakedTokens(row[col]).replace(/\s{2,}/g, ' ').trim();
            await query(`UPDATE ${table} SET ${col} = $1 WHERE id = $2`, [fixed, row.id]);
            await invalidateAudio(table, row.id, field, lang);
            result.cells_fixed++;

            const detail = { table, id: row.id, column: col, fixed };
            // A cell that consisted only of tokens is now empty: there is nothing
            // to synthesize, so skip TTS (same guard as in step 2).
            if (fixed) {
              await regenerateAudio(
                { text: fixed, language: lang, source_table: table, source_id: row.id, source_field: field },
                detail);
            }
            result.details.push(detail);
          }
        }
      }
    }

    // 2) Audios whose synthesized text still contains tokens (the cell may already
    //    have been corrected some other way) → delete and regenerate them from the
    //    current cell text.
    const audios = await query(
      `SELECT id, source_table, source_id, source_field, language FROM audios WHERE text LIKE '%⟦PH%'`);

    for (const a of audios.rows) {
      const detail = { table: 'audios', id: a.id, column: `${a.source_field}_${a.language}` };

      // These values come from stored rows and end up as SQL identifiers below.
      // A malformed row must not abort the whole repair or produce unintended SQL.
      if (![a.source_table, a.source_field, a.language].every(isSafeIdentifier)) {
        result.audios_failed++;
        detail.audio_error = 'invalid source reference';
        result.details.push(detail);
        continue;
      }

      const cell = await query(
        `SELECT ${a.source_field}_${a.language} AS text FROM ${a.source_table} WHERE id = $1`, [a.source_id]);
      const text = (cell.rows[0]?.text || '').trim();
      await invalidateAudio(a.source_table, a.source_id, a.source_field, a.language);

      // Without source text (row gone or cell empty) the stale audio is only removed.
      if (text) {
        await regenerateAudio(
          { text, language: a.language, source_table: a.source_table, source_id: a.source_id, source_field: a.source_field },
          detail);
      }
      result.details.push(detail);
    }

    res.json(result);
  } catch (err) { next(err); }
});
|
||||
|
||||
// GET /api/pipeline/settings
|
||||
router.get('/settings', async (req, res, next) => {
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user