feat: words-Tabelle – Brysbaert-Import + hierarchische Kategorien + Batch-Anreicherung
- categories: parent_id (self-referential) + 49 Unterkategorien geseedet - words: neue Spalten conc_m, dom_pos, level, themenfeld_id + unique index titel_en - enrich_batches + word_generative Tabellen - src/lib/enrichWords.js: Batch-Anreicherung (DE/SV-Übersetzung, Wortart, CEFR, Themenfeld) - src/routes/wordGenerative.js: CRUD für KI-Bild-Pipeline - src/routes/words.js: Filter dom_pos/level/themenfeld_id/has_conc_m + picture_count - scripts/import-brysbaert.js: CSV-Import-Skript (lokal gegen Prod-DB) - POST /api/words/enrich-batch als manueller Trigger Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
71
scripts/import-brysbaert.js
Normal file
71
scripts/import-brysbaert.js
Normal file
@@ -0,0 +1,71 @@
|
||||
// Einmaliger Import der Brysbaert-Concreteness-CSV in die words-Tabelle.
|
||||
// Verwendung: node scripts/import-brysbaert.js <pfad-zur-csv>
|
||||
// Setzt titel_en + conc_m; status = 'requested'. Bestehende Zeilen (gleicher titel_en)
|
||||
// bekommen nur conc_m aktualisiert — alle anderen Felder bleiben unverändert.
|
||||
|
||||
require('dotenv').config({ path: require('path').join(__dirname, '..', '.env') });
|
||||
const { query, pool } = require('../src/db');
|
||||
const fs = require('fs');
|
||||
const readline = require('readline');
|
||||
|
||||
/**
 * Imports a "word,score" CSV (path taken from argv[2]) into the `words` table.
 *
 * The first line is treated as a header and skipped. Each remaining line is split at
 * its LAST comma, so the word part may itself contain commas or spaces. Lines without
 * a comma, without a word, or with a non-numeric score are counted as skipped.
 * Every valid row is upserted on `titel_en`; the statement returns `(xmax = 0) AS is_insert`,
 * which is used here to count inserts and updates separately.
 * Per-row database errors are counted and only the first five are printed.
 */
async function main() {
  const [, , csvPath] = process.argv;
  if (!csvPath) {
    console.error('Verwendung: node scripts/import-brysbaert.js <pfad-zur-csv>');
    process.exit(1);
  }
  if (!fs.existsSync(csvPath)) {
    console.error(`Datei nicht gefunden: ${csvPath}`);
    process.exit(1);
  }

  const upsertSql = `INSERT INTO words (titel_en, conc_m, status, requested_at)
       VALUES ($1, $2, 'requested', NOW())
       ON CONFLICT (titel_en) DO UPDATE SET conc_m = EXCLUDED.conc_m
       RETURNING (xmax = 0) AS is_insert`;

  const lineReader = readline.createInterface({
    input: fs.createReadStream(csvPath),
    crlfDelay: Infinity,
  });

  const counts = { inserted: 0, updated: 0, skipped: 0, errors: 0 };
  let lineNo = 0;

  for await (const rawLine of lineReader) {
    lineNo += 1;
    if (lineNo === 1) continue; // header row

    const row = rawLine.trim();
    if (row === '') continue;

    // The last comma separates word and score (the word may contain spaces).
    const splitAt = row.lastIndexOf(',');
    const word = splitAt === -1 ? '' : row.slice(0, splitAt).trim();
    const conc = splitAt === -1 ? NaN : parseFloat(row.slice(splitAt + 1).trim());
    if (!word || Number.isNaN(conc)) {
      counts.skipped += 1;
      continue;
    }

    try {
      const outcome = await query(upsertSql, [word, conc]);
      if (outcome.rows[0]?.is_insert) {
        counts.inserted += 1;
      } else {
        counts.updated += 1;
      }
    } catch (err) {
      counts.errors += 1;
      // Keep the console readable: only the first five failures are reported in detail.
      if (counts.errors <= 5) console.error(`Fehler bei "${word}":`, err.message);
    }
  }

  console.log(`Import abgeschlossen:`);
  console.log(` ${counts.inserted} neu eingefügt`);
  console.log(` ${counts.updated} aktualisiert (conc_m)`);
  if (counts.skipped) console.log(` ${counts.skipped} Zeilen übersprungen (leer/ungültig)`);
  if (counts.errors) console.log(` ${counts.errors} Fehler`);

  await pool.end();
}
|
||||
|
||||
// Script entry point: run the import; if main() rejects, print the error and exit with status 1.
main().catch(err => { console.error(err); process.exit(1); });
|
||||
@@ -771,6 +771,128 @@ async function migrate() {
|
||||
ON CONFLICT (key) DO NOTHING
|
||||
`).catch(() => {});
|
||||
|
||||
// ── Brysbaert-Erweiterungen ─────────────────────────────────────────────────
|
||||
|
||||
// parent_id auf categories (self-referential, Oberkategorie → Unterkategorie)
|
||||
await query(`ALTER TABLE categories ADD COLUMN IF NOT EXISTS parent_id UUID REFERENCES categories(id) ON DELETE SET NULL`).catch(() => {});
|
||||
|
||||
// Unterkategorien seeden. Die bestehenden 22 Einträge sind die Oberkategorien (parent_id = NULL).
|
||||
const SUBCATEGORY_TAXONOMY = [
|
||||
// Lebensmittel
|
||||
['Obst', 'Fruit', 'Frukt', 'Lebensmittel'],
|
||||
['Gemüse', 'Vegetables', 'Grönsaker', 'Lebensmittel'],
|
||||
['Fleisch & Fisch', 'Meat & Fish', 'Kött & fisk', 'Lebensmittel'],
|
||||
['Backwaren & Getreide', 'Baked Goods & Grains', 'Bröd & spannmål', 'Lebensmittel'],
|
||||
['Milchprodukte', 'Dairy', 'Mejeriprodukter', 'Lebensmittel'],
|
||||
['Getränke', 'Drinks', 'Drycker', 'Lebensmittel'],
|
||||
['Gewürze & Kräuter', 'Spices & Herbs', 'Kryddor & örter', 'Lebensmittel'],
|
||||
['Süßigkeiten & Snacks', 'Sweets & Snacks', 'Sötsaker & snacks', 'Lebensmittel'],
|
||||
// Tiere
|
||||
['Haustiere', 'Pets', 'Husdjur', 'Tiere'],
|
||||
['Wildtiere', 'Wild Animals', 'Vilda djur', 'Tiere'],
|
||||
['Vögel', 'Birds', 'Fåglar', 'Tiere'],
|
||||
['Reptilien & Amphibien', 'Reptiles & Amphibians', 'Reptiler & amfibier', 'Tiere'],
|
||||
['Insekten & Spinnen', 'Insects & Spiders', 'Insekter & spindlar', 'Tiere'],
|
||||
['Meerestiere', 'Sea Animals', 'Havsdjur', 'Tiere'],
|
||||
// Körper
|
||||
['Kopf & Gesicht', 'Head & Face', 'Huvud & ansikte', 'Körper'],
|
||||
['Rumpf', 'Torso', 'Bål', 'Körper'],
|
||||
['Arme & Beine', 'Arms & Legs', 'Armar & ben', 'Körper'],
|
||||
['Innere Organe', 'Internal Organs', 'Inre organ', 'Körper'],
|
||||
['Körperpflege', 'Personal Care', 'Kroppsvård', 'Körper'],
|
||||
// Kleidung
|
||||
['Oberbekleidung', 'Tops & Outerwear', 'Överkläder', 'Kleidung'],
|
||||
['Unterbekleidung', 'Underwear', 'Underkläder', 'Kleidung'],
|
||||
['Kopfbedeckung', 'Headwear', 'Huvudbonader', 'Kleidung'],
|
||||
['Schuhe & Socken', 'Shoes & Socks', 'Skor & strumpor', 'Kleidung'],
|
||||
['Accessoires', 'Accessories', 'Accessoarer', 'Kleidung'],
|
||||
// Familie & Menschen
|
||||
['Familienmitglieder', 'Family Members', 'Familjemedlemmar', 'Familie & Menschen'],
|
||||
['Berufe & Titel', 'Professions & Titles', 'Yrken & titlar', 'Familie & Menschen'],
|
||||
['Beziehungen', 'Relationships', 'Relationer', 'Familie & Menschen'],
|
||||
// Haushalt
|
||||
['Küchenutensilien', 'Kitchen Utensils', 'Köksredskap', 'Haushalt'],
|
||||
['Reinigung & Pflege', 'Cleaning & Care', 'Rengöring & vård', 'Haushalt'],
|
||||
['Verpackung & Behälter', 'Packaging & Containers', 'Förpackningar & behållare','Haushalt'],
|
||||
// Wohnen & Möbel
|
||||
['Zimmer & Räume', 'Rooms & Spaces', 'Rum & utrymmen', 'Wohnen & Möbel'],
|
||||
['Möbel', 'Furniture', 'Möbler', 'Wohnen & Möbel'],
|
||||
['Beleuchtung & Elektro', 'Lighting & Electronics', 'Belysning & el', 'Wohnen & Möbel'],
|
||||
// Natur & Pflanzen
|
||||
['Pflanzen & Blumen', 'Plants & Flowers', 'Växter & blommor', 'Natur & Pflanzen'],
|
||||
['Bäume & Sträucher', 'Trees & Shrubs', 'Träd & buskar', 'Natur & Pflanzen'],
|
||||
['Landschaftsmerkmale', 'Landscape Features', 'Landskapsdrag', 'Natur & Pflanzen'],
|
||||
['Gesteine & Böden', 'Rocks & Soils', 'Stenar & jordar', 'Natur & Pflanzen'],
|
||||
// Verkehr & Reisen
|
||||
['Fahrzeuge (Land)', 'Land Vehicles', 'Landfordon', 'Verkehr & Reisen'],
|
||||
['Fahrzeuge (Wasser & Luft)', 'Water & Air Vehicles', 'Vatten- & luftfordon', 'Verkehr & Reisen'],
|
||||
['Straße & Infrastruktur', 'Roads & Infrastructure', 'Vägar & infrastruktur', 'Verkehr & Reisen'],
|
||||
// Stadt & Gebäude
|
||||
['Gebäude & Orte', 'Buildings & Places', 'Byggnader & platser', 'Stadt & Gebäude'],
|
||||
['Innenräume & Bereiche', 'Indoor Spaces & Areas', 'Inomhusutrymmen', 'Stadt & Gebäude'],
|
||||
// Technik & Geräte
|
||||
['Haushaltsgeräte', 'Household Appliances', 'Hushållsapparater', 'Technik & Geräte'],
|
||||
['Elektronik & Computer', 'Electronics & Computers', 'Elektronik & datorer', 'Technik & Geräte'],
|
||||
['Werkzeuge & Maschinen', 'Tools & Machines', 'Verktyg & maskiner', 'Technik & Geräte'],
|
||||
// Sport & Freizeit
|
||||
['Sport & Bewegung', 'Sports & Exercise', 'Sport & rörelse', 'Sport & Freizeit'],
|
||||
['Spiele & Spielzeug', 'Games & Toys', 'Spel & leksaker', 'Sport & Freizeit'],
|
||||
['Kunst & Musik', 'Arts & Music', 'Konst & musik', 'Sport & Freizeit'],
|
||||
];
|
||||
for (const [de, en, sv, parentDe] of SUBCATEGORY_TAXONOMY) {
|
||||
await query(
|
||||
`INSERT INTO categories (titel_de, titel_en, titel_sv, status, published_at, parent_id)
|
||||
SELECT $1, $2, $3, 'published', NOW(),
|
||||
(SELECT id FROM categories WHERE lower(titel_de) = lower($4) AND parent_id IS NULL LIMIT 1)
|
||||
WHERE NOT EXISTS (SELECT 1 FROM categories WHERE lower(titel_de) = lower($1))`,
|
||||
[de, en, sv, parentDe]
|
||||
).catch(() => {});
|
||||
}
|
||||
|
||||
// Neue Spalten auf words (Brysbaert-Import + Anreicherung)
|
||||
await query(`ALTER TABLE words ADD COLUMN IF NOT EXISTS conc_m NUMERIC(4,2)`).catch(() => {});
|
||||
await query(`ALTER TABLE words ADD COLUMN IF NOT EXISTS dom_pos VARCHAR(20)`).catch(() => {});
|
||||
await query(`ALTER TABLE words ADD COLUMN IF NOT EXISTS level VARCHAR(5)`).catch(() => {});
|
||||
await query(`ALTER TABLE words ADD COLUMN IF NOT EXISTS themenfeld_id UUID`).catch(() => {});
|
||||
await query(`ALTER TABLE words ADD CONSTRAINT words_themenfeld_id_fkey FOREIGN KEY (themenfeld_id) REFERENCES categories(id) ON DELETE SET NULL`).catch(() => {});
|
||||
await query(`ALTER TABLE words DROP CONSTRAINT IF EXISTS words_dom_pos_check`).catch(() => {});
|
||||
await query(`ALTER TABLE words ADD CONSTRAINT words_dom_pos_check CHECK (dom_pos IN ('noun', 'verb', 'adjective', 'other'))`).catch(() => {});
|
||||
await query(`ALTER TABLE words DROP CONSTRAINT IF EXISTS words_level_check`).catch(() => {});
|
||||
await query(`ALTER TABLE words ADD CONSTRAINT words_level_check CHECK (level IN ('A1', 'A2', 'B1'))`).catch(() => {});
|
||||
|
||||
// Unique-Index auf titel_en — Voraussetzung für ON CONFLICT im CSV-Import.
|
||||
// Falls bestehende Duplikate den Index verhindern, muss erst bereinigt werden.
|
||||
await query(`CREATE UNIQUE INDEX IF NOT EXISTS words_titel_en_key ON words (titel_en)`).catch(() => {});
|
||||
|
||||
// enrich_batches — Status-Tracking für Wort-Anreicherungs-Batches (analog category_batches)
|
||||
await query(`
|
||||
CREATE TABLE IF NOT EXISTS enrich_batches (
|
||||
batch_id TEXT PRIMARY KEY,
|
||||
status TEXT NOT NULL DEFAULT 'submitted',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
)
|
||||
`);
|
||||
|
||||
// word_generative — Pipeline für KI-generierte Wort-Bilder
|
||||
await query(`
|
||||
CREATE TABLE IF NOT EXISTS word_generative (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
word_id UUID NOT NULL REFERENCES words(id) ON DELETE CASCADE,
|
||||
prompt TEXT,
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'pending'
|
||||
CHECK (status IN ('pending', 'generating', 'generated', 'accepted', 'rejected')),
|
||||
picture_link TEXT,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
)
|
||||
`);
|
||||
await query(`
|
||||
DROP TRIGGER IF EXISTS word_generative_updated_at ON word_generative;
|
||||
CREATE TRIGGER word_generative_updated_at
|
||||
BEFORE UPDATE ON word_generative
|
||||
FOR EACH ROW EXECUTE FUNCTION update_updated_at()
|
||||
`);
|
||||
|
||||
// ── Migrate old {{uuid}} placeholders → new {{label.w:uuid}} / {{label.o:uuid}} ──
|
||||
await migratePlaceholders();
|
||||
|
||||
|
||||
19
src/index.js
19
src/index.js
@@ -44,6 +44,7 @@ app.use('/api/audios', auth, require('./routes/audios'));
|
||||
app.use('/api/tts-settings', auth, require('./routes/tts-settings'));
|
||||
app.use('/api/claude', auth, require('./routes/claude'));
|
||||
app.use('/api/pipeline', auth, require('./routes/pipeline'));
|
||||
app.use('/api/word-generative', auth, require('./routes/wordGenerative'));
|
||||
|
||||
// 404
|
||||
app.use((req, res) => {
|
||||
@@ -66,9 +67,27 @@ migrate()
|
||||
// Automatische Wort-Kategorisierung (Message Batches API): kurz nach Boot + stündlich.
|
||||
// Submit/Collect-Ticks, entkoppelt von generate-words und Publish.
|
||||
const { runCategorizationTick } = require('./lib/classifyWords');
|
||||
const { runEnrichTick, enrichWordsSync } = require('./lib/enrichWords');
|
||||
const HOUR = 60 * 60 * 1000;
|
||||
const tick = () => runCategorizationTick().catch(err => console.error('Auto-Kategorisierung:', err.message));
|
||||
const enrichTick = () => runEnrichTick().catch(err => console.error('Auto-Anreicherung:', err.message));
|
||||
setTimeout(tick, 30_000);
|
||||
setTimeout(enrichTick, 60_000);
|
||||
setInterval(tick, HOUR);
|
||||
setInterval(enrichTick, HOUR);
|
||||
|
||||
// Manual trigger: POST /api/words/enrich-batch
//   ?sync=true        → run the synchronous enrichment and return its summary
//   ?max=<positive n> → upper bound of words for the synchronous run (default 500)
//   otherwise         → run one submit/collect tick of the asynchronous batch pipeline
//
// NOTE(review): this route is registered only after migrate() has resolved, while the
// catch-all `// 404` middleware appears earlier in this file and is presumably registered
// synchronously at startup. If so, Express reaches the 404 handler first and this route
// never matches — confirm, and if needed register the route before the 404 handler.
app.post('/api/words/enrich-batch', auth, async (req, res, next) => {
  try {
    if (req.query.sync === 'true') {
      // Explicit radix; anything that is not a positive integer falls back to the default.
      const requested = Number.parseInt(req.query.max, 10);
      const max = Number.isInteger(requested) && requested > 0 ? requested : 500;
      const result = await enrichWordsSync({ max });
      return res.json(result);
    }
    const result = await runEnrichTick();
    res.json(result);
  } catch (err) {
    next(err);
  }
});
|
||||
})
|
||||
.catch(err => { console.error('Migration failed:', err); process.exit(1); });
|
||||
|
||||
229
src/lib/enrichWords.js
Normal file
229
src/lib/enrichWords.js
Normal file
@@ -0,0 +1,229 @@
|
||||
// Automatische Wort-Anreicherung über die Anthropic Message Batches API (asynchron, ~50 % günstiger).
|
||||
// Ziel: Brysbaert-Importwörter (titel_en + conc_m gesetzt) nach DE+SV übersetzen und mit
|
||||
// dom_pos, CEFR-level und themenfeld_id versehen. Folgt dem Muster von classifyWords.js.
|
||||
const { query } = require('../db');
|
||||
|
||||
const ANTHROPIC_BASE = 'https://api.anthropic.com';
|
||||
const MODEL = 'claude-haiku-4-5-20251001';
|
||||
const BATCH_LIMIT = 500;
|
||||
|
||||
let running = false;
|
||||
|
||||
/**
 * Builds the HTTP headers for Anthropic API requests.
 *
 * @returns {{'Content-Type': string, 'x-api-key': string, 'anthropic-version': string}}
 * @throws {Error} when the ANTHROPIC_API_KEY environment variable is missing or empty
 */
function headers() {
  const { ANTHROPIC_API_KEY: key } = process.env;
  if (key) {
    return {
      'Content-Type': 'application/json',
      'x-api-key': key,
      'anthropic-version': '2023-06-01',
    };
  }
  throw new Error('ANTHROPIC_API_KEY nicht konfiguriert');
}
|
||||
|
||||
/**
 * Loads every category with status 'published' (both top-level and sub-categories).
 *
 * @returns {Promise<{byName: Map<string, object>, names: string[]}>}
 *   `byName` maps the lower-cased German and English titles to the category row
 *   (a later row overwrites an earlier one on a title collision);
 *   `names` lists the German titles, sub-categories (rows with a parent_id) first,
 *   then top-level categories, so the prompt presents the granular entries first.
 */
async function loadAllCategories() {
  const { rows } = await query(
    `SELECT id, titel_de, titel_en, parent_id FROM categories WHERE status = 'published'`
  );

  const byName = new Map();
  const subcats = [];
  const topCats = [];

  for (const row of rows) {
    for (const title of [row.titel_de, row.titel_en]) {
      if (title) byName.set(title.toLowerCase(), row);
    }
    if (!row.titel_de) continue; // rows without a German title are not offered to the prompt
    (row.parent_id ? subcats : topCats).push(row.titel_de);
  }

  return { byName, names: subcats.concat(topCats) };
}
|
||||
|
||||
/**
 * Selects words that still need enrichment: rows that have both `conc_m` and `titel_en`
 * but lack at least one of `titel_de`, `dom_pos` or `themenfeld_id`. Newest rows first.
 *
 * @param {number} [limit=BATCH_LIMIT] maximum number of rows to return
 * @returns {Promise<Array<{id: string, titel_en: string}>>}
 */
async function findWordsToEnrich(limit = BATCH_LIMIT) {
  const sql = `SELECT id, titel_en FROM words
     WHERE conc_m IS NOT NULL
       AND titel_en IS NOT NULL
       AND (titel_de IS NULL OR dom_pos IS NULL OR themenfeld_id IS NULL)
     ORDER BY created_at DESC
     LIMIT $1`;
  const { rows } = await query(sql, [limit]);
  return rows;
}
|
||||
|
||||
/**
 * Builds the user prompt for enriching a single English word.
 *
 * @param {{titel_en: string}} word row whose English title is inserted into the prompt
 * @param {string[]} categoryNames topic names offered to the model, joined with ", "
 * @returns {string} prompt text that ends with the expected JSON answer shape
 */
function buildEnrichPrompt(word, categoryNames) {
  const promptLines = [
    'Themenfelder (bevorzuge Unterkategorien wie "Obst", "Haustiere", "Kopf & Gesicht" statt der Oberkategorie):',
    categoryNames.join(', '),
    '',
    `Wort (Englisch): "${word.titel_en}"`,
    '',
    'Regeln:',
    '- titel_de / titel_sv: Grundform ohne Artikel',
    '- dom_pos: noun | verb | adjective | other',
    '- level: A1 | A2 | B1 | null (null wenn B2+ oder unklar)',
    '- themenfeld: exakter Name aus der Liste oben, Fallback "Sonstiges"',
    '',
    'Antworte NUR mit JSON:',
    '{"titel_de":"...","titel_sv":"...","dom_pos":"noun","level":"A1","themenfeld":"Obst"}',
  ];
  return promptLines.join('\n');
}
|
||||
|
||||
/**
 * Writes one enrichment result to the `words` row and, if possible, promotes the word.
 *
 * Each column is updated through COALESCE, so a missing/invalid value in `result`
 * leaves the stored value untouched. Values coming from the model are sanitised first:
 *  - titles and the topic name must be non-empty strings (trimmed),
 *  - `dom_pos` must be one of noun | verb | adjective | other (case-insensitive),
 *  - `level` must be one of A1 | A2 | B1 (case-insensitive).
 * The two allow-lists mirror the CHECK constraints the migration puts on `words`; without
 * this filtering a single out-of-range value would make the whole UPDATE fail and the
 * translations would be lost as well.
 *
 * The topic is resolved through `byName`; unknown or missing topics fall back to the
 * category registered under "sonstiges", if there is one.
 *
 * Afterwards the word is moved from 'requested' to 'translated' when all three titles are set.
 * The function stays best-effort: database errors are logged, never thrown.
 *
 * @param {string} wordId id of the word row to update
 * @param {object|null} result parsed model answer (titel_de, titel_sv, dom_pos, level, themenfeld)
 * @param {Map<string, {id: string}>} byName lower-cased category title → category row
 * @returns {Promise<void>}
 */
async function applyEnrichResult(wordId, result, byName) {
  if (!result || typeof result !== 'object') return;

  const DOM_POS_VALUES = ['noun', 'verb', 'adjective', 'other'];
  const LEVEL_VALUES = ['A1', 'A2', 'B1'];

  // Non-empty trimmed string, or null for anything else (numbers, objects, blanks, ...).
  const cleanText = (value) => {
    if (typeof value !== 'string') return null;
    const trimmed = value.trim();
    return trimmed === '' ? null : trimmed;
  };
  const pick = (value, allowed) => (value !== null && allowed.includes(value) ? value : null);

  const domPos = pick(cleanText(result.dom_pos)?.toLowerCase() ?? null, DOM_POS_VALUES);
  const level = pick(cleanText(result.level)?.toUpperCase() ?? null, LEVEL_VALUES);

  const topicName = cleanText(result.themenfeld);
  const fallback = byName.get('sonstiges') || null;
  const cat = (topicName && byName.get(topicName.toLowerCase())) || fallback;

  try {
    await query(
      `UPDATE words SET
         titel_de = COALESCE($2, titel_de),
         titel_sv = COALESCE($3, titel_sv),
         dom_pos = COALESCE($4, dom_pos),
         level = COALESCE($5, level),
         themenfeld_id = COALESCE($6, themenfeld_id)
       WHERE id = $1`,
      [wordId, cleanText(result.titel_de), cleanText(result.titel_sv), domPos, level, cat?.id ?? null]
    );
  } catch (err) {
    console.error(`Anreicherung: Update für Wort ${wordId} fehlgeschlagen:`, err.message);
  }

  // Auto-promote: requested → translated once all three languages are filled.
  try {
    await query(
      `UPDATE words SET status = 'translated'
       WHERE id = $1 AND status = 'requested'
         AND titel_de IS NOT NULL AND titel_en IS NOT NULL AND titel_sv IS NOT NULL`,
      [wordId]
    );
  } catch (err) {
    console.error(`Anreicherung: Status-Update für Wort ${wordId} fehlgeschlagen:`, err.message);
  }
}
|
||||
|
||||
// ── Asynchroner Batch-Weg ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Submits one Message Batches request containing an enrichment prompt per word and
 * records the returned batch id in `enrich_batches` with status 'submitted'.
 *
 * @param {Array<{id: string, titel_en: string}>} words words to enrich; each id becomes the custom_id
 * @param {string[]} categoryNames topic names passed into every prompt
 * @returns {Promise<string>} id of the created batch
 * @throws {Error} when the API answers with a non-2xx status (message carries status and up to 300 chars of body)
 */
async function submitEnrichBatch(words, categoryNames) {
  const system = 'Du bist ein präziser Lexikograph. Antworte AUSSCHLIESSLICH mit gültigem JSON, ohne Markdown.';

  const requests = [];
  for (const word of words) {
    requests.push({
      custom_id: word.id,
      params: {
        model: MODEL,
        max_tokens: 150,
        system,
        messages: [{ role: 'user', content: buildEnrichPrompt(word, categoryNames) }],
      },
    });
  }

  const response = await fetch(`${ANTHROPIC_BASE}/v1/messages/batches`, {
    method: 'POST',
    headers: headers(),
    body: JSON.stringify({ requests }),
  });

  if (!response.ok) {
    // The body is only used for diagnostics; a failure to read it must not mask the status.
    const detail = await response.text().catch(() => '');
    throw new Error(`Enrich-Batch-Submit fehlgeschlagen (${response.status}): ${detail.slice(0, 300)}`);
  }

  const { id: batchId } = await response.json();
  await query(
    `INSERT INTO enrich_batches (batch_id, status) VALUES ($1, 'submitted') ON CONFLICT DO NOTHING`,
    [batchId]
  );
  return batchId;
}
|
||||
|
||||
/**
 * Leniently parses a model answer as JSON.
 *
 * If the text contains a Markdown code fence (optionally tagged `json`), only the
 * content of the first fence is parsed; otherwise the trimmed text is parsed as is.
 *
 * @param {string|null|undefined} text raw model output
 * @returns {*} the parsed value, or null for empty input or invalid JSON
 */
function parseJson(text) {
  if (!text) return null;

  const trimmed = text.trim();
  const fenced = /```(?:json)?\s*([\s\S]+?)\s*```/.exec(trimmed);
  const candidate = fenced ? fenced[1] : trimmed;

  try {
    return JSON.parse(candidate);
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Checks one batch and, once it has ended, applies its results to the words table.
 *
 * Flow as implemented here:
 *  - status request fails: a 404 removes the batch row; any failure reports "not ended";
 *  - batch not 'ended' yet, or no results_url: report "not ended";
 *  - results download fails: report "not ended" (the batch row is kept);
 *  - otherwise every JSONL line with result.type 'succeeded' is parsed and applied,
 *    then the batch row is deleted.
 *
 * NOTE(review): a batch whose status or results can never be fetched successfully (other
 * than via 404) stays in enrich_batches, and the tick always collects the oldest row
 * first — this looks like it could block new submissions indefinitely; confirm.
 *
 * @param {string} batchId id of the batch to collect
 * @returns {Promise<{ended: boolean, enriched: number}>} `enriched` counts answers that parsed as JSON
 */
async function collectEnrichBatch(batchId) {
  const statusRes = await fetch(`${ANTHROPIC_BASE}/v1/messages/batches/${batchId}`, { headers: headers() });
  if (!statusRes.ok) {
    if (statusRes.status === 404) {
      await query(`DELETE FROM enrich_batches WHERE batch_id = $1`, [batchId]);
    }
    return { ended: false, enriched: 0 };
  }

  const batch = await statusRes.json();
  const resultsReady = batch.processing_status === 'ended' && Boolean(batch.results_url);
  if (!resultsReady) return { ended: false, enriched: 0 };

  const { byName } = await loadAllCategories();
  const resultsRes = await fetch(batch.results_url, { headers: headers() });
  if (!resultsRes.ok) return { ended: false, enriched: 0 };

  const jsonl = await resultsRes.text();
  let enriched = 0;

  for (const rawLine of jsonl.split('\n')) {
    const jsonLine = rawLine.trim();
    if (jsonLine === '') continue;

    let entry;
    try {
      entry = JSON.parse(jsonLine);
    } catch {
      continue; // ignore lines that are not valid JSON
    }
    if (entry.result?.type !== 'succeeded') continue;

    const parsed = parseJson(entry.result.message?.content?.[0]?.text);
    await applyEnrichResult(entry.custom_id, parsed, byName);
    if (parsed) enriched += 1;
  }

  await query(`DELETE FROM enrich_batches WHERE batch_id = $1`, [batchId]);
  return { ended: true, enriched };
}
|
||||
|
||||
/**
 * One pipeline tick: collect the oldest open batch if there is one; otherwise submit a
 * new batch for words that still need enrichment.
 *
 * The module-level `running` flag makes overlapping calls return immediately.
 *
 * @returns {Promise<object>} one of
 *   `{ skipped: true }` (another run is active),
 *   `{ collected, enriched, batchId }` (an open batch was checked),
 *   `{ remaining: 0 }` (nothing to enrich),
 *   `{ submitted, batchId }` (a new batch was created)
 */
async function runEnrichTick() {
  if (running) return { skipped: true };
  running = true;

  try {
    const { rows: openBatches } = await query(
      `SELECT batch_id FROM enrich_batches ORDER BY created_at ASC LIMIT 1`
    );

    if (openBatches.length > 0) {
      const batchId = openBatches[0].batch_id;
      const outcome = await collectEnrichBatch(batchId);
      return { collected: outcome.ended, enriched: outcome.enriched, batchId };
    }

    const words = await findWordsToEnrich();
    if (words.length === 0) return { remaining: 0 };

    const { names } = await loadAllCategories();
    const batchId = await submitEnrichBatch(words, names);
    return { submitted: words.length, batchId };
  } finally {
    // Always release the guard, also when a step above throws.
    running = false;
  }
}
|
||||
|
||||
// ── Synchroner Weg für ?sync=true ─────────────────────────────────────────
|
||||
|
||||
/**
 * Synchronous enrichment path (used for ?sync=true): processes words in chunks of up
 * to 20 via the Messages API until `max` words were handled or nothing is left.
 *
 * Only answers whose `id` belongs to the chunk that was just sent are applied, and each
 * id is applied at most once. The ids come back from the model, so an invented, repeated
 * or malformed entry must never reach the UPDATE.
 *
 * A chunk that fails (HTTP error, unparsable answer) is logged and ends the run, as does
 * a chunk that yields no usable result — this keeps the loop from spinning on the same words.
 *
 * @param {{max?: number}} [options] `max` is the upper bound of words to process (default 500)
 * @returns {Promise<{processed: number, enriched: number}|{skipped: true}>}
 *   `{ skipped: true }` when another enrichment run is active
 */
async function enrichWordsSync({ max = 500 } = {}) {
  if (running) return { skipped: true };
  running = true;
  try {
    const { byName, names } = await loadAllCategories();
    const system = 'Du bist ein präziser Lexikograph. Antworte AUSSCHLIESSLICH mit gültigem JSON, ohne Markdown.';
    let processed = 0;
    let enriched = 0;

    while (processed < max) {
      const words = await findWordsToEnrich(Math.min(20, max - processed));
      if (!words.length) break;

      const items = words.map((w, i) => `${i + 1}. "${w.titel_en}" (id: ${w.id})`).join('\n');
      const user =
        `Themenfelder (bevorzuge Unterkategorien):\n${names.join(', ')}\n\n` +
        `Regeln:\n` +
        `- titel_de / titel_sv: Grundform ohne Artikel\n` +
        `- dom_pos: noun | verb | adjective | other\n` +
        `- level: A1 | A2 | B1 | null\n` +
        `- themenfeld: exakter Name aus der Liste, Fallback "Sonstiges"\n\n` +
        `Wörter:\n${items}\n\n` +
        `Antworte NUR mit JSON:\n` +
        `{"results":[{"id":"<uuid>","titel_de":"...","titel_sv":"...","dom_pos":"noun","level":"A1","themenfeld":"Obst"}]}`;

      // Ids that may still be updated from this chunk's answer.
      const openIds = new Set(words.map((w) => String(w.id)));
      const results = [];
      try {
        const res = await fetch(`${ANTHROPIC_BASE}/v1/messages`, {
          method: 'POST',
          headers: headers(),
          body: JSON.stringify({ model: MODEL, max_tokens: 3000, system, messages: [{ role: 'user', content: user }] }),
        });
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        const data = await res.json();
        const parsed = parseJson(data.content?.[0]?.text);
        const candidates = Array.isArray(parsed?.results) ? parsed.results : [];
        for (const candidate of candidates) {
          if (!candidate || typeof candidate !== 'object') continue;
          // delete() is true only for a requested id that has not been used yet.
          if (openIds.delete(String(candidate.id))) results.push(candidate);
        }
      } catch (err) {
        // Skip this chunk; the empty result list below ends the run.
        console.error('Sync-Anreicherung: Charge fehlgeschlagen:', err.message);
      }

      for (const r of results) {
        await applyEnrichResult(r.id, r, byName);
        enriched++;
      }
      processed += words.length;
      if (!results.length) break; // guard against an endless loop
    }
    return { processed, enriched };
  } finally {
    running = false;
  }
}
|
||||
|
||||
module.exports = { runEnrichTick, enrichWordsSync };
|
||||
69
src/routes/wordGenerative.js
Normal file
69
src/routes/wordGenerative.js
Normal file
@@ -0,0 +1,69 @@
|
||||
const router = require('express').Router();
|
||||
const { query } = require('../db');
|
||||
|
||||
const STATUSES = ['pending', 'generating', 'generated', 'accepted', 'rejected'];
|
||||
|
||||
// GET /api/word-generative
// Lists word_generative rows, newest first.
// Query parameters:
//   status, word_id – optional equality filters
//   limit           – page size, capped at 500 (default 50)
//   offset          – rows to skip (default 0)
// limit/offset that are not non-negative integers fall back to their defaults instead of
// being passed to SQL as NaN or a negative number, which would make the query fail.
router.get('/', async (req, res, next) => {
  try {
    const { status, word_id, limit = 50, offset = 0 } = req.query;

    const parsedLimit = Number.parseInt(limit, 10);
    const parsedOffset = Number.parseInt(offset, 10);
    const safeLimit = Number.isInteger(parsedLimit) && parsedLimit >= 0 ? Math.min(parsedLimit, 500) : 50;
    const safeOffset = Number.isInteger(parsedOffset) && parsedOffset >= 0 ? parsedOffset : 0;

    // $1/$2 are reserved for LIMIT/OFFSET; filters take the following placeholders.
    const params = [safeLimit, safeOffset];
    const conditions = [];
    if (status) { conditions.push(`status = $${params.length + 1}`); params.push(status); }
    if (word_id) { conditions.push(`word_id = $${params.length + 1}`); params.push(word_id); }
    const where = conditions.length ? `WHERE ${conditions.join(' AND ')}` : '';

    const result = await query(
      `SELECT * FROM word_generative ${where} ORDER BY created_at DESC LIMIT $1 OFFSET $2`,
      params
    );
    res.json(result.rows);
  } catch (err) {
    next(err);
  }
});
|
||||
|
||||
// POST /api/word-generative
// Creates a word_generative row. Body: word_id (required), prompt (optional),
// status (optional, must be one of STATUSES; stored as 'pending' when omitted).
// Responds 201 with the created row, or 400 on a missing word_id / unknown status.
router.post('/', async (req, res, next) => {
  try {
    const { word_id: wordId, prompt, status } = req.body;

    if (!wordId) {
      return res.status(400).json({ error: 'word_id ist erforderlich' });
    }
    const statusIsUnknown = Boolean(status) && !STATUSES.includes(status);
    if (statusIsUnknown) {
      return res.status(400).json({ error: `status muss eines sein von: ${STATUSES.join(', ')}` });
    }

    const values = [wordId, prompt || null, status || 'pending'];
    const { rows } = await query(
      `INSERT INTO word_generative (word_id, prompt, status)
       VALUES ($1, $2, $3) RETURNING *`,
      values
    );
    res.status(201).json(rows[0]);
  } catch (err) {
    next(err);
  }
});
|
||||
|
||||
// PATCH /api/word-generative/:id
// Partially updates a row; only prompt, status and picture_link may be changed.
// Responds 400 when no allowed field is supplied or when a supplied status is not one
// of STATUSES, 404 when the id does not exist, otherwise the updated row.
router.patch('/:id', async (req, res, next) => {
  try {
    // Tolerate a missing body so the request gets a 400 instead of a TypeError.
    const body = req.body ?? {};
    const allowed = ['prompt', 'status', 'picture_link'];
    const fields = Object.keys(body).filter(k => allowed.includes(k));
    if (!fields.length) return res.status(400).json({ error: 'Keine gültigen Felder angegeben' });

    // Validate whenever the key is present: '' or null would otherwise skip this check
    // and only fail later at the database constraint.
    if (fields.includes('status') && !STATUSES.includes(body.status))
      return res.status(400).json({ error: `status muss eines sein von: ${STATUSES.join(', ')}` });

    // Column names come from the allow-list above, never from raw user input.
    const setClauses = fields.map((f, i) => `${f} = $${i + 1}`).join(', ');
    const values = [...fields.map(f => body[f]), req.params.id];
    const result = await query(
      `UPDATE word_generative SET ${setClauses} WHERE id = $${fields.length + 1} RETURNING *`,
      values
    );
    if (!result.rows.length) return res.status(404).json({ error: 'Not found' });
    res.json(result.rows[0]);
  } catch (err) {
    next(err);
  }
});
|
||||
|
||||
// DELETE /api/word-generative/:id
// Removes one row. Responds 204 without a body on success, 404 when no row had that id.
router.delete('/:id', async (req, res, next) => {
  try {
    const { rows: deleted } = await query(
      `DELETE FROM word_generative WHERE id = $1 RETURNING id`,
      [req.params.id]
    );
    if (deleted.length === 0) {
      return res.status(404).json({ error: 'Not found' });
    }
    res.status(204).end();
  } catch (err) {
    next(err);
  }
});
|
||||
|
||||
module.exports = router;
|
||||
@@ -12,11 +12,17 @@ const STATUS_TIMESTAMP = {
|
||||
// GET /api/words
|
||||
router.get('/', async (req, res, next) => {
|
||||
try {
|
||||
const { status, titel_de, search, limit = 50, offset = 0 } = req.query;
|
||||
const { status, titel_de, search, dom_pos, level, themenfeld_id, has_conc_m,
|
||||
limit = 50, offset = 0 } = req.query;
|
||||
const params = [Math.min(parseInt(limit), 500), parseInt(offset)];
|
||||
const conditions = [];
|
||||
if (status) { conditions.push(`w.status = $${params.length + 1}`); params.push(status); }
|
||||
if (titel_de) { conditions.push(`lower(w.titel_de) = lower($${params.length + 1})`); params.push(titel_de); }
|
||||
if (dom_pos) { conditions.push(`w.dom_pos = $${params.length + 1}`); params.push(dom_pos); }
|
||||
if (level) { conditions.push(`w.level = $${params.length + 1}`); params.push(level); }
|
||||
if (themenfeld_id) { conditions.push(`w.themenfeld_id = $${params.length + 1}`); params.push(themenfeld_id); }
|
||||
if (has_conc_m === 'true') conditions.push(`w.conc_m IS NOT NULL`);
|
||||
if (has_conc_m === 'false') conditions.push(`w.conc_m IS NULL`);
|
||||
if (search) {
|
||||
const p = `%${search.toLowerCase()}%`;
|
||||
conditions.push(`(lower(w.titel_de) LIKE $${params.length + 1} OR lower(w.titel_en) LIKE $${params.length + 1} OR lower(w.titel_sv) LIKE $${params.length + 1})`);
|
||||
@@ -26,12 +32,14 @@ router.get('/', async (req, res, next) => {
|
||||
const result = await query(
|
||||
`SELECT w.*,
|
||||
COALESCE(json_agg(DISTINCT p.id) FILTER (WHERE p.id IS NOT NULL), '[]') AS picture_ids,
|
||||
COALESCE(json_agg(DISTINCT c.id) FILTER (WHERE c.id IS NOT NULL), '[]') AS category_ids
|
||||
COALESCE(json_agg(DISTINCT c.id) FILTER (WHERE c.id IS NOT NULL), '[]') AS category_ids,
|
||||
COUNT(DISTINCT wp2.picture_id)::int AS picture_count
|
||||
FROM words w
|
||||
LEFT JOIN word_pictures wp ON wp.word_id = w.id
|
||||
LEFT JOIN pictures p ON p.id = wp.picture_id
|
||||
LEFT JOIN word_categories wc ON wc.word_id = w.id
|
||||
LEFT JOIN categories c ON c.id = wc.category_id
|
||||
LEFT JOIN word_pictures wp2 ON wp2.word_id = w.id
|
||||
${where}
|
||||
GROUP BY w.id
|
||||
ORDER BY w.created_at DESC
|
||||
@@ -69,7 +77,8 @@ router.post('/', async (req, res, next) => {
|
||||
router.patch('/:id', async (req, res, next) => {
|
||||
try {
|
||||
const allowed = ['titel_de', 'titel_en', 'titel_sv', 'status',
|
||||
'difficulty_level', 'requested_at', 'published_at', 'blocked_at'];
|
||||
'difficulty_level', 'requested_at', 'published_at', 'blocked_at',
|
||||
'conc_m', 'dom_pos', 'level', 'themenfeld_id'];
|
||||
const fields = Object.keys(req.body).filter(k => allowed.includes(k));
|
||||
if (!fields.length) return res.status(400).json({ error: 'No valid fields provided' });
|
||||
|
||||
@@ -117,12 +126,14 @@ router.get('/:id', async (req, res, next) => {
|
||||
const result = await query(
|
||||
`SELECT w.*,
|
||||
COALESCE(json_agg(DISTINCT p.id) FILTER (WHERE p.id IS NOT NULL), '[]') AS picture_ids,
|
||||
COALESCE(json_agg(DISTINCT c.id) FILTER (WHERE c.id IS NOT NULL), '[]') AS category_ids
|
||||
COALESCE(json_agg(DISTINCT c.id) FILTER (WHERE c.id IS NOT NULL), '[]') AS category_ids,
|
||||
COUNT(DISTINCT wp2.picture_id)::int AS picture_count
|
||||
FROM words w
|
||||
LEFT JOIN word_pictures wp ON wp.word_id = w.id
|
||||
LEFT JOIN pictures p ON p.id = wp.picture_id
|
||||
LEFT JOIN word_categories wc ON wc.word_id = w.id
|
||||
LEFT JOIN categories c ON c.id = wc.category_id
|
||||
LEFT JOIN word_pictures wp2 ON wp2.word_id = w.id
|
||||
WHERE w.id = $1
|
||||
GROUP BY w.id`,
|
||||
[req.params.id]
|
||||
|
||||
Reference in New Issue
Block a user