feat: words-Tabelle – Brysbaert-Import + hierarchische Kategorien + Batch-Anreicherung
- categories: parent_id (self-referential) + 49 Unterkategorien geseedet
- words: neue Spalten conc_m, dom_pos, level, themenfeld_id + unique index titel_en
- enrich_batches + word_generative Tabellen
- src/lib/enrichWords.js: Batch-Anreicherung (DE/SV-Übersetzung, Wortart, CEFR, Themenfeld)
- src/routes/wordGenerative.js: CRUD für KI-Bild-Pipeline
- src/routes/words.js: Filter dom_pos/level/themenfeld_id/has_conc_m + picture_count
- scripts/import-brysbaert.js: CSV-Import-Skript (lokal gegen Prod-DB)
- POST /api/words/enrich-batch als manueller Trigger

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
71
scripts/import-brysbaert.js
Normal file
71
scripts/import-brysbaert.js
Normal file
@@ -0,0 +1,71 @@
|
||||
// Einmaliger Import der Brysbaert-Concreteness-CSV in die words-Tabelle.
|
||||
// Verwendung: node scripts/import-brysbaert.js <pfad-zur-csv>
|
||||
// Setzt titel_en + conc_m; status = 'requested'. Bestehende Zeilen (gleicher titel_en)
|
||||
// bekommen nur conc_m aktualisiert — alle anderen Felder bleiben unverändert.
|
||||
|
||||
require('dotenv').config({ path: require('path').join(__dirname, '..', '.env') });
|
||||
const { query, pool } = require('../src/db');
|
||||
const fs = require('fs');
|
||||
const readline = require('readline');
|
||||
|
||||
// Matches a plain decimal number (optional sign, fraction and exponent).
// Used to reject score fields with trailing garbage that parseFloat alone
// would silently truncate (e.g. "3.5abc" -> 3.5).
const SCORE_PATTERN = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;

/**
 * Splits one CSV data line into the word and its concreteness score.
 *
 * The LAST comma separates word and score, so the word itself may contain
 * spaces (and commas).
 *
 * @param {string} line - Raw line as read from the CSV file.
 * @returns {{ word: string, conc: number } | null} The parsed pair, or null
 *   when the line has no comma, an empty word, or a score that is not a
 *   finite decimal number.
 */
function parseCsvLine(line) {
  const trimmed = line.trim();
  const comma = trimmed.lastIndexOf(',');
  if (comma === -1) return null;

  const word = trimmed.slice(0, comma).trim();
  const rawScore = trimmed.slice(comma + 1).trim();
  if (!word || !SCORE_PATTERN.test(rawScore)) return null;

  // Number.isFinite additionally rejects overflow such as "1e999" (Infinity).
  const conc = Number.parseFloat(rawScore);
  return Number.isFinite(conc) ? { word, conc } : null;
}

/**
 * Imports a "word,score" CSV into the words table.
 *
 * Reads the CSV path from process.argv[2], skips the first line (header) and
 * upserts every valid row: new words are inserted with status 'requested',
 * existing rows (same titel_en) only get conc_m updated. Blank lines are
 * ignored; invalid lines are counted as skipped. Per-row query failures are
 * counted (the first five are printed) and do not abort the import.
 *
 * Exits the process with status 1 when the path argument is missing or the
 * file does not exist. The database pool is closed when the import finishes,
 * whether it succeeded or threw.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const csvPath = process.argv[2];
  if (!csvPath) {
    console.error('Verwendung: node scripts/import-brysbaert.js <pfad-zur-csv>');
    process.exit(1);
  }
  if (!fs.existsSync(csvPath)) {
    console.error(`Datei nicht gefunden: ${csvPath}`);
    process.exit(1);
  }

  const rl = readline.createInterface({
    input: fs.createReadStream(csvPath),
    crlfDelay: Infinity, // treat \r\n as a single line break
  });

  let header = true;
  let inserted = 0;
  let updated = 0;
  let skipped = 0;
  let errors = 0;

  try {
    for await (const line of rl) {
      if (header) { header = false; continue; }
      if (!line.trim()) continue; // blank lines are not counted as skipped

      const parsed = parseCsvLine(line);
      if (!parsed) { skipped++; continue; }
      const { word, conc } = parsed;

      try {
        // xmax = 0 holds only for freshly inserted rows, which lets a single
        // upsert statement report whether it inserted or updated.
        const res = await query(
          `INSERT INTO words (titel_en, conc_m, status, requested_at)
           VALUES ($1, $2, 'requested', NOW())
           ON CONFLICT (titel_en) DO UPDATE SET conc_m = EXCLUDED.conc_m
           RETURNING (xmax = 0) AS is_insert`,
          [word, conc]
        );
        if (res.rows[0]?.is_insert) inserted++;
        else updated++;
      } catch (err) {
        errors++;
        // Only print the first few failures to keep the output readable.
        if (errors <= 5) console.error(`Fehler bei "${word}":`, err.message);
      }
    }

    console.log(`Import abgeschlossen:`);
    console.log(` ${inserted} neu eingefügt`);
    console.log(` ${updated} aktualisiert (conc_m)`);
    if (skipped) console.log(` ${skipped} Zeilen übersprungen (leer/ungültig)`);
    if (errors) console.log(` ${errors} Fehler`);
  } finally {
    // Always release the pool, even if reading the file failed midway.
    await pool.end();
  }
}
|
||||
|
||||
// Script entry point: run the import; on any uncaught failure print the
// error and terminate the process with exit status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user