- categories: parent_id (self-referential) + 49 Unterkategorien geseedet - words: neue Spalten conc_m, dom_pos, level, themenfeld_id + unique index titel_en - enrich_batches + word_generative Tabellen - src/lib/enrichWords.js: Batch-Anreicherung (DE/SV-Übersetzung, Wortart, CEFR, Themenfeld) - src/routes/wordGenerative.js: CRUD für KI-Bild-Pipeline - src/routes/words.js: Filter dom_pos/level/themenfeld_id/has_conc_m + picture_count - scripts/import-brysbaert.js: CSV-Import-Skript (lokal gegen Prod-DB) - POST /api/words/enrich-batch als manueller Trigger Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
72 lines
2.3 KiB
JavaScript
72 lines
2.3 KiB
JavaScript
// Einmaliger Import der Brysbaert-Concreteness-CSV in die words-Tabelle.
// Verwendung: node scripts/import-brysbaert.js <pfad-zur-csv>
// Setzt titel_en + conc_m; status = 'requested'. Bestehende Zeilen (gleicher titel_en)
// bekommen nur conc_m aktualisiert — alle anderen Felder bleiben unverändert.
|
|
|
|
require('dotenv').config({ path: require('path').join(__dirname, '..', '.env') });
|
|
const { query, pool } = require('../src/db');
|
|
const fs = require('fs');
|
|
const readline = require('readline');
|
|
|
|
// Plain decimal number with optional sign, fraction and exponent. Used instead
// of parseFloat(), which silently accepts trailing garbage such as "4.2abc".
const DECIMAL_NUMBER = /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?$/i;

/**
 * Splits one CSV data line into the word and its concreteness score.
 *
 * The LAST comma separates word and score, so the word itself may contain
 * spaces or commas.
 *
 * @param {string} line - One raw line from the CSV file.
 * @returns {{ word: string, conc: number } | null} The parsed pair, or `null`
 *   when the line has no comma, an empty word, or a score that is not a
 *   finite decimal number.
 */
function parseConcretenessLine(line) {
  const text = line.trim();
  const comma = text.lastIndexOf(',');
  if (comma === -1) return null;

  const word = text.slice(0, comma).trim();
  const scoreText = text.slice(comma + 1).trim();
  if (!word || !DECIMAL_NUMBER.test(scoreText)) return null;

  // Guards against values that match the pattern but overflow (e.g. "1e999").
  const conc = Number(scoreText);
  return Number.isFinite(conc) ? { word, conc } : null;
}

/**
 * Imports a "word,score" CSV into the `words` table.
 *
 * Reads the CSV path from `process.argv[2]`, skips the first line as the
 * header, and upserts every valid row: a new `titel_en` is inserted with
 * status 'requested', an existing one only gets `conc_m` overwritten.
 * A summary of inserted/updated/skipped/failed rows is printed at the end.
 *
 * Exits the process with code 1 when the path argument is missing or the
 * file does not exist. Row-level database errors are counted (the first five
 * are logged) and do not abort the import.
 *
 * @returns {Promise<void>} Resolves once the import finished and the pool was closed.
 */
async function main() {
  const csvPath = process.argv[2];
  if (!csvPath) {
    console.error('Verwendung: node scripts/import-brysbaert.js <pfad-zur-csv>');
    process.exit(1);
  }
  if (!fs.existsSync(csvPath)) {
    console.error(`Datei nicht gefunden: ${csvPath}`);
    process.exit(1);
  }

  const rl = readline.createInterface({
    input: fs.createReadStream(csvPath),
    crlfDelay: Infinity,
  });

  let isHeader = true;
  let inserted = 0;
  let updated = 0;
  let skipped = 0;
  let errors = 0;

  try {
    for await (const line of rl) {
      // The first line is always treated as the column header.
      if (isHeader) {
        isHeader = false;
        continue;
      }
      // Blank lines are ignored without being counted as skipped.
      if (!line.trim()) continue;

      const parsed = parseConcretenessLine(line);
      if (!parsed) {
        skipped++;
        continue;
      }
      const { word, conc } = parsed;

      try {
        // `xmax = 0` is exposed as `is_insert` to tell a fresh insert apart
        // from a row that hit the ON CONFLICT update path.
        const res = await query(
          `INSERT INTO words (titel_en, conc_m, status, requested_at)
           VALUES ($1, $2, 'requested', NOW())
           ON CONFLICT (titel_en) DO UPDATE SET conc_m = EXCLUDED.conc_m
           RETURNING (xmax = 0) AS is_insert`,
          [word, conc]
        );
        if (res.rows[0]?.is_insert) inserted++;
        else updated++;
      } catch (err) {
        errors++;
        // Only the first few failures are printed to keep the output readable.
        if (errors <= 5) console.error(`Fehler bei "${word}":`, err.message);
      }
    }

    console.log(`Import abgeschlossen:`);
    console.log(` ${inserted} neu eingefügt`);
    console.log(` ${updated} aktualisiert (conc_m)`);
    if (skipped) console.log(` ${skipped} Zeilen übersprungen (leer/ungültig)`);
    if (errors) console.log(` ${errors} Fehler`);
  } finally {
    // Release the database pool even when reading the file fails midway.
    await pool.end();
  }
}
|
|
|
|
// Script entry point: print any uncaught failure and exit with a non-zero code.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
|