fix: prevent pronouns/function words from being detected as vocabulary
- fuzzyMatch threshold 0.6 → 0.85 (man/Mann scores 0.75 and now fails)
- min token length 2 → 4 (excludes man, der, die, das, ich, wir...)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -77,7 +77,7 @@ async function resolvePlaceholders(text, allObjects) {
|
|||||||
|
|
||||||
// ─── Fuzzy word matching ──────────────────────────────────────────────────────
|
// ─── Fuzzy word matching ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
function fuzzyMatch(token, wordTitle, threshold = 0.6) {
|
function fuzzyMatch(token, wordTitle, threshold = 0.85) {
|
||||||
const t = token.toLowerCase();
|
const t = token.toLowerCase();
|
||||||
const w = wordTitle.toLowerCase();
|
const w = wordTitle.toLowerCase();
|
||||||
if (!t.startsWith(w) && !w.startsWith(t)) return false;
|
if (!t.startsWith(w) && !w.startsWith(t)) return false;
|
||||||
@@ -234,7 +234,7 @@ function PairForm({ objectId, allObjects, onPairSaved }) {
|
|||||||
if (!allText.trim()) { setWordMap({}); return; }
|
if (!allText.trim()) { setWordMap({}); return; }
|
||||||
const t = setTimeout(async () => {
|
const t = setTimeout(async () => {
|
||||||
try {
|
try {
|
||||||
const tokens = [...new Set(allText.split(/[\s.,!?;:()\[\]"']+/).filter(w => w.length >= 2))];
|
const tokens = [...new Set(allText.split(/[\s.,!?;:()\[\]"']+/).filter(w => w.length >= 4))];
|
||||||
if (!tokens.length) return;
|
if (!tokens.length) return;
|
||||||
const results = await Promise.allSettled(
|
const results = await Promise.allSettled(
|
||||||
tokens.map(async w => {
|
tokens.map(async w => {
|
||||||
@@ -532,7 +532,7 @@ function EditPairForm({ pair, allObjects, onSaved, onCancel, onDeleted }) {
|
|||||||
if (!allText.trim()) { setWordMap({}); return; }
|
if (!allText.trim()) { setWordMap({}); return; }
|
||||||
const t = setTimeout(async () => {
|
const t = setTimeout(async () => {
|
||||||
try {
|
try {
|
||||||
const tokens = [...new Set(allText.split(/[\s.,!?;:()\[\]"']+/).filter(w => w.length >= 2))];
|
const tokens = [...new Set(allText.split(/[\s.,!?;:()\[\]"']+/).filter(w => w.length >= 4))];
|
||||||
if (!tokens.length) return;
|
if (!tokens.length) return;
|
||||||
const results = await Promise.allSettled(
|
const results = await Promise.allSettled(
|
||||||
tokens.map(async w => {
|
tokens.map(async w => {
|
||||||
|
|||||||
Reference in New Issue
Block a user