Fix tokenizer jieba tag flow and debug logging

This commit is contained in:
2026-02-03 22:13:51 +08:00
parent b0ed876cb0
commit 12db08abe0
10 changed files with 1357 additions and 70 deletions

View File

@@ -14,6 +14,12 @@ const MAX_CAUSED_BY = 2;
// factUpdates 清洗
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Checks whether a predicate has one of the two recognised relation shapes
 * and, if so, hands it back untouched.
 *
 * Recognised shapes (whole-string matches, at least one character in the middle):
 *   - "对" + … + "的看法"
 *   - "与" + … + "的关系"
 *
 * @param {string} p - Predicate text to check.
 * @returns {string|null} `p` itself when it matches a relation shape, otherwise null.
 */
function normalizeRelationPredicate(p) {
    const relationShapes = [/^对.+的看法$/, /^与.+的关系$/];
    const matchesAnyShape = relationShapes.some((shape) => shape.test(p));
    return matchesAnyShape ? p : null;
}
function sanitizeFacts(parsed) {
if (!parsed) return;
@@ -22,23 +28,25 @@ function sanitizeFacts(parsed) {
for (const item of updates) {
const s = String(item?.s || '').trim();
const p = String(item?.p || '').trim();
const pRaw = String(item?.p || '').trim();
if (!s || !p) continue;
if (!s || !pRaw) continue;
// 删除操作
if (item.retracted === true) {
ok.push({ s, p, retracted: true });
ok.push({ s, p: pRaw, retracted: true });
continue;
}
const o = String(item?.o || '').trim();
if (!o) continue;
const fact = { s, p, o };
const relP = normalizeRelationPredicate(pRaw);
const isRel = !!relP;
const fact = { s, p: isRel ? relP : pRaw, o };
// 关系类保留 trend
if (/^对.+的/.test(p) && item.trend) {
if (isRel && item.trend) {
const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融'];
if (validTrends.includes(item.trend)) {
fact.trend = item.trend;