Fix tokenizer jieba tag flow and debug logging

This commit is contained in:
2026-02-03 22:13:51 +08:00
parent b0ed876cb0
commit 12db08abe0
10 changed files with 1357 additions and 70 deletions

View File

@@ -14,6 +14,12 @@ const MAX_CAUSED_BY = 2;
// factUpdates 清洗
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Accepts a predicate only when it has one of the two relation shapes:
 * "对…的看法" or "与…的关系", each with at least one character in the middle.
 *
 * @param {string} p - Candidate predicate text.
 * @returns {string|null} `p` itself, unchanged, when it matches a relation
 *   shape; otherwise `null`.
 */
function normalizeRelationPredicate(p) {
    // Both patterns are anchored, so the whole predicate must fit the shape.
    const relationShapes = [/^对.+的看法$/, /^与.+的关系$/];
    const isRelation = relationShapes.some((shape) => shape.test(p));
    return isRelation ? p : null;
}
function sanitizeFacts(parsed) {
if (!parsed) return;
@@ -22,23 +28,25 @@ function sanitizeFacts(parsed) {
for (const item of updates) {
const s = String(item?.s || '').trim();
const p = String(item?.p || '').trim();
const pRaw = String(item?.p || '').trim();
if (!s || !p) continue;
if (!s || !pRaw) continue;
// 删除操作
if (item.retracted === true) {
ok.push({ s, p, retracted: true });
ok.push({ s, p: pRaw, retracted: true });
continue;
}
const o = String(item?.o || '').trim();
if (!o) continue;
const fact = { s, p, o };
const relP = normalizeRelationPredicate(pRaw);
const isRel = !!relP;
const fact = { s, p: isRel ? relP : pRaw, o };
// 关系类保留 trend
if (/^对.+的/.test(p) && item.trend) {
if (isRel && item.trend) {
const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融'];
if (validTrends.includes(item.trend)) {
fact.trend = item.trend;

View File

@@ -102,7 +102,8 @@ Acknowledged. Now reviewing the incremental summarization specifications:
[Fact Tracking - SPO Triples]
├─ s: 主体(角色名/物品名)
├─ p: 谓词(属性名/对X的看法)
├─ p: 谓词(属性名)
│ - 关系类只允许:对X的看法 / 与X的关系
├─ o: 值(当前状态)
├─ trend: 仅关系类填写
├─ retracted: 删除标记
@@ -191,7 +192,7 @@ Before generating, observe the USER and analyze carefully:
## factUpdates 规则
- s+p 为键,相同键会覆盖旧值
- 状态类:s=角色名, p=属性(生死/位置/状态等), o=值
- 关系类:s=角色A, p="对B的看法", o=描述, trend=趋势
- 关系类:s=角色A, p="对B的看法" 或 p="与B的关系",trend 仅限关系类
- 删除:设置 retracted: true,不需要填 o
- 只输出有变化的条目
- 硬约束才记录,避免叙事化,确保少、硬、稳定