// modules/story-summary/vector/retrieval/lexical-index.js

import MiniSearch from '../../../../libs/minisearch.mjs';
import { getContext } from '../../../../../../../extensions.js';
import { getSummaryStore } from '../../data/store.js';
import { getAllChunks } from '../storage/chunk-store.js';
import { xbLog } from '../../../../core/debug-core.js';
import { tokenizeForIndex } from '../utils/tokenizer.js';

// Tag passed as the first argument to every xbLog call in this module.
const MODULE_ID = 'lexical-index';

// In-memory index cache
// cachedIndex/cachedChatId/cachedFingerprint are written together by
// getLexicalIndex() after a successful build and reset by invalidateLexicalIndex().
let cachedIndex = null;
let cachedChatId = null;
let cachedFingerprint = null;
// `building` is true while getLexicalIndex() has a build in flight;
// `buildPromise` is the promise of that build so other callers can await it.
let building = false;
let buildPromise = null;

// floor -> chunk doc ids (L1 only)
// Filled by collectDocuments() and maintained by the incremental add/remove helpers.
let floorDocIds = new Map();

// IDF stats over lexical docs (L1 chunks + L2 events)
let termDfMap = new Map(); // normalized token -> number of docs containing it
let docTokenSets = new Map(); // docId -> Set<token>
let lexicalDocCount = 0; // number of docs currently counted in the IDF stats

// Bounds applied to every computed IDF value (see computeIdfFromDf).
const IDF_MIN = 1.0;
const IDF_MAX = 4.0;
// Number of docs added per batch in buildIndexAsync() before yielding to the event loop.
const BUILD_BATCH_SIZE = 500;

/**
 * Strip one trailing parenthesised reference such as "(#12)" or "(#12-15)"
 * from a summary and trim surrounding whitespace.
 *
 * @param {*} summary - Raw summary; falsy values are treated as an empty string.
 * @returns {string} The cleaned summary text.
 */
function cleanSummary(summary) {
    const raw = summary ? String(summary) : '';
    const withoutTrailingRef = raw.replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, '');
    return withoutTrailingRef.trim();
}

/**
 * 32-bit FNV-1a style hash computed over the UTF-16 code units of `input`.
 * Passing the result of a previous call as `seed` continues the hash, so
 * several strings can be folded into one value.
 *
 * @param {*} input - Value to hash; falsy values hash as the empty string.
 * @param {number} [seed=0x811C9DC5] - Starting hash state (defaults to the FNV offset basis).
 * @returns {number} Unsigned 32-bit hash.
 */
function fnv1a32(input, seed = 0x811C9DC5) {
    const FNV_PRIME = 0x01000193;
    const text = String(input || '');
    const length = text.length;

    let state = seed >>> 0;
    let pos = 0;
    while (pos < length) {
        // XOR in the code unit, then multiply with 32-bit wraparound.
        state = Math.imul(state ^ text.charCodeAt(pos), FNV_PRIME) >>> 0;
        pos += 1;
    }
    return state >>> 0;
}

/**
 * Sort comparator ordering docs by their "type:id" key using plain
 * code-unit string comparison. Missing docs or fields contribute empty strings.
 *
 * @param {{type?: string, id?: *}|null|undefined} a
 * @param {{type?: string, id?: *}|null|undefined} b
 * @returns {number} -1, 0 or 1.
 */
function compareDocKeys(a, b) {
    const keyOf = (doc) => `${doc?.type || ''}:${doc?.id || ''}`;
    const left = keyOf(a);
    const right = keyOf(b);

    if (left === right) return 0;
    if (left < right) return -1;
    if (left > right) return 1;
    return 0;
}

/**
 * Compute an order-independent fingerprint for a document set.
 * Docs are sorted by compareDocKeys on a copy, then each doc's type, id,
 * floor and text are folded into a running fnv1a32 hash.
 *
 * @param {Array<{type?: string, id?: *, floor?: *, text?: string}>} docs - Non-arrays are treated as empty.
 * @returns {string} "<docCount>:<hash in hex>".
 */
function computeFingerprintFromDocs(docs) {
    const ordered = Array.isArray(docs) ? docs.slice().sort(compareDocKeys) : [];

    const digest = ordered.reduce((running, doc) => {
        // Unit/record separator characters keep adjacent fields from running together.
        const payload = `${doc?.type || ''}\u001F${doc?.id || ''}\u001F${doc?.floor ?? ''}\u001F${doc?.text || ''}\u001E`;
        return fnv1a32(payload, running);
    }, 0x811C9DC5);

    return `${ordered.length}:${(digest >>> 0).toString(16)}`;
}

/**
 * Return a promise that resolves from a zero-delay timer, letting other
 * queued work run before the caller continues.
 *
 * @returns {Promise<void>}
 */
function yieldToMain() {
    return new Promise((resume) => {
        setTimeout(resume, 0);
    });
}

/**
 * Limit `v` to the range [min, max]. The lower bound is applied last, so
 * `min` wins if the bounds are inverted.
 *
 * @param {number} v
 * @param {number} min
 * @param {number} max
 * @returns {number}
 */
function clamp(v, min, max) {
    const cappedAbove = Math.min(max, v);
    return Math.max(min, cappedAbove);
}

/**
 * Normalize a term for lookups: stringify, trim, lowercase.
 *
 * @param {*} term - Falsy values normalize to the empty string.
 * @returns {string}
 */
function normalizeTerm(term) {
    const raw = term ? String(term) : '';
    const trimmed = raw.trim();
    return trimmed.toLowerCase();
}

/**
 * Convert a document frequency into a smoothed IDF weight:
 * ln((docCount + 1) / (df + 1)) + 1, clamped to [IDF_MIN, IDF_MAX].
 *
 * @param {number} df - Number of docs containing the term; falsy counts as 0.
 * @param {number} docCount - Total number of docs.
 * @returns {number} 1 when `docCount` is falsy or not positive, otherwise the clamped IDF.
 */
function computeIdfFromDf(df, docCount) {
    if (!docCount) return 1;
    if (docCount <= 0) return 1;

    const smoothedRatio = (docCount + 1) / ((df || 0) + 1);
    const idf = Math.log(smoothedRatio) + 1;
    return clamp(idf, IDF_MIN, IDF_MAX);
}

/**
 * Look up the IDF weight of a term from the module's current statistics.
 *
 * @param {*} term - Normalized with normalizeTerm before lookup.
 * @returns {number} 1 when the term is empty or no docs are counted; otherwise
 *   the IDF from computeIdfFromDf (terms absent from the stats use df = 0).
 */
function computeIdf(term) {
    const key = normalizeTerm(term);
    const statsReady = lexicalDocCount > 0;
    if (!key || !statsReady) return 1;

    const df = termDfMap.get(key) || 0;
    return computeIdfFromDf(df, lexicalDocCount);
}

/**
 * Tokenize text with tokenizeForIndex and return the distinct, normalized,
 * non-empty tokens.
 *
 * @param {*} text - Falsy values are treated as an empty string.
 * @returns {Set<string>}
 */
function extractUniqueTokens(text) {
    const unique = new Set();
    for (const rawToken of tokenizeForIndex(String(text || ''))) {
        const token = normalizeTerm(rawToken);
        if (token) unique.add(token);
    }
    return unique;
}

/**
 * Reset all IDF statistics: the doc counter, the per-doc token sets and the
 * term document-frequency map.
 */
function clearIdfState() {
    lexicalDocCount = 0;
    docTokenSets = new Map();
    termDfMap = new Map();
}

/**
 * Remove one document's contribution from the IDF statistics.
 * Does nothing when the id is empty or the document is not tracked.
 *
 * @param {*} docId - Document id; compared after String() conversion.
 */
function removeDocumentIdf(docId) {
    const key = String(docId || '');
    const tokenSet = key ? docTokenSets.get(key) : undefined;
    if (!tokenSet) return;

    tokenSet.forEach((token) => {
        const remaining = (termDfMap.get(token) || 0) - 1;
        if (remaining > 0) {
            termDfMap.set(token, remaining);
        } else {
            // Drop terms no remaining doc contains so the map does not keep zero entries.
            termDfMap.delete(token);
        }
    });

    docTokenSets.delete(key);
    lexicalDocCount = Math.max(0, lexicalDocCount - 1);
}

/**
 * Add (or replace) one document in the IDF statistics.
 *
 * @param {*} docId - Document id; stored after String() conversion. Empty ids are ignored.
 * @param {*} text - Document text, tokenized via extractUniqueTokens.
 */
function addDocumentIdf(docId, text) {
    const key = String(docId || '');
    if (key === '') return;

    // Replace semantics: drop any previous token set for this id so it is not counted twice.
    removeDocumentIdf(key);

    const tokenSet = extractUniqueTokens(text);
    docTokenSets.set(key, tokenSet);
    lexicalDocCount += 1;

    tokenSet.forEach((token) => {
        const seen = termDfMap.get(token) || 0;
        termDfMap.set(token, seen + 1);
    });
}

/**
 * Rebuild the IDF statistics from scratch for the given docs.
 * Docs without an id or with blank text are skipped.
 *
 * @param {Array<{id?: *, text?: *}>|null|undefined} docs
 */
function rebuildIdfFromDocs(docs) {
    clearIdfState();

    for (const doc of docs || []) {
        const id = String(doc?.id || '');
        const text = String(doc?.text || '');
        const hasContent = id !== '' && text.trim() !== '';
        if (hasContent) addDocumentIdf(id, text);
    }
}

/**
 * Build the lexical document for one event from its title, participants and
 * cleaned summary.
 *
 * @param {{id?: *, title?: *, participants?: *, summary?: *}|null|undefined} ev
 * @returns {{id: *, type: 'event', floor: null, text: string}|null}
 *   The document, or null when the event has no id or yields no text.
 */
function buildEventDoc(ev) {
    if (!ev?.id) return null;

    const parts = [];
    if (ev.title) parts.push(ev.title);

    // `participants` is normally an array, but a non-empty string also has a
    // truthy `.length` and no `.join`; accept it as-is instead of throwing so
    // one malformed event cannot abort a whole index build.
    const { participants } = ev;
    if (Array.isArray(participants)) {
        if (participants.length) parts.push(participants.join(' '));
    } else if (typeof participants === 'string' && participants) {
        parts.push(participants);
    }

    const summary = cleanSummary(ev.summary);
    if (summary) parts.push(summary);

    const text = parts.join(' ').trim();
    if (!text) return null;

    return {
        id: ev.id,
        type: 'event',
        floor: null,
        text,
    };
}

/**
 * Turn stored chunks and events into lexical documents.
 * Side effect: records each chunk id under its floor in `floorDocIds`
 * (only for floors >= 0); the map is not reset here.
 *
 * @param {Array<{chunkId?: *, text?: *, floor?: *}>|null|undefined} chunks
 * @param {Array<object>|null|undefined} events
 * @returns {Array<{id: *, type: string, floor: *, text: string}>} Chunk docs first, then event docs.
 */
function collectDocuments(chunks, events) {
    const collected = [];

    for (const chunk of chunks || []) {
        const chunkId = chunk?.chunkId;
        if (!chunkId) continue;
        if (!chunk.text) continue;

        // Chunks without a floor get -1 and are therefore not tracked per floor.
        const floor = chunk.floor ?? -1;
        collected.push({ id: chunkId, type: 'chunk', floor, text: chunk.text });

        if (floor >= 0) {
            const bucket = floorDocIds.get(floor);
            if (bucket) {
                bucket.push(chunkId);
            } else {
                floorDocIds.set(floor, [chunkId]);
            }
        }
    }

    for (const ev of events || []) {
        const eventDoc = buildEventDoc(ev);
        if (eventDoc !== null) collected.push(eventDoc);
    }

    return collected;
}

/**
 * Create a MiniSearch index over the `text` field and fill it in batches of
 * BUILD_BATCH_SIZE, yielding to the event loop between batches.
 *
 * @param {Array<{id: *, type: string, floor: *, text: string}>} docs
 * @returns {Promise<MiniSearch>} The populated index (empty when `docs` is empty).
 */
async function buildIndexAsync(docs) {
    const startedAt = performance.now();

    const defaultSearchOptions = {
        boost: { text: 1 },
        fuzzy: 0.2,
        prefix: true,
    };
    const miniSearch = new MiniSearch({
        fields: ['text'],
        storeFields: ['type', 'floor'],
        idField: 'id',
        searchOptions: defaultSearchOptions,
        tokenize: tokenizeForIndex,
    });

    // Nothing to add: return the empty index without logging.
    if (!docs.length) return miniSearch;

    let offset = 0;
    while (offset < docs.length) {
        const nextOffset = offset + BUILD_BATCH_SIZE;
        miniSearch.addAll(docs.slice(offset, nextOffset));

        // Only yield when another batch is still pending.
        if (nextOffset < docs.length) {
            await yieldToMain();
        }
        offset = nextOffset;
    }

    const elapsed = Math.round(performance.now() - startedAt);
    xbLog.info(MODULE_ID, `Index built: ${docs.length} docs (${elapsed}ms)`);
    return miniSearch;
}

/**
 * @typedef {object} LexicalSearchResult
 * @property {string[]} atomIds - Reserved for backward compatibility (currently empty).
 * @property {Set<number>} atomFloors - Reserved for backward compatibility (currently empty).
 * @property {string[]} chunkIds - Matched L1 chunk ids sorted by weighted lexical score.
 * @property {Set<number>} chunkFloors - Floor ids covered by matched chunks.
 * @property {string[]} eventIds - Matched L2 event ids sorted by weighted lexical score.
 * @property {Array<{chunkId:string, score:number}>} chunkScores - Weighted lexical scores for matched chunks, in the same order as chunkIds.
 * @property {boolean} idfEnabled - Whether IDF stats are available for weighting.
 * @property {number} idfDocCount - Number of lexical docs used to compute IDF.
 * @property {Array<{term:string,idf:number}>} topIdfTerms - Top query terms by IDF.
 * @property {string[]} queryTerms - Normalized query terms actually searched.
 * @property {Record<string, Array<{floor:number, weightedScore:number, chunkId:string}>>} termFloorHits - Chunk-floor hits by term.
 * @property {Array<{floor:number, score:number, hitTermsCount:number}>} floorLexScores - Aggregated lexical floor scores (debug).
 * @property {number} termSearches - Number of per-term MiniSearch queries executed.
 * @property {number} searchTime - Total lexical search time in milliseconds.
 */

/**
 * Run one MiniSearch query per distinct normalized term and combine the hits
 * into IDF-weighted scores per document and per chunk floor.
 *
 * Each hit contributes `hit.score * idf(term)` to its document. Matched chunk
 * and event ids are returned in descending order of accumulated score. A term
 * whose search throws is logged and skipped, and is not counted in `termSearches`.
 *
 * @param {MiniSearch} index - Index to query; a falsy index yields an empty result.
 * @param {string[]} terms - Raw query terms; normalized and de-duplicated here.
 * @returns {LexicalSearchResult}
 */
export function searchLexicalIndex(index, terms) {
    const startedAt = performance.now();
    const elapsedMs = () => Math.round(performance.now() - startedAt);

    const result = {
        atomIds: [],
        atomFloors: new Set(),
        chunkIds: [],
        chunkFloors: new Set(),
        eventIds: [],
        chunkScores: [],
        idfEnabled: lexicalDocCount > 0,
        idfDocCount: lexicalDocCount,
        topIdfTerms: [],
        queryTerms: [],
        termFloorHits: {},
        floorLexScores: [],
        termSearches: 0,
        searchTime: 0,
    };

    if (!index || !terms?.length) {
        result.searchTime = elapsedMs();
        return result;
    }

    const queryTerms = [...new Set((terms || []).map(normalizeTerm).filter(Boolean))];
    result.queryTerms = [...queryTerms];

    const docAgg = new Map(); // docId -> { score, type, floor }
    const idfByTerm = [];
    const hitsByTerm = new Map(); // term -> [{ floor, weightedScore, chunkId }]
    const floorAgg = new Map(); // floor -> { score, terms:Set<string> }

    for (const term of queryTerms) {
        const idf = computeIdf(term);
        idfByTerm.push({ term, idf });

        let hits;
        try {
            hits = index.search(term, {
                boost: { text: 1 },
                fuzzy: 0.2,
                prefix: true,
                combineWith: 'OR',
                tokenize: tokenizeForIndex,
            });
        } catch (e) {
            xbLog.warn(MODULE_ID, `Lexical term search failed: ${term}`, e);
            continue;
        }

        result.termSearches += 1;

        for (const hit of hits) {
            const docId = String(hit.id || '');
            if (!docId) continue;

            const weighted = (hit.score || 0) * idf;

            // Type and floor are taken from the first hit seen for a document.
            let entry = docAgg.get(docId);
            if (!entry) {
                entry = { score: 0, type: hit.type, floor: hit.floor };
                docAgg.set(docId, entry);
            }
            entry.score += weighted;

            const isFloorChunk = hit.type === 'chunk' && typeof hit.floor === 'number' && hit.floor >= 0;
            if (!isFloorChunk) continue;

            let termHits = hitsByTerm.get(term);
            if (!termHits) {
                termHits = [];
                hitsByTerm.set(term, termHits);
            }
            termHits.push({
                floor: hit.floor,
                weightedScore: weighted,
                chunkId: docId,
            });

            let floorInfo = floorAgg.get(hit.floor);
            if (!floorInfo) {
                floorInfo = { score: 0, terms: new Set() };
                floorAgg.set(hit.floor, floorInfo);
            }
            floorInfo.score += weighted;
            floorInfo.terms.add(term);
        }
    }

    idfByTerm.sort((a, b) => b.idf - a.idf);
    result.topIdfTerms = idfByTerm.slice(0, 5);
    result.termFloorHits = Object.fromEntries(hitsByTerm);
    result.floorLexScores = Array.from(floorAgg, ([floor, info]) => ({
        floor,
        score: Number(info.score.toFixed(6)),
        hitTermsCount: info.terms.size,
    })).sort((a, b) => b.score - a.score);

    const ranked = [...docAgg].sort(([, a], [, b]) => b.score - a.score);

    for (const [docId, { type, floor, score }] of ranked) {
        if (type === 'chunk') {
            result.chunkIds.push(docId);
            result.chunkScores.push({ chunkId: docId, score });
            if (typeof floor === 'number' && floor >= 0) {
                result.chunkFloors.add(floor);
            }
        } else if (type === 'event') {
            result.eventIds.push(docId);
        }
    }

    result.searchTime = elapsedMs();

    xbLog.info(
        MODULE_ID,
        `Lexical search terms=[${queryTerms.slice(0, 5).join(',')}] chunks=${result.chunkIds.length} events=${result.eventIds.length} termSearches=${result.termSearches} (${result.searchTime}ms)`,
    );

    return result;
}

/**
 * Load chunks and events for a chat, fingerprint them, and build a fresh
 * index unless the cached one already matches this chat and fingerprint.
 *
 * Side effects: resets and refills `floorDocIds`; rebuilds the IDF statistics
 * when a new index is built. A failure to load chunks is logged and treated
 * as "no chunks".
 *
 * @param {string} chatId
 * @returns {Promise<{index: MiniSearch, fingerprint: string}>}
 */
async function collectAndBuild(chatId) {
    floorDocIds = new Map();

    const events = getSummaryStore()?.json?.events || [];

    let chunks;
    try {
        chunks = await getAllChunks(chatId);
    } catch (e) {
        xbLog.warn(MODULE_ID, 'Failed to load chunks', e);
        chunks = [];
    }

    const docs = collectDocuments(chunks, events);
    const fp = computeFingerprintFromDocs(docs);

    const cacheStillValid = Boolean(cachedIndex) && cachedChatId === chatId && cachedFingerprint === fp;
    if (cacheStillValid) {
        return { index: cachedIndex, fingerprint: fp };
    }

    rebuildIdfFromDocs(docs);
    return { index: await buildIndexAsync(docs), fingerprint: fp };
}

/**
 * Expose the IDF statistics for query-term selection in query-builder.
 * `enabled` and `docCount` are a snapshot taken at call time; `getIdf` reads
 * the live statistics and falls back to 1 when none are available.
 *
 * @returns {{enabled: boolean, docCount: number, getIdf: (term: string) => number}}
 */
export function getLexicalIdfAccessor() {
    const docCount = lexicalDocCount;
    const getIdf = (term) => computeIdf(term);

    return {
        enabled: docCount > 0,
        docCount,
        getIdf,
    };
}

/**
 * Return the lexical index for the current chat, building it if needed.
 *
 * Resolution order:
 *   1. the cached index, when it belongs to the current chat;
 *   2. the result of a build already in flight, when that build ends up
 *      caching an index for the current chat;
 *   3. a new build started by this call.
 *
 * Callers that waited on an in-flight build re-check the build state in a
 * loop: the first waiter to resume starts the replacement build and the others
 * wait on it, so several waiters no longer start overlapping builds (each of
 * which would reset the shared floor map and IDF statistics).
 *
 * @returns {Promise<MiniSearch|null>} The index, or null when there is no
 *   active chat or the build failed (the failure is logged).
 */
export async function getLexicalIndex() {
    const { chatId } = getContext();
    if (!chatId) return null;

    const cachedForChat = () => (
        cachedIndex && cachedChatId === chatId && cachedFingerprint ? cachedIndex : null
    );

    const ready = cachedForChat();
    if (ready) return ready;

    while (building && buildPromise) {
        try {
            await buildPromise;
        } catch {
            // The in-flight build failed; its owner logs the error. Re-check below.
        }

        const afterWait = cachedForChat();
        if (afterWait) return afterWait;
        // Otherwise the finished build was for another chat or failed. If
        // another waiter has already started a new build, wait for that one.
    }

    xbLog.info(MODULE_ID, `Lexical cache miss; rebuilding (chatId=${chatId.slice(0, 8)})`);

    building = true;
    const ownBuild = collectAndBuild(chatId);
    buildPromise = ownBuild;

    try {
        const { index, fingerprint } = await ownBuild;
        cachedIndex = index;
        cachedChatId = chatId;
        cachedFingerprint = fingerprint;
        return index;
    } catch (e) {
        xbLog.error(MODULE_ID, 'Index build failed', e);
        return null;
    } finally {
        // Only clear the handle this call owns, never a newer build's.
        if (buildPromise === ownBuild) {
            building = false;
            buildPromise = null;
        }
    }
}

/**
 * Start building the lexical index in the background without waiting for it.
 * Does nothing when there is no active chat or a build is already running.
 * A rejection from getLexicalIndex() is logged as a warning.
 */
export function warmupIndex() {
    const context = getContext();
    if (building || !context.chatId) return;

    const onFailure = (e) => {
        xbLog.warn(MODULE_ID, 'Warmup failed', e);
    };
    // Not awaited on purpose; failures are reported by onFailure.
    getLexicalIndex().catch(onFailure);
}

/**
 * Drop the cached index together with the floor map and IDF statistics, so
 * the next getLexicalIndex() call rebuilds from storage.
 * Logs only when an index was actually cached.
 */
export function invalidateLexicalIndex() {
    const hadIndex = Boolean(cachedIndex);
    if (hadIndex) {
        xbLog.info(MODULE_ID, 'Lexical index cache invalidated');
    }

    cachedFingerprint = null;
    cachedChatId = null;
    cachedIndex = null;

    floorDocIds = new Map();
    clearIdfState();
}

/**
 * Replace the chunk documents indexed for one floor in the cached index.
 *
 * Documents currently tracked under `floor` are removed first. Each new chunk
 * id is then discarded from the index before being added, matching
 * addEventDocuments(): MiniSearch rejects a duplicate id, which would otherwise
 * throw for an id that is indexed but not tracked under this floor, or for an
 * id repeated within `chunks`. When an id repeats, the later chunk wins.
 *
 * No-op when no index is cached or `chunks` is empty.
 *
 * @param {number} floor - Floor under which the chunk ids are tracked.
 * @param {Array<{chunkId?: *, text?: *, floor?: *}>} chunks - Chunks without id or text are skipped.
 */
export function addDocumentsForFloor(floor, chunks) {
    if (!cachedIndex || !chunks?.length) return;

    removeDocumentsByFloor(floor);

    // Keyed by chunk id so each id is added to the index at most once.
    const docsById = new Map();
    for (const chunk of chunks) {
        if (!chunk?.chunkId || !chunk.text) continue;

        docsById.set(chunk.chunkId, {
            id: chunk.chunkId,
            type: 'chunk',
            floor: chunk.floor ?? floor,
            text: chunk.text,
        });
    }

    if (!docsById.size) return;

    const docs = [...docsById.values()];

    for (const doc of docs) {
        try {
            cachedIndex.discard(doc.id);
        } catch {
            // Expected when the id is not currently indexed.
        }
    }

    cachedIndex.addAll(docs);
    floorDocIds.set(floor, docs.map((doc) => doc.id));

    // addDocumentIdf replaces any previous stats for the same id.
    for (const doc of docs) {
        addDocumentIdf(doc.id, doc.text);
    }

    xbLog.info(MODULE_ID, `Incremental add floor=${floor} chunks=${docs.length}`);
}

/**
 * Remove every chunk document tracked for a floor from the cached index and
 * from the IDF statistics, then forget the floor.
 * No-op when no index is cached or nothing is tracked for the floor.
 *
 * @param {number} floor
 */
export function removeDocumentsByFloor(floor) {
    const trackedIds = cachedIndex ? floorDocIds.get(floor) : undefined;
    if (!trackedIds?.length) return;

    trackedIds.forEach((docId) => {
        try {
            cachedIndex.discard(docId);
        } catch {
            // Ignore if the doc was already removed/rebuilt.
        }
        removeDocumentIdf(docId);
    });

    floorDocIds.delete(floor);
    xbLog.info(MODULE_ID, `Incremental remove floor=${floor} chunks=${trackedIds.length}`);
}

/**
 * Add or replace event documents in the cached index and the IDF statistics.
 *
 * Events are de-duplicated by id before indexing (the later event wins).
 * MiniSearch rejects a duplicate id, so a repeated id in one batch would
 * otherwise make `addAll` throw after earlier documents had already been
 * discarded and partly re-added, leaving index and IDF statistics out of sync.
 *
 * No-op when no index is cached or `events` is empty.
 *
 * @param {Array<object>} events - Events that buildEventDoc() turns into null are skipped.
 */
export function addEventDocuments(events) {
    if (!cachedIndex || !events?.length) return;

    const docsById = new Map();
    for (const ev of events) {
        const doc = buildEventDoc(ev);
        if (doc) docsById.set(doc.id, doc);
    }

    if (!docsById.size) return;

    const docs = [...docsById.values()];

    for (const doc of docs) {
        try {
            cachedIndex.discard(doc.id);
        } catch {
            // Ignore if previous document does not exist.
        }
        removeDocumentIdf(doc.id);
    }

    cachedIndex.addAll(docs);
    for (const doc of docs) {
        addDocumentIdf(doc.id, doc.text);
    }

    xbLog.info(MODULE_ID, `Incremental add events=${docs.length}`);
}
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`import MiniSearch from '../../../../libs/minisearch.mjs';`
			`import { getContext } from '../../../../../../../extensions.js';`
			`import { getSummaryStore } from '../../data/store.js';`
			`import { getAllChunks } from '../storage/chunk-store.js';`
			`import { xbLog } from '../../../../core/debug-core.js';`
			`import { tokenizeForIndex } from '../utils/tokenizer.js';`

			`const MODULE_ID = 'lexical-index';`

Add files via upload 2026-02-17 22:45:01 +08:00			`// In-memory index cache`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`let cachedIndex = null;`
			`let cachedChatId = null;`
			`let cachedFingerprint = null;`
			`let building = false;`
			`let buildPromise = null;`
Add files via upload 2026-02-17 22:45:01 +08:00
			`// floor -> chunk doc ids (L1 only)`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`let floorDocIds = new Map();`

Add files via upload 2026-02-17 22:45:01 +08:00			`// IDF stats over lexical docs (L1 chunks + L2 events)`
			`let termDfMap = new Map();`
			`let docTokenSets = new Map(); // docId -> Set<token>`
			`let lexicalDocCount = 0;`

			`const IDF_MIN = 1.0;`
			`const IDF_MAX = 4.0;`
			`const BUILD_BATCH_SIZE = 500;`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00
			`function cleanSummary(summary) {`
			`return String(summary \|\| '')`
			`.replace(/\s\(#\d+(?:-\d+)?\)\s$/, '')`
			`.trim();`
			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			`function fnv1a32(input, seed = 0x811C9DC5) {`
			`let hash = seed >>> 0;`
			`const text = String(input \|\| '');`
			`for (let i = 0; i < text.length; i++) {`
			`hash ^= text.charCodeAt(i);`
			`hash = Math.imul(hash, 0x01000193) >>> 0;`
			`}`
			`return hash >>> 0;`
			`}`

			`function compareDocKeys(a, b) {`
			const ka = `${a?.type \|\| ''}:${a?.id \|\| ''}`;
			const kb = `${b?.type \|\| ''}:${b?.id \|\| ''}`;
			`if (ka < kb) return -1;`
			`if (ka > kb) return 1;`
			`return 0;`
			`}`

			`function computeFingerprintFromDocs(docs) {`
			`const normalizedDocs = Array.isArray(docs) ? [...docs].sort(compareDocKeys) : [];`
			`let hash = 0x811C9DC5;`

			`for (const doc of normalizedDocs) {`
			const payload = `${doc?.type \|\| ''}\u001F${doc?.id \|\| ''}\u001F${doc?.floor ?? ''}\u001F${doc?.text \|\| ''}\u001E`;
			`hash = fnv1a32(payload, hash);`
			`}`

			return `${normalizedDocs.length}:${(hash >>> 0).toString(16)}`;
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`

			`function yieldToMain() {`
			`return new Promise(resolve => setTimeout(resolve, 0));`
			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			`function clamp(v, min, max) {`
			`return Math.max(min, Math.min(max, v));`
			`}`

			`function normalizeTerm(term) {`
			`return String(term \|\| '').trim().toLowerCase();`
			`}`

			`function computeIdfFromDf(df, docCount) {`
			`if (!docCount \|\| docCount <= 0) return 1;`
			`const raw = Math.log((docCount + 1) / ((df \|\| 0) + 1)) + 1;`
			`return clamp(raw, IDF_MIN, IDF_MAX);`
			`}`

			`function computeIdf(term) {`
			`const t = normalizeTerm(term);`
			`if (!t \|\| lexicalDocCount <= 0) return 1;`
			`return computeIdfFromDf(termDfMap.get(t) \|\| 0, lexicalDocCount);`
			`}`

			`function extractUniqueTokens(text) {`
			`return new Set(tokenizeForIndex(String(text \|\| '')).map(normalizeTerm).filter(Boolean));`
			`}`

			`function clearIdfState() {`
			`termDfMap = new Map();`
			`docTokenSets = new Map();`
			`lexicalDocCount = 0;`
			`}`

			`function removeDocumentIdf(docId) {`
			`const id = String(docId \|\| '');`
			`if (!id) return;`

			`const tokens = docTokenSets.get(id);`
			`if (!tokens) return;`

			`for (const token of tokens) {`
			`const current = termDfMap.get(token) \|\| 0;`
			`if (current <= 1) {`
			`termDfMap.delete(token);`
			`} else {`
			`termDfMap.set(token, current - 1);`
			`}`
			`}`

			`docTokenSets.delete(id);`
			`lexicalDocCount = Math.max(0, lexicalDocCount - 1);`
			`}`

			`function addDocumentIdf(docId, text) {`
			`const id = String(docId \|\| '');`
			`if (!id) return;`

			`// Replace semantics: remove old token set first if this id already exists.`
			`removeDocumentIdf(id);`

			`const tokens = extractUniqueTokens(text);`
			`docTokenSets.set(id, tokens);`
			`lexicalDocCount += 1;`

			`for (const token of tokens) {`
			`termDfMap.set(token, (termDfMap.get(token) \|\| 0) + 1);`
			`}`
			`}`

			`function rebuildIdfFromDocs(docs) {`
			`clearIdfState();`
			`for (const doc of docs \|\| []) {`
			`const id = String(doc?.id \|\| '');`
			`const text = String(doc?.text \|\| '');`
			`if (!id \|\| !text.trim()) continue;`
			`addDocumentIdf(id, text);`
			`}`
			`}`

			`function buildEventDoc(ev) {`
			`if (!ev?.id) return null;`

			`const parts = [];`
			`if (ev.title) parts.push(ev.title);`
			`if (ev.participants?.length) parts.push(ev.participants.join(' '));`

			`const summary = cleanSummary(ev.summary);`
			`if (summary) parts.push(summary);`

			`const text = parts.join(' ').trim();`
			`if (!text) return null;`

			`return {`
			`id: ev.id,`
			`type: 'event',`
			`floor: null,`
			`text,`
			`};`
			`}`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00
			`function collectDocuments(chunks, events) {`
			`const docs = [];`

Add files via upload 2026-02-17 22:45:01 +08:00			`for (const chunk of chunks \|\| []) {`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`if (!chunk?.chunkId \|\| !chunk.text) continue;`

			`const floor = chunk.floor ?? -1;`
			`docs.push({`
			`id: chunk.chunkId,`
			`type: 'chunk',`
			`floor,`
			`text: chunk.text,`
			`});`

			`if (floor >= 0) {`
Add files via upload 2026-02-17 22:45:01 +08:00			`if (!floorDocIds.has(floor)) floorDocIds.set(floor, []);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`floorDocIds.get(floor).push(chunk.chunkId);`
			`}`
			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			`for (const ev of events \|\| []) {`
			`const doc = buildEventDoc(ev);`
			`if (doc) docs.push(doc);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`

			`return docs;`
			`}`

			`async function buildIndexAsync(docs) {`
			`const T0 = performance.now();`

			`const index = new MiniSearch({`
			`fields: ['text'],`
			`storeFields: ['type', 'floor'],`
			`idField: 'id',`
			`searchOptions: {`
			`boost: { text: 1 },`
			`fuzzy: 0.2,`
			`prefix: true,`
			`},`
			`tokenize: tokenizeForIndex,`
			`});`

Add files via upload 2026-02-17 22:45:01 +08:00			`if (!docs.length) return index;`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00
			`for (let i = 0; i < docs.length; i += BUILD_BATCH_SIZE) {`
			`const batch = docs.slice(i, i + BUILD_BATCH_SIZE);`
			`index.addAll(batch);`

			`if (i + BUILD_BATCH_SIZE < docs.length) {`
			`await yieldToMain();`
			`}`
			`}`

			`const elapsed = Math.round(performance.now() - T0);`
Add files via upload 2026-02-17 22:45:01 +08:00			xbLog.info(MODULE_ID, `Index built: ${docs.length} docs (${elapsed}ms)`);
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`return index;`
			`}`

			`/**`
			`* @typedef {object} LexicalSearchResult`
Add files via upload 2026-02-17 22:45:01 +08:00			`* @property {string[]} atomIds - Reserved for backward compatibility (currently empty).`
			`* @property {Set<number>} atomFloors - Reserved for backward compatibility (currently empty).`
			`* @property {string[]} chunkIds - Matched L1 chunk ids sorted by weighted lexical score.`
			`* @property {Set<number>} chunkFloors - Floor ids covered by matched chunks.`
			`* @property {string[]} eventIds - Matched L2 event ids sorted by weighted lexical score.`
			`* @property {object[]} chunkScores - Weighted lexical scores for matched chunks.`
			`* @property {boolean} idfEnabled - Whether IDF stats are available for weighting.`
			`* @property {number} idfDocCount - Number of lexical docs used to compute IDF.`
			`* @property {Array<{term:string,idf:number}>} topIdfTerms - Top query terms by IDF.`
			`* @property {string[]} queryTerms - Normalized query terms actually searched.`
			`* @property {Record<string, Array<{floor:number, weightedScore:number, chunkId:string}>>} termFloorHits - Chunk-floor hits by term.`
			`* @property {Array<{floor:number, score:number, hitTermsCount:number}>} floorLexScores - Aggregated lexical floor scores (debug).`
			`* @property {number} termSearches - Number of per-term MiniSearch queries executed.`
			`* @property {number} searchTime - Total lexical search time in milliseconds.`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`*/`

			`/**`
Add files via upload 2026-02-17 22:45:01 +08:00			`* Search lexical index by terms, using per-term MiniSearch and IDF-weighted score aggregation.`
			`* This keeps existing outputs compatible while adding observability fields.`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`*`
Add files via upload 2026-02-17 22:45:01 +08:00			`* @param {MiniSearch} index`
			`* @param {string[]} terms`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`* @returns {LexicalSearchResult}`
			`*/`
			`export function searchLexicalIndex(index, terms) {`
			`const T0 = performance.now();`

			`const result = {`
			`atomIds: [],`
			`atomFloors: new Set(),`
			`chunkIds: [],`
			`chunkFloors: new Set(),`
			`eventIds: [],`
			`chunkScores: [],`
Add files via upload 2026-02-17 22:45:01 +08:00			`idfEnabled: lexicalDocCount > 0,`
			`idfDocCount: lexicalDocCount,`
			`topIdfTerms: [],`
			`queryTerms: [],`
			`termFloorHits: {},`
			`floorLexScores: [],`
			`termSearches: 0,`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`searchTime: 0,`
			`};`

			`if (!index \|\| !terms?.length) {`
			`result.searchTime = Math.round(performance.now() - T0);`
			`return result;`
			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			`const queryTerms = Array.from(new Set((terms \|\| []).map(normalizeTerm).filter(Boolean)));`
			`result.queryTerms = [...queryTerms];`
			`const weightedScores = new Map(); // docId -> score`
			`const hitMeta = new Map(); // docId -> { type, floor }`
			`const idfPairs = [];`
			`const termFloorHits = new Map(); // term -> [{ floor, weightedScore, chunkId }]`
			`const floorLexAgg = new Map(); // floor -> { score, terms:Set<string> }`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00
Add files via upload 2026-02-17 22:45:01 +08:00			`for (const term of queryTerms) {`
			`const idf = computeIdf(term);`
			`idfPairs.push({ term, idf });`

			`let hits = [];`
			`try {`
			`hits = index.search(term, {`
			`boost: { text: 1 },`
			`fuzzy: 0.2,`
			`prefix: true,`
			`combineWith: 'OR',`
			`tokenize: tokenizeForIndex,`
			`});`
			`} catch (e) {`
			xbLog.warn(MODULE_ID, `Lexical term search failed: ${term}`, e);
			`continue;`
			`}`

			`result.termSearches += 1;`

			`for (const hit of hits) {`
			`const id = String(hit.id \|\| '');`
			`if (!id) continue;`

			`const weighted = (hit.score \|\| 0) * idf;`
			`weightedScores.set(id, (weightedScores.get(id) \|\| 0) + weighted);`

			`if (!hitMeta.has(id)) {`
			`hitMeta.set(id, {`
			`type: hit.type,`
			`floor: hit.floor,`
			`});`
			`}`

			`if (hit.type === 'chunk' && typeof hit.floor === 'number' && hit.floor >= 0) {`
			`if (!termFloorHits.has(term)) termFloorHits.set(term, []);`
			`termFloorHits.get(term).push({`
			`floor: hit.floor,`
			`weightedScore: weighted,`
			`chunkId: id,`
			`});`

			`const floorAgg = floorLexAgg.get(hit.floor) \|\| { score: 0, terms: new Set() };`
			`floorAgg.score += weighted;`
			`floorAgg.terms.add(term);`
			`floorLexAgg.set(hit.floor, floorAgg);`
			`}`
			`}`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			`idfPairs.sort((a, b) => b.idf - a.idf);`
			`result.topIdfTerms = idfPairs.slice(0, 5);`
			`result.termFloorHits = Object.fromEntries(`
			`[...termFloorHits.entries()].map(([term, hits]) => [term, hits]),`
			`);`
			`result.floorLexScores = [...floorLexAgg.entries()]`
			`.map(([floor, info]) => ({`
			`floor,`
			`score: Number(info.score.toFixed(6)),`
			`hitTermsCount: info.terms.size,`
			`}))`
			`.sort((a, b) => b.score - a.score);`

			`const sortedHits = Array.from(weightedScores.entries())`
			`.sort((a, b) => b[1] - a[1]);`

			`for (const [id, score] of sortedHits) {`
			`const meta = hitMeta.get(id);`
			`if (!meta) continue;`

			`if (meta.type === 'chunk') {`
			`result.chunkIds.push(id);`
			`result.chunkScores.push({ chunkId: id, score });`
			`if (typeof meta.floor === 'number' && meta.floor >= 0) {`
			`result.chunkFloors.add(meta.floor);`
			`}`
			`continue;`
			`}`

			`if (meta.type === 'event') {`
			`result.eventIds.push(id);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`
			`}`

			`result.searchTime = Math.round(performance.now() - T0);`

Add files via upload 2026-02-17 22:45:01 +08:00			`xbLog.info(`
			`MODULE_ID,`
			`Lexical search terms=[${queryTerms.slice(0, 5).join(',')}] chunks=${result.chunkIds.length} events=${result.eventIds.length} termSearches=${result.termSearches} (${result.searchTime}ms)`,
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`);`

			`return result;`
			`}`

			`async function collectAndBuild(chatId) {`
			`floorDocIds = new Map();`

			`const store = getSummaryStore();`
			`const events = store?.json?.events \|\| [];`

			`let chunks = [];`
			`try {`
			`chunks = await getAllChunks(chatId);`
			`} catch (e) {`
Add files via upload 2026-02-17 22:45:01 +08:00			`xbLog.warn(MODULE_ID, 'Failed to load chunks', e);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			`const docs = collectDocuments(chunks, events);`
			`const fp = computeFingerprintFromDocs(docs);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00
			`if (cachedIndex && cachedChatId === chatId && cachedFingerprint === fp) {`
			`return { index: cachedIndex, fingerprint: fp };`
			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			`rebuildIdfFromDocs(docs);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`const index = await buildIndexAsync(docs);`

			`return { index, fingerprint: fp };`
			`}`

			`/**`
Add files via upload 2026-02-17 22:45:01 +08:00			`* Expose IDF accessor for query-term selection in query-builder.`
			`* If index stats are not ready, this gracefully falls back to idf=1.`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`*/`
Add files via upload 2026-02-17 22:45:01 +08:00			`export function getLexicalIdfAccessor() {`
			`return {`
			`enabled: lexicalDocCount > 0,`
			`docCount: lexicalDocCount,`
			`getIdf(term) {`
			`return computeIdf(term);`
			`},`
			`};`
			`}`

2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`export async function getLexicalIndex() {`
			`const { chatId } = getContext();`
			`if (!chatId) return null;`

			`if (cachedIndex && cachedChatId === chatId && cachedFingerprint) {`
			`return cachedIndex;`
			`}`

			`if (building && buildPromise) {`
			`try {`
			`await buildPromise;`
			`if (cachedIndex && cachedChatId === chatId && cachedFingerprint) {`
			`return cachedIndex;`
			`}`
			`} catch {`
Add files via upload 2026-02-17 22:45:01 +08:00			`// Continue to rebuild below.`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`
			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			xbLog.info(MODULE_ID, `Lexical cache miss; rebuilding (chatId=${chatId.slice(0, 8)})`);
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00
			`building = true;`
			`buildPromise = collectAndBuild(chatId);`

			`try {`
			`const { index, fingerprint } = await buildPromise;`
			`cachedIndex = index;`
			`cachedChatId = chatId;`
			`cachedFingerprint = fingerprint;`
			`return index;`
			`} catch (e) {`
Add files via upload 2026-02-17 22:45:01 +08:00			`xbLog.error(MODULE_ID, 'Index build failed', e);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`return null;`
			`} finally {`
			`building = false;`
			`buildPromise = null;`
			`}`
			`}`

			`export function warmupIndex() {`
			`const { chatId } = getContext();`
Add files via upload 2026-02-17 22:45:01 +08:00			`if (!chatId \|\| building) return;`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00
			`getLexicalIndex().catch(e => {`
Add files via upload 2026-02-17 22:45:01 +08:00			`xbLog.warn(MODULE_ID, 'Warmup failed', e);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`});`
			`}`

			`export function invalidateLexicalIndex() {`
			`if (cachedIndex) {`
Add files via upload 2026-02-17 22:45:01 +08:00			`xbLog.info(MODULE_ID, 'Lexical index cache invalidated');`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`
			`cachedIndex = null;`
			`cachedChatId = null;`
			`cachedFingerprint = null;`
			`floorDocIds = new Map();`
Add files via upload 2026-02-17 22:45:01 +08:00			`clearIdfState();`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`

			`export function addDocumentsForFloor(floor, chunks) {`
			`if (!cachedIndex \|\| !chunks?.length) return;`

			`removeDocumentsByFloor(floor);`

			`const docs = [];`
			`const docIds = [];`

			`for (const chunk of chunks) {`
			`if (!chunk?.chunkId \|\| !chunk.text) continue;`
Add files via upload 2026-02-17 22:45:01 +08:00
			`const doc = {`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`id: chunk.chunkId,`
			`type: 'chunk',`
			`floor: chunk.floor ?? floor,`
			`text: chunk.text,`
Add files via upload 2026-02-17 22:45:01 +08:00			`};`
			`docs.push(doc);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`docIds.push(chunk.chunkId);`
			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			`if (!docs.length) return;`

			`cachedIndex.addAll(docs);`
			`floorDocIds.set(floor, docIds);`

			`for (const doc of docs) {`
			`addDocumentIdf(doc.id, doc.text);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`
Add files via upload 2026-02-17 22:45:01 +08:00
			xbLog.info(MODULE_ID, `Incremental add floor=${floor} chunks=${docs.length}`);
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`

			`export function removeDocumentsByFloor(floor) {`
			`if (!cachedIndex) return;`

			`const docIds = floorDocIds.get(floor);`
			`if (!docIds?.length) return;`

			`for (const id of docIds) {`
			`try {`
			`cachedIndex.discard(id);`
			`} catch {`
Add files via upload 2026-02-17 22:45:01 +08:00			`// Ignore if the doc was already removed/rebuilt.`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`
Add files via upload 2026-02-17 22:45:01 +08:00			`removeDocumentIdf(id);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`

			`floorDocIds.delete(floor);`
Add files via upload 2026-02-17 22:45:01 +08:00			xbLog.info(MODULE_ID, `Incremental remove floor=${floor} chunks=${docIds.length}`);
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`

			`export function addEventDocuments(events) {`
			`if (!cachedIndex \|\| !events?.length) return;`

			`const docs = [];`

			`for (const ev of events) {`
Add files via upload 2026-02-17 22:45:01 +08:00			`const doc = buildEventDoc(ev);`
			`if (!doc) continue;`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00
			`try {`
Add files via upload 2026-02-17 22:45:01 +08:00			`cachedIndex.discard(doc.id);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`} catch {`
Add files via upload 2026-02-17 22:45:01 +08:00			`// Ignore if previous document does not exist.`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`
Add files via upload 2026-02-17 22:45:01 +08:00			`removeDocumentIdf(doc.id);`
			`docs.push(doc);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`

Add files via upload 2026-02-17 22:45:01 +08:00			`if (!docs.length) return;`

			`cachedIndex.addAll(docs);`
			`for (const doc of docs) {`
			`addDocumentIdf(doc.id, doc.text);`
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`
Add files via upload 2026-02-17 22:45:01 +08:00
			xbLog.info(MODULE_ID, `Incremental add events=${docs.length}`);
2.0变量 , 向量总结正式推送 2026-02-16 00:30:59 +08:00			`}`