Upload LittleWhiteBox extension
This commit is contained in:
5912
libs/dexie.mjs
Normal file
5912
libs/dexie.mjs
Normal file
File diff suppressed because it is too large
Load Diff
2665
libs/fflate.mjs
Normal file
2665
libs/fflate.mjs
Normal file
File diff suppressed because it is too large
Load Diff
25
libs/jieba-wasm/LICENSE
Normal file
25
libs/jieba-wasm/LICENSE
Normal file
@@ -0,0 +1,25 @@
|
||||
Copyright (c) 2018 fengkx <liangkx8237@gmail.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
134
libs/jieba-wasm/README.md
Normal file
134
libs/jieba-wasm/README.md
Normal file
@@ -0,0 +1,134 @@
|
||||
# jieba-wasm
|
||||
|
||||
> [jieba-rs](https://github.com/messense/jieba-rs) 的 wasm binding
|
||||
|
||||
_编译成 WASM 摆脱编译 Node Addon 的烦恼_
|
||||
|
||||
# Usage
|
||||
## Node.js
|
||||
```js
|
||||
const {
|
||||
cut,
|
||||
cut_all,
|
||||
cut_for_search,
|
||||
tokenize,
|
||||
add_word,
|
||||
} = require("jieba-wasm");
|
||||
cut("中华人民共和国武汉市长江大桥", true);
|
||||
// [ '中华人民共和国', '武汉市', '长江大桥' ]
|
||||
cut_all("中华人民共和国武汉市长江大桥", true);
|
||||
/*
|
||||
[
|
||||
'中', '中华',
|
||||
'中华人民', '中华人民共和国',
|
||||
'华', '华人',
|
||||
'人', '人民',
|
||||
'人民共和国', '民',
|
||||
'共', '共和',
|
||||
'共和国', '和',
|
||||
'国', '武',
|
||||
'武汉', '武汉市',
|
||||
'汉', '市',
|
||||
'市长', '长',
|
||||
'长江', '长江大桥',
|
||||
'江', '大',
|
||||
'大桥', '桥'
|
||||
]
|
||||
*/
|
||||
cut_for_search("中华人民共和国武汉市长江大桥", true);
|
||||
/*
|
||||
[
|
||||
'中华', '华人',
|
||||
'人民', '共和',
|
||||
'共和国', '中华人民共和国',
|
||||
'武汉', '武汉市',
|
||||
'长江', '大桥',
|
||||
'长江大桥'
|
||||
]
|
||||
*/
|
||||
tokenize("中华人民共和国武汉市长江大桥", "default", true);
|
||||
/*
|
||||
[
|
||||
{ word: '中华人民共和国', start: 0, end: 7 },
|
||||
{ word: '武汉市', start: 7, end: 10 },
|
||||
{ word: '长江大桥', start: 10, end: 14 }
|
||||
]
|
||||
*/
|
||||
tokenize("中华人民共和国武汉市长江大桥", "search", true);
|
||||
/*
|
||||
[
|
||||
{ word: '中华', start: 0, end: 2 },
|
||||
{ word: '华人', start: 1, end: 3 },
|
||||
{ word: '人民', start: 2, end: 4 },
|
||||
{ word: '共和', start: 4, end: 6 },
|
||||
{ word: '共和国', start: 4, end: 7 },
|
||||
{ word: '中华人民共和国', start: 0, end: 7 },
|
||||
{ word: '武汉', start: 7, end: 9 },
|
||||
{ word: '武汉市', start: 7, end: 10 },
|
||||
{ word: '长江', start: 10, end: 12 },
|
||||
{ word: '大桥', start: 12, end: 14 },
|
||||
{ word: '长江大桥', start: 10, end: 14 }
|
||||
]
|
||||
*/
|
||||
|
||||
cut("桥大江长市汉武的省北湖国和共民人华中");
|
||||
/*
|
||||
[
|
||||
'桥', '大江', '长',
|
||||
'市', '汉', '武',
|
||||
'的', '省', '北湖',
|
||||
'国', '和', '共',
|
||||
'民', '人', '华中'
|
||||
]
|
||||
*/
|
||||
["桥大江长", "市汉武", "省北湖", "国和共民人华中"].map((word) => {
|
||||
add_word(word);
|
||||
});
|
||||
cut("桥大江长市汉武的省北湖国和共民人华中");
|
||||
// ["桥大江长", "市汉武", "的", "省北湖", "国和共民人华中"];
|
||||
|
||||
with_dict("自动借书机 1 n"); // 导入自定义字典,词条格式:词语 词频 词性(可选),以换行符分隔
|
||||
cut("你好我是一个自动借书机");
|
||||
// ["你好", "我", "是", "一个", "自动借书机"];
|
||||
```
|
||||
|
||||
## Browser
|
||||
```ts
|
||||
import init, { cut } from 'jieba-wasm';
|
||||
|
||||
// 重要:使用前必须初始化
|
||||
await init();
|
||||
|
||||
cut("中华人民共和国武汉市长江大桥", true);
|
||||
// [ '中华人民共和国', '武汉市', '长江大桥' ]
|
||||
```
|
||||
|
||||
# 示例 Demo
|
||||
|
||||
## 安装依赖
|
||||
|
||||
安装 wasm-bindgen 和 wasm-opt
|
||||
|
||||
```bash
|
||||
cargo install wasm-bindgen-cli --locked
|
||||
cargo install wasm-opt --locked
|
||||
```
|
||||
|
||||
## 前期准备
|
||||
|
||||
首先保证存在 rust 环境,然后运行以下命令
|
||||
```bash
|
||||
npm run build:cargo
|
||||
npm run build
|
||||
```
|
||||
|
||||
## 运行浏览器端示例
|
||||
```bash
|
||||
cd demo/web
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
# Piror Art
|
||||
|
||||
https://github.com/messense/jieba-rs
|
||||
73
libs/jieba-wasm/jieba_rs_wasm.d.ts
vendored
Normal file
73
libs/jieba-wasm/jieba_rs_wasm.d.ts
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
/* tslint:disable */
|
||||
/* eslint-disable */
|
||||
export function cut(text: string, hmm?: boolean | null): string[];
|
||||
export function cut_all(text: string): string[];
|
||||
export function cut_for_search(text: string, hmm?: boolean | null): string[];
|
||||
export function tokenize(text: string, mode: string, hmm?: boolean | null): Token[];
|
||||
export function add_word(word: string, freq?: number | null, tag?: string | null): number;
|
||||
export function tag(sentence: string, hmm?: boolean | null): Tag[];
|
||||
export function with_dict(dict: string): void;
|
||||
|
||||
/** Represents a single token with its word and position. */
|
||||
export interface Token {
|
||||
word: string;
|
||||
start: number;
|
||||
end: number;
|
||||
}
|
||||
|
||||
/** Represents a single word and its part-of-speech tag. */
|
||||
export interface Tag {
|
||||
word: string;
|
||||
tag: string;
|
||||
}
|
||||
|
||||
|
||||
|
||||
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
|
||||
|
||||
export interface InitOutput {
|
||||
readonly memory: WebAssembly.Memory;
|
||||
readonly cut: (a: number, b: number, c: number) => [number, number];
|
||||
readonly cut_all: (a: number, b: number) => [number, number];
|
||||
readonly cut_for_search: (a: number, b: number, c: number) => [number, number];
|
||||
readonly tokenize: (a: number, b: number, c: number, d: number, e: number) => [number, number, number, number];
|
||||
readonly add_word: (a: number, b: number, c: number, d: number, e: number) => number;
|
||||
readonly tag: (a: number, b: number, c: number) => [number, number];
|
||||
readonly with_dict: (a: number, b: number) => [number, number];
|
||||
readonly rust_zstd_wasm_shim_qsort: (a: number, b: number, c: number, d: number) => void;
|
||||
readonly rust_zstd_wasm_shim_malloc: (a: number) => number;
|
||||
readonly rust_zstd_wasm_shim_memcmp: (a: number, b: number, c: number) => number;
|
||||
readonly rust_zstd_wasm_shim_calloc: (a: number, b: number) => number;
|
||||
readonly rust_zstd_wasm_shim_free: (a: number) => void;
|
||||
readonly rust_zstd_wasm_shim_memcpy: (a: number, b: number, c: number) => number;
|
||||
readonly rust_zstd_wasm_shim_memmove: (a: number, b: number, c: number) => number;
|
||||
readonly rust_zstd_wasm_shim_memset: (a: number, b: number, c: number) => number;
|
||||
readonly __wbindgen_malloc: (a: number, b: number) => number;
|
||||
readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
|
||||
readonly __wbindgen_export_2: WebAssembly.Table;
|
||||
readonly __externref_drop_slice: (a: number, b: number) => void;
|
||||
readonly __wbindgen_free: (a: number, b: number, c: number) => void;
|
||||
readonly __externref_table_dealloc: (a: number) => void;
|
||||
readonly __wbindgen_start: () => void;
|
||||
}
|
||||
|
||||
export type SyncInitInput = BufferSource | WebAssembly.Module;
|
||||
/**
|
||||
* Instantiates the given `module`, which can either be bytes or
|
||||
* a precompiled `WebAssembly.Module`.
|
||||
*
|
||||
* @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
|
||||
*
|
||||
* @returns {InitOutput}
|
||||
*/
|
||||
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
|
||||
|
||||
/**
|
||||
* If `module_or_path` is {RequestInfo} or {URL}, makes a request and
|
||||
* for everything else, calls `WebAssembly.instantiate` directly.
|
||||
*
|
||||
* @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
|
||||
*
|
||||
* @returns {Promise<InitOutput>}
|
||||
*/
|
||||
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;
|
||||
438
libs/jieba-wasm/jieba_rs_wasm.js
Normal file
438
libs/jieba-wasm/jieba_rs_wasm.js
Normal file
@@ -0,0 +1,438 @@
|
||||
let wasm;
|
||||
|
||||
let cachedUint8ArrayMemory0 = null;
|
||||
|
||||
function getUint8ArrayMemory0() {
|
||||
if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
|
||||
cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
|
||||
}
|
||||
return cachedUint8ArrayMemory0;
|
||||
}
|
||||
|
||||
let cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
|
||||
|
||||
if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); };
|
||||
|
||||
const MAX_SAFARI_DECODE_BYTES = 2146435072;
|
||||
let numBytesDecoded = 0;
|
||||
function decodeText(ptr, len) {
|
||||
numBytesDecoded += len;
|
||||
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
|
||||
cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
|
||||
cachedTextDecoder.decode();
|
||||
numBytesDecoded = len;
|
||||
}
|
||||
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
|
||||
}
|
||||
|
||||
function getStringFromWasm0(ptr, len) {
|
||||
ptr = ptr >>> 0;
|
||||
return decodeText(ptr, len);
|
||||
}
|
||||
|
||||
function debugString(val) {
|
||||
// primitive types
|
||||
const type = typeof val;
|
||||
if (type == 'number' || type == 'boolean' || val == null) {
|
||||
return `${val}`;
|
||||
}
|
||||
if (type == 'string') {
|
||||
return `"${val}"`;
|
||||
}
|
||||
if (type == 'symbol') {
|
||||
const description = val.description;
|
||||
if (description == null) {
|
||||
return 'Symbol';
|
||||
} else {
|
||||
return `Symbol(${description})`;
|
||||
}
|
||||
}
|
||||
if (type == 'function') {
|
||||
const name = val.name;
|
||||
if (typeof name == 'string' && name.length > 0) {
|
||||
return `Function(${name})`;
|
||||
} else {
|
||||
return 'Function';
|
||||
}
|
||||
}
|
||||
// objects
|
||||
if (Array.isArray(val)) {
|
||||
const length = val.length;
|
||||
let debug = '[';
|
||||
if (length > 0) {
|
||||
debug += debugString(val[0]);
|
||||
}
|
||||
for(let i = 1; i < length; i++) {
|
||||
debug += ', ' + debugString(val[i]);
|
||||
}
|
||||
debug += ']';
|
||||
return debug;
|
||||
}
|
||||
// Test for built-in
|
||||
const builtInMatches = /\[object ([^\]]+)\]/.exec(toString.call(val));
|
||||
let className;
|
||||
if (builtInMatches && builtInMatches.length > 1) {
|
||||
className = builtInMatches[1];
|
||||
} else {
|
||||
// Failed to match the standard '[object ClassName]'
|
||||
return toString.call(val);
|
||||
}
|
||||
if (className == 'Object') {
|
||||
// we're a user defined class or Object
|
||||
// JSON.stringify avoids problems with cycles, and is generally much
|
||||
// easier than looping through ownProperties of `val`.
|
||||
try {
|
||||
return 'Object(' + JSON.stringify(val) + ')';
|
||||
} catch (_) {
|
||||
return 'Object';
|
||||
}
|
||||
}
|
||||
// errors
|
||||
if (val instanceof Error) {
|
||||
return `${val.name}: ${val.message}\n${val.stack}`;
|
||||
}
|
||||
// TODO we could test for more things here, like `Set`s and `Map`s.
|
||||
return className;
|
||||
}
|
||||
|
||||
let WASM_VECTOR_LEN = 0;
|
||||
|
||||
const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } );
|
||||
|
||||
const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
|
||||
? function (arg, view) {
|
||||
return cachedTextEncoder.encodeInto(arg, view);
|
||||
}
|
||||
: function (arg, view) {
|
||||
const buf = cachedTextEncoder.encode(arg);
|
||||
view.set(buf);
|
||||
return {
|
||||
read: arg.length,
|
||||
written: buf.length
|
||||
};
|
||||
});
|
||||
|
||||
function passStringToWasm0(arg, malloc, realloc) {
|
||||
|
||||
if (realloc === undefined) {
|
||||
const buf = cachedTextEncoder.encode(arg);
|
||||
const ptr = malloc(buf.length, 1) >>> 0;
|
||||
getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
|
||||
WASM_VECTOR_LEN = buf.length;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
let len = arg.length;
|
||||
let ptr = malloc(len, 1) >>> 0;
|
||||
|
||||
const mem = getUint8ArrayMemory0();
|
||||
|
||||
let offset = 0;
|
||||
|
||||
for (; offset < len; offset++) {
|
||||
const code = arg.charCodeAt(offset);
|
||||
if (code > 0x7F) break;
|
||||
mem[ptr + offset] = code;
|
||||
}
|
||||
|
||||
if (offset !== len) {
|
||||
if (offset !== 0) {
|
||||
arg = arg.slice(offset);
|
||||
}
|
||||
ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
|
||||
const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len);
|
||||
const ret = encodeString(arg, view);
|
||||
|
||||
offset += ret.written;
|
||||
ptr = realloc(ptr, len, offset, 1) >>> 0;
|
||||
}
|
||||
|
||||
WASM_VECTOR_LEN = offset;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
let cachedDataViewMemory0 = null;
|
||||
|
||||
function getDataViewMemory0() {
|
||||
if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && cachedDataViewMemory0.buffer !== wasm.memory.buffer)) {
|
||||
cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
|
||||
}
|
||||
return cachedDataViewMemory0;
|
||||
}
|
||||
|
||||
function isLikeNone(x) {
|
||||
return x === undefined || x === null;
|
||||
}
|
||||
|
||||
function getArrayJsValueFromWasm0(ptr, len) {
|
||||
ptr = ptr >>> 0;
|
||||
const mem = getDataViewMemory0();
|
||||
const result = [];
|
||||
for (let i = ptr; i < ptr + 4 * len; i += 4) {
|
||||
result.push(wasm.__wbindgen_export_2.get(mem.getUint32(i, true)));
|
||||
}
|
||||
wasm.__externref_drop_slice(ptr, len);
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* @param {string} text
|
||||
* @param {boolean | null} [hmm]
|
||||
* @returns {string[]}
|
||||
*/
|
||||
export function cut(text, hmm) {
|
||||
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
const len0 = WASM_VECTOR_LEN;
|
||||
const ret = wasm.cut(ptr0, len0, isLikeNone(hmm) ? 0xFFFFFF : hmm ? 1 : 0);
|
||||
var v2 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
|
||||
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
||||
return v2;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @returns {string[]}
|
||||
*/
|
||||
export function cut_all(text) {
|
||||
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
const len0 = WASM_VECTOR_LEN;
|
||||
const ret = wasm.cut_all(ptr0, len0);
|
||||
var v2 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
|
||||
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
||||
return v2;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @param {boolean | null} [hmm]
|
||||
* @returns {string[]}
|
||||
*/
|
||||
export function cut_for_search(text, hmm) {
|
||||
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
const len0 = WASM_VECTOR_LEN;
|
||||
const ret = wasm.cut_for_search(ptr0, len0, isLikeNone(hmm) ? 0xFFFFFF : hmm ? 1 : 0);
|
||||
var v2 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
|
||||
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
||||
return v2;
|
||||
}
|
||||
|
||||
function takeFromExternrefTable0(idx) {
|
||||
const value = wasm.__wbindgen_export_2.get(idx);
|
||||
wasm.__externref_table_dealloc(idx);
|
||||
return value;
|
||||
}
|
||||
/**
|
||||
* @param {string} text
|
||||
* @param {string} mode
|
||||
* @param {boolean | null} [hmm]
|
||||
* @returns {Token[]}
|
||||
*/
|
||||
export function tokenize(text, mode, hmm) {
|
||||
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
const len0 = WASM_VECTOR_LEN;
|
||||
const ptr1 = passStringToWasm0(mode, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
const len1 = WASM_VECTOR_LEN;
|
||||
const ret = wasm.tokenize(ptr0, len0, ptr1, len1, isLikeNone(hmm) ? 0xFFFFFF : hmm ? 1 : 0);
|
||||
if (ret[3]) {
|
||||
throw takeFromExternrefTable0(ret[2]);
|
||||
}
|
||||
var v3 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
|
||||
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
||||
return v3;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} word
|
||||
* @param {number | null} [freq]
|
||||
* @param {string | null} [tag]
|
||||
* @returns {number}
|
||||
*/
|
||||
export function add_word(word, freq, tag) {
|
||||
const ptr0 = passStringToWasm0(word, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
const len0 = WASM_VECTOR_LEN;
|
||||
var ptr1 = isLikeNone(tag) ? 0 : passStringToWasm0(tag, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
var len1 = WASM_VECTOR_LEN;
|
||||
const ret = wasm.add_word(ptr0, len0, isLikeNone(freq) ? 0x100000001 : (freq) >>> 0, ptr1, len1);
|
||||
return ret >>> 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} sentence
|
||||
* @param {boolean | null} [hmm]
|
||||
* @returns {Tag[]}
|
||||
*/
|
||||
export function tag(sentence, hmm) {
|
||||
const ptr0 = passStringToWasm0(sentence, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
const len0 = WASM_VECTOR_LEN;
|
||||
const ret = wasm.tag(ptr0, len0, isLikeNone(hmm) ? 0xFFFFFF : hmm ? 1 : 0);
|
||||
var v2 = getArrayJsValueFromWasm0(ret[0], ret[1]).slice();
|
||||
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
||||
return v2;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} dict
|
||||
*/
|
||||
export function with_dict(dict) {
|
||||
const ptr0 = passStringToWasm0(dict, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
const len0 = WASM_VECTOR_LEN;
|
||||
const ret = wasm.with_dict(ptr0, len0);
|
||||
if (ret[1]) {
|
||||
throw takeFromExternrefTable0(ret[0]);
|
||||
}
|
||||
}
|
||||
|
||||
const EXPECTED_RESPONSE_TYPES = new Set(['basic', 'cors', 'default']);
|
||||
|
||||
async function __wbg_load(module, imports) {
|
||||
if (typeof Response === 'function' && module instanceof Response) {
|
||||
if (typeof WebAssembly.instantiateStreaming === 'function') {
|
||||
try {
|
||||
return await WebAssembly.instantiateStreaming(module, imports);
|
||||
|
||||
} catch (e) {
|
||||
const validResponse = module.ok && EXPECTED_RESPONSE_TYPES.has(module.type);
|
||||
|
||||
if (validResponse && module.headers.get('Content-Type') !== 'application/wasm') {
|
||||
console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
|
||||
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const bytes = await module.arrayBuffer();
|
||||
return await WebAssembly.instantiate(bytes, imports);
|
||||
|
||||
} else {
|
||||
const instance = await WebAssembly.instantiate(module, imports);
|
||||
|
||||
if (instance instanceof WebAssembly.Instance) {
|
||||
return { instance, module };
|
||||
|
||||
} else {
|
||||
return instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function __wbg_get_imports() {
|
||||
const imports = {};
|
||||
imports.wbg = {};
|
||||
imports.wbg.__wbg_Error_0497d5bdba9362e5 = function(arg0, arg1) {
|
||||
const ret = Error(getStringFromWasm0(arg0, arg1));
|
||||
return ret;
|
||||
};
|
||||
imports.wbg.__wbg_new_07b483f72211fd66 = function() {
|
||||
const ret = new Object();
|
||||
return ret;
|
||||
};
|
||||
imports.wbg.__wbg_set_3f1d0b984ed272ed = function(arg0, arg1, arg2) {
|
||||
arg0[arg1] = arg2;
|
||||
};
|
||||
imports.wbg.__wbindgen_bigint_from_u64 = function(arg0) {
|
||||
const ret = BigInt.asUintN(64, arg0);
|
||||
return ret;
|
||||
};
|
||||
imports.wbg.__wbindgen_debug_string = function(arg0, arg1) {
|
||||
const ret = debugString(arg1);
|
||||
const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
||||
const len1 = WASM_VECTOR_LEN;
|
||||
getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
|
||||
getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
|
||||
};
|
||||
imports.wbg.__wbindgen_init_externref_table = function() {
|
||||
const table = wasm.__wbindgen_export_2;
|
||||
const offset = table.grow(4);
|
||||
table.set(0, undefined);
|
||||
table.set(offset + 0, undefined);
|
||||
table.set(offset + 1, null);
|
||||
table.set(offset + 2, true);
|
||||
table.set(offset + 3, false);
|
||||
;
|
||||
};
|
||||
imports.wbg.__wbindgen_number_new = function(arg0) {
|
||||
const ret = arg0;
|
||||
return ret;
|
||||
};
|
||||
imports.wbg.__wbindgen_string_new = function(arg0, arg1) {
|
||||
const ret = getStringFromWasm0(arg0, arg1);
|
||||
return ret;
|
||||
};
|
||||
imports.wbg.__wbindgen_throw = function(arg0, arg1) {
|
||||
throw new Error(getStringFromWasm0(arg0, arg1));
|
||||
};
|
||||
|
||||
return imports;
|
||||
}
|
||||
|
||||
function __wbg_init_memory(imports, memory) {
|
||||
|
||||
}
|
||||
|
||||
function __wbg_finalize_init(instance, module) {
|
||||
wasm = instance.exports;
|
||||
__wbg_init.__wbindgen_wasm_module = module;
|
||||
cachedDataViewMemory0 = null;
|
||||
cachedUint8ArrayMemory0 = null;
|
||||
|
||||
|
||||
wasm.__wbindgen_start();
|
||||
return wasm;
|
||||
}
|
||||
|
||||
function initSync(module) {
|
||||
if (wasm !== undefined) return wasm;
|
||||
|
||||
|
||||
if (typeof module !== 'undefined') {
|
||||
if (Object.getPrototypeOf(module) === Object.prototype) {
|
||||
({module} = module)
|
||||
} else {
|
||||
console.warn('using deprecated parameters for `initSync()`; pass a single object instead')
|
||||
}
|
||||
}
|
||||
|
||||
const imports = __wbg_get_imports();
|
||||
|
||||
__wbg_init_memory(imports);
|
||||
|
||||
if (!(module instanceof WebAssembly.Module)) {
|
||||
module = new WebAssembly.Module(module);
|
||||
}
|
||||
|
||||
const instance = new WebAssembly.Instance(module, imports);
|
||||
|
||||
return __wbg_finalize_init(instance, module);
|
||||
}
|
||||
|
||||
async function __wbg_init(module_or_path) {
|
||||
if (wasm !== undefined) return wasm;
|
||||
|
||||
|
||||
if (typeof module_or_path !== 'undefined') {
|
||||
if (Object.getPrototypeOf(module_or_path) === Object.prototype) {
|
||||
({module_or_path} = module_or_path)
|
||||
} else {
|
||||
console.warn('using deprecated parameters for the initialization function; pass a single object instead')
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof module_or_path === 'undefined') {
|
||||
module_or_path = new URL('jieba_rs_wasm_bg.wasm', import.meta.url);
|
||||
}
|
||||
const imports = __wbg_get_imports();
|
||||
|
||||
if (typeof module_or_path === 'string' || (typeof Request === 'function' && module_or_path instanceof Request) || (typeof URL === 'function' && module_or_path instanceof URL)) {
|
||||
module_or_path = fetch(module_or_path);
|
||||
}
|
||||
|
||||
__wbg_init_memory(imports);
|
||||
|
||||
const { instance, module } = await __wbg_load(await module_or_path, imports);
|
||||
|
||||
return __wbg_finalize_init(instance, module);
|
||||
}
|
||||
|
||||
export { initSync };
|
||||
export default __wbg_init;
|
||||
BIN
libs/jieba-wasm/jieba_rs_wasm_bg.wasm
Normal file
BIN
libs/jieba-wasm/jieba_rs_wasm_bg.wasm
Normal file
Binary file not shown.
25
libs/jieba-wasm/jieba_rs_wasm_bg.wasm.d.ts
vendored
Normal file
25
libs/jieba-wasm/jieba_rs_wasm_bg.wasm.d.ts
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
/* tslint:disable */
|
||||
/* eslint-disable */
|
||||
export const memory: WebAssembly.Memory;
|
||||
export const cut: (a: number, b: number, c: number) => [number, number];
|
||||
export const cut_all: (a: number, b: number) => [number, number];
|
||||
export const cut_for_search: (a: number, b: number, c: number) => [number, number];
|
||||
export const tokenize: (a: number, b: number, c: number, d: number, e: number) => [number, number, number, number];
|
||||
export const add_word: (a: number, b: number, c: number, d: number, e: number) => number;
|
||||
export const tag: (a: number, b: number, c: number) => [number, number];
|
||||
export const with_dict: (a: number, b: number) => [number, number];
|
||||
export const rust_zstd_wasm_shim_qsort: (a: number, b: number, c: number, d: number) => void;
|
||||
export const rust_zstd_wasm_shim_malloc: (a: number) => number;
|
||||
export const rust_zstd_wasm_shim_memcmp: (a: number, b: number, c: number) => number;
|
||||
export const rust_zstd_wasm_shim_calloc: (a: number, b: number) => number;
|
||||
export const rust_zstd_wasm_shim_free: (a: number) => void;
|
||||
export const rust_zstd_wasm_shim_memcpy: (a: number, b: number, c: number) => number;
|
||||
export const rust_zstd_wasm_shim_memmove: (a: number, b: number, c: number) => number;
|
||||
export const rust_zstd_wasm_shim_memset: (a: number, b: number, c: number) => number;
|
||||
export const __wbindgen_malloc: (a: number, b: number) => number;
|
||||
export const __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
|
||||
export const __wbindgen_export_2: WebAssembly.Table;
|
||||
export const __externref_drop_slice: (a: number, b: number) => void;
|
||||
export const __wbindgen_free: (a: number, b: number, c: number) => void;
|
||||
export const __externref_table_dealloc: (a: number) => void;
|
||||
export const __wbindgen_start: () => void;
|
||||
129
libs/jieba-wasm/package.json
Normal file
129
libs/jieba-wasm/package.json
Normal file
@@ -0,0 +1,129 @@
|
||||
{
|
||||
"name": "jieba-wasm",
|
||||
"version": "2.4.0",
|
||||
"description": "WASM binding to jieba-rs",
|
||||
"main": "./pkg/nodejs/jieba_rs_wasm.js",
|
||||
"types": "./pkg/nodejs/jieba_rs_wasm.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"node": {
|
||||
"types": "./pkg/nodejs/jieba_rs_wasm.d.ts",
|
||||
"default": "./pkg/nodejs/jieba_rs_wasm.js"
|
||||
},
|
||||
"deno": {
|
||||
"types": "./pkg/deno/jieba_rs_wasm.d.ts",
|
||||
"default": "./pkg/deno/jieba_rs_wasm.js"
|
||||
},
|
||||
"browser": {
|
||||
"types": "./pkg/web/jieba_rs_wasm.d.ts",
|
||||
"default": "./pkg/web/jieba_rs_wasm.js"
|
||||
},
|
||||
"import": {
|
||||
"types": "./pkg/web/jieba_rs_wasm.d.ts",
|
||||
"default": "./pkg/web/jieba_rs_wasm.js"
|
||||
},
|
||||
"require": {
|
||||
"types": "./pkg/nodejs/jieba_rs_wasm.d.ts",
|
||||
"default": "./pkg/nodejs/jieba_rs_wasm.js"
|
||||
}
|
||||
},
|
||||
"./web": {
|
||||
"types": "./pkg/web/jieba_rs_wasm.d.ts",
|
||||
"default": "./pkg/web/jieba_rs_wasm.js"
|
||||
},
|
||||
"./node": {
|
||||
"types": "./pkg/nodejs/jieba_rs_wasm.d.ts",
|
||||
"default": "./pkg/nodejs/jieba_rs_wasm.js"
|
||||
},
|
||||
"./deno": {
|
||||
"types": "./pkg/deno/jieba_rs_wasm.d.ts",
|
||||
"default": "./pkg/deno/jieba_rs_wasm.js"
|
||||
}
|
||||
},
|
||||
"directories": {
|
||||
"test": "tests"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "wireit",
|
||||
"build:cargo": "wireit",
|
||||
"build:bundler": "wireit",
|
||||
"build:nodejs": "wireit",
|
||||
"build:deno": "wireit",
|
||||
"build:web": "wireit",
|
||||
"build:opt": "wireit",
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"wireit": {
|
||||
"build:cargo": {
|
||||
"command": "cargo build --release --target wasm32-unknown-unknown"
|
||||
},
|
||||
"build:bundler": {
|
||||
"command": "wasm-bindgen target/wasm32-unknown-unknown/release/jieba_rs_wasm.wasm --out-dir ./pkg/bundler --target bundler",
|
||||
"dependencies": [
|
||||
"build:cargo"
|
||||
]
|
||||
},
|
||||
"build:nodejs": {
|
||||
"command": "wasm-bindgen target/wasm32-unknown-unknown/release/jieba_rs_wasm.wasm --out-dir ./pkg/nodejs --target nodejs",
|
||||
"dependencies": [
|
||||
"build:cargo"
|
||||
]
|
||||
},
|
||||
"build:deno": {
|
||||
"command": "wasm-bindgen target/wasm32-unknown-unknown/release/jieba_rs_wasm.wasm --out-dir ./pkg/deno --target deno",
|
||||
"dependencies": [
|
||||
"build:cargo"
|
||||
]
|
||||
},
|
||||
"build:web": {
|
||||
"command": "wasm-bindgen target/wasm32-unknown-unknown/release/jieba_rs_wasm.wasm --out-dir ./pkg/web --target web",
|
||||
"dependencies": [
|
||||
"build:cargo"
|
||||
]
|
||||
},
|
||||
"build": {
|
||||
"dependencies": [
|
||||
"build:cargo",
|
||||
"build:bundler",
|
||||
"build:nodejs",
|
||||
"build:deno",
|
||||
"build:web",
|
||||
"build:opt"
|
||||
]
|
||||
},
|
||||
"build:opt": {
|
||||
"command": "node scripts/opt.js",
|
||||
"dependencies": [
|
||||
"build:cargo",
|
||||
"build:bundler",
|
||||
"build:nodejs",
|
||||
"build:deno",
|
||||
"build:web"
|
||||
]
|
||||
}
|
||||
},
|
||||
"files": [
|
||||
"pkg/**/*"
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/fengkx/jieba-wasm.git"
|
||||
},
|
||||
"keywords": [
|
||||
"wasm",
|
||||
"jieba",
|
||||
"chinese",
|
||||
"segment",
|
||||
"中文分词"
|
||||
],
|
||||
"author": "fengkx",
|
||||
"license": "MIT",
|
||||
"bugs": {
|
||||
"url": "https://github.com/fengkx/jieba-wasm/issues"
|
||||
},
|
||||
"homepage": "https://github.com/fengkx/jieba-wasm#readme",
|
||||
"devDependencies": {
|
||||
"@jsdevtools/ez-spawn": "^3.0.4",
|
||||
"wireit": "^0.14.4"
|
||||
}
|
||||
}
|
||||
3851
libs/js-yaml.mjs
Normal file
3851
libs/js-yaml.mjs
Normal file
File diff suppressed because it is too large
Load Diff
2036
libs/minisearch.mjs
Normal file
2036
libs/minisearch.mjs
Normal file
File diff suppressed because it is too large
Load Diff
1162
libs/pixi.min.js
vendored
Normal file
1162
libs/pixi.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
177
libs/tiny-segmenter.js
Normal file
177
libs/tiny-segmenter.js
Normal file
@@ -0,0 +1,177 @@
|
||||
// TinySegmenter 0.1 -- Super compact Japanese tokenizer in Javascript
|
||||
// (c) 2008 Taku Kudo <taku@chasen.org>
|
||||
// TinySegmenter is freely distributable under the terms of a new BSD licence.
|
||||
// For details, see http://chasen.org/~taku/software/TinySegmenter/LICENCE.txt
|
||||
|
||||
function TinySegmenter() {
|
||||
var patterns = {
|
||||
"[一二三四五六七八九十百千万億兆]":"M",
|
||||
"[一-龠々〆ヵヶ]":"H",
|
||||
"[ぁ-ん]":"I",
|
||||
"[ァ-ヴーア-ン゙ー]":"K",
|
||||
"[a-zA-Za-zA-Z]":"A",
|
||||
"[0-90-9]":"N"
|
||||
}
|
||||
this.chartype_ = [];
|
||||
for (var i in patterns) {
|
||||
var regexp = new RegExp;
|
||||
regexp.compile(i)
|
||||
this.chartype_.push([regexp, patterns[i]]);
|
||||
}
|
||||
|
||||
this.BIAS__ = -332
|
||||
this.BC1__ = {"HH":6,"II":2461,"KH":406,"OH":-1378};
|
||||
this.BC2__ = {"AA":-3267,"AI":2744,"AN":-878,"HH":-4070,"HM":-1711,"HN":4012,"HO":3761,"IA":1327,"IH":-1184,"II":-1332,"IK":1721,"IO":5492,"KI":3831,"KK":-8741,"MH":-3132,"MK":3334,"OO":-2920};
|
||||
this.BC3__ = {"HH":996,"HI":626,"HK":-721,"HN":-1307,"HO":-836,"IH":-301,"KK":2762,"MK":1079,"MM":4034,"OA":-1652,"OH":266};
|
||||
this.BP1__ = {"BB":295,"OB":304,"OO":-125,"UB":352};
|
||||
this.BP2__ = {"BO":60,"OO":-1762};
|
||||
this.BQ1__ = {"BHH":1150,"BHM":1521,"BII":-1158,"BIM":886,"BMH":1208,"BNH":449,"BOH":-91,"BOO":-2597,"OHI":451,"OIH":-296,"OKA":1851,"OKH":-1020,"OKK":904,"OOO":2965};
|
||||
this.BQ2__ = {"BHH":118,"BHI":-1159,"BHM":466,"BIH":-919,"BKK":-1720,"BKO":864,"OHH":-1139,"OHM":-181,"OIH":153,"UHI":-1146};
|
||||
this.BQ3__ = {"BHH":-792,"BHI":2664,"BII":-299,"BKI":419,"BMH":937,"BMM":8335,"BNN":998,"BOH":775,"OHH":2174,"OHM":439,"OII":280,"OKH":1798,"OKI":-793,"OKO":-2242,"OMH":-2402,"OOO":11699};
|
||||
this.BQ4__ = {"BHH":-3895,"BIH":3761,"BII":-4654,"BIK":1348,"BKK":-1806,"BMI":-3385,"BOO":-12396,"OAH":926,"OHH":266,"OHK":-2036,"ONN":-973};
|
||||
this.BW1__ = {",と":660,",同":727,"B1あ":1404,"B1同":542,"、と":660,"、同":727,"」と":1682,"あっ":1505,"いう":1743,"いっ":-2055,"いる":672,"うし":-4817,"うん":665,"から":3472,"がら":600,"こう":-790,"こと":2083,"こん":-1262,"さら":-4143,"さん":4573,"した":2641,"して":1104,"すで":-3399,"そこ":1977,"それ":-871,"たち":1122,"ため":601,"った":3463,"つい":-802,"てい":805,"てき":1249,"でき":1127,"です":3445,"では":844,"とい":-4915,"とみ":1922,"どこ":3887,"ない":5713,"なっ":3015,"など":7379,"なん":-1113,"にし":2468,"には":1498,"にも":1671,"に対":-912,"の一":-501,"の中":741,"ませ":2448,"まで":1711,"まま":2600,"まる":-2155,"やむ":-1947,"よっ":-2565,"れた":2369,"れで":-913,"をし":1860,"を見":731,"亡く":-1886,"京都":2558,"取り":-2784,"大き":-2604,"大阪":1497,"平方":-2314,"引き":-1336,"日本":-195,"本当":-2423,"毎日":-2113,"目指":-724,"B1あ":1404,"B1同":542,"」と":1682};
|
||||
this.BW2__ = {"..":-11822,"11":-669,"――":-5730,"−−":-13175,"いう":-1609,"うか":2490,"かし":-1350,"かも":-602,"から":-7194,"かれ":4612,"がい":853,"がら":-3198,"きた":1941,"くな":-1597,"こと":-8392,"この":-4193,"させ":4533,"され":13168,"さん":-3977,"しい":-1819,"しか":-545,"した":5078,"して":972,"しな":939,"その":-3744,"たい":-1253,"たた":-662,"ただ":-3857,"たち":-786,"たと":1224,"たは":-939,"った":4589,"って":1647,"っと":-2094,"てい":6144,"てき":3640,"てく":2551,"ては":-3110,"ても":-3065,"でい":2666,"でき":-1528,"でし":-3828,"です":-4761,"でも":-4203,"とい":1890,"とこ":-1746,"とと":-2279,"との":720,"とみ":5168,"とも":-3941,"ない":-2488,"なが":-1313,"など":-6509,"なの":2614,"なん":3099,"にお":-1615,"にし":2748,"にな":2454,"によ":-7236,"に対":-14943,"に従":-4688,"に関":-11388,"のか":2093,"ので":-7059,"のに":-6041,"のの":-6125,"はい":1073,"はが":-1033,"はず":-2532,"ばれ":1813,"まし":-1316,"まで":-6621,"まれ":5409,"めて":-3153,"もい":2230,"もの":-10713,"らか":-944,"らし":-1611,"らに":-1897,"りし":651,"りま":1620,"れた":4270,"れて":849,"れば":4114,"ろう":6067,"われ":7901,"を通":-11877,"んだ":728,"んな":-4115,"一人":602,"一方":-1375,"一日":970,"一部":-1051,"上が":-4479,"会社":-1116,"出て":2163,"分の":-7758,"同党":970,"同日":-913,"大阪":-2471,"委員":-1250,"少な":-1050,"年度":-8669,"年間":-1626,"府県":-2363,"手権":-1982,"新聞":-4066,"日新":-722,"日本":-7068,"日米":3372,"曜日":-601,"朝鮮":-2355,"本人":-2697,"東京":-1543,"然と":-1384,"社会":-1276,"立て":-990,"第に":-1612,"米国":-4268,"11":-669};
|
||||
this.BW3__ = {"あた":-2194,"あり":719,"ある":3846,"い.":-1185,"い。":-1185,"いい":5308,"いえ":2079,"いく":3029,"いた":2056,"いっ":1883,"いる":5600,"いわ":1527,"うち":1117,"うと":4798,"えと":1454,"か.":2857,"か。":2857,"かけ":-743,"かっ":-4098,"かに":-669,"から":6520,"かり":-2670,"が,":1816,"が、":1816,"がき":-4855,"がけ":-1127,"がっ":-913,"がら":-4977,"がり":-2064,"きた":1645,"けど":1374,"こと":7397,"この":1542,"ころ":-2757,"さい":-714,"さを":976,"し,":1557,"し、":1557,"しい":-3714,"した":3562,"して":1449,"しな":2608,"しま":1200,"す.":-1310,"す。":-1310,"する":6521,"ず,":3426,"ず、":3426,"ずに":841,"そう":428,"た.":8875,"た。":8875,"たい":-594,"たの":812,"たり":-1183,"たる":-853,"だ.":4098,"だ。":4098,"だっ":1004,"った":-4748,"って":300,"てい":6240,"てお":855,"ても":302,"です":1437,"でに":-1482,"では":2295,"とう":-1387,"とし":2266,"との":541,"とも":-3543,"どう":4664,"ない":1796,"なく":-903,"など":2135,"に,":-1021,"に、":-1021,"にし":1771,"にな":1906,"には":2644,"の,":-724,"の、":-724,"の子":-1000,"は,":1337,"は、":1337,"べき":2181,"まし":1113,"ます":6943,"まっ":-1549,"まで":6154,"まれ":-793,"らし":1479,"られ":6820,"るる":3818,"れ,":854,"れ、":854,"れた":1850,"れて":1375,"れば":-3246,"れる":1091,"われ":-605,"んだ":606,"んで":798,"カ月":990,"会議":860,"入り":1232,"大会":2217,"始め":1681,"市":965,"新聞":-5055,"日,":974,"日、":974,"社会":2024,"カ月":990};
|
||||
this.TC1__ = {"AAA":1093,"HHH":1029,"HHM":580,"HII":998,"HOH":-390,"HOM":-331,"IHI":1169,"IOH":-142,"IOI":-1015,"IOM":467,"MMH":187,"OOI":-1832};
|
||||
this.TC2__ = {"HHO":2088,"HII":-1023,"HMM":-1154,"IHI":-1965,"KKH":703,"OII":-2649};
|
||||
this.TC3__ = {"AAA":-294,"HHH":346,"HHI":-341,"HII":-1088,"HIK":731,"HOH":-1486,"IHH":128,"IHI":-3041,"IHO":-1935,"IIH":-825,"IIM":-1035,"IOI":-542,"KHH":-1216,"KKA":491,"KKH":-1217,"KOK":-1009,"MHH":-2694,"MHM":-457,"MHO":123,"MMH":-471,"NNH":-1689,"NNO":662,"OHO":-3393};
|
||||
this.TC4__ = {"HHH":-203,"HHI":1344,"HHK":365,"HHM":-122,"HHN":182,"HHO":669,"HIH":804,"HII":679,"HOH":446,"IHH":695,"IHO":-2324,"IIH":321,"III":1497,"IIO":656,"IOO":54,"KAK":4845,"KKA":3386,"KKK":3065,"MHH":-405,"MHI":201,"MMH":-241,"MMM":661,"MOM":841};
|
||||
this.TQ1__ = {"BHHH":-227,"BHHI":316,"BHIH":-132,"BIHH":60,"BIII":1595,"BNHH":-744,"BOHH":225,"BOOO":-908,"OAKK":482,"OHHH":281,"OHIH":249,"OIHI":200,"OIIH":-68};
|
||||
this.TQ2__ = {"BIHH":-1401,"BIII":-1033,"BKAK":-543,"BOOO":-5591};
|
||||
this.TQ3__ = {"BHHH":478,"BHHM":-1073,"BHIH":222,"BHII":-504,"BIIH":-116,"BIII":-105,"BMHI":-863,"BMHM":-464,"BOMH":620,"OHHH":346,"OHHI":1729,"OHII":997,"OHMH":481,"OIHH":623,"OIIH":1344,"OKAK":2792,"OKHH":587,"OKKA":679,"OOHH":110,"OOII":-685};
|
||||
this.TQ4__ = {"BHHH":-721,"BHHM":-3604,"BHII":-966,"BIIH":-607,"BIII":-2181,"OAAA":-2763,"OAKK":180,"OHHH":-294,"OHHI":2446,"OHHO":480,"OHIH":-1573,"OIHH":1935,"OIHI":-493,"OIIH":626,"OIII":-4007,"OKAK":-8156};
|
||||
this.TW1__ = {"につい":-4681,"東京都":2026};
|
||||
this.TW2__ = {"ある程":-2049,"いった":-1256,"ころが":-2434,"しょう":3873,"その後":-4430,"だって":-1049,"ていた":1833,"として":-4657,"ともに":-4517,"もので":1882,"一気に":-792,"初めて":-1512,"同時に":-8097,"大きな":-1255,"対して":-2721,"社会党":-3216};
|
||||
this.TW3__ = {"いただ":-1734,"してい":1314,"として":-4314,"につい":-5483,"にとっ":-5989,"に当た":-6247,"ので,":-727,"ので、":-727,"のもの":-600,"れから":-3752,"十二月":-2287};
|
||||
this.TW4__ = {"いう.":8576,"いう。":8576,"からな":-2348,"してい":2958,"たが,":1516,"たが、":1516,"ている":1538,"という":1349,"ました":5543,"ません":1097,"ようと":-4258,"よると":5865};
|
||||
this.UC1__ = {"A":484,"K":93,"M":645,"O":-505};
|
||||
this.UC2__ = {"A":819,"H":1059,"I":409,"M":3987,"N":5775,"O":646};
|
||||
this.UC3__ = {"A":-1370,"I":2311};
|
||||
this.UC4__ = {"A":-2643,"H":1809,"I":-1032,"K":-3450,"M":3565,"N":3876,"O":6646};
|
||||
this.UC5__ = {"H":313,"I":-1238,"K":-799,"M":539,"O":-831};
|
||||
this.UC6__ = {"H":-506,"I":-253,"K":87,"M":247,"O":-387};
|
||||
this.UP1__ = {"O":-214};
|
||||
this.UP2__ = {"B":69,"O":935};
|
||||
this.UP3__ = {"B":189};
|
||||
this.UQ1__ = {"BH":21,"BI":-12,"BK":-99,"BN":142,"BO":-56,"OH":-95,"OI":477,"OK":410,"OO":-2422};
|
||||
this.UQ2__ = {"BH":216,"BI":113,"OK":1759};
|
||||
this.UQ3__ = {"BA":-479,"BH":42,"BI":1913,"BK":-7198,"BM":3160,"BN":6427,"BO":14761,"OI":-827,"ON":-3212};
|
||||
this.UW1__ = {",":156,"、":156,"「":-463,"あ":-941,"う":-127,"が":-553,"き":121,"こ":505,"で":-201,"と":-547,"ど":-123,"に":-789,"の":-185,"は":-847,"も":-466,"や":-470,"よ":182,"ら":-292,"り":208,"れ":169,"を":-446,"ん":-137,"・":-135,"主":-402,"京":-268,"区":-912,"午":871,"国":-460,"大":561,"委":729,"市":-411,"日":-141,"理":361,"生":-408,"県":-386,"都":-718,"「":-463,"・":-135};
|
||||
this.UW2__ = {",":-829,"、":-829,"〇":892,"「":-645,"」":3145,"あ":-538,"い":505,"う":134,"お":-502,"か":1454,"が":-856,"く":-412,"こ":1141,"さ":878,"ざ":540,"し":1529,"す":-675,"せ":300,"そ":-1011,"た":188,"だ":1837,"つ":-949,"て":-291,"で":-268,"と":-981,"ど":1273,"な":1063,"に":-1764,"の":130,"は":-409,"ひ":-1273,"べ":1261,"ま":600,"も":-1263,"や":-402,"よ":1639,"り":-579,"る":-694,"れ":571,"を":-2516,"ん":2095,"ア":-587,"カ":306,"キ":568,"ッ":831,"三":-758,"不":-2150,"世":-302,"中":-968,"主":-861,"事":492,"人":-123,"会":978,"保":362,"入":548,"初":-3025,"副":-1566,"北":-3414,"区":-422,"大":-1769,"天":-865,"太":-483,"子":-1519,"学":760,"実":1023,"小":-2009,"市":-813,"年":-1060,"強":1067,"手":-1519,"揺":-1033,"政":1522,"文":-1355,"新":-1682,"日":-1815,"明":-1462,"最":-630,"朝":-1843,"本":-1650,"東":-931,"果":-665,"次":-2378,"民":-180,"気":-1740,"理":752,"発":529,"目":-1584,"相":-242,"県":-1165,"立":-763,"第":810,"米":509,"自":-1353,"行":838,"西":-744,"見":-3874,"調":1010,"議":1198,"込":3041,"開":1758,"間":-1257,"「":-645,"」":3145,"ッ":831,"ア":-587,"カ":306,"キ":568};
|
||||
this.UW3__ = {",":4889,"1":-800,"−":-1723,"、":4889,"々":-2311,"〇":5827,"」":2670,"〓":-3573,"あ":-2696,"い":1006,"う":2342,"え":1983,"お":-4864,"か":-1163,"が":3271,"く":1004,"け":388,"げ":401,"こ":-3552,"ご":-3116,"さ":-1058,"し":-395,"す":584,"せ":3685,"そ":-5228,"た":842,"ち":-521,"っ":-1444,"つ":-1081,"て":6167,"で":2318,"と":1691,"ど":-899,"な":-2788,"に":2745,"の":4056,"は":4555,"ひ":-2171,"ふ":-1798,"へ":1199,"ほ":-5516,"ま":-4384,"み":-120,"め":1205,"も":2323,"や":-788,"よ":-202,"ら":727,"り":649,"る":5905,"れ":2773,"わ":-1207,"を":6620,"ん":-518,"ア":551,"グ":1319,"ス":874,"ッ":-1350,"ト":521,"ム":1109,"ル":1591,"ロ":2201,"ン":278,"・":-3794,"一":-1619,"下":-1759,"世":-2087,"両":3815,"中":653,"主":-758,"予":-1193,"二":974,"人":2742,"今":792,"他":1889,"以":-1368,"低":811,"何":4265,"作":-361,"保":-2439,"元":4858,"党":3593,"全":1574,"公":-3030,"六":755,"共":-1880,"円":5807,"再":3095,"分":457,"初":2475,"別":1129,"前":2286,"副":4437,"力":365,"動":-949,"務":-1872,"化":1327,"北":-1038,"区":4646,"千":-2309,"午":-783,"協":-1006,"口":483,"右":1233,"各":3588,"合":-241,"同":3906,"和":-837,"員":4513,"国":642,"型":1389,"場":1219,"外":-241,"妻":2016,"学":-1356,"安":-423,"実":-1008,"家":1078,"小":-513,"少":-3102,"州":1155,"市":3197,"平":-1804,"年":2416,"広":-1030,"府":1605,"度":1452,"建":-2352,"当":-3885,"得":1905,"思":-1291,"性":1822,"戸":-488,"指":-3973,"政":-2013,"教":-1479,"数":3222,"文":-1489,"新":1764,"日":2099,"旧":5792,"昨":-661,"時":-1248,"曜":-951,"最":-937,"月":4125,"期":360,"李":3094,"村":364,"東":-805,"核":5156,"森":2438,"業":484,"氏":2613,"民":-1694,"決":-1073,"法":1868,"海":-495,"無":979,"物":461,"特":-3850,"生":-273,"用":914,"町":1215,"的":7313,"直":-1835,"省":792,"県":6293,"知":-1528,"私":4231,"税":401,"立":-960,"第":1201,"米":7767,"系":3066,"約":3663,"級":1384,"統":-4229,"総":1163,"線":1255,"者":6457,"能":725,"自":-2869,"英":785,"見":1044,"調":-562,"財":-733,"費":1777,"車":1835,"軍":1375,"込":-1504,"通":-1136,"選":-681,"郎":1026,"郡":4404,"部":1200,"金":2163,"長":421,"開":-1432,"間":1302,"関":-1282,"雨":2009,"電":-1045,"非":2066,"駅":1620,"1":-800,"」":2670,"・":-3794,"ッ":-1350,"ア":551,"グ":1319,"ス":874,"ト":521,"ム":1109,"ル":1591,"ロ":2201,"ン":278};
|
||||
this.UW4__ = {",":3930,".":3508,"―":-4841,"、":3930,"。":3508,"〇":4999,"「":1895,"」":3798,"〓":-5156,"あ":4752,"い":-3435,"う":-640,"え":-2514,"お":2405,"か":530,"が":6006,"き":-4482,"ぎ":-3821,"く":-3788,"け":-4376,"げ":-4734,"こ":2255,"ご":1979,"さ":2864,"し":-843,"じ":-2506,"す":-731,"ず":1251,"せ":181,"そ":4091,"た":5034,"だ":5408,"ち":-3654,"っ":-5882,"つ":-1659,"て":3994,"で":7410,"と":4547,"な":5433,"に":6499,"ぬ":1853,"ね":1413,"の":7396,"は":8578,"ば":1940,"ひ":4249,"び":-4134,"ふ":1345,"へ":6665,"べ":-744,"ほ":1464,"ま":1051,"み":-2082,"む":-882,"め":-5046,"も":4169,"ゃ":-2666,"や":2795,"ょ":-1544,"よ":3351,"ら":-2922,"り":-9726,"る":-14896,"れ":-2613,"ろ":-4570,"わ":-1783,"を":13150,"ん":-2352,"カ":2145,"コ":1789,"セ":1287,"ッ":-724,"ト":-403,"メ":-1635,"ラ":-881,"リ":-541,"ル":-856,"ン":-3637,"・":-4371,"ー":-11870,"一":-2069,"中":2210,"予":782,"事":-190,"井":-1768,"人":1036,"以":544,"会":950,"体":-1286,"作":530,"側":4292,"先":601,"党":-2006,"共":-1212,"内":584,"円":788,"初":1347,"前":1623,"副":3879,"力":-302,"動":-740,"務":-2715,"化":776,"区":4517,"協":1013,"参":1555,"合":-1834,"和":-681,"員":-910,"器":-851,"回":1500,"国":-619,"園":-1200,"地":866,"場":-1410,"塁":-2094,"士":-1413,"多":1067,"大":571,"子":-4802,"学":-1397,"定":-1057,"寺":-809,"小":1910,"屋":-1328,"山":-1500,"島":-2056,"川":-2667,"市":2771,"年":374,"庁":-4556,"後":456,"性":553,"感":916,"所":-1566,"支":856,"改":787,"政":2182,"教":704,"文":522,"方":-856,"日":1798,"時":1829,"最":845,"月":-9066,"木":-485,"来":-442,"校":-360,"業":-1043,"氏":5388,"民":-2716,"気":-910,"沢":-939,"済":-543,"物":-735,"率":672,"球":-1267,"生":-1286,"産":-1101,"田":-2900,"町":1826,"的":2586,"目":922,"省":-3485,"県":2997,"空":-867,"立":-2112,"第":788,"米":2937,"系":786,"約":2171,"経":1146,"統":-1169,"総":940,"線":-994,"署":749,"者":2145,"能":-730,"般":-852,"行":-792,"規":792,"警":-1184,"議":-244,"谷":-1000,"賞":730,"車":-1481,"軍":1158,"輪":-1433,"込":-3370,"近":929,"道":-1291,"選":2596,"郎":-4866,"都":1192,"野":-1100,"銀":-2213,"長":357,"間":-2344,"院":-2297,"際":-2604,"電":-878,"領":-1659,"題":-792,"館":-1984,"首":1749,"高":2120,"「":1895,"」":3798,"・":-4371,"ッ":-724,"ー":-11870,"カ":2145,"コ":1789,"セ":1287,"ト":-403,"メ":-1635,"ラ":-881,"リ":-541,"ル":-856,"ン":-3637};
|
||||
this.UW5__ = {",":465,".":-299,"1":-514,"E2":-32768,"]":-2762,"、":465,"。":-299,"「":363,"あ":1655,"い":331,"う":-503,"え":1199,"お":527,"か":647,"が":-421,"き":1624,"ぎ":1971,"く":312,"げ":-983,"さ":-1537,"し":-1371,"す":-852,"だ":-1186,"ち":1093,"っ":52,"つ":921,"て":-18,"で":-850,"と":-127,"ど":1682,"な":-787,"に":-1224,"の":-635,"は":-578,"べ":1001,"み":502,"め":865,"ゃ":3350,"ょ":854,"り":-208,"る":429,"れ":504,"わ":419,"を":-1264,"ん":327,"イ":241,"ル":451,"ン":-343,"中":-871,"京":722,"会":-1153,"党":-654,"務":3519,"区":-901,"告":848,"員":2104,"大":-1296,"学":-548,"定":1785,"嵐":-1304,"市":-2991,"席":921,"年":1763,"思":872,"所":-814,"挙":1618,"新":-1682,"日":218,"月":-4353,"査":932,"格":1356,"機":-1508,"氏":-1347,"田":240,"町":-3912,"的":-3149,"相":1319,"省":-1052,"県":-4003,"研":-997,"社":-278,"空":-813,"統":1955,"者":-2233,"表":663,"語":-1073,"議":1219,"選":-1018,"郎":-368,"長":786,"間":1191,"題":2368,"館":-689,"1":-514,"E2":-32768,"「":363,"イ":241,"ル":451,"ン":-343};
|
||||
this.UW6__ = {",":227,".":808,"1":-270,"E1":306,"、":227,"。":808,"あ":-307,"う":189,"か":241,"が":-73,"く":-121,"こ":-200,"じ":1782,"す":383,"た":-428,"っ":573,"て":-1014,"で":101,"と":-105,"な":-253,"に":-149,"の":-417,"は":-236,"も":-206,"り":187,"る":-135,"を":195,"ル":-673,"ン":-496,"一":-277,"中":201,"件":-800,"会":624,"前":302,"区":1792,"員":-1212,"委":798,"学":-960,"市":887,"広":-695,"後":535,"業":-697,"相":753,"社":-507,"福":974,"空":-822,"者":1811,"連":463,"郎":1082,"1":-270,"E1":306,"ル":-673,"ン":-496};
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
TinySegmenter.prototype.ctype_ = function(str) {
|
||||
for (var i in this.chartype_) {
|
||||
if (str.match(this.chartype_[i][0])) {
|
||||
return this.chartype_[i][1];
|
||||
}
|
||||
}
|
||||
return "O";
|
||||
}
|
||||
|
||||
TinySegmenter.prototype.ts_ = function(v) {
|
||||
if (v) { return v; }
|
||||
return 0;
|
||||
}
|
||||
|
||||
TinySegmenter.prototype.segment = function(input) {
|
||||
if (input == null || input == undefined || input == "") {
|
||||
return [];
|
||||
}
|
||||
var result = [];
|
||||
var seg = ["B3","B2","B1"];
|
||||
var ctype = ["O","O","O"];
|
||||
var o = input.split("");
|
||||
for (i = 0; i < o.length; ++i) {
|
||||
seg.push(o[i]);
|
||||
ctype.push(this.ctype_(o[i]))
|
||||
}
|
||||
seg.push("E1");
|
||||
seg.push("E2");
|
||||
seg.push("E3");
|
||||
ctype.push("O");
|
||||
ctype.push("O");
|
||||
ctype.push("O");
|
||||
var word = seg[3];
|
||||
var p1 = "U";
|
||||
var p2 = "U";
|
||||
var p3 = "U";
|
||||
for (var i = 4; i < seg.length - 3; ++i) {
|
||||
var score = this.BIAS__;
|
||||
var w1 = seg[i-3];
|
||||
var w2 = seg[i-2];
|
||||
var w3 = seg[i-1];
|
||||
var w4 = seg[i];
|
||||
var w5 = seg[i+1];
|
||||
var w6 = seg[i+2];
|
||||
var c1 = ctype[i-3];
|
||||
var c2 = ctype[i-2];
|
||||
var c3 = ctype[i-1];
|
||||
var c4 = ctype[i];
|
||||
var c5 = ctype[i+1];
|
||||
var c6 = ctype[i+2];
|
||||
score += this.ts_(this.UP1__[p1]);
|
||||
score += this.ts_(this.UP2__[p2]);
|
||||
score += this.ts_(this.UP3__[p3]);
|
||||
score += this.ts_(this.BP1__[p1 + p2]);
|
||||
score += this.ts_(this.BP2__[p2 + p3]);
|
||||
score += this.ts_(this.UW1__[w1]);
|
||||
score += this.ts_(this.UW2__[w2]);
|
||||
score += this.ts_(this.UW3__[w3]);
|
||||
score += this.ts_(this.UW4__[w4]);
|
||||
score += this.ts_(this.UW5__[w5]);
|
||||
score += this.ts_(this.UW6__[w6]);
|
||||
score += this.ts_(this.BW1__[w2 + w3]);
|
||||
score += this.ts_(this.BW2__[w3 + w4]);
|
||||
score += this.ts_(this.BW3__[w4 + w5]);
|
||||
score += this.ts_(this.TW1__[w1 + w2 + w3]);
|
||||
score += this.ts_(this.TW2__[w2 + w3 + w4]);
|
||||
score += this.ts_(this.TW3__[w3 + w4 + w5]);
|
||||
score += this.ts_(this.TW4__[w4 + w5 + w6]);
|
||||
score += this.ts_(this.UC1__[c1]);
|
||||
score += this.ts_(this.UC2__[c2]);
|
||||
score += this.ts_(this.UC3__[c3]);
|
||||
score += this.ts_(this.UC4__[c4]);
|
||||
score += this.ts_(this.UC5__[c5]);
|
||||
score += this.ts_(this.UC6__[c6]);
|
||||
score += this.ts_(this.BC1__[c2 + c3]);
|
||||
score += this.ts_(this.BC2__[c3 + c4]);
|
||||
score += this.ts_(this.BC3__[c4 + c5]);
|
||||
score += this.ts_(this.TC1__[c1 + c2 + c3]);
|
||||
score += this.ts_(this.TC2__[c2 + c3 + c4]);
|
||||
score += this.ts_(this.TC3__[c3 + c4 + c5]);
|
||||
score += this.ts_(this.TC4__[c4 + c5 + c6]);
|
||||
// score += this.ts_(this.TC5__[c4 + c5 + c6]);
|
||||
score += this.ts_(this.UQ1__[p1 + c1]);
|
||||
score += this.ts_(this.UQ2__[p2 + c2]);
|
||||
score += this.ts_(this.UQ3__[p3 + c3]);
|
||||
score += this.ts_(this.BQ1__[p2 + c2 + c3]);
|
||||
score += this.ts_(this.BQ2__[p2 + c3 + c4]);
|
||||
score += this.ts_(this.BQ3__[p3 + c2 + c3]);
|
||||
score += this.ts_(this.BQ4__[p3 + c3 + c4]);
|
||||
score += this.ts_(this.TQ1__[p2 + c1 + c2 + c3]);
|
||||
score += this.ts_(this.TQ2__[p2 + c2 + c3 + c4]);
|
||||
score += this.ts_(this.TQ3__[p3 + c1 + c2 + c3]);
|
||||
score += this.ts_(this.TQ4__[p3 + c2 + c3 + c4]);
|
||||
var p = "O";
|
||||
if (score > 0) {
|
||||
result.push(word);
|
||||
word = "";
|
||||
p = "B";
|
||||
}
|
||||
p1 = p2;
|
||||
p2 = p3;
|
||||
p3 = p;
|
||||
word += seg[i];
|
||||
}
|
||||
result.push(word);
|
||||
|
||||
return result;
|
||||
}
|
||||
export { TinySegmenter };
|
||||
Reference in New Issue
Block a user