You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
548 lines
12 KiB
548 lines
12 KiB
/* eslint no-loop-func: 0 */
/**
 * Mnemonist SymSpell
 * ===================
 *
 * JavaScript implementation of the Symmetric Delete Spelling dictionary to
 * efficiently index & query expressions based on edit distance.
 * Note that the current implementation targets v3.0 of the algorithm.
 *
 * [Reference]:
 * http://blog.faroo.com/2012/06/07/improved-edit-distance-based-spelling-correction/
 * https://github.com/wolfgarbe/symspell
 *
 * [Author]:
 * Wolf Garbe
 */
var forEach = require('obliterator/foreach');
|
|
|
|
/**
 * Constants.
 */

// Defaults applied by the constructor when the matching option is not a number.
var DEFAULT_MAX_DISTANCE = 2,
    DEFAULT_VERBOSITY = 2;

// Set of accepted verbosity levels.
var VERBOSITY = new Set([

  // Returns only the top suggestion
  0,

  // Returns suggestions with the smallest edit distance
  1,

  // Returns every suggestion (no early termination)
  2
]);

// Human-readable description of each verbosity level (used by #.inspect).
var VERBOSITY_EXPLANATIONS = {
  0: 'Returns only the top suggestion',
  1: 'Returns suggestions with the smallest edit distance',
  2: 'Returns every suggestion (no early termination)'
};

/**
 * Functions.
 */

/**
 * Function creating a dictionary item.
 *
 * @param  {number} [value] - An optional suggestion, only stored when it is
 *                            actually a number.
 * @return {object}         - The created item, shaped as
 *                            `{suggestions: Set, count: 0}`.
 */
function createDictionaryItem(value) {
  var suggestions = new Set();

  // Anything that is not a number (e.g. undefined) yields an empty set
  if (typeof value === 'number')
    suggestions.add(value);

  return {
    suggestions,
    count: 0
  };
}

/**
 * Function creating a suggestion item.
 *
 * @param  {string} [term]     - Suggested term (falls back to '').
 * @param  {number} [distance] - Distance attached to the term (falls back to 0).
 * @param  {number} [count]    - Count attached to the term (falls back to 0).
 * @return {object}            - The created item, shaped as
 *                               `{term, distance, count}`.
 */
function createSuggestionItem(term, distance, count) {
  return {
    term: term || '',
    distance: distance || 0,
    count: count || 0
  };
}

/**
 * Simplified edit function: recursively collects the strings obtained by
 * deleting characters from the given word.
 *
 * @param  {string} word      - Target word.
 * @param  {number} distance  - Distance (number of deletions already applied).
 * @param  {number} max       - Max distance.
 * @param  {Set}    [deletes] - Set mutated to store deletes.
 * @return {Set}              - The set of deletes (the given one, if any).
 */
function edits(word, distance, max, deletes) {
  deletes = deletes || new Set();
  distance++;

  var deletedItem,
      l = word.length,
      i;

  // Words of a single character (or less) do not produce any delete
  if (l > 1) {
    for (i = 0; i < l; i++) {

      // Dropping the character at index i
      deletedItem = word.substring(0, i) + word.substring(i + 1);

      if (!deletes.has(deletedItem)) {
        deletes.add(deletedItem);

        // Recursing only while the distance budget is not exhausted
        if (distance < max)
          edits(deletedItem, distance, max, deletes);
      }
    }
  }

  return deletes;
}

/**
 * Function used to conditionally add suggestions.
 *
 * With a verbosity below 2, an item only keeps the suggestions that are the
 * closest (in length) to the deleted item: farther suggestions already stored
 * are dropped and farther incoming suggestions are ignored. With a verbosity
 * of 2, every suggestion is kept.
 *
 * @param  {array}  words       - Words list.
 * @param  {number} verbosity   - Verbosity level.
 * @param  {object} item        - The target item (mutated).
 * @param  {string} suggestion  - The target suggestion.
 * @param  {number} int         - Integer key of the word.
 * @param  {string} deletedItem - Considered deleted item.
 * @return {undefined}
 */
function addLowestDistance(words, verbosity, item, suggestion, int, deletedItem) {
  var first = item.suggestions.values().next().value;

  // Dropping stored suggestions when the incoming one is closer.
  // NOTE: only the suggestions are reset. `item.count` is incremented by
  // #.add each time the key itself is added as a word, so it must survive.
  if (verbosity < 2 &&
      item.suggestions.size > 0 &&
      words[first].length - deletedItem.length > suggestion.length - deletedItem.length) {
    item.suggestions = new Set();
  }

  // Adding the suggestion unless a strictly closer one is already stored
  if (verbosity === 2 ||
      !item.suggestions.size ||
      words[first].length - deletedItem.length >= suggestion.length - deletedItem.length) {
    item.suggestions.add(int);
  }
}

/**
 * Custom Damerau-Levenshtein used by the algorithm.
 *
 * The matrix `H` is shifted by one row and one column: row 0 and column 0
 * hold a sentinel value (`m + n`) larger than any reachable distance so that
 * the transposition lookup never wins before a character has been seen.
 *
 * @param  {string} source - First string.
 * @param  {string} target - Second string.
 * @return {number}        - The distance.
 */
function damerauLevenshtein(source, target) {
  var m = source.length,
      n = target.length,
      H = [[]],
      INF = m + n,
      sd = new Map(),
      i,
      l,
      j;

  H[0][0] = INF;

  // First columns: sentinel + distance from the empty target
  for (i = 0; i <= m; i++) {
    H[i + 1] = [];
    H[i + 1][1] = i;
    H[i + 1][0] = INF;
  }

  // First rows: sentinel + distance from the empty source
  for (j = 0; j <= n; j++) {
    H[1][j + 1] = j;
    H[0][j + 1] = INF;
  }

  // Registering every character of both strings as "not seen yet" (row 0)
  var st = source + target,
      letter;

  for (i = 0, l = st.length; i < l; i++) {
    letter = st[i];

    if (!sd.has(letter))
      sd.set(letter, 0);
  }

  // Iterating
  for (i = 1; i <= m; i++) {

    // Last column of the current row where source & target characters matched
    var DB = 0;

    for (j = 1; j <= n; j++) {

      // Last source row where the current target character was seen
      var i1 = sd.get(target[j - 1]),
          j1 = DB;

      if (source[i - 1] === target[j - 1]) {
        H[i + 1][j + 1] = H[i][j];
        DB = j;
      }
      else {

        // Substitution, insertion, deletion
        H[i + 1][j + 1] = Math.min(
          H[i][j],
          H[i + 1][j],
          H[i][j + 1]
        ) + 1;
      }

      // Transposition
      H[i + 1][j + 1] = Math.min(
        H[i + 1][j + 1],
        H[i1][j1] + (i - i1 - 1) + 1 + (j - j1 - 1)
      );
    }

    sd.set(source[i - 1], i);
  }

  return H[m + 1][n + 1];
}

/**
 * Lookup function.
 *
 * Starting from the input, candidates are explored in order of increasing
 * number of deletions. Each candidate found in the dictionary contributes
 * itself (when it was added as a word) and the words it is a delete of.
 *
 * @param  {object} dictionary  - A SymSpell dictionary. Values are either a
 *                                number (index of a single suggestion in
 *                                `words`) or a `{suggestions, count}` item.
 * @param  {array}  words       - Unique words list.
 * @param  {number} verbosity   - Verbosity level.
 * @param  {number} maxDistance - Maximum distance.
 * @param  {number} maxLength   - Maximum word length in the dictionary.
 * @param  {string} input       - Input string.
 * @return {array}              - The list of suggestions, as
 *                                `{term, distance, count}` items.
 */
function lookup(dictionary, words, verbosity, maxDistance, maxLength, input) {
  var length = input.length;

  // The input is too long to be within range of any indexed word
  if (length - maxDistance > maxLength)
    return [];

  var candidates = [input],
      candidateSet = new Set(),
      suggestionSet = new Set();

  // NOTE: the queue is consumed through a cursor rather than Array#shift so
  // that dequeuing does not have to move the remaining candidates.
  var suggestions = [],
      cursor = 0,
      candidate,
      item;

  // Exhausting every candidates
  while (cursor < candidates.length) {
    candidate = candidates[cursor++];

    // Early termination
    if (
      verbosity < 2 &&
      suggestions.length > 0 &&
      length - candidate.length > suggestions[0].distance
    )
      break;

    item = dictionary[candidate];

    if (item !== undefined) {

      // Bare numbers are expanded to a temporary item (not stored back)
      if (typeof item === 'number')
        item = createDictionaryItem(item);

      // The candidate itself was added as a word
      if (item.count > 0 && !suggestionSet.has(candidate)) {
        suggestionSet.add(candidate);

        var suggestItem = createSuggestionItem(
          candidate,
          length - candidate.length,
          item.count
        );

        suggestions.push(suggestItem);

        // Another early termination
        if (verbosity < 2 && length - candidate.length === 0)
          break;
      }

      // Iterating over the item's suggestions
      item.suggestions.forEach(index => {
        var suggestion = words[index];

        // Do we already have this suggestion?
        if (suggestionSet.has(suggestion))
          return;

        suggestionSet.add(suggestion);

        // Computing distance between candidate & suggestion
        var distance = 0;

        if (input !== suggestion) {
          if (suggestion.length === candidate.length) {
            distance = length - candidate.length;
          }
          else if (length === candidate.length) {
            distance = suggestion.length - candidate.length;
          }
          else {
            var ii = 0,
                jj = 0;

            var l = suggestion.length;

            // Length of the common prefix
            while (
              ii < l &&
              ii < length &&
              suggestion[ii] === input[ii]
            ) {
              ii++;
            }

            // Length of the common suffix
            while (
              jj < l - ii &&
              jj < length &&
              suggestion[l - jj - 1] === input[length - jj - 1]
            ) {
              jj++;
            }

            // Only the differing middle parts need to be compared
            if (ii > 0 || jj > 0) {
              distance = damerauLevenshtein(
                suggestion.substr(ii, l - ii - jj),
                input.substr(ii, length - ii - jj)
              );
            }
            else {
              distance = damerauLevenshtein(suggestion, input);
            }
          }
        }

        // Removing suggestions of higher distance
        if (verbosity < 2 &&
            suggestions.length > 0 &&
            suggestions[0].distance > distance) {
          suggestions = [];
        }

        // Skipping suggestions farther than the ones already collected
        if (verbosity < 2 &&
            suggestions.length > 0 &&
            distance > suggestions[0].distance) {
          return;
        }

        if (distance <= maxDistance) {
          var target = dictionary[suggestion];

          if (target !== undefined) {
            suggestions.push(createSuggestionItem(
              suggestion,
              distance,
              target.count
            ));
          }
        }
      });
    }

    // Adding edits
    if (length - candidate.length < maxDistance) {

      // No need to go deeper than the best distance found so far
      if (verbosity < 2 &&
          suggestions.length > 0 &&
          length - candidate.length >= suggestions[0].distance)
        continue;

      for (var i = 0, l = candidate.length; i < l; i++) {
        var deletedItem = (
          candidate.substring(0, i) +
          candidate.substring(i + 1)
        );

        if (!candidateSet.has(deletedItem)) {
          candidateSet.add(deletedItem);
          candidates.push(deletedItem);
        }
      }
    }
  }

  if (verbosity === 0)
    return suggestions.slice(0, 1);

  return suggestions;
}

/**
 * SymSpell.
 *
 * @constructor
 * @param  {object} [options]             - Options:
 * @param  {number} [options.maxDistance] - Maximum distance, as an integer
 *                                          greater than 0. Falls back to
 *                                          DEFAULT_MAX_DISTANCE when not a
 *                                          number.
 * @param  {number} [options.verbosity]   - Verbosity level (0, 1 or 2). Falls
 *                                          back to DEFAULT_VERBOSITY when not
 *                                          a number.
 * @throws {Error} - When `maxDistance` or `verbosity` is invalid.
 */
function SymSpell(options) {
  options = options || {};

  this.clear();

  // Properties
  this.maxDistance = typeof options.maxDistance === 'number' ?
    options.maxDistance :
    DEFAULT_MAX_DISTANCE;
  this.verbosity = typeof options.verbosity === 'number' ?
    options.verbosity :
    DEFAULT_VERBOSITY;

  // Sanity checks
  // NOTE: Number.isInteger also rejects NaN, Infinity & fractional values,
  // which `typeof x === 'number'` alone would let through.
  if (!Number.isInteger(this.maxDistance) || this.maxDistance <= 0)
    throw Error('mnemonist/SymSpell.constructor: invalid `maxDistance` option. Should be a integer greater than 0.');

  if (!VERBOSITY.has(this.verbosity))
    throw Error('mnemonist/SymSpell.constructor: invalid `verbosity` option. Should be either 0, 1 or 2.');
}

/**
 * Method used to clear the structure.
 *
 * Resets the size, the dictionary, the maximum word length & the words list.
 * The `maxDistance` & `verbosity` settings are left untouched.
 *
 * @return {undefined}
 */
SymSpell.prototype.clear = function() {

  // Properties
  this.size = 0;

  // Prototype-less object so that any string can safely be used as a key
  this.dictionary = Object.create(null);
  this.maxLength = 0;
  this.words = [];
};

/**
 * Method used to add a word to the index.
 *
 * The first time a word is added, it is appended to the words list and every
 * one of its deletes (up to `maxDistance`) is registered in the dictionary
 * as pointing to it.
 *
 * @param  {string} word - Word to add.
 * @return {SymSpell}    - The index itself, for chaining.
 */
SymSpell.prototype.add = function(word) {
  var item = this.dictionary[word];

  if (item !== undefined) {

    // The word was only known as a delete of a single other word so far
    if (typeof item === 'number') {
      item = createDictionaryItem(item);
      this.dictionary[word] = item;
    }

    item.count++;
  }

  else {
    item = createDictionaryItem();
    item.count++;

    this.dictionary[word] = item;

    if (word.length > this.maxLength)
      this.maxLength = word.length;
  }

  // First occurrence of the word: indexing its deletes
  if (item.count === 1) {
    var number = this.words.length;
    this.words.push(word);

    var deletes = edits(word, 0, this.maxDistance);

    deletes.forEach(deletedItem => {
      var target = this.dictionary[deletedItem];

      if (target !== undefined) {
        if (typeof target === 'number') {
          target = createDictionaryItem(target);

          this.dictionary[deletedItem] = target;
        }

        if (!target.suggestions.has(number)) {
          addLowestDistance(
            this.words,
            this.verbosity,
            target,
            word,
            number,
            deletedItem
          );
        }
      }
      else {

        // Storing a bare index while the delete has a single suggestion
        this.dictionary[deletedItem] = number;
      }
    });
  }

  this.size++;

  return this;
};

/**
 * Method used to search the index.
 *
 * @param  {string} input - Input query.
 * @return {array}        - The found suggestions, as
 *                          `{term, distance, count}` items.
 */
SymSpell.prototype.search = function(input) {
  return lookup(
    this.dictionary,
    this.words,
    this.verbosity,
    this.maxDistance,
    this.maxLength,
    input
  );
};

/**
 * Convenience known methods.
 */

/**
 * Method returning a representation of the index meant for inspection: an
 * array of `[word, count]` pairs decorated with the index's size & settings.
 *
 * @return {array}
 */
SymSpell.prototype.inspect = function() {
  var array = [];

  array.size = this.size;
  array.maxDistance = this.maxDistance;
  array.verbosity = this.verbosity;
  array.behavior = VERBOSITY_EXPLANATIONS[this.verbosity];

  // Only keeping entries having a count, i.e. skipping deletes
  for (var k in this.dictionary) {
    if (typeof this.dictionary[k] === 'object' && this.dictionary[k].count)
      array.push([k, this.dictionary[k].count]);
  }

  // Trick so that node displays the name of the constructor
  Object.defineProperty(array, 'constructor', {
    value: SymSpell,
    enumerable: false
  });

  return array;
};

// Registering the method under node's custom inspection symbol
if (typeof Symbol !== 'undefined')
  SymSpell.prototype[Symbol.for('nodejs.util.inspect.custom')] = SymSpell.prototype.inspect;

/**
 * Static @.from function taking an arbitrary iterable & converting it into
 * a structure.
 *
 * @param  {Iterable} iterable  - Target iterable.
 * @param  {object}   [options] - Options forwarded to the constructor.
 * @return {SymSpell}
 */
SymSpell.from = function(iterable, options) {
  var index = new SymSpell(options);

  forEach(iterable, function(value) {
    index.add(value);
  });

  return index;
};

/**
 * Exporting.
 */
module.exports = SymSpell;