You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					548 lines
				
				12 KiB
			
		
		
			
		
	
	
					548 lines
				
				12 KiB
			| 
											3 years ago
										 | /* eslint no-loop-func: 0 */ | ||
|  | /** | ||
|  |  * Mnemonist SymSpell | ||
|  |  * =================== | ||
|  |  * | ||
|  |  * JavaScript implementation of the Symmetric Delete Spelling dictionary to | ||
|  |  * efficiently index & query expression based on edit distance. | ||
|  |  * Note that the current implementation targets v3.0 of the algorithm. | ||
|  |  * | ||
|  |  * [Reference]: | ||
|  |  * http://blog.faroo.com/2012/06/07/improved-edit-distance-based-spelling-correction/
 | ||
|  |  * https://github.com/wolfgarbe/symspell
 | ||
|  |  * | ||
|  |  * [Author]: | ||
|  |  * Wolf Garbe | ||
|  |  */ | ||
|  | var forEach = require('obliterator/foreach'); | ||
|  | 
 | ||
/**
 * Constants.
 */

// Values used by the constructor when the matching option is not a number.
var DEFAULT_MAX_DISTANCE = 2,
    DEFAULT_VERBOSITY = 2;

// Set of the accepted verbosity levels.
var VERBOSITY = new Set([
  // Returns only the top suggestion
  0,
  // Returns suggestions with the smallest edit distance
  1,
  // Returns every suggestion (no early termination)
  2
]);

// Human-readable description of each verbosity level, keyed by level.
var VERBOSITY_EXPLANATIONS = {
  0: 'Returns only the top suggestion',
  1: 'Returns suggestions with the smallest edit distance',
  2: 'Returns every suggestion (no early termination)'
};
|  | 
 | ||
|  | /** | ||
|  |  * Functions. | ||
|  |  */ | ||
|  | 
 | ||
/**
 * Function creating a dictionary item.
 *
 * @param  {number} [value] - An optional first suggestion. Anything that is
 *                            not a number is ignored.
 * @return {object}         - The created item, shaped as
 *                            `{suggestions: Set, count: 0}`.
 */
function createDictionaryItem(value) {
  var suggestions = new Set();

  if (typeof value === 'number')
    suggestions.add(value);

  return {
    suggestions: suggestions,
    count: 0
  };
}
|  | 
 | ||
/**
 * Function creating a suggestion item.
 *
 * Any falsy argument is replaced by its neutral value (`''` or `0`).
 *
 * @param  {string} [term]     - The suggested term.
 * @param  {number} [distance] - Distance associated with the suggestion.
 * @param  {number} [count]    - Count associated with the suggestion.
 * @return {object}            - The created item, shaped as
 *                               `{term, distance, count}`.
 */
function createSuggestionItem(term, distance, count) {
  return {
    term: term || '',
    distance: distance || 0,
    count: count || 0
  };
}
|  | 
 | ||
/**
 * Simplified edit function.
 *
 * Recursively collects the strings obtained by deleting one character at a
 * time from `word`. The recursion only goes one level deeper while the
 * incremented `distance` remains below `max`. Words of length 1 or less are
 * not reduced, so the empty string is never produced.
 *
 * @param  {string} word      - Target word.
 * @param  {number} distance  - Distance.
 * @param  {number} max       - Max distance.
 * @param  {Set}    [deletes] - Set mutated to store deletes.
 * @return {Set}              - The set of deletes (`deletes` if it was given).
 */
function edits(word, distance, max, deletes) {
  deletes = deletes || new Set();
  distance++;

  var deletedItem,
      l = word.length,
      i;

  if (l > 1) {
    for (i = 0; i < l; i++) {
      deletedItem = word.substring(0, i) + word.substring(i + 1);

      // A delete that is already known was already expanded (or did not
      // need to be), so it is skipped entirely
      if (!deletes.has(deletedItem)) {
        deletes.add(deletedItem);

        if (distance < max)
          edits(deletedItem, distance, max, deletes);
      }
    }
  }

  return deletes;
}
|  | 
 | ||
/**
 * Function used to conditionally add suggestions.
 *
 * With a verbosity of 2 the suggestion is always added. Otherwise, the
 * suggestion is compared to the first one already stored on the item, using
 * the length difference with the deleted item as distance:
 *   - a closer suggestion replaces the existing ones,
 *   - an equally distant suggestion is added alongside them,
 *   - a farther suggestion is ignored.
 *
 * The item's `count` is never touched: it tracks how many times the key was
 * added as a word and must survive a replacement of its suggestions.
 *
 * @param  {array}  words       - Words list.
 * @param  {number} verbosity   - Verbosity level.
 * @param  {object} item        - The target item (its `suggestions` may be
 *                                replaced by a new Set).
 * @param  {string} suggestion  - The target suggestion.
 * @param  {number} int         - Integer key of the word.
 * @param  {string} deletedItem - Considered deleted item.
 * @return {undefined}
 */
function addLowestDistance(words, verbosity, item, suggestion, int, deletedItem) {

  // Nothing to compare against, or no filtering requested
  if (verbosity === 2 || item.suggestions.size === 0) {
    item.suggestions.add(int);
    return;
  }

  var first = item.suggestions.values().next().value;

  var currentDistance = words[first].length - deletedItem.length,
      newDistance = suggestion.length - deletedItem.length;

  // Dropping the existing suggestions if they are farther than the new one
  if (verbosity < 2 && currentDistance > newDistance)
    item.suggestions = new Set();

  if (currentDistance >= newDistance)
    item.suggestions.add(int);
}
|  | 
 | ||
/**
 * Custom Damerau-Levenshtein used by the algorithm.
 *
 * The matrix `H` is shifted by one row and one column: row 0 and column 0
 * are filled with `INF` so that the transposition lookup can safely read
 * them when a character was not seen yet.
 *
 * @param  {string} source - First string.
 * @param  {string} target - Second string.
 * @return {number}        - The distance.
 */
function damerauLevenshtein(source, target) {
  var m = source.length,
      n = target.length,
      H = [[]],
      // No distance between both strings can exceed the sum of their lengths
      INF = m + n,
      // Last (1-based) position in `source` where each character was seen
      sd = new Map(),
      i,
      l,
      j;

  H[0][0] = INF;

  for (i = 0; i <= m; i++) {
    if (!H[i + 1])
      H[i + 1] = [];
    H[i + 1][1] = i;
    H[i + 1][0] = INF;
  }

  for (j = 0; j <= n; j++) {
    H[1][j + 1] = j;
    H[0][j + 1] = INF;
  }

  // Every character of both strings starts as "not seen yet"
  var st = source + target,
      letter;

  for (i = 0, l = st.length; i < l; i++) {
    letter = st[i];

    if (!sd.has(letter))
      sd.set(letter, 0);
  }

  // Iterating
  for (i = 1; i <= m; i++) {

    // Last (1-based) position in `target` matching the current source character
    var DB = 0;

    for (j = 1; j <= n; j++) {
      var i1 = sd.get(target[j - 1]),
          j1 = DB;

      if (source[i - 1] === target[j - 1]) {
        H[i + 1][j + 1] = H[i][j];
        DB = j;
      }
      else {
        H[i + 1][j + 1] = Math.min(
          H[i][j],
          H[i + 1][j],
          H[i][j + 1]
        ) + 1;
      }

      // Transposition, counting the characters skipped on both sides
      H[i + 1][j + 1] = Math.min(
        H[i + 1][j + 1],
        H[i1][j1] + (i - i1 - 1) + 1 + (j - j1 - 1)
      );
    }

    sd.set(source[i - 1], i);
  }

  return H[m + 1][n + 1];
}
|  | 
 | ||
/**
 * Lookup function.
 *
 * Starting from the input, the function walks the successive deletes of the
 * input (breadth-first, shortest deletions first) and collects the indexed
 * words attached to each of them in the dictionary.
 *
 * @param  {object} dictionary  - A SymSpell dictionary.
 * @param  {array}  words       - Unique words list.
 * @param  {number} verbosity   - Verbosity level.
 * @param  {number} maxDistance - Maximum distance.
 * @param  {number} maxLength   - Maximum word length in the dictionary.
 * @param  {string} input       - Input string.
 * @return {array}              - The list of suggestions, as
 *                                `{term, distance, count}` items.
 */
function lookup(dictionary, words, verbosity, maxDistance, maxLength, input) {
  var length = input.length;

  // The input is too long to be within reach of any indexed word
  if (length - maxDistance > maxLength)
    return [];

  var candidates = [input],
      candidateSet = new Set(),
      suggestionSet = new Set();

  var suggestions = [],
      candidate,
      item,
      cursor;

  // Exhausting every candidates.
  // The queue is read through a cursor rather than shifted, so that
  // dequeuing an item does not reindex the remaining ones.
  for (cursor = 0; cursor < candidates.length; cursor++) {
    candidate = candidates[cursor];

    // Early termination
    if (
      verbosity < 2 &&
      suggestions.length > 0 &&
      length - candidate.length > suggestions[0].distance
    )
      break;

    item = dictionary[candidate];

    if (item !== undefined) {

      // A bare number stands for an item holding this sole suggestion
      if (typeof item === 'number')
        item = createDictionaryItem(item);

      // The candidate itself is an indexed word
      if (item.count > 0 && !suggestionSet.has(candidate)) {
        suggestionSet.add(candidate);

        var suggestItem = createSuggestionItem(
          candidate,
          length - candidate.length,
          item.count
        );

        suggestions.push(suggestItem);

        // Another early termination
        if (verbosity < 2 && length - candidate.length === 0)
          break;
      }

      // Iterating over the item's suggestions
      item.suggestions.forEach(index => {
        var suggestion = words[index];

        // Do we already have this suggestion?
        if (suggestionSet.has(suggestion))
          return;

        suggestionSet.add(suggestion);

        // Computing distance between candidate & suggestion
        var distance = 0;

        if (input !== suggestion) {
          if (suggestion.length === candidate.length) {
            distance = length - candidate.length;
          }
          else if (length === candidate.length) {
            distance = suggestion.length - candidate.length;
          }
          else {

            // Skipping the common prefix (ii) & suffix (jj) so that the
            // distance is only computed on the parts that differ
            var ii = 0,
                jj = 0;

            var l = suggestion.length;

            while (
              ii < l &&
              ii < length &&
              suggestion[ii] === input[ii]
            ) {
              ii++;
            }

            while (
              jj < l - ii &&
              jj < length &&
              suggestion[l - jj - 1] === input[length - jj - 1]
            ) {
              jj++;
            }

            if (ii > 0 || jj > 0) {
              distance = damerauLevenshtein(
                suggestion.slice(ii, l - jj),
                input.slice(ii, length - jj)
              );
            }
            else {
              distance = damerauLevenshtein(suggestion, input);
            }
          }
        }

        // Removing suggestions of higher distance
        if (verbosity < 2 &&
            suggestions.length > 0 &&
            suggestions[0].distance > distance) {
          suggestions = [];
        }

        // Skipping suggestions farther than the ones already kept
        if (verbosity < 2 &&
            suggestions.length > 0 &&
            distance > suggestions[0].distance) {
          return;
        }

        if (distance <= maxDistance) {
          var target = dictionary[suggestion];

          if (target !== undefined) {
            suggestions.push(createSuggestionItem(
              suggestion,
              distance,
              target.count
            ));
          }
        }
      });
    }

    // Adding edits
    if (length - candidate.length < maxDistance) {

      // Deeper deletes cannot beat the suggestions already found
      if (verbosity < 2 &&
          suggestions.length > 0 &&
          length - candidate.length >= suggestions[0].distance)
        continue;

      for (var i = 0, l = candidate.length; i < l; i++) {
        var deletedItem = (
          candidate.substring(0, i) +
          candidate.substring(i + 1)
        );

        if (!candidateSet.has(deletedItem)) {
          candidateSet.add(deletedItem);
          candidates.push(deletedItem);
        }
      }
    }
  }

  if (verbosity === 0)
    return suggestions.slice(0, 1);

  return suggestions;
}
|  | 
 | ||
/**
 * SymSpell.
 *
 * @constructor
 * @param  {object} [options]             - Options:
 * @param  {number} [options.maxDistance] - Maximum distance, defaulting to
 *                                          `DEFAULT_MAX_DISTANCE` when not
 *                                          given as a number.
 * @param  {number} [options.verbosity]   - Verbosity level, defaulting to
 *                                          `DEFAULT_VERBOSITY` when not given
 *                                          as a number.
 * @throws {Error} - If `maxDistance` is NaN or not greater than 0, or if
 *                   `verbosity` is not a known level.
 */
function SymSpell(options) {
  options = options || {};

  this.clear();

  // Properties
  this.maxDistance = typeof options.maxDistance === 'number' ?
    options.maxDistance :
    DEFAULT_MAX_DISTANCE;
  this.verbosity = typeof options.verbosity === 'number' ?
    options.verbosity :
    DEFAULT_VERBOSITY;

  // Sanity checks
  // NOTE: NaN is a number and is not `<= 0`, so it must be rejected on its own
  if (
    typeof this.maxDistance !== 'number' ||
    Number.isNaN(this.maxDistance) ||
    this.maxDistance <= 0
  )
    throw new Error('mnemonist/SymSpell.constructor: invalid `maxDistance` option. Should be a integer greater than 0.');

  if (!VERBOSITY.has(this.verbosity))
    throw new Error('mnemonist/SymSpell.constructor: invalid `verbosity` option. Should be either 0, 1 or 2.');
}
|  | 
 | ||
/**
 * Method used to clear the structure.
 *
 * Resets the size, the dictionary, the maximum word length and the words
 * list. The `maxDistance` & `verbosity` settings are left untouched.
 *
 * @return {undefined}
 */
SymSpell.prototype.clear = function() {

  // Properties
  this.size = 0;

  // Prototype-less object, so that any string can safely be used as key
  this.dictionary = Object.create(null);
  this.maxLength = 0;
  this.words = [];
};
|  | 
 | ||
/**
 * Method used to add a word to the index.
 *
 * Dictionary values are either a bare number (the index of the sole word
 * a delete was derived from) or a full item holding suggestions and a count.
 * Bare numbers are upgraded to full items when needed.
 *
 * @param  {string} word - Word to add.
 * @return {SymSpell}    - Returns itself for chaining purposes.
 */
SymSpell.prototype.add = function(word) {
  var item = this.dictionary[word];

  if (item !== undefined) {

    // The word was only known as a delete of another word so far
    if (typeof item === 'number') {
      item = createDictionaryItem(item);
      this.dictionary[word] = item;
    }

    item.count++;
  }

  else {
    item = createDictionaryItem();
    item.count++;

    this.dictionary[word] = item;

    if (word.length > this.maxLength)
      this.maxLength = word.length;
  }

  // Deletes are only indexed the first time the word is counted
  if (item.count === 1) {
    var number = this.words.length;
    this.words.push(word);

    var deletes = edits(word, 0, this.maxDistance);

    deletes.forEach(deletedItem => {
      var target = this.dictionary[deletedItem];

      if (target !== undefined) {
        if (typeof target === 'number') {
          target = createDictionaryItem(target);

          this.dictionary[deletedItem] = target;
        }

        if (!target.suggestions.has(number)) {
          addLowestDistance(
            this.words,
            this.verbosity,
            target,
            word,
            number,
            deletedItem
          );
        }
      }
      else {

        // First word producing this delete: storing its bare index
        this.dictionary[deletedItem] = number;
      }
    });
  }

  // NOTE: the size counts every call, repeated words included
  this.size++;

  return this;
};
|  | 
 | ||
/**
 * Method used to search the index.
 *
 * Delegates to the `lookup` function, using the current state & settings
 * of the index.
 *
 * @param  {string} input - Input query.
 * @return {array}        - The found suggestions.
 */
SymSpell.prototype.search = function(input) {
  return lookup(
    this.dictionary,
    this.words,
    this.verbosity,
    this.maxDistance,
    this.maxLength,
    input
  );
};
|  | 
 | ||
/**
 * Convenience known methods.
 */

/**
 * Method building a representation of the index for inspection purposes.
 *
 * @return {array} - An array of `[key, count]` pairs for every dictionary
 *                   item having a truthy count, decorated with the `size`,
 *                   `maxDistance`, `verbosity` & `behavior` properties.
 */
SymSpell.prototype.inspect = function() {
  var array = [];

  array.size = this.size;
  array.maxDistance = this.maxDistance;
  array.verbosity = this.verbosity;
  array.behavior = VERBOSITY_EXPLANATIONS[this.verbosity];

  // Bare numbers & items with a zero count are skipped
  for (var k in this.dictionary) {
    if (typeof this.dictionary[k] === 'object' && this.dictionary[k].count)
      array.push([k, this.dictionary[k].count]);
  }

  // Trick so that node displays the name of the constructor
  Object.defineProperty(array, 'constructor', {
    value: SymSpell,
    enumerable: false
  });

  return array;
};
|  | 
 | ||
// Exposing #.inspect under node's custom inspection symbol, when symbols exist
if (typeof Symbol !== 'undefined')
  SymSpell.prototype[Symbol.for('nodejs.util.inspect.custom')] = SymSpell.prototype.inspect;
|  | 
 | ||
/**
 * Static @.from function taking an arbitrary iterable & converting it into
 * a structure.
 *
 * @param  {Iterable} iterable  - Target iterable, whose values are added
 *                                one by one to the index.
 * @param  {object}   [options] - Options passed as is to the constructor.
 * @return {SymSpell}
 */
SymSpell.from = function(iterable, options) {
  var index = new SymSpell(options);

  forEach(iterable, function(value) {
    index.add(value);
  });

  return index;
};
|  | 
 | ||
/**
 * Exporting.
 */
module.exports = SymSpell;