You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							548 lines
						
					
					
						
							12 KiB
						
					
					
				
			
		
		
	
	
							548 lines
						
					
					
						
							12 KiB
						
					
					
/* eslint no-loop-func: 0 */
/**
 * Mnemonist SymSpell
 * ===================
 *
 * JavaScript implementation of the Symmetric Delete Spelling dictionary to
 * efficiently index & query expressions based on edit distance.
 * Note that the current implementation targets v3.0 of the algorithm.
 *
 * [Reference]:
 * http://blog.faroo.com/2012/06/07/improved-edit-distance-based-spelling-correction/
 * https://github.com/wolfgarbe/symspell
 *
 * [Author]:
 * Wolf Garbe
 */
 | |
| var forEach = require('obliterator/foreach');
 | |
| 
 | |
/**
 * Constants.
 */

// Values used by the constructor when the matching option is not a number.
var DEFAULT_MAX_DISTANCE = 2;
var DEFAULT_VERBOSITY = 2;

// Human-readable description of every supported verbosity level.
var VERBOSITY_EXPLANATIONS = {
  0: 'Returns only the top suggestion',
  1: 'Returns suggestions with the smallest edit distance',
  2: 'Returns every suggestion (no early termination)'
};

// Set of accepted verbosity levels (see the explanations above).
var VERBOSITY = new Set([0, 1, 2]);
 | |
| 
 | |
| /**
 | |
|  * Functions.
 | |
|  */
 | |
| 
 | |
/**
 * Builds a fresh dictionary entry.
 *
 * @param  {number} [value] - Optional word index used to seed the suggestions.
 * @return {object}         - Entry shaped as `{suggestions: Set, count: 0}`.
 */
function createDictionaryItem(value) {
  var item = {
    suggestions: new Set(),
    count: 0
  };

  // Anything that is not a number (`undefined` included) leaves the set empty
  if (typeof value === 'number')
    item.suggestions.add(value);

  return item;
}
 | |
| 
 | |
/**
 * Builds a suggestion record, replacing any falsy argument by a neutral
 * default (`''` for the term, `0` for the distance and the count).
 *
 * @param  {string} [term]     - Suggested term.
 * @param  {number} [distance] - Edit distance between the query and the term.
 * @param  {number} [count]    - Number of times the term was indexed.
 * @return {object}            - The created `{term, distance, count}` item.
 */
function createSuggestionItem(term, distance, count) {
  var item = {};

  item.term = term ? term : '';
  item.distance = distance ? distance : 0;
  item.count = count ? count : 0;

  return item;
}
 | |
| 
 | |
/**
 * Collects every string obtained by deleting characters from `word`, one
 * character per recursion level, until `max` deletions have been applied.
 *
 * @param  {string} word      - Word to delete characters from.
 * @param  {number} distance  - Number of deletions already applied to `word`.
 * @param  {number} max       - Maximum number of deletions.
 * @param  {Set}    [deletes] - Set mutated to collect the deletes.
 * @return {Set}              - The set of deletes (`deletes` itself if given).
 */
function edits(word, distance, max, deletes) {
  var collected = deletes || new Set(),
      depth = distance + 1,
      size = word.length,
      position,
      shortened;

  // Words of a single character (or less) yield no delete at all
  if (size <= 1)
    return collected;

  for (position = 0; position < size; position++) {
    shortened = word.slice(0, position) + word.slice(position + 1);

    // Already collected: its own deletes were explored at that time
    if (collected.has(shortened))
      continue;

    collected.add(shortened);

    if (depth < max)
      edits(shortened, depth, max, collected);
  }

  return collected;
}
 | |
| 
 | |
/**
 * Function used to conditionally register a word as a suggestion of a
 * dictionary item.
 *
 * With a verbosity lower than 2, an item only keeps the shortest words
 * leading to `deletedItem` (i.e. the ones needing the fewest deletions):
 * longer suggestions already stored are discarded and a longer incoming
 * suggestion is ignored. With a verbosity of 2 the suggestion is always kept.
 *
 * Note that the item's `count` is never touched: it is the frequency of the
 * item's own key when that key is also an indexed word, and it must survive
 * the replacement of the suggestions.
 *
 * @param {array}  words       - Words list.
 * @param {number} verbosity   - Verbosity level.
 * @param {object} item        - The target item (its suggestions are mutated).
 * @param {string} suggestion  - The target suggestion.
 * @param {number} int         - Integer key of the word.
 * @param {string} deletedItem - Considered deleted item.
 */
function addLowestDistance(words, verbosity, item, suggestion, int, deletedItem) {

  // Any stored suggestion will do: under verbosity < 2 they share one length
  var first = item.suggestions.values().next().value;

  // Dropping stored suggestions that are farther than the incoming one
  if (verbosity < 2 &&
      item.suggestions.size > 0 &&
      words[first].length - deletedItem.length > suggestion.length - deletedItem.length) {
    item.suggestions = new Set();
  }

  // `first` is only dereferenced when the set is not empty (short-circuit)
  if (verbosity === 2 ||
      !item.suggestions.size ||
      words[first].length - deletedItem.length >= suggestion.length - deletedItem.length) {
    item.suggestions.add(int);
  }
}
 | |
| 
 | |
/**
 * Damerau-Levenshtein distance (insertions, deletions, substitutions and
 * transpositions) computed over a full dynamic-programming matrix.
 *
 * The matrix has two extra leading rows & columns: index 0 holds a ceiling
 * value no real path can beat and index 1 holds the distance to the empty
 * string.
 *
 * @param  {string} source - First string.
 * @param  {string} target - Second string.
 * @return {number}        - The distance.
 */
function damerauLevenshtein(source, target) {
  var sourceLength = source.length,
      targetLength = target.length,
      ceiling = sourceLength + targetLength,
      lastRow = new Map(),
      matrix = [],
      row,
      col;

  // Borders
  for (row = 0; row <= sourceLength + 1; row++) {
    matrix[row] = [];
    matrix[row][0] = ceiling;

    if (row > 0)
      matrix[row][1] = row - 1;
  }

  for (col = 1; col <= targetLength + 1; col++) {
    matrix[0][col] = ceiling;
    matrix[1][col] = col - 1;
  }

  // Filling the matrix
  for (row = 1; row <= sourceLength; row++) {
    var sourceChar = source[row - 1],
        lastMatchCol = 0;

    for (col = 1; col <= targetLength; col++) {
      var targetChar = target[col - 1],

          // Last source row holding `targetChar` (0 when not seen yet)
          prevRow = lastRow.get(targetChar) || 0,

          // Last column of the current row where both characters matched
          prevCol = lastMatchCol,
          best;

      if (sourceChar === targetChar) {
        best = matrix[row][col];
        lastMatchCol = col;
      }
      else {
        best = 1 + Math.min(
          matrix[row][col],
          matrix[row + 1][col],
          matrix[row][col + 1]
        );
      }

      // Cost of reaching the cell through a transposition
      var transposition = (
        matrix[prevRow][prevCol] +
        (row - prevRow - 1) + 1 + (col - prevCol - 1)
      );

      matrix[row + 1][col + 1] = Math.min(best, transposition);
    }

    lastRow.set(sourceChar, row);
  }

  return matrix[sourceLength + 1][targetLength + 1];
}
 | |
| 
 | |
/**
 * Lookup function.
 *
 * Candidates (the input, then its successive deletes) are processed in a
 * first-in first-out order, so that candidates needing fewer deletions are
 * always considered first. Each candidate found in the dictionary contributes
 * itself (if it is an indexed word) and the words registered as its
 * suggestions.
 *
 * @param  {object} dictionary  - A SymSpell dictionary.
 * @param  {array}  words       - Unique words list.
 * @param  {number} verbosity   - Verbosity level.
 * @param  {number} maxDistance - Maximum distance.
 * @param  {number} maxLength   - Maximum word length in the dictionary.
 * @param  {string} input       - Input string.
 * @return {array}              - The list of `{term, distance, count}`
 *                                suggestions.
 */
function lookup(dictionary, words, verbosity, maxDistance, maxLength, input) {
  var length = input.length;

  // The input is too long to be within range of even the longest word
  if (length - maxDistance > maxLength)
    return [];

  var candidates = [input],
      candidateSet = new Set(),
      suggestionSet = new Set();

  var suggestions = [],
      candidate,
      item,
      head;

  // Exhausting every candidate. The queue is read through a moving index
  // rather than `shift()` so that dequeuing does not move the whole array.
  for (head = 0; head < candidates.length; head++) {
    candidate = candidates[head];

    // Early termination
    if (
      verbosity < 2 &&
      suggestions.length > 0 &&
      length - candidate.length > suggestions[0].distance
    )
      break;

    item = dictionary[candidate];

    if (item !== undefined) {

      // A bare number is the compact form of an item with a single suggestion
      if (typeof item === 'number')
        item = createDictionaryItem(item);

      // The candidate itself is an indexed word
      if (item.count > 0 && !suggestionSet.has(candidate)) {
        suggestionSet.add(candidate);

        var suggestItem = createSuggestionItem(
          candidate,
          length - candidate.length,
          item.count
        );

        suggestions.push(suggestItem);

        // Another early termination (exact match)
        if (verbosity < 2 && length - candidate.length === 0)
          break;
      }

      // Iterating over the item's suggestions
      item.suggestions.forEach(index => {
        var suggestion = words[index];

        // Do we already have this suggestion?
        if (suggestionSet.has(suggestion))
          return;

        suggestionSet.add(suggestion);

        // Computing distance between input & suggestion
        var distance = 0;

        if (input !== suggestion) {
          if (suggestion.length === candidate.length) {

            // The candidate is the suggestion: only the input was deleted from
            distance = length - candidate.length;
          }
          else if (length === candidate.length) {

            // The candidate is the input: only the suggestion was deleted from
            distance = suggestion.length - candidate.length;
          }
          else {

            // Deletions on both sides: the actual distance must be computed,
            // ignoring the common prefix (ii) & common suffix (jj).
            var ii = 0,
                jj = 0;

            var l = suggestion.length;

            while (
              ii < l &&
              ii < length &&
              suggestion[ii] === input[ii]
            ) {
              ii++;
            }

            // The suffix must not run into the prefix on either string,
            // else characters of the shorter one would be matched twice and
            // the remaining middle part would have a negative length.
            while (
              jj < l - ii &&
              jj < length - ii &&
              suggestion[l - jj - 1] === input[length - jj - 1]
            ) {
              jj++;
            }

            if (ii > 0 || jj > 0) {
              distance = damerauLevenshtein(
                suggestion.slice(ii, l - jj),
                input.slice(ii, length - jj)
              );
            }
            else {
              distance = damerauLevenshtein(suggestion, input);
            }
          }
        }

        // Removing suggestions of higher distance
        if (verbosity < 2 &&
            suggestions.length > 0 &&
            suggestions[0].distance > distance) {
          suggestions = [];
        }

        // Skipping suggestions farther than the ones already found
        if (verbosity < 2 &&
            suggestions.length > 0 &&
            distance > suggestions[0].distance) {
          return;
        }

        if (distance <= maxDistance) {
          var target = dictionary[suggestion];

          if (target !== undefined) {
            suggestions.push(createSuggestionItem(
              suggestion,
              distance,
              target.count
            ));
          }
        }
      });
    }

    // Adding edits
    if (length - candidate.length < maxDistance) {

      // Deleting further cannot yield anything closer than what we have
      if (verbosity < 2 &&
          suggestions.length > 0 &&
          length - candidate.length >= suggestions[0].distance)
        continue;

      for (var i = 0, l = candidate.length; i < l; i++) {
        var deletedItem = (
          candidate.substring(0, i) +
          candidate.substring(i + 1)
        );

        if (!candidateSet.has(deletedItem)) {
          candidateSet.add(deletedItem);
          candidates.push(deletedItem);
        }
      }
    }
  }

  if (verbosity === 0)
    return suggestions.slice(0, 1);

  return suggestions;
}
 | |
| 
 | |
/**
 * SymSpell.
 *
 * @constructor
 * @param  {object} [options]             - Options:
 * @param  {number} [options.maxDistance] - Maximum edit distance (a number
 *                                          greater than 0). Falls back to the
 *                                          default when not a number.
 * @param  {number} [options.verbosity]   - Verbosity level (0, 1 or 2). Falls
 *                                          back to the default when not a
 *                                          number.
 * @throws {Error} - If `maxDistance` is `NaN` or not greater than 0, or if
 *                   `verbosity` is not a supported level.
 */
function SymSpell(options) {
  options = options || {};

  this.clear();

  // Properties
  this.maxDistance = typeof options.maxDistance === 'number' ?
    options.maxDistance :
    DEFAULT_MAX_DISTANCE;
  this.verbosity = typeof options.verbosity === 'number' ?
    options.verbosity :
    DEFAULT_VERBOSITY;

  // Sanity checks
  // NOTE: `NaN` is typed as a number and `NaN <= 0` is false, so it has to be
  // rejected explicitly, else every distance comparison would silently fail.
  if (typeof this.maxDistance !== 'number' ||
      Number.isNaN(this.maxDistance) ||
      this.maxDistance <= 0)
    throw new Error('mnemonist/SymSpell.constructor: invalid `maxDistance` option. Should be a integer greater than 0.');

  if (!VERBOSITY.has(this.verbosity))
    throw new Error('mnemonist/SymSpell.constructor: invalid `verbosity` option. Should be either 0, 1 or 2.');
}
 | |
| 
 | |
/**
 * Method used to reset the structure to its empty state: no word, an empty
 * dictionary and a maximum word length of 0.
 *
 * @return {undefined}
 */
SymSpell.prototype.clear = function() {

  // Properties
  Object.assign(this, {
    size: 0,
    dictionary: Object.create(null),
    maxLength: 0,
    words: []
  });
};
 | |
| 
 | |
/**
 * Method used to add a word to the index.
 *
 * The first time a word is seen, it is appended to the words list and each
 * of its deletes is registered in the dictionary. A delete reached by a
 * single word so far is stored as that word's bare index and is only
 * expanded to a full item when needed.
 *
 * @param  {string} word - Word to add.
 * @return {SymSpell}    - Returns itself for chaining purposes.
 */
SymSpell.prototype.add = function(word) {
  var dictionary = this.dictionary,
      entry = dictionary[word];

  if (entry === undefined) {
    entry = createDictionaryItem();
    dictionary[word] = entry;

    if (word.length > this.maxLength)
      this.maxLength = word.length;
  }
  else if (typeof entry === 'number') {

    // The word was only known as a delete of another word until now
    entry = createDictionaryItem(entry);
    dictionary[word] = entry;
  }

  entry.count++;

  // Deletes only need to be indexed on the word's first occurrence
  if (entry.count === 1) {
    var wordIndex = this.words.length;
    this.words.push(word);

    var deletes = edits(word, 0, this.maxDistance);

    for (var deletedItem of deletes) {
      var target = dictionary[deletedItem];

      // First word reaching this delete: storing its bare index
      if (target === undefined) {
        dictionary[deletedItem] = wordIndex;
        continue;
      }

      if (typeof target === 'number') {
        target = createDictionaryItem(target);
        dictionary[deletedItem] = target;
      }

      if (!target.suggestions.has(wordIndex)) {
        addLowestDistance(
          this.words,
          this.verbosity,
          target,
          word,
          wordIndex,
          deletedItem
        );
      }
    }
  }

  this.size++;

  return this;
};
 | |
| 
 | |
/**
 * Method used to search the index, by running the lookup with the
 * instance's dictionary, words and settings.
 *
 * @param  {string} input - Input query.
 * @return {array}        - The found suggestions.
 */
SymSpell.prototype.search = function(input) {
  var index = this;

  return lookup(
    index.dictionary,
    index.words,
    index.verbosity,
    index.maxDistance,
    index.maxLength,
    input
  );
};
 | |
| 
 | |
/**
 * Convenience known methods.
 */

/**
 * Method returning an array of `[word, count]` pairs, decorated with the
 * instance's size & settings, meant to be displayed when inspecting it.
 *
 * @return {array}
 */
SymSpell.prototype.inspect = function() {
  var dictionary = this.dictionary,
      proxy = [];

  proxy.size = this.size;
  proxy.maxDistance = this.maxDistance;
  proxy.verbosity = this.verbosity;
  proxy.behavior = VERBOSITY_EXPLANATIONS[this.verbosity];

  // Only actual words are listed, not the entries standing for deletes
  Object.keys(dictionary).forEach(function(key) {
    var entry = dictionary[key];

    if (typeof entry === 'object' && entry.count)
      proxy.push([key, entry.count]);
  });

  // Trick so that node displays the name of the constructor
  Object.defineProperty(proxy, 'constructor', {
    value: SymSpell,
    enumerable: false
  });

  return proxy;
};

// Registering the method under node's custom inspection symbol, if possible
if (typeof Symbol !== 'undefined')
  SymSpell.prototype[Symbol.for('nodejs.util.inspect.custom')] = SymSpell.prototype.inspect;
 | |
| 
 | |
/**
 * Static @.from function building a new index and adding every value of
 * the given iterable to it.
 *
 * @param  {Iterable} iterable  - Target iterable.
 * @param  {object}   [options] - Options passed to the constructor.
 * @return {SymSpell}
 */
SymSpell.from = function(iterable, options) {
  var instance = new SymSpell(options);

  forEach(iterable, value => {
    instance.add(value);
  });

  return instance;
};
 | |
| 
 | |
| /**
 | |
|  * Exporting.
 | |
|  */
 | |
| module.exports = SymSpell;
 |