You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							250 lines
						
					
					
						
							5.3 KiB
						
					
					
				
			
		
		
	
	
							250 lines
						
					
					
						
							5.3 KiB
						
					
					
				/**
 | 
						|
 * Mnemonist Inverted Index
 | 
						|
 * =========================
 | 
						|
 *
 | 
						|
 * JavaScript implementation of an inverted index.
 | 
						|
 */
 | 
						|
var Iterator = require('obliterator/iterator'),
 | 
						|
    forEach = require('obliterator/foreach'),
 | 
						|
    helpers = require('./utils/merge.js');
 | 
						|
 | 
						|
function identity(x) {
 | 
						|
  return x;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * InvertedIndex.
 | 
						|
 *
 | 
						|
 * @constructor
 | 
						|
 * @param {function} tokenizer - Tokenizer function.
 | 
						|
 */
 | 
						|
function InvertedIndex(descriptor) {
 | 
						|
  this.clear();
 | 
						|
 | 
						|
  if (Array.isArray(descriptor)) {
 | 
						|
    this.documentTokenizer = descriptor[0];
 | 
						|
    this.queryTokenizer = descriptor[1];
 | 
						|
  }
 | 
						|
  else {
 | 
						|
    this.documentTokenizer = descriptor;
 | 
						|
    this.queryTokenizer = descriptor;
 | 
						|
  }
 | 
						|
 | 
						|
  if (!this.documentTokenizer)
 | 
						|
    this.documentTokenizer = identity;
 | 
						|
  if (!this.queryTokenizer)
 | 
						|
    this.queryTokenizer = identity;
 | 
						|
 | 
						|
  if (typeof this.documentTokenizer !== 'function')
 | 
						|
    throw new Error('mnemonist/InvertedIndex.constructor: document tokenizer is not a function.');
 | 
						|
 | 
						|
  if (typeof this.queryTokenizer !== 'function')
 | 
						|
    throw new Error('mnemonist/InvertedIndex.constructor: query tokenizer is not a function.');
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * Method used to clear the InvertedIndex.
 | 
						|
 *
 | 
						|
 * @return {undefined}
 | 
						|
 */
 | 
						|
InvertedIndex.prototype.clear = function() {
 | 
						|
 | 
						|
  // Properties
 | 
						|
  this.items = [];
 | 
						|
  this.mapping = new Map();
 | 
						|
  this.size = 0;
 | 
						|
  this.dimension = 0;
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * Method used to add a document to the index.
 | 
						|
 *
 | 
						|
 * @param  {any} doc - Item to add.
 | 
						|
 * @return {InvertedIndex}
 | 
						|
 */
 | 
						|
InvertedIndex.prototype.add = function(doc) {
 | 
						|
 | 
						|
  // Increasing size
 | 
						|
  this.size++;
 | 
						|
 | 
						|
  // Storing document
 | 
						|
  var key = this.items.length;
 | 
						|
  this.items.push(doc);
 | 
						|
 | 
						|
  // Tokenizing the document
 | 
						|
  var tokens = this.documentTokenizer(doc);
 | 
						|
 | 
						|
  if (!Array.isArray(tokens))
 | 
						|
    throw new Error('mnemonist/InvertedIndex.add: tokenizer function should return an array of tokens.');
 | 
						|
 | 
						|
  // Indexing
 | 
						|
  var done = new Set(),
 | 
						|
      token,
 | 
						|
      container;
 | 
						|
 | 
						|
  for (var i = 0, l = tokens.length; i < l; i++) {
 | 
						|
    token = tokens[i];
 | 
						|
 | 
						|
    if (done.has(token))
 | 
						|
      continue;
 | 
						|
 | 
						|
    done.add(token);
 | 
						|
 | 
						|
    container = this.mapping.get(token);
 | 
						|
 | 
						|
    if (!container) {
 | 
						|
      container = [];
 | 
						|
      this.mapping.set(token, container);
 | 
						|
    }
 | 
						|
 | 
						|
    container.push(key);
 | 
						|
  }
 | 
						|
 | 
						|
  this.dimension = this.mapping.size;
 | 
						|
 | 
						|
  return this;
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * Method used to query the index in a AND fashion.
 | 
						|
 *
 | 
						|
 * @param  {any} query - Query
 | 
						|
 * @return {Set}       - Intersection of documents matching the query.
 | 
						|
 */
 | 
						|
InvertedIndex.prototype.get = function(query) {
 | 
						|
 | 
						|
  // Early termination
 | 
						|
  if (!this.size)
 | 
						|
    return [];
 | 
						|
 | 
						|
  // First we need to tokenize the query
 | 
						|
  var tokens = this.queryTokenizer(query);
 | 
						|
 | 
						|
  if (!Array.isArray(tokens))
 | 
						|
    throw new Error('mnemonist/InvertedIndex.query: tokenizer function should return an array of tokens.');
 | 
						|
 | 
						|
  if (!tokens.length)
 | 
						|
    return [];
 | 
						|
 | 
						|
  var results = this.mapping.get(tokens[0]),
 | 
						|
      c,
 | 
						|
      i,
 | 
						|
      l;
 | 
						|
 | 
						|
  if (typeof results === 'undefined' || results.length === 0)
 | 
						|
    return [];
 | 
						|
 | 
						|
  if (tokens.length > 1) {
 | 
						|
    for (i = 1, l = tokens.length; i < l; i++) {
 | 
						|
      c = this.mapping.get(tokens[i]);
 | 
						|
 | 
						|
      if (typeof c === 'undefined' || c.length === 0)
 | 
						|
        return [];
 | 
						|
 | 
						|
      results = helpers.intersectionUniqueArrays(results, c);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  var docs = new Array(results.length);
 | 
						|
 | 
						|
  for (i = 0, l = docs.length; i < l; i++)
 | 
						|
    docs[i] = this.items[results[i]];
 | 
						|
 | 
						|
  return docs;
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * Method used to iterate over each of the documents.
 | 
						|
 *
 | 
						|
 * @param  {function}  callback - Function to call for each item.
 | 
						|
 * @param  {object}    scope    - Optional scope.
 | 
						|
 * @return {undefined}
 | 
						|
 */
 | 
						|
InvertedIndex.prototype.forEach = function(callback, scope) {
 | 
						|
  scope = arguments.length > 1 ? scope : this;
 | 
						|
 | 
						|
  for (var i = 0, l = this.documents.length; i < l; i++)
 | 
						|
    callback.call(scope, this.documents[i], i, this);
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * Method returning an iterator over the index's documents.
 | 
						|
 *
 | 
						|
 * @return {Iterator}
 | 
						|
 */
 | 
						|
InvertedIndex.prototype.documents = function() {
 | 
						|
  var documents = this.items,
 | 
						|
      l = documents.length,
 | 
						|
      i = 0;
 | 
						|
 | 
						|
  return new Iterator(function() {
 | 
						|
    if (i >= l)
 | 
						|
      return {
 | 
						|
        done: true
 | 
						|
      };
 | 
						|
 | 
						|
      var value = documents[i++];
 | 
						|
 | 
						|
      return {
 | 
						|
        value: value,
 | 
						|
        done: false
 | 
						|
      };
 | 
						|
  });
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * Method returning an iterator over the index's tokens.
 | 
						|
 *
 | 
						|
 * @return {Iterator}
 | 
						|
 */
 | 
						|
InvertedIndex.prototype.tokens = function() {
 | 
						|
  return this.mapping.keys();
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * Attaching the #.values method to Symbol.iterator if possible.
 | 
						|
 */
 | 
						|
if (typeof Symbol !== 'undefined')
 | 
						|
  InvertedIndex.prototype[Symbol.iterator] = InvertedIndex.prototype.documents;
 | 
						|
 | 
						|
/**
 | 
						|
 * Convenience known methods.
 | 
						|
 */
 | 
						|
InvertedIndex.prototype.inspect = function() {
 | 
						|
  var array = this.items.slice();
 | 
						|
 | 
						|
  // Trick so that node displays the name of the constructor
 | 
						|
  Object.defineProperty(array, 'constructor', {
 | 
						|
    value: InvertedIndex,
 | 
						|
    enumerable: false
 | 
						|
  });
 | 
						|
 | 
						|
  return array;
 | 
						|
};
 | 
						|
 | 
						|
if (typeof Symbol !== 'undefined')
 | 
						|
  InvertedIndex.prototype[Symbol.for('nodejs.util.inspect.custom')] = InvertedIndex.prototype.inspect;
 | 
						|
 | 
						|
/**
 | 
						|
 * Static @.from function taking an arbitrary iterable & converting it into
 | 
						|
 * a InvertedIndex.
 | 
						|
 *
 | 
						|
 * @param  {Iterable} iterable - Target iterable.
 | 
						|
 * @param  {function} tokenizer - Tokenizer function.
 | 
						|
 * @return {InvertedIndex}
 | 
						|
 */
 | 
						|
InvertedIndex.from = function(iterable, descriptor) {
 | 
						|
  var index = new InvertedIndex(descriptor);
 | 
						|
 | 
						|
  forEach(iterable, function(doc) {
 | 
						|
    index.add(doc);
 | 
						|
  });
 | 
						|
 | 
						|
  return index;
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * Exporting.
 | 
						|
 */
 | 
						|
module.exports = InvertedIndex;
 |