You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					368 lines
				
				8.9 KiB
			
		
		
			
		
	
	
					368 lines
				
				8.9 KiB
			| 
											3 years ago
/**
 * Mnemonist Vantage Point Tree
 * =============================
 *
 * JavaScript implementation of the Vantage Point Tree storing the binary
 * tree as a flat byte array.
 *
 * Note that a VPTree has worst cases and is likely not to be perfectly
 * balanced because of median ambiguity. It is therefore not suitable
 * for hairballs and tiny datasets.
 *
 * [Reference]:
 * https://en.wikipedia.org/wiki/Vantage-point_tree
 */
// Project-local helpers used to build and query the tree.
var iterables = require('./utils/iterables.js'),
    typed = require('./utils/typed-arrays.js'),
    inplaceQuickSortIndices = require('./sort/quick.js').inplaceQuickSortIndices,
    lowerBoundIndices = require('./utils/binary-search.js').lowerBoundIndices,
    Heap = require('./heap.js');

// Shortcut to the typed array class picker used when allocating the tree.
var getPointerArray = typed.getPointerArray;
|  | 
 | ||
// TODO: implement vantage point selection techniques (by swapping with last)
// TODO: is this required to implement early termination for k <= size?
|  | 
 | ||
/**
 * Heap comparator used by the #.nearestNeighbors method.
 *
 * The ordering is deliberately inverted: the candidate with the smaller
 * `distance` compares as the greater one. #.nearestNeighbors reads the top of
 * the heap as its current worst retained candidate, so it relies on this
 * inversion to find the farthest neighbor there.
 *
 * @param  {object} a - First candidate (only its `distance` is read).
 * @param  {object} b - Second candidate (only its `distance` is read).
 * @return {number}   - 1 if `a` is closer than `b`, -1 if `a` is farther,
 *                      0 otherwise.
 */
function comparator(a, b) {
  if (a.distance < b.distance)
    return 1;

  if (a.distance > b.distance)
    return -1;

  return 0;
}
|  | 
 | ||
/**
 * Function used to create the binary tree.
 *
 * The tree is described by four parallel arrays indexed by node:
 *   - `nodes[n]`  is the index (in `items`) of the vantage point of node n,
 *   - `lefts[n]`  is the node holding the points closer than `mus[n]`,
 *   - `rights[n]` is the node holding the points at distance >= `mus[n]`,
 *   - `mus[n]`    is the distance threshold used to split at node n.
 *
 * Node 0 is the root and child pointers are allocated starting from 1, so
 * a 0 in `lefts` or `rights` means "no child".
 *
 * @param  {function} distance - Distance function to use.
 * @param  {array}    items    - Items to index (only read).
 * @param  {array}    indices  - Indexes of the items. This array is handed to
 *                               `inplaceQuickSortIndices` and is therefore
 *                               expected to be reordered in place.
 * @return {object}            - The flat binary tree as an object with the
 *                               `nodes`, `lefts`, `rights` & `mus` arrays.
 */
function createBinaryTree(distance, items, indices) {
  var N = indices.length;

  var PointerArray = getPointerArray(N);

  var C = 0,
      nodes = new PointerArray(N),
      lefts = new PointerArray(N),
      rights = new PointerArray(N),
      mus = new Float64Array(N),

      // Each task is a (nodeIndex, lo, hi) triple describing the slice
      // [lo, hi) of `indices` to store under `nodeIndex`. With no items there
      // is nothing to partition, so we do not even schedule the root.
      stack = N > 0 ? [0, 0, N] : [],
      distances = new Float64Array(N),
      nodeIndex,
      vantagePoint,
      medianIndex,
      lo,
      hi,
      mid,
      mu,
      i,
      l;

  while (stack.length) {
    hi = stack.pop();
    lo = stack.pop();
    nodeIndex = stack.pop();

    // Getting our vantage point: the last index of the slice
    vantagePoint = indices[hi - 1];
    hi--;

    // Number of points remaining once the vantage point is removed
    l = hi - lo;

    // Storing vantage point
    nodes[nodeIndex] = vantagePoint;

    // We are in a leaf
    if (l === 0)
      continue;

    // We only have two elements, the second one has to go right
    if (l === 1) {

      // We put remaining item to the right
      mu = distance(items[vantagePoint], items[indices[lo]]);

      mus[nodeIndex] = mu;

      // Right
      C++;
      rights[nodeIndex] = C;
      nodes[C] = indices[lo];

      continue;
    }

    // Computing distance from vantage point to other points
    for (i = lo; i < hi; i++)
      distances[indices[i]] = distance(items[vantagePoint], items[indices[i]]);

    inplaceQuickSortIndices(distances, indices, lo, hi);

    // Finding median of distances
    medianIndex = lo + (l / 2) - 1;

    // Need to interpolate? (integer index means an even number of points)
    if (medianIndex === (medianIndex | 0)) {
      mu = (
        distances[indices[medianIndex]] +
        distances[indices[medianIndex + 1]]
      ) / 2;
    }
    else {
      mu = distances[indices[Math.ceil(medianIndex)]];
    }

    // Storing mu
    mus[nodeIndex] = mu;

    // First position of the slice whose distance is >= mu
    mid = lowerBoundIndices(distances, indices, mu, lo, hi);

    // Right
    if (hi - mid > 0) {
      C++;
      rights[nodeIndex] = C;
      stack.push(C, mid, hi);
    }

    // Left
    if (mid - lo > 0) {
      C++;
      lefts[nodeIndex] = C;
      stack.push(C, lo, mid);
    }
  }

  return {
    nodes: nodes,
    lefts: lefts,
    rights: rights,
    mus: mus
  };
}
|  | 
 | ||
/**
 * VPTree.
 *
 * The tree is built once, at construction time, from the given items.
 *
 * @constructor
 * @param {function} distance - Distance function to use.
 * @param {Iterable} items    - Items to store.
 * @throws {Error} If `distance` is not a function or if `items` is falsy.
 */
function VPTree(distance, items) {
  if (typeof distance !== 'function')
    throw new Error('mnemonist/VPTree.constructor: given `distance` must be a function.');

  if (!items)
    throw new Error('mnemonist/VPTree.constructor: you must provide items to the tree. A VPTree cannot be updated after its creation.');

  // Properties
  this.distance = distance;

  // Heap reused across #.nearestNeighbors calls
  this.heap = new Heap(comparator);

  // Number of distance computations performed by the last query
  this.D = 0;

  var arrays = iterables.toArrayWithIndices(items);
  this.items = arrays[0];
  var indices = arrays[1];

  // Creating the binary tree
  this.size = indices.length;

  var result = createBinaryTree(distance, this.items, indices);

  this.nodes = result.nodes;
  this.lefts = result.lefts;
  this.rights = result.rights;
  this.mus = result.mus;
}
|  | 
 | ||
/**
 * Function used to retrieve the k nearest neighbors of the query.
 *
 * The number of distance computations performed is stored in `this.D`.
 *
 * @param  {number} k     - Number of neighbors to retrieve.
 * @param  {any}    query - The query.
 * @return {array}        - Array of `{distance, item}` objects, filled from
 *                          last to first with successive heap pops. It is
 *                          empty if the tree is empty or if `k <= 0`.
 */
VPTree.prototype.nearestNeighbors = function(k, query) {
  var neighbors = this.heap,
      stack = [0],
      tau = Infinity,
      nodeIndex,
      itemIndex,
      vantagePoint,
      leftIndex,
      rightIndex,
      mu,
      d;

  this.D = 0;

  // An empty tree has no root to visit, and a non-positive k would empty
  // the heap right before peeking at it.
  if (this.size === 0 || k <= 0)
    return [];

  while (stack.length) {
    nodeIndex = stack.pop();
    itemIndex = this.nodes[nodeIndex];
    vantagePoint = this.items[itemIndex];

    // Distance between query & the current vantage point
    d = this.distance(vantagePoint, query);
    this.D++;

    if (d < tau) {
      neighbors.push({distance: d, item: vantagePoint});

      // Trimming
      if (neighbors.size > k)
        neighbors.pop();

      // Adjusting tau (only if we already have k items, else it stays Infinity)
      if (neighbors.size >= k)
        tau = neighbors.peek().distance;
    }

    leftIndex = this.lefts[nodeIndex];
    rightIndex = this.rights[nodeIndex];

    // We are a leaf (0 is the root, hence never a child index)
    if (!leftIndex && !rightIndex)
      continue;

    mu = this.mus[nodeIndex];

    // Both children are tested against the same bounds; the branch only
    // decides the order in which they are pushed on the stack.
    if (d < mu) {
      if (leftIndex && d < mu + tau)
        stack.push(leftIndex);
      if (rightIndex && d >= mu - tau)
        stack.push(rightIndex);
    }
    else {
      if (rightIndex && d >= mu - tau)
        stack.push(rightIndex);
      if (leftIndex && d < mu + tau)
        stack.push(leftIndex);
    }
  }

  // Draining the heap into the result, last slot first
  var array = new Array(neighbors.size);

  for (var i = neighbors.size - 1; i >= 0; i--)
    array[i] = neighbors.pop();

  return array;
};
|  | 
 | ||
/**
 * Function used to retrieve every neighbors of query in the given radius.
 *
 * The number of distance computations performed is stored in `this.D`.
 *
 * @param  {number} radius - Radius (inclusive).
 * @param  {any}    query  - The query.
 * @return {array}         - Array of `{distance, item}` objects, in traversal
 *                           order. It is empty if the tree is empty.
 */
VPTree.prototype.neighbors = function(radius, query) {
  var neighbors = [],
      stack = [0],
      nodeIndex,
      itemIndex,
      vantagePoint,
      leftIndex,
      rightIndex,
      mu,
      d;

  this.D = 0;

  // An empty tree has no root to visit
  if (this.size === 0)
    return neighbors;

  while (stack.length) {
    nodeIndex = stack.pop();
    itemIndex = this.nodes[nodeIndex];
    vantagePoint = this.items[itemIndex];

    // Distance between query & the current vantage point
    d = this.distance(vantagePoint, query);
    this.D++;

    if (d <= radius)
      neighbors.push({distance: d, item: vantagePoint});

    leftIndex = this.lefts[nodeIndex];
    rightIndex = this.rights[nodeIndex];

    // We are a leaf (0 is the root, hence never a child index)
    if (!leftIndex && !rightIndex)
      continue;

    mu = this.mus[nodeIndex];

    // Both children are tested against the same bounds; the branch only
    // decides the order in which they are pushed on the stack.
    if (d < mu) {
      if (leftIndex && d < mu + radius)
        stack.push(leftIndex);
      if (rightIndex && d >= mu - radius)
        stack.push(rightIndex);
    }
    else {
      if (rightIndex && d >= mu - radius)
        stack.push(rightIndex);
      if (leftIndex && d < mu + radius)
        stack.push(leftIndex);
    }
  }

  return neighbors;
};
|  | 
 | ||
/**
 * Convenience known methods.
 *
 * #.inspect returns a shallow copy of the stored items whose `constructor`
 * is overridden, so that the tree itself is never exposed to the caller.
 *
 * @return {array} - Copy of the items, tagged with the VPTree constructor.
 */
VPTree.prototype.inspect = function() {
  var array = this.items.slice();

  // Trick so that node displays the name of the constructor
  Object.defineProperty(array, 'constructor', {
    value: VPTree,
    enumerable: false
  });

  return array;
};

// Registering the same method under node's custom inspection symbol
if (typeof Symbol !== 'undefined')
  VPTree.prototype[Symbol.for('nodejs.util.inspect.custom')] = VPTree.prototype.inspect;
|  | 
 | ||
/**
 * Static @.from function taking an arbitrary iterable & converting it into
 * a tree.
 *
 * Note that the argument order is the reverse of the constructor's.
 *
 * @param  {Iterable} iterable - Target iterable.
 * @param  {function} distance - Distance function to use.
 * @return {VPTree}
 */
VPTree.from = function(iterable, distance) {
  return new VPTree(distance, iterable);
};
|  | 
 | ||
/**
 * Exporting.
 */
module.exports = VPTree;