You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
368 lines
8.9 KiB
368 lines
8.9 KiB
/**
|
|
* Mnemonist Vantage Point Tree
|
|
* =============================
|
|
*
|
|
* JavaScript implementation of the Vantage Point Tree storing the binary
|
|
* tree as a flat byte array.
|
|
*
|
|
* Note that a VPTree has worst cases and is likely not to be perfectly
|
|
* balanced because of median ambiguity. It is therefore not suitable
|
|
* for hairballs and tiny datasets.
|
|
*
|
|
* [Reference]:
|
|
* https://en.wikipedia.org/wiki/Vantage-point_tree
|
|
*/
|
|
var iterables = require('./utils/iterables.js'),
|
|
typed = require('./utils/typed-arrays.js'),
|
|
inplaceQuickSortIndices = require('./sort/quick.js').inplaceQuickSortIndices,
|
|
lowerBoundIndices = require('./utils/binary-search.js').lowerBoundIndices,
|
|
Heap = require('./heap.js');
|
|
|
|
var getPointerArray = typed.getPointerArray;
|
|
|
|
// TODO: implement vantage point selection techniques (by swapping with last)
|
|
// TODO: is this required to implement early termination for k <= size?
|
|
|
|
/**
|
|
* Heap comparator used by the #.nearestNeighbors method.
|
|
*/
|
|
function comparator(a, b) {
|
|
if (a.distance < b.distance)
|
|
return 1;
|
|
|
|
if (a.distance > b.distance)
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Function used to create the binary tree.
|
|
*
|
|
* @param {function} distance - Distance function to use.
|
|
* @param {array} items - Items to index (will be mutated).
|
|
* @param {array} indices - Indexes of the items.
|
|
* @return {Float64Array} - The flat binary tree.
|
|
*/
|
|
function createBinaryTree(distance, items, indices) {
|
|
var N = indices.length;
|
|
|
|
var PointerArray = getPointerArray(N);
|
|
|
|
var C = 0,
|
|
nodes = new PointerArray(N),
|
|
lefts = new PointerArray(N),
|
|
rights = new PointerArray(N),
|
|
mus = new Float64Array(N),
|
|
stack = [0, 0, N],
|
|
distances = new Float64Array(N),
|
|
nodeIndex,
|
|
vantagePoint,
|
|
medianIndex,
|
|
lo,
|
|
hi,
|
|
mid,
|
|
mu,
|
|
i,
|
|
l;
|
|
|
|
while (stack.length) {
|
|
hi = stack.pop();
|
|
lo = stack.pop();
|
|
nodeIndex = stack.pop();
|
|
|
|
// Getting our vantage point
|
|
vantagePoint = indices[hi - 1];
|
|
hi--;
|
|
|
|
l = hi - lo;
|
|
|
|
// Storing vantage point
|
|
nodes[nodeIndex] = vantagePoint;
|
|
|
|
// We are in a leaf
|
|
if (l === 0)
|
|
continue;
|
|
|
|
// We only have two elements, the second one has to go right
|
|
if (l === 1) {
|
|
|
|
// We put remaining item to the right
|
|
mu = distance(items[vantagePoint], items[indices[lo]]);
|
|
|
|
mus[nodeIndex] = mu;
|
|
|
|
// Right
|
|
C++;
|
|
rights[nodeIndex] = C;
|
|
nodes[C] = indices[lo];
|
|
|
|
continue;
|
|
}
|
|
|
|
// Computing distance from vantage point to other points
|
|
for (i = lo; i < hi; i++)
|
|
distances[indices[i]] = distance(items[vantagePoint], items[indices[i]]);
|
|
|
|
inplaceQuickSortIndices(distances, indices, lo, hi);
|
|
|
|
// Finding median of distances
|
|
medianIndex = lo + (l / 2) - 1;
|
|
|
|
// Need to interpolate?
|
|
if (medianIndex === (medianIndex | 0)) {
|
|
mu = (
|
|
distances[indices[medianIndex]] +
|
|
distances[indices[medianIndex + 1]]
|
|
) / 2;
|
|
}
|
|
else {
|
|
mu = distances[indices[Math.ceil(medianIndex)]];
|
|
}
|
|
|
|
// Storing mu
|
|
mus[nodeIndex] = mu;
|
|
|
|
mid = lowerBoundIndices(distances, indices, mu, lo, hi);
|
|
|
|
// console.log('Vantage point', items[vantagePoint], vantagePoint);
|
|
// console.log('mu =', mu);
|
|
// console.log('lo =', lo);
|
|
// console.log('hi =', hi);
|
|
// console.log('mid =', mid);
|
|
|
|
// console.log('need to split', Array.from(indices).slice(lo, hi).map(i => {
|
|
// return [distances[i], distance(items[vantagePoint], items[i]), items[i]];
|
|
// }));
|
|
|
|
// Right
|
|
if (hi - mid > 0) {
|
|
C++;
|
|
rights[nodeIndex] = C;
|
|
stack.push(C, mid, hi);
|
|
// console.log('Went right with ', Array.from(indices).slice(mid, hi).map(i => {
|
|
// return [distances[i], distance(items[vantagePoint], items[i]), items[i]];
|
|
// }));
|
|
}
|
|
|
|
// Left
|
|
if (mid - lo > 0) {
|
|
C++;
|
|
lefts[nodeIndex] = C;
|
|
stack.push(C, lo, mid);
|
|
// console.log('Went left with', Array.from(indices).slice(lo, mid).map(i => {
|
|
// return [distances[i], distance(items[vantagePoint], items[i]), items[i]];
|
|
// }));
|
|
}
|
|
|
|
// console.log();
|
|
}
|
|
|
|
return {
|
|
nodes: nodes,
|
|
lefts: lefts,
|
|
rights: rights,
|
|
mus: mus
|
|
};
|
|
}
|
|
|
|
/**
|
|
* VPTree.
|
|
*
|
|
* @constructor
|
|
* @param {function} distance - Distance function to use.
|
|
* @param {Iterable} items - Items to store.
|
|
*/
|
|
function VPTree(distance, items) {
|
|
if (typeof distance !== 'function')
|
|
throw new Error('mnemonist/VPTree.constructor: given `distance` must be a function.');
|
|
|
|
if (!items)
|
|
throw new Error('mnemonist/VPTree.constructor: you must provide items to the tree. A VPTree cannot be updated after its creation.');
|
|
|
|
// Properties
|
|
this.distance = distance;
|
|
this.heap = new Heap(comparator);
|
|
this.D = 0;
|
|
|
|
var arrays = iterables.toArrayWithIndices(items);
|
|
this.items = arrays[0];
|
|
var indices = arrays[1];
|
|
|
|
// Creating the binary tree
|
|
this.size = indices.length;
|
|
|
|
var result = createBinaryTree(distance, this.items, indices);
|
|
|
|
this.nodes = result.nodes;
|
|
this.lefts = result.lefts;
|
|
this.rights = result.rights;
|
|
this.mus = result.mus;
|
|
}
|
|
|
|
/**
|
|
* Function used to retrieve the k nearest neighbors of the query.
|
|
*
|
|
* @param {number} k - Number of neighbors to retrieve.
|
|
* @param {any} query - The query.
|
|
* @return {array}
|
|
*/
|
|
VPTree.prototype.nearestNeighbors = function(k, query) {
|
|
var neighbors = this.heap,
|
|
stack = [0],
|
|
tau = Infinity,
|
|
nodeIndex,
|
|
itemIndex,
|
|
vantagePoint,
|
|
leftIndex,
|
|
rightIndex,
|
|
mu,
|
|
d;
|
|
|
|
this.D = 0;
|
|
|
|
while (stack.length) {
|
|
nodeIndex = stack.pop();
|
|
itemIndex = this.nodes[nodeIndex];
|
|
vantagePoint = this.items[itemIndex];
|
|
|
|
// Distance between query & the current vantage point
|
|
d = this.distance(vantagePoint, query);
|
|
this.D++;
|
|
|
|
if (d < tau) {
|
|
neighbors.push({distance: d, item: vantagePoint});
|
|
|
|
// Trimming
|
|
if (neighbors.size > k)
|
|
neighbors.pop();
|
|
|
|
// Adjusting tau (only if we already have k items, else it stays Infinity)
|
|
if (neighbors.size >= k)
|
|
tau = neighbors.peek().distance;
|
|
}
|
|
|
|
leftIndex = this.lefts[nodeIndex];
|
|
rightIndex = this.rights[nodeIndex];
|
|
|
|
// We are a leaf
|
|
if (!leftIndex && !rightIndex)
|
|
continue;
|
|
|
|
mu = this.mus[nodeIndex];
|
|
|
|
if (d < mu) {
|
|
if (leftIndex && d < mu + tau)
|
|
stack.push(leftIndex);
|
|
if (rightIndex && d >= mu - tau) // Might not be necessary to test d
|
|
stack.push(rightIndex);
|
|
}
|
|
else {
|
|
if (rightIndex && d >= mu - tau)
|
|
stack.push(rightIndex);
|
|
if (leftIndex && d < mu + tau) // Might not be necessary to test d
|
|
stack.push(leftIndex);
|
|
}
|
|
}
|
|
|
|
var array = new Array(neighbors.size);
|
|
|
|
for (var i = neighbors.size - 1; i >= 0; i--)
|
|
array[i] = neighbors.pop();
|
|
|
|
return array;
|
|
};
|
|
|
|
/**
|
|
* Function used to retrieve every neighbors of query in the given radius.
|
|
*
|
|
* @param {number} radius - Radius.
|
|
* @param {any} query - The query.
|
|
* @return {array}
|
|
*/
|
|
VPTree.prototype.neighbors = function(radius, query) {
|
|
var neighbors = [],
|
|
stack = [0],
|
|
nodeIndex,
|
|
itemIndex,
|
|
vantagePoint,
|
|
leftIndex,
|
|
rightIndex,
|
|
mu,
|
|
d;
|
|
|
|
this.D = 0;
|
|
|
|
while (stack.length) {
|
|
nodeIndex = stack.pop();
|
|
itemIndex = this.nodes[nodeIndex];
|
|
vantagePoint = this.items[itemIndex];
|
|
|
|
// Distance between query & the current vantage point
|
|
d = this.distance(vantagePoint, query);
|
|
this.D++;
|
|
|
|
if (d <= radius)
|
|
neighbors.push({distance: d, item: vantagePoint});
|
|
|
|
leftIndex = this.lefts[nodeIndex];
|
|
rightIndex = this.rights[nodeIndex];
|
|
|
|
// We are a leaf
|
|
if (!leftIndex && !rightIndex)
|
|
continue;
|
|
|
|
mu = this.mus[nodeIndex];
|
|
|
|
if (d < mu) {
|
|
if (leftIndex && d < mu + radius)
|
|
stack.push(leftIndex);
|
|
if (rightIndex && d >= mu - radius) // Might not be necessary to test d
|
|
stack.push(rightIndex);
|
|
}
|
|
else {
|
|
if (rightIndex && d >= mu - radius)
|
|
stack.push(rightIndex);
|
|
if (leftIndex && d < mu + radius) // Might not be necessary to test d
|
|
stack.push(leftIndex);
|
|
}
|
|
}
|
|
|
|
return neighbors;
|
|
};
|
|
|
|
/**
|
|
* Convenience known methods.
|
|
*/
|
|
VPTree.prototype.inspect = function() {
|
|
var array = this.items.slice();
|
|
|
|
// Trick so that node displays the name of the constructor
|
|
Object.defineProperty(array, 'constructor', {
|
|
value: VPTree,
|
|
enumerable: false
|
|
});
|
|
|
|
return array;
|
|
};
|
|
|
|
if (typeof Symbol !== 'undefined')
|
|
VPTree.prototype[Symbol.for('nodejs.util.inspect.custom')] = VPTree.prototype.inspect;
|
|
|
|
/**
|
|
* Static @.from function taking an arbitrary iterable & converting it into
|
|
* a tree.
|
|
*
|
|
* @param {Iterable} iterable - Target iterable.
|
|
* @param {function} distance - Distance function to use.
|
|
* @return {VPTree}
|
|
*/
|
|
VPTree.from = function(iterable, distance) {
|
|
return new VPTree(distance, iterable);
|
|
};
|
|
|
|
/**
|
|
* Exporting.
|
|
*/
|
|
module.exports = VPTree;
|