You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					214 lines
				
				6.1 KiB
			
		
		
			
		
	
	
					214 lines
				
				6.1 KiB
			| 
								 
											3 years ago
										 
									 | 
							
								/*
							 | 
						||
| 
								 | 
							
								  Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation
							 | 
						||
| 
								 | 
							
								  by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								var EventEmitter = require('events').EventEmitter,
							 | 
						||
| 
								 | 
							
								    inherits = require('util').inherits;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Compares `num` consecutive elements of two indexable byte sequences.
								 *
								 * @param {Buffer|Array} buf1 - First sequence.
								 * @param {number} pos1 - Index in `buf1` where the comparison starts.
								 * @param {Buffer|Array} buf2 - Second sequence.
								 * @param {number} pos2 - Index in `buf2` where the comparison starts.
								 * @param {number} num - Number of elements to compare.
								 * @returns {boolean} `true` when all `num` element pairs are strictly equal
								 *   (trivially `true` when `num` is 0), `false` at the first mismatch.
								 */
								function jsmemcmp(buf1, pos1, buf2, pos2, num) {
								  for (var i = 0; i < num; ++i) {
								    if (buf1[pos1 + i] !== buf2[pos2 + i])
								      return false;
								  }
								  return true;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Streaming Boyer-Moore-Horspool searcher.
								 *
								 * Chunks are fed in with `push()`. Results are reported through the 'info'
								 * event, emitted as `('info', isMatch[, data, start, end])`:
								 *   - `isMatch` is `true` when an occurrence of the needle was found;
								 *   - when `data` is given, `data[start..end)` holds stream bytes that are
								 *     not part of a needle occurrence (for a match, the bytes before it).
								 * `data` may be the searcher's internal lookbehind buffer, which is reused,
								 * so listeners must consume it synchronously.
								 *
								 * Public fields: `maxMatches` (searching stops once `matches` reaches it,
								 * defaults to Infinity) and `matches` (occurrences found so far).
								 *
								 * @param {string|Buffer} needle - Byte sequence to search for. A string is
								 *   converted with `Buffer.from(needle)`.
								 */
								function SBMH(needle) {
								  if (typeof needle === 'string')
								    needle = Buffer.from(needle);
								  var i, j, needle_len = needle.length;

								  this.maxMatches = Infinity;
								  this.matches = 0;

								  this._occ = new Array(256);
								  this._lookbehind_size = 0;
								  this._needle = needle;
								  this._bufpos = 0;

								  // Holds the tail of earlier chunks that may be the start of a needle
								  // occurrence. Zero-filled allocation instead of the deprecated
								  // `new Buffer(size)`.
								  this._lookbehind = Buffer.alloc(needle_len);

								  // Initialize occurrence table: by default skip a whole needle length.
								  for (j = 0; j < 256; ++j)
								    this._occ[j] = needle_len;

								  // Populate occurrence table with analysis of the needle,
								  // ignoring last letter.
								  for (i = 0; i < needle_len - 1; ++i)
								    this._occ[needle[i]] = needle_len - 1 - i;
								}
								inherits(SBMH, EventEmitter);

								/**
								 * Clears the search state (lookbehind contents, match counter and buffer
								 * position) so the instance can be reused. `maxMatches` is left as is.
								 */
								SBMH.prototype.reset = function() {
								  this._lookbehind_size = 0;
								  this.matches = 0;
								  this._bufpos = 0;
								};

								/**
								 * Feeds one chunk to the searcher, emitting 'info' events as it goes.
								 *
								 * @param {Buffer|string} chunk - Data to search. Anything that is not a
								 *   Buffer is converted with `Buffer.from(chunk, 'binary')`.
								 * @param {number} [pos] - Start position within `chunk`; defaults to 0.
								 * @returns {number|undefined} Result of the last feed step: the chunk length
								 *   when the whole chunk was consumed, or the position just past the last
								 *   match when `maxMatches` stopped the search early. `undefined` when
								 *   `maxMatches` had already been reached before the call.
								 */
								SBMH.prototype.push = function(chunk, pos) {
								  var r, chlen;
								  if (!Buffer.isBuffer(chunk))
								    chunk = Buffer.from(chunk, 'binary');
								  chlen = chunk.length;
								  this._bufpos = pos || 0;
								  // Each feed step handles at most one match; keep going until the chunk
								  // is exhausted or the match limit is hit.
								  while (r !== chlen && this.matches < this.maxMatches)
								    r = this._sbmh_feed(chunk);
								  return r;
								};

								/**
								 * Runs one search step over `data`, starting at `this._bufpos` (or inside
								 * the lookbehind buffer when it is not empty). Stops at the first match.
								 *
								 * @param {Buffer} data - Current chunk.
								 * @returns {number} Position in `data` just past the match, or `data.length`
								 *   when no match was found in this step.
								 */
								SBMH.prototype._sbmh_feed = function(data) {
								  var len = data.length, needle = this._needle, needle_len = needle.length;

								  // Positive: points to a position in `data`
								  //           pos == 3 points to data[3]
								  // Negative: points to a position in the lookbehind buffer
								  //           pos == -2 points to lookbehind[lookbehind_size - 2]
								  var pos = -this._lookbehind_size,
								      last_needle_char = needle[needle_len - 1],
								      occ = this._occ,
								      lookbehind = this._lookbehind,
								      ch;

								  if (pos < 0) {
								    // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
								    // search with character lookup code that considers both the
								    // lookbehind buffer and the current round's haystack data.
								    //
								    // Loop until
								    //   there is a match.
								    // or until
								    //   we've moved past the position that requires the
								    //   lookbehind buffer. In this case we switch to the
								    //   optimized loop.
								    // or until
								    //   the character to look at lies outside the haystack.
								    while (pos < 0 && pos <= len - needle_len) {
								      ch = this._sbmh_lookup_char(data, pos + needle_len - 1);

								      if (ch === last_needle_char
								          && this._sbmh_memcmp(data, pos, needle_len - 1)) {
								        // Number of lookbehind bytes that precede the match. This must be
								        // computed before the lookbehind size is reset, otherwise those
								        // bytes would never be reported.
								        var unmatched_len = this._lookbehind_size + pos;

								        this._lookbehind_size = 0;
								        ++this.matches;
								        if (unmatched_len > 0)
								          this.emit('info', true, lookbehind, 0, unmatched_len);
								        else
								          this.emit('info', true);

								        this._bufpos = pos + needle_len;
								        return pos + needle_len;
								      } else
								        pos += occ[ch];
								    }

								    // No match.

								    if (pos < 0) {
								      // There's too few data for Boyer-Moore-Horspool to run,
								      // so let's use a different algorithm to skip as much as
								      // we can.
								      // Forward pos until
								      //   the trailing part of lookbehind + data
								      //   looks like the beginning of the needle
								      // or until
								      //   pos == 0
								      while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos))
								        pos++;
								    }

								    if (pos >= 0) {
								      // Discard lookbehind buffer.
								      this.emit('info', false, lookbehind, 0, this._lookbehind_size);
								      this._lookbehind_size = 0;
								    } else {
								      // Cut off part of the lookbehind buffer that has
								      // been processed and append the entire haystack
								      // into it.
								      var bytesToCutOff = this._lookbehind_size + pos;

								      if (bytesToCutOff > 0) {
								        // The cut off data is guaranteed not to contain the needle.
								        this.emit('info', false, lookbehind, 0, bytesToCutOff);
								      }

								      // Shift the remaining bytes to the front. The last argument of
								      // Buffer#copy is an end index (not a length), so it is the current
								      // lookbehind size.
								      lookbehind.copy(lookbehind, 0, bytesToCutOff, this._lookbehind_size);
								      this._lookbehind_size -= bytesToCutOff;

								      data.copy(lookbehind, this._lookbehind_size);
								      this._lookbehind_size += len;

								      this._bufpos = len;
								      return len;
								    }
								  }

								  // Resume from where the previous step (or the caller) left off.
								  if (pos >= 0)
								    pos += this._bufpos;

								  // Lookbehind buffer is now empty. Perform Boyer-Moore-Horspool
								  // search with optimized character lookup code that only considers
								  // the current round's haystack data.
								  while (pos <= len - needle_len) {
								    ch = data[pos + needle_len - 1];

								    if (ch === last_needle_char
								        && data[pos] === needle[0]
								        && jsmemcmp(needle, 0, data, pos, needle_len - 1)) {
								      ++this.matches;
								      if (pos > 0)
								        this.emit('info', true, data, this._bufpos, pos);
								      else
								        this.emit('info', true);

								      this._bufpos = pos + needle_len;
								      return pos + needle_len;
								    } else
								      pos += occ[ch];
								  }

								  // There was no match. If there's trailing haystack data that we cannot
								  // match yet using the Boyer-Moore-Horspool algorithm (because the trailing
								  // data is less than the needle size) then match using a modified
								  // algorithm that starts matching from the beginning instead of the end.
								  // Whatever trailing data is left after running this algorithm is added to
								  // the lookbehind buffer.
								  if (pos < len) {
								    while (pos < len && (data[pos] !== needle[0]
								                         || !jsmemcmp(data, pos, needle, 0, len - pos))) {
								      ++pos;
								    }
								    if (pos < len) {
								      data.copy(lookbehind, 0, pos, pos + (len - pos));
								      this._lookbehind_size = len - pos;
								    }
								  }

								  // Everything until pos is guaranteed not to contain needle data.
								  if (pos > 0)
								    this.emit('info', false, data, this._bufpos, pos < len ? pos : len);

								  this._bufpos = len;
								  return len;
								};

								/**
								 * Reads one byte from the virtual sequence "lookbehind + data".
								 *
								 * @param {Buffer} data - Current chunk.
								 * @param {number} pos - Index into `data` when non-negative; when negative,
								 *   an offset counted back from the end of the used lookbehind bytes.
								 * @returns {number} The byte at that position.
								 */
								SBMH.prototype._sbmh_lookup_char = function(data, pos) {
								  if (pos < 0)
								    return this._lookbehind[this._lookbehind_size + pos];
								  else
								    return data[pos];
								};

								/**
								 * Checks whether the first `len` bytes of the needle equal the `len` bytes
								 * of "lookbehind + data" starting at `pos`.
								 *
								 * @param {Buffer} data - Current chunk.
								 * @param {number} pos - Start position (may be negative, see
								 *   `_sbmh_lookup_char`).
								 * @param {number} len - Number of bytes to compare.
								 * @returns {boolean} `true` when all compared bytes are equal.
								 */
								SBMH.prototype._sbmh_memcmp = function(data, pos, len) {
								  var i = 0;

								  while (i < len) {
								    if (this._sbmh_lookup_char(data, pos + i) === this._needle[i])
								      ++i;
								    else
								      return false;
								  }
								  return true;
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								module.exports = SBMH;
							 |