You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					214 lines
				
				6.1 KiB
			
		
		
			
		
	
	
					214 lines
				
				6.1 KiB
			| 
											3 years ago
										 | /* | ||
|  |   Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation | ||
|  |   by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
 | ||
|  | */ | ||
|  | var EventEmitter = require('events').EventEmitter, | ||
|  |     inherits = require('util').inherits; | ||
|  | 
 | ||
/**
 * Compares `num` consecutive elements of two indexable sequences.
 *
 * @param {Buffer|Array} buf1 - First sequence.
 * @param {number} pos1 - Index in `buf1` where the comparison starts.
 * @param {Buffer|Array} buf2 - Second sequence.
 * @param {number} pos2 - Index in `buf2` where the comparison starts.
 * @param {number} num - Number of elements to compare.
 * @returns {boolean} `true` when all `num` element pairs are strictly equal
 *     (also when `num` is not positive), otherwise `false`.
 */
function jsmemcmp(buf1, pos1, buf2, pos2, num) {
  var offset = 0;
  while (offset < num) {
    // Bail out on the first differing pair.
    if (buf1[pos1 + offset] !== buf2[pos2 + offset])
      return false;
    offset++;
  }
  return true;
}
|  | 
 | ||
/**
 * Streaming Boyer-Moore-Horspool searcher.
 *
 * Feed the haystack in chunks through `push()`. For every stretch of data an
 * 'info' event is emitted as `(isMatch, data, start, end)`:
 *   - `isMatch` is true when the needle was found right after the reported
 *     data, false when the reported data is simply known to be needle-free.
 *   - `data`, `start`, `end` describe the non-needle bytes `data[start..end)`.
 *     They are omitted when there is nothing to report before a match.
 * The `data` buffer may be the internal lookbehind buffer, which is reused;
 * listeners must consume or copy the bytes synchronously.
 *
 * @param {string|Buffer} needle - Byte sequence to search for. Strings are
 *     converted with `Buffer.from(needle)`.
 */
function SBMH(needle) {
  if (typeof needle === 'string')
    needle = Buffer.from(needle);
  var i, needle_len = needle.length;

  // Public: stop searching once this many matches have been found.
  this.maxMatches = Infinity;
  // Public: number of matches found since construction / last reset().
  this.matches = 0;

  this._occ = new Array(256);
  this._lookbehind_size = 0;
  this._needle = needle;
  this._bufpos = 0;

  // Holds a trailing partial needle match carried over between chunks.
  // Zero-filled allocation avoids exposing uninitialized memory.
  this._lookbehind = Buffer.alloc(needle_len);

  // Initialize occurrence table: by default skip a full needle length.
  for (i = 0; i < 256; ++i)
    this._occ[i] = needle_len;

  // Populate occurrence table with analysis of the needle,
  // ignoring last letter.
  for (i = 0; i < needle_len - 1; ++i)
    this._occ[needle[i]] = needle_len - 1 - i;
}
inherits(SBMH, EventEmitter);

/**
 * Clears the match counter and any buffered partial match so the instance
 * can be reused for a new stream with the same needle.
 */
SBMH.prototype.reset = function() {
  this._lookbehind_size = 0;
  this.matches = 0;
  this._bufpos = 0;
};

/**
 * Searches the next chunk of the haystack, emitting 'info' events.
 *
 * @param {Buffer|string} chunk - Next piece of the haystack. Non-Buffer
 *     values are converted with `Buffer.from(chunk, 'binary')`.
 * @param {number} [pos=0] - Offset in `chunk` at which to start searching.
 * @returns {number|undefined} Offset in `chunk` up to which data was
 *     processed; less than `chunk.length` when `maxMatches` stopped the
 *     search early, `undefined` when `maxMatches` was already reached.
 */
SBMH.prototype.push = function(chunk, pos) {
  var r, chlen;
  if (!Buffer.isBuffer(chunk))
    chunk = Buffer.from(chunk, 'binary');
  chlen = chunk.length;
  this._bufpos = pos || 0;
  // Each feed stops after one match, so keep feeding until the chunk is
  // exhausted or the match limit is hit.
  while (r !== chlen && this.matches < this.maxMatches)
    r = this._sbmh_feed(chunk);
  return r;
};

/**
 * Runs one search pass over `data`, starting at `this._bufpos`, and stops at
 * the first match or at the end of `data`.
 *
 * @param {Buffer} data - Current chunk.
 * @returns {number} Offset just past the match, or `data.length` when no
 *     match was found in this pass.
 */
SBMH.prototype._sbmh_feed = function(data) {
  var len = data.length, needle = this._needle, needle_len = needle.length;

  // Positive: points to a position in `data`
  //           pos == 3 points to data[3]
  // Negative: points to a position in the lookbehind buffer
  //           pos == -2 points to lookbehind[lookbehind_size - 2]
  var pos = -this._lookbehind_size,
      last_needle_char = needle[needle_len - 1],
      occ = this._occ,
      lookbehind = this._lookbehind,
      ch;

  if (pos < 0) {
    // NOTE(review): this branch indexes `data` from 0 and `_bufpos` is only
    // added afterwards; it appears to assume push() is called with pos = 0
    // whenever the lookbehind buffer is non-empty -- confirm with callers.

    // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
    // search with character lookup code that considers both the
    // lookbehind buffer and the current round's haystack data.
    //
    // Loop until
    //   there is a match.
    // or until
    //   we've moved past the position that requires the
    //   lookbehind buffer. In this case we switch to the
    //   optimized loop.
    // or until
    //   the character to look at lies outside the haystack.
    while (pos < 0 && pos <= len - needle_len) {
      ch = this._sbmh_lookup_char(data, pos + needle_len - 1);

      if (ch === last_needle_char
          && this._sbmh_memcmp(data, pos, needle_len - 1)) {
        // Number of buffered bytes that precede the match. This must be
        // computed before the lookbehind size is reset, otherwise those
        // bytes would never be reported.
        var preceding_len = this._lookbehind_size + pos;
        this._lookbehind_size = 0;
        ++this.matches;
        if (preceding_len > 0)
          this.emit('info', true, lookbehind, 0, preceding_len);
        else
          this.emit('info', true);

        this._bufpos = pos + needle_len;
        return pos + needle_len;
      }
      pos += occ[ch];
    }

    // No match.

    if (pos < 0) {
      // There's too few data for Boyer-Moore-Horspool to run,
      // so let's use a different algorithm to skip as much as
      // we can.
      // Forward pos until
      //   the trailing part of lookbehind + data
      //   looks like the beginning of the needle
      // or until
      //   pos == 0
      while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos))
        pos++;
    }

    if (pos >= 0) {
      // Discard lookbehind buffer.
      this.emit('info', false, lookbehind, 0, this._lookbehind_size);
      this._lookbehind_size = 0;
    } else {
      // Cut off part of the lookbehind buffer that has
      // been processed and append the entire haystack
      // into it.
      var bytesToCutOff = this._lookbehind_size + pos;

      if (bytesToCutOff > 0) {
        // The cut off data is guaranteed not to contain the needle.
        this.emit('info', false, lookbehind, 0, bytesToCutOff);
        // Shift the surviving bytes [bytesToCutOff, size) to the front.
        lookbehind.copy(lookbehind, 0, bytesToCutOff, this._lookbehind_size);
      }
      this._lookbehind_size -= bytesToCutOff;

      data.copy(lookbehind, this._lookbehind_size);
      this._lookbehind_size += len;

      this._bufpos = len;
      return len;
    }
  }

  if (pos >= 0)
    pos += this._bufpos;

  // Lookbehind buffer is now empty. Perform Boyer-Moore-Horspool
  // search with optimized character lookup code that only considers
  // the current round's haystack data.
  while (pos <= len - needle_len) {
    ch = data[pos + needle_len - 1];

    if (ch === last_needle_char
        && data[pos] === needle[0]
        && jsmemcmp(needle, 0, data, pos, needle_len - 1)) {
      ++this.matches;
      if (pos > 0)
        this.emit('info', true, data, this._bufpos, pos);
      else
        this.emit('info', true);

      this._bufpos = pos + needle_len;
      return pos + needle_len;
    }
    pos += occ[ch];
  }

  // There was no match. If there's trailing haystack data that we cannot
  // match yet using the Boyer-Moore-Horspool algorithm (because the trailing
  // data is less than the needle size) then match using a modified
  // algorithm that starts matching from the beginning instead of the end.
  // Whatever trailing data is left after running this algorithm is added to
  // the lookbehind buffer.
  if (pos < len) {
    while (pos < len && (data[pos] !== needle[0]
                         || !jsmemcmp(data, pos, needle, 0, len - pos))) {
      ++pos;
    }
    if (pos < len) {
      data.copy(lookbehind, 0, pos, len);
      this._lookbehind_size = len - pos;
    }
  }

  // Everything until pos is guaranteed not to contain needle data.
  if (pos > 0)
    this.emit('info', false, data, this._bufpos, pos < len ? pos : len);

  this._bufpos = len;
  return len;
};

/**
 * Reads one byte from the virtual sequence "lookbehind buffer + data".
 *
 * @param {Buffer} data - Current chunk.
 * @param {number} pos - Non-negative: index into `data`. Negative: offset
 *     from the end of the valid lookbehind region.
 * @returns {number|undefined} The byte at that position.
 */
SBMH.prototype._sbmh_lookup_char = function(data, pos) {
  if (pos < 0)
    return this._lookbehind[this._lookbehind_size + pos];
  return data[pos];
};

/**
 * Checks whether the first `len` bytes of the needle equal the bytes of
 * "lookbehind buffer + data" starting at `pos`.
 *
 * @param {Buffer} data - Current chunk.
 * @param {number} pos - Start position (may be negative, see
 *     `_sbmh_lookup_char`).
 * @param {number} len - Number of bytes to compare.
 * @returns {boolean} `true` when all `len` bytes match.
 */
SBMH.prototype._sbmh_memcmp = function(data, pos, len) {
  for (var i = 0; i < len; ++i) {
    if (this._sbmh_lookup_char(data, pos + i) !== this._needle[i])
      return false;
  }
  return true;
};
|  | 
 | ||
|  | module.exports = SBMH; |