You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					283 lines
				
				6.4 KiB
			
		
		
			
		
	
	
					283 lines
				
				6.4 KiB
			| 
								 
											3 years ago
										 
									 | 
							
								const util      = require('./util');
							 | 
						||
| 
								 | 
							
								const types     = require('./types');
							 | 
						||
| 
								 | 
							
								const sets      = require('./sets');
							 | 
						||
| 
								 | 
							
								const positions = require('./positions');
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								module.exports = (regexpStr) => {
							 | 
						||
| 
								 | 
							
								  var i = 0, l, c,
							 | 
						||
| 
								 | 
							
								    start = { type: types.ROOT, stack: []},
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    // Keep track of last clause/group and stack.
							 | 
						||
| 
								 | 
							
								    lastGroup = start,
							 | 
						||
| 
								 | 
							
								    last = start.stack,
							 | 
						||
| 
								 | 
							
								    groupStack = [];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  var repeatErr = (i) => {
							 | 
						||
| 
								 | 
							
								    util.error(regexpStr, `Nothing to repeat at column ${i - 1}`);
							 | 
						||
| 
								 | 
							
								  };
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Decode a few escaped characters.
							 | 
						||
| 
								 | 
							
								  var str = util.strToChars(regexpStr);
							 | 
						||
| 
								 | 
							
								  l = str.length;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Iterate through each character in string.
							 | 
						||
| 
								 | 
							
								  while (i < l) {
							 | 
						||
| 
								 | 
							
								    c = str[i++];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    switch (c) {
							 | 
						||
| 
								 | 
							
								      // Handle escaped characters, inclues a few sets.
							 | 
						||
| 
								 | 
							
								      case '\\':
							 | 
						||
| 
								 | 
							
								        c = str[i++];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        switch (c) {
							 | 
						||
| 
								 | 
							
								          case 'b':
							 | 
						||
| 
								 | 
							
								            last.push(positions.wordBoundary());
							 | 
						||
| 
								 | 
							
								            break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 'B':
							 | 
						||
| 
								 | 
							
								            last.push(positions.nonWordBoundary());
							 | 
						||
| 
								 | 
							
								            break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 'w':
							 | 
						||
| 
								 | 
							
								            last.push(sets.words());
							 | 
						||
| 
								 | 
							
								            break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 'W':
							 | 
						||
| 
								 | 
							
								            last.push(sets.notWords());
							 | 
						||
| 
								 | 
							
								            break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 'd':
							 | 
						||
| 
								 | 
							
								            last.push(sets.ints());
							 | 
						||
| 
								 | 
							
								            break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 'D':
							 | 
						||
| 
								 | 
							
								            last.push(sets.notInts());
							 | 
						||
| 
								 | 
							
								            break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 's':
							 | 
						||
| 
								 | 
							
								            last.push(sets.whitespace());
							 | 
						||
| 
								 | 
							
								            break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          case 'S':
							 | 
						||
| 
								 | 
							
								            last.push(sets.notWhitespace());
							 | 
						||
| 
								 | 
							
								            break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          default:
							 | 
						||
| 
								 | 
							
								            // Check if c is integer.
							 | 
						||
| 
								 | 
							
								            // In which case it's a reference.
							 | 
						||
| 
								 | 
							
								            if (/\d/.test(c)) {
							 | 
						||
| 
								 | 
							
								              last.push({ type: types.REFERENCE, value: parseInt(c, 10) });
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            // Escaped character.
							 | 
						||
| 
								 | 
							
								            } else {
							 | 
						||
| 
								 | 
							
								              last.push({ type: types.CHAR, value: c.charCodeAt(0) });
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      // Positionals.
							 | 
						||
| 
								 | 
							
								      case '^':
							 | 
						||
| 
								 | 
							
								        last.push(positions.begin());
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case '$':
							 | 
						||
| 
								 | 
							
								        last.push(positions.end());
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      // Handle custom sets.
							 | 
						||
| 
								 | 
							
								      case '[':
							 | 
						||
| 
								 | 
							
								        // Check if this class is 'anti' i.e. [^abc].
							 | 
						||
| 
								 | 
							
								        var not;
							 | 
						||
| 
								 | 
							
								        if (str[i] === '^') {
							 | 
						||
| 
								 | 
							
								          not = true;
							 | 
						||
| 
								 | 
							
								          i++;
							 | 
						||
| 
								 | 
							
								        } else {
							 | 
						||
| 
								 | 
							
								          not = false;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        // Get all the characters in class.
							 | 
						||
| 
								 | 
							
								        var classTokens = util.tokenizeClass(str.slice(i), regexpStr);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        // Increase index by length of class.
							 | 
						||
| 
								 | 
							
								        i += classTokens[1];
							 | 
						||
| 
								 | 
							
								        last.push({
							 | 
						||
| 
								 | 
							
								          type: types.SET,
							 | 
						||
| 
								 | 
							
								          set: classTokens[0],
							 | 
						||
| 
								 | 
							
								          not,
							 | 
						||
| 
								 | 
							
								        });
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      // Class of any character except \n.
							 | 
						||
| 
								 | 
							
								      case '.':
							 | 
						||
| 
								 | 
							
								        last.push(sets.anyChar());
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      // Push group onto stack.
							 | 
						||
| 
								 | 
							
								      case '(':
							 | 
						||
| 
								 | 
							
								        // Create group.
							 | 
						||
| 
								 | 
							
								        var group = {
							 | 
						||
| 
								 | 
							
								          type: types.GROUP,
							 | 
						||
| 
								 | 
							
								          stack: [],
							 | 
						||
| 
								 | 
							
								          remember: true,
							 | 
						||
| 
								 | 
							
								        };
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        c = str[i];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        // If if this is a special kind of group.
							 | 
						||
| 
								 | 
							
								        if (c === '?') {
							 | 
						||
| 
								 | 
							
								          c = str[i + 1];
							 | 
						||
| 
								 | 
							
								          i += 2;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          // Match if followed by.
							 | 
						||
| 
								 | 
							
								          if (c === '=') {
							 | 
						||
| 
								 | 
							
								            group.followedBy = true;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          // Match if not followed by.
							 | 
						||
| 
								 | 
							
								          } else if (c === '!') {
							 | 
						||
| 
								 | 
							
								            group.notFollowedBy = true;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          } else if (c !== ':') {
							 | 
						||
| 
								 | 
							
								            util.error(regexpStr,
							 | 
						||
| 
								 | 
							
								              `Invalid group, character '${c}'` +
							 | 
						||
| 
								 | 
							
								              ` after '?' at column ${i - 1}`);
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          group.remember = false;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        // Insert subgroup into current group stack.
							 | 
						||
| 
								 | 
							
								        last.push(group);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        // Remember the current group for when the group closes.
							 | 
						||
| 
								 | 
							
								        groupStack.push(lastGroup);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        // Make this new group the current group.
							 | 
						||
| 
								 | 
							
								        lastGroup = group;
							 | 
						||
| 
								 | 
							
								        last = group.stack;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      // Pop group out of stack.
							 | 
						||
| 
								 | 
							
								      case ')':
							 | 
						||
| 
								 | 
							
								        if (groupStack.length === 0) {
							 | 
						||
| 
								 | 
							
								          util.error(regexpStr, `Unmatched ) at column ${i - 1}`);
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        lastGroup = groupStack.pop();
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        // Check if this group has a PIPE.
							 | 
						||
| 
								 | 
							
								        // To get back the correct last stack.
							 | 
						||
| 
								 | 
							
								        last = lastGroup.options ?
							 | 
						||
| 
								 | 
							
								          lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      // Use pipe character to give more choices.
							 | 
						||
| 
								 | 
							
								      case '|':
							 | 
						||
| 
								 | 
							
								        // Create array where options are if this is the first PIPE
							 | 
						||
| 
								 | 
							
								        // in this clause.
							 | 
						||
| 
								 | 
							
								        if (!lastGroup.options) {
							 | 
						||
| 
								 | 
							
								          lastGroup.options = [lastGroup.stack];
							 | 
						||
| 
								 | 
							
								          delete lastGroup.stack;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        // Create a new stack and add to options for rest of clause.
							 | 
						||
| 
								 | 
							
								        var stack = [];
							 | 
						||
| 
								 | 
							
								        lastGroup.options.push(stack);
							 | 
						||
| 
								 | 
							
								        last = stack;
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      // Repetition.
							 | 
						||
| 
								 | 
							
								      // For every repetition, remove last element from last stack
							 | 
						||
| 
								 | 
							
								      // then insert back a RANGE object.
							 | 
						||
| 
								 | 
							
								      // This design is chosen because there could be more than
							 | 
						||
| 
								 | 
							
								      // one repetition symbols in a regex i.e. `a?+{2,3}`.
							 | 
						||
| 
								 | 
							
								      case '{':
							 | 
						||
| 
								 | 
							
								        var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
							 | 
						||
| 
								 | 
							
								        if (rs !== null) {
							 | 
						||
| 
								 | 
							
								          if (last.length === 0) {
							 | 
						||
| 
								 | 
							
								            repeatErr(i);
							 | 
						||
| 
								 | 
							
								          }
							 | 
						||
| 
								 | 
							
								          min = parseInt(rs[1], 10);
							 | 
						||
| 
								 | 
							
								          max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
							 | 
						||
| 
								 | 
							
								          i += rs[0].length;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								          last.push({
							 | 
						||
| 
								 | 
							
								            type: types.REPETITION,
							 | 
						||
| 
								 | 
							
								            min,
							 | 
						||
| 
								 | 
							
								            max,
							 | 
						||
| 
								 | 
							
								            value: last.pop(),
							 | 
						||
| 
								 | 
							
								          });
							 | 
						||
| 
								 | 
							
								        } else {
							 | 
						||
| 
								 | 
							
								          last.push({
							 | 
						||
| 
								 | 
							
								            type: types.CHAR,
							 | 
						||
| 
								 | 
							
								            value: 123,
							 | 
						||
| 
								 | 
							
								          });
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case '?':
							 | 
						||
| 
								 | 
							
								        if (last.length === 0) {
							 | 
						||
| 
								 | 
							
								          repeatErr(i);
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        last.push({
							 | 
						||
| 
								 | 
							
								          type: types.REPETITION,
							 | 
						||
| 
								 | 
							
								          min: 0,
							 | 
						||
| 
								 | 
							
								          max: 1,
							 | 
						||
| 
								 | 
							
								          value: last.pop(),
							 | 
						||
| 
								 | 
							
								        });
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case '+':
							 | 
						||
| 
								 | 
							
								        if (last.length === 0) {
							 | 
						||
| 
								 | 
							
								          repeatErr(i);
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        last.push({
							 | 
						||
| 
								 | 
							
								          type: types.REPETITION,
							 | 
						||
| 
								 | 
							
								          min: 1,
							 | 
						||
| 
								 | 
							
								          max: Infinity,
							 | 
						||
| 
								 | 
							
								          value: last.pop(),
							 | 
						||
| 
								 | 
							
								        });
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      case '*':
							 | 
						||
| 
								 | 
							
								        if (last.length === 0) {
							 | 
						||
| 
								 | 
							
								          repeatErr(i);
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        last.push({
							 | 
						||
| 
								 | 
							
								          type: types.REPETITION,
							 | 
						||
| 
								 | 
							
								          min: 0,
							 | 
						||
| 
								 | 
							
								          max: Infinity,
							 | 
						||
| 
								 | 
							
								          value: last.pop(),
							 | 
						||
| 
								 | 
							
								        });
							 | 
						||
| 
								 | 
							
								        break;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								      // Default is a character that is not `\[](){}?+*^$`.
							 | 
						||
| 
								 | 
							
								      default:
							 | 
						||
| 
								 | 
							
								        last.push({
							 | 
						||
| 
								 | 
							
								          type: types.CHAR,
							 | 
						||
| 
								 | 
							
								          value: c.charCodeAt(0),
							 | 
						||
| 
								 | 
							
								        });
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Check if any groups have not been closed.
							 | 
						||
| 
								 | 
							
								  if (groupStack.length !== 0) {
							 | 
						||
| 
								 | 
							
								    util.error(regexpStr, 'Unterminated group');
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  return start;
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Also expose the `types` module as a property of the exported function.
								module.exports.types = types;
							 |