You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							283 lines
						
					
					
						
							6.4 KiB
						
					
					
				
			
		
		
	
	
							283 lines
						
					
					
						
							6.4 KiB
						
					
					
				| const util      = require('./util');
 | |
| const types     = require('./types');
 | |
| const sets      = require('./sets');
 | |
| const positions = require('./positions');
 | |
| 
 | |
| 
 | |
| module.exports = (regexpStr) => {
 | |
|   var i = 0, l, c,
 | |
|     start = { type: types.ROOT, stack: []},
 | |
| 
 | |
|     // Keep track of last clause/group and stack.
 | |
|     lastGroup = start,
 | |
|     last = start.stack,
 | |
|     groupStack = [];
 | |
| 
 | |
| 
 | |
|   var repeatErr = (i) => {
 | |
|     util.error(regexpStr, `Nothing to repeat at column ${i - 1}`);
 | |
|   };
 | |
| 
 | |
|   // Decode a few escaped characters.
 | |
|   var str = util.strToChars(regexpStr);
 | |
|   l = str.length;
 | |
| 
 | |
|   // Iterate through each character in string.
 | |
|   while (i < l) {
 | |
|     c = str[i++];
 | |
| 
 | |
|     switch (c) {
 | |
|       // Handle escaped characters, inclues a few sets.
 | |
|       case '\\':
 | |
|         c = str[i++];
 | |
| 
 | |
|         switch (c) {
 | |
|           case 'b':
 | |
|             last.push(positions.wordBoundary());
 | |
|             break;
 | |
| 
 | |
|           case 'B':
 | |
|             last.push(positions.nonWordBoundary());
 | |
|             break;
 | |
| 
 | |
|           case 'w':
 | |
|             last.push(sets.words());
 | |
|             break;
 | |
| 
 | |
|           case 'W':
 | |
|             last.push(sets.notWords());
 | |
|             break;
 | |
| 
 | |
|           case 'd':
 | |
|             last.push(sets.ints());
 | |
|             break;
 | |
| 
 | |
|           case 'D':
 | |
|             last.push(sets.notInts());
 | |
|             break;
 | |
| 
 | |
|           case 's':
 | |
|             last.push(sets.whitespace());
 | |
|             break;
 | |
| 
 | |
|           case 'S':
 | |
|             last.push(sets.notWhitespace());
 | |
|             break;
 | |
| 
 | |
|           default:
 | |
|             // Check if c is integer.
 | |
|             // In which case it's a reference.
 | |
|             if (/\d/.test(c)) {
 | |
|               last.push({ type: types.REFERENCE, value: parseInt(c, 10) });
 | |
| 
 | |
|             // Escaped character.
 | |
|             } else {
 | |
|               last.push({ type: types.CHAR, value: c.charCodeAt(0) });
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         break;
 | |
| 
 | |
| 
 | |
|       // Positionals.
 | |
|       case '^':
 | |
|         last.push(positions.begin());
 | |
|         break;
 | |
| 
 | |
|       case '$':
 | |
|         last.push(positions.end());
 | |
|         break;
 | |
| 
 | |
| 
 | |
|       // Handle custom sets.
 | |
|       case '[':
 | |
|         // Check if this class is 'anti' i.e. [^abc].
 | |
|         var not;
 | |
|         if (str[i] === '^') {
 | |
|           not = true;
 | |
|           i++;
 | |
|         } else {
 | |
|           not = false;
 | |
|         }
 | |
| 
 | |
|         // Get all the characters in class.
 | |
|         var classTokens = util.tokenizeClass(str.slice(i), regexpStr);
 | |
| 
 | |
|         // Increase index by length of class.
 | |
|         i += classTokens[1];
 | |
|         last.push({
 | |
|           type: types.SET,
 | |
|           set: classTokens[0],
 | |
|           not,
 | |
|         });
 | |
| 
 | |
|         break;
 | |
| 
 | |
| 
 | |
|       // Class of any character except \n.
 | |
|       case '.':
 | |
|         last.push(sets.anyChar());
 | |
|         break;
 | |
| 
 | |
| 
 | |
|       // Push group onto stack.
 | |
|       case '(':
 | |
|         // Create group.
 | |
|         var group = {
 | |
|           type: types.GROUP,
 | |
|           stack: [],
 | |
|           remember: true,
 | |
|         };
 | |
| 
 | |
|         c = str[i];
 | |
| 
 | |
|         // If if this is a special kind of group.
 | |
|         if (c === '?') {
 | |
|           c = str[i + 1];
 | |
|           i += 2;
 | |
| 
 | |
|           // Match if followed by.
 | |
|           if (c === '=') {
 | |
|             group.followedBy = true;
 | |
| 
 | |
|           // Match if not followed by.
 | |
|           } else if (c === '!') {
 | |
|             group.notFollowedBy = true;
 | |
| 
 | |
|           } else if (c !== ':') {
 | |
|             util.error(regexpStr,
 | |
|               `Invalid group, character '${c}'` +
 | |
|               ` after '?' at column ${i - 1}`);
 | |
|           }
 | |
| 
 | |
|           group.remember = false;
 | |
|         }
 | |
| 
 | |
|         // Insert subgroup into current group stack.
 | |
|         last.push(group);
 | |
| 
 | |
|         // Remember the current group for when the group closes.
 | |
|         groupStack.push(lastGroup);
 | |
| 
 | |
|         // Make this new group the current group.
 | |
|         lastGroup = group;
 | |
|         last = group.stack;
 | |
|         break;
 | |
| 
 | |
| 
 | |
|       // Pop group out of stack.
 | |
|       case ')':
 | |
|         if (groupStack.length === 0) {
 | |
|           util.error(regexpStr, `Unmatched ) at column ${i - 1}`);
 | |
|         }
 | |
|         lastGroup = groupStack.pop();
 | |
| 
 | |
|         // Check if this group has a PIPE.
 | |
|         // To get back the correct last stack.
 | |
|         last = lastGroup.options ?
 | |
|           lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;
 | |
|         break;
 | |
| 
 | |
| 
 | |
|       // Use pipe character to give more choices.
 | |
|       case '|':
 | |
|         // Create array where options are if this is the first PIPE
 | |
|         // in this clause.
 | |
|         if (!lastGroup.options) {
 | |
|           lastGroup.options = [lastGroup.stack];
 | |
|           delete lastGroup.stack;
 | |
|         }
 | |
| 
 | |
|         // Create a new stack and add to options for rest of clause.
 | |
|         var stack = [];
 | |
|         lastGroup.options.push(stack);
 | |
|         last = stack;
 | |
|         break;
 | |
| 
 | |
| 
 | |
|       // Repetition.
 | |
|       // For every repetition, remove last element from last stack
 | |
|       // then insert back a RANGE object.
 | |
|       // This design is chosen because there could be more than
 | |
|       // one repetition symbols in a regex i.e. `a?+{2,3}`.
 | |
|       case '{':
 | |
|         var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
 | |
|         if (rs !== null) {
 | |
|           if (last.length === 0) {
 | |
|             repeatErr(i);
 | |
|           }
 | |
|           min = parseInt(rs[1], 10);
 | |
|           max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
 | |
|           i += rs[0].length;
 | |
| 
 | |
|           last.push({
 | |
|             type: types.REPETITION,
 | |
|             min,
 | |
|             max,
 | |
|             value: last.pop(),
 | |
|           });
 | |
|         } else {
 | |
|           last.push({
 | |
|             type: types.CHAR,
 | |
|             value: 123,
 | |
|           });
 | |
|         }
 | |
|         break;
 | |
| 
 | |
|       case '?':
 | |
|         if (last.length === 0) {
 | |
|           repeatErr(i);
 | |
|         }
 | |
|         last.push({
 | |
|           type: types.REPETITION,
 | |
|           min: 0,
 | |
|           max: 1,
 | |
|           value: last.pop(),
 | |
|         });
 | |
|         break;
 | |
| 
 | |
|       case '+':
 | |
|         if (last.length === 0) {
 | |
|           repeatErr(i);
 | |
|         }
 | |
|         last.push({
 | |
|           type: types.REPETITION,
 | |
|           min: 1,
 | |
|           max: Infinity,
 | |
|           value: last.pop(),
 | |
|         });
 | |
|         break;
 | |
| 
 | |
|       case '*':
 | |
|         if (last.length === 0) {
 | |
|           repeatErr(i);
 | |
|         }
 | |
|         last.push({
 | |
|           type: types.REPETITION,
 | |
|           min: 0,
 | |
|           max: Infinity,
 | |
|           value: last.pop(),
 | |
|         });
 | |
|         break;
 | |
| 
 | |
| 
 | |
|       // Default is a character that is not `\[](){}?+*^$`.
 | |
|       default:
 | |
|         last.push({
 | |
|           type: types.CHAR,
 | |
|           value: c.charCodeAt(0),
 | |
|         });
 | |
|     }
 | |
| 
 | |
|   }
 | |
| 
 | |
|   // Check if any groups have not been closed.
 | |
|   if (groupStack.length !== 0) {
 | |
|     util.error(regexpStr, 'Unterminated group');
 | |
|   }
 | |
| 
 | |
|   return start;
 | |
| };
 | |
| 
 | |
| module.exports.types = types;
 |