You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					80 lines
				
				3.2 KiB
			
		
		
			
		
	
	
					80 lines
				
				3.2 KiB
			| 
								 
											3 years ago
										 
									 | 
							
								"use strict";
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Object.defineProperty(exports, "__esModule", {
							 | 
						||
| 
								 | 
							
								  value: true
							 | 
						||
| 
								 | 
							
								});
							 | 
						||
| 
								 | 
							
								exports.LEAD_CLASS = void 0;
							 | 
						||
| 
								 | 
							
								exports["default"] = isValidCandidate;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var _constants = require("../constants.js");
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var _util = require("./util.js");
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var _utf = require("./utf-8.js");
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Copy-pasted from `PhoneNumberMatcher.js`.
								//
								// Bracket characters, written so they can be dropped straight into a
								// regular-expression character class (hence the escaped "[" and "]").
								// Each list holds the ASCII round and square bracket followed by the
								// fullwidth forms (U+FF08 / U+FF3B for opening, U+FF09 / U+FF3D for closing).
								var OPENING_PARENS = "(\\[\uFF08\uFF3B";
								var CLOSING_PARENS = ")\\]\uFF09\uFF3D";

								// Character class matching any single character that is not a bracket.
								var NON_PARENS = "[^" + OPENING_PARENS + CLOSING_PARENS + "]";

								// Punctuation that may be at the start of a phone number - brackets and plus signs.
								var LEAD_CLASS = "[" + OPENING_PARENS + _constants.PLUS_CHARS + "]";
								exports.LEAD_CLASS = LEAD_CLASS;

								// `LEAD_CLASS` anchored to the beginning of the tested string.
								var LEAD_CLASS_LEADING = new RegExp('^' + LEAD_CLASS);

								// Limit on the number of pairs of brackets in a phone number.
								// NOTE(review): `limit(0, 3)` presumably yields a `{0,3}`-style quantifier - confirm in `./util.js`.
								var BRACKET_PAIR_LIMIT = (0, _util.limit)(0, 3);

								/**
								 * Pattern that verifies brackets are balanced across the entire candidate.
								 * Every opening bracket inside a phone number is expected to be closed, and
								 * there must be something between the brackets. A candidate with no brackets
								 * at all also passes.
								 *
								 * Two exceptions are tolerated at the very beginning: an opening bracket that
								 * is never closed, and a closing bracket whose opening partner was dropped.
								 * The sets of brackets in a phone number are limited to four.
								 */
								var MATCHING_BRACKETS_ENTIRE = new RegExp([
								  '^',
								  // Optional opening bracket at the very start (it may stay unclosed).
								  "(?:[" + OPENING_PARENS + "])?",
								  // Optional run of non-bracket characters ended by a closing bracket.
								  "(?:" + NON_PARENS + "+" + "[" + CLOSING_PARENS + "])?",
								  // At least one non-bracket character.
								  NON_PARENS + "+",
								  // Further complete bracket pairs, each with non-empty content.
								  "(?:[" + OPENING_PARENS + "]" + NON_PARENS + "+[" + CLOSING_PARENS + "])" + BRACKET_PAIR_LIMIT,
								  // Any trailing non-bracket characters.
								  NON_PARENS + "*",
								  '$'
								].join(''));

								/**
								 * Detects text that looks like a page range in a publication reference, e.g.
								 * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
								 * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
								 *
								 * Here "211-227 (2003)" must not be mistaken for a telephone number.
								 */
								var PUB_PAGES = /\d{1,5}-+\d{1,5}\s{0,4}\(\d{1,4}/;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Decides whether a substring of `text` is acceptable as a phone number candidate,
								 * judging only by its formatting and by the characters directly around it.
								 *
								 * @param {string} candidate - The substring being considered.
								 * @param {number} offset - Index in `text` at which `candidate` starts.
								 * @param {string} text - The full text that `candidate` was taken from.
								 * @param {string} leniency - When equal to `'POSSIBLE'` the neighbouring-character
								 *   checks are skipped; any other value enables them.
								 * @returns {boolean} `true` when the candidate passes every check, `false` otherwise.
								 */
								function isValidCandidate(candidate, offset, text, leniency) {
								  // Check the candidate doesn't contain any formatting
								  // which would indicate that it really isn't a phone number:
								  // unbalanced brackets, or the shape of a publication page range.
								  // An explicit `false` keeps the return type boolean on every path.
								  if (!MATCHING_BRACKETS_ENTIRE.test(candidate) || PUB_PAGES.test(candidate)) {
								    return false;
								  }

								  // With 'POSSIBLE' leniency the surrounding characters are not inspected.
								  if (leniency === 'POSSIBLE') {
								    return true;
								  }

								  // For stricter leniencies, skip numbers that are surrounded by Latin alphabetic
								  // characters, e.g. abc8005001234 or 8005001234def.
								  // NOTE(review): exact character sets are defined by the `./utf-8.js` helpers.

								  // If the candidate is not at the start of the text,
								  // and does not start with phone-number punctuation,
								  // check the previous character.
								  if (offset > 0 && !LEAD_CLASS_LEADING.test(candidate)) {
								    var previousChar = text[offset - 1];

								    // Reject if it is a latin letter or an invalid punctuation symbol.
								    if ((0, _utf.isInvalidPunctuationSymbol)(previousChar) || (0, _utf.isLatinLetter)(previousChar)) {
								      return false;
								    }
								  }

								  // Index of the first character after the candidate (despite the original name,
								  // this is one past the candidate's last character).
								  var nextCharIndex = offset + candidate.length;

								  // Only look at the following character when the candidate does not end the text.
								  if (nextCharIndex < text.length) {
								    var nextChar = text[nextCharIndex];

								    if ((0, _utf.isInvalidPunctuationSymbol)(nextChar) || (0, _utf.isLatinLetter)(nextChar)) {
								      return false;
								    }
								  }

								  return true;
								}
							 | 
						||
| 
								 | 
							
								//# sourceMappingURL=isValidCandidate.js.map
							 |