You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					86 lines
				
				3.0 KiB
			
		
		
			
		
	
	
					86 lines
				
				3.0 KiB
			| 
											3 years ago
										 | // Copy-pasted from `PhoneNumberMatcher.js`.
 | ||
|  | 
 | ||
|  | import { PLUS_CHARS } from '../constants.js' | ||
|  | import { limit } from './util.js' | ||
|  | 
 | ||
|  | import { | ||
|  | 	isLatinLetter, | ||
|  | 	isInvalidPunctuationSymbol | ||
|  | } from './utf-8.js' | ||
|  | 
 | ||
// Bracket characters that may open or close a group inside a phone number:
// ASCII round and square brackets plus their full-width counterparts.
const OPENING_PARENS = '(\\[\uFF08\uFF3B'
const CLOSING_PARENS = ')\\]\uFF09\uFF3D'

// Character class matching one character that is not any of the brackets above.
const NON_PARENS = '[^' + OPENING_PARENS + CLOSING_PARENS + ']'

// Character class of the punctuation that may be at the start of a phone number:
// opening brackets and plus signs.
export const LEAD_CLASS = `[${OPENING_PARENS}${PLUS_CHARS}]`

// The same class, anchored to the beginning of the tested string.
const LEAD_CLASS_LEADING = new RegExp(`^${LEAD_CLASS}`)

// Limit on the number of pairs of brackets in a phone number
// (on top of the single optional leading bracket handled separately below).
const BRACKET_PAIR_LIMIT = limit(0, 3)

/**
 * Checks that the brackets in a whole candidate string are balanced and non-empty.
 * A candidate without any brackets passes too.
 *
 * The very first opening bracket is allowed to stay unclosed, and a closing bracket
 * may come first (its opening bracket could have been dropped). Every later opening
 * bracket has to be closed. Together with the leading one, this allows at most
 * four sets of brackets in a phone number.
 */
const MATCHING_BRACKETS_ENTIRE = new RegExp([
	'^',
	// Optional opening bracket at the very start.
	`(?:[${OPENING_PARENS}])?`,
	// Optional non-bracket run ended by a closing bracket: closes the leading
	// bracket, or stands alone when the leading bracket was dropped.
	`(?:${NON_PARENS}+[${CLOSING_PARENS}])?`,
	// At least one non-bracket character must follow.
	`${NON_PARENS}+`,
	// Further bracket pairs, each with something inside, up to the limit.
	`(?:[${OPENING_PARENS}]${NON_PARENS}+[${CLOSING_PARENS}])` + BRACKET_PAIR_LIMIT,
	// Anything bracket-free until the end.
	`${NON_PARENS}*`,
	'$'
].join(''))

/**
 * Recognizes text that looks like the page range of a publication, e.g.:
 * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
 * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
 *
 * Here "211-227 (2003)" must not be treated as a telephone number.
 */
const PUB_PAGES = /\d{1,5}-+\d{1,5}\s{0,4}\(\d{1,4}/
|  | 
 | ||
/**
 * Decides whether a substring found in a text is worth treating as a phone number candidate.
 *
 * A candidate is rejected when its brackets do not match (`MATCHING_BRACKETS_ENTIRE`)
 * or when it looks like publication pages (`PUB_PAGES`). Unless `leniency` is
 * `'POSSIBLE'`, it is also rejected when the character right before or right after
 * it in `text` is a Latin letter or an invalid punctuation symbol
 * (to skip cases like "abc8005001234" or "8005001234def").
 *
 * @param {string} candidate - The substring being examined.
 * @param {number} offset - Index in `text` at which `candidate` starts.
 * @param {string} text - The full text the candidate was found in.
 * @param {string} leniency - Leniency level; `'POSSIBLE'` skips the surrounding-character checks.
 * @returns {boolean} `true` if the candidate passes all checks, `false` otherwise.
 */
export default function isValidCandidate(candidate, offset, text, leniency)
{
	// Check the candidate doesn't contain any formatting
	// which would indicate that it really isn't a phone number.
	if (!MATCHING_BRACKETS_ENTIRE.test(candidate) || PUB_PAGES.test(candidate)) {
		// Explicit `false` (not a bare `return`) so that every rejection path
		// of this predicate yields the same boolean value.
		return false
	}

	// If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
	// by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
	if (leniency !== 'POSSIBLE')
	{
		// If the candidate is not at the start of the text,
		// and does not start with phone-number punctuation,
		// check the previous character.
		if (offset > 0 && !LEAD_CLASS_LEADING.test(candidate))
		{
			const previousChar = text[offset - 1]
			// Reject if it is a latin letter or an invalid punctuation symbol.
			if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
				return false
			}
		}

		// Index of the character that immediately follows the candidate in the text.
		const lastCharIndex = offset + candidate.length
		if (lastCharIndex < text.length)
		{
			const nextChar = text[lastCharIndex]
			if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
				return false
			}
		}
	}

	return true
}