You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							86 lines
						
					
					
						
							3.0 KiB
						
					
					
				
			
		
		
	
	
							86 lines
						
					
					
						
							3.0 KiB
						
					
					
				// Copy-pasted from `PhoneNumberMatcher.js`.
 | 
						|
 | 
						|
import { PLUS_CHARS } from '../constants.js'
 | 
						|
import { limit } from './util.js'
 | 
						|
 | 
						|
import {
 | 
						|
	isLatinLetter,
 | 
						|
	isInvalidPunctuationSymbol
 | 
						|
} from './utf-8.js'
 | 
						|
 | 
						|
// Opening bracket characters, written so that they can be embedded in a RegExp
// character class: ASCII `(` and `[` (the latter escaped), followed by
// U+FF08 and U+FF3B.
const OPENING_PARENS = '(\\[\uFF08\uFF3B'

// Closing bracket characters, written so that they can be embedded in a RegExp
// character class: ASCII `)` and `]` (the latter escaped), followed by
// U+FF09 and U+FF3D.
const CLOSING_PARENS = ')\\]\uFF09\uFF3D'

// RegExp source of a character class matching any single character
// that is neither an opening nor a closing bracket.
const NON_PARENS = `[^${OPENING_PARENS}${CLOSING_PARENS}]`
 | 
						|
 | 
						|
// Punctuation that may be at the start of a phone number - brackets and plus signs.
// This is the RegExp source of a character class built from the opening brackets
// and the imported `PLUS_CHARS`.
export const LEAD_CLASS = `[${OPENING_PARENS}${PLUS_CHARS}]`

// Tests whether a string starts with one of the `LEAD_CLASS` characters.
const LEAD_CLASS_LEADING = new RegExp('^' + LEAD_CLASS)

// Limit on the number of pairs of brackets in a phone number.
// NOTE(review): `limit(0, 3)` presumably produces a `{0,3}`-style quantifier — confirm in `./util.js`.
const BRACKET_PAIR_LIMIT = limit(0, 3)
 | 
						|
 | 
						|
/**
 * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
 * This also checks that there is something inside the brackets. Having no brackets at all is also
 * fine.
 *
 * An opening bracket at the beginning may not be closed, but subsequent ones should be.  It's
 * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
 * closing bracket first. We limit the sets of brackets in a phone number to four.
 *
 * The pattern is anchored at both ends, so it must match the entire candidate string.
 */
const MATCHING_BRACKETS_ENTIRE = new RegExp(
	'^' +
	// An optional opening bracket at the very start (it may stay unclosed).
	`(?:[${OPENING_PARENS}])?` +
	// Optionally, some non-bracket characters followed by a closing bracket.
	`(?:${NON_PARENS}+[${CLOSING_PARENS}])?` +
	// At least one non-bracket character.
	`${NON_PARENS}+` +
	// Subsequent non-empty bracket pairs, repeated according to `BRACKET_PAIR_LIMIT`.
	`(?:[${OPENING_PARENS}]${NON_PARENS}+[${CLOSING_PARENS}])${BRACKET_PAIR_LIMIT}` +
	// Any trailing non-bracket characters.
	`${NON_PARENS}*` +
	'$'
)
 | 
						|
 | 
						|
/**
 * Matches strings that look like publication pages. Example:
 * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
 * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
 *
 * The string "211-227 (2003)" is not a telephone number.
 *
 * The pattern is not anchored: it looks anywhere in the string for
 * 1-5 digits, one or more dashes, 1-5 digits, up to four whitespace characters,
 * and then an opening parenthesis followed by at least one digit.
 */
const PUB_PAGES = /\d{1,5}-+\d{1,5}\s{0,4}\(\d{1,4}/
 | 
						|
 | 
						|
/**
 * Checks whether a phone number candidate found in a text looks plausible,
 * judging by its bracket structure and by the characters surrounding it.
 *
 * @param  {string} candidate - The candidate substring.
 * @param  {number} offset - The index in `text` at which `candidate` starts.
 * @param  {string} text - The text in which the candidate was found.
 * @param  {string} leniency - Leniency level. With `'POSSIBLE'` the checks of the surrounding characters are skipped.
 * @return {boolean} `true` if the candidate passed all the checks, `false` otherwise.
 */
export default function isValidCandidate(candidate, offset, text, leniency)
{
	// Check the candidate doesn't contain any formatting
	// which would indicate that it really isn't a phone number.
	if (!MATCHING_BRACKETS_ENTIRE.test(candidate) || PUB_PAGES.test(candidate)) {
		// Return an explicit `false`, same as every other rejection below.
		return false
	}

	// If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
	// by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
	if (leniency !== 'POSSIBLE')
	{
		// If the candidate is not at the start of the text,
		// and does not start with phone-number punctuation,
		// check the previous character.
		if (offset > 0 && !LEAD_CLASS_LEADING.test(candidate))
		{
			const previousChar = text[offset - 1]
			// Reject if it is a latin letter or an invalid punctuation symbol.
			if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
				return false
			}
		}

		// The index of the character that immediately follows the candidate.
		const nextCharIndex = offset + candidate.length
		// If the candidate is not at the end of the text, check the next character.
		if (nextCharIndex < text.length)
		{
			const nextChar = text[nextCharIndex]
			if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
				return false
			}
		}
	}

	return true
}