You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					86 lines
				
				3.0 KiB
			
		
		
			
		
	
	
					86 lines
				
				3.0 KiB
			| 
								 
											3 years ago
										 
									 | 
							
								// Copy-pasted from `PhoneNumberMatcher.js`.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import { PLUS_CHARS } from '../constants.js'
							 | 
						||
| 
								 | 
							
								import { limit } from './util.js'
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import {
							 | 
						||
| 
								 | 
							
									isLatinLetter,
							 | 
						||
| 
								 | 
							
									isInvalidPunctuationSymbol
							 | 
						||
| 
								 | 
							
								} from './utf-8.js'
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Opening brackets, ASCII and fullwidth: `(`, `[`, U+FF08, U+FF3B.
								// The `[` is backslash-escaped because these strings are spliced
								// into RegExp character classes.
								const OPENING_PARENS = '(\\[\uFF08\uFF3B'
								// The matching closing brackets: `)`, `]`, U+FF09, U+FF3D.
								const CLOSING_PARENS = ')\\]\uFF09\uFF3D'
								// Source of a character class matching any one non-bracket character.
								const NON_PARENS = '[^' + OPENING_PARENS + CLOSING_PARENS + ']'

								// Source of a character class matching punctuation that may start
								// a phone number: an opening bracket or one of the imported `PLUS_CHARS`.
								export const LEAD_CLASS = '[' + OPENING_PARENS + PLUS_CHARS + ']'

								// Tests whether a string begins with one of the `LEAD_CLASS` characters.
								const LEAD_CLASS_LEADING = new RegExp(`^${LEAD_CLASS}`)

								// Repetition bound for bracket pairs inside a phone number.
								// NOTE(review): `limit` is imported from './util.js'; presumably it
								// returns a quantifier string such as `{0,3}` — confirm there.
								const BRACKET_PAIR_LIMIT = limit(0, 3)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Whole-string pattern verifying that brackets in a candidate are balanced
								 * and non-empty. A candidate with no brackets at all also passes.
								 *
								 * The pattern is assembled from these parts, in order:
								 *  - an optional opening bracket at the very start (it may stay unclosed);
								 *  - an optional run of non-bracket characters ending in a closing bracket
								 *    (covers a leading bracket that was dropped from the candidate);
								 *  - at least one non-bracket character;
								 *  - bracket pairs with non-empty content, repeated per `BRACKET_PAIR_LIMIT`;
								 *  - any trailing non-bracket characters.
								 *
								 * Together with the leading bracket this caps a phone number at four sets
								 * of brackets.
								 */
								const MATCHING_BRACKETS_ENTIRE = (() => {
									const anyOpening = '[' + OPENING_PARENS + ']'
									const anyClosing = '[' + CLOSING_PARENS + ']'

									// An opening bracket at the beginning need not be closed.
									const leadingOpening = '(?:' + anyOpening + ')?'
									// A closing bracket may appear first if its opener was dropped.
									const leadingClosing = '(?:' + NON_PARENS + '+' + anyClosing + ')?'
									// Every later bracket must be closed and must enclose something.
									const bracketPair = '(?:' + anyOpening + NON_PARENS + '+' + anyClosing + ')'

									return new RegExp(
										'^' +
										leadingOpening +
										leadingClosing +
										NON_PARENS + '+' +
										bracketPair + BRACKET_PAIR_LIMIT +
										NON_PARENS + '*' +
										'$'
									)
								})()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
							 | 
						||
| 
								 | 
							
								 * Matches strings that look like publication pages. Example:
							 | 
						||
| 
								 | 
							
								 * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
							 | 
						||
| 
								 | 
							
								 * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * The string "211-227 (2003)" is not a telephone number.
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								// Pattern pieces: 1-5 digits, one or more hyphens, 1-5 digits, up to four
								// whitespace characters, then "(" followed by 1-4 digits.
								// It is unanchored, so it matches anywhere inside the tested string, and it
								// has no `g`/`y` flag, so repeated `.test()` calls carry no `lastIndex` state.
								const PUB_PAGES = /\d{1,5}-+\d{1,5}\s{0,4}\(\d{1,4}/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Decides whether a substring found in `text` is an acceptable phone number
								 * candidate, based on its bracket structure and on the characters that
								 * immediately surround it.
								 *
								 * @param {string} candidate - The candidate substring.
								 * @param {number} offset - Index in `text` at which `candidate` starts.
								 * @param {string} text - The full text the candidate was taken from.
								 * @param {string} leniency - When it equals `'POSSIBLE'` the surrounding-character
								 *   checks are skipped; any other value enables them.
								 * @returns {boolean} `true` if the candidate passes every check, otherwise `false`.
								 */
								export default function isValidCandidate(candidate, offset, text, leniency)
								{
									// Check the candidate doesn't contain any formatting
									// which would indicate that it really isn't a phone number:
									// unbalanced/empty brackets or a "pages (year)" publication reference.
									if (!MATCHING_BRACKETS_ENTIRE.test(candidate) || PUB_PAGES.test(candidate)) {
										// Always return a boolean, like every other exit of this function
										// (this branch used to fall out with `undefined`).
										return false
									}

									// For any leniency other than 'POSSIBLE', also skip numbers that are
									// surrounded by Latin alphabetic characters or invalid punctuation,
									// to skip cases like abc8005001234 or 8005001234def.
									if (leniency !== 'POSSIBLE')
									{
										// If the candidate is not at the start of the text,
										// and does not start with phone-number punctuation,
										// check the previous character.
										if (offset > 0 && !LEAD_CLASS_LEADING.test(candidate))
										{
											const previousChar = text[offset - 1]
											// Reject if it is a latin letter or an invalid punctuation symbol.
											if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
												return false
											}
										}

										// Index of the first character after the candidate.
										const nextCharIndex = offset + candidate.length
										// Only check it if the candidate does not run to the end of the text.
										if (nextCharIndex < text.length)
										{
											const nextChar = text[nextCharIndex]
											if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
												return false
											}
										}
									}

									return true
								}
							 |