You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					70 lines
				
				3.0 KiB
			
		
		
			
		
	
	
					70 lines
				
				3.0 KiB
			| 
											3 years ago
/** Used to compose unicode character classes. */
// Each value is the *interior* of a regex character class, written as regex
// source text without the surrounding brackets, so that fragments can be
// concatenated before being wrapped in `[...]` or `[^...]`.
var rsAstralRange = '\\ud800-\\udfff', // every UTF-16 surrogate code unit
    rsComboMarksRange = '\\u0300-\\u036f',
    reComboHalfMarksRange = '\\ufe20-\\ufe2f',
    rsComboSymbolsRange = '\\u20d0-\\u20ff',
    // Union of the three combining-mark ranges above.
    rsComboRange = rsComboMarksRange + reComboHalfMarksRange + rsComboSymbolsRange,
    rsDingbatRange = '\\u2700-\\u27bf',
    // a-z plus Latin-1 lowercase letters; \xf7 is skipped because it is the
    // division sign listed in rsMathOpRange.
    rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff',
    // The four Latin-1 signs: not (\xac), plus-minus (\xb1), times (\xd7), divide (\xf7).
    rsMathOpRange = '\\xac\\xb1\\xd7\\xf7',
    // Everything from \x00 through \xbf except the ASCII digits and letters.
    rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf',
    rsPunctuationRange = '\\u2000-\\u206f',
    // An explicit list of whitespace characters rather than `\s`.
    rsSpaceRange = ' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000',
    // A-Z plus Latin-1 uppercase letters; \xd7 is skipped because it is the
    // multiplication sign listed in rsMathOpRange.
    rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde',
    rsVarRange = '\\ufe0e\\ufe0f',
    // Characters treated as separators between words.
    rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange;
|  | 
 | ||
/**
 * Used to compose unicode capture groups.
 *
 * Each value is regex source text for one token: either a bracketed character
 * class built from the `*Range` fragments, or a small self-contained pattern.
 */
var rsApos = "['\u2019]", // straight or right-single-quote apostrophe
    rsBreak = '[' + rsBreakRange + ']',
    rsCombo = '[' + rsComboRange + ']',
    rsDigits = '\\d+',
    rsDingbat = '[' + rsDingbatRange + ']',
    rsLower = '[' + rsLowerRange + ']',
    // Any code unit that is not a surrogate, break, digit, dingbat, or letter.
    // NOTE: rsDigits (`\d+`) is spliced into the class, so besides digits the
    // class also excludes a literal `+`.
    rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']',
    // Surrogate pairs \ud83c\udffb-\ud83c\udfff, i.e. U+1F3FB through U+1F3FF.
    rsFitz = '\\ud83c[\\udffb-\\udfff]',
    rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')',
    rsNonAstral = '[^' + rsAstralRange + ']',
    // Exactly two surrogate pairs in the range U+1F1E6 through U+1F1FF.
    rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}',
    // A high surrogate followed by a low surrogate.
    rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]',
    rsUpper = '[' + rsUpperRange + ']',
    rsZWJ = '\\u200d';
|  | 
 | ||
/**
 * Used to compose unicode regexes.
 *
 * Larger regex-source fragments assembled from the single-token patterns.
 */
var rsMiscLower = '(?:' + rsLower + '|' + rsMisc + ')',
    rsMiscUpper = '(?:' + rsUpper + '|' + rsMisc + ')',
    // Optional contraction suffix: an apostrophe followed by d, ll, m, re, s, t, or ve.
    rsOptContrLower = '(?:' + rsApos + '(?:d|ll|m|re|s|t|ve))?',
    rsOptContrUpper = '(?:' + rsApos + '(?:D|LL|M|RE|S|T|VE))?',
    reOptMod = rsModifier + '?',
    rsOptVar = '[' + rsVarRange + ']?',
    // Zero or more repetitions of: joiner, then a non-astral unit / regional
    // pair / surrogate pair, then an optional variation selector and modifier.
    rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*',
    // Ordinals: optional leading digits, then 1st / 2nd / 3rd, or a digit other
    // than 1-3 followed by `th`. The lookahead requires a word boundary or an
    // opposite-case letter / underscore right after the suffix.
    rsOrdLower = '\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])',
    rsOrdUpper = '\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])',
    // Trailing part of an emoji: variation selector, modifier, joined parts.
    rsSeq = rsOptVar + reOptMod + rsOptJoin,
    rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq;
|  | 
 | ||
/**
 * Used to match complex or compound words.
 *
 * The alternatives are tried in the order listed, so earlier entries take
 * precedence at any given position. The `g` flag makes `String#match` return
 * every match rather than only the first.
 */
var reUnicodeWord = RegExp([
  // Optional capital + lowercase run (+ contraction), ending before a break, a capital, or end of input.
  rsUpper + '?' + rsLower + '+' + rsOptContrLower + '(?=' + [rsBreak, rsUpper, '$'].join('|') + ')',
  // Uppercase/misc run (+ contraction), ending before a break, a capital that starts a new word, or end of input.
  rsMiscUpper + '+' + rsOptContrUpper + '(?=' + [rsBreak, rsUpper + rsMiscLower, '$'].join('|') + ')',
  // Optional capital + lowercase/misc run (+ contraction), with no lookahead constraint.
  rsUpper + '?' + rsMiscLower + '+' + rsOptContrLower,
  // Run of capitals (+ contraction).
  rsUpper + '+' + rsOptContrUpper,
  rsOrdUpper,
  rsOrdLower,
  rsDigits,
  rsEmoji
].join('|'), 'g');
|  | 
 | ||
/**
 * Splits a Unicode `string` into an array of its words.
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {Array} Returns the words of `string`, or an empty array when
 *  nothing matches.
 */
function unicodeWords(string) {
  // `match` yields `null` when there are no matches; normalize that to `[]`
  // so callers always receive an array.
  return string.match(reUnicodeWord) || [];
}
|  | 
 | ||
// CommonJS export: the module's sole export is the word splitter.
module.exports = unicodeWords;