/**
 * Finite State Machine generation utilities
 */

/**
 * A single state in a finite state machine.
 *
 * Properties set on every instance:
 * - `j`:  object mapping an exact input (character or token name) to the
 *         next state
 * - `jr`: ordered list of `[RegExp, State]` pairs for regex-match transitions
 * - `jd`: default state to transition to when nothing else matches, or null
 * - `t`:  the accepting token type, if any; a state without a token does not
 *         emit anything
 *
 * @param {string|class} [token] token to emit when this state is reached
 */
function State(token) {
  this.j = {};
  this.jr = [];
  this.jd = null;
  this.t = token;
}

State.prototype = {
  /**
   * @returns {boolean} true if this state has a token to emit
   */
  accepts: function accepts() {
    return !!this.t;
  },

  /**
   * Short for "take transition", this is a method for building/working with
   * state machines.
   *
   * If a state already exists for the given input, returns it (replacing its
   * token first when a token is given).
   *
   * If no state exists, a new one is created and registered as the direct
   * transition for the input. When the input would otherwise have been
   * handled by a regex or default transition, the new state starts out as a
   * copy of that state's transitions (and inherits its token unless one is
   * given), so it keeps behaving like the state it shadows.
   *
   * If a state is given for the second argument, that state will be
   * transitioned to on the given input regardless of what that input
   * previously did.
   *
   * @param {string} input character or token to transition on
   * @param {Token|State} [tokenOrState] token to emit on the resulting state,
   * or an existing state to transition to
   * @returns {State} state taken after the given input
   */
  tt: function tt(input, tokenOrState) {
    if (tokenOrState && tokenOrState.j) {
      // Anything carrying a `j` map is treated as a State: point the direct
      // transition straight at it, replacing whatever was there before.
      this.j[input] = tokenOrState;
      return tokenOrState;
    }

    var token = tokenOrState;

    // See if there's a direct state transition (not regex or default)
    var nextState = this.j[input];

    if (nextState) {
      if (token) {
        nextState.t = token; // overwrites previous token
      }

      return nextState;
    }

    // Create a new state for this input
    nextState = makeState();

    // Take the transition using the usual default mechanisms
    var templateState = takeT(this, input);

    if (templateState) {
      // Some regex/default transition already handles this input; make a
      // prime state based on it. The regex list is copied into the new
      // state's own array so later additions do not leak into the template.
      Object.assign(nextState.j, templateState.j);
      nextState.jr.push.apply(nextState.jr, templateState.jr);
      nextState.jd = templateState.jd;
      nextState.t = token || templateState.t;
    } else {
      nextState.t = token;
    }

    this.j[input] = nextState;
    return nextState;
  }
};
/**
 * Utility function to create a state with no token without using the `new`
 * keyword at the call site (reduced file size when minified).
 *
 * @returns {State} a newly constructed state
 */
var makeState = function makeState() {
  return new State();
};
/**
 * Like makeState, but the created state is constructed with the given token,
 * making it an accepting state that emits that token.
 *
 * @param {Token} token token to emit when the state is reached
 * @returns {State} a newly constructed state carrying the token
 */
var makeAcceptingState = function makeAcceptingState(token) {
  return new State(token);
};
/**
 * Create a transition from startState to nextState via the given input.
 *
 * Transitions are stored as keys on the `startState.j` object. If startState
 * already has a (truthy) transition for the input, it is left untouched: the
 * first transition registered for an input wins.
 *
 * @param {State} startState transition from this starting state
 * @param {Token} input via this input character or other concrete token type
 * @param {State} nextState to this next state
 */
var makeT = function makeT(startState, input, nextState) {
  if (!startState.j[input]) {
    startState.j[input] = nextState;
  }
};
/**
 * Create a regex-match transition from startState to nextState. The pair is
 * appended to the end of `startState.jr`; no check is made for an existing
 * entry with the same regex.
 *
 * @param {State} startState transition from this starting state
 * @param {RegExp} regex Regular expression to match on input
 * @param {State} nextState transition to this next state if there's a regex
 * match
 */
var makeRegexT = function makeRegexT(startState, regex, nextState) {
  startState.jr.push([regex, nextState]);
};
/**
 * Follow the transition from the given state on the given input.
 *
 * Lookup order:
 * 1. a direct transition stored under `state.j[input]`
 * 2. the first entry of `state.jr` whose regex matches the input
 * 3. the default transition `state.jd`
 *
 * @param {State} state
 * @param {Token} input character or other concrete token type to transition
 * on
 * @returns {?State} the next state, if any
 */
var takeT = function takeT(state, input) {
  // Direct transitions are a plain object key lookup and take priority
  var nextState = state.j[input];

  if (nextState) {
    return nextState;
  }

  // Regex transitions are tried in the order they were added
  for (var i = 0; i < state.jr.length; i++) {
    var regex = state.jr[i][0];
    var regexNextState = state.jr[i][1];

    if (regex.test(input)) {
      return regexNextState;
    }
  }

  // Nowhere left to jump! Return default, if any
  return state.jd;
};
/**
 * Similar to makeT, but takes a list of characters that all transition from
 * startState to the same nextState. Each character is registered through
 * makeT, in order.
 *
 * @param {State} startState transition from this starting state
 * @param {Array} chars inputs that should each lead to nextState
 * @param {State} nextState state to transition to
 */
var makeMultiT = function makeMultiT(startState, chars, nextState) {
  for (var i = 0; i < chars.length; i++) {
    makeT(startState, chars[i], nextState);
  }
};
/**
 * Set up a list of multiple transitions at once. transitions is a list of
 * tuples, where the first element is the transition's input character and
 * the second is the state to transition to. Each tuple is registered through
 * makeT, in order.
 *
 * @param {State} startState transition from this starting state
 * @param {Array} transitions list of `[input, nextState]` tuples
 */
var makeBatchT = function makeBatchT(startState, transitions) {
  for (var i = 0; i < transitions.length; i++) {
    var input = transitions[i][0];
    var nextState = transitions[i][1];
    makeT(startState, input, nextState);
  }
};
/**
 * For state machines that transition on characters only; given a non-empty
 * target string, generates states (if required) for each consecutive
 * substring of characters starting from the beginning of the string. The
 * last character transitions to the given endState. All other "in between"
 * characters transition to states produced by defaultStateFactory.
 *
 * Existing direct transitions along the string are followed rather than
 * replaced. If the whole string (or an empty string) is already covered by
 * existing transitions, nothing is added and endState is not linked in.
 *
 * This turns the state machine into a Trie-like data structure (rather than
 * an intelligently-designed DFA).
 *
 * @param {State} state state to start the chain from
 * @param {string} str characters to chain transitions for
 * @param {State} endState state reached after the last character of str
 * @param {Function} defaultStateFactory called with no arguments to create
 * each intermediate state
 * @returns {Array|undefined} an empty array when no transitions were added,
 * otherwise undefined
 */
var makeChainT = function makeChainT(state, str, endState, defaultStateFactory) {
  var i = 0;
  var len = str.length;
  var nextState;

  // Walk as far as existing direct transitions already cover the string
  while (i < len && (nextState = state.j[str[i]])) {
    state = nextState;
    i++;
  }

  if (i >= len) {
    return []; // no new tokens were added
  }

  // Create intermediate states for every remaining character but the last
  while (i < len - 1) {
    nextState = defaultStateFactory();
    makeT(state, str[i], nextState);
    state = nextState;
    i++;
  }

  makeT(state, str[len - 1], endState);
};
/******************************************************************************
  Text Tokens
  Tokens composed of strings
******************************************************************************/

// Each token type is a string constant whose value equals its own name.

// A valid web domain token
var DOMAIN = 'DOMAIN';
var LOCALHOST = 'LOCALHOST'; // special case of domain

// Valid top-level domain (see tlds.js)
var TLD = 'TLD';

// Any sequence of digits 0-9
var NUM = 'NUM';

// A web URL protocol. Supported types include
// - `http:`
// - `https:`
// - `ftp:`
// - `ftps:`
// - user-defined custom protocols
var PROTOCOL = 'PROTOCOL';

// Start of the email URI protocol
var MAILTO = 'MAILTO'; // mailto:

// Any number of consecutive whitespace characters that are not newline
var WS = 'WS';

// New line (unix style)
var NL = 'NL'; // \n

// Opening/closing bracket classes
var OPENBRACE = 'OPENBRACE'; // {
var OPENBRACKET = 'OPENBRACKET'; // [
var OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // <
var OPENPAREN = 'OPENPAREN'; // (
var CLOSEBRACE = 'CLOSEBRACE'; // }
var CLOSEBRACKET = 'CLOSEBRACKET'; // ]
var CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // >
var CLOSEPAREN = 'CLOSEPAREN'; // )

// Various symbols
var AMPERSAND = 'AMPERSAND'; // &
var APOSTROPHE = 'APOSTROPHE'; // '
var ASTERISK = 'ASTERISK'; // *
var AT = 'AT'; // @
var BACKSLASH = 'BACKSLASH'; // \
var BACKTICK = 'BACKTICK'; // `
var CARET = 'CARET'; // ^
var COLON = 'COLON'; // :
var COMMA = 'COMMA'; // ,
var DOLLAR = 'DOLLAR'; // $
var DOT = 'DOT'; // .
var EQUALS = 'EQUALS'; // =
var EXCLAMATION = 'EXCLAMATION'; // !
var HYPHEN = 'HYPHEN'; // -
var PERCENT = 'PERCENT'; // %
var PIPE = 'PIPE'; // |
var PLUS = 'PLUS'; // +
var POUND = 'POUND'; // #
var QUERY = 'QUERY'; // ?
var QUOTE = 'QUOTE'; // "
var SEMI = 'SEMI'; // ;
var SLASH = 'SLASH'; // /
var TILDE = 'TILDE'; // ~
var UNDERSCORE = 'UNDERSCORE'; // _

// Default token - anything that is not one of the above
var SYM = 'SYM';

// Frozen, prototype-less namespace object exposing every text token type
// declared above under its own name.
var text = /*#__PURE__*/Object.freeze({
  __proto__: null,
  DOMAIN: DOMAIN,
  LOCALHOST: LOCALHOST,
  TLD: TLD,
  NUM: NUM,
  PROTOCOL: PROTOCOL,
  MAILTO: MAILTO,
  WS: WS,
  NL: NL,
  OPENBRACE: OPENBRACE,
  OPENBRACKET: OPENBRACKET,
  OPENANGLEBRACKET: OPENANGLEBRACKET,
  OPENPAREN: OPENPAREN,
  CLOSEBRACE: CLOSEBRACE,
  CLOSEBRACKET: CLOSEBRACKET,
  CLOSEANGLEBRACKET: CLOSEANGLEBRACKET,
  CLOSEPAREN: CLOSEPAREN,
  AMPERSAND: AMPERSAND,
  APOSTROPHE: APOSTROPHE,
  ASTERISK: ASTERISK,
  AT: AT,
  BACKSLASH: BACKSLASH,
  BACKTICK: BACKTICK,
  CARET: CARET,
  COLON: COLON,
  COMMA: COMMA,
  DOLLAR: DOLLAR,
  DOT: DOT,
  EQUALS: EQUALS,
  EXCLAMATION: EXCLAMATION,
  HYPHEN: HYPHEN,
  PERCENT: PERCENT,
  PIPE: PIPE,
  PLUS: PLUS,
  POUND: POUND,
  QUERY: QUERY,
  QUOTE: QUOTE,
  SEMI: SEMI,
  SLASH: SLASH,
  TILDE: TILDE,
  UNDERSCORE: UNDERSCORE,
  SYM: SYM
});
// NOTE: punycode versions of IDNs are not included here because these will not
// be as commonly used without the http prefix anyway and linkify will already
// force-encode those.
// To be updated with the values in this list
// http://data.iana.org/TLD/tlds-alpha-by-domain.txt
// Version 2021022800, Last Updated Sun Feb 28 07:07:01 2021 UTC
var tlds = 'aaa \
|
|
|
|
|
aarp \
|
|
|
|
|
abarth \
|
|
|
|
|
abb \
|
|
|
|
|
abbott \
|
|
|
|
|
abbvie \
|
|
|
|
|
abc \
|
|
|
|
|
able \
|
|
|
|
|
abogado \
|
|
|
|
|
abudhabi \
|
|
|
|
|
ac \
|
|
|
|
|
academy \
|
|
|
|
|
accenture \
|
|
|
|
|
accountant \
|
|
|
|
|
accountants \
|
|
|
|
|
aco \
|
|
|
|
|
actor \
|
|
|
|
|
ad \
|
|
|
|
|
adac \
|
|
|
|
|
ads \
|
|
|
|
|
adult \
|
|
|
|
|
ae \
|
|
|
|
|
aeg \
|
|
|
|
|
aero \
|
|
|
|
|
aetna \
|
|
|
|
|
af \
|
|
|
|
|
afamilycompany \
|
|
|
|
|
afl \
|
|
|
|
|
africa \
|
|
|
|
|
ag \
|
|
|
|
|
agakhan \
|
|
|
|
|
agency \
|
|
|
|
|
ai \
|
|
|
|
|
aig \
|
|
|
|
|
airbus \
|
|
|
|
|
airforce \
|
|
|
|
|
airtel \
|
|
|
|
|
akdn \
|
|
|
|
|
al \
|
|
|
|
|
alfaromeo \
|
|
|
|
|
alibaba \
|
|
|
|
|
alipay \
|
|
|
|
|
allfinanz \
|
|
|
|
|
allstate \
|
|
|
|
|
ally \
|
|
|
|
|
alsace \
|
|
|
|
|
alstom \
|
|
|
|
|
am \
|
|
|
|
|
amazon \
|
|
|
|
|
americanexpress \
|
|
|
|
|
americanfamily \
|
|
|
|
|
amex \
|
|
|
|
|
amfam \
|
|
|
|
|
amica \
|
|
|
|
|
amsterdam \
|
|
|
|
|
analytics \
|
|
|
|
|
android \
|
|
|
|
|
anquan \
|
|
|
|
|
anz \
|
|
|
|
|
ao \
|
|
|
|
|
aol \
|
|
|
|
|
apartments \
|
|
|
|
|
app \
|
|
|
|
|
apple \
|
|
|
|
|
aq \
|
|
|
|
|
aquarelle \
|
|
|
|
|
ar \
|
|
|
|
|
arab \
|
|
|
|
|
aramco \
|
|
|
|
|
archi \
|
|
|
|
|
army \
|
|
|
|
|
arpa \
|
|
|
|
|
art \
|
|
|
|
|
arte \
|
|
|
|
|
as \
|
|
|
|
|
asda \
|
|
|
|
|
asia \
|
|
|
|
|
associates \
|
|
|
|
|
at \
|
|
|
|
|
athleta \
|
|
|
|
|
attorney \
|
|
|
|
|
au \
|
|
|
|
|
auction \
|
|
|
|
|
audi \
|
|
|
|
|
audible \
|
|
|
|
|
audio \
|
|
|
|
|
auspost \
|
|
|
|
|
author \
|
|
|
|
|
auto \
|
|
|
|
|
autos \
|
|
|
|
|
avianca \
|
|
|
|
|
aw \
|
|
|
|
|
aws \
|
|
|
|
|
ax \
|
|
|
|
|
axa \
|
|
|
|
|
az \
|
|
|
|
|
azure \
|
|
|
|
|
ba \
|
|
|
|
|
baby \
|
|
|
|
|
baidu \
|
|
|
|
|
banamex \
|
|
|
|
|
bananarepublic \
|
|
|
|
|
band \
|
|
|
|
|
bank \
|
|
|
|
|
bar \
|
|
|
|
|
barcelona \
|
|
|
|
|
barclaycard \
|
|
|
|
|
barclays \
|
|
|
|
|
barefoot \
|
|
|
|
|
bargains \
|
|
|
|
|
baseball \
|
|
|
|
|
basketball \
|
|
|
|
|
bauhaus \
|
|
|
|
|
bayern \
|
|
|
|
|
bb \
|
|
|
|
|
bbc \
|
|
|
|
|
bbt \
|
|
|
|
|
bbva \
|
|
|
|
|
bcg \
|
|
|
|
|
bcn \
|
|
|
|
|
bd \
|
|
|
|
|
be \
|
|
|
|
|
beats \
|
|
|
|
|
beauty \
|
|
|
|
|
beer \
|
|
|
|
|
bentley \
|
|
|
|
|
berlin \
|
|
|
|
|
best \
|
|
|
|
|
bestbuy \
|
|
|
|
|
bet \
|
|
|
|
|
bf \
|
|
|
|
|
bg \
|
|
|
|
|
bh \
|
|
|
|
|
bharti \
|
|
|
|
|
bi \
|
|
|
|
|
bible \
|
|
|
|
|
bid \
|
|
|
|
|
bike \
|
|
|
|
|
bing \
|
|
|
|
|
bingo \
|
|
|
|
|
bio \
|
|
|
|
|
biz \
|
|
|
|
|
bj \
|
|
|
|
|
black \
|
|
|
|
|
blackfriday \
|
|
|
|
|
blockbuster \
|
|
|
|
|
blog \
|
|
|
|
|
bloomberg \
|
|
|
|
|
blue \
|
|
|
|
|
bm \
|
|
|
|
|
bms \
|
|
|
|
|
bmw \
|
|
|
|
|
bn \
|
|
|
|
|
bnpparibas \
|
|
|
|
|
bo \
|
|
|
|
|
boats \
|
|
|
|
|
boehringer \
|
|
|
|
|
bofa \
|
|
|
|
|
bom \
|
|
|
|
|
bond \
|
|
|
|
|
boo \
|
|
|
|
|
book \
|
|
|
|
|
booking \
|
|
|
|
|
bosch \
|
|
|
|
|
bostik \
|
|
|
|
|
boston \
|
|
|
|
|
bot \
|
|
|
|
|
boutique \
|
|
|
|
|
box \
|
|
|
|
|
br \
|
|
|
|
|
bradesco \
|
|
|
|
|
bridgestone \
|
|
|
|
|
broadway \
|
|
|
|
|
broker \
|
|
|
|
|
brother \
|
|
|
|
|
brussels \
|
|
|
|
|
bs \
|
|
|
|
|
bt \
|
|
|
|
|
budapest \
|
|
|
|
|
bugatti \
|
|
|
|
|
build \
|
|
|
|
|
builders \
|
|
|
|
|
business \
|
|
|
|
|
buy \
|
|
|
|
|
buzz \
|
|
|
|
|
bv \
|
|
|
|
|
bw \
|
|
|
|
|
by \
|
|
|
|
|
bz \
|
|
|
|
|
bzh \
|
|
|
|
|
ca \
|
|
|
|
|
cab \
|
|
|
|
|
cafe \
|
|
|
|
|
cal \
|
|
|
|
|
call \
|
|
|
|
|
calvinklein \
|
|
|
|
|
cam \
|
|
|
|
|
camera \
|
|
|
|
|
camp \
|
|
|
|
|
cancerresearch \
|
|
|
|
|
canon \
|
|
|
|
|
capetown \
|
|
|
|
|
capital \
|
|
|
|
|
capitalone \
|
|
|
|
|
car \
|
|
|
|
|
caravan \
|
|
|
|
|
cards \
|
|
|
|
|
care \
|
|
|
|
|
career \
|
|
|
|
|
careers \
|
|
|
|
|
cars \
|
|
|
|
|
casa \
|
|
|
|
|
case \
|
|
|
|
|
cash \
|
|
|
|
|
casino \
|
|
|
|
|
cat \
|
|
|
|
|
catering \
|
|
|
|
|
catholic \
|
|
|
|
|
cba \
|
|
|
|
|
cbn \
|
|
|
|
|
cbre \
|
|
|
|
|
cbs \
|
|
|
|
|
cc \
|
|
|
|
|
cd \
|
|
|
|
|
center \
|
|
|
|
|
ceo \
|
|
|
|
|
cern \
|
|
|
|
|
cf \
|
|
|
|
|
cfa \
|
|
|
|
|
cfd \
|
|
|
|
|
cg \
|
|
|
|
|
ch \
|
|
|
|
|
chanel \
|
|
|
|
|
channel \
|
|
|
|
|
charity \
|
|
|
|
|
chase \
|
|
|
|
|
chat \
|
|
|
|
|
cheap \
|
|
|
|
|
chintai \
|
|
|
|
|
christmas \
|
|
|
|
|
chrome \
|
|
|
|
|
church \
|
|
|
|
|
ci \
|
|
|
|
|
cipriani \
|
|
|
|
|
circle \
|
|
|
|
|
cisco \
|
|
|
|
|
citadel \
|
|
|
|
|
citi \
|
|
|
|
|
citic \
|
|
|
|
|
city \
|
|
|
|
|
cityeats \
|
|
|
|
|
ck \
|
|
|
|
|
cl \
|
|
|
|
|
claims \
|
|
|
|
|
cleaning \
|
|
|
|
|
click \
|
|
|
|
|
clinic \
|
|
|
|
|
clinique \
|
|
|
|
|
clothing \
|
|
|
|
|
cloud \
|
|
|
|
|
club \
|
|
|
|
|
clubmed \
|
|
|
|
|
cm \
|
|
|
|
|
cn \
|
|
|
|
|
co \
|
|
|
|
|
coach \
|
|
|
|
|
codes \
|
|
|
|
|
coffee \
|
|
|
|
|
college \
|
|
|
|
|
cologne \
|
|
|
|
|
com \
|
|
|
|
|
comcast \
|
|
|
|
|
commbank \
|
|
|
|
|
community \
|
|
|
|
|
company \
|
|
|
|
|
compare \
|
|
|
|
|
computer \
|
|
|
|
|
comsec \
|
|
|
|
|
condos \
|
|
|
|
|
construction \
|
|
|
|
|
consulting \
|
|
|
|
|
contact \
|
|
|
|
|
contractors \
|
|
|
|
|
cooking \
|
|
|
|
|
cookingchannel \
|
|
|
|
|
cool \
|
|
|
|
|
coop \
|
|
|
|
|
corsica \
|
|
|
|
|
country \
|
|
|
|
|
coupon \
|
|
|
|
|
coupons \
|
|
|
|
|
courses \
|
|
|
|
|
cpa \
|
|
|
|
|
cr \
|
|
|
|
|
credit \
|
|
|
|
|
creditcard \
|
|
|
|
|
creditunion \
|
|
|
|
|
cricket \
|
|
|
|
|
crown \
|
|
|
|
|
crs \
|
|
|
|
|
cruise \
|
|
|
|
|
cruises \
|
|
|
|
|
csc \
|
|
|
|
|
cu \
|
|
|
|
|
cuisinella \
|
|
|
|
|
cv \
|
|
|
|
|
cw \
|
|
|
|
|
cx \
|
|
|
|
|
cy \
|
|
|
|
|
cymru \
|
|
|
|
|
cyou \
|
|
|
|
|
cz \
|
|
|
|
|
dabur \
|
|
|
|
|
dad \
|
|
|
|
|
dance \
|
|
|
|
|
data \
|
|
|
|
|
date \
|
|
|
|
|
dating \
|
|
|
|
|
datsun \
|
|
|
|
|
day \
|
|
|
|
|
dclk \
|
|
|
|
|
dds \
|
|
|
|
|
de \
|
|
|
|
|
deal \
|
|
|
|
|
dealer \
|
|
|
|
|
deals \
|
|
|
|
|
degree \
|
|
|
|
|
delivery \
|
|
|
|
|
dell \
|
|
|
|
|
deloitte \
|
|
|
|
|
delta \
|
|
|
|
|
democrat \
|
|
|
|
|
dental \
|
|
|
|
|
dentist \
|
|
|
|
|
desi \
|
|
|
|
|
design \
|
|
|
|
|
dev \
|
|
|
|
|
dhl \
|
|
|
|
|
diamonds \
|
|
|
|
|
diet \
|
|
|
|
|
digital \
|
|
|
|
|
direct \
|
|
|
|
|
directory \
|
|
|
|
|
discount \
|
|
|
|
|
discover \
|
|
|
|
|
dish \
|
|
|
|
|
diy \
|
|
|
|
|
dj \
|
|
|
|
|
dk \
|
|
|
|
|
dm \
|
|
|
|
|
dnp \
|
|
|
|
|
do \
|
|
|
|
|
docs \
|
|
|
|
|
doctor \
|
|
|
|
|
dog \
|
|
|
|
|
domains \
|
|
|
|
|
dot \
|
|
|
|
|
download \
|
|
|
|
|
drive \
|
|
|
|
|
dtv \
|
|
|
|
|
dubai \
|
|
|
|
|
duck \
|
|
|
|
|
dunlop \
|
|
|
|
|
dupont \
|
|
|
|
|
durban \
|
|
|
|
|
dvag \
|
|
|
|
|
dvr \
|
|
|
|
|
dz \
|
|
|
|
|
earth \
|
|
|
|
|
eat \
|
|
|
|
|
ec \
|
|
|
|
|
eco \
|
|
|
|
|
edeka \
|
|
|
|
|
edu \
|
|
|
|
|
education \
|
|
|
|
|
ee \
|
|
|
|
|
eg \
|
|
|
|
|
email \
|
|
|
|
|
emerck \
|
|
|
|
|
energy \
|
|
|
|
|
engineer \
|
|
|
|
|
engineering \
|
|
|
|
|
enterprises \
|
|
|
|
|
epson \
|
|
|
|
|
equipment \
|
|
|
|
|
er \
|
|
|
|
|
ericsson \
|
|
|
|
|
erni \
|
|
|
|
|
es \
|
|
|
|
|
esq \
|
|
|
|
|
estate \
|
|
|
|
|
et \
|
|
|
|
|
etisalat \
|
|
|
|
|
eu \
|
|
|
|
|
eurovision \
|
|
|
|
|
eus \
|
|
|
|
|
events \
|
|
|
|
|
exchange \
|
|
|
|
|
expert \
|
|
|
|
|
exposed \
|
|
|
|
|
express \
|
|
|
|
|
extraspace \
|
|
|
|
|
fage \
|
|
|
|
|
fail \
|
|
|
|
|
fairwinds \
|
|
|
|
|
faith \
|
|
|
|
|
family \
|
|
|
|
|
fan \
|
|
|
|
|
fans \
|
|
|
|
|
farm \
|
|
|
|
|
farmers \
|
|
|
|
|
fashion \
|
|
|
|
|
fast \
|
|
|
|
|
fedex \
|
|
|
|
|
feedback \
|
|
|
|
|
ferrari \
|
|
|
|
|
ferrero \
|
|
|
|
|
fi \
|
|
|
|
|
fiat \
|
|
|
|
|
fidelity \
|
|
|
|
|
fido \
|
|
|
|
|
film \
|
|
|
|
|
final \
|
|
|
|
|
finance \
|
|
|
|
|
financial \
|
|
|
|
|
fire \
|
|
|
|
|
firestone \
|
|
|
|
|
firmdale \
|
|
|
|
|
fish \
|
|
|
|
|
fishing \
|
|
|
|
|
fit \
|
|
|
|
|
fitness \
|
|
|
|
|
fj \
|
|
|
|
|
fk \
|
|
|
|
|
flickr \
|
|
|
|
|
flights \
|
|
|
|
|
flir \
|
|
|
|
|
florist \
|
|
|
|
|
flowers \
|
|
|
|
|
fly \
|
|
|
|
|
fm \
|
|
|
|
|
fo \
|
|
|
|
|
foo \
|
|
|
|
|
food \
|
|
|
|
|
foodnetwork \
|
|
|
|
|
football \
|
|
|
|
|
ford \
|
|
|
|
|
forex \
|
|
|
|
|
forsale \
|
|
|
|
|
forum \
|
|
|
|
|
foundation \
|
|
|
|
|
fox \
|
|
|
|
|
fr \
|
|
|
|
|
free \
|
|
|
|
|
fresenius \
|
|
|
|
|
frl \
|
|
|
|
|
frogans \
|
|
|
|
|
frontdoor \
|
|
|
|
|
frontier \
|
|
|
|
|
ftr \
|
|
|
|
|
fujitsu \
|
|
|
|
|
fujixerox \
|
|
|
|
|
fun \
|
|
|
|
|
fund \
|
|
|
|
|
furniture \
|
|
|
|
|
futbol \
|
|
|
|
|
fyi \
|
|
|
|
|
ga \
|
|
|
|
|
gal \
|
|
|
|
|
gallery \
|
|
|
|
|
gallo \
|
|
|
|
|
gallup \
|
|
|
|
|
game \
|
|
|
|
|
games \
|
|
|
|
|
gap \
|
|
|
|
|
garden \
|
|
|
|
|
gay \
|
|
|
|
|
gb \
|
|
|
|
|
gbiz \
|
|
|
|
|
gd \
|
|
|
|
|
gdn \
|
|
|
|
|
ge \
|
|
|
|
|
gea \
|
|
|
|
|
gent \
|
|
|
|
|
genting \
|
|
|
|
|
george \
|
|
|
|
|
gf \
|
|
|
|
|
gg \
|
|
|
|
|
ggee \
|
|
|
|
|
gh \
|
|
|
|
|
gi \
|
|
|
|
|
gift \
|
|
|
|
|
gifts \
|
|
|
|
|
gives \
|
|
|
|
|
giving \
|
|
|
|
|
gl \
|
|
|
|
|
glade \
|
|
|
|
|
glass \
|
|
|
|
|
gle \
|
|
|
|
|
global \
|
|
|
|
|
globo \
|
|
|
|
|
gm \
|
|
|
|
|
gmail \
|
|
|
|
|
gmbh \
|
|
|
|
|
gmo \
|
|
|
|
|
gmx \
|
|
|
|
|
gn \
|
|
|
|
|
godaddy \
|
|
|
|
|
gold \
|
|
|
|
|
goldpoint \
|
|
|
|
|
golf \
|
|
|
|
|
goo \
|
|
|
|
|
goodyear \
|
|
|
|
|
goog \
|
|
|
|
|
google \
|
|
|
|
|
gop \
|
|
|
|
|
got \
|
|
|
|
|
gov \
|
|
|
|
|
gp \
|
|
|
|
|
gq \
|
|
|
|
|
gr \
|
|
|
|
|
grainger \
|
|
|
|
|
graphics \
|
|
|
|
|
gratis \
|
|
|
|
|
green \
|
|
|
|
|
gripe \
|
|
|
|
|
grocery \
|
|
|
|
|
group \
|
|
|
|
|
gs \
|
|
|
|
|
gt \
|
|
|
|
|
gu \
|
|
|
|
|
guardian \
|
|
|
|
|
gucci \
|
|
|
|
|
guge \
|
|
|
|
|
guide \
|
|
|
|
|
guitars \
|
|
|
|
|
guru \
|
|
|
|
|
gw \
|
|
|
|
|
gy \
|
|
|
|
|
hair \
|
|
|
|
|
hamburg \
|
|
|
|
|
hangout \
|
|
|
|
|
haus \
|
|
|
|
|
hbo \
|
|
|
|
|
hdfc \
|
|
|
|
|
hdfcbank \
|
|
|
|
|
health \
|
|
|
|
|
healthcare \
|
|
|
|
|
help \
|
|
|
|
|
helsinki \
|
|
|
|
|
here \
|
|
|
|
|
hermes \
|
|
|
|
|
hgtv \
|
|
|
|
|
hiphop \
|
|
|
|
|
hisamitsu \
|
|
|
|
|
hitachi \
|
|
|
|
|
hiv \
|
|
|
|
|
hk \
|
|
|
|
|
hkt \
|
|
|
|
|
hm \
|
|
|
|
|
hn \
|
|
|
|
|
hockey \
|
|
|
|
|
holdings \
|
|
|
|
|
holiday \
|
|
|
|
|
homedepot \
|
|
|
|
|
homegoods \
|
|
|
|
|
homes \
|
|
|
|
|
homesense \
|
|
|
|
|
honda \
|
|
|
|
|
horse \
|
|
|
|
|
hospital \
|
|
|
|
|
host \
|
|
|
|
|
hosting \
|
|
|
|
|
hot \
|
|
|
|
|
hoteles \
|
|
|
|
|
hotels \
|
|
|
|
|
hotmail \
|
|
|
|
|
house \
|
|
|
|
|
how \
|
|
|
|
|
hr \
|
|
|
|
|
hsbc \
|
|
|
|
|
ht \
|
|
|
|
|
hu \
|
|
|
|
|
hughes \
|
|
|
|
|
hyatt \
|
|
|
|
|
hyundai \
|
|
|
|
|
ibm \
|
|
|
|
|
icbc \
|
|
|
|
|
ice \
|
|
|
|
|
icu \
|
|
|
|
|
id \
|
|
|
|
|
ie \
|
|
|
|
|
ieee \
|
|
|
|
|
ifm \
|
|
|
|
|
ikano \
|
|
|
|
|
il \
|
|
|
|
|
im \
|
|
|
|
|
imamat \
|
|
|
|
|
imdb \
|
|
|
|
|
immo \
|
|
|
|
|
immobilien \
|
|
|
|
|
in \
|
|
|
|
|
inc \
|
|
|
|
|
industries \
|
|
|
|
|
infiniti \
|
|
|
|
|
info \
|
|
|
|
|
ing \
|
|
|
|
|
ink \
|
|
|
|
|
institute \
|
|
|
|
|
insurance \
|
|
|
|
|
insure \
|
|
|
|
|
int \
|
|
|
|
|
international \
|
|
|
|
|
intuit \
|
|
|
|
|
investments \
|
|
|
|
|
io \
|
|
|
|
|
ipiranga \
|
|
|
|
|
iq \
|
|
|
|
|
ir \
|
|
|
|
|
irish \
|
|
|
|
|
is \
|
|
|
|
|
ismaili \
|
|
|
|
|
ist \
|
|
|
|
|
istanbul \
|
|
|
|
|
it \
|
|
|
|
|
itau \
|
|
|
|
|
itv \
|
|
|
|
|
iveco \
|
|
|
|
|
jaguar \
|
|
|
|
|
java \
|
|
|
|
|
jcb \
|
|
|
|
|
je \
|
|
|
|
|
jeep \
|
|
|
|
|
jetzt \
|
|
|
|
|
jewelry \
|
|
|
|
|
jio \
|
|
|
|
|
jll \
|
|
|
|
|
jm \
|
|
|
|
|
jmp \
|
|
|
|
|
jnj \
|
|
|
|
|
jo \
|
|
|
|
|
jobs \
|
|
|
|
|
joburg \
|
|
|
|
|
jot \
|
|
|
|
|
joy \
|
|
|
|
|
jp \
|
|
|
|
|
jpmorgan \
|
|
|
|
|
jprs \
|
|
|
|
|
juegos \
|
|
|
|
|
juniper \
|
|
|
|
|
kaufen \
|
|
|
|
|
kddi \
|
|
|
|
|
ke \
|
|
|
|
|
kerryhotels \
|
|
|
|
|
kerrylogistics \
|
|
|
|
|
kerryproperties \
|
|
|
|
|
kfh \
|
|
|
|
|
kg \
|
|
|
|
|
kh \
|
|
|
|
|
ki \
|
|
|
|
|
kia \
|
|
|
|
|
kim \
|
|
|
|
|
kinder \
|
|
|
|
|
kindle \
|
|
|
|
|
kitchen \
|
|
|
|
|
kiwi \
|
|
|
|
|
km \
|
|
|
|
|
kn \
|
|
|
|
|
koeln \
|
|
|
|
|
komatsu \
|
|
|
|
|
kosher \
|
|
|
|
|
kp \
|
|
|
|
|
kpmg \
|
|
|
|
|
kpn \
|
|
|
|
|
kr \
|
|
|
|
|
krd \
|
|
|
|
|
kred \
|
|
|
|
|
kuokgroup \
|
|
|
|
|
kw \
|
|
|
|
|
ky \
|
|
|
|
|
kyoto \
|
|
|
|
|
kz \
|
|
|
|
|
la \
|
|
|
|
|
lacaixa \
|
|
|
|
|
lamborghini \
|
|
|
|
|
lamer \
|
|
|
|
|
lancaster \
|
|
|
|
|
lancia \
|
|
|
|
|
land \
|
|
|
|
|
landrover \
|
|
|
|
|
lanxess \
|
|
|
|
|
lasalle \
|
|
|
|
|
lat \
|
|
|
|
|
latino \
|
|
|
|
|
latrobe \
|
|
|
|
|
law \
|
|
|
|
|
lawyer \
|
|
|
|
|
lb \
|
|
|
|
|
lc \
|
|
|
|
|
lds \
|
|
|
|
|
lease \
|
|
|
|
|
leclerc \
|
|
|
|
|
lefrak \
|
|
|
|
|
legal \
|
|
|
|
|
lego \
|
|
|
|
|
lexus \
|
|
|
|
|
lgbt \
|
|
|
|
|
li \
|
|
|
|
|
lidl \
|
|
|
|
|
life \
|
|
|
|
|
lifeinsurance \
|
|
|
|
|
lifestyle \
|
|
|
|
|
lighting \
|
|
|
|
|
like \
|
|
|
|
|
lilly \
|
|
|
|
|
limited \
|
|
|
|
|
limo \
|
|
|
|
|
lincoln \
|
|
|
|
|
linde \
|
|
|
|
|
link \
|
|
|
|
|
lipsy \
|
|
|
|
|
live \
|
|
|
|
|
living \
|
|
|
|
|
lixil \
|
|
|
|
|
lk \
|
|
|
|
|
llc \
|
|
|
|
|
llp \
|
|
|
|
|
loan \
|
|
|
|
|
loans \
|
|
|
|
|
locker \
|
|
|
|
|
locus \
|
|
|
|
|
loft \
|
|
|
|
|
lol \
|
|
|
|
|
london \
|
|
|
|
|
lotte \
|
|
|
|
|
lotto \
|
|
|
|
|
love \
|
|
|
|
|
lpl \
|
|
|
|
|
lplfinancial \
|
|
|
|
|
lr \
|
|
|
|
|
ls \
|
|
|
|
|
lt \
|
|
|
|
|
ltd \
|
|
|
|
|
ltda \
|
|
|
|
|
lu \
|
|
|
|
|
lundbeck \
|
|
|
|
|
luxe \
|
|
|
|
|
luxury \
|
|
|
|
|
lv \
|
|
|
|
|
ly \
|
|
|
|
|
ma \
|
|
|
|
|
macys \
|
|
|
|
|
madrid \
|
|
|
|
|
maif \
|
|
|
|
|
maison \
|
|
|
|
|
makeup \
|
|
|
|
|
man \
|
|
|
|
|
management \
|
|
|
|
|
mango \
|
|
|
|
|
map \
|
|
|
|
|
market \
|
|
|
|
|
marketing \
|
|
|
|
|
markets \
|
|
|
|
|
marriott \
|
|
|
|
|
marshalls \
|
|
|
|
|
maserati \
|
|
|
|
|
mattel \
|
|
|
|
|
mba \
|
|
|
|
|
mc \
|
|
|
|
|
mckinsey \
|
|
|
|
|
md \
|
|
|
|
|
me \
|
|
|
|
|
med \
|
|
|
|
|
media \
|
|
|
|
|
meet \
|
|
|
|
|
melbourne \
|
|
|
|
|
meme \
|
|
|
|
|
memorial \
|
|
|
|
|
men \
|
|
|
|
|
menu \
|
|
|
|
|
merckmsd \
|
|
|
|
|
mg \
|
|
|
|
|
mh \
|
|
|
|
|
miami \
|
|
|
|
|
microsoft \
|
|
|
|
|
mil \
|
|
|
|
|
mini \
|
|
|
|
|
mint \
|
|
|
|
|
mit \
|
|
|
|
|
mitsubishi \
|
|
|
|
|
mk \
|
|
|
|
|
ml \
|
|
|
|
|
mlb \
|
|
|
|
|
mls \
|
|
|
|
|
mm \
|
|
|
|
|
mma \
|
|
|
|
|
mn \
|
|
|
|
|
mo \
|
|
|
|
|
mobi \
|
|
|
|
|
mobile \
|
|
|
|
|
moda \
|
|
|
|
|
moe \
|
|
|
|
|
moi \
|
|
|
|
|
mom \
|
|
|
|
|
monash \
|
|
|
|
|
money \
|
|
|
|
|
monster \
|
|
|
|
|
mormon \
|
|
|
|
|
mortgage \
|
|
|
|
|
moscow \
|
|
|
|
|
moto \
|
|
|
|
|
motorcycles \
|
|
|
|
|
mov \
|
|
|
|
|
movie \
|
|
|
|
|
mp \
|
|
|
|
|
mq \
|
|
|
|
|
mr \
|
|
|
|
|
ms \
|
|
|
|
|
msd \
|
|
|
|
|
mt \
|
|
|
|
|
mtn \
|
|
|
|
|
mtr \
|
|
|
|
|
mu \
|
|
|
|
|
museum \
|
|
|
|
|
mutual \
|
|
|
|
|
mv \
|
|
|
|
|
mw \
|
|
|
|
|
mx \
|
|
|
|
|
my \
|
|
|
|
|
mz \
|
|
|
|
|
na \
|
|
|
|
|
nab \
|
|
|
|
|
nagoya \
|
|
|
|
|
name \
|
|
|
|
|
nationwide \
|
|
|
|
|
natura \
|
|
|
|
|
navy \
|
|
|
|
|
nba \
|
|
|
|
|
nc \
|
|
|
|
|
ne \
|
|
|
|
|
nec \
|
|
|
|
|
net \
|
|
|
|
|
netbank \
|
|
|
|
|
netflix \
|
|
|
|
|
network \
|
|
|
|
|
neustar \
|
|
|
|
|
new \
|
|
|
|
|
news \
|
|
|
|
|
next \
|
|
|
|
|
nextdirect \
|
|
|
|
|
nexus \
|
|
|
|
|
nf \
|
|
|
|
|
nfl \
|
|
|
|
|
ng \
|
|
|
|
|
ngo \
|
|
|
|
|
nhk \
|
|
|
|
|
ni \
|
|
|
|
|
nico \
|
|
|
|
|
nike \
|
|
|
|
|
nikon \
|
|
|
|
|
ninja \
|
|
|
|
|
nissan \
|
|
|
|
|
nissay \
|
|
|
|
|
nl \
|
|
|
|
|
no \
|
|
|
|
|
nokia \
|
|
|
|
|
northwesternmutual \
|
|
|
|
|
norton \
|
|
|
|
|
now \
|
|
|
|
|
nowruz \
|
|
|
|
|
nowtv \
|
|
|
|
|
np \
|
|
|
|
|
nr \
|
|
|
|
|
nra \
|
|
|
|
|
nrw \
|
|
|
|
|
ntt \
|
|
|
|
|
nu \
|
|
|
|
|
nyc \
|
|
|
|
|
nz \
|
|
|
|
|
obi \
|
|
|
|
|
observer \
|
|
|
|
|
off \
|
|
|
|
|
office \
|
|
|
|
|
okinawa \
|
|
|
|
|
olayan \
|
|
|
|
|
olayangroup \
|
|
|
|
|
oldnavy \
|
|
|
|
|
ollo \
|
|
|
|
|
om \
|
|
|
|
|
omega \
|
|
|
|
|
one \
|
|
|
|
|
ong \
|
|
|
|
|
onl \
|
|
|
|
|
online \
|
|
|
|
|
onyourside \
|
|
|
|
|
ooo \
|
|
|
|
|
open \
|
|
|
|
|
oracle \
|
|
|
|
|
orange \
|
|
|
|
|
org \
|
|
|
|
|
organic \
|
|
|
|
|
origins \
|
|
|
|
|
osaka \
|
|
|
|
|
otsuka \
|
|
|
|
|
ott \
|
|
|
|
|
ovh \
|
|
|
|
|
pa \
|
|
|
|
|
page \
|
|
|
|
|
panasonic \
|
|
|
|
|
paris \
|
|
|
|
|
pars \
|
|
|
|
|
partners \
|
|
|
|
|
parts \
|
|
|
|
|
party \
|
|
|
|
|
passagens \
|
|
|
|
|
pay \
|
|
|
|
|
pccw \
|
|
|
|
|
pe \
|
|
|
|
|
pet \
|
|
|
|
|
pf \
|
|
|
|
|
pfizer \
|
|
|
|
|
pg \
|
|
|
|
|
ph \
|
|
|
|
|
pharmacy \
|
|
|
|
|
phd \
|
|
|
|
|
philips \
|
|
|
|
|
phone \
|
|
|
|
|
photo \
|
|
|
|
|
photography \
|
|
|
|
|
photos \
|
|
|
|
|
physio \
|
|
|
|
|
pics \
|
|
|
|
|
pictet \
|
|
|
|
|
pictures \
|
|
|
|
|
pid \
|
|
|
|
|
pin \
|
|
|
|
|
ping \
|
|
|
|
|
pink \
|
|
|
|
|
pioneer \
|
|
|
|
|
pizza \
|
|
|
|
|
pk \
|
|
|
|
|
pl \
|
|
|
|
|
place \
|
|
|
|
|
play \
|
|
|
|
|
playstation \
|
|
|
|
|
plumbing \
|
|
|
|
|
plus \
|
|
|
|
|
pm \
|
|
|
|
|
pn \
|
|
|
|
|
pnc \
|
|
|
|
|
pohl \
|
|
|
|
|
poker \
|
|
|
|
|
politie \
|
|
|
|
|
porn \
|
|
|
|
|
post \
|
|
|
|
|
pr \
|
|
|
|
|
pramerica \
|
|
|
|
|
praxi \
|
|
|
|
|
press \
|
|
|
|
|
prime \
|
|
|
|
|
pro \
|
|
|
|
|
prod \
|
|
|
|
|
productions \
|
|
|
|
|
prof \
|
|
|
|
|
progressive \
|
|
|
|
|
promo \
|
|
|
|
|
properties \
|
|
|
|
|
property \
|
|
|
|
|
protection \
|
|
|
|
|
pru \
|
|
|
|
|
prudential \
|
|
|
|
|
ps \
|
|
|
|
|
pt \
|
|
|
|
|
pub \
|
|
|
|
|
pw \
|
|
|
|
|
pwc \
|
|
|
|
|
py \
|
|
|
|
|
qa \
|
|
|
|
|
qpon \
|
|
|
|
|
quebec \
|
|
|
|
|
quest \
|
|
|
|
|
qvc \
|
|
|
|
|
racing \
|
|
|
|
|
radio \
|
|
|
|
|
raid \
|
|
|
|
|
re \
|
|
|
|
|
read \
|
|
|
|
|
realestate \
|
|
|
|
|
realtor \
|
|
|
|
|
realty \
|
|
|
|
|
recipes \
|
|
|
|
|
red \
|
|
|
|
|
redstone \
|
|
|
|
|
redumbrella \
|
|
|
|
|
rehab \
|
|
|
|
|
reise \
|
|
|
|
|
reisen \
|
|
|
|
|
reit \
|
|
|
|
|
reliance \
|
|
|
|
|
ren \
|
|
|
|
|
rent \
|
|
|
|
|
rentals \
|
|
|
|
|
repair \
|
|
|
|
|
report \
|
|
|
|
|
republican \
|
|
|
|
|
rest \
|
|
|
|
|
restaurant \
|
|
|
|
|
review \
|
|
|
|
|
reviews \
|
|
|
|
|
rexroth \
|
|
|
|
|
rich \
|
|
|
|
|
richardli \
|
|
|
|
|
ricoh \
|
|
|
|
|
ril \
|
|
|
|
|
rio \
|
|
|
|
|
rip \
|
|
|
|
|
rmit \
|
|
|
|
|
ro \
|
|
|
|
|
rocher \
|
|
|
|
|
rocks \
|
|
|
|
|
rodeo \
|
|
|
|
|
rogers \
|
|
|
|
|
room \
|
|
|
|
|
rs \
|
|
|
|
|
rsvp \
|
|
|
|
|
ru \
|
|
|
|
|
rugby \
|
|
|
|
|
ruhr \
|
|
|
|
|
run \
|
|
|
|
|
rw \
|
|
|
|
|
rwe \
|
|
|
|
|
ryukyu \
|
|
|
|
|
sa \
|
|
|
|
|
saarland \
|
|
|
|
|
safe \
|
|
|
|
|
safety \
|
|
|
|
|
sakura \
|
|
|
|
|
sale \
|
|
|
|
|
salon \
|
|
|
|
|
samsclub \
|
|
|
|
|
samsung \
|
|
|
|
|
sandvik \
|
|
|
|
|
sandvikcoromant \
|
|
|
|
|
sanofi \
|
|
|
|
|
sap \
|
|
|
|
|
sarl \
|
|
|
|
|
sas \
|
|
|
|
|
save \
|
|
|
|
|
saxo \
|
|
|
|
|
sb \
|
|
|
|
|
sbi \
|
|
|
|
|
sbs \
|
|
|
|
|
sc \
|
|
|
|
|
sca \
|
|
|
|
|
scb \
|
|
|
|
|
schaeffler \
|
|
|
|
|
schmidt \
|
|
|
|
|
scholarships \
|
|
|
|
|
school \
|
|
|
|
|
schule \
|
|
|
|
|
schwarz \
|
|
|
|
|
science \
|
|
|
|
|
scjohnson \
|
|
|
|
|
scot \
|
|
|
|
|
sd \
|
|
|
|
|
se \
|
|
|
|
|
search \
|
|
|
|
|
seat \
|
|
|
|
|
secure \
|
|
|
|
|
security \
|
|
|
|
|
seek \
|
|
|
|
|
select \
|
|
|
|
|
sener \
|
|
|
|
|
services \
|
|
|
|
|
ses \
|
|
|
|
|
seven \
|
|
|
|
|
sew \
|
|
|
|
|
sex \
|
|
|
|
|
sexy \
|
|
|
|
|
sfr \
|
|
|
|
|
sg \
|
|
|
|
|
sh \
|
|
|
|
|
shangrila \
|
|
|
|
|
sharp \
|
|
|
|
|
shaw \
|
|
|
|
|
shell \
|
|
|
|
|
shia \
|
|
|
|
|
shiksha \
|
|
|
|
|
shoes \
|
|
|
|
|
shop \
|
|
|
|
|
shopping \
|
|
|
|
|
shouji \
|
|
|
|
|
show \
|
|
|
|
|
showtime \
|
|
|
|
|
si \
|
|
|
|
|
silk \
|
|
|
|
|
sina \
|
|
|
|
|
singles \
|
|
|
|
|
site \
|
|
|
|
|
sj \
|
|
|
|
|
sk \
|
|
|
|
|
ski \
|
|
|
|
|
skin \
|
|
|
|
|
sky \
|
|
|
|
|
skype \
|
|
|
|
|
sl \
|
|
|
|
|
sling \
|
|
|
|
|
sm \
|
|
|
|
|
smart \
|
|
|
|
|
smile \
|
|
|
|
|
sn \
|
|
|
|
|
sncf \
|
|
|
|
|
so \
|
|
|
|
|
soccer \
|
|
|
|
|
social \
|
|
|
|
|
softbank \
|
|
|
|
|
software \
|
|
|
|
|
sohu \
|
|
|
|
|
solar \
|
|
|
|
|
solutions \
|
|
|
|
|
song \
|
|
|
|
|
sony \
|
|
|
|
|
soy \
|
|
|
|
|
spa \
|
|
|
|
|
space \
|
|
|
|
|
sport \
|
|
|
|
|
spot \
|
|
|
|
|
spreadbetting \
|
|
|
|
|
sr \
|
|
|
|
|
srl \
|
|
|
|
|
ss \
|
|
|
|
|
st \
|
|
|
|
|
stada \
|
|
|
|
|
staples \
|
|
|
|
|
star \
|
|
|
|
|
statebank \
|
|
|
|
|
statefarm \
|
|
|
|
|
stc \
|
|
|
|
|
stcgroup \
|
|
|
|
|
stockholm \
|
|
|
|
|
storage \
|
|
|
|
|
store \
|
|
|
|
|
stream \
|
|
|
|
|
studio \
|
|
|
|
|
study \
|
|
|
|
|
style \
|
|
|
|
|
su \
|
|
|
|
|
sucks \
|
|
|
|
|
supplies \
|
|
|
|
|
supply \
|
|
|
|
|
support \
|
|
|
|
|
surf \
|
|
|
|
|
surgery \
|
|
|
|
|
suzuki \
|
|
|
|
|
sv \
|
|
|
|
|
swatch \
|
|
|
|
|
swiftcover \
|
|
|
|
|
swiss \
|
|
|
|
|
sx \
|
|
|
|
|
sy \
|
|
|
|
|
sydney \
|
|
|
|
|
systems \
|
|
|
|
|
sz \
|
|
|
|
|
tab \
|
|
|
|
|
taipei \
|
|
|
|
|
talk \
|
|
|
|
|
taobao \
|
|
|
|
|
target \
|
|
|
|
|
tatamotors \
|
|
|
|
|
tatar \
|
|
|
|
|
tattoo \
|
|
|
|
|
tax \
|
|
|
|
|
taxi \
|
|
|
|
|
tc \
|
|
|
|
|
tci \
|
|
|
|
|
td \
|
|
|
|
|
tdk \
|
|
|
|
|
team \
|
|
|
|
|
tech \
|
|
|
|
|
technology \
|
|
|
|
|
tel \
|
|
|
|
|
temasek \
|
|
|
|
|
tennis \
|
|
|
|
|
teva \
|
|
|
|
|
tf \
|
|
|
|
|
tg \
|
|
|
|
|
th \
|
|
|
|
|
thd \
|
|
|
|
|
theater \
|
|
|
|
|
theatre \
|
|
|
|
|
tiaa \
|
|
|
|
|
tickets \
|
|
|
|
|
tienda \
|
|
|
|
|
tiffany \
|
|
|
|
|
tips \
|
|
|
|
|
tires \
|
|
|
|
|
tirol \
|
|
|
|
|
tj \
|
|
|
|
|
tjmaxx \
|
|
|
|
|
tjx \
|
|
|
|
|
tk \
|
|
|
|
|
tkmaxx \
|
|
|
|
|
tl \
|
|
|
|
|
tm \
|
|
|
|
|
tmall \
|
|
|
|
|
tn \
|
|
|
|
|
to \
|
|
|
|
|
today \
|
|
|
|
|
tokyo \
|
|
|
|
|
tools \
|
|
|
|
|
top \
|
|
|
|
|
toray \
|
|
|
|
|
toshiba \
|
|
|
|
|
total \
|
|
|
|
|
tours \
|
|
|
|
|
town \
|
|
|
|
|
toyota \
|
|
|
|
|
toys \
|
|
|
|
|
tr \
|
|
|
|
|
trade \
|
|
|
|
|
trading \
|
|
|
|
|
training \
|
|
|
|
|
travel \
|
|
|
|
|
travelchannel \
|
|
|
|
|
travelers \
|
|
|
|
|
travelersinsurance \
|
|
|
|
|
trust \
|
|
|
|
|
trv \
|
|
|
|
|
tt \
|
|
|
|
|
tube \
|
|
|
|
|
tui \
|
|
|
|
|
tunes \
|
|
|
|
|
tushu \
|
|
|
|
|
tv \
|
|
|
|
|
tvs \
|
|
|
|
|
tw \
|
|
|
|
|
tz \
|
|
|
|
|
ua \
|
|
|
|
|
ubank \
|
|
|
|
|
ubs \
|
|
|
|
|
ug \
|
|
|
|
|
uk \
|
|
|
|
|
unicom \
|
|
|
|
|
university \
|
|
|
|
|
uno \
|
|
|
|
|
uol \
|
|
|
|
|
ups \
|
|
|
|
|
us \
|
|
|
|
|
uy \
|
|
|
|
|
uz \
|
|
|
|
|
va \
|
|
|
|
|
vacations \
|
|
|
|
|
vana \
|
|
|
|
|
vanguard \
|
|
|
|
|
vc \
|
|
|
|
|
ve \
|
|
|
|
|
vegas \
|
|
|
|
|
ventures \
|
|
|
|
|
verisign \
|
|
|
|
|
versicherung \
|
|
|
|
|
vet \
|
|
|
|
|
vg \
|
|
|
|
|
vi \
|
|
|
|
|
viajes \
|
|
|
|
|
video \
|
|
|
|
|
vig \
|
|
|
|
|
viking \
|
|
|
|
|
villas \
|
|
|
|
|
vin \
|
|
|
|
|
vip \
|
|
|
|
|
virgin \
|
|
|
|
|
visa \
|
|
|
|
|
vision \
|
|
|
|
|
viva \
|
|
|
|
|
vivo \
|
|
|
|
|
vlaanderen \
|
|
|
|
|
vn \
|
|
|
|
|
vodka \
|
|
|
|
|
volkswagen \
|
|
|
|
|
volvo \
|
|
|
|
|
vote \
|
|
|
|
|
voting \
|
|
|
|
|
voto \
|
|
|
|
|
voyage \
|
|
|
|
|
vu \
|
|
|
|
|
vuelos \
|
|
|
|
|
wales \
|
|
|
|
|
walmart \
|
|
|
|
|
walter \
|
|
|
|
|
wang \
|
|
|
|
|
wanggou \
|
|
|
|
|
watch \
|
|
|
|
|
watches \
|
|
|
|
|
weather \
|
|
|
|
|
weatherchannel \
|
|
|
|
|
webcam \
|
|
|
|
|
weber \
|
|
|
|
|
website \
|
|
|
|
|
wed \
|
|
|
|
|
wedding \
|
|
|
|
|
weibo \
|
|
|
|
|
weir \
|
|
|
|
|
wf \
|
|
|
|
|
whoswho \
|
|
|
|
|
wien \
|
|
|
|
|
wiki \
|
|
|
|
|
williamhill \
|
|
|
|
|
win \
|
|
|
|
|
windows \
|
|
|
|
|
wine \
|
|
|
|
|
winners \
|
|
|
|
|
wme \
|
|
|
|
|
wolterskluwer \
|
|
|
|
|
woodside \
|
|
|
|
|
work \
|
|
|
|
|
works \
|
|
|
|
|
world \
|
|
|
|
|
wow \
|
|
|
|
|
ws \
|
|
|
|
|
wtc \
|
|
|
|
|
wtf \
|
|
|
|
|
xbox \
|
|
|
|
|
xerox \
|
|
|
|
|
xfinity \
|
|
|
|
|
xihuan \
|
|
|
|
|
xin \
|
|
|
|
|
xxx \
|
|
|
|
|
xyz \
|
|
|
|
|
yachts \
|
|
|
|
|
yahoo \
|
|
|
|
|
yamaxun \
|
|
|
|
|
yandex \
|
|
|
|
|
ye \
|
|
|
|
|
yodobashi \
|
|
|
|
|
yoga \
|
|
|
|
|
yokohama \
|
|
|
|
|
you \
|
|
|
|
|
youtube \
|
|
|
|
|
yt \
|
|
|
|
|
yun \
|
|
|
|
|
za \
|
|
|
|
|
zappos \
|
|
|
|
|
zara \
|
|
|
|
|
zero \
|
|
|
|
|
zip \
|
|
|
|
|
zm \
|
|
|
|
|
zone \
|
|
|
|
|
zuerich \
|
|
|
|
|
zw \
|
|
|
|
|
vermögensberater-ctb \
|
|
|
|
|
vermögensberatung-pwb \
|
|
|
|
|
ελ \
|
|
|
|
|
ευ \
|
|
|
|
|
бг \
|
|
|
|
|
бел \
|
|
|
|
|
дети \
|
|
|
|
|
ею \
|
|
|
|
|
католик \
|
|
|
|
|
ком \
|
|
|
|
|
қаз \
|
|
|
|
|
мкд \
|
|
|
|
|
мон \
|
|
|
|
|
москва \
|
|
|
|
|
онлайн \
|
|
|
|
|
орг \
|
|
|
|
|
рус \
|
|
|
|
|
рф \
|
|
|
|
|
сайт \
|
|
|
|
|
срб \
|
|
|
|
|
укр \
|
|
|
|
|
გე \
|
|
|
|
|
հայ \
|
|
|
|
|
ישראל \
|
|
|
|
|
קום \
|
|
|
|
|
ابوظبي \
|
|
|
|
|
اتصالات \
|
|
|
|
|
ارامكو \
|
|
|
|
|
الاردن \
|
|
|
|
|
البحرين \
|
|
|
|
|
الجزائر \
|
|
|
|
|
السعودية \
|
|
|
|
|
العليان \
|
|
|
|
|
المغرب \
|
|
|
|
|
امارات \
|
|
|
|
|
ایران \
|
|
|
|
|
بارت \
|
|
|
|
|
بازار \
|
|
|
|
|
بھارت \
|
|
|
|
|
بيتك \
|
|
|
|
|
پاکستان \
|
|
|
|
|
ڀارت \
|
|
|
|
|
تونس \
|
|
|
|
|
سودان \
|
|
|
|
|
سورية \
|
|
|
|
|
شبكة \
|
|
|
|
|
عراق \
|
|
|
|
|
عرب \
|
|
|
|
|
عمان \
|
|
|
|
|
فلسطين \
|
|
|
|
|
قطر \
|
|
|
|
|
كاثوليك \
|
|
|
|
|
كوم \
|
|
|
|
|
مصر \
|
|
|
|
|
مليسيا \
|
|
|
|
|
موريتانيا \
|
|
|
|
|
موقع \
|
|
|
|
|
همراه \
|
|
|
|
|
कॉम \
|
|
|
|
|
नेट \
|
|
|
|
|
भारत \
|
|
|
|
|
भारतम् \
|
|
|
|
|
भारोत \
|
|
|
|
|
संगठन \
|
|
|
|
|
বাংলা \
|
|
|
|
|
ভারত \
|
|
|
|
|
ভাৰত \
|
|
|
|
|
ਭਾਰਤ \
|
|
|
|
|
ભારત \
|
|
|
|
|
ଭାରତ \
|
|
|
|
|
இந்தியா \
|
|
|
|
|
இலங்கை \
|
|
|
|
|
சிங்கப்பூர் \
|
|
|
|
|
భారత్ \
|
|
|
|
|
ಭಾರತ \
|
|
|
|
|
ഭാരതം \
|
|
|
|
|
ලංකා \
|
|
|
|
|
คอม \
|
|
|
|
|
ไทย \
|
|
|
|
|
ລາວ \
|
|
|
|
|
닷넷 \
|
|
|
|
|
닷컴 \
|
|
|
|
|
삼성 \
|
|
|
|
|
한국 \
|
|
|
|
|
アマゾン \
|
|
|
|
|
グーグル \
|
|
|
|
|
クラウド \
|
|
|
|
|
コム \
|
|
|
|
|
ストア \
|
|
|
|
|
セール \
|
|
|
|
|
ファッション \
|
|
|
|
|
ポイント \
|
|
|
|
|
みんな \
|
|
|
|
|
世界 \
|
|
|
|
|
中信 \
|
|
|
|
|
中国 \
|
|
|
|
|
中國 \
|
|
|
|
|
中文网 \
|
|
|
|
|
亚马逊 \
|
|
|
|
|
企业 \
|
|
|
|
|
佛山 \
|
|
|
|
|
信息 \
|
|
|
|
|
健康 \
|
|
|
|
|
八卦 \
|
|
|
|
|
公司 \
|
|
|
|
|
公益 \
|
|
|
|
|
台湾 \
|
|
|
|
|
台灣 \
|
|
|
|
|
商城 \
|
|
|
|
|
商店 \
|
|
|
|
|
商标 \
|
|
|
|
|
嘉里 \
|
|
|
|
|
嘉里大酒店 \
|
|
|
|
|
在线 \
|
|
|
|
|
大众汽车 \
|
|
|
|
|
大拿 \
|
|
|
|
|
天主教 \
|
|
|
|
|
娱乐 \
|
|
|
|
|
家電 \
|
|
|
|
|
广东 \
|
|
|
|
|
微博 \
|
|
|
|
|
慈善 \
|
|
|
|
|
我爱你 \
|
|
|
|
|
手机 \
|
|
|
|
|
招聘 \
|
|
|
|
|
政务 \
|
|
|
|
|
政府 \
|
|
|
|
|
新加坡 \
|
|
|
|
|
新闻 \
|
|
|
|
|
时尚 \
|
|
|
|
|
書籍 \
|
|
|
|
|
机构 \
|
|
|
|
|
淡马锡 \
|
|
|
|
|
游戏 \
|
|
|
|
|
澳門 \
|
|
|
|
|
点看 \
|
|
|
|
|
移动 \
|
|
|
|
|
组织机构 \
|
|
|
|
|
网址 \
|
|
|
|
|
网店 \
|
|
|
|
|
网站 \
|
|
|
|
|
网络 \
|
|
|
|
|
联通 \
|
|
|
|
|
诺基亚 \
|
|
|
|
|
谷歌 \
|
|
|
|
|
购物 \
|
|
|
|
|
通販 \
|
|
|
|
|
集团 \
|
|
|
|
|
電訊盈科 \
|
|
|
|
|
飞利浦 \
|
|
|
|
|
食品 \
|
|
|
|
|
餐厅 \
|
|
|
|
|
香格里拉 \
|
|
|
|
|
香港'.split(' ');
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
The scanner provides an interface that takes a string of text as input, and
|
|
|
|
|
outputs an array of token instances that can be used for easy URL parsing.
|
|
|
|
|
|
|
|
|
|
@module linkify
|
|
|
|
|
@submodule scanner
|
|
|
|
|
@main scanner
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
var LETTER = /(?:[A-Za-z\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0370-\u0374\u0376\u0377\u037A-\u037D\u037F\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u052F\u0531-\u0556\u0559\u0560-\u0588\u05D0-\u05EA\u05EF-\u05F2\u0620-\u064A\u066E\u066F\u0671-\u06D3\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0815\u081A\u0824\u0828\u0840-\u0858\u0860-\u086A\u0870-\u0887\u0889-\u088E\u08A0-\u08C9\u0904-\u0939\u093D\u0950\u0958-\u0961\u0971-\u0980\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u09FC\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C5D\u0C60\u0C61\u0C80\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDD\u0CDE\u0CE0\u0CE1\u0CF1\u0CF2\u0D04-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D54-\u0D56\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32\u0E33\u0E40-\u0E46\u0E81\u0E82\u0E84\u0E86-\u0E8A\u0E8C-\u0EA3\u0EA5\u0EA7-\u0EB0\u0EB2\u0EB3\u0EBD\u0EC0-\u0EC4\u0EC6\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065\u1066\u106E-\u1070\u1075-\u1081\u108E\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12
D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u13A0-\u13F5\u13F8-\u13FD\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u1711\u171F-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17D7\u17DC\u1820-\u1878\u1880-\u1884\u1887-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1AA7\u1B05-\u1B33\u1B45-\u1B4C\u1B83-\u1BA0\u1BAE\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C7D\u1C80-\u1C88\u1C90-\u1CBA\u1CBD-\u1CBF\u1CE9-\u1CEC\u1CEE-\u1CF3\u1CF5\u1CF6\u1CFA\u1D00-\u1DBF\u1E00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2183\u2184\u2C00-\u2CE4\u2CEB-\u2CEE\u2CF2\u2CF3\u2D00-\u2D25\u2D27\u2D2D\u2D30-\u2D67\u2D6F\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2E2F\u3005\u3006\u3031-\u3035\u303B\u303C\u3041-\u3096\u309D-\u309F\u30A1-\u30FA\u30FC-\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BF\u31F0-\u31FF\u3400-\u4DBF\u4E00-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA61F\uA62A\uA62B\uA640-\uA66E\uA67F-\uA69D\uA6A0-\uA6E5\uA717-\uA71F\uA722-\uA788\uA78B-\uA7CA\uA7D0\uA7D1\uA7D3\uA7D5-\uA7D9\uA7F2-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD\uA8FE\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9CF\uA9E0-\uA9E4\uA9E6-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADD\uAAE0-\uAAEA\uAAF2-\uAAF4\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uAB30-\uAB5A\uAB5C-\uAB69\uAB70-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\
uF900-\uFA6D\uFA70-\uFAD9\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3
|
|
|
|
|
|
|
|
|
|
var EMOJI = /(?:[#\*0-9\xA9\xAE\u203C\u2049\u2122\u2139\u2194-\u2199\u21A9\u21AA\u231A\u231B\u2328\u23CF\u23E9-\u23F3\u23F8-\u23FA\u24C2\u25AA\u25AB\u25B6\u25C0\u25FB-\u25FE\u2600-\u2604\u260E\u2611\u2614\u2615\u2618\u261D\u2620\u2622\u2623\u2626\u262A\u262E\u262F\u2638-\u263A\u2640\u2642\u2648-\u2653\u265F\u2660\u2663\u2665\u2666\u2668\u267B\u267E\u267F\u2692-\u2697\u2699\u269B\u269C\u26A0\u26A1\u26A7\u26AA\u26AB\u26B0\u26B1\u26BD\u26BE\u26C4\u26C5\u26C8\u26CE\u26CF\u26D1\u26D3\u26D4\u26E9\u26EA\u26F0-\u26F5\u26F7-\u26FA\u26FD\u2702\u2705\u2708-\u270D\u270F\u2712\u2714\u2716\u271D\u2721\u2728\u2733\u2734\u2744\u2747\u274C\u274E\u2753-\u2755\u2757\u2763\u2764\u2795-\u2797\u27A1\u27B0\u27BF\u2934\u2935\u2B05-\u2B07\u2B1B\u2B1C\u2B50\u2B55\u3030\u303D\u3297\u3299]|\uD83C[\uDC04\uDCCF\uDD70\uDD71\uDD7E\uDD7F\uDD8E\uDD91-\uDD9A\uDDE6-\uDDFF\uDE01\uDE02\uDE1A\uDE2F\uDE32-\uDE3A\uDE50\uDE51\uDF00-\uDF21\uDF24-\uDF93\uDF96\uDF97\uDF99-\uDF9B\uDF9E-\uDFF0\uDFF3-\uDFF5\uDFF7-\uDFFF]|\uD83D[\uDC00-\uDCFD\uDCFF-\uDD3D\uDD49-\uDD4E\uDD50-\uDD67\uDD6F\uDD70\uDD73-\uDD7A\uDD87\uDD8A-\uDD8D\uDD90\uDD95\uDD96\uDDA4\uDDA5\uDDA8\uDDB1\uDDB2\uDDBC\uDDC2-\uDDC4\uDDD1-\uDDD3\uDDDC-\uDDDE\uDDE1\uDDE3\uDDE8\uDDEF\uDDF3\uDDFA-\uDE4F\uDE80-\uDEC5\uDECB-\uDED2\uDED5-\uDED7\uDEDD-\uDEE5\uDEE9\uDEEB\uDEEC\uDEF0\uDEF3-\uDEFC\uDFE0-\uDFEB\uDFF0]|\uD83E[\uDD0C-\uDD3A\uDD3C-\uDD45\uDD47-\uDDFF\uDE70-\uDE74\uDE78-\uDE7C\uDE80-\uDE86\uDE90-\uDEAC\uDEB0-\uDEBA\uDEC0-\uDEC5\uDED0-\uDED9\uDEE0-\uDEE7\uDEF0-\uDEF6])/; // Any Unicode emoji character
|
|
|
|
|
|
|
|
|
|
// U+FE0F (variation selector-16); may trail a heart and other emoji
var EMOJI_VARIATION = /\uFE0F/;

// A single digit character
var DIGIT = /\d/;

// A single whitespace character (`\s`, so a newline matches as well)
var SPACE = /\s/;
|
|
|
|
|
/**
 * Build the character-based scanner state machine and return its start state.
 *
 * Transitions are registered in a fixed sequence: single-character symbols,
 * whitespace, top-level domains, built-in protocols, custom protocols,
 * `localhost`, and finally the generic digit/letter/emoji fallbacks. The
 * sequence of helper calls is significant to the helpers (makeT, makeRegexT,
 * makeChainT, makeBatchT -- all defined elsewhere in this file), so keep it
 * intact when editing.
 *
 * @param {string[]} [customProtocols] extra protocol names; each one is chained
 * from the start state and, when followed by ':', leads to the PROTOCOL state
 * @return {State} scanner starting state
 */
function init$2() {
  var customProtocols = arguments[0];

  if (customProtocols === undefined) {
    customProtocols = [];
  }

  // Frequently used states
  var S_START = makeState();
  var S_NUM = makeAcceptingState(NUM);
  var S_DOMAIN = makeAcceptingState(DOMAIN);
  var S_DOMAIN_HYPHEN = makeState(); // domain followed by 1 or more hyphen characters
  var S_WS = makeAcceptingState(WS);

  var DOMAIN_REGEX_TRANSITIONS = [
    [DIGIT, S_DOMAIN],
    [LETTER, S_DOMAIN],
    [EMOJI, S_DOMAIN],
    [EMOJI_VARIATION, S_DOMAIN]
  ];

  // Create a state which emits a domain token
  var makeDomainState = function makeDomainState() {
    var domainState = makeAcceptingState(DOMAIN);
    domainState.j = {
      '-': S_DOMAIN_HYPHEN
    };
    // Each state gets its own copy of the shared regex transition list
    domainState.jr = DOMAIN_REGEX_TRANSITIONS.slice();
    return domainState;
  };

  // Create a state which emits the given token instead of DOMAIN, but whose
  // usual alphanumeric transitions still lead to domains
  var makeNearDomainState = function makeNearDomainState(token) {
    var nearState = makeDomainState();
    nearState.t = token;
    return nearState;
  };

  // Register the digit/letter/emoji regex transitions out of `from`: digits go
  // to `digitTarget`, everything else goes to S_DOMAIN
  var addDomainCharTransitions = function addDomainCharTransitions(from, digitTarget) {
    makeRegexT(from, DIGIT, digitTarget);
    makeRegexT(from, LETTER, S_DOMAIN);
    makeRegexT(from, EMOJI, S_DOMAIN);
    makeRegexT(from, EMOJI_VARIATION, S_DOMAIN);
  };

  // States for special URL symbols that accept immediately after start
  makeBatchT(S_START, [
    ["'", makeAcceptingState(APOSTROPHE)],
    ['{', makeAcceptingState(OPENBRACE)],
    ['[', makeAcceptingState(OPENBRACKET)],
    ['<', makeAcceptingState(OPENANGLEBRACKET)],
    ['(', makeAcceptingState(OPENPAREN)],
    ['}', makeAcceptingState(CLOSEBRACE)],
    [']', makeAcceptingState(CLOSEBRACKET)],
    ['>', makeAcceptingState(CLOSEANGLEBRACKET)],
    [')', makeAcceptingState(CLOSEPAREN)],
    ['&', makeAcceptingState(AMPERSAND)],
    ['*', makeAcceptingState(ASTERISK)],
    ['@', makeAcceptingState(AT)],
    ['`', makeAcceptingState(BACKTICK)],
    ['^', makeAcceptingState(CARET)],
    [':', makeAcceptingState(COLON)],
    [',', makeAcceptingState(COMMA)],
    ['$', makeAcceptingState(DOLLAR)],
    ['.', makeAcceptingState(DOT)],
    ['=', makeAcceptingState(EQUALS)],
    ['!', makeAcceptingState(EXCLAMATION)],
    ['-', makeAcceptingState(HYPHEN)],
    ['%', makeAcceptingState(PERCENT)],
    ['|', makeAcceptingState(PIPE)],
    ['+', makeAcceptingState(PLUS)],
    ['#', makeAcceptingState(POUND)],
    ['?', makeAcceptingState(QUERY)],
    ['"', makeAcceptingState(QUOTE)],
    ['/', makeAcceptingState(SLASH)],
    [';', makeAcceptingState(SEMI)],
    ['~', makeAcceptingState(TILDE)],
    ['_', makeAcceptingState(UNDERSCORE)],
    ['\\', makeAcceptingState(BACKSLASH)]
  ]);

  // Whitespace jumps
  // Tokens of only non-newline whitespace are arbitrarily long
  makeT(S_START, '\n', makeAcceptingState(NL));
  makeRegexT(S_START, SPACE, S_WS);

  // If any whitespace except newline, more whitespace!
  makeT(S_WS, '\n', makeState()); // non-accepting state
  makeRegexT(S_WS, SPACE, S_WS);

  // Generates states for top-level domains
  // Note that this is most accurate when tlds are in alphabetical order
  for (var t = 0; t < tlds.length; t++) {
    makeChainT(S_START, tlds[t], makeNearDomainState(TLD), makeDomainState);
  }

  // Collect the states generated by different protocols
  var S_PROTOCOL_FILE = makeDomainState();
  var S_PROTOCOL_FTP = makeDomainState();
  var S_PROTOCOL_HTTP = makeDomainState();
  var S_MAILTO = makeDomainState();
  makeChainT(S_START, 'file', S_PROTOCOL_FILE, makeDomainState);
  makeChainT(S_START, 'ftp', S_PROTOCOL_FTP, makeDomainState);
  makeChainT(S_START, 'http', S_PROTOCOL_HTTP, makeDomainState);
  makeChainT(S_START, 'mailto', S_MAILTO, makeDomainState);

  // Protocol states
  var S_PROTOCOL_SECURE = makeDomainState();
  var S_FULL_PROTOCOL = makeAcceptingState(PROTOCOL); // Full protocol ends with COLON
  var S_FULL_MAILTO = makeAcceptingState(MAILTO); // Mailto ends with COLON

  // Secure protocols (end with 's')
  makeT(S_PROTOCOL_FTP, 's', S_PROTOCOL_SECURE);
  makeT(S_PROTOCOL_FTP, ':', S_FULL_PROTOCOL);
  makeT(S_PROTOCOL_HTTP, 's', S_PROTOCOL_SECURE);
  makeT(S_PROTOCOL_HTTP, ':', S_FULL_PROTOCOL);

  // Become protocol tokens after a COLON
  makeT(S_PROTOCOL_FILE, ':', S_FULL_PROTOCOL);
  makeT(S_PROTOCOL_SECURE, ':', S_FULL_PROTOCOL);
  makeT(S_MAILTO, ':', S_FULL_MAILTO);

  // Register custom protocols; they all share one state
  var S_CUSTOM_PROTOCOL = makeDomainState();

  for (var p = 0; p < customProtocols.length; p++) {
    makeChainT(S_START, customProtocols[p], S_CUSTOM_PROTOCOL, makeDomainState);
  }

  makeT(S_CUSTOM_PROTOCOL, ':', S_FULL_PROTOCOL);

  // Localhost
  makeChainT(S_START, 'localhost', makeNearDomainState(LOCALHOST), makeDomainState);

  // Everything else
  // DOMAINs make more DOMAINs
  // Number and character transitions
  addDomainCharTransitions(S_START, S_NUM);
  addDomainCharTransitions(S_NUM, S_NUM); // a number followed by a non-digit becomes DOMAIN
  makeT(S_NUM, '-', S_DOMAIN_HYPHEN);

  // Default domain transitions
  makeT(S_DOMAIN, '-', S_DOMAIN_HYPHEN);
  makeT(S_DOMAIN_HYPHEN, '-', S_DOMAIN_HYPHEN);
  addDomainCharTransitions(S_DOMAIN, S_DOMAIN);
  addDomainCharTransitions(S_DOMAIN_HYPHEN, S_DOMAIN);

  // Set default transition for start state (some symbol)
  S_START.jd = makeAcceptingState(SYM);
  return S_START;
}
|
|
|
|
|
/**
 * Given a string, returns an array of token objects representing the
 * composition of that string.
 *
 * The state machine is run repeatedly from `start`. Each run consumes as many
 * characters as it can, then rolls back to the most recent accepting state and
 * emits one token for the text consumed up to that point.
 *
 * @method run
 * @param {State} start scanner starting state
 * @param {string} str input string to scan
 * @return {{t: string, v: string, s: number, e: number}[]} list of tokens, each
 * with a type (`t`), the original-case text (`v`), a start index (`s`) and an
 * exclusive end index (`e`) into `str`
 */
function run$1(start, str) {
  // The state machine is not case sensitive, so the input is tokenized in
  // lowercased form (the emitted values still keep the original case). Only
  // A-Z is lowercased selectively, because lowercasing the entire string causes
  // the length and character positions to vary in some non-English strings
  // with V8-based runtimes.
  var iterable = stringToArray(str.replace(/[A-Z]/g, function (c) {
    return c.toLowerCase();
  }));
  var charCount = iterable.length; // <= str.length if there are emojis, etc

  var tokens = []; // return value

  // Cursor through the string itself, accounting for characters that occupy
  // two string indexes, such as emojis
  var cursor = 0;

  // Cursor through the array-representation of the string
  var charCursor = 0;

  // Tokenize the string
  while (charCursor < charCount) {
    var state = start;
    var nextState = null;
    var tokenLength = 0;
    var latestAccepting = null;
    var sinceAccepts = -1; // string indexes consumed since the last accepting state
    var charsSinceAccepts = -1; // array entries consumed since the last accepting state

    while (charCursor < charCount && (nextState = takeT(state, iterable[charCursor]))) {
      state = nextState;

      // Keep track of the latest accepting state
      if (state.accepts()) {
        sinceAccepts = 0;
        charsSinceAccepts = 0;
        latestAccepting = state;
      } else if (sinceAccepts >= 0) {
        sinceAccepts += iterable[charCursor].length;
        charsSinceAccepts++;
      }

      tokenLength += iterable[charCursor].length;
      cursor += iterable[charCursor].length;
      charCursor++;
    }

    // Roll back to the latest accepting state
    cursor -= sinceAccepts;
    charCursor -= charsSinceAccepts;
    tokenLength -= sinceAccepts;

    // No more jumps, just make a new token from the last accepting one
    // TODO: If possible, don't output v, instead output range where values occur
    var tokenStart = cursor - tokenLength;
    tokens.push({
      t: latestAccepting.t, // token type/name
      v: str.slice(tokenStart, cursor), // string value (slice replaces the deprecated substr)
      s: tokenStart, // start index
      e: cursor // end index (excluding)
    });
  }

  return tokens;
}
|
|
|
|
|
/**
 * Split a string into an array of characters, keeping each valid surrogate
 * pair (e.g. an emoji that occupies two string indexes) together as a single
 * two-index entry. An unpaired surrogate is emitted on its own.
 *
 * Adapted from core-js (MIT license)
 * https://github.com/zloirock/core-js/blob/2d69cf5f99ab3ea3463c395df81e5a15b68f49d9/packages/core-js/internals/string-multibyte.js
 *
 * @function stringToArray
 * @param {string} str
 * @returns {string[]}
 */
function stringToArray(str) {
  var pieces = [];
  var total = str.length;

  for (var pos = 0; pos < total;) {
    var lead = str.charCodeAt(pos);
    var isPair = false;

    // A pair needs a high surrogate here and a low surrogate right after it
    if (lead >= 0xd800 && lead <= 0xdbff && pos + 1 !== total) {
      var trail = str.charCodeAt(pos + 1);
      isPair = trail >= 0xdc00 && trail <= 0xdfff;
    }

    var piece = isPair ? str.slice(pos, pos + 2) : str[pos];
    pieces.push(piece);
    pos += piece.length;
  }

  return pieces;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Babel `typeof` helper. On first use it replaces itself with one of two
 * implementations: plain `typeof` when the runtime has native symbols, or a
 * variant that reports "symbol" for objects constructed by a `Symbol`
 * polyfill. It then delegates to the chosen implementation.
 */
function _typeof(obj) {
  "@babel/helpers - typeof";

  if (typeof Symbol !== "function" || typeof Symbol.iterator !== "symbol") {
    // No native symbols: detect polyfilled symbol instances by constructor
    _typeof = function (obj) {
      if (obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype) {
        return "symbol";
      }

      return typeof obj;
    };
  } else {
    _typeof = function (obj) {
      return typeof obj;
    };
  }

  return _typeof(obj);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Default value for every option; used whenever the caller does not supply
 * one. `format` and `formatHref` default to the identity function `noop`.
 *
 * @property {string} defaultProtocol
 * @property {{[string]: (event) => void}} [events]
 */
var defaults = {
  defaultProtocol: 'http',
  events: null,
  format: noop,
  formatHref: noop,
  nl2br: false,
  tagName: 'a',
  target: null,
  rel: null,
  validate: true,
  truncate: 0,
  className: null,
  attributes: null,
  ignoreTags: []
};
|
|
|
|
|
/**
 * Holds the resolved option set: every option is taken from `opts` when
 * present there and from `defaults` otherwise.
 *
 * @class Options
 * @param {Object} [opts] Set option properties besides the defaults
 */
function Options(opts) {
  var given = opts || {};

  // Options copied as-is when the key is present on `opts` (even if falsy)
  var copiedKeys = ['defaultProtocol', 'events', 'format', 'formatHref', 'nl2br', 'tagName', 'target', 'rel', 'validate', 'truncate', 'className'];

  for (var k = 0; k < copiedKeys.length; k++) {
    var name = copiedKeys[k];
    this[name] = name in given ? given[name] : defaults[name];
  }

  // Unlike the keys above, a falsy `attributes` falls back to the default
  this.attributes = given.attributes || defaults.attributes;

  // Make all tag names upper case
  this.ignoreTags = [];
  var requestedTags = 'ignoreTags' in given ? given.ignoreTags : defaults.ignoreTags;

  for (var i = 0; i < requestedTags.length; i++) {
    this.ignoreTags.push(requestedTags[i].toUpperCase());
  }
}

Options.prototype = {
  /**
   * Given the token, return all options for how it should be displayed
   */
  resolve: function resolve(token) {
    var href = token.toHref(this.defaultProtocol);
    var text = token.toString();
    return {
      formatted: this.get('format', text, token),
      formattedHref: this.get('formatHref', href, token),
      tagName: this.get('tagName', href, token),
      className: this.get('className', href, token),
      target: this.get('target', href, token),
      rel: this.get('rel', href, token),
      events: this.getObject('events', href, token),
      attributes: this.getObject('attributes', href, token),
      truncate: this.get('truncate', href, token)
    };
  },

  /**
   * Returns the resolved `validate` option for the token, i.e. whether the
   * token should be displayed as a link based on the user options.
   */
  check: function check(token) {
    return this.get('validate', token.toString(), token);
  },

  // Private methods

  /**
   * Resolve an option's value based on the value of the option and the given
   * params.
   *
   * A function option is called with `(operator, token.t)`. An object option
   * is looked up by `token.t`, falling back to the default for `key`; if the
   * looked-up entry is a function it is called the same way. Anything else,
   * including any falsy value, is returned unchanged.
   *
   * @param {string} key Name of option to use
   * @param operator will be passed to the target option if it's a method
   * @param {MultiToken} token The token from linkify.tokenize
   */
  get: function get(key, operator, token) {
    var option = this[key];

    if (!option) {
      return option;
    }

    var kind = _typeof(option);

    if (kind === 'function') {
      return option(operator, token.t);
    }

    if (kind === 'object') {
      var perType = token.t in option ? option[token.t] : defaults[key];

      if (typeof perType === 'function') {
        return perType(operator, token.t);
      }

      return perType;
    }

    return option;
  },

  /**
   * Like `get`, but an object option is returned whole rather than being
   * looked up by token type.
   */
  getObject: function getObject(key, operator, token) {
    var option = this[key];

    if (typeof option === 'function') {
      return option(operator, token.t);
    }

    return option;
  }
};
|
|
|
|
|
|
|
|
|
|
/**
 * Identity function: hands its argument back untouched.
 * @param {*} val
 * @returns {*} the same `val`
 */
function noop(val) {
  return val;
}
|
|
|
|
|
|
|
|
|
|
// Frozen, prototype-less namespace object bundling the option defaults and
// the Options class.
var options = /*#__PURE__*/Object.freeze({
  __proto__: null,
  defaults: defaults,
  Options: Options
});
|
|
|
|
|
|
|
|
|
|
/******************************************************************************
|
|
|
|
|
Multi-Tokens
|
|
|
|
|
Tokens composed of arrays of TextTokens
|
|
|
|
|
******************************************************************************/
|
|
|
|
|
|
|
|
|
|
/**
 * Make `child` inherit from `parent`: a fresh prototype object derived from
 * `parent.prototype` receives every enumerable property of `props`, gets its
 * `constructor` pointed at `child`, and is installed as `child.prototype`.
 *
 * @param {Function} parent constructor to inherit from
 * @param {Function} child constructor that receives the new prototype
 * @param {Object} [props] extra prototype properties (defaults to none)
 * @returns {Function} `child`
 */
function inherits(parent, child) {
  var extras = arguments[2] !== undefined ? arguments[2] : {};
  var proto = Object.create(parent.prototype);

  for (var key in extras) {
    proto[key] = extras[key];
  }

  // Set last so that a `constructor` entry in the extras cannot override it
  proto.constructor = child;
  child.prototype = proto;
  return child;
}
|
|
|
|
|
/**
  Abstract class used for manufacturing tokens of text tokens. That is, rather
  than the value for a token being a small string of text, its value is an
  array of text tokens.

  Used for grouping together URLs, emails, hashtags, and other potential
  creations.

  The constructor itself stores nothing; the prototype methods read `this.v`
  (the string value) and `this.tk` (the array of scanner tokens).

  @class MultiToken
  @abstract
*/
function MultiToken() {}

MultiToken.prototype = {
  /**
    String representing the type for this token
    @property t
    @default 'token'
  */
  t: 'token',

  /**
    Is this multitoken a link?
    @property isLink
    @default false
  */
  isLink: false,

  /**
    Return the string this token represents.
    @method toString
    @return {string}
  */
  toString: function toString() {
    var text = this.v;
    return text;
  },

  /**
    What should the value for this token be in the `href` HTML attribute?
    Returns the `.toString` value by default.
    @method toHref
    @return {string}
  */
  toHref: function toHref() {
    var text = this.toString();
    return text;
  },

  /**
   * The start index of this token in the original input string
   * @returns {number}
   */
  startIndex: function startIndex() {
    var first = this.tk[0];
    return first.s;
  },

  /**
   * The end index of this token in the original input string (up to this
   * index but not including it)
   * @returns {number}
   */
  endIndex: function endIndex() {
    var last = this.tk[this.tk.length - 1];
    return last.e;
  },

  /**
    Returns a hash of relevant values for this token, which includes keys
    * type - Kind of token ('url', 'email', etc.)
    * value - Original text
    * isLink - Whether this token is a link
    * href - The value that should be added to the anchor tag's href
      attribute
    * start - Start index in the original input string
    * end - End index (exclusive) in the original input string

    @method toObject
    @param {string} [protocol] `'http'` by default
  */
  toObject: function toObject() {
    var protocol = arguments[0];

    if (protocol === undefined) {
      protocol = defaults.defaultProtocol;
    }

    return {
      type: this.t,
      value: this.v,
      isLink: this.isLink,
      href: this.toHref(protocol),
      start: this.startIndex(),
      end: this.endIndex()
    };
  }
}; // Base token
|
|
|
|
|
/**
 * Create a new token class that can be emitted by the parser state machine.
 *
 * The returned constructor stores the given type on `t`, the value on `v`
 * and the token list on `tk`, and has a prototype that inherits from
 * `MultiToken.prototype` with `props` copied on top.
 *
 * @param {string} type readable type of the token
 * @param {object} [props] properties to assign or override, including isLink = true or false
 * @returns {Function} new token class, constructed as `new Token(value, tokens)`
 */
function createTokenClass(type, props) {
  var TokenClass = function Token(value, tokens) {
    this.t = type;
    this.v = value;
    this.tk = tokens;
  };

  inherits(MultiToken, TokenClass, props);
  return TokenClass;
}
|
|
|
|
|
/**
 * Email address token that already carries its `mailto:` prefix. It is a
 * link of type 'email' and, unlike `Email`, does not override `toHref`.
 * @class MailtoEmail
 * @extends MultiToken
 */
var MailtoEmail = createTokenClass('email', { isLink: true });
|
|
|
|
|
/**
 * Represents a list of tokens making up a valid email address. It is a link
 * of type 'email' whose href is the address prefixed with `mailto:`.
 * @class Email
 * @extends MultiToken
 */
var Email = createTokenClass('email', {
  isLink: true,

  /**
   * @method toHref
   * @return {string} 'mailto:' followed by this token's string value
   */
  toHref: function toHref() {
    var address = this.toString();
    return 'mailto:' + address;
  }
});
|
|
|
|
|
/**
|
|
|
|
|
Represents some plain text
|
|
|
|
|
@class Text
|
|
|
|
|
@extends MultiToken
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// No props are passed, so only the type string 'text' is specific to this
// class; everything else comes from the MultiToken prototype defaults.
var Text = createTokenClass('text');
|
|
|
|
|
/**
|
|
|
|
|
Multi-linebreak token - represents a line break
|
|
|
|
|
@class Nl
|
|
|
|
|
@extends MultiToken
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// No props are passed, so only the type string 'nl' is specific to this
// class; everything else comes from the MultiToken prototype defaults.
var Nl = createTokenClass('nl');
|
|
|
|
|
/**
 * Represents a list of text tokens making up a valid URL
 * @class Url
 * @extends MultiToken
 */
var Url = createTokenClass('url', {
  isLink: true,

  /**
   * Build the href for this URL by concatenating the values of all of its
   * text tokens, prepending `protocol + '://'` when the token list starts
   * with neither a protocol token nor a slash token.
   *
   * Token values are emitted verbatim: nothing is lowercased and unsafe
   * HTML characters are NOT escaped.
   *
   * @method toHref
   * @param {string} [protocol] protocol to prepend when the URL has none;
   * `defaults.defaultProtocol` when omitted or undefined
   * @return {string}
   */
  toHref: function toHref() {
    var protocol = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : defaults.defaultProtocol;
    var tokens = this.tk;
    var count = tokens.length;
    var hasProtocol = false;
    var hasSlashSlash = false;
    var result = [];
    var i = 0;

    // Leading protocol token(s). The bounds check keeps a token list that
    // consists only of protocol/slash tokens (or is empty) from reading
    // past the end of the array.
    while (i < count && tokens[i].t === PROTOCOL) {
      hasProtocol = true;
      result.push(tokens[i].v);
      i++;
    }

    // Slash token(s) directly after the protocol (or at the very start)
    while (i < count && tokens[i].t === SLASH) {
      hasSlashSlash = true;
      result.push(tokens[i].v);
      i++;
    }

    // Everything else is appended unchanged
    for (; i < count; i++) {
      result.push(tokens[i].v);
    }

    result = result.join('');

    if (!(hasProtocol || hasSlashSlash)) {
      result = "".concat(protocol, "://").concat(result);
    }

    return result;
  },

  /**
   * @method hasProtocol
   * @return {boolean} true when the first text token is a protocol token;
   * false otherwise, including for an empty token list
   */
  hasProtocol: function hasProtocol() {
    var tokens = this.tk;
    return tokens.length > 0 && tokens[0].t === PROTOCOL;
  }
});
|
|
|
|
|
|
|
|
|
|
// Frozen, prototype-less namespace of the multi-token classes and helpers.
// `Base` is an alias of `MultiToken`.
var multi = /*#__PURE__*/Object.freeze({
  __proto__: null,

  // Base class, under both of its names
  MultiToken: MultiToken,
  Base: MultiToken,

  // Factory for new token classes
  createTokenClass: createTokenClass,

  // Concrete token classes
  MailtoEmail: MailtoEmail,
  Email: Email,
  Text: Text,
  Nl: Nl,
  Url: Url
});
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
Not exactly parser, more like the second-stage scanner (although we can
|
|
|
|
|
theoretically hotswap the code here with a real parser in the future... but
|
|
|
|
|
for a little URL-finding utility abstract syntax trees may be a little
|
|
|
|
|
overkill).
|
|
|
|
|
|
|
|
|
|
URL format: http://en.wikipedia.org/wiki/URI_scheme
|
|
|
|
|
Email format: http://en.wikipedia.org/wiki/Email_address (links to RFC in
|
|
|
|
|
reference)
|
|
|
|
|
|
|
|
|
|
@module linkify
|
|
|
|
|
@submodule parser
|
|
|
|
|
@main run
|
|
|
|
|
*/
|
|
|
|
|
/**
 * Generate the parser multi token-based state machine.
 *
 * All states are created first, then the transitions between them are
 * registered with makeT / makeMultiT. The order in which transitions are
 * registered is kept deliberately: some token types are registered more than
 * once for the same state (see the note on `localpartAccepting` below).
 *
 * @returns {State} the starting state
 */
function init$1() {
  // Registers one makeT transition per token type, in array order, all
  // leading to the same target state.
  function linkEach(from, types, to) {
    for (var k = 0; k < types.length; k++) {
      makeT(from, types[k], to);
    }
  }

  // The universal starting state.
  var S_START = makeState();

  // Intermediate states for URLs. Note that domains that begin with a
  // protocol are treated slightly differently from those that don't.
  var S_PROTOCOL = makeState(); // e.g., 'http:'
  var S_MAILTO = makeState(); // 'mailto:'
  var S_PROTOCOL_SLASH = makeState(); // e.g., 'http:/'
  var S_PROTOCOL_SLASH_SLASH = makeState(); // e.g., 'http://'
  var S_DOMAIN = makeState(); // parsed string ends with a potential domain name (A)
  var S_DOMAIN_DOT = makeState(); // (A) domain followed by DOT
  var S_TLD = makeAcceptingState(Url); // (A) Simplest possible URL with no query string
  var S_TLD_COLON = makeState(); // (A) URL followed by colon (potential port number here)
  var S_TLD_PORT = makeAcceptingState(Url); // TLD followed by a port number
  var S_URL = makeAcceptingState(Url); // Long URL with optional port and maybe query string
  var S_URL_NON_ACCEPTING = makeState(); // URL followed by some symbols (will not be part of the final URL)

  var S_URL_OPENBRACE = makeState(); // URL followed by {
  var S_URL_OPENBRACKET = makeState(); // URL followed by [
  var S_URL_OPENANGLEBRACKET = makeState(); // URL followed by <
  var S_URL_OPENPAREN = makeState(); // URL followed by (

  // URL followed by an opening bracket and some symbols the URL can end in
  var S_URL_OPENBRACE_Q = makeAcceptingState(Url);
  var S_URL_OPENBRACKET_Q = makeAcceptingState(Url);
  var S_URL_OPENANGLEBRACKET_Q = makeAcceptingState(Url);
  var S_URL_OPENPAREN_Q = makeAcceptingState(Url);

  // The matching *_Q state followed by some symbols the URL cannot end in
  var S_URL_OPENBRACE_SYMS = makeState();
  var S_URL_OPENBRACKET_SYMS = makeState();
  var S_URL_OPENANGLEBRACKET_SYMS = makeState();
  var S_URL_OPENPAREN_SYMS = makeState();

  var S_EMAIL_DOMAIN = makeState(); // parsed string starts with local email info + @ with a potential domain name (C)
  var S_EMAIL_DOMAIN_DOT = makeState(); // (C) domain followed by DOT
  var S_EMAIL = makeAcceptingState(Email); // (C) Possible email address (could have more tlds)
  var S_EMAIL_COLON = makeState(); // (C) email followed by colon (potential port number here)
  var S_EMAIL_PORT = makeAcceptingState(Email); // (C) Email address with a port
  var S_MAILTO_EMAIL = makeAcceptingState(MailtoEmail); // Email that begins with the mailto prefix (D)
  var S_MAILTO_EMAIL_NON_ACCEPTING = makeState(); // (D) Followed by some non-query string chars
  var S_LOCALPART = makeState(); // Local part of the email address
  var S_LOCALPART_AT = makeState(); // Local part of the email address plus @
  var S_LOCALPART_DOT = makeState(); // Local part of the email address plus '.' (localpart cannot end in .)
  var S_NL = makeAcceptingState(Nl); // single new line

  // Token types that can each stand for a host name on their own
  var hostTypes = [TLD, DOMAIN, NUM, LOCALHOST];
  // The same list without TLD
  var nonTldHostTypes = [DOMAIN, NUM, LOCALHOST];

  // Make path from start to protocol (with '//')
  makeT(S_START, NL, S_NL);
  makeT(S_START, PROTOCOL, S_PROTOCOL);
  makeT(S_START, MAILTO, S_MAILTO);
  makeT(S_PROTOCOL, SLASH, S_PROTOCOL_SLASH);
  makeT(S_PROTOCOL_SLASH, SLASH, S_PROTOCOL_SLASH_SLASH);

  // The very first potential domain name
  makeT(S_START, TLD, S_DOMAIN);
  makeT(S_START, DOMAIN, S_DOMAIN);
  makeT(S_START, LOCALHOST, S_TLD);
  makeT(S_START, NUM, S_DOMAIN);

  // Force URL for protocol followed by anything sane
  linkEach(S_PROTOCOL_SLASH_SLASH, hostTypes, S_URL);

  // Account for dots
  makeT(S_DOMAIN, DOT, S_DOMAIN_DOT);
  makeT(S_EMAIL_DOMAIN, DOT, S_EMAIL_DOMAIN_DOT);

  // After the first domain and a dot, we can find either a URL or another domain
  makeT(S_DOMAIN_DOT, TLD, S_TLD);
  linkEach(S_DOMAIN_DOT, nonTldHostTypes, S_DOMAIN);
  makeT(S_EMAIL_DOMAIN_DOT, TLD, S_EMAIL);
  linkEach(S_EMAIL_DOMAIN_DOT, nonTldHostTypes, S_EMAIL_DOMAIN);

  // S_TLD accepts! But the URL could be longer, try to find a match greedily
  // The `run` function should be able to "rollback" to the accepting state
  makeT(S_TLD, DOT, S_DOMAIN_DOT);
  makeT(S_EMAIL, DOT, S_EMAIL_DOMAIN_DOT);

  // Become real URLs after `SLASH` or `COLON NUM SLASH`
  // Here PSS and non-PSS converge
  makeT(S_TLD, COLON, S_TLD_COLON);
  makeT(S_TLD, SLASH, S_URL);
  makeT(S_TLD_COLON, NUM, S_TLD_PORT);
  makeT(S_TLD_PORT, SLASH, S_URL);
  makeT(S_EMAIL, COLON, S_EMAIL_COLON);
  makeT(S_EMAIL_COLON, NUM, S_EMAIL_PORT);

  // Types of characters the URL can definitely end in
  var qsAccepting = [AMPERSAND, ASTERISK, AT, BACKSLASH, BACKTICK, CARET, DOLLAR, DOMAIN, EQUALS, HYPHEN, LOCALHOST, NUM, PERCENT, PIPE, PLUS, POUND, PROTOCOL, SLASH, SYM, TILDE, TLD, UNDERSCORE];

  // Types of tokens that can follow a URL and be part of the query string
  // but cannot be the very last characters
  // Characters that cannot appear in the URL at all should be excluded
  var qsNonAccepting = [APOSTROPHE, CLOSEANGLEBRACKET, CLOSEBRACE, CLOSEBRACKET, CLOSEPAREN, COLON, COMMA, DOT, EXCLAMATION, OPENANGLEBRACKET, OPENBRACE, OPENBRACKET, OPENPAREN, QUERY, QUOTE, SEMI];

  // These states are responsible primarily for determining whether or not to
  // include the final closing bracket. One row per bracket kind, listed in
  // the order in which their transitions are registered.
  var brackets = [{
    open: OPENBRACE,
    close: CLOSEBRACE,
    opened: S_URL_OPENBRACE,
    q: S_URL_OPENBRACE_Q,
    syms: S_URL_OPENBRACE_SYMS
  }, {
    open: OPENBRACKET,
    close: CLOSEBRACKET,
    opened: S_URL_OPENBRACKET,
    q: S_URL_OPENBRACKET_Q,
    syms: S_URL_OPENBRACKET_SYMS
  }, {
    open: OPENANGLEBRACKET,
    close: CLOSEANGLEBRACKET,
    opened: S_URL_OPENANGLEBRACKET,
    q: S_URL_OPENANGLEBRACKET_Q,
    syms: S_URL_OPENANGLEBRACKET_SYMS
  }, {
    open: OPENPAREN,
    close: CLOSEPAREN,
    opened: S_URL_OPENPAREN,
    q: S_URL_OPENPAREN_Q,
    syms: S_URL_OPENPAREN_SYMS
  }];

  // Runs one registration step for every bracket kind, in table order.
  function eachBracket(step) {
    for (var k = 0; k < brackets.length; k++) {
      step(brackets[k]);
    }
  }

  // URL, followed by an opening bracket
  eachBracket(function (b) {
    makeT(S_URL, b.open, b.opened);
  });

  // URL with extra symbols at the end, followed by an opening bracket
  eachBracket(function (b) {
    makeT(S_URL_NON_ACCEPTING, b.open, b.opened);
  });

  // Closing bracket component. This character WILL be included in the URL
  eachBracket(function (b) {
    makeT(b.opened, b.close, S_URL);
  });
  eachBracket(function (b) {
    makeT(b.q, b.close, S_URL);
  });
  eachBracket(function (b) {
    makeT(b.syms, b.close, S_URL);
  });

  // URL that begins with an opening bracket, followed by symbols.
  // Note that the final state can still be the *_Q state (if the URL only
  // has a single opening bracket for some reason).
  eachBracket(function (b) {
    makeMultiT(b.opened, qsAccepting, b.q);
  });
  eachBracket(function (b) {
    makeMultiT(b.opened, qsNonAccepting, b.syms);
  });

  // URL that begins with an opening bracket, followed by some symbols
  eachBracket(function (b) {
    makeMultiT(b.q, qsAccepting, b.q);
  });
  eachBracket(function (b) {
    makeMultiT(b.q, qsNonAccepting, b.q);
  });
  eachBracket(function (b) {
    makeMultiT(b.syms, qsAccepting, b.q);
  });
  eachBracket(function (b) {
    makeMultiT(b.syms, qsNonAccepting, b.syms);
  });

  // Account for the query string
  makeMultiT(S_URL, qsAccepting, S_URL);
  makeMultiT(S_URL_NON_ACCEPTING, qsAccepting, S_URL);
  makeMultiT(S_URL, qsNonAccepting, S_URL_NON_ACCEPTING);
  makeMultiT(S_URL_NON_ACCEPTING, qsNonAccepting, S_URL_NON_ACCEPTING);

  // Email address-specific state definitions
  // Note: We are not allowing '/' in email addresses since this would interfere
  // with real URLs
  // For addresses with the mailto prefix
  // 'mailto:' followed by anything sane is a valid email
  linkEach(S_MAILTO, hostTypes, S_MAILTO_EMAIL);

  // Greedily get more potential valid email values
  makeMultiT(S_MAILTO_EMAIL, qsAccepting, S_MAILTO_EMAIL);
  makeMultiT(S_MAILTO_EMAIL, qsNonAccepting, S_MAILTO_EMAIL_NON_ACCEPTING);
  makeMultiT(S_MAILTO_EMAIL_NON_ACCEPTING, qsAccepting, S_MAILTO_EMAIL);
  makeMultiT(S_MAILTO_EMAIL_NON_ACCEPTING, qsNonAccepting, S_MAILTO_EMAIL_NON_ACCEPTING);

  // For addresses without the mailto prefix
  // Tokens allowed in the localpart of the email
  var localpartAccepting = [AMPERSAND, APOSTROPHE, ASTERISK, BACKSLASH, BACKTICK, CARET, CLOSEBRACE, DOLLAR, DOMAIN, EQUALS, HYPHEN, NUM, OPENBRACE, PERCENT, PIPE, PLUS, POUND, QUERY, SLASH, SYM, TILDE, TLD, UNDERSCORE];

  // Some of the tokens in `localpartAccepting` are already accounted for here and
  // will not be overwritten (don't worry)
  makeMultiT(S_DOMAIN, localpartAccepting, S_LOCALPART);
  makeT(S_DOMAIN, AT, S_LOCALPART_AT);
  makeMultiT(S_TLD, localpartAccepting, S_LOCALPART);
  makeT(S_TLD, AT, S_LOCALPART_AT);
  makeMultiT(S_DOMAIN_DOT, localpartAccepting, S_LOCALPART);

  // Now in localpart of address
  // TODO: IP addresses and what if the email starts with numbers?
  makeMultiT(S_LOCALPART, localpartAccepting, S_LOCALPART);
  makeT(S_LOCALPART, AT, S_LOCALPART_AT); // close to an email address now
  makeT(S_LOCALPART, DOT, S_LOCALPART_DOT);
  makeMultiT(S_LOCALPART_DOT, localpartAccepting, S_LOCALPART);
  makeT(S_LOCALPART_AT, TLD, S_EMAIL_DOMAIN);
  makeT(S_LOCALPART_AT, DOMAIN, S_EMAIL_DOMAIN);
  makeT(S_LOCALPART_AT, NUM, S_EMAIL_DOMAIN);
  makeT(S_LOCALPART_AT, LOCALHOST, S_EMAIL);

  // States following `@` defined above
  return S_START;
}
|
|
|
|
|
/**
 * Run the parser state machine on a list of scanned string-based tokens to
 * create a list of multi tokens, each of which represents a URL, email address,
 * plain text, etc.
 *
 * Each pass starts from `start`, treats tokens that have no transition out
 * of it as plain text, then follows transitions as far as possible while
 * remembering the most recent accepting state. When an accepting state was
 * seen, the cursor is rolled back to it and a multi token of that state's
 * class is emitted; otherwise every token consumed in the pass becomes text.
 *
 * @param {State} start parser start state
 * @param {string} input the original input used to generate the given tokens
 * @param {{t: string, v: string, s: number, e: number}[]} tokens list of scanned tokens
 * @returns {MultiToken[]}
 */
function run(start, input, tokens) {
  var total = tokens.length;
  var pos = 0;
  var output = [];
  var pendingText = [];

  while (pos < total) {
    var current = start;
    var lastAccepting = null;
    var consumed = 0; // tokens taken by the state machine in this pass
    var overshoot = -1; // tokens consumed since the last accepting state; -1 = none seen

    // Starting tokens with nowhere to jump to.
    // Consider these to be just plain text
    var firstJump = null;

    while (pos < total) {
      firstJump = takeT(current, tokens[pos].t);

      if (firstJump) {
        break;
      }

      pendingText.push(tokens[pos]);
      pos++;
    }

    while (pos < total) {
      // The transition found by the scan above is reused for the first
      // step instead of being looked up a second time.
      var following = firstJump || takeT(current, tokens[pos].t);

      if (!following) {
        break;
      }

      firstJump = null;
      current = following;

      // Keep track of the latest accepting state
      if (current.accepts()) {
        overshoot = 0;
        lastAccepting = current;
      } else if (overshoot >= 0) {
        overshoot++;
      }

      pos++;
      consumed++;
    }

    if (overshoot < 0) {
      // No accepting state was found, part of a regular text token
      // Add all the tokens we looked at to the text tokens array
      for (var back = consumed; back > 0; back--) {
        pendingText.push(tokens[pos - back]);
      }

      continue;
    }

    // Accepting state!
    // First close off the pending text tokens (if available)
    if (pendingText.length > 0) {
      output.push(parserCreateMultiToken(Text, input, pendingText));
      pendingText = [];
    }

    // Roll back to the latest accepting state
    pos -= overshoot;
    consumed -= overshoot;

    // Create a new multitoken
    var Multi = lastAccepting.t;
    var subtokens = tokens.slice(pos - consumed, pos);
    output.push(parserCreateMultiToken(Multi, input, subtokens));
  }

  // Finally close off the pending text tokens (if available)
  if (pendingText.length > 0) {
    output.push(parserCreateMultiToken(Text, input, pendingText));
  }

  return output;
}
|
|
|
|
|
/**
 * Utility function for instantiating a new multitoken with all the relevant
 * fields during parsing.
 *
 * The token's value is the part of `input` that runs from the start index
 * of the first token up to (not including) the end index of the last token.
 *
 * @param {Class<MultiToken>} Multi class to instantiate
 * @param {string} input original input string
 * @param {{t: string, v: string, s: number, e: number}[]} tokens consecutive
 * tokens scanned from input string; must contain at least one token
 * @returns {MultiToken}
 */
function parserCreateMultiToken(Multi, input, tokens) {
  var startIdx = tokens[0].s;
  var endIdx = tokens[tokens.length - 1].e;

  // slice(start, end) is used in place of the deprecated
  // substr(start, length); both select the same characters for the
  // non-negative indexes the token s/e fields are documented to be.
  var value = input.slice(startIdx, endIdx);
  return new Multi(value, tokens);
}
|
|
|
|
|
|
|
|
|
|
// Function used for non-fatal notices: `console.warn` when a console with a
// truthy `warn` member exists, otherwise a function that does nothing.
var warn = function () {
  if (typeof console !== 'undefined' && console && console.warn) {
    return console.warn;
  }

  return function () {};
}();

// Side-effect initialization state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Mutable, module-wide initialization state.
var INIT = {
  // Scanner and parser descriptors; null until init() fills them in
  scanner: null,
  parser: null,

  // [name, plugin] pairs added by registerPlugin()
  pluginQueue: [],

  // Protocol names added by registerCustomProtocol()
  customProtocols: [],

  // Set to true at the end of init()
  initialized: false
};
|
|
|
|
|
/**
 * De-register all plugins and reset the internal state-machine. Used for
 * testing; not required in practice.
 *
 * Every field of `INIT` is put back to its starting value; the plugin queue
 * and custom protocol list are replaced with new empty arrays.
 * @private
 */
function reset() {
  // Forget everything that was registered
  INIT.pluginQueue = [];
  INIT.customProtocols = [];

  // Drop the generated state machines and require a fresh init()
  INIT.scanner = null;
  INIT.parser = null;
  INIT.initialized = false;
}
|
|
|
|
|
/**
 * Register a linkify extension plugin.
 *
 * A plugin registered under a name that is already in the queue replaces
 * the existing entry in place (with a warning). Otherwise the plugin is
 * appended to the queue, and a warning is emitted if linkify has already
 * been initialized, because the plugin will not run until init() is called
 * again.
 *
 * @param {string} name of plugin to register
 * @param {Function} plugin function that accepts mutable linkify state
 */
function registerPlugin(name, plugin) {
  // Look for the first queue entry with the same name
  var existingAt = -1;

  for (var idx = 0; idx < INIT.pluginQueue.length && existingAt < 0; idx++) {
    if (INIT.pluginQueue[idx][0] === name) {
      existingAt = idx;
    }
  }

  if (existingAt >= 0) {
    warn('linkifyjs: plugin "'.concat(name, '" already registered - will be overwritten'));
    INIT.pluginQueue[existingAt] = [name, plugin];
    return;
  }

  INIT.pluginQueue.push([name, plugin]);

  if (INIT.initialized) {
    warn('linkifyjs: already initialized - will not register plugin "'.concat(name, '" until you manually call linkify.init(). To avoid this warning, please register all plugins before invoking linkify the first time.'));
  }
}
|
|
|
|
|
/**
 * Detect URLs with the following additional protocol. Anything following
 * "protocol:" will be considered a link.
 *
 * A warning is emitted first if linkify has already been initialized. The
 * name is then validated and, if accepted, appended to the list of custom
 * protocols.
 *
 * @param {string} protocol name made up only of lowercase a-z and hyphens
 * @throws {Error} when the name contains any other character
 */
function registerCustomProtocol(protocol) {
  if (INIT.initialized) {
    warn('linkifyjs: already initialized - will not register custom protocol "'.concat(protocol, '" until you manually call linkify.init(). To avoid this warning, please register all custom protocols before invoking linkify the first time.'));
  }

  var isSupported = /^[a-z-]+$/.test(protocol);

  if (!isSupported) {
    throw new Error('linkifyjs: protocols containing characters other than a-z or - (hyphen) are not supported');
  }

  INIT.customProtocols.push(protocol);
}
|
|
|
|
|
/**
 * Initialize the linkify state machine. Called automatically the first time
 * linkify is called on a string, but may be called manually as well.
 *
 * Builds the scanner (with the registered custom protocols) and the parser,
 * then calls every queued plugin with the `{ scanner, parser, utils }` trio,
 * and finally marks linkify as initialized.
 */
function init() {
  // Initialize state machines
  var scannerStart = init$2(INIT.customProtocols);
  INIT.scanner = {
    start: scannerStart,
    tokens: text
  };

  var parserStart = init$1();
  INIT.parser = {
    start: parserStart,
    tokens: multi
  };

  // Helpers handed to every plugin
  var utils = {
    createTokenClass: createTokenClass
  };

  // Initialize plugins. The queue is re-read on every step and each plugin
  // is invoked as a member of its own [name, plugin] entry.
  var index = 0;

  while (index < INIT.pluginQueue.length) {
    var entry = INIT.pluginQueue[index];
    entry[1]({
      scanner: INIT.scanner,
      parser: INIT.parser,
      utils: utils
    });
    index++;
  }

  INIT.initialized = true;
}
|
|
|
|
|
/**
 * Parse a string into tokens that represent linkable and non-linkable
 * sub-components. Initializes linkify first if that has not happened yet.
 *
 * @param {string} str
 * @return {MultiToken[]} tokens
 */
function tokenize(str) {
  if (!INIT.initialized) {
    init();
  }

  // Scan the string into text tokens, then parse those into multi tokens.
  var parserStart = INIT.parser.start;
  var scanned = run$1(INIT.scanner.start, str);
  return run(parserStart, str, scanned);
}
|
|
|
|
|
/**
 * Find a list of linkable items in the given string.
 *
 * The string is tokenized and every token that is a link (and, when `type`
 * is given, whose type matches it) is converted with `toObject()`.
 *
 * @param {string} str string to find links in
 * @param {string} [type] (optional) only find links of a specific type, e.g.,
 * 'url' or 'email'
 * @return {object[]} `toObject()` results for the matching tokens, in order
 */
function find(str) {
  var type = arguments[1] === undefined ? null : arguments[1];
  var tokens = tokenize(str);
  var matches = [];

  for (var idx = 0; idx < tokens.length; idx++) {
    var candidate = tokens[idx];

    if (!candidate.isLink) {
      continue;
    }

    if (type && candidate.t !== type) {
      continue;
    }

    matches.push(candidate.toObject());
  }

  return matches;
}
|
|
|
|
|
/**
 * Is the given string valid linkable text of some sort. Note that this does not
 * trim the text for you.
 *
 * The string qualifies only when it tokenizes to exactly one token and that
 * token is a link.
 *
 * Optionally pass in a second `type` param, which is the type of link to test
 * for.
 *
 * For example,
 *
 *     linkify.test(str, 'email');
 *
 * Returns `true` if str is a valid email.
 * @param {string} str string to test for links
 * @param {string} [type] optional specific link type to look for
 * @returns boolean true/false
 */
function test(str) {
  var type = arguments[1] === undefined ? null : arguments[1];
  var tokens = tokenize(str);

  if (tokens.length !== 1) {
    return false;
  }

  var only = tokens[0];
  var linkFlag = only.isLink;

  if (!linkFlag) {
    // Hand back the token's own (falsy) isLink value
    return linkFlag;
  }

  return !type || only.t === type;
}
|
|
|
|
|
|
|
|
|
|
export { Options, find, init, options, registerCustomProtocol, registerPlugin, reset, test, tokenize };
|