You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2962 lines
68 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/**
* Finite State Machine generation utilities
*/
/**
* Define a basic state machine state. j is the list of character transitions,
* jr is the list of regex-match transitions, jd is the default state to
* transition to t is the accepting token type, if any. If this is the terminal
* state, then it does not emit a token.
* @param {string|class} token to emit
*/
function State(token) {
this.j = {}; // IMPLEMENTATION 1
// this.j = []; // IMPLEMENTATION 2
this.jr = [];
this.jd = null;
this.t = token;
}
/**
* Take the transition from this state to the next one on the given input.
* If this state does not exist deterministically, will create it.
*
* @param {string} input character or token to transition on
* @param {string|class} [token] token or multi-token to emit when reaching
* this state
*/
State.prototype = {
/**
* @param {State} state
*/
accepts: function accepts() {
return !!this.t;
},
/**
* Short for "take transition", this is a method for building/working with
* state machines.
*
* If a state already exists for the given input, returns it.
*
* If a token is specified, that state will emit that token when reached by
* the linkify engine.
*
* If no state exists, it will be initialized with some default transitions
* that resemble existing default transitions.
*
* If a state is given for the second argument, that state will be
* transitioned to on the given input regardless of what that input
* previously did.
*
* @param {string} input character or token to transition on
* @param {Token|State} tokenOrState transition to a matching state
* @returns State taken after the given input
*/
tt: function tt(input, tokenOrState) {
if (tokenOrState && tokenOrState.j) {
// State, default a basic transition
this.j[input] = tokenOrState;
return tokenOrState;
} // See if there's a direct state transition (not regex or default)
var token = tokenOrState;
var nextState = this.j[input];
if (nextState) {
if (token) {
nextState.t = token;
} // overrwites previous token
return nextState;
} // Create a new state for this input
nextState = makeState(); // Take the transition using the usual default mechanisms
var templateState = takeT(this, input);
if (templateState) {
// Some default state transition, make a prime state based on this one
Object.assign(nextState.j, templateState.j);
nextState.jr.append(templateState.jr);
nextState.jr = templateState.jd;
nextState.t = token || templateState.t;
} else {
nextState.t = token;
}
this.j[input] = nextState;
return nextState;
}
};
/**
* Utility function to create state without using new keyword (reduced file size
* when minified)
*/
var makeState = function makeState() {
return new State();
};
/**
* Similar to previous except it is an accepting state that emits a token
* @param {Token} token
*/
var makeAcceptingState = function makeAcceptingState(token) {
return new State(token);
};
/**
* Create a transition from startState to nextState via the given character
* @param {State} startState transition from thie starting state
* @param {Token} input via this input character or other concrete token type
* @param {State} nextState to this next state
*/
var makeT = function makeT(startState, input, nextState) {
// IMPLEMENTATION 1: Add to object (fast)
if (!startState.j[input]) {
startState.j[input] = nextState;
} // IMPLEMENTATION 2: Add to array (slower)
// startState.j.push([input, nextState]);
};
/**
*
* @param {State} startState stransition from this starting state
* @param {RegExp} regex Regular expression to match on input
* @param {State} nextState transition to this next state if there's are regex match
*/
var makeRegexT = function makeRegexT(startState, regex, nextState) {
startState.jr.push([regex, nextState]);
};
/**
* Follow the transition from the given character to the next state
* @param {State} state
* @param {Token} input character or other concrete token type to transition
* @returns {?State} the next state, if any
*/
var takeT = function takeT(state, input) {
// IMPLEMENTATION 1: Object key lookup (faster)
var nextState = state.j[input];
if (nextState) {
return nextState;
} // IMPLEMENTATION 2: List lookup (slower)
// Loop through all the state transitions and see if there's a match
// for (let i = 0; i < state.j.length; i++) {
// const val = state.j[i][0];
// const nextState = state.j[i][1];
// if (input === val) { return nextState; }
// }
for (var i = 0; i < state.jr.length; i++) {
var regex = state.jr[i][0];
var _nextState = state.jr[i][1];
if (regex.test(input)) {
return _nextState;
}
} // Nowhere left to jump! Return default, if any
return state.jd;
};
/**
* Similar to makeT, but takes a list of characters that all transition to the
* same nextState startState
* @param {State} startState
* @param {Array} chars
* @param {State} nextState
*/
var makeMultiT = function makeMultiT(startState, chars, nextState) {
for (var i = 0; i < chars.length; i++) {
makeT(startState, chars[i], nextState);
}
};
/**
* Set up a list of multiple transitions at once. transitions is a list of
* tuples, where the first element is the transitions character and the second
* is the state to transition to
* @param {State} startState
* @param {Array} transitions
*/
var makeBatchT = function makeBatchT(startState, transitions) {
for (var i = 0; i < transitions.length; i++) {
var input = transitions[i][0];
var nextState = transitions[i][1];
makeT(startState, input, nextState);
}
};
/**
* For state machines that transition on characters only; given a non-empty
* target string, generates states (if required) for each consecutive substring
* of characters starting from the beginning of the string. The final state will
* have a special value, as specified in options. All other "in between"
* substrings will have a default end state.
*
* This turns the state machine into a Trie-like data structure (rather than a
* intelligently-designed DFA).
* @param {State} state
* @param {string} str
* @param {Token} endStateFactory
* @param {Token} defaultStateFactory
*/
var makeChainT = function makeChainT(state, str, endState, defaultStateFactory) {
var i = 0,
len = str.length,
nextState; // Find the next state without a jump to the next character
while (i < len && (nextState = state.j[str[i]])) {
state = nextState;
i++;
}
if (i >= len) {
return [];
} // no new tokens were added
while (i < len - 1) {
nextState = defaultStateFactory();
makeT(state, str[i], nextState);
state = nextState;
i++;
}
makeT(state, str[len - 1], endState);
};
/******************************************************************************
Text Tokens
Tokens composed of strings
******************************************************************************/
// A valid web domain token
var DOMAIN = 'DOMAIN';
var LOCALHOST = 'LOCALHOST'; // special case of domain
// Valid top-level domain (see tlds.js)
var TLD = 'TLD'; // Any sequence of digits 0-9
var NUM = 'NUM'; // A web URL protocol. Supported types include
// - `http:`
// - `https:`
// - `ftp:`
// - `ftps:`
// - user-defined custom protocols
var PROTOCOL = 'PROTOCOL'; // Start of the email URI protocol
var MAILTO = 'MAILTO'; // mailto:
// Any number of consecutive whitespace characters that are not newline
var WS = 'WS'; // New line (unix style)
var NL = 'NL'; // \n
// Opening/closing bracket classes
var OPENBRACE = 'OPENBRACE'; // {
var OPENBRACKET = 'OPENBRACKET'; // [
var OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // <
var OPENPAREN = 'OPENPAREN'; // (
var CLOSEBRACE = 'CLOSEBRACE'; // }
var CLOSEBRACKET = 'CLOSEBRACKET'; // ]
var CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // >
var CLOSEPAREN = 'CLOSEPAREN'; // )
// Various symbols
var AMPERSAND = 'AMPERSAND'; // &
var APOSTROPHE = 'APOSTROPHE'; // '
var ASTERISK = 'ASTERISK'; // *
var AT = 'AT'; // @
var BACKSLASH = 'BACKSLASH'; // \
var BACKTICK = 'BACKTICK'; // `
var CARET = 'CARET'; // ^
var COLON = 'COLON'; // :
var COMMA = 'COMMA'; // ,
var DOLLAR = 'DOLLAR'; // $
var DOT = 'DOT'; // .
var EQUALS = 'EQUALS'; // =
var EXCLAMATION = 'EXCLAMATION'; // !
var HYPHEN = 'HYPHEN'; // -
var PERCENT = 'PERCENT'; // %
var PIPE = 'PIPE'; // |
var PLUS = 'PLUS'; // +
var POUND = 'POUND'; // #
var QUERY = 'QUERY'; // ?
var QUOTE = 'QUOTE'; // "
var SEMI = 'SEMI'; // ;
var SLASH = 'SLASH'; // /
var TILDE = 'TILDE'; // ~
var UNDERSCORE = 'UNDERSCORE'; // _
// Default token - anything that is not one of the above
var SYM = 'SYM';
var text = /*#__PURE__*/Object.freeze({
__proto__: null,
DOMAIN: DOMAIN,
LOCALHOST: LOCALHOST,
TLD: TLD,
NUM: NUM,
PROTOCOL: PROTOCOL,
MAILTO: MAILTO,
WS: WS,
NL: NL,
OPENBRACE: OPENBRACE,
OPENBRACKET: OPENBRACKET,
OPENANGLEBRACKET: OPENANGLEBRACKET,
OPENPAREN: OPENPAREN,
CLOSEBRACE: CLOSEBRACE,
CLOSEBRACKET: CLOSEBRACKET,
CLOSEANGLEBRACKET: CLOSEANGLEBRACKET,
CLOSEPAREN: CLOSEPAREN,
AMPERSAND: AMPERSAND,
APOSTROPHE: APOSTROPHE,
ASTERISK: ASTERISK,
AT: AT,
BACKSLASH: BACKSLASH,
BACKTICK: BACKTICK,
CARET: CARET,
COLON: COLON,
COMMA: COMMA,
DOLLAR: DOLLAR,
DOT: DOT,
EQUALS: EQUALS,
EXCLAMATION: EXCLAMATION,
HYPHEN: HYPHEN,
PERCENT: PERCENT,
PIPE: PIPE,
PLUS: PLUS,
POUND: POUND,
QUERY: QUERY,
QUOTE: QUOTE,
SEMI: SEMI,
SLASH: SLASH,
TILDE: TILDE,
UNDERSCORE: UNDERSCORE,
SYM: SYM
});
// NOTE: punycode versions of IDNs are not included here because these will not
// be as commonly used without the http prefix anyway and linkify will already
// force-encode those.
// To be updated with the values in this list
// http://data.iana.org/TLD/tlds-alpha-by-domain.txt
// Version 2021022800, Last Updated Sun Feb 28 07:07:01 2021 UTC
var tlds = 'aaa \
aarp \
abarth \
abb \
abbott \
abbvie \
abc \
able \
abogado \
abudhabi \
ac \
academy \
accenture \
accountant \
accountants \
aco \
actor \
ad \
adac \
ads \
adult \
ae \
aeg \
aero \
aetna \
af \
afamilycompany \
afl \
africa \
ag \
agakhan \
agency \
ai \
aig \
airbus \
airforce \
airtel \
akdn \
al \
alfaromeo \
alibaba \
alipay \
allfinanz \
allstate \
ally \
alsace \
alstom \
am \
amazon \
americanexpress \
americanfamily \
amex \
amfam \
amica \
amsterdam \
analytics \
android \
anquan \
anz \
ao \
aol \
apartments \
app \
apple \
aq \
aquarelle \
ar \
arab \
aramco \
archi \
army \
arpa \
art \
arte \
as \
asda \
asia \
associates \
at \
athleta \
attorney \
au \
auction \
audi \
audible \
audio \
auspost \
author \
auto \
autos \
avianca \
aw \
aws \
ax \
axa \
az \
azure \
ba \
baby \
baidu \
banamex \
bananarepublic \
band \
bank \
bar \
barcelona \
barclaycard \
barclays \
barefoot \
bargains \
baseball \
basketball \
bauhaus \
bayern \
bb \
bbc \
bbt \
bbva \
bcg \
bcn \
bd \
be \
beats \
beauty \
beer \
bentley \
berlin \
best \
bestbuy \
bet \
bf \
bg \
bh \
bharti \
bi \
bible \
bid \
bike \
bing \
bingo \
bio \
biz \
bj \
black \
blackfriday \
blockbuster \
blog \
bloomberg \
blue \
bm \
bms \
bmw \
bn \
bnpparibas \
bo \
boats \
boehringer \
bofa \
bom \
bond \
boo \
book \
booking \
bosch \
bostik \
boston \
bot \
boutique \
box \
br \
bradesco \
bridgestone \
broadway \
broker \
brother \
brussels \
bs \
bt \
budapest \
bugatti \
build \
builders \
business \
buy \
buzz \
bv \
bw \
by \
bz \
bzh \
ca \
cab \
cafe \
cal \
call \
calvinklein \
cam \
camera \
camp \
cancerresearch \
canon \
capetown \
capital \
capitalone \
car \
caravan \
cards \
care \
career \
careers \
cars \
casa \
case \
cash \
casino \
cat \
catering \
catholic \
cba \
cbn \
cbre \
cbs \
cc \
cd \
center \
ceo \
cern \
cf \
cfa \
cfd \
cg \
ch \
chanel \
channel \
charity \
chase \
chat \
cheap \
chintai \
christmas \
chrome \
church \
ci \
cipriani \
circle \
cisco \
citadel \
citi \
citic \
city \
cityeats \
ck \
cl \
claims \
cleaning \
click \
clinic \
clinique \
clothing \
cloud \
club \
clubmed \
cm \
cn \
co \
coach \
codes \
coffee \
college \
cologne \
com \
comcast \
commbank \
community \
company \
compare \
computer \
comsec \
condos \
construction \
consulting \
contact \
contractors \
cooking \
cookingchannel \
cool \
coop \
corsica \
country \
coupon \
coupons \
courses \
cpa \
cr \
credit \
creditcard \
creditunion \
cricket \
crown \
crs \
cruise \
cruises \
csc \
cu \
cuisinella \
cv \
cw \
cx \
cy \
cymru \
cyou \
cz \
dabur \
dad \
dance \
data \
date \
dating \
datsun \
day \
dclk \
dds \
de \
deal \
dealer \
deals \
degree \
delivery \
dell \
deloitte \
delta \
democrat \
dental \
dentist \
desi \
design \
dev \
dhl \
diamonds \
diet \
digital \
direct \
directory \
discount \
discover \
dish \
diy \
dj \
dk \
dm \
dnp \
do \
docs \
doctor \
dog \
domains \
dot \
download \
drive \
dtv \
dubai \
duck \
dunlop \
dupont \
durban \
dvag \
dvr \
dz \
earth \
eat \
ec \
eco \
edeka \
edu \
education \
ee \
eg \
email \
emerck \
energy \
engineer \
engineering \
enterprises \
epson \
equipment \
er \
ericsson \
erni \
es \
esq \
estate \
et \
etisalat \
eu \
eurovision \
eus \
events \
exchange \
expert \
exposed \
express \
extraspace \
fage \
fail \
fairwinds \
faith \
family \
fan \
fans \
farm \
farmers \
fashion \
fast \
fedex \
feedback \
ferrari \
ferrero \
fi \
fiat \
fidelity \
fido \
film \
final \
finance \
financial \
fire \
firestone \
firmdale \
fish \
fishing \
fit \
fitness \
fj \
fk \
flickr \
flights \
flir \
florist \
flowers \
fly \
fm \
fo \
foo \
food \
foodnetwork \
football \
ford \
forex \
forsale \
forum \
foundation \
fox \
fr \
free \
fresenius \
frl \
frogans \
frontdoor \
frontier \
ftr \
fujitsu \
fujixerox \
fun \
fund \
furniture \
futbol \
fyi \
ga \
gal \
gallery \
gallo \
gallup \
game \
games \
gap \
garden \
gay \
gb \
gbiz \
gd \
gdn \
ge \
gea \
gent \
genting \
george \
gf \
gg \
ggee \
gh \
gi \
gift \
gifts \
gives \
giving \
gl \
glade \
glass \
gle \
global \
globo \
gm \
gmail \
gmbh \
gmo \
gmx \
gn \
godaddy \
gold \
goldpoint \
golf \
goo \
goodyear \
goog \
google \
gop \
got \
gov \
gp \
gq \
gr \
grainger \
graphics \
gratis \
green \
gripe \
grocery \
group \
gs \
gt \
gu \
guardian \
gucci \
guge \
guide \
guitars \
guru \
gw \
gy \
hair \
hamburg \
hangout \
haus \
hbo \
hdfc \
hdfcbank \
health \
healthcare \
help \
helsinki \
here \
hermes \
hgtv \
hiphop \
hisamitsu \
hitachi \
hiv \
hk \
hkt \
hm \
hn \
hockey \
holdings \
holiday \
homedepot \
homegoods \
homes \
homesense \
honda \
horse \
hospital \
host \
hosting \
hot \
hoteles \
hotels \
hotmail \
house \
how \
hr \
hsbc \
ht \
hu \
hughes \
hyatt \
hyundai \
ibm \
icbc \
ice \
icu \
id \
ie \
ieee \
ifm \
ikano \
il \
im \
imamat \
imdb \
immo \
immobilien \
in \
inc \
industries \
infiniti \
info \
ing \
ink \
institute \
insurance \
insure \
int \
international \
intuit \
investments \
io \
ipiranga \
iq \
ir \
irish \
is \
ismaili \
ist \
istanbul \
it \
itau \
itv \
iveco \
jaguar \
java \
jcb \
je \
jeep \
jetzt \
jewelry \
jio \
jll \
jm \
jmp \
jnj \
jo \
jobs \
joburg \
jot \
joy \
jp \
jpmorgan \
jprs \
juegos \
juniper \
kaufen \
kddi \
ke \
kerryhotels \
kerrylogistics \
kerryproperties \
kfh \
kg \
kh \
ki \
kia \
kim \
kinder \
kindle \
kitchen \
kiwi \
km \
kn \
koeln \
komatsu \
kosher \
kp \
kpmg \
kpn \
kr \
krd \
kred \
kuokgroup \
kw \
ky \
kyoto \
kz \
la \
lacaixa \
lamborghini \
lamer \
lancaster \
lancia \
land \
landrover \
lanxess \
lasalle \
lat \
latino \
latrobe \
law \
lawyer \
lb \
lc \
lds \
lease \
leclerc \
lefrak \
legal \
lego \
lexus \
lgbt \
li \
lidl \
life \
lifeinsurance \
lifestyle \
lighting \
like \
lilly \
limited \
limo \
lincoln \
linde \
link \
lipsy \
live \
living \
lixil \
lk \
llc \
llp \
loan \
loans \
locker \
locus \
loft \
lol \
london \
lotte \
lotto \
love \
lpl \
lplfinancial \
lr \
ls \
lt \
ltd \
ltda \
lu \
lundbeck \
luxe \
luxury \
lv \
ly \
ma \
macys \
madrid \
maif \
maison \
makeup \
man \
management \
mango \
map \
market \
marketing \
markets \
marriott \
marshalls \
maserati \
mattel \
mba \
mc \
mckinsey \
md \
me \
med \
media \
meet \
melbourne \
meme \
memorial \
men \
menu \
merckmsd \
mg \
mh \
miami \
microsoft \
mil \
mini \
mint \
mit \
mitsubishi \
mk \
ml \
mlb \
mls \
mm \
mma \
mn \
mo \
mobi \
mobile \
moda \
moe \
moi \
mom \
monash \
money \
monster \
mormon \
mortgage \
moscow \
moto \
motorcycles \
mov \
movie \
mp \
mq \
mr \
ms \
msd \
mt \
mtn \
mtr \
mu \
museum \
mutual \
mv \
mw \
mx \
my \
mz \
na \
nab \
nagoya \
name \
nationwide \
natura \
navy \
nba \
nc \
ne \
nec \
net \
netbank \
netflix \
network \
neustar \
new \
news \
next \
nextdirect \
nexus \
nf \
nfl \
ng \
ngo \
nhk \
ni \
nico \
nike \
nikon \
ninja \
nissan \
nissay \
nl \
no \
nokia \
northwesternmutual \
norton \
now \
nowruz \
nowtv \
np \
nr \
nra \
nrw \
ntt \
nu \
nyc \
nz \
obi \
observer \
off \
office \
okinawa \
olayan \
olayangroup \
oldnavy \
ollo \
om \
omega \
one \
ong \
onl \
online \
onyourside \
ooo \
open \
oracle \
orange \
org \
organic \
origins \
osaka \
otsuka \
ott \
ovh \
pa \
page \
panasonic \
paris \
pars \
partners \
parts \
party \
passagens \
pay \
pccw \
pe \
pet \
pf \
pfizer \
pg \
ph \
pharmacy \
phd \
philips \
phone \
photo \
photography \
photos \
physio \
pics \
pictet \
pictures \
pid \
pin \
ping \
pink \
pioneer \
pizza \
pk \
pl \
place \
play \
playstation \
plumbing \
plus \
pm \
pn \
pnc \
pohl \
poker \
politie \
porn \
post \
pr \
pramerica \
praxi \
press \
prime \
pro \
prod \
productions \
prof \
progressive \
promo \
properties \
property \
protection \
pru \
prudential \
ps \
pt \
pub \
pw \
pwc \
py \
qa \
qpon \
quebec \
quest \
qvc \
racing \
radio \
raid \
re \
read \
realestate \
realtor \
realty \
recipes \
red \
redstone \
redumbrella \
rehab \
reise \
reisen \
reit \
reliance \
ren \
rent \
rentals \
repair \
report \
republican \
rest \
restaurant \
review \
reviews \
rexroth \
rich \
richardli \
ricoh \
ril \
rio \
rip \
rmit \
ro \
rocher \
rocks \
rodeo \
rogers \
room \
rs \
rsvp \
ru \
rugby \
ruhr \
run \
rw \
rwe \
ryukyu \
sa \
saarland \
safe \
safety \
sakura \
sale \
salon \
samsclub \
samsung \
sandvik \
sandvikcoromant \
sanofi \
sap \
sarl \
sas \
save \
saxo \
sb \
sbi \
sbs \
sc \
sca \
scb \
schaeffler \
schmidt \
scholarships \
school \
schule \
schwarz \
science \
scjohnson \
scot \
sd \
se \
search \
seat \
secure \
security \
seek \
select \
sener \
services \
ses \
seven \
sew \
sex \
sexy \
sfr \
sg \
sh \
shangrila \
sharp \
shaw \
shell \
shia \
shiksha \
shoes \
shop \
shopping \
shouji \
show \
showtime \
si \
silk \
sina \
singles \
site \
sj \
sk \
ski \
skin \
sky \
skype \
sl \
sling \
sm \
smart \
smile \
sn \
sncf \
so \
soccer \
social \
softbank \
software \
sohu \
solar \
solutions \
song \
sony \
soy \
spa \
space \
sport \
spot \
spreadbetting \
sr \
srl \
ss \
st \
stada \
staples \
star \
statebank \
statefarm \
stc \
stcgroup \
stockholm \
storage \
store \
stream \
studio \
study \
style \
su \
sucks \
supplies \
supply \
support \
surf \
surgery \
suzuki \
sv \
swatch \
swiftcover \
swiss \
sx \
sy \
sydney \
systems \
sz \
tab \
taipei \
talk \
taobao \
target \
tatamotors \
tatar \
tattoo \
tax \
taxi \
tc \
tci \
td \
tdk \
team \
tech \
technology \
tel \
temasek \
tennis \
teva \
tf \
tg \
th \
thd \
theater \
theatre \
tiaa \
tickets \
tienda \
tiffany \
tips \
tires \
tirol \
tj \
tjmaxx \
tjx \
tk \
tkmaxx \
tl \
tm \
tmall \
tn \
to \
today \
tokyo \
tools \
top \
toray \
toshiba \
total \
tours \
town \
toyota \
toys \
tr \
trade \
trading \
training \
travel \
travelchannel \
travelers \
travelersinsurance \
trust \
trv \
tt \
tube \
tui \
tunes \
tushu \
tv \
tvs \
tw \
tz \
ua \
ubank \
ubs \
ug \
uk \
unicom \
university \
uno \
uol \
ups \
us \
uy \
uz \
va \
vacations \
vana \
vanguard \
vc \
ve \
vegas \
ventures \
verisign \
versicherung \
vet \
vg \
vi \
viajes \
video \
vig \
viking \
villas \
vin \
vip \
virgin \
visa \
vision \
viva \
vivo \
vlaanderen \
vn \
vodka \
volkswagen \
volvo \
vote \
voting \
voto \
voyage \
vu \
vuelos \
wales \
walmart \
walter \
wang \
wanggou \
watch \
watches \
weather \
weatherchannel \
webcam \
weber \
website \
wed \
wedding \
weibo \
weir \
wf \
whoswho \
wien \
wiki \
williamhill \
win \
windows \
wine \
winners \
wme \
wolterskluwer \
woodside \
work \
works \
world \
wow \
ws \
wtc \
wtf \
xbox \
xerox \
xfinity \
xihuan \
xin \
xxx \
xyz \
yachts \
yahoo \
yamaxun \
yandex \
ye \
yodobashi \
yoga \
yokohama \
you \
youtube \
yt \
yun \
za \
zappos \
zara \
zero \
zip \
zm \
zone \
zuerich \
zw \
vermögensberater-ctb \
vermögensberatung-pwb \
ελ \
ευ \
бг \
бел \
дети \
ею \
католик \
ком \
қаз \
мкд \
мон \
москва \
онлайн \
орг \
рус \
рф \
сайт \
срб \
укр \
გე \
հայ \
ישראל \
קום \
ابوظبي \
اتصالات \
ارامكو \
الاردن \
البحرين \
الجزائر \
السعودية \
العليان \
المغرب \
امارات \
ایران \
بارت \
بازار \
بھارت \
بيتك \
پاکستان \
ڀارت \
تونس \
سودان \
سورية \
شبكة \
عراق \
عرب \
عمان \
فلسطين \
قطر \
كاثوليك \
كوم \
مصر \
مليسيا \
موريتانيا \
موقع \
همراه \
कॉम \
नेट \
भारत \
भारतम् \
भारोत \
संगठन \
বাংলা \
ভারত \
ভাৰত \
ਭਾਰਤ \
ભારત \
ଭାରତ \
இந்தியா \
இலங்கை \
சிங்கப்பூர் \
భారత్ \
ಭಾರತ \
ഭാരതം \
ලංකා \
คอม \
ไทย \
ລາວ \
닷넷 \
닷컴 \
삼성 \
한국 \
アマゾン \
グーグル \
クラウド \
コム \
ストア \
セール \
ファッション \
ポイント \
みんな \
世界 \
中信 \
中国 \
中國 \
中文网 \
亚马逊 \
企业 \
佛山 \
信息 \
健康 \
八卦 \
公司 \
公益 \
台湾 \
台灣 \
商城 \
商店 \
商标 \
嘉里 \
嘉里大酒店 \
在线 \
大众汽车 \
大拿 \
天主教 \
娱乐 \
家電 \
广东 \
微博 \
慈善 \
我爱你 \
手机 \
招聘 \
政务 \
政府 \
新加坡 \
新闻 \
时尚 \
書籍 \
机构 \
淡马锡 \
游戏 \
澳門 \
点看 \
移动 \
组织机构 \
网址 \
网店 \
网站 \
网络 \
联通 \
诺基亚 \
谷歌 \
购物 \
通販 \
集团 \
電訊盈科 \
飞利浦 \
食品 \
餐厅 \
香格里拉 \
香港'.split(' ');
/**
The scanner provides an interface that takes a string of text as input, and
outputs an array of tokens instances that can be used for easy URL parsing.
@module linkify
@submodule scanner
@main scanner
*/
var LETTER = /(?:[A-Za-z\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0370-\u0374\u0376\u0377\u037A-\u037D\u037F\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u048A-\u052F\u0531-\u0556\u0559\u0560-\u0588\u05D0-\u05EA\u05EF-\u05F2\u0620-\u064A\u066E\u066F\u0671-\u06D3\u06D5\u06E5\u06E6\u06EE\u06EF\u06FA-\u06FC\u06FF\u0710\u0712-\u072F\u074D-\u07A5\u07B1\u07CA-\u07EA\u07F4\u07F5\u07FA\u0800-\u0815\u081A\u0824\u0828\u0840-\u0858\u0860-\u086A\u0870-\u0887\u0889-\u088E\u08A0-\u08C9\u0904-\u0939\u093D\u0950\u0958-\u0961\u0971-\u0980\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BD\u09CE\u09DC\u09DD\u09DF-\u09E1\u09F0\u09F1\u09FC\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABD\u0AD0\u0AE0\u0AE1\u0AF9\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3D\u0B5C\u0B5D\u0B5F-\u0B61\u0B71\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BD0\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D\u0C58-\u0C5A\u0C5D\u0C60\u0C61\u0C80\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBD\u0CDD\u0CDE\u0CE0\u0CE1\u0CF1\u0CF2\u0D04-\u0D0C\u0D0E-\u0D10\u0D12-\u0D3A\u0D3D\u0D4E\u0D54-\u0D56\u0D5F-\u0D61\u0D7A-\u0D7F\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0E01-\u0E30\u0E32\u0E33\u0E40-\u0E46\u0E81\u0E82\u0E84\u0E86-\u0E8A\u0E8C-\u0EA3\u0EA5\u0EA7-\u0EB0\u0EB2\u0EB3\u0EBD\u0EC0-\u0EC4\u0EC6\u0EDC-\u0EDF\u0F00\u0F40-\u0F47\u0F49-\u0F6C\u0F88-\u0F8C\u1000-\u102A\u103F\u1050-\u1055\u105A-\u105D\u1061\u1065\u1066\u106E-\u1070\u1075-\u1081\u108E\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u1380-\u138F\u13A0-\u13F5\u13F8-\u13FD\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16F1-\u16F8\u1700-\u1711\u171F-\u1731\u1740-\u1751\u1760-\u176C\u176E-\u1770\u1780-\u17B3\u17D7\u17DC\u1820-\u1878\u1880-\u1884\u1887-\u18A8\u18AA\u18B0-\u18F5\u1900-\u191E\u1950-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u1A00-\u1A16\u1A20-\u1A54\u1AA7\u1B05-\u1B33\u1B45-\u1B4C\u1B83-\u1BA0\u1BAE\u1BAF\u1BBA-\u1BE5\u1C00-\u1C23\u1C4D-\u1C4F\u1C5A-\u1C7D\u1C80-\u1C88\u1C90-\u1CBA\u1CBD-\u1CBF\u1CE9-\u1CEC\u1CEE-\u1CF3\u1CF5\u1CF6\u1CFA\u1D00-\u1DBF\u1E00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2183\u2184\u2C00-\u2CE4\u2CEB-\u2CEE\u2CF2\u2CF3\u2D00-\u2D25\u2D27\u2D2D\u2D30-\u2D67\u2D6F\u2D80-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2E2F\u3005\u3006\u3031-\u3035\u303B\u303C\u3041-\u3096\u309D-\u309F\u30A1-\u30FA\u30FC-\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BF\u31F0-\u31FF\u3400-\u4DBF\u4E00-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA61F\uA62A\uA62B\uA640-\uA66E\uA67F-\uA69D\uA6A0-\uA6E5\uA717-\uA71F\uA722-\uA788\uA78B-\uA7CA\uA7D0\uA7D1\uA7D3\uA7D5-\uA7D9\uA7F2-\uA801\uA803-\uA805\uA807-\uA80A\uA80C-\uA822\uA840-\uA873\uA882-\uA8B3\uA8F2-\uA8F7\uA8FB\uA8FD\uA8FE\uA90A-\uA925\uA930-\uA946\uA960-\uA97C\uA984-\uA9B2\uA9CF\uA9E0-\uA9E4\uA9E6-\uA9EF\uA9FA-\uA9FE\uAA00-\uAA28\uAA40-\uAA42\uAA44-\uAA4B\uAA60-\uAA76\uAA7A\uAA7E-\uAAAF\uAAB1\uAAB5\uAAB6\uAAB9-\uAABD\uAAC0\uAAC2\uAADB-\uAADD\uAAE0-\uAAEA\uAAF2-\uAAF4\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uAB30-\uAB5A\uAB5C-\uAB69\uAB70-\uABE2\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB00-\uFB06\uFB13-\uFB17\uFB1D\uFB1F-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE70-\uFE74\uFE76-\uFEFC\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]|\uD800[\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1F\uDF2D-\uDF40\uDF42-\uDF49\uDF50-\uDF75\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF]|\uD801[\uDC00-\uDC9D\uDCB0-\uDCD3\uDCD8-\uDCFB\uDD00-\uDD27\uDD30-\uDD63\uDD70-\uDD7A\uDD7C-\uDD8A\uDD8C-\uDD92\uDD94\uDD95\uDD97-\uDDA1\uDDA3-\uDDB1\uDDB3-\uDDB9\uDDBB\uDDBC\uDE00-\uDF36\uDF40-\uDF55\uDF60-\uDF67\uDF80-\uDF85\uDF87-\uDFB0\uDFB2-\uDFBA]|\uD802[\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDC60-\uDC76\uDC80-\uDC9E\uDCE0-\uDCF2\uDCF4\uDCF5\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE35\uDE60-\uDE7C\uDE80-\uDE9C\uDEC0-\uDEC7\uDEC9-\uDEE4\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72\uDF80-\uDF91]|\uD803[\uDC00-\uDC48\uDC80-\uDCB2\uDCC0-\uDCF2\uDD00-\uDD23\uDE80-\uDEA9\uDEB0\uDEB1\uDF00-\uDF1C\uDF27\uDF30-\uDF45\uDF70-\uDF81\uDFB0-\uDFC4\uDFE0-\uDFF6]|\uD804[\uDC03-\uDC37\uDC71\uDC72\uDC75\uDC83-\uDCAF\uDCD0-\uDCE8\uDD03-\uDD26\uDD44\uDD47\uDD50-\uDD72\uDD76\uDD83-\uDDB2\uDDC1-\uDDC4\uDDDA\uDDDC\uDE00-\uDE11\uDE13-\uDE2B\uDE80-\uDE86\uDE88\uDE8A-\uDE8D\uDE8F-\uDE9D\uDE9F-\uDEA8\uDEB0-\uDEDE\uDF05-\uDF0C\uDF0F\uDF10\uDF13-\uDF28\uDF2A-\uDF30\uDF32\uDF33\uDF35-\uDF39\uDF3D\uDF50\uDF5D-\uDF61]|\uD805[\uDC00-\uDC34\uDC47-\uDC4A\uDC5F-\uDC61\uDC80-\uDCAF\uDCC4\uDCC5\uDCC7\uDD80-\uDDAE\uDDD8-\uDDDB\uDE00-\uDE2F\uDE44\uDE80-\uDEAA\uDEB8\uDF00-\uDF1A\uDF40-\uDF46]|\uD806[\uDC00-\uDC2B\uDCA0-\uDCDF\uDCFF-\uDD06\uDD09\uDD0C-\uDD13\uDD15\uDD16\uDD18-\uDD2F\uDD3F\uDD41\uDDA0-\uDDA7\uDDAA-\uDDD0\uDDE1\uDDE3\uDE00\uDE0B-\uDE32\uDE3A\uDE50\uDE5C-\uDE89\uDE9D\uDEB0-\uDEF8]|\uD807[\uDC00-\uDC08\uDC0A-\uDC2E\uDC40\uDC72-\uDC8F\uDD00-\uDD06\uDD08\uDD09\uDD0B-\uDD30\uDD46\uDD60-\uDD65\uDD67\uDD68\uDD6A-\uDD89\uDD98\uDEE0-\uDEF2\uDFB0]|\uD808[\uDC00-\uDF99]|\uD809[\uDC80-\uDD43]|\uD80B[\uDF90-\uDFF0]|[\uD80C\uD81C-\uD820\uD822\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879\uD880-\uD883][\uDC00-\uDFFF]|\uD80D[\uDC00-\uDC2E]|\uD811[\uDC00-\uDE46]|\uD81A[\uDC00-\uDE38\uDE40-\uDE5E\uDE70-\uDEBE\uDED0-\uDEED\uDF00-\uDF2F\uDF40-\uDF43\uDF63-\uDF77\uDF7D-\uDF8F]|\uD81B[\uDE40-\uDE7F\uDF00-\uDF4A\uDF50\uDF93-\uDF9F\uDFE0\uDFE1\uDFE3]|\uD821[\uDC00-\uDFF7]|\uD823[\uDC00-\uDCD5\uDD00-\uDD08]|\uD82B[\uDFF0-\uDFF3\uDFF5-\uDFFB\uDFFD\uDFFE]|\uD82C[\uDC00-\uDD22\uDD50-\uDD52\uDD64-\uDD67\uDD70-\uDEFB]|\uD82F[\uDC00-\uDC6A\uDC70-\uDC7C\uDC80-\uDC88\uDC90-\uDC99]|\uD835[\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB]|\uD837[\uDF00-\uDF1E]|\uD838[\uDD00-\uDD2C\uDD37-\uDD3D\uDD4E\uDE90-\uDEAD\uDEC0-\uDEEB]|\uD839[\uDFE0-\uDFE6\uDFE8-\uDFEB\uDFED\uDFEE\uDFF0-\uDFFE]|\uD83A[\uDC00-\uDCC4\uDD00-\uDD43\uDD4B]|\uD83B[\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB]|\uD869[\uDC00-\uDEDF\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF38\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0]|\uD87E[\uDC00-\uDE1D]|\uD884[\uDC00-\uDF4A])/; // Any Unicode character with letter data type
var EMOJI = /(?:[#\*0-9\xA9\xAE\u203C\u2049\u2122\u2139\u2194-\u2199\u21A9\u21AA\u231A\u231B\u2328\u23CF\u23E9-\u23F3\u23F8-\u23FA\u24C2\u25AA\u25AB\u25B6\u25C0\u25FB-\u25FE\u2600-\u2604\u260E\u2611\u2614\u2615\u2618\u261D\u2620\u2622\u2623\u2626\u262A\u262E\u262F\u2638-\u263A\u2640\u2642\u2648-\u2653\u265F\u2660\u2663\u2665\u2666\u2668\u267B\u267E\u267F\u2692-\u2697\u2699\u269B\u269C\u26A0\u26A1\u26A7\u26AA\u26AB\u26B0\u26B1\u26BD\u26BE\u26C4\u26C5\u26C8\u26CE\u26CF\u26D1\u26D3\u26D4\u26E9\u26EA\u26F0-\u26F5\u26F7-\u26FA\u26FD\u2702\u2705\u2708-\u270D\u270F\u2712\u2714\u2716\u271D\u2721\u2728\u2733\u2734\u2744\u2747\u274C\u274E\u2753-\u2755\u2757\u2763\u2764\u2795-\u2797\u27A1\u27B0\u27BF\u2934\u2935\u2B05-\u2B07\u2B1B\u2B1C\u2B50\u2B55\u3030\u303D\u3297\u3299]|\uD83C[\uDC04\uDCCF\uDD70\uDD71\uDD7E\uDD7F\uDD8E\uDD91-\uDD9A\uDDE6-\uDDFF\uDE01\uDE02\uDE1A\uDE2F\uDE32-\uDE3A\uDE50\uDE51\uDF00-\uDF21\uDF24-\uDF93\uDF96\uDF97\uDF99-\uDF9B\uDF9E-\uDFF0\uDFF3-\uDFF5\uDFF7-\uDFFF]|\uD83D[\uDC00-\uDCFD\uDCFF-\uDD3D\uDD49-\uDD4E\uDD50-\uDD67\uDD6F\uDD70\uDD73-\uDD7A\uDD87\uDD8A-\uDD8D\uDD90\uDD95\uDD96\uDDA4\uDDA5\uDDA8\uDDB1\uDDB2\uDDBC\uDDC2-\uDDC4\uDDD1-\uDDD3\uDDDC-\uDDDE\uDDE1\uDDE3\uDDE8\uDDEF\uDDF3\uDDFA-\uDE4F\uDE80-\uDEC5\uDECB-\uDED2\uDED5-\uDED7\uDEDD-\uDEE5\uDEE9\uDEEB\uDEEC\uDEF0\uDEF3-\uDEFC\uDFE0-\uDFEB\uDFF0]|\uD83E[\uDD0C-\uDD3A\uDD3C-\uDD45\uDD47-\uDDFF\uDE70-\uDE74\uDE78-\uDE7C\uDE80-\uDE86\uDE90-\uDEAC\uDEB0-\uDEBA\uDEC0-\uDEC5\uDED0-\uDED9\uDEE0-\uDEE7\uDEF0-\uDEF6])/; // Any Unicode emoji character
var EMOJI_VARIATION = /\uFE0F/; // Variation selector, follows heart and others
var DIGIT = /\d/;
var SPACE = /\s/;
/**
* Initialize the scanner character-based state machine for the given start state
* @return {State} scanner starting state
*/
function init$2() {
var customProtocols = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : [];
// Frequently used states
var S_START = makeState();
var S_NUM = makeAcceptingState(NUM);
var S_DOMAIN = makeAcceptingState(DOMAIN);
var S_DOMAIN_HYPHEN = makeState(); // domain followed by 1 or more hyphen characters
var S_WS = makeAcceptingState(WS);
var DOMAIN_REGEX_TRANSITIONS = [[DIGIT, S_DOMAIN], [LETTER, S_DOMAIN], [EMOJI, S_DOMAIN], [EMOJI_VARIATION, S_DOMAIN]]; // Create a state which emits a domain token
var makeDomainState = function makeDomainState() {
var state = makeAcceptingState(DOMAIN);
state.j = {
'-': S_DOMAIN_HYPHEN
};
state.jr = [].concat(DOMAIN_REGEX_TRANSITIONS);
return state;
}; // Create a state which does not emit a domain state but the usual alphanumeric
// transitions are domains
var makeNearDomainState = function makeNearDomainState(token) {
var state = makeDomainState();
state.t = token;
return state;
}; // States for special URL symbols that accept immediately after start
makeBatchT(S_START, [["'", makeAcceptingState(APOSTROPHE)], ['{', makeAcceptingState(OPENBRACE)], ['[', makeAcceptingState(OPENBRACKET)], ['<', makeAcceptingState(OPENANGLEBRACKET)], ['(', makeAcceptingState(OPENPAREN)], ['}', makeAcceptingState(CLOSEBRACE)], [']', makeAcceptingState(CLOSEBRACKET)], ['>', makeAcceptingState(CLOSEANGLEBRACKET)], [')', makeAcceptingState(CLOSEPAREN)], ['&', makeAcceptingState(AMPERSAND)], ['*', makeAcceptingState(ASTERISK)], ['@', makeAcceptingState(AT)], ['`', makeAcceptingState(BACKTICK)], ['^', makeAcceptingState(CARET)], [':', makeAcceptingState(COLON)], [',', makeAcceptingState(COMMA)], ['$', makeAcceptingState(DOLLAR)], ['.', makeAcceptingState(DOT)], ['=', makeAcceptingState(EQUALS)], ['!', makeAcceptingState(EXCLAMATION)], ['-', makeAcceptingState(HYPHEN)], ['%', makeAcceptingState(PERCENT)], ['|', makeAcceptingState(PIPE)], ['+', makeAcceptingState(PLUS)], ['#', makeAcceptingState(POUND)], ['?', makeAcceptingState(QUERY)], ['"', makeAcceptingState(QUOTE)], ['/', makeAcceptingState(SLASH)], [';', makeAcceptingState(SEMI)], ['~', makeAcceptingState(TILDE)], ['_', makeAcceptingState(UNDERSCORE)], ['\\', makeAcceptingState(BACKSLASH)]]); // Whitespace jumps
// Tokens of only non-newline whitespace are arbitrarily long
makeT(S_START, '\n', makeAcceptingState(NL));
makeRegexT(S_START, SPACE, S_WS); // If any whitespace except newline, more whitespace!
makeT(S_WS, '\n', makeState()); // non-accepting state
makeRegexT(S_WS, SPACE, S_WS); // Generates states for top-level domains
// Note that this is most accurate when tlds are in alphabetical order
for (var i = 0; i < tlds.length; i++) {
makeChainT(S_START, tlds[i], makeNearDomainState(TLD), makeDomainState);
} // Collect the states generated by different protocls
var S_PROTOCOL_FILE = makeDomainState();
var S_PROTOCOL_FTP = makeDomainState();
var S_PROTOCOL_HTTP = makeDomainState();
var S_MAILTO = makeDomainState();
makeChainT(S_START, 'file', S_PROTOCOL_FILE, makeDomainState);
makeChainT(S_START, 'ftp', S_PROTOCOL_FTP, makeDomainState);
makeChainT(S_START, 'http', S_PROTOCOL_HTTP, makeDomainState);
makeChainT(S_START, 'mailto', S_MAILTO, makeDomainState); // Protocol states
var S_PROTOCOL_SECURE = makeDomainState();
var S_FULL_PROTOCOL = makeAcceptingState(PROTOCOL); // Full protocol ends with COLON
var S_FULL_MAILTO = makeAcceptingState(MAILTO); // Mailto ends with COLON
// Secure protocols (end with 's')
makeT(S_PROTOCOL_FTP, 's', S_PROTOCOL_SECURE);
makeT(S_PROTOCOL_FTP, ':', S_FULL_PROTOCOL);
makeT(S_PROTOCOL_HTTP, 's', S_PROTOCOL_SECURE);
makeT(S_PROTOCOL_HTTP, ':', S_FULL_PROTOCOL); // Become protocol tokens after a COLON
makeT(S_PROTOCOL_FILE, ':', S_FULL_PROTOCOL);
makeT(S_PROTOCOL_SECURE, ':', S_FULL_PROTOCOL);
makeT(S_MAILTO, ':', S_FULL_MAILTO); // Register custom protocols
var S_CUSTOM_PROTOCOL = makeDomainState();
for (var _i = 0; _i < customProtocols.length; _i++) {
makeChainT(S_START, customProtocols[_i], S_CUSTOM_PROTOCOL, makeDomainState);
}
makeT(S_CUSTOM_PROTOCOL, ':', S_FULL_PROTOCOL); // Localhost
makeChainT(S_START, 'localhost', makeNearDomainState(LOCALHOST), makeDomainState); // Everything else
// DOMAINs make more DOMAINs
// Number and character transitions
makeRegexT(S_START, DIGIT, S_NUM);
makeRegexT(S_START, LETTER, S_DOMAIN);
makeRegexT(S_START, EMOJI, S_DOMAIN);
makeRegexT(S_START, EMOJI_VARIATION, S_DOMAIN);
makeRegexT(S_NUM, DIGIT, S_NUM);
makeRegexT(S_NUM, LETTER, S_DOMAIN); // number becomes DOMAIN
makeRegexT(S_NUM, EMOJI, S_DOMAIN); // number becomes DOMAIN
makeRegexT(S_NUM, EMOJI_VARIATION, S_DOMAIN); // number becomes DOMAIN
makeT(S_NUM, '-', S_DOMAIN_HYPHEN); // Default domain transitions
makeT(S_DOMAIN, '-', S_DOMAIN_HYPHEN);
makeT(S_DOMAIN_HYPHEN, '-', S_DOMAIN_HYPHEN);
makeRegexT(S_DOMAIN, DIGIT, S_DOMAIN);
makeRegexT(S_DOMAIN, LETTER, S_DOMAIN);
makeRegexT(S_DOMAIN, EMOJI, S_DOMAIN);
makeRegexT(S_DOMAIN, EMOJI_VARIATION, S_DOMAIN);
makeRegexT(S_DOMAIN_HYPHEN, DIGIT, S_DOMAIN);
makeRegexT(S_DOMAIN_HYPHEN, LETTER, S_DOMAIN);
makeRegexT(S_DOMAIN_HYPHEN, EMOJI, S_DOMAIN);
makeRegexT(S_DOMAIN_HYPHEN, EMOJI_VARIATION, S_DOMAIN); // Set default transition for start state (some symbol)
S_START.jd = makeAcceptingState(SYM);
return S_START;
}
/**
Given a string, returns an array of TOKEN instances representing the
composition of that string.
@method run
@param {State} start scanner starting state
@param {string} str input string to scan
@return {{t: string, v: string, s: number, l: number}[]} list of tokens, each with a type and value
*/
function run$1(start, str) {
// State machine is not case sensitive, so input is tokenized in lowercased
// form (still returns the regular case though) Uses selective `toLowerCase`
// is used because lowercasing the entire string causes the length and
// character position to vary in some non-English strings with V8-based
// runtimes.
var iterable = stringToArray(str.replace(/[A-Z]/g, function (c) {
return c.toLowerCase();
}));
var charCount = iterable.length; // <= len if there are emojis, etc
var tokens = []; // return value
// cursor through the string itself, accounting for characters that have
// width with length 2 such as emojis
var cursor = 0; // Cursor through the array-representation of the string
var charCursor = 0; // Tokenize the string
while (charCursor < charCount) {
var state = start;
var nextState = null;
var tokenLength = 0;
var latestAccepting = null;
var sinceAccepts = -1;
var charsSinceAccepts = -1;
while (charCursor < charCount && (nextState = takeT(state, iterable[charCursor]))) {
state = nextState; // Keep track of the latest accepting state
if (state.accepts()) {
sinceAccepts = 0;
charsSinceAccepts = 0;
latestAccepting = state;
} else if (sinceAccepts >= 0) {
sinceAccepts += iterable[charCursor].length;
charsSinceAccepts++;
}
tokenLength += iterable[charCursor].length;
cursor += iterable[charCursor].length;
charCursor++;
} // Roll back to the latest accepting state
cursor -= sinceAccepts;
charCursor -= charsSinceAccepts;
tokenLength -= sinceAccepts; // No more jumps, just make a new token from the last accepting one
// TODO: If possible, don't output v, instead output range where values ocur
tokens.push({
t: latestAccepting.t,
// token type/name
v: str.substr(cursor - tokenLength, tokenLength),
// string value
s: cursor - tokenLength,
// start index
e: cursor // end index (excluding)
});
}
return tokens;
}
/**
* Convert a String to an Array of characters, taking into account that some
* characters like emojis take up two string indexes.
*
* Adapted from core-js (MIT license)
* https://github.com/zloirock/core-js/blob/2d69cf5f99ab3ea3463c395df81e5a15b68f49d9/packages/core-js/internals/string-multibyte.js
*
* @function stringToArray
* @param {string} str
* @returns {string[]}
*/
function stringToArray(str) {
var result = [];
var len = str.length;
var index = 0;
while (index < len) {
var first = str.charCodeAt(index);
var second = void 0;
var char = first < 0xd800 || first > 0xdbff || index + 1 === len || (second = str.charCodeAt(index + 1)) < 0xdc00 || second > 0xdfff ? str[index] // single character
: str.slice(index, index + 2); // two-index characters
result.push(char);
index += char.length;
}
return result;
}
function _typeof(obj) {
"@babel/helpers - typeof";
if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") {
_typeof = function (obj) {
return typeof obj;
};
} else {
_typeof = function (obj) {
return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj;
};
}
return _typeof(obj);
}
/**
* @property {string} defaultProtocol
* @property {{[string]: (event) => void}]} [events]
*/
var defaults = {
defaultProtocol: 'http',
events: null,
format: noop,
formatHref: noop,
nl2br: false,
tagName: 'a',
target: null,
rel: null,
validate: true,
truncate: 0,
className: null,
attributes: null,
ignoreTags: []
};
/**
* @class Options
* @param {Object} [opts] Set option properties besides the defaults
*/
function Options(opts) {
opts = opts || {};
this.defaultProtocol = 'defaultProtocol' in opts ? opts.defaultProtocol : defaults.defaultProtocol;
this.events = 'events' in opts ? opts.events : defaults.events;
this.format = 'format' in opts ? opts.format : defaults.format;
this.formatHref = 'formatHref' in opts ? opts.formatHref : defaults.formatHref;
this.nl2br = 'nl2br' in opts ? opts.nl2br : defaults.nl2br;
this.tagName = 'tagName' in opts ? opts.tagName : defaults.tagName;
this.target = 'target' in opts ? opts.target : defaults.target;
this.rel = 'rel' in opts ? opts.rel : defaults.rel;
this.validate = 'validate' in opts ? opts.validate : defaults.validate;
this.truncate = 'truncate' in opts ? opts.truncate : defaults.truncate;
this.className = 'className' in opts ? opts.className : defaults.className;
this.attributes = opts.attributes || defaults.attributes;
this.ignoreTags = []; // Make all tags names upper case
var ignoredTags = 'ignoreTags' in opts ? opts.ignoreTags : defaults.ignoreTags;
for (var i = 0; i < ignoredTags.length; i++) {
this.ignoreTags.push(ignoredTags[i].toUpperCase());
}
}
Options.prototype = {
/**
* Given the token, return all options for how it should be displayed
*/
resolve: function resolve(token) {
var href = token.toHref(this.defaultProtocol);
return {
formatted: this.get('format', token.toString(), token),
formattedHref: this.get('formatHref', href, token),
tagName: this.get('tagName', href, token),
className: this.get('className', href, token),
target: this.get('target', href, token),
rel: this.get('rel', href, token),
events: this.getObject('events', href, token),
attributes: this.getObject('attributes', href, token),
truncate: this.get('truncate', href, token)
};
},
/**
* Returns true or false based on whether a token should be displayed as a
* link based on the user options. By default,
*/
check: function check(token) {
return this.get('validate', token.toString(), token);
},
// Private methods
/**
* Resolve an option's value based on the value of the option and the given
* params.
* @param {string} key Name of option to use
* @param operator will be passed to the target option if it's method
* @param {MultiToken} token The token from linkify.tokenize
*/
get: function get(key, operator, token) {
var option = this[key];
if (!option) {
return option;
}
var optionValue;
switch (_typeof(option)) {
case 'function':
return option(operator, token.t);
case 'object':
optionValue = token.t in option ? option[token.t] : defaults[key];
return typeof optionValue === 'function' ? optionValue(operator, token.t) : optionValue;
}
return option;
},
getObject: function getObject(key, operator, token) {
var option = this[key];
return typeof option === 'function' ? option(operator, token.t) : option;
}
};
function noop(val) {
return val;
}
var options = /*#__PURE__*/Object.freeze({
__proto__: null,
defaults: defaults,
Options: Options
});
/******************************************************************************
Multi-Tokens
Tokens composed of arrays of TextTokens
******************************************************************************/
function inherits(parent, child) {
var props = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};
var extended = Object.create(parent.prototype);
for (var p in props) {
extended[p] = props[p];
}
extended.constructor = child;
child.prototype = extended;
return child;
}
/**
Abstract class used for manufacturing tokens of text tokens. That is rather
than the value for a token being a small string of text, it's value an array
of text tokens.
Used for grouping together URLs, emails, hashtags, and other potential
creations.
@class MultiToken
@param {string} value
@param {{t: string, v: string, s: number, e: number}[]} tokens
@abstract
*/
function MultiToken() {}
MultiToken.prototype = {
/**
String representing the type for this token
@property t
@default 'token'
*/
t: 'token',
/**
Is this multitoken a link?
@property isLink
@default false
*/
isLink: false,
/**
Return the string this token represents.
@method toString
@return {string}
*/
toString: function toString() {
return this.v;
},
/**
What should the value for this token be in the `href` HTML attribute?
Returns the `.toString` value by default.
@method toHref
@return {string}
*/
toHref: function toHref() {
return this.toString();
},
/**
* The start index of this token in the original input string
* @returns {number}
*/
startIndex: function startIndex() {
return this.tk[0].s;
},
/**
* The end index of this token in the original input string (up to this
* index but not including it)
* @returns {number}
*/
endIndex: function endIndex() {
return this.tk[this.tk.length - 1].e;
},
/**
Returns a hash of relevant values for this token, which includes keys
* type - Kind of token ('url', 'email', etc.)
* value - Original text
* href - The value that should be added to the anchor tag's href
attribute
@method toObject
@param {string} [protocol] `'http'` by default
*/
toObject: function toObject() {
var protocol = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : defaults.defaultProtocol;
return {
type: this.t,
value: this.v,
isLink: this.isLink,
href: this.toHref(protocol),
start: this.startIndex(),
end: this.endIndex()
};
}
}; // Base token
/**
* Create a new token that can be emitted by the parser state machine
* @param {string} type readable type of the token
* @param {object} props properties to assign or override, including isLink = true or false
* @returns {(value: string, tokens: {t: string, v: string, s: number, e: number}) => MultiToken} new token class
*/
function createTokenClass(type, props) {
function Token(value, tokens) {
this.t = type;
this.v = value;
this.tk = tokens;
}
inherits(MultiToken, Token, props);
return Token;
}
/**
Represents an arbitrarily mailto email address with the prefix included
@class MailtoEmail
@extends MultiToken
*/
var MailtoEmail = createTokenClass('email', {
isLink: true
});
/**
Represents a list of tokens making up a valid email address
@class Email
@extends MultiToken
*/
var Email = createTokenClass('email', {
isLink: true,
toHref: function toHref() {
return 'mailto:' + this.toString();
}
});
/**
Represents some plain text
@class Text
@extends MultiToken
*/
var Text = createTokenClass('text');
/**
Multi-linebreak token - represents a line break
@class Nl
@extends MultiToken
*/
var Nl = createTokenClass('nl');
/**
Represents a list of text tokens making up a valid URL
@class Url
@extends MultiToken
*/
var Url = createTokenClass('url', {
isLink: true,
/**
Lowercases relevant parts of the domain and adds the protocol if
required. Note that this will not escape unsafe HTML characters in the
URL.
@method href
@param {string} protocol
@return {string}
*/
toHref: function toHref() {
var protocol = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : defaults.defaultProtocol;
var tokens = this.tk;
var hasProtocol = false;
var hasSlashSlash = false;
var result = [];
var i = 0; // Make the first part of the domain lowercase
// Lowercase protocol
while (tokens[i].t === PROTOCOL) {
hasProtocol = true;
result.push(tokens[i].v);
i++;
} // Skip slash-slash
while (tokens[i].t === SLASH) {
hasSlashSlash = true;
result.push(tokens[i].v);
i++;
} // Continue pushing characters
for (; i < tokens.length; i++) {
result.push(tokens[i].v);
}
result = result.join('');
if (!(hasProtocol || hasSlashSlash)) {
result = "".concat(protocol, "://").concat(result);
}
return result;
},
hasProtocol: function hasProtocol() {
return this.tk[0].t === PROTOCOL;
}
});
var multi = /*#__PURE__*/Object.freeze({
__proto__: null,
MultiToken: MultiToken,
Base: MultiToken,
createTokenClass: createTokenClass,
MailtoEmail: MailtoEmail,
Email: Email,
Text: Text,
Nl: Nl,
Url: Url
});
/**
Not exactly parser, more like the second-stage scanner (although we can
theoretically hotswap the code here with a real parser in the future... but
for a little URL-finding utility abstract syntax trees may be a little
overkill).
URL format: http://en.wikipedia.org/wiki/URI_scheme
Email format: http://en.wikipedia.org/wiki/Email_address (links to RFC in
reference)
@module linkify
@submodule parser
@main run
*/
/**
* Generate the parser multi token-based state machine
* @returns {State} the starting state
*/
function init$1() {
// The universal starting state.
var S_START = makeState(); // Intermediate states for URLs. Note that domains that begin with a protocol
// are treated slighly differently from those that don't.
var S_PROTOCOL = makeState(); // e.g., 'http:'
var S_MAILTO = makeState(); // 'mailto:'
var S_PROTOCOL_SLASH = makeState(); // e.g., 'http:/''
var S_PROTOCOL_SLASH_SLASH = makeState(); // e.g.,'http://'
var S_DOMAIN = makeState(); // parsed string ends with a potential domain name (A)
var S_DOMAIN_DOT = makeState(); // (A) domain followed by DOT
var S_TLD = makeAcceptingState(Url); // (A) Simplest possible URL with no query string
var S_TLD_COLON = makeState(); // (A) URL followed by colon (potential port number here)
var S_TLD_PORT = makeAcceptingState(Url); // TLD followed by a port number
var S_URL = makeAcceptingState(Url); // Long URL with optional port and maybe query string
var S_URL_NON_ACCEPTING = makeState(); // URL followed by some symbols (will not be part of the final URL)
var S_URL_OPENBRACE = makeState(); // URL followed by {
var S_URL_OPENBRACKET = makeState(); // URL followed by [
var S_URL_OPENANGLEBRACKET = makeState(); // URL followed by <
var S_URL_OPENPAREN = makeState(); // URL followed by (
var S_URL_OPENBRACE_Q = makeAcceptingState(Url); // URL followed by { and some symbols that the URL can end it
var S_URL_OPENBRACKET_Q = makeAcceptingState(Url); // URL followed by [ and some symbols that the URL can end it
var S_URL_OPENANGLEBRACKET_Q = makeAcceptingState(Url); // URL followed by < and some symbols that the URL can end it
var S_URL_OPENPAREN_Q = makeAcceptingState(Url); // URL followed by ( and some symbols that the URL can end it
var S_URL_OPENBRACE_SYMS = makeState(); // S_URL_OPENBRACE_Q followed by some symbols it cannot end it
var S_URL_OPENBRACKET_SYMS = makeState(); // S_URL_OPENBRACKET_Q followed by some symbols it cannot end it
var S_URL_OPENANGLEBRACKET_SYMS = makeState(); // S_URL_OPENANGLEBRACKET_Q followed by some symbols it cannot end it
var S_URL_OPENPAREN_SYMS = makeState(); // S_URL_OPENPAREN_Q followed by some symbols it cannot end it
var S_EMAIL_DOMAIN = makeState(); // parsed string starts with local email info + @ with a potential domain name (C)
var S_EMAIL_DOMAIN_DOT = makeState(); // (C) domain followed by DOT
var S_EMAIL = makeAcceptingState(Email); // (C) Possible email address (could have more tlds)
var S_EMAIL_COLON = makeState(); // (C) URL followed by colon (potential port number here)
var S_EMAIL_PORT = makeAcceptingState(Email); // (C) Email address with a port
var S_MAILTO_EMAIL = makeAcceptingState(MailtoEmail); // Email that begins with the mailto prefix (D)
var S_MAILTO_EMAIL_NON_ACCEPTING = makeState(); // (D) Followed by some non-query string chars
var S_LOCALPART = makeState(); // Local part of the email address
var S_LOCALPART_AT = makeState(); // Local part of the email address plus @
var S_LOCALPART_DOT = makeState(); // Local part of the email address plus '.' (localpart cannot end in .)
var S_NL = makeAcceptingState(Nl); // single new line
// Make path from start to protocol (with '//')
makeT(S_START, NL, S_NL);
makeT(S_START, PROTOCOL, S_PROTOCOL);
makeT(S_START, MAILTO, S_MAILTO);
makeT(S_PROTOCOL, SLASH, S_PROTOCOL_SLASH);
makeT(S_PROTOCOL_SLASH, SLASH, S_PROTOCOL_SLASH_SLASH); // The very first potential domain name
makeT(S_START, TLD, S_DOMAIN);
makeT(S_START, DOMAIN, S_DOMAIN);
makeT(S_START, LOCALHOST, S_TLD);
makeT(S_START, NUM, S_DOMAIN); // Force URL for protocol followed by anything sane
makeT(S_PROTOCOL_SLASH_SLASH, TLD, S_URL);
makeT(S_PROTOCOL_SLASH_SLASH, DOMAIN, S_URL);
makeT(S_PROTOCOL_SLASH_SLASH, NUM, S_URL);
makeT(S_PROTOCOL_SLASH_SLASH, LOCALHOST, S_URL); // Account for dots and hyphens
// hyphens are usually parts of domain names
makeT(S_DOMAIN, DOT, S_DOMAIN_DOT);
makeT(S_EMAIL_DOMAIN, DOT, S_EMAIL_DOMAIN_DOT); // Hyphen can jump back to a domain name
// After the first domain and a dot, we can find either a URL or another domain
makeT(S_DOMAIN_DOT, TLD, S_TLD);
makeT(S_DOMAIN_DOT, DOMAIN, S_DOMAIN);
makeT(S_DOMAIN_DOT, NUM, S_DOMAIN);
makeT(S_DOMAIN_DOT, LOCALHOST, S_DOMAIN);
makeT(S_EMAIL_DOMAIN_DOT, TLD, S_EMAIL);
makeT(S_EMAIL_DOMAIN_DOT, DOMAIN, S_EMAIL_DOMAIN);
makeT(S_EMAIL_DOMAIN_DOT, NUM, S_EMAIL_DOMAIN);
makeT(S_EMAIL_DOMAIN_DOT, LOCALHOST, S_EMAIL_DOMAIN); // S_TLD accepts! But the URL could be longer, try to find a match greedily
// The `run` function should be able to "rollback" to the accepting state
makeT(S_TLD, DOT, S_DOMAIN_DOT);
makeT(S_EMAIL, DOT, S_EMAIL_DOMAIN_DOT); // Become real URLs after `SLASH` or `COLON NUM SLASH`
// Here PSS and non-PSS converge
makeT(S_TLD, COLON, S_TLD_COLON);
makeT(S_TLD, SLASH, S_URL);
makeT(S_TLD_COLON, NUM, S_TLD_PORT);
makeT(S_TLD_PORT, SLASH, S_URL);
makeT(S_EMAIL, COLON, S_EMAIL_COLON);
makeT(S_EMAIL_COLON, NUM, S_EMAIL_PORT); // Types of characters the URL can definitely end in
var qsAccepting = [AMPERSAND, ASTERISK, AT, BACKSLASH, BACKTICK, CARET, DOLLAR, DOMAIN, EQUALS, HYPHEN, LOCALHOST, NUM, PERCENT, PIPE, PLUS, POUND, PROTOCOL, SLASH, SYM, TILDE, TLD, UNDERSCORE]; // Types of tokens that can follow a URL and be part of the query string
// but cannot be the very last characters
// Characters that cannot appear in the URL at all should be excluded
var qsNonAccepting = [APOSTROPHE, CLOSEANGLEBRACKET, CLOSEBRACE, CLOSEBRACKET, CLOSEPAREN, COLON, COMMA, DOT, EXCLAMATION, OPENANGLEBRACKET, OPENBRACE, OPENBRACKET, OPENPAREN, QUERY, QUOTE, SEMI]; // These states are responsible primarily for determining whether or not to
// include the final round bracket.
// URL, followed by an opening bracket
makeT(S_URL, OPENBRACE, S_URL_OPENBRACE);
makeT(S_URL, OPENBRACKET, S_URL_OPENBRACKET);
makeT(S_URL, OPENANGLEBRACKET, S_URL_OPENANGLEBRACKET);
makeT(S_URL, OPENPAREN, S_URL_OPENPAREN); // URL with extra symbols at the end, followed by an opening bracket
makeT(S_URL_NON_ACCEPTING, OPENBRACE, S_URL_OPENBRACE);
makeT(S_URL_NON_ACCEPTING, OPENBRACKET, S_URL_OPENBRACKET);
makeT(S_URL_NON_ACCEPTING, OPENANGLEBRACKET, S_URL_OPENANGLEBRACKET);
makeT(S_URL_NON_ACCEPTING, OPENPAREN, S_URL_OPENPAREN); // Closing bracket component. This character WILL be included in the URL
makeT(S_URL_OPENBRACE, CLOSEBRACE, S_URL);
makeT(S_URL_OPENBRACKET, CLOSEBRACKET, S_URL);
makeT(S_URL_OPENANGLEBRACKET, CLOSEANGLEBRACKET, S_URL);
makeT(S_URL_OPENPAREN, CLOSEPAREN, S_URL);
makeT(S_URL_OPENBRACE_Q, CLOSEBRACE, S_URL);
makeT(S_URL_OPENBRACKET_Q, CLOSEBRACKET, S_URL);
makeT(S_URL_OPENANGLEBRACKET_Q, CLOSEANGLEBRACKET, S_URL);
makeT(S_URL_OPENPAREN_Q, CLOSEPAREN, S_URL);
makeT(S_URL_OPENBRACE_SYMS, CLOSEBRACE, S_URL);
makeT(S_URL_OPENBRACKET_SYMS, CLOSEBRACKET, S_URL);
makeT(S_URL_OPENANGLEBRACKET_SYMS, CLOSEANGLEBRACKET, S_URL);
makeT(S_URL_OPENPAREN_SYMS, CLOSEPAREN, S_URL); // URL that beings with an opening bracket, followed by a symbols.
// Note that the final state can still be `S_URL_OPENBRACE_Q` (if the URL only
// has a single opening bracket for some reason).
makeMultiT(S_URL_OPENBRACE, qsAccepting, S_URL_OPENBRACE_Q);
makeMultiT(S_URL_OPENBRACKET, qsAccepting, S_URL_OPENBRACKET_Q);
makeMultiT(S_URL_OPENANGLEBRACKET, qsAccepting, S_URL_OPENANGLEBRACKET_Q);
makeMultiT(S_URL_OPENPAREN, qsAccepting, S_URL_OPENPAREN_Q);
makeMultiT(S_URL_OPENBRACE, qsNonAccepting, S_URL_OPENBRACE_SYMS);
makeMultiT(S_URL_OPENBRACKET, qsNonAccepting, S_URL_OPENBRACKET_SYMS);
makeMultiT(S_URL_OPENANGLEBRACKET, qsNonAccepting, S_URL_OPENANGLEBRACKET_SYMS);
makeMultiT(S_URL_OPENPAREN, qsNonAccepting, S_URL_OPENPAREN_SYMS); // URL that begins with an opening bracket, followed by some symbols
makeMultiT(S_URL_OPENBRACE_Q, qsAccepting, S_URL_OPENBRACE_Q);
makeMultiT(S_URL_OPENBRACKET_Q, qsAccepting, S_URL_OPENBRACKET_Q);
makeMultiT(S_URL_OPENANGLEBRACKET_Q, qsAccepting, S_URL_OPENANGLEBRACKET_Q);
makeMultiT(S_URL_OPENPAREN_Q, qsAccepting, S_URL_OPENPAREN_Q);
makeMultiT(S_URL_OPENBRACE_Q, qsNonAccepting, S_URL_OPENBRACE_Q);
makeMultiT(S_URL_OPENBRACKET_Q, qsNonAccepting, S_URL_OPENBRACKET_Q);
makeMultiT(S_URL_OPENANGLEBRACKET_Q, qsNonAccepting, S_URL_OPENANGLEBRACKET_Q);
makeMultiT(S_URL_OPENPAREN_Q, qsNonAccepting, S_URL_OPENPAREN_Q);
makeMultiT(S_URL_OPENBRACE_SYMS, qsAccepting, S_URL_OPENBRACE_Q);
makeMultiT(S_URL_OPENBRACKET_SYMS, qsAccepting, S_URL_OPENBRACKET_Q);
makeMultiT(S_URL_OPENANGLEBRACKET_SYMS, qsAccepting, S_URL_OPENANGLEBRACKET_Q);
makeMultiT(S_URL_OPENPAREN_SYMS, qsAccepting, S_URL_OPENPAREN_Q);
makeMultiT(S_URL_OPENBRACE_SYMS, qsNonAccepting, S_URL_OPENBRACE_SYMS);
makeMultiT(S_URL_OPENBRACKET_SYMS, qsNonAccepting, S_URL_OPENBRACKET_SYMS);
makeMultiT(S_URL_OPENANGLEBRACKET_SYMS, qsNonAccepting, S_URL_OPENANGLEBRACKET_SYMS);
makeMultiT(S_URL_OPENPAREN_SYMS, qsNonAccepting, S_URL_OPENPAREN_SYMS); // Account for the query string
makeMultiT(S_URL, qsAccepting, S_URL);
makeMultiT(S_URL_NON_ACCEPTING, qsAccepting, S_URL);
makeMultiT(S_URL, qsNonAccepting, S_URL_NON_ACCEPTING);
makeMultiT(S_URL_NON_ACCEPTING, qsNonAccepting, S_URL_NON_ACCEPTING); // Email address-specific state definitions
// Note: We are not allowing '/' in email addresses since this would interfere
// with real URLs
// For addresses with the mailto prefix
// 'mailto:' followed by anything sane is a valid email
makeT(S_MAILTO, TLD, S_MAILTO_EMAIL);
makeT(S_MAILTO, DOMAIN, S_MAILTO_EMAIL);
makeT(S_MAILTO, NUM, S_MAILTO_EMAIL);
makeT(S_MAILTO, LOCALHOST, S_MAILTO_EMAIL); // Greedily get more potential valid email values
makeMultiT(S_MAILTO_EMAIL, qsAccepting, S_MAILTO_EMAIL);
makeMultiT(S_MAILTO_EMAIL, qsNonAccepting, S_MAILTO_EMAIL_NON_ACCEPTING);
makeMultiT(S_MAILTO_EMAIL_NON_ACCEPTING, qsAccepting, S_MAILTO_EMAIL);
makeMultiT(S_MAILTO_EMAIL_NON_ACCEPTING, qsNonAccepting, S_MAILTO_EMAIL_NON_ACCEPTING); // For addresses without the mailto prefix
// Tokens allowed in the localpart of the email
var localpartAccepting = [AMPERSAND, APOSTROPHE, ASTERISK, BACKSLASH, BACKTICK, CARET, CLOSEBRACE, DOLLAR, DOMAIN, EQUALS, HYPHEN, NUM, OPENBRACE, PERCENT, PIPE, PLUS, POUND, QUERY, SLASH, SYM, TILDE, TLD, UNDERSCORE]; // Some of the tokens in `localpartAccepting` are already accounted for here and
// will not be overwritten (don't worry)
makeMultiT(S_DOMAIN, localpartAccepting, S_LOCALPART);
makeT(S_DOMAIN, AT, S_LOCALPART_AT);
makeMultiT(S_TLD, localpartAccepting, S_LOCALPART);
makeT(S_TLD, AT, S_LOCALPART_AT);
makeMultiT(S_DOMAIN_DOT, localpartAccepting, S_LOCALPART); // Now in localpart of address
// TODO: IP addresses and what if the email starts with numbers?
makeMultiT(S_LOCALPART, localpartAccepting, S_LOCALPART);
makeT(S_LOCALPART, AT, S_LOCALPART_AT); // close to an email address now
makeT(S_LOCALPART, DOT, S_LOCALPART_DOT);
makeMultiT(S_LOCALPART_DOT, localpartAccepting, S_LOCALPART);
makeT(S_LOCALPART_AT, TLD, S_EMAIL_DOMAIN);
makeT(S_LOCALPART_AT, DOMAIN, S_EMAIL_DOMAIN);
makeT(S_LOCALPART_AT, NUM, S_EMAIL_DOMAIN);
makeT(S_LOCALPART_AT, LOCALHOST, S_EMAIL); // States following `@` defined above
return S_START;
}
/**
* Run the parser state machine on a list of scanned string-based tokens to
* create a list of multi tokens, each of which represents a URL, email address,
* plain text, etc.
*
* @param {State} start parser start state
* @param {string} input the original input used to generate the given tokens
* @param {{t: string, v: string, s: number, e: number}[]} tokens list of scanned tokens
* @returns {MultiToken[]}
*/
function run(start, input, tokens) {
var len = tokens.length;
var cursor = 0;
var multis = [];
var textTokens = [];
while (cursor < len) {
var state = start;
var secondState = null;
var nextState = null;
var multiLength = 0;
var latestAccepting = null;
var sinceAccepts = -1;
while (cursor < len && !(secondState = takeT(state, tokens[cursor].t))) {
// Starting tokens with nowhere to jump to.
// Consider these to be just plain text
textTokens.push(tokens[cursor++]);
}
while (cursor < len && (nextState = secondState || takeT(state, tokens[cursor].t))) {
// Get the next state
secondState = null;
state = nextState; // Keep track of the latest accepting state
if (state.accepts()) {
sinceAccepts = 0;
latestAccepting = state;
} else if (sinceAccepts >= 0) {
sinceAccepts++;
}
cursor++;
multiLength++;
}
if (sinceAccepts < 0) {
// No accepting state was found, part of a regular text token
// Add all the tokens we looked at to the text tokens array
for (var i = cursor - multiLength; i < cursor; i++) {
textTokens.push(tokens[i]);
}
} else {
// Accepting state!
// First close off the textTokens (if available)
if (textTokens.length > 0) {
multis.push(parserCreateMultiToken(Text, input, textTokens));
textTokens = [];
} // Roll back to the latest accepting state
cursor -= sinceAccepts;
multiLength -= sinceAccepts; // Create a new multitoken
var Multi = latestAccepting.t;
var subtokens = tokens.slice(cursor - multiLength, cursor);
multis.push(parserCreateMultiToken(Multi, input, subtokens));
}
} // Finally close off the textTokens (if available)
if (textTokens.length > 0) {
multis.push(parserCreateMultiToken(Text, input, textTokens));
}
return multis;
}
/**
* Utility function for instantiating a new multitoken with all the relevant
* fields during parsing.
* @param {Class<MultiToken>} Multi class to instantiate
* @param {string} input original input string
* @param {{t: string, v: string, s: number, e: number}[]} tokens consecutive tokens scanned from input string
* @returns {MultiToken}
*/
function parserCreateMultiToken(Multi, input, tokens) {
var startIdx = tokens[0].s;
var endIdx = tokens[tokens.length - 1].e;
var value = input.substr(startIdx, endIdx - startIdx);
return new Multi(value, tokens);
}
var warn = typeof console !== 'undefined' && console && console.warn || function () {}; // Side-effect initialization state
var INIT = {
scanner: null,
parser: null,
pluginQueue: [],
customProtocols: [],
initialized: false
};
/**
* De-register all plugins and reset the internal state-machine. Used for
* testing; not required in practice.
* @private
*/
function reset() {
INIT.scanner = null;
INIT.parser = null;
INIT.pluginQueue = [];
INIT.customProtocols = [];
INIT.initialized = false;
}
/**
* Register a linkify extension plugin
* @param {string} name of plugin to register
* @param {Function} plugin function that accepts mutable linkify state
*/
function registerPlugin(name, plugin) {
for (var i = 0; i < INIT.pluginQueue.length; i++) {
if (name === INIT.pluginQueue[i][0]) {
warn("linkifyjs: plugin \"".concat(name, "\" already registered - will be overwritten"));
INIT.pluginQueue[i] = [name, plugin];
return;
}
}
INIT.pluginQueue.push([name, plugin]);
if (INIT.initialized) {
warn("linkifyjs: already initialized - will not register plugin \"".concat(name, "\" until you manually call linkify.init(). To avoid this warning, please register all plugins before invoking linkify the first time."));
}
}
/**
* Detect URLs with the following additional protocol. Anything following
* "protocol:" will be considered a link.
* @param {string} protocol
*/
function registerCustomProtocol(protocol) {
if (INIT.initialized) {
warn("linkifyjs: already initialized - will not register custom protocol \"".concat(protocol, "\" until you manually call linkify.init(). To avoid this warning, please register all custom protocols before invoking linkify the first time."));
}
if (!/^[a-z-]+$/.test(protocol)) {
throw Error('linkifyjs: protocols containing characters other than a-z or - (hyphen) are not supported');
}
INIT.customProtocols.push(protocol);
}
/**
* Initialize the linkify state machine. Called automatically the first time
* linkify is called on a string, but may be called manually as well.
*/
function init() {
// Initialize state machines
INIT.scanner = {
start: init$2(INIT.customProtocols),
tokens: text
};
INIT.parser = {
start: init$1(),
tokens: multi
};
var utils = {
createTokenClass: createTokenClass
}; // Initialize plugins
for (var i = 0; i < INIT.pluginQueue.length; i++) {
INIT.pluginQueue[i][1]({
scanner: INIT.scanner,
parser: INIT.parser,
utils: utils
});
}
INIT.initialized = true;
}
/**
Parse a string into tokens that represent linkable and non-linkable sub-components
@param {string} str
@return {MultiToken[]} tokens
*/
function tokenize(str) {
if (!INIT.initialized) {
init();
}
return run(INIT.parser.start, str, run$1(INIT.scanner.start, str));
}
/**
Find a list of linkable items in the given string.
@param {string} str string to find links in
@param {string} [type] (optional) only find links of a specific type, e.g.,
'url' or 'email'
*/
function find(str) {
var type = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;
var tokens = tokenize(str);
var filtered = [];
for (var i = 0; i < tokens.length; i++) {
var token = tokens[i];
if (token.isLink && (!type || token.t === type)) {
filtered.push(token.toObject());
}
}
return filtered;
}
/**
* Is the given string valid linkable text of some sort. Note that this does not
* trim the text for you.
*
* Optionally pass in a second `type` param, which is the type of link to test
* for.
*
* For example,
*
* linkify.test(str, 'email');
*
* Returns `true` if str is a valid email.
* @param {string} str string to test for links
* @param {string} [type] optional specific link type to look for
* @returns boolean true/false
*/
function test(str) {
var type = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;
var tokens = tokenize(str);
return tokens.length === 1 && tokens[0].isLink && (!type || tokens[0].t === type);
}
export { Options, find, init, options, registerCustomProtocol, registerPlugin, reset, test, tokenize };