You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					460 lines
				
				10 KiB
			
		
		
			
		
	
	
					460 lines
				
				10 KiB
			| 
											3 years ago
										 | /** | ||
|  |  * default settings | ||
|  |  * | ||
|  |  * @author Zongmin Lei<leizongmin@gmail.com> | ||
|  |  */ | ||
|  | 
 | ||
|  | var FilterCSS = require("cssfilter").FilterCSS; | ||
|  | var getDefaultCSSWhiteList = require("cssfilter").getDefaultWhiteList; | ||
|  | var _ = require("./util"); | ||
|  | 
 | ||
/**
 * Build the default tag/attribute whitelist.
 *
 * The object (and every attribute array in it) is created from literals on
 * each call, so callers receive an independent copy they can modify freely.
 *
 * @return {Object} map of tag name -> array of attribute names listed for that tag
 */
function getDefaultWhiteList() {
  return {
    a: ["target", "href", "title"],
    abbr: ["title"],
    address: [],
    area: ["shape", "coords", "href", "alt"],
    article: [],
    aside: [],
    audio: [
      "autoplay",
      "controls",
      "crossorigin",
      "loop",
      "muted",
      "preload",
      "src",
    ],
    b: [],
    bdi: ["dir"],
    bdo: ["dir"],
    big: [],
    blockquote: ["cite"],
    br: [],
    caption: [],
    center: [],
    cite: [],
    code: [],
    col: ["align", "valign", "span", "width"],
    colgroup: ["align", "valign", "span", "width"],
    dd: [],
    del: ["datetime"],
    details: ["open"],
    div: [],
    dl: [],
    dt: [],
    em: [],
    figcaption: [],
    figure: [],
    font: ["color", "size", "face"],
    footer: [],
    h1: [],
    h2: [],
    h3: [],
    h4: [],
    h5: [],
    h6: [],
    header: [],
    hr: [],
    i: [],
    img: ["src", "alt", "title", "width", "height"],
    ins: ["datetime"],
    li: [],
    mark: [],
    nav: [],
    ol: [],
    p: [],
    pre: [],
    s: [],
    section: [],
    small: [],
    span: [],
    sub: [],
    summary: [],
    sup: [],
    strong: [],
    strike: [],
    table: ["width", "border", "align", "valign"],
    tbody: ["align", "valign"],
    td: ["width", "rowspan", "colspan", "align", "valign"],
    tfoot: ["align", "valign"],
    th: ["width", "rowspan", "colspan", "align", "valign"],
    thead: ["align", "valign"],
    tr: ["rowspan", "align", "valign"],
    tt: [],
    u: [],
    ul: [],
    video: [
      "autoplay",
      "controls",
      "crossorigin",
      "loop",
      "muted",
      "playsinline",
      "poster",
      "preload",
      "src",
      "height",
      "width",
    ],
  };
}
|  | 
 | ||
// Shared FilterCSS instance, constructed with no arguments. `safeAttrValue`
// falls back to it when no `cssFilter` is supplied, and it is exported as
// `cssFilter`.
|  | var defaultCSSFilter = new FilterCSS(); | ||
|  | 
 | ||
/**
 * default onTag function
 *
 * Intentionally empty: it ignores its arguments and returns `undefined`.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {undefined}
 */
function onTag(tag, html, options) {
  // do nothing
}
|  | 
 | ||
/**
 * default onIgnoreTag function
 *
 * Intentionally empty: it ignores its arguments and returns `undefined`.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {undefined}
 */
function onIgnoreTag(tag, html, options) {
  // do nothing
}
|  | 
 | ||
/**
 * default onTagAttr function
 *
 * Intentionally empty: it ignores its arguments and returns `undefined`.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {undefined}
 */
function onTagAttr(tag, name, value) {
  // do nothing
}
|  | 
 | ||
/**
 * default onIgnoreTagAttr function
 *
 * Intentionally empty: it ignores its arguments and returns `undefined`.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {undefined}
 */
function onIgnoreTagAttr(tag, name, value) {
  // do nothing
}
|  | 
 | ||
/**
 * default escapeHtml function
 *
 * Replaces every match of REGEXP_LT with `&lt;` and every match of REGEXP_GT
 * with `&gt;`. Other characters (including `&` and quotes) are left as they
 * are.
 *
 * @param {String} html
 * @return {String}
 */
function escapeHtml(html) {
  return html.replace(REGEXP_LT, "&lt;").replace(REGEXP_GT, "&gt;");
}
|  | 
 | ||
/**
 * default safeAttrValue function
 *
 * Normalises the raw value with `friendlyAttrValue`, applies a per-attribute
 * filter, then escapes the result with `escapeAttrValue`. A rejected value is
 * reported by returning the empty string.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @param {Object} cssFilter  object with a `process(value)` method used for
 *                            `style`; pass `false` to skip CSS filtering, or
 *                            leave it falsy to use the shared default filter
 * @return {String}
 */
function safeAttrValue(tag, name, value, cssFilter) {
  // unescape attribute value firstly
  value = friendlyAttrValue(value);

  if (name === "href" || name === "src") {
    // filter `href` and `src` attribute
    // only allow a value that starts with `http://` | `https://` | `mailto:` |
    // `tel:` | `data:image/` | `ftp://` | `./` | `../` | `#` | `/`
    // (the comparison is case-sensitive)
    value = _.trim(value);
    if (value === "#") return "#";
    if (
      !(
        value.substr(0, 7) === "http://" ||
        value.substr(0, 8) === "https://" ||
        value.substr(0, 7) === "mailto:" ||
        value.substr(0, 4) === "tel:" ||
        value.substr(0, 11) === "data:image/" ||
        value.substr(0, 6) === "ftp://" ||
        value.substr(0, 2) === "./" ||
        value.substr(0, 3) === "../" ||
        value[0] === "#" ||
        value[0] === "/"
      )
    ) {
      return "";
    }
  } else if (name === "background") {
    // filter `background` attribute (maybe no use)
    // reject script-like schemes such as `javascript:`
    // the regexp is global, so reset its cursor before every `test`
    REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
      return "";
    }
  } else if (name === "style") {
    // `expression()`
    REGEXP_DEFAULT_ON_TAG_ATTR_7.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_7.test(value)) {
      return "";
    }
    // `url()` is rejected only when the value also contains a script-like scheme
    REGEXP_DEFAULT_ON_TAG_ATTR_8.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_8.test(value)) {
      REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
      if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
        return "";
      }
    }
    if (cssFilter !== false) {
      cssFilter = cssFilter || defaultCSSFilter;
      value = cssFilter.process(value);
    }
  }

  // escape `<>"` before returns
  value = escapeAttrValue(value);
  return value;
}
|  | 
 | ||
// RegExp list
// All of these carry the `g` flag; code that calls `.test()` on one of them
// resets `lastIndex` first.
var REGEXP_LT = /</g;
var REGEXP_GT = />/g;
// literal double quote (input of escapeQuote)
var REGEXP_QUOTE = /"/g;
// escaped double quote (input of unescapeQuote)
var REGEXP_QUOTE_2 = /&quot;/g;
// numeric character reference, decimal or `x`-prefixed hex, `;` optional
var REGEXP_ATTR_VALUE_1 = /&#([a-zA-Z0-9]*);?/gim;
// named references for `:` and newline, `;` optional, case-insensitive
var REGEXP_ATTR_VALUE_COLON = /&colon;?/gim;
var REGEXP_ATTR_VALUE_NEWLINE = /&newline;?/gim;
// var REGEXP_DEFAULT_ON_TAG_ATTR_3 = /\/\*|\*\//gm;
// `javascript:` / `vbscript:` / `livescript:` / `mocha:`, tolerating
// whitespace between the letters
var REGEXP_DEFAULT_ON_TAG_ATTR_4 =
  /((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a):/gi;
// var REGEXP_DEFAULT_ON_TAG_ATTR_5 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:/gi;
// var REGEXP_DEFAULT_ON_TAG_ATTR_6 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:\s*image\//gi;
// CSS `expression(`
var REGEXP_DEFAULT_ON_TAG_ATTR_7 =
  /e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/gi;
// CSS `url(`
var REGEXP_DEFAULT_ON_TAG_ATTR_8 = /u\s*r\s*l\s*\(.*/gi;
|  | 
 | ||
/**
 * escape double quote
 *
 * Replaces every match of REGEXP_QUOTE with the entity `&quot;`.
 *
 * @param {String} str
 * @return {String} str
 */
function escapeQuote(str) {
  return str.replace(REGEXP_QUOTE, "&quot;");
}
|  | 
 | ||
/**
 * unescape double quote
 *
 * Replaces every match of REGEXP_QUOTE_2 with a literal `"` character.
 *
 * @param {String} str
 * @return {String} str
 */
function unescapeQuote(str) {
  return str.replace(REGEXP_QUOTE_2, '"');
}
|  | 
 | ||
/**
 * escape html entities
 *
 * Despite the name, this decodes: each match of REGEXP_ATTR_VALUE_1 is
 * replaced by the character whose code is captured in the match. A capture
 * starting with `x`/`X` is parsed as hexadecimal, anything else as decimal.
 * The character is produced with `String.fromCharCode`, so the code is used
 * as a single UTF-16 unit.
 *
 * @param {String} str
 * @return {String}
 */
function escapeHtmlEntities(str) {
  return str.replace(REGEXP_ATTR_VALUE_1, function replaceUnicode(str, code) {
    return code[0] === "x" || code[0] === "X"
      ? String.fromCharCode(parseInt(code.substr(1), 16))
      : String.fromCharCode(parseInt(code, 10));
  });
}
|  | 
 | ||
/**
 * escape html5 new danger entities
 *
 * Substitutes `:` for whatever REGEXP_ATTR_VALUE_COLON matches, then a single
 * space for whatever REGEXP_ATTR_VALUE_NEWLINE matches.
 *
 * @param {String} str
 * @return {String}
 */
function escapeDangerHtml5Entities(str) {
  return str
    .replace(REGEXP_ATTR_VALUE_COLON, ":")
    .replace(REGEXP_ATTR_VALUE_NEWLINE, " ");
}
|  | 
 | ||
/**
 * clear nonprintable characters
 *
 * Every character whose char code is below 32 is replaced by a space; the
 * result is then passed through `_.trim`.
 *
 * @param {String} str
 * @return {String}
 */
function clearNonPrintableCharacter(str) {
  var str2 = "";
  for (var i = 0, len = str.length; i < len; i++) {
    str2 += str.charCodeAt(i) < 32 ? " " : str.charAt(i);
  }
  return _.trim(str2);
}
|  | 
 | ||
/**
 * get friendly attribute value
 *
 * Runs the value through, in this order: `unescapeQuote`,
 * `escapeHtmlEntities`, `escapeDangerHtml5Entities` and
 * `clearNonPrintableCharacter`.
 *
 * @param {String} str
 * @return {String}
 */
function friendlyAttrValue(str) {
  str = unescapeQuote(str);
  str = escapeHtmlEntities(str);
  str = escapeDangerHtml5Entities(str);
  str = clearNonPrintableCharacter(str);
  return str;
}
|  | 
 | ||
/**
 * escape attribute value
 *
 * Applies `escapeQuote` first and `escapeHtml` second.
 *
 * @param {String} str
 * @return {String}
 */
function escapeAttrValue(str) {
  str = escapeQuote(str);
  str = escapeHtml(str);
  return str;
}
|  | 
 | ||
/**
 * `onIgnoreTag` function for removing all the tags that are not in whitelist
 *
 * Takes no arguments into account and always yields the empty string.
 *
 * @return {String} always ""
 */
function onIgnoreTagStripAll() {
  return "";
}
|  | 
 | ||
/**
 * remove tag body
 * specify a `tags` list, if the tag is not in the `tags` list then process by the specify function (optional)
 *
 * The returned `onIgnoreTag` emits `[removed]` / `[/removed]` markers and
 * records, per closing tag, the span running from the first still-open
 * marker to the end of the closing marker. `remove(html)` then cuts those
 * recorded spans out of `html`.
 *
 * @param {array} tags     tag names to strip; when this is not an array,
 *                         every tag passed to `onIgnoreTag` is stripped
 * @param {function} next  called as `next(tag, html, options)` for tags that
 *                         are not stripped; defaults to a no-op
 * @return {Object} `{ onIgnoreTag: function, remove: function }`
 */
function StripTagBody(tags, next) {
  if (typeof next !== "function") {
    next = function () {};
  }

  var isRemoveAllTag = !Array.isArray(tags);
  function isRemoveTag(tag) {
    if (isRemoveAllTag) return true;
    return _.indexOf(tags, tag) !== -1;
  }

  // list of [start, end] spans to cut out in `remove`
  var removeList = [];
  // start of the span currently being collected; `false` means "none".
  // 0 is a valid position, so it must only ever be compared against `false`.
  var posStart = false;

  return {
    onIgnoreTag: function (tag, html, options) {
      if (isRemoveTag(tag)) {
        if (options.isClosing) {
          var ret = "[/removed]";
          var end = options.position + ret.length;
          removeList.push([
            posStart !== false ? posStart : options.position,
            end,
          ]);
          posStart = false;
          return ret;
        } else {
          // keep the earliest opening position, including position 0
          if (posStart === false) {
            posStart = options.position;
          }
          return "[removed]";
        }
      } else {
        return next(tag, html, options);
      }
    },
    remove: function (html) {
      var rethtml = "";
      var lastPos = 0;
      _.forEach(removeList, function (pos) {
        rethtml += html.slice(lastPos, pos[0]);
        lastPos = pos[1];
      });
      rethtml += html.slice(lastPos);
      return rethtml;
    },
  };
}
|  | 
 | ||
/**
 * remove html comments
 *
 * Drops every `<!-- ... -->` span. If a `<!--` has no matching `-->`, the
 * text from that `<!--` to the end of the input is dropped as well.
 *
 * @param {String} html
 * @return {String}
 */
function stripCommentTag(html) {
  var retHtml = "";
  var lastPos = 0;
  while (lastPos < html.length) {
    var i = html.indexOf("<!--", lastPos);
    if (i === -1) {
      // no further comment: keep the remainder
      retHtml += html.slice(lastPos);
      break;
    }
    retHtml += html.slice(lastPos, i);
    var j = html.indexOf("-->", i);
    if (j === -1) {
      // unterminated comment: discard the remainder
      break;
    }
    lastPos = j + 3;
  }
  return retHtml;
}
|  | 
 | ||
/**
 * remove invisible characters
 *
 * Removes char code 127 and every char code from 0 to 31, except 10 (line
 * feed) and 13 (carriage return), which are kept.
 *
 * @param {String} html
 * @return {String}
 */
function stripBlankChar(html) {
  var chars = html.split("");
  chars = chars.filter(function (char) {
    var c = char.charCodeAt(0);
    if (c === 127) return false;
    if (c <= 31) {
      if (c === 10 || c === 13) return true;
      return false;
    }
    return true;
  });
  return chars.join("");
}
|  | 
 | ||
// Module exports. `whiteList` holds the result of one `getDefaultWhiteList()`
// call made when this module is loaded, while `getDefaultWhiteList` itself
// builds a new object on each call. `cssFilter` exposes `defaultCSSFilter`.
|  | exports.whiteList = getDefaultWhiteList(); | ||
|  | exports.getDefaultWhiteList = getDefaultWhiteList; | ||
|  | exports.onTag = onTag; | ||
|  | exports.onIgnoreTag = onIgnoreTag; | ||
|  | exports.onTagAttr = onTagAttr; | ||
|  | exports.onIgnoreTagAttr = onIgnoreTagAttr; | ||
|  | exports.safeAttrValue = safeAttrValue; | ||
|  | exports.escapeHtml = escapeHtml; | ||
|  | exports.escapeQuote = escapeQuote; | ||
|  | exports.unescapeQuote = unescapeQuote; | ||
|  | exports.escapeHtmlEntities = escapeHtmlEntities; | ||
|  | exports.escapeDangerHtml5Entities = escapeDangerHtml5Entities; | ||
|  | exports.clearNonPrintableCharacter = clearNonPrintableCharacter; | ||
|  | exports.friendlyAttrValue = friendlyAttrValue; | ||
|  | exports.escapeAttrValue = escapeAttrValue; | ||
|  | exports.onIgnoreTagStripAll = onIgnoreTagStripAll; | ||
|  | exports.StripTagBody = StripTagBody; | ||
|  | exports.stripCommentTag = stripCommentTag; | ||
|  | exports.stripBlankChar = stripBlankChar; | ||
|  | exports.cssFilter = defaultCSSFilter; | ||
|  | exports.getDefaultCSSWhiteList = getDefaultCSSWhiteList; |