You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							460 lines
						
					
					
						
							10 KiB
						
					
					
				
			
		
		
	
	
							460 lines
						
					
					
						
							10 KiB
						
					
					
/**
 * default settings
 *
 * @author Zongmin Lei<leizongmin@gmail.com>
 */

var FilterCSS = require("cssfilter").FilterCSS;
var getDefaultCSSWhiteList = require("cssfilter").getDefaultWhiteList;
var _ = require("./util");

/**
 * Build the default whitelist.
 *
 * Each key is a tag name and each value is the list of attribute names
 * listed for that tag (an empty list means no attributes are listed).
 * A brand-new object is created on every call, so callers may mutate the
 * result without affecting later calls.
 *
 * @return {Object} map of tag name -> array of attribute names
 */
function getDefaultWhiteList() {
  return {
    a: ["target", "href", "title"],
    abbr: ["title"],
    address: [],
    area: ["shape", "coords", "href", "alt"],
    article: [],
    aside: [],
    audio: [
      "autoplay",
      "controls",
      "crossorigin",
      "loop",
      "muted",
      "preload",
      "src",
    ],
    b: [],
    bdi: ["dir"],
    bdo: ["dir"],
    big: [],
    blockquote: ["cite"],
    br: [],
    caption: [],
    center: [],
    cite: [],
    code: [],
    col: ["align", "valign", "span", "width"],
    colgroup: ["align", "valign", "span", "width"],
    dd: [],
    del: ["datetime"],
    details: ["open"],
    div: [],
    dl: [],
    dt: [],
    em: [],
    figcaption: [],
    figure: [],
    font: ["color", "size", "face"],
    footer: [],
    h1: [],
    h2: [],
    h3: [],
    h4: [],
    h5: [],
    h6: [],
    header: [],
    hr: [],
    i: [],
    img: ["src", "alt", "title", "width", "height"],
    ins: ["datetime"],
    li: [],
    mark: [],
    nav: [],
    ol: [],
    p: [],
    pre: [],
    s: [],
    section: [],
    small: [],
    span: [],
    sub: [],
    summary: [],
    sup: [],
    strong: [],
    strike: [],
    table: ["width", "border", "align", "valign"],
    tbody: ["align", "valign"],
    td: ["width", "rowspan", "colspan", "align", "valign"],
    tfoot: ["align", "valign"],
    th: ["width", "rowspan", "colspan", "align", "valign"],
    thead: ["align", "valign"],
    tr: ["rowspan", "align", "valign"],
    tt: [],
    u: [],
    ul: [],
    video: [
      "autoplay",
      "controls",
      "crossorigin",
      "loop",
      "muted",
      "playsinline",
      "poster",
      "preload",
      "src",
      "height",
      "width",
    ],
  };
}

// Shared CSS filter instance, created with no options; used as the fallback
// filter for `style` attribute values.
var defaultCSSFilter = new FilterCSS();

/**
 * default onTag function
 *
 * Intentionally a no-op: it ignores its arguments and returns nothing.
 * NOTE(review): presumably an `undefined` result tells the caller to fall
 * back to its built-in handling; confirm against the caller.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {undefined}
 */
function onTag(tag, html, options) {
  // do nothing
}

/**
 * default onIgnoreTag function
 *
 * Intentionally a no-op: it ignores its arguments and returns nothing.
 * NOTE(review): presumably an `undefined` result tells the caller to fall
 * back to its built-in handling; confirm against the caller.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {undefined}
 */
function onIgnoreTag(tag, html, options) {
  // do nothing
}

/**
 * default onTagAttr function
 *
 * Intentionally a no-op: it ignores its arguments and returns nothing.
 * NOTE(review): presumably an `undefined` result tells the caller to fall
 * back to its built-in handling; confirm against the caller.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {undefined}
 */
function onTagAttr(tag, name, value) {
  // do nothing
}

/**
 * default onIgnoreTagAttr function
 *
 * Intentionally a no-op: it ignores its arguments and returns nothing.
 * NOTE(review): presumably an `undefined` result tells the caller to fall
 * back to its built-in handling; confirm against the caller.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {undefined}
 */
function onIgnoreTagAttr(tag, name, value) {
  // do nothing
}

/**
 * default escapeHtml function
 *
 * Replaces every `<` with `&lt;` and every `>` with `&gt;` so the text can
 * no longer open or close a tag. Other characters are left untouched.
 *
 * @param {String} html
 * @return {String}
 */
function escapeHtml(html) {
  return html.replace(REGEXP_LT, "&lt;").replace(REGEXP_GT, "&gt;");
}

/**
 * default safeAttrValue function
 *
 * Normalises an attribute value with `friendlyAttrValue`, applies a check
 * that depends on the attribute name, and finally escapes the result with
 * `escapeAttrValue`:
 *
 * - `href` / `src`: the trimmed value must be exactly `#` or start with one
 *   of `http://`, `https://`, `mailto:`, `tel:`, `data:image/`, `ftp://`,
 *   `./`, `../`, `#`, `/` (case-sensitive); otherwise `""` is returned.
 * - `background`: rejected (`""`) when it contains a script-like scheme.
 * - `style`: rejected (`""`) when it contains `expression(` or a `url(`
 *   together with a script-like scheme; otherwise passed through the CSS
 *   filter unless `cssFilter === false`.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @param {Object} cssFilter object with a `process(value)` method; pass
 *   `false` to skip CSS filtering, or omit to use the module default
 * @return {String}
 */
function safeAttrValue(tag, name, value, cssFilter) {
  // unescape attribute value firstly
  value = friendlyAttrValue(value);

  if (name === "href" || name === "src") {
    // filter `href` and `src` attribute
    // only allow the value that starts with `http://` | `https://` |
    // `mailto:` | `tel:` | `data:image/` | `ftp://` | `./` | `../` | `#` | `/`
    value = _.trim(value);
    // a bare `#` is returned as-is, without going through the final escape
    if (value === "#") return "#";
    if (
      !(
        value.slice(0, 7) === "http://" ||
        value.slice(0, 8) === "https://" ||
        value.slice(0, 7) === "mailto:" ||
        value.slice(0, 4) === "tel:" ||
        value.slice(0, 11) === "data:image/" ||
        value.slice(0, 6) === "ftp://" ||
        value.slice(0, 2) === "./" ||
        value.slice(0, 3) === "../" ||
        value[0] === "#" ||
        value[0] === "/"
      )
    ) {
      return "";
    }
  } else if (name === "background") {
    // filter `background` attribute (maybe no use)
    // `javascript:`
    // the regexps carry the `g` flag, so `test()` is stateful: reset first
    REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
      return "";
    }
  } else if (name === "style") {
    // `expression()`
    REGEXP_DEFAULT_ON_TAG_ATTR_7.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_7.test(value)) {
      return "";
    }
    // `url()`
    REGEXP_DEFAULT_ON_TAG_ATTR_8.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_8.test(value)) {
      REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
      if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
        return "";
      }
    }
    if (cssFilter !== false) {
      cssFilter = cssFilter || defaultCSSFilter;
      value = cssFilter.process(value);
    }
  }

  // escape `<>"` before returns
  value = escapeAttrValue(value);
  return value;
}

// RegExp list
// All of these carry the `g` flag: when used with `test()`/`exec()` they are
// stateful, so reset `lastIndex` to 0 before each such use.
var REGEXP_LT = /</g;
var REGEXP_GT = />/g;
var REGEXP_QUOTE = /"/g;
// the `&quot;` entity (the escaped form of a double quote)
var REGEXP_QUOTE_2 = /&quot;/g;
// numeric character references such as `&#106;` / `&#x6A;` (`;` optional)
var REGEXP_ATTR_VALUE_1 = /&#([a-zA-Z0-9]*);?/gim;
// HTML5 named references that can hide a `:` or a line break (`;` optional)
var REGEXP_ATTR_VALUE_COLON = /&colon;?/gim;
var REGEXP_ATTR_VALUE_NEWLINE = /&newline;?/gim;
// var REGEXP_DEFAULT_ON_TAG_ATTR_3 = /\/\*|\*\//gm;
// script-like URL schemes, tolerating whitespace between the letters
var REGEXP_DEFAULT_ON_TAG_ATTR_4 =
  /((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a):/gi;
// var REGEXP_DEFAULT_ON_TAG_ATTR_5 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:/gi;
// var REGEXP_DEFAULT_ON_TAG_ATTR_6 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:\s*image\//gi;
// CSS `expression(`
var REGEXP_DEFAULT_ON_TAG_ATTR_7 =
  /e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/gi;
// CSS `url(`
var REGEXP_DEFAULT_ON_TAG_ATTR_8 = /u\s*r\s*l\s*\(.*/gi;

/**
 * escape double quote
 *
 * Replaces every `"` with the `&quot;` entity.
 *
 * @param {String} str
 * @return {String} str
 */
function escapeQuote(str) {
  return str.replace(REGEXP_QUOTE, "&quot;");
}

/**
 * unescape double quote
 *
 * Replaces every match of `REGEXP_QUOTE_2` (the escaped-quote pattern) with
 * a literal `"`.
 *
 * @param {String} str
 * @return {String} str
 */
function unescapeQuote(str) {
  return str.replace(REGEXP_QUOTE_2, '"');
}

/**
 * escape html entities
 *
 * Despite the name, this decodes numeric character references: every match
 * of `REGEXP_ATTR_VALUE_1` is replaced by the character it encodes. A code
 * starting with `x`/`X` is read as hexadecimal, anything else as decimal.
 * A code that cannot be parsed yields U+0000, because
 * `String.fromCharCode(NaN)` is `"\u0000"`.
 *
 * @param {String} str
 * @return {String}
 */
function escapeHtmlEntities(str) {
  return str.replace(REGEXP_ATTR_VALUE_1, function replaceUnicode(match, code) {
    return code[0] === "x" || code[0] === "X"
      ? String.fromCharCode(parseInt(code.slice(1), 16))
      : String.fromCharCode(parseInt(code, 10));
  });
}

/**
 * escape html5 new danger entities
 *
 * Replaces every match of `REGEXP_ATTR_VALUE_COLON` with `:` and every match
 * of `REGEXP_ATTR_VALUE_NEWLINE` with a single space.
 *
 * @param {String} str
 * @return {String}
 */
function escapeDangerHtml5Entities(str) {
  return str
    .replace(REGEXP_ATTR_VALUE_COLON, ":")
    .replace(REGEXP_ATTR_VALUE_NEWLINE, " ");
}

/**
 * clear nonprintable characters
 *
 * Replaces every character whose char code is below 32 with a space, then
 * trims the result with `_.trim`.
 *
 * @param {String} str
 * @return {String}
 */
function clearNonPrintableCharacter(str) {
  var str2 = "";
  for (var i = 0, len = str.length; i < len; i++) {
    str2 += str.charCodeAt(i) < 32 ? " " : str.charAt(i);
  }
  return _.trim(str2);
}

/**
 * get friendly attribute value
 *
 * Runs the value through, in this order: `unescapeQuote`,
 * `escapeHtmlEntities`, `escapeDangerHtml5Entities` and
 * `clearNonPrintableCharacter`.
 *
 * @param {String} str
 * @return {String}
 */
function friendlyAttrValue(str) {
  str = unescapeQuote(str);
  str = escapeHtmlEntities(str);
  str = escapeDangerHtml5Entities(str);
  str = clearNonPrintableCharacter(str);
  return str;
}

/**
 * escape attribute value
 *
 * Applies `escapeQuote` and then `escapeHtml` to the value.
 *
 * @param {String} str
 * @return {String}
 */
function escapeAttrValue(str) {
  str = escapeQuote(str);
  str = escapeHtml(str);
  return str;
}

/**
 * `onIgnoreTag` function for removing all the tags that are not in whitelist
 *
 * Always returns an empty string, whatever arguments it is called with.
 *
 * @return {String}
 */
function onIgnoreTagStripAll() {
  return "";
}

/**
 * remove tag body
 * specify a `tags` list, if the tag is not in the `tags` list then process by the specify function (optional)
 *
 * Returns a pair of cooperating functions that share state:
 * - `onIgnoreTag(tag, html, options)` replaces a matching opening tag with
 *   `[removed]` and a matching closing tag with `[/removed]`, recording the
 *   `[start, end)` offsets of each opening..closing span (taken from
 *   `options.position`). Non-matching tags are delegated to `next`.
 * - `remove(html)` returns `html` with every recorded span cut out.
 *
 * NOTE(review): `remove` slices its argument by the recorded offsets, so
 * `options.position` presumably refers to the string later passed to
 * `remove`; confirm with the caller.
 *
 * @param {array} tags tag names to strip; any non-array value strips every tag
 * @param {function} next fallback for tags not in `tags` (defaults to a no-op)
 * @return {Object} `{ onIgnoreTag, remove }`
 */
function StripTagBody(tags, next) {
  if (typeof next !== "function") {
    next = function () {};
  }

  var isRemoveAllTag = !Array.isArray(tags);
  function isRemoveTag(tag) {
    if (isRemoveAllTag) return true;
    return _.indexOf(tags, tag) !== -1;
  }

  var removeList = [];
  // offset of the first unclosed opening tag, or `false` when none is pending
  var posStart = false;

  return {
    onIgnoreTag: function (tag, html, options) {
      if (isRemoveTag(tag)) {
        if (options.isClosing) {
          var ret = "[/removed]";
          var end = options.position + ret.length;
          removeList.push([
            posStart !== false ? posStart : options.position,
            end,
          ]);
          posStart = false;
          return ret;
        } else {
          // compare against `false` explicitly: offset 0 is a valid start
          // and must not be overwritten by a later opening tag
          if (posStart === false) {
            posStart = options.position;
          }
          return "[removed]";
        }
      } else {
        return next(tag, html, options);
      }
    },
    remove: function (html) {
      var rethtml = "";
      var lastPos = 0;
      _.forEach(removeList, function (pos) {
        rethtml += html.slice(lastPos, pos[0]);
        lastPos = pos[1];
      });
      rethtml += html.slice(lastPos);
      return rethtml;
    },
  };
}

/**
 * remove html comments
 *
 * Cuts out every `<!-- ... -->` span. If a `<!--` has no matching `-->`,
 * everything from that `<!--` to the end of the input is dropped.
 *
 * @param {String} html
 * @return {String}
 */
function stripCommentTag(html) {
  var retHtml = "";
  var lastPos = 0;
  while (lastPos < html.length) {
    var i = html.indexOf("<!--", lastPos);
    if (i === -1) {
      // no further comment: keep the rest verbatim
      retHtml += html.slice(lastPos);
      break;
    }
    retHtml += html.slice(lastPos, i);
    var j = html.indexOf("-->", i);
    if (j === -1) {
      // unterminated comment: discard the remainder
      break;
    }
    lastPos = j + 3;
  }
  return retHtml;
}

/**
 * remove invisible characters
 *
 * Drops every character whose char code is 127 or is 31 and below, except
 * line feed (10) and carriage return (13), which are kept.
 *
 * @param {String} html
 * @return {String}
 */
function stripBlankChar(html) {
  var chars = html.split("");
  chars = chars.filter(function (char) {
    var c = char.charCodeAt(0);
    if (c === 127) return false;
    if (c <= 31) {
      if (c === 10 || c === 13) return true;
      return false;
    }
    return true;
  });
  return chars.join("");
}

// Public API of this module.
// `whiteList` is built once at load time; `getDefaultWhiteList` returns a
// fresh copy on every call.
exports.whiteList = getDefaultWhiteList();
exports.getDefaultWhiteList = getDefaultWhiteList;
exports.onTag = onTag;
exports.onIgnoreTag = onIgnoreTag;
exports.onTagAttr = onTagAttr;
exports.onIgnoreTagAttr = onIgnoreTagAttr;
exports.safeAttrValue = safeAttrValue;
exports.escapeHtml = escapeHtml;
exports.escapeQuote = escapeQuote;
exports.unescapeQuote = unescapeQuote;
exports.escapeHtmlEntities = escapeHtmlEntities;
exports.escapeDangerHtml5Entities = escapeDangerHtml5Entities;
exports.clearNonPrintableCharacter = clearNonPrintableCharacter;
exports.friendlyAttrValue = friendlyAttrValue;
exports.escapeAttrValue = escapeAttrValue;
exports.onIgnoreTagStripAll = onIgnoreTagStripAll;
exports.StripTagBody = StripTagBody;
exports.stripCommentTag = stripCommentTag;
exports.stripBlankChar = stripBlankChar;
exports.cssFilter = defaultCSSFilter;
exports.getDefaultCSSWhiteList = getDefaultCSSWhiteList;