You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					132 lines
				
				3.5 KiB
			
		
		
			
		
	
	
					132 lines
				
				3.5 KiB
			| 
								 
											3 years ago
										 
									 | 
							
								var getDelimiter = require("./getDelimiter");
							 | 
						||
| 
								 | 
							
								var filterRow=require("./filterRow");
							 | 
						||
| 
								 | 
							
								/**
							 | 
						||
| 
								 | 
							
								 * Convert a line of string to csv columns according to its delimiter
							 | 
						||
| 
								 | 
							
								 * the param._header may not be ready when this is called.
							 | 
						||
| 
								 | 
							
								 * @param  {[type]} rowStr [description]
							 | 
						||
| 
								 | 
							
								 * @param  {[type]} param  [Converter param]
							 | 
						||
| 
								 | 
							
								 * @return {[type]}        {cols:["a","b","c"],closed:boolean} the closed field indicate if the row is a complete row
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								module.exports = function rowSplit(rowStr, param) {
							 | 
						||
| 
								 | 
							
								  if (rowStr === "") {
							 | 
						||
| 
								 | 
							
								    return { cols: [], closed: true };
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  var quote = param.quote;
							 | 
						||
| 
								 | 
							
								  var trim = param.trim;
							 | 
						||
| 
								 | 
							
								  var escape = param.escape;
							 | 
						||
| 
								 | 
							
								  if (param.delimiter instanceof Array || param.delimiter.toLowerCase() === "auto") {
							 | 
						||
| 
								 | 
							
								    param.delimiter = getDelimiter(rowStr, param);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  var delimiter = param.delimiter;
							 | 
						||
| 
								 | 
							
								  var rowArr = rowStr.split(delimiter);
							 | 
						||
| 
								 | 
							
								  if (quote === "off") {
							 | 
						||
| 
								 | 
							
								    return { cols: rowArr, closed: true };
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  var row = [];
							 | 
						||
| 
								 | 
							
								  var inquote = false;
							 | 
						||
| 
								 | 
							
								  var quoteBuff = '';
							 | 
						||
| 
								 | 
							
								  for (var i = 0, rowLen = rowArr.length; i < rowLen; i++) {
							 | 
						||
| 
								 | 
							
								    var e = rowArr[i];
							 | 
						||
| 
								 | 
							
								    if (!inquote && trim) {
							 | 
						||
| 
								 | 
							
								      e = e.trim();
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    var len = e.length;
							 | 
						||
| 
								 | 
							
								    if (!inquote) {
							 | 
						||
| 
								 | 
							
								      if (isQuoteOpen(e, param)) { //quote open
							 | 
						||
| 
								 | 
							
								        e = e.substr(1);
							 | 
						||
| 
								 | 
							
								        if (isQuoteClose(e, param)) { //quote close
							 | 
						||
| 
								 | 
							
								          e = e.substring(0, e.length - 1);
							 | 
						||
| 
								 | 
							
								          e = _escapeQuote(e, quote, escape);
							 | 
						||
| 
								 | 
							
								          row.push(e);
							 | 
						||
| 
								 | 
							
								          continue;
							 | 
						||
| 
								 | 
							
								        } else {
							 | 
						||
| 
								 | 
							
								          inquote = true;
							 | 
						||
| 
								 | 
							
								          quoteBuff += e;
							 | 
						||
| 
								 | 
							
								          continue;
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								      } else {
							 | 
						||
| 
								 | 
							
								        row.push(e);
							 | 
						||
| 
								 | 
							
								        continue;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    } else { //previous quote not closed
							 | 
						||
| 
								 | 
							
								      if (isQuoteClose(e, param)) { //close double quote
							 | 
						||
| 
								 | 
							
								        inquote = false;
							 | 
						||
| 
								 | 
							
								        e = e.substr(0, len - 1);
							 | 
						||
| 
								 | 
							
								        quoteBuff += delimiter + e;
							 | 
						||
| 
								 | 
							
								        quoteBuff = _escapeQuote(quoteBuff, quote, escape);
							 | 
						||
| 
								 | 
							
								        if (trim) {
							 | 
						||
| 
								 | 
							
								          quoteBuff = quoteBuff.replace(/\s+$/, "");
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        row.push(quoteBuff);
							 | 
						||
| 
								 | 
							
								        quoteBuff = "";
							 | 
						||
| 
								 | 
							
								      } else {
							 | 
						||
| 
								 | 
							
								        quoteBuff += delimiter + e;
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (!inquote && param._needFilterRow) {
							 | 
						||
| 
								 | 
							
								    row = filterRow(row, param);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  return { cols: row, closed: !inquote };
							 | 
						||
| 
								 | 
							
								  // if (param.workerNum<=1){
							 | 
						||
| 
								 | 
							
								  // }else{
							 | 
						||
| 
								 | 
							
								  //   if (inquote && quoteBuff.length>0){//for multi core, quote will be closed at the end of line
							 | 
						||
| 
								 | 
							
								  //     quoteBuff=_escapeQuote(quoteBuff,quote,escape);;
							 | 
						||
| 
								 | 
							
								  //     if (trim){
							 | 
						||
| 
								 | 
							
								  //       quoteBuff=quoteBuff.trimRight();
							 | 
						||
| 
								 | 
							
								  //     }
							 | 
						||
| 
								 | 
							
								  //     row.push(quoteBuff);
							 | 
						||
| 
								 | 
							
								  //   }
							 | 
						||
| 
								 | 
							
								  //   return {cols:row,closed:true};
							 | 
						||
| 
								 | 
							
								  // }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Tell whether a cell starts a quoted value.
								 *
								 * The cell must begin with the quote character. If its second character is
								 * also the quote character, it only counts as an opening when that character
								 * is the escape character too and is followed by a third quote character or
								 * the cell is exactly two characters long.
								 *
								 * @param  {string} str   Cell text.
								 * @param  {Object} param Converter param; reads `quote` and `escape`.
								 * @return {boolean}      true when the cell opens a quote.
								 */
								function isQuoteOpen(str, param) {
								  var quoteChar = param.quote;
								  var escapeChar = param.escape;

								  if (str[0] !== quoteChar) {
								    return false;
								  }

								  var second = str[1];
								  if (second !== quoteChar) {
								    return true;
								  }
								  if (second !== escapeChar) {
								    return false;
								  }
								  return str[2] === quoteChar || str.length === 2;
								}
							 | 
						||
| 
								 | 
							
								/**
								 * Tell whether a cell ends a quoted value.
								 *
								 * Counts the run of quote / escape characters at the end of the cell; an odd
								 * count is reported as a closing quote, an even count is not.
								 *
								 * The backward scan is bounded by the start of the string. Without that
								 * bound, an undefined `quote` or `escape` would compare equal to the
								 * undefined value read before index 0 and the loop would never end.
								 *
								 * @param  {string} str   Cell text.
								 * @param  {Object} param Converter param; reads `quote` and `escape`.
								 * @return {boolean}      true when the trailing run has odd length.
								 */
								function isQuoteClose(str, param) {
								  var quote = param.quote;
								  var escape = param.escape;
								  var count = 0;
								  var idx = str.length - 1;
								  while (idx >= 0 && (str[idx] === quote || str[idx] === escape)) {
								    idx--;
								    count++;
								  }
								  return count % 2 !== 0;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Collapse every doubled quote character into a single one.
								 *
								 * Pairs are matched left to right without overlapping, so four quote
								 * characters in a row become two.
								 *
								 * @param  {string} str   Text to process.
								 * @param  {string} quote Quote character.
								 * @return {string}       Text with each doubled quote reduced to one.
								 */
								function twoDoubleQuote(str, quote) {
								  var pair = quote + quote;
								  var pos = str.indexOf(pair, -1);
								  while (pos > -1) {
								    // Remove the first quote of the pair, then resume after the one that is kept.
								    str = str.substring(0, pos) + str.substring(pos + 1);
								    pos = str.indexOf(pair, pos + 1);
								  }
								  return str;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Compiled "escape + quote" patterns, keyed by the quote/escape pair in use.
								var cachedRegExp = {};

								/**
								 * Backslash-escape regular-expression metacharacters so the text is matched
								 * literally.
								 *
								 * @param  {*} text Value to embed in a pattern (converted with String()).
								 * @return {string} Pattern source matching `text` literally.
								 */
								function _escapeRegExpLiteral(text) {
								  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
								}

								/**
								 * Replace every escaped quote (escape sequence followed by the quote) in a
								 * segment with a bare quote.
								 *
								 * The pattern is built from properly escaped literals. Blindly prefixing a
								 * backslash would turn alphanumeric characters into regex classes such as
								 * `\d` or `\b`, and would cover only the first character of a longer value.
								 *
								 * @param  {string} segment Cell text with the surrounding quotes already removed.
								 * @param  {string} quote   Quote character.
								 * @param  {string} escape  Escape character.
								 * @return {string}         Segment with escaped quotes unescaped.
								 */
								function _escapeQuote(segment, quote, escape) {
								  var key = "es|" + quote + "|" + escape;
								  var regExp = cachedRegExp[key];
								  if (regExp === undefined) {
								    regExp = new RegExp(_escapeRegExpLiteral(escape) + _escapeRegExpLiteral(quote), 'g');
								    cachedRegExp[key] = regExp;
								  }
								  // A function replacer keeps "$" sequences in `quote` from being read as
								  // replacement patterns.
								  return segment.replace(regExp, function () {
								    return String(quote);
								  });
								}
							 |