You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					340 lines
				
				10 KiB
			
		
		
			
		
	
	
					340 lines
				
				10 KiB
			| 
											2 years ago
										 | const fs = require('fs'); | ||
|  | const {EventEmitter} = require('events'); | ||
|  | const {PassThrough, Readable} = require('readable-stream'); | ||
|  | const nodeStream = require('stream'); | ||
|  | const unzip = require('unzipper'); | ||
|  | const tmp = require('tmp'); | ||
|  | const iterateStream = require('../../utils/iterate-stream'); | ||
|  | const parseSax = require('../../utils/parse-sax'); | ||
|  | 
 | ||
|  | const StyleManager = require('../../xlsx/xform/style/styles-xform'); | ||
|  | const WorkbookXform = require('../../xlsx/xform/book/workbook-xform'); | ||
|  | const RelationshipsXform = require('../../xlsx/xform/core/relationships-xform'); | ||
|  | 
 | ||
|  | const WorksheetReader = require('./worksheet-reader'); | ||
|  | const HyperlinkReader = require('./hyperlink-reader'); | ||
|  | 
 | ||
|  | tmp.setGracefulCleanup(); | ||
|  | 
 | ||
/**
 * Streaming reader for XLSX workbooks.
 *
 * The workbook is unzipped entry by entry. Depending on `options`, shared
 * strings, styles, worksheets and hyperlinks are cached on the reader,
 * yielded from `parse()` / emitted from `read()`, or skipped.
 *
 * Events emitted by `read()`: 'shared-strings', 'worksheet', 'hyperlinks',
 * 'end', 'finished' and 'error'. When `options.entries === 'emit'` an 'entry'
 * event is additionally emitted for every recognised zip entry.
 */
class WorkbookReader extends EventEmitter {
  /**
   * @param {string|object} input - a file path or a readable stream holding the xlsx data
   * @param {object} [options] - overrides for the defaults below (see WorkbookReader.Options)
   */
  constructor(input, options = {}) {
    super();

    this.input = input;

    this.options = {
      worksheets: 'emit',
      sharedStrings: 'cache',
      hyperlinks: 'ignore',
      styles: 'ignore',
      entries: 'ignore',
      ...options,
    };

    this.styles = new StyleManager();
    this.styles.init();
  }

  /**
   * Normalises the supported input kinds to a readable stream.
   *
   * @param {string|object} input - a readable stream (returned as is) or a file path
   * @returns {object} a readable stream
   * @throws {Error} when the input is neither a readable stream nor a string
   */
  _getStream(input) {
    if (input instanceof nodeStream.Readable || input instanceof Readable) {
      return input;
    }
    if (typeof input === 'string') {
      return fs.createReadStream(input);
    }
    throw new Error(`Could not recognise input: ${input}`);
  }

  /**
   * Event-based front end of `parse()`: re-emits every parsed item as an event
   * and reads each worksheet to completion before moving on to the next item.
   * Failures are reported through the 'error' event instead of being thrown.
   *
   * @param {string|object} [input] - overrides the input given to the constructor
   * @param {object} [options] - replaces the current options when provided
   * @returns {Promise<void>}
   */
  async read(input, options) {
    try {
      for await (const {eventType, value} of this.parse(input, options)) {
        switch (eventType) {
          case 'shared-strings':
            this.emit(eventType, value);
            break;
          case 'worksheet':
            this.emit(eventType, value);
            await value.read();
            break;
          case 'hyperlinks':
            this.emit(eventType, value);
            break;
        }
      }
      this.emit('end');
      this.emit('finished');
    } catch (error) {
      this.emit('error', error);
    }
  }

  /**
   * Iterating the reader itself yields only the worksheet readers.
   */
  async *[Symbol.asyncIterator]() {
    for await (const {eventType, value} of this.parse()) {
      if (eventType === 'worksheet') {
        yield value;
      }
    }
  }

  /**
   * Walks the zip entries of the workbook and yields `{eventType, value}`
   * items for shared strings, worksheets and hyperlinks (subject to options).
   *
   * Worksheets that appear in the archive before both the shared strings and
   * the workbook relationships are available are spooled to temp files and
   * parsed after the last zip entry has been processed.
   *
   * @param {string|object} [input] - overrides the input given to the constructor
   * @param {object} [options] - replaces (does not merge with) the current options
   */
  async *parse(input, options) {
    if (options) this.options = options;
    const stream = (this.stream = this._getStream(input || this.input));
    const zip = unzip.Parse({forceStream: true});
    stream.pipe(zip);

    // worksheets, deferred for parsing after shared strings reading
    const waitingWorkSheets = [];

    try {
      for await (const entry of iterateStream(zip)) {
        let match;
        switch (entry.path) {
          case '_rels/.rels':
            break;
          case 'xl/_rels/workbook.xml.rels':
            await this._parseRels(entry);
            break;
          case 'xl/workbook.xml':
            await this._parseWorkbook(entry);
            break;
          case 'xl/sharedStrings.xml':
            yield* this._parseSharedStrings(entry);
            break;
          case 'xl/styles.xml':
            await this._parseStyles(entry);
            break;
          default:
            match = entry.path.match(/xl\/worksheets\/sheet(\d+)[.]xml/);
            if (match) {
              if (this.sharedStrings && this.workbookRels) {
                yield* this._parseWorksheet(iterateStream(entry), match[1]);
              } else {
                // create temp file for each worksheet
                await this._deferWorksheet(entry, match[1], waitingWorkSheets);
              }
              break;
            }
            match = entry.path.match(/xl\/worksheets\/_rels\/sheet(\d+)[.]xml.rels/);
            if (match) {
              yield* this._parseHyperlinks(iterateStream(entry), match[1]);
            }
            break;
        }
        // discard whatever the handlers above did not consume
        entry.autodrain();
      }

      while (waitingWorkSheets.length > 0) {
        const {sheetNo, path} = waitingWorkSheets[0];
        let fileStream = fs.createReadStream(path);
        // TODO: Remove once node v8 is deprecated
        // Detect and upgrade old fileStreams
        if (!fileStream[Symbol.asyncIterator]) {
          fileStream = fileStream.pipe(new PassThrough());
        }
        yield* this._parseWorksheet(fileStream, sheetNo);
        // the consumer has resumed us, so it is done with this worksheet
        waitingWorkSheets.shift().tempFileCleanupCallback();
      }
    } finally {
      // On an error or an early exit by the consumer some temp files are still
      // registered; remove them now rather than leaving them on disk.
      for (const {tempFileCleanupCallback} of waitingWorkSheets) {
        tempFileCleanupCallback();
      }
    }
  }

  /**
   * Copies a worksheet zip entry into a fresh temp file and registers it in
   * `waitingWorkSheets` for later parsing.
   *
   * @param {object} entry - zip entry stream of the worksheet
   * @param {string} sheetNo - sheet number taken from the entry path
   * @param {Array<object>} waitingWorkSheets - queue receiving `{sheetNo, path, tempFileCleanupCallback}`
   * @returns {Promise<void>} resolves once the temp file is completely written;
   *   rejects when the temp file cannot be created or either stream fails
   */
  _deferWorksheet(entry, sheetNo, waitingWorkSheets) {
    return new Promise((resolve, reject) => {
      tmp.file((err, path, fd, tempFileCleanupCallback) => {
        if (err) {
          reject(err);
          return;
        }
        // register first so the file is cleaned up even if writing fails
        waitingWorkSheets.push({sheetNo, path, tempFileCleanupCallback});

        const tempStream = fs.createWriteStream(path);
        entry.on('error', reject);
        tempStream.on('error', reject);
        tempStream.on('finish', () => resolve());
        entry.pipe(tempStream);
      });
    });
  }

  /**
   * Emits an 'entry' event when `options.entries` is 'emit'.
   *
   * @param {object} payload - description of the zip entry being processed
   */
  _emitEntry(payload) {
    if (this.options.entries === 'emit') {
      this.emit('entry', payload);
    }
  }

  /**
   * Parses the workbook relationships part into `this.workbookRels`.
   *
   * @param {object} entry - zip entry stream of xl/_rels/workbook.xml.rels
   */
  async _parseRels(entry) {
    const xform = new RelationshipsXform();
    this.workbookRels = await xform.parseStream(iterateStream(entry));
  }

  /**
   * Parses xl/workbook.xml into `this.properties` and `this.model`.
   *
   * @param {object} entry - zip entry stream of xl/workbook.xml
   */
  async _parseWorkbook(entry) {
    this._emitEntry({type: 'workbook'});

    const workbook = new WorkbookXform();
    await workbook.parseStream(iterateStream(entry));

    this.properties = workbook.map.workbookPr;
    this.model = workbook.model;
  }

  /**
   * Parses the shared strings part.
   *
   * - 'cache': every string (plain text or `{richText}`) is appended to
   *   `this.sharedStrings`; nothing is yielded.
   * - 'emit': yields `{eventType: 'shared-strings', value: {index, text}}`
   *   per string. `index` and `text` are also kept on the yielded object
   *   itself for consumers written against the earlier shape.
   * - anything else: the entry is not read.
   *
   * @param {object} entry - zip entry stream of xl/sharedStrings.xml
   */
  async *_parseSharedStrings(entry) {
    this._emitEntry({type: 'shared-strings'});
    switch (this.options.sharedStrings) {
      case 'cache':
        this.sharedStrings = [];
        break;
      case 'emit':
        break;
      default:
        return;
    }

    let text = null;
    let richText = [];
    let index = 0;
    let font = null;
    for await (const events of parseSax(iterateStream(entry))) {
      for (const {eventType, value} of events) {
        if (eventType === 'opentag') {
          const node = value;
          switch (node.name) {
            case 'b':
              font = font || {};
              font.bold = true;
              break;
            case 'charset':
              font = font || {};
              // like the other run properties, the value is carried by `val`
              font.charset = parseInt(node.attributes.val, 10);
              break;
            case 'color':
              font = font || {};
              font.color = {};
              if (node.attributes.rgb) {
                font.color.argb = node.attributes.rgb;
              }
              if (node.attributes.val) {
                font.color.argb = node.attributes.val;
              }
              if (node.attributes.theme) {
                font.color.theme = node.attributes.theme;
              }
              break;
            case 'family':
              font = font || {};
              font.family = parseInt(node.attributes.val, 10);
              break;
            case 'i':
              font = font || {};
              font.italic = true;
              break;
            case 'outline':
              font = font || {};
              font.outline = true;
              break;
            case 'rFont':
              font = font || {};
              // open-tag nodes expose their data through `attributes`
              font.name = node.attributes.val;
              break;
            case 'si':
              // a new shared string item starts: drop any leftover state
              font = null;
              richText = [];
              text = null;
              break;
            case 'sz':
              font = font || {};
              font.size = parseInt(node.attributes.val, 10);
              break;
            case 'strike':
              // recognised, but not mapped onto the font
              break;
            case 't':
              text = null;
              break;
            case 'u':
              font = font || {};
              font.underline = true;
              break;
            case 'vertAlign':
              font = font || {};
              font.vertAlign = node.attributes.val;
              break;
          }
        } else if (eventType === 'text') {
          // text may arrive in several chunks
          text = text ? text + value : value;
        } else if (eventType === 'closetag') {
          const node = value;
          switch (node.name) {
            case 'r':
              richText.push({
                font,
                text,
              });

              font = null;
              text = null;
              break;
            case 'si': {
              const content = richText.length ? {richText} : text;
              if (this.options.sharedStrings === 'cache') {
                this.sharedStrings.push(content);
              } else if (this.options.sharedStrings === 'emit') {
                const item = {index: index++, text: content};
                // eventType/value is what read() dispatches on
                yield {eventType: 'shared-strings', value: item, ...item};
              }

              richText = [];
              font = null;
              text = null;
              break;
            }
          }
        }
      }
    }
  }

  /**
   * Parses xl/styles.xml into `this.styles` when `options.styles` is 'cache'.
   *
   * @param {object} entry - zip entry stream of xl/styles.xml
   */
  async _parseStyles(entry) {
    this._emitEntry({type: 'styles'});
    if (this.options.styles === 'cache') {
      this.styles = new StyleManager();
      await this.styles.parseStream(iterateStream(entry));
    }
  }

  /**
   * Creates a WorksheetReader for one sheet and, when `options.worksheets` is
   * 'emit', yields it as `{eventType: 'worksheet', value}`.
   *
   * When the workbook relationships and workbook model are available, the
   * reader's id, name and state are taken from the matching sheet definition.
   *
   * @param {object} iterator - async iterable over the worksheet xml
   * @param {string} sheetNo - sheet number taken from the entry path
   */
  *_parseWorksheet(iterator, sheetNo) {
    this._emitEntry({type: 'worksheet', id: sheetNo});
    const worksheetReader = new WorksheetReader({
      workbook: this,
      id: sheetNo,
      iterator,
      options: this.options,
    });

    const matchingRel = (this.workbookRels || []).find(
      rel => rel.Target === `worksheets/sheet${sheetNo}.xml`
    );
    // workbook.xml may not have been parsed, in which case there is no model
    const sheets = (this.model && this.model.sheets) || [];
    const matchingSheet = matchingRel && sheets.find(sheet => sheet.rId === matchingRel.Id);
    if (matchingSheet) {
      worksheetReader.id = matchingSheet.id;
      worksheetReader.name = matchingSheet.name;
      worksheetReader.state = matchingSheet.state;
    }
    if (this.options.worksheets === 'emit') {
      yield {eventType: 'worksheet', value: worksheetReader};
    }
  }

  /**
   * Creates a HyperlinkReader for one sheet's relationships part and, when
   * `options.hyperlinks` is 'emit', yields it as `{eventType: 'hyperlinks', value}`.
   *
   * @param {object} iterator - async iterable over the sheet relationships xml
   * @param {string} sheetNo - sheet number taken from the entry path
   */
  *_parseHyperlinks(iterator, sheetNo) {
    this._emitEntry({type: 'hyperlinks', id: sheetNo});
    const hyperlinksReader = new HyperlinkReader({
      workbook: this,
      id: sheetNo,
      iterator,
      options: this.options,
    });
    if (this.options.hyperlinks === 'emit') {
      yield {eventType: 'hyperlinks', value: hyperlinksReader};
    }
  }
}
|  | 
 | ||
/**
 * Reference table: for each reader option, the list of values it accepts.
 */
WorkbookReader.Options = {};
WorkbookReader.Options.worksheets = ['emit', 'ignore'];
WorkbookReader.Options.sharedStrings = ['cache', 'emit', 'ignore'];
WorkbookReader.Options.hyperlinks = ['cache', 'emit', 'ignore'];
WorkbookReader.Options.styles = ['cache', 'ignore'];
WorkbookReader.Options.entries = ['emit', 'ignore'];
|  | 
 | ||
|  | module.exports = WorkbookReader; |