You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					191 lines
				
				5.9 KiB
			
		
		
			
		
	
	
					191 lines
				
				5.9 KiB
			| 
											3 years ago
										 | const Stream = require('./'); const { DEFAULT_ENCODING, getEncoding } = Stream; | ||
|  | const { end_of_stream, finished, codePointsToString } = require('../utils'); | ||
|  | const { decoders } = require('../table'); | ||
|  | 
 | ||
|  | // 8.1 Interface TextDecoder
 | ||
|  | 
 | ||
|  | class TextDecoder { | ||
|  |   /** | ||
|  |    * @param {string=} label The label of the encoding; defaults to 'utf-8'. | ||
|  |    * @param {Object=} options | ||
|  |    */ | ||
|  |   constructor(label = DEFAULT_ENCODING, options = {}) { | ||
|  |     // A TextDecoder object has an associated encoding, decoder,
 | ||
|  |     // stream, ignore BOM flag (initially unset), BOM seen flag
 | ||
|  |     // (initially unset), error mode (initially replacement), and do
 | ||
|  |     // not flush flag (initially unset).
 | ||
|  | 
 | ||
|  |     /** @private */ | ||
|  |     this._encoding = null | ||
|  |     /** @private @type {?Decoder} */ | ||
|  |     this._decoder = null | ||
|  |     /** @private @type {boolean} */ | ||
|  |     this._ignoreBOM = false | ||
|  |     /** @private @type {boolean} */ | ||
|  |     this._BOMseen = false | ||
|  |     /** @private @type {string} */ | ||
|  |     this._error_mode = 'replacement' | ||
|  |     /** @private @type {boolean} */ | ||
|  |     this._do_not_flush = false | ||
|  | 
 | ||
|  | 
 | ||
|  |     // 1. Let encoding be the result of getting an encoding from
 | ||
|  |     // label.
 | ||
|  |     const encoding = getEncoding(label) | ||
|  | 
 | ||
|  |     // 2. If encoding is failure or replacement, throw a RangeError.
 | ||
|  |     if (encoding === null || encoding.name == 'replacement') | ||
|  |       throw RangeError('Unknown encoding: ' + label) | ||
|  |     if (!decoders[encoding.name]) { | ||
|  |       throw Error('Decoder not present.' + | ||
|  |                   ' Did you forget to include encoding-indexes.js first?') | ||
|  |     } | ||
|  | 
 | ||
|  |     // 4. Set dec's encoding to encoding.
 | ||
|  |     this._encoding = encoding | ||
|  | 
 | ||
|  |     // 5. If options's fatal member is true, set dec's error mode to
 | ||
|  |     // fatal.
 | ||
|  |     if (options['fatal']) | ||
|  |       this._error_mode = 'fatal' | ||
|  | 
 | ||
|  |     // 6. If options's ignoreBOM member is true, set dec's ignore BOM
 | ||
|  |     // flag.
 | ||
|  |     if (options['ignoreBOM']) | ||
|  |       this._ignoreBOM = true | ||
|  |   } | ||
|  | 
 | ||
|  |   get encoding() { | ||
|  |     return this._encoding.name.toLowerCase() | ||
|  |   } | ||
|  |   get fatal() { | ||
|  |     return this._error_mode === 'fatal' | ||
|  |   } | ||
|  |   get ignoreBOM() { | ||
|  |     return this._ignoreBOM | ||
|  |   } | ||
|  |   /** | ||
|  |    * @param {BufferSource=} input The buffer of bytes to decode. | ||
|  |    * @param {Object=} options | ||
|  |    * @return The decoded string. | ||
|  |    */ | ||
|  |   decode(input, options = {}) { | ||
|  |     let bytes | ||
|  |     if (typeof input === 'object' && input instanceof ArrayBuffer) { | ||
|  |       bytes = new Uint8Array(input) | ||
|  |     } else if (typeof input === 'object' && 'buffer' in input && | ||
|  |                 input.buffer instanceof ArrayBuffer) { | ||
|  |       bytes = new Uint8Array(input.buffer, | ||
|  |         input.byteOffset, | ||
|  |         input.byteLength) | ||
|  |     } else { | ||
|  |       bytes = new Uint8Array(0) | ||
|  |     } | ||
|  | 
 | ||
|  |     // 1. If the do not flush flag is unset, set decoder to a new
 | ||
|  |     // encoding's decoder, set stream to a new stream, and unset the
 | ||
|  |     // BOM seen flag.
 | ||
|  |     if (!this._do_not_flush) { | ||
|  |       this._decoder = decoders[this._encoding.name]({ | ||
|  |         fatal: this._error_mode === 'fatal' }) | ||
|  |       this._BOMseen = false | ||
|  |     } | ||
|  | 
 | ||
|  |     // 2. If options's stream is true, set the do not flush flag, and
 | ||
|  |     // unset the do not flush flag otherwise.
 | ||
|  |     this._do_not_flush = Boolean(options['stream']) | ||
|  | 
 | ||
|  |     // 3. If input is given, push a copy of input to stream.
 | ||
|  |     // TODO: Align with spec algorithm - maintain stream on instance.
 | ||
|  |     const input_stream = new Stream(bytes) | ||
|  | 
 | ||
|  |     // 4. Let output be a new stream.
 | ||
|  |     const output = [] | ||
|  | 
 | ||
|  |     /** @type {?(number|!Array.<number>)} */ | ||
|  |     let result | ||
|  | 
 | ||
|  |     // 5. While true:
 | ||
|  |     while (true) { | ||
|  |       // 1. Let token be the result of reading from stream.
 | ||
|  |       const token = input_stream.read() | ||
|  | 
 | ||
|  |       // 2. If token is end-of-stream and the do not flush flag is
 | ||
|  |       // set, return output, serialized.
 | ||
|  |       // TODO: Align with spec algorithm.
 | ||
|  |       if (token === end_of_stream) | ||
|  |         break | ||
|  | 
 | ||
|  |       // 3. Otherwise, run these subsubsteps:
 | ||
|  | 
 | ||
|  |       // 1. Let result be the result of processing token for decoder,
 | ||
|  |       // stream, output, and error mode.
 | ||
|  |       result = this._decoder.handler(input_stream, token) | ||
|  | 
 | ||
|  |       // 2. If result is finished, return output, serialized.
 | ||
|  |       if (result === finished) | ||
|  |         break | ||
|  | 
 | ||
|  |       if (result !== null) { | ||
|  |         if (Array.isArray(result)) | ||
|  |           output.push.apply(output, /**@type {!Array.<number>}*/(result)) | ||
|  |         else | ||
|  |           output.push(result) | ||
|  |       } | ||
|  | 
 | ||
|  |       // 3. Otherwise, if result is error, throw a TypeError.
 | ||
|  |       // (Thrown in handler)
 | ||
|  | 
 | ||
|  |       // 4. Otherwise, do nothing.
 | ||
|  |     } | ||
|  |     // TODO: Align with spec algorithm.
 | ||
|  |     if (!this._do_not_flush) { | ||
|  |       do { | ||
|  |         result = this._decoder.handler(input_stream, input_stream.read()) | ||
|  |         if (result === finished) | ||
|  |           break | ||
|  |         if (result === null) | ||
|  |           continue | ||
|  |         if (Array.isArray(result)) | ||
|  |           output.push.apply(output, /**@type {!Array.<number>}*/(result)) | ||
|  |         else | ||
|  |           output.push(result) | ||
|  |       } while (!input_stream.endOfStream()) | ||
|  |       this._decoder = null | ||
|  |     } | ||
|  | 
 | ||
|  |     return this.serializeStream(output) | ||
|  |   } | ||
|  |   // A TextDecoder object also has an associated serialize stream
 | ||
|  |   // algorithm...
 | ||
|  |   /** | ||
|  |    * @param {!Array.<number>} stream | ||
|  |    */ | ||
|  |   serializeStream(stream) { | ||
|  |     // 1. Let token be the result of reading from stream.
 | ||
|  |     // (Done in-place on array, rather than as a stream)
 | ||
|  | 
 | ||
|  |     // 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore
 | ||
|  |     // BOM flag and BOM seen flag are unset, run these subsubsteps:
 | ||
|  |     if (['UTF-8', 'UTF-16LE', 'UTF-16BE'].includes(this._encoding.name) && | ||
|  |         !this._ignoreBOM && !this._BOMseen) { | ||
|  |       if (stream.length > 0 && stream[0] === 0xFEFF) { | ||
|  |         // 1. If token is U+FEFF, set BOM seen flag.
 | ||
|  |         this._BOMseen = true | ||
|  |         stream.shift() | ||
|  |       } else if (stream.length > 0) { | ||
|  |         // 2. Otherwise, if token is not end-of-stream, set BOM seen
 | ||
|  |         // flag and append token to stream.
 | ||
|  |         this._BOMseen = true | ||
|  |       } else { | ||
|  |         // 3. Otherwise, if token is not end-of-stream, append token
 | ||
|  |         // to output.
 | ||
|  |         // (no-op)
 | ||
|  |       } | ||
|  |     } | ||
|  |     // 4. Otherwise, return output.
 | ||
|  |     return codePointsToString(stream) | ||
|  |   } | ||
|  | } | ||
|  | 
 | ||
|  | module.exports = TextDecoder |