You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							2392 lines
						
					
					
						
							63 KiB
						
					
					
				
			
		
		
	
	
							2392 lines
						
					
					
						
							63 KiB
						
					
					
				| /*
 | |
|    Modifications for better node.js integration:
 | |
|     Copyright 2014 Brian White. All rights reserved.
 | |
| 
 | |
|     Permission is hereby granted, free of charge, to any person obtaining a copy
 | |
|     of this software and associated documentation files (the "Software"), to
 | |
|     deal in the Software without restriction, including without limitation the
 | |
|     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 | |
|     sell copies of the Software, and to permit persons to whom the Software is
 | |
|     furnished to do so, subject to the following conditions:
 | |
| 
 | |
|     The above copyright notice and this permission notice shall be included in
 | |
|     all copies or substantial portions of the Software.
 | |
| 
 | |
|     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | |
|     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | |
|     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | |
|     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | |
|     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 | |
|     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 | |
|     IN THE SOFTWARE.
 | |
| */
 | |
| /*
 | |
|    Original source code:
 | |
|     Copyright 2014 Joshua Bell
 | |
| 
 | |
|     Licensed under the Apache License, Version 2.0 (the "License");
 | |
|     you may not use this file except in compliance with the License.
 | |
|     You may obtain a copy of the License at
 | |
| 
 | |
|         http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
|     Unless required by applicable law or agreed to in writing, software
 | |
|     distributed under the License is distributed on an "AS IS" BASIS,
 | |
|     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|     See the License for the specific language governing permissions and
 | |
|     limitations under the License.
 | |
| */
 | |
| 
 | |
| //
 | |
| // Utilities
 | |
| //
 | |
| 
 | |
| /**
 | |
|  * @param {number} a The number to test.
 | |
|  * @param {number} min The minimum value in the range, inclusive.
 | |
|  * @param {number} max The maximum value in the range, inclusive.
 | |
|  * @return {boolean} True if a >= min and a <= max.
 | |
|  */
 | |
| function inRange(a, min, max) {
 | |
|   return min <= a && a <= max;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @param {number} n The numerator.
 | |
|  * @param {number} d The denominator.
 | |
|  * @return {number} The result of the integer division of n by d.
 | |
|  */
 | |
| function div(n, d) {
 | |
|   return Math.floor(n / d);
 | |
| }
 | |
| 
 | |
| 
 | |
| //
 | |
| // Implementation of Encoding specification
 | |
| // http://dvcs.w3.org/hg/encoding/raw-file/tip/Overview.html
 | |
| //
 | |
| 
 | |
| //
 | |
| // 3. Terminology
 | |
| //
 | |
| 
 | |
| //
 | |
| // 4. Encodings
 | |
| //
 | |
| 
 | |
| /** @const */ var EOF_byte = -1;
 | |
| /** @const */ var EOF_code_point = -1;
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {Buffer} bytes Array of bytes that provide the stream.
 | |
|  */
 | |
| function ByteInputStream(bytes) {
 | |
|   /** @type {number} */
 | |
|   var pos = 0;
 | |
| 
 | |
|   /**
 | |
|    * @this {ByteInputStream}
 | |
|    * @return {number} Get the next byte from the stream.
 | |
|    */
 | |
|   this.get = function() {
 | |
|     return (pos >= bytes.length) ? EOF_byte : Number(bytes[pos]);
 | |
|   };
 | |
| 
 | |
|   /** @param {number} n Number (positive or negative) by which to
 | |
|    *      offset the byte pointer. */
 | |
|   this.offset = function(n) {
 | |
|     pos += n;
 | |
|     if (pos < 0) {
 | |
|       throw new Error('Seeking past start of the buffer');
 | |
|     }
 | |
|     if (pos > bytes.length) {
 | |
|       throw new Error('Seeking past EOF');
 | |
|     }
 | |
|   };
 | |
| 
 | |
|   /**
 | |
|    * @param {Array.<number>} test Array of bytes to compare against.
 | |
|    * @return {boolean} True if the start of the stream matches the test
 | |
|    *     bytes.
 | |
|    */
 | |
|   this.match = function(test) {
 | |
|     if (test.length > pos + bytes.length) {
 | |
|       return false;
 | |
|     }
 | |
|     var i;
 | |
|     for (i = 0; i < test.length; i += 1) {
 | |
|       if (Number(bytes[pos + i]) !== test[i]) {
 | |
|         return false;
 | |
|       }
 | |
|     }
 | |
|     return true;
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {Array.<number>} bytes The array to write bytes into.
 | |
|  */
 | |
| function ByteOutputStream(bytes) {
 | |
|   /** @type {number} */
 | |
|   var pos = 0;
 | |
| 
 | |
|   /**
 | |
|    * @param {...number} var_args The byte or bytes to emit into the stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.emit = function(var_args) {
 | |
|     /** @type {number} */
 | |
|     var last = EOF_byte;
 | |
|     var i;
 | |
|     for (i = 0; i < arguments.length; ++i) {
 | |
|       last = Number(arguments[i]);
 | |
|       bytes[pos++] = last;
 | |
|     }
 | |
|     return last;
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {string} string The source of code units for the stream.
 | |
|  */
 | |
| function CodePointInputStream(string) {
 | |
|   /**
 | |
|    * @param {string} string Input string of UTF-16 code units.
 | |
|    * @return {Array.<number>} Code points.
 | |
|    */
 | |
|   function stringToCodePoints(string) {
 | |
|     /** @type {Array.<number>} */
 | |
|     var cps = [];
 | |
|     // Based on http://www.w3.org/TR/WebIDL/#idl-DOMString
 | |
|     var i = 0, n = string.length;
 | |
|     while (i < string.length) {
 | |
|       var c = string.charCodeAt(i);
 | |
|       if (!inRange(c, 0xD800, 0xDFFF)) {
 | |
|         cps.push(c);
 | |
|       } else if (inRange(c, 0xDC00, 0xDFFF)) {
 | |
|         cps.push(0xFFFD);
 | |
|       } else { // (inRange(cu, 0xD800, 0xDBFF))
 | |
|         if (i === n - 1) {
 | |
|           cps.push(0xFFFD);
 | |
|         } else {
 | |
|           var d = string.charCodeAt(i + 1);
 | |
|           if (inRange(d, 0xDC00, 0xDFFF)) {
 | |
|             var a = c & 0x3FF;
 | |
|             var b = d & 0x3FF;
 | |
|             i += 1;
 | |
|             cps.push(0x10000 + (a << 10) + b);
 | |
|           } else {
 | |
|             cps.push(0xFFFD);
 | |
|           }
 | |
|         }
 | |
|       }
 | |
|       i += 1;
 | |
|     }
 | |
|     return cps;
 | |
|   }
 | |
| 
 | |
|   /** @type {number} */
 | |
|   var pos = 0;
 | |
|   /** @type {Array.<number>} */
 | |
|   var cps = stringToCodePoints(string);
 | |
| 
 | |
|   /** @param {number} n The number of bytes (positive or negative)
 | |
|    *      to advance the code point pointer by.*/
 | |
|   this.offset = function(n) {
 | |
|     pos += n;
 | |
|     if (pos < 0) {
 | |
|       throw new Error('Seeking past start of the buffer');
 | |
|     }
 | |
|     if (pos > cps.length) {
 | |
|       throw new Error('Seeking past EOF');
 | |
|     }
 | |
|   };
 | |
| 
 | |
| 
 | |
|   /** @return {number} Get the next code point from the stream. */
 | |
|   this.get = function() {
 | |
|     if (pos >= cps.length) {
 | |
|       return EOF_code_point;
 | |
|     }
 | |
|     return cps[pos];
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  */
 | |
| function CodePointOutputStream() {
 | |
|   /** @type {string} */
 | |
|   var string = '';
 | |
| 
 | |
|   /** @return {string} The accumulated string. */
 | |
|   this.string = function() {
 | |
|     return string;
 | |
|   };
 | |
| 
 | |
|   /** @param {number} c The code point to encode into the stream. */
 | |
|   this.emit = function(c) {
 | |
|     if (c <= 0xFFFF) {
 | |
|       string += String.fromCharCode(c);
 | |
|     } else {
 | |
|       c -= 0x10000;
 | |
|       string += String.fromCharCode(0xD800 + ((c >> 10) & 0x3ff));
 | |
|       string += String.fromCharCode(0xDC00 + (c & 0x3ff));
 | |
|     }
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {string} message Description of the error.
 | |
|  */
 | |
| function EncodingError(message) {
 | |
|   this.name = 'EncodingError';
 | |
|   this.message = message;
 | |
|   this.code = 0;
 | |
| }
 | |
| EncodingError.prototype = Error.prototype;
 | |
| 
 | |
| /**
 | |
|  * @param {boolean} fatal If true, decoding errors raise an exception.
 | |
|  * @param {number=} opt_code_point Override the standard fallback code point.
 | |
|  * @return {number} The code point to insert on a decoding error.
 | |
|  */
 | |
| function decoderError(fatal, opt_code_point) {
 | |
|   if (fatal) {
 | |
|     throw new EncodingError('Decoder error');
 | |
|   }
 | |
|   return opt_code_point || 0xFFFD;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @param {number} code_point The code point that could not be encoded.
 | |
|  * @return {number} Always throws, no value is actually returned.
 | |
|  */
 | |
| function encoderError(code_point) {
 | |
|   throw new EncodingError('The code point ' + code_point +
 | |
|                           ' could not be encoded.');
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @param {string} label The encoding label.
 | |
|  * @return {?{name:string,labels:Array.<string>}}
 | |
|  */
 | |
| function getEncoding(label) {
 | |
|   label = String(label).trim().toLowerCase();
 | |
|   if (Object.prototype.hasOwnProperty.call(label_to_encoding, label)) {
 | |
|     return label_to_encoding[label];
 | |
|   }
 | |
|   return null;
 | |
| }
 | |
| 
 | |
| /** @type {Array.<{encodings: Array.<{name:string,labels:Array.<string>}>,
 | |
|  *      heading: string}>} */
 | |
| var encodings = [
 | |
|   {
 | |
|     "encodings": [
 | |
|       {
 | |
|         "labels": [
 | |
|           "unicode-1-1-utf-8",
 | |
|           "utf-8",
 | |
|           "utf8"
 | |
|         ],
 | |
|         "name": "utf-8"
 | |
|       }
 | |
|     ],
 | |
|     "heading": "The Encoding"
 | |
|   },
 | |
|   {
 | |
|     "encodings": [
 | |
|       {
 | |
|         "labels": [
 | |
|           "864",
 | |
|           "cp864",
 | |
|           "csibm864",
 | |
|           "ibm864"
 | |
|         ],
 | |
|         "name": "ibm864"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "866",
 | |
|           "cp866",
 | |
|           "csibm866",
 | |
|           "ibm866"
 | |
|         ],
 | |
|         "name": "ibm866"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csisolatin2",
 | |
|           "iso-8859-2",
 | |
|           "iso-ir-101",
 | |
|           "iso8859-2",
 | |
|           "iso88592",
 | |
|           "iso_8859-2",
 | |
|           "iso_8859-2:1987",
 | |
|           "l2",
 | |
|           "latin2"
 | |
|         ],
 | |
|         "name": "iso-8859-2"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csisolatin3",
 | |
|           "iso-8859-3",
 | |
|           "iso-ir-109",
 | |
|           "iso8859-3",
 | |
|           "iso88593",
 | |
|           "iso_8859-3",
 | |
|           "iso_8859-3:1988",
 | |
|           "l3",
 | |
|           "latin3"
 | |
|         ],
 | |
|         "name": "iso-8859-3"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csisolatin4",
 | |
|           "iso-8859-4",
 | |
|           "iso-ir-110",
 | |
|           "iso8859-4",
 | |
|           "iso88594",
 | |
|           "iso_8859-4",
 | |
|           "iso_8859-4:1988",
 | |
|           "l4",
 | |
|           "latin4"
 | |
|         ],
 | |
|         "name": "iso-8859-4"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csisolatincyrillic",
 | |
|           "cyrillic",
 | |
|           "iso-8859-5",
 | |
|           "iso-ir-144",
 | |
|           "iso8859-5",
 | |
|           "iso88595",
 | |
|           "iso_8859-5",
 | |
|           "iso_8859-5:1988"
 | |
|         ],
 | |
|         "name": "iso-8859-5"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "arabic",
 | |
|           "asmo-708",
 | |
|           "csiso88596e",
 | |
|           "csiso88596i",
 | |
|           "csisolatinarabic",
 | |
|           "ecma-114",
 | |
|           "iso-8859-6",
 | |
|           "iso-8859-6-e",
 | |
|           "iso-8859-6-i",
 | |
|           "iso-ir-127",
 | |
|           "iso8859-6",
 | |
|           "iso88596",
 | |
|           "iso_8859-6",
 | |
|           "iso_8859-6:1987"
 | |
|         ],
 | |
|         "name": "iso-8859-6"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csisolatingreek",
 | |
|           "ecma-118",
 | |
|           "elot_928",
 | |
|           "greek",
 | |
|           "greek8",
 | |
|           "iso-8859-7",
 | |
|           "iso-ir-126",
 | |
|           "iso8859-7",
 | |
|           "iso88597",
 | |
|           "iso_8859-7",
 | |
|           "iso_8859-7:1987",
 | |
|           "sun_eu_greek"
 | |
|         ],
 | |
|         "name": "iso-8859-7"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csiso88598e",
 | |
|           "csisolatinhebrew",
 | |
|           "hebrew",
 | |
|           "iso-8859-8",
 | |
|           "iso-8859-8-e",
 | |
|           "iso-ir-138",
 | |
|           "iso8859-8",
 | |
|           "iso88598",
 | |
|           "iso_8859-8",
 | |
|           "iso_8859-8:1988",
 | |
|           "visual"
 | |
|         ],
 | |
|         "name": "iso-8859-8"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csiso88598i",
 | |
|           "iso-8859-8-i",
 | |
|           "logical"
 | |
|         ],
 | |
|         "name": "iso-8859-8-i"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csisolatin6",
 | |
|           "iso-8859-10",
 | |
|           "iso-ir-157",
 | |
|           "iso8859-10",
 | |
|           "iso885910",
 | |
|           "l6",
 | |
|           "latin6"
 | |
|         ],
 | |
|         "name": "iso-8859-10"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "iso-8859-13",
 | |
|           "iso8859-13",
 | |
|           "iso885913"
 | |
|         ],
 | |
|         "name": "iso-8859-13"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "iso-8859-14",
 | |
|           "iso8859-14",
 | |
|           "iso885914"
 | |
|         ],
 | |
|         "name": "iso-8859-14"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csisolatin9",
 | |
|           "iso-8859-15",
 | |
|           "iso8859-15",
 | |
|           "iso885915",
 | |
|           "iso_8859-15",
 | |
|           "l9"
 | |
|         ],
 | |
|         "name": "iso-8859-15"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "iso-8859-16"
 | |
|         ],
 | |
|         "name": "iso-8859-16"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "cskoi8r",
 | |
|           "koi",
 | |
|           "koi8",
 | |
|           "koi8-r",
 | |
|           "koi8_r"
 | |
|         ],
 | |
|         "name": "koi8-r"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "koi8-u"
 | |
|         ],
 | |
|         "name": "koi8-u"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csmacintosh",
 | |
|           "mac",
 | |
|           "macintosh",
 | |
|           "x-mac-roman"
 | |
|         ],
 | |
|         "name": "macintosh"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "dos-874",
 | |
|           "iso-8859-11",
 | |
|           "iso8859-11",
 | |
|           "iso885911",
 | |
|           "tis-620",
 | |
|           "windows-874"
 | |
|         ],
 | |
|         "name": "windows-874"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "cp1250",
 | |
|           "windows-1250",
 | |
|           "x-cp1250"
 | |
|         ],
 | |
|         "name": "windows-1250"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "cp1251",
 | |
|           "windows-1251",
 | |
|           "x-cp1251"
 | |
|         ],
 | |
|         "name": "windows-1251"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "ansi_x3.4-1968",
 | |
|           "ascii",
 | |
|           "cp1252",
 | |
|           "cp819",
 | |
|           "csisolatin1",
 | |
|           "ibm819",
 | |
|           "iso-8859-1",
 | |
|           "iso-ir-100",
 | |
|           "iso8859-1",
 | |
|           "iso88591",
 | |
|           "iso_8859-1",
 | |
|           "iso_8859-1:1987",
 | |
|           "l1",
 | |
|           "latin1",
 | |
|           "us-ascii",
 | |
|           "windows-1252",
 | |
|           "x-cp1252"
 | |
|         ],
 | |
|         "name": "windows-1252"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "cp1253",
 | |
|           "windows-1253",
 | |
|           "x-cp1253"
 | |
|         ],
 | |
|         "name": "windows-1253"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "cp1254",
 | |
|           "csisolatin5",
 | |
|           "iso-8859-9",
 | |
|           "iso-ir-148",
 | |
|           "iso8859-9",
 | |
|           "iso88599",
 | |
|           "iso_8859-9",
 | |
|           "iso_8859-9:1989",
 | |
|           "l5",
 | |
|           "latin5",
 | |
|           "windows-1254",
 | |
|           "x-cp1254"
 | |
|         ],
 | |
|         "name": "windows-1254"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "cp1255",
 | |
|           "windows-1255",
 | |
|           "x-cp1255"
 | |
|         ],
 | |
|         "name": "windows-1255"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "cp1256",
 | |
|           "windows-1256",
 | |
|           "x-cp1256"
 | |
|         ],
 | |
|         "name": "windows-1256"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "cp1257",
 | |
|           "windows-1257",
 | |
|           "x-cp1257"
 | |
|         ],
 | |
|         "name": "windows-1257"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "cp1258",
 | |
|           "windows-1258",
 | |
|           "x-cp1258"
 | |
|         ],
 | |
|         "name": "windows-1258"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "x-mac-cyrillic",
 | |
|           "x-mac-ukrainian"
 | |
|         ],
 | |
|         "name": "x-mac-cyrillic"
 | |
|       }
 | |
|     ],
 | |
|     "heading": "Legacy single-byte encodings"
 | |
|   },
 | |
|   {
 | |
|     "encodings": [
 | |
|       {
 | |
|         "labels": [
 | |
|           "chinese",
 | |
|           "csgb2312",
 | |
|           "csiso58gb231280",
 | |
|           "gb2312",
 | |
|           "gb_2312",
 | |
|           "gb_2312-80",
 | |
|           "gbk",
 | |
|           "iso-ir-58",
 | |
|           "x-gbk"
 | |
|         ],
 | |
|         "name": "gbk"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "gb18030"
 | |
|         ],
 | |
|         "name": "gb18030"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "hz-gb-2312"
 | |
|         ],
 | |
|         "name": "hz-gb-2312"
 | |
|       }
 | |
|     ],
 | |
|     "heading": "Legacy multi-byte Chinese (simplified) encodings"
 | |
|   },
 | |
|   {
 | |
|     "encodings": [
 | |
|       {
 | |
|         "labels": [
 | |
|           "big5",
 | |
|           "big5-hkscs",
 | |
|           "cn-big5",
 | |
|           "csbig5",
 | |
|           "x-x-big5"
 | |
|         ],
 | |
|         "name": "big5"
 | |
|       }
 | |
|     ],
 | |
|     "heading": "Legacy multi-byte Chinese (traditional) encodings"
 | |
|   },
 | |
|   {
 | |
|     "encodings": [
 | |
|       {
 | |
|         "labels": [
 | |
|           "cseucpkdfmtjapanese",
 | |
|           "euc-jp",
 | |
|           "x-euc-jp"
 | |
|         ],
 | |
|         "name": "euc-jp"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csiso2022jp",
 | |
|           "iso-2022-jp"
 | |
|         ],
 | |
|         "name": "iso-2022-jp"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "csshiftjis",
 | |
|           "ms_kanji",
 | |
|           "shift-jis",
 | |
|           "shift_jis",
 | |
|           "sjis",
 | |
|           "windows-31j",
 | |
|           "x-sjis"
 | |
|         ],
 | |
|         "name": "shift_jis"
 | |
|       }
 | |
|     ],
 | |
|     "heading": "Legacy multi-byte Japanese encodings"
 | |
|   },
 | |
|   {
 | |
|     "encodings": [
 | |
|       {
 | |
|         "labels": [
 | |
|           "cseuckr",
 | |
|           "csksc56011987",
 | |
|           "euc-kr",
 | |
|           "iso-ir-149",
 | |
|           "korean",
 | |
|           "ks_c_5601-1987",
 | |
|           "ks_c_5601-1989",
 | |
|           "ksc5601",
 | |
|           "ksc_5601",
 | |
|           "windows-949"
 | |
|         ],
 | |
|         "name": "euc-kr"
 | |
|       }
 | |
|     ],
 | |
|     "heading": "Legacy multi-byte Korean encodings"
 | |
|   },
 | |
|   {
 | |
|     "encodings": [
 | |
|       {
 | |
|         "labels": [
 | |
|           "csiso2022kr",
 | |
|           "iso-2022-cn",
 | |
|             "iso-2022-cn-ext",
 | |
|             "iso-2022-kr"
 | |
|         ],
 | |
|         "name": "replacement"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "utf-16be"
 | |
|         ],
 | |
|         "name": "utf-16be"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "utf-16",
 | |
|           "utf-16le"
 | |
|         ],
 | |
|         "name": "utf-16le"
 | |
|       },
 | |
|       {
 | |
|         "labels": [
 | |
|           "x-user-defined"
 | |
|         ],
 | |
|         "name": "x-user-defined"
 | |
|       }
 | |
|     ],
 | |
|     "heading": "Legacy miscellaneous encodings"
 | |
|   }
 | |
| ];
 | |
| 
 | |
| var name_to_encoding = {};
 | |
| var label_to_encoding = {};
 | |
| encodings.forEach(function(category) {
 | |
|   category.encodings.forEach(function(encoding) {
 | |
|     name_to_encoding[encoding.name] = encoding;
 | |
|     encoding.labels.forEach(function(label) {
 | |
|       label_to_encoding[label] = encoding;
 | |
|     });
 | |
|   });
 | |
| });
 | |
| 
 | |
| //
 | |
| // 5. Indexes
 | |
| //
 | |
| 
 | |
| /**
 | |
|  * @param {number} pointer The |pointer| to search for.
 | |
|  * @param {Array.<?number>|undefined} index The |index| to search within.
 | |
|  * @return {?number} The code point corresponding to |pointer| in |index|,
 | |
|  *     or null if |code point| is not in |index|.
 | |
|  */
 | |
| function indexCodePointFor(pointer, index) {
 | |
|     if (!index) return null;
 | |
|     return index[pointer] || null;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @param {number} code_point The |code point| to search for.
 | |
|  * @param {Array.<?number>} index The |index| to search within.
 | |
|  * @return {?number} The first pointer corresponding to |code point| in
 | |
|  *     |index|, or null if |code point| is not in |index|.
 | |
|  */
 | |
| function indexPointerFor(code_point, index) {
 | |
|   var pointer = index.indexOf(code_point);
 | |
|   return pointer === -1 ? null : pointer;
 | |
| }
 | |
| 
 | |
| /** @type {Object.<string, (Array.<number>|Array.<Array.<number>>)>} */
 | |
| var indexes = require('./encoding-indexes');
 | |
| 
 | |
| /**
 | |
|  * @param {number} pointer The |pointer| to search for in the gb18030 index.
 | |
|  * @return {?number} The code point corresponding to |pointer| in |index|,
 | |
|  *     or null if |code point| is not in the gb18030 index.
 | |
|  */
 | |
| function indexGB18030CodePointFor(pointer) {
 | |
|   if ((pointer > 39419 && pointer < 189000) || (pointer > 1237575)) {
 | |
|     return null;
 | |
|   }
 | |
|   var /** @type {number} */ offset = 0,
 | |
|       /** @type {number} */ code_point_offset = 0,
 | |
|       /** @type {Array.<Array.<number>>} */ idx = indexes['gb18030'];
 | |
|   var i;
 | |
|   for (i = 0; i < idx.length; ++i) {
 | |
|     var entry = idx[i];
 | |
|     if (entry[0] <= pointer) {
 | |
|       offset = entry[0];
 | |
|       code_point_offset = entry[1];
 | |
|     } else {
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
|   return code_point_offset + pointer - offset;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @param {number} code_point The |code point| to locate in the gb18030 index.
 | |
|  * @return {number} The first pointer corresponding to |code point| in the
 | |
|  *     gb18030 index.
 | |
|  */
 | |
| function indexGB18030PointerFor(code_point) {
 | |
|   var /** @type {number} */ offset = 0,
 | |
|       /** @type {number} */ pointer_offset = 0,
 | |
|       /** @type {Array.<Array.<number>>} */ idx = indexes['gb18030'];
 | |
|   var i;
 | |
|   for (i = 0; i < idx.length; ++i) {
 | |
|     var entry = idx[i];
 | |
|     if (entry[1] <= code_point) {
 | |
|       offset = entry[1];
 | |
|       pointer_offset = entry[0];
 | |
|     } else {
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
|   return pointer_offset + code_point - offset;
 | |
| }
 | |
| 
 | |
| 
 | |
| //
 | |
| // 7. API
 | |
| //
 | |
| 
 | |
| /** @const */ var DEFAULT_ENCODING = 'utf-8';
 | |
| 
 | |
| // 7.1 Interface TextDecoder
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {string=} opt_encoding The label of the encoding;
 | |
|  *     defaults to 'utf-8'.
 | |
|  * @param {{fatal: boolean}=} options
 | |
|  */
 | |
| function TextDecoder(opt_encoding, options) {
 | |
|   if (!(this instanceof TextDecoder)) {
 | |
|     return new TextDecoder(opt_encoding, options);
 | |
|   }
 | |
|   opt_encoding = opt_encoding ? String(opt_encoding) : DEFAULT_ENCODING;
 | |
|   options = Object(options);
 | |
|   /** @private */
 | |
|   this._encoding = getEncoding(opt_encoding);
 | |
|   if (this._encoding === null || this._encoding.name === 'replacement')
 | |
|     throw new TypeError('Unknown encoding: ' + opt_encoding);
 | |
| 
 | |
|   /** @private @type {boolean} */
 | |
|   this._streaming = false;
 | |
|   /** @private @type {boolean} */
 | |
|   this._BOMseen = false;
 | |
|   /** @private */
 | |
|   this._decoder = null;
 | |
|   /** @private @type {{fatal: boolean}=} */
 | |
|   this._options = { fatal: Boolean(options.fatal) };
 | |
| 
 | |
|   if (Object.defineProperty) {
 | |
|     Object.defineProperty(
 | |
|         this, 'encoding',
 | |
|         { get: function() { return this._encoding.name; } });
 | |
|   } else {
 | |
|     this.encoding = this._encoding.name;
 | |
|   }
 | |
| 
 | |
|   return this;
 | |
| }
 | |
| 
 | |
| // TODO: Issue if input byte stream is offset by decoder
 | |
| // TODO: BOM detection will not work if stream header spans multiple calls
 | |
| // (last N bytes of previous stream may need to be retained?)
 | |
| TextDecoder.prototype = {
 | |
|   /**
 | |
|    * @param {Buffer=} bytes The buffer of bytes to decode.
 | |
|    * @param {{stream: boolean}=} options
 | |
|    */
 | |
|   decode: function decode(bytes, options) {
 | |
|     options = Object(options);
 | |
| 
 | |
|     if (!this._streaming) {
 | |
|       this._decoder = this._encoding.getDecoder(this._options);
 | |
|       this._BOMseen = false;
 | |
|     }
 | |
|     this._streaming = Boolean(options.stream);
 | |
| 
 | |
|     var input_stream = new ByteInputStream(bytes);
 | |
| 
 | |
|     var output_stream = new CodePointOutputStream();
 | |
| 
 | |
|     /** @type {number} */
 | |
|     var code_point;
 | |
| 
 | |
|     while (input_stream.get() !== EOF_byte) {
 | |
|       code_point = this._decoder.decode(input_stream);
 | |
|       if (code_point !== null && code_point !== EOF_code_point) {
 | |
|         output_stream.emit(code_point);
 | |
|       }
 | |
|     }
 | |
|     if (!this._streaming) {
 | |
|       do {
 | |
|         code_point = this._decoder.decode(input_stream);
 | |
|         if (code_point !== null && code_point !== EOF_code_point) {
 | |
|           output_stream.emit(code_point);
 | |
|         }
 | |
|       } while (code_point !== EOF_code_point &&
 | |
|                input_stream.get() != EOF_byte);
 | |
|       this._decoder = null;
 | |
|     }
 | |
| 
 | |
|     var result = output_stream.string();
 | |
|     if (!this._BOMseen && result.length) {
 | |
|       this._BOMseen = true;
 | |
|       if (UTFs.indexOf(this.encoding) !== -1 &&
 | |
|          result.charCodeAt(0) === 0xFEFF) {
 | |
|         result = result.substring(1);
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     return result;
 | |
|   }
 | |
| };
 | |
| 
 | |
| var UTFs = ['utf-8', 'utf-16le', 'utf-16be'];
 | |
| 
 | |
| // 7.2 Interface TextEncoder
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {string=} opt_encoding The label of the encoding;
 | |
|  *     defaults to 'utf-8'.
 | |
|  * @param {{fatal: boolean}=} options
 | |
|  */
 | |
| function TextEncoder(opt_encoding, options) {
 | |
|   if (!(this instanceof TextEncoder)) {
 | |
|     return new TextEncoder(opt_encoding, options);
 | |
|   }
 | |
|   opt_encoding = opt_encoding ? String(opt_encoding) : DEFAULT_ENCODING;
 | |
|   options = Object(options);
 | |
|   /** @private */
 | |
|   this._encoding = getEncoding(opt_encoding);
 | |
|   if (this._encoding === null || (this._encoding.name !== 'utf-8' &&
 | |
|                                   this._encoding.name !== 'utf-16le' &&
 | |
|                                   this._encoding.name !== 'utf-16be'))
 | |
|     throw new TypeError('Unknown encoding: ' + opt_encoding);
 | |
|   /** @private @type {boolean} */
 | |
|   this._streaming = false;
 | |
|   /** @private */
 | |
|   this._encoder = null;
 | |
|   /** @private @type {{fatal: boolean}=} */
 | |
|   this._options = { fatal: Boolean(options.fatal) };
 | |
| 
 | |
|   if (Object.defineProperty) {
 | |
|     Object.defineProperty(
 | |
|         this, 'encoding',
 | |
|         { get: function() { return this._encoding.name; } });
 | |
|   } else {
 | |
|     this.encoding = this._encoding.name;
 | |
|   }
 | |
| 
 | |
|   return this;
 | |
| }
 | |
| 
 | |
| TextEncoder.prototype = {
 | |
|   /**
 | |
|    * @param {string=} opt_string The string to encode.
 | |
|    * @param {{stream: boolean}=} options
 | |
|    */
 | |
|   encode: function encode(opt_string, options) {
 | |
|     opt_string = opt_string ? String(opt_string) : '';
 | |
|     options = Object(options);
 | |
|     // TODO: any options?
 | |
|     if (!this._streaming) {
 | |
|       this._encoder = this._encoding.getEncoder(this._options);
 | |
|     }
 | |
|     this._streaming = Boolean(options.stream);
 | |
| 
 | |
|     var bytes = [];
 | |
|     var output_stream = new ByteOutputStream(bytes);
 | |
|     var input_stream = new CodePointInputStream(opt_string);
 | |
|     while (input_stream.get() !== EOF_code_point) {
 | |
|       this._encoder.encode(output_stream, input_stream);
 | |
|     }
 | |
|     if (!this._streaming) {
 | |
|       /** @type {number} */
 | |
|       var last_byte;
 | |
|       do {
 | |
|         last_byte = this._encoder.encode(output_stream, input_stream);
 | |
|       } while (last_byte !== EOF_byte);
 | |
|       this._encoder = null;
 | |
|     }
 | |
|     return new Buffer(bytes);
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| //
 | |
| // 8. The encoding
 | |
| //
 | |
| 
 | |
| // 8.1 utf-8
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function UTF8Decoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   var /** @type {number} */ utf8_code_point = 0,
 | |
|       /** @type {number} */ utf8_bytes_needed = 0,
 | |
|       /** @type {number} */ utf8_bytes_seen = 0,
 | |
|       /** @type {number} */ utf8_lower_boundary = 0;
 | |
| 
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte stream to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite === EOF_byte) {
 | |
|       if (utf8_bytes_needed !== 0) {
 | |
|         return decoderError(fatal);
 | |
|       }
 | |
|       return EOF_code_point;
 | |
|     }
 | |
|     byte_pointer.offset(1);
 | |
| 
 | |
|     if (utf8_bytes_needed === 0) {
 | |
|       if (inRange(bite, 0x00, 0x7F)) {
 | |
|         return bite;
 | |
|       }
 | |
|       if (inRange(bite, 0xC2, 0xDF)) {
 | |
|         utf8_bytes_needed = 1;
 | |
|         utf8_lower_boundary = 0x80;
 | |
|         utf8_code_point = bite - 0xC0;
 | |
|       } else if (inRange(bite, 0xE0, 0xEF)) {
 | |
|         utf8_bytes_needed = 2;
 | |
|         utf8_lower_boundary = 0x800;
 | |
|         utf8_code_point = bite - 0xE0;
 | |
|       } else if (inRange(bite, 0xF0, 0xF4)) {
 | |
|         utf8_bytes_needed = 3;
 | |
|         utf8_lower_boundary = 0x10000;
 | |
|         utf8_code_point = bite - 0xF0;
 | |
|       } else {
 | |
|         return decoderError(fatal);
 | |
|       }
 | |
|       utf8_code_point = utf8_code_point * Math.pow(64, utf8_bytes_needed);
 | |
|       return null;
 | |
|     }
 | |
|     if (!inRange(bite, 0x80, 0xBF)) {
 | |
|       utf8_code_point = 0;
 | |
|       utf8_bytes_needed = 0;
 | |
|       utf8_bytes_seen = 0;
 | |
|       utf8_lower_boundary = 0;
 | |
|       byte_pointer.offset(-1);
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     utf8_bytes_seen += 1;
 | |
|     utf8_code_point = utf8_code_point + (bite - 0x80) *
 | |
|         Math.pow(64, utf8_bytes_needed - utf8_bytes_seen);
 | |
|     if (utf8_bytes_seen !== utf8_bytes_needed) {
 | |
|       return null;
 | |
|     }
 | |
|     var code_point = utf8_code_point;
 | |
|     var lower_boundary = utf8_lower_boundary;
 | |
|     utf8_code_point = 0;
 | |
|     utf8_bytes_needed = 0;
 | |
|     utf8_bytes_seen = 0;
 | |
|     utf8_lower_boundary = 0;
 | |
|     if (inRange(code_point, lower_boundary, 0x10FFFF) &&
 | |
|         !inRange(code_point, 0xD800, 0xDFFF)) {
 | |
|       return code_point;
 | |
|     }
 | |
|     return decoderError(fatal);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function UTF8Encoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     /** @type {number} */
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if (inRange(code_point, 0xD800, 0xDFFF)) {
 | |
|       return encoderError(code_point);
 | |
|     }
 | |
|     if (inRange(code_point, 0x0000, 0x007f)) {
 | |
|       return output_byte_stream.emit(code_point);
 | |
|     }
 | |
|     var count, offset;
 | |
|     if (inRange(code_point, 0x0080, 0x07FF)) {
 | |
|       count = 1;
 | |
|       offset = 0xC0;
 | |
|     } else if (inRange(code_point, 0x0800, 0xFFFF)) {
 | |
|       count = 2;
 | |
|       offset = 0xE0;
 | |
|     } else if (inRange(code_point, 0x10000, 0x10FFFF)) {
 | |
|       count = 3;
 | |
|       offset = 0xF0;
 | |
|     }
 | |
|     var result = output_byte_stream.emit(
 | |
|         div(code_point, Math.pow(64, count)) + offset);
 | |
|     while (count > 0) {
 | |
|       var temp = div(code_point, Math.pow(64, count - 1));
 | |
|       result = output_byte_stream.emit(0x80 + (temp % 64));
 | |
|       count -= 1;
 | |
|     }
 | |
|     return result;
 | |
|   };
 | |
| }
 | |
| 
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['utf-8'].getEncoder = function(options) {
 | |
|   return new UTF8Encoder(options);
 | |
| };
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['utf-8'].getDecoder = function(options) {
 | |
|   return new UTF8Decoder(options);
 | |
| };
 | |
| 
 | |
| //
 | |
| // 9. Legacy single-byte encodings
 | |
| //
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {Array.<number>} index The encoding index.
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function SingleByteDecoder(index, options) {
 | |
|   var fatal = options.fatal;
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte stream to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite === EOF_byte) {
 | |
|       return EOF_code_point;
 | |
|     }
 | |
|     byte_pointer.offset(1);
 | |
|     if (inRange(bite, 0x00, 0x7F)) {
 | |
|       return bite;
 | |
|     }
 | |
|     var code_point = index[bite - 0x80];
 | |
|     if (code_point === null) {
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     return code_point;
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {Array.<?number>} index The encoding index.
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function SingleByteEncoder(index, options) {
 | |
|   var fatal = options.fatal;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if (inRange(code_point, 0x0000, 0x007F)) {
 | |
|       return output_byte_stream.emit(code_point);
 | |
|     }
 | |
|     var pointer = indexPointerFor(code_point, index);
 | |
|     if (pointer === null) {
 | |
|       encoderError(code_point);
 | |
|     }
 | |
|     return output_byte_stream.emit(pointer + 0x80);
 | |
|   };
 | |
| }
 | |
| 
 | |
| (function() {
 | |
|   encodings.forEach(function(category) {
 | |
|     if (category.heading !== 'Legacy single-byte encodings')
 | |
|       return;
 | |
|     category.encodings.forEach(function(encoding) {
 | |
|       var idx = indexes[encoding.name];
 | |
|       /** @param {{fatal: boolean}} options */
 | |
|       encoding.getDecoder = function(options) {
 | |
|         return new SingleByteDecoder(idx, options);
 | |
|       };
 | |
|       /** @param {{fatal: boolean}} options */
 | |
|       encoding.getEncoder = function(options) {
 | |
|         return new SingleByteEncoder(idx, options);
 | |
|       };
 | |
|     });
 | |
|   });
 | |
| }());
 | |
| 
 | |
| //
 | |
| // 10. Legacy multi-byte Chinese (simplified) encodings
 | |
| //
 | |
| 
 | |
| // 9.1 gbk
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {boolean} gb18030 True if decoding gb18030, false otherwise.
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function GBKDecoder(gb18030, options) {
 | |
|   var fatal = options.fatal;
 | |
|   var /** @type {number} */ gbk_first = 0x00,
 | |
|       /** @type {number} */ gbk_second = 0x00,
 | |
|       /** @type {number} */ gbk_third = 0x00;
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte stream to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite === EOF_byte && gbk_first === 0x00 &&
 | |
|         gbk_second === 0x00 && gbk_third === 0x00) {
 | |
|       return EOF_code_point;
 | |
|     }
 | |
|     if (bite === EOF_byte &&
 | |
|         (gbk_first !== 0x00 || gbk_second !== 0x00 || gbk_third !== 0x00)) {
 | |
|       gbk_first = 0x00;
 | |
|       gbk_second = 0x00;
 | |
|       gbk_third = 0x00;
 | |
|       decoderError(fatal);
 | |
|     }
 | |
|     byte_pointer.offset(1);
 | |
|     var code_point;
 | |
|     if (gbk_third !== 0x00) {
 | |
|       code_point = null;
 | |
|       if (inRange(bite, 0x30, 0x39)) {
 | |
|         code_point = indexGB18030CodePointFor(
 | |
|             (((gbk_first - 0x81) * 10 + (gbk_second - 0x30)) * 126 +
 | |
|              (gbk_third - 0x81)) * 10 + bite - 0x30);
 | |
|       }
 | |
|       gbk_first = 0x00;
 | |
|       gbk_second = 0x00;
 | |
|       gbk_third = 0x00;
 | |
|       if (code_point === null) {
 | |
|         byte_pointer.offset(-3);
 | |
|         return decoderError(fatal);
 | |
|       }
 | |
|       return code_point;
 | |
|     }
 | |
|     if (gbk_second !== 0x00) {
 | |
|       if (inRange(bite, 0x81, 0xFE)) {
 | |
|         gbk_third = bite;
 | |
|         return null;
 | |
|       }
 | |
|       byte_pointer.offset(-2);
 | |
|       gbk_first = 0x00;
 | |
|       gbk_second = 0x00;
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     if (gbk_first !== 0x00) {
 | |
|       if (inRange(bite, 0x30, 0x39) && gb18030) {
 | |
|         gbk_second = bite;
 | |
|         return null;
 | |
|       }
 | |
|       var lead = gbk_first;
 | |
|       var pointer = null;
 | |
|       gbk_first = 0x00;
 | |
|       var offset = bite < 0x7F ? 0x40 : 0x41;
 | |
|       if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE)) {
 | |
|         pointer = (lead - 0x81) * 190 + (bite - offset);
 | |
|       }
 | |
|       code_point = pointer === null ? null :
 | |
|           indexCodePointFor(pointer, indexes['gbk']);
 | |
|       if (pointer === null) {
 | |
|         byte_pointer.offset(-1);
 | |
|       }
 | |
|       if (code_point === null) {
 | |
|         return decoderError(fatal);
 | |
|       }
 | |
|       return code_point;
 | |
|     }
 | |
|     if (inRange(bite, 0x00, 0x7F)) {
 | |
|       return bite;
 | |
|     }
 | |
|     if (bite === 0x80) {
 | |
|       return 0x20AC;
 | |
|     }
 | |
|     if (inRange(bite, 0x81, 0xFE)) {
 | |
|       gbk_first = bite;
 | |
|       return null;
 | |
|     }
 | |
|     return decoderError(fatal);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {boolean} gb18030 True if decoding gb18030, false otherwise.
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function GBKEncoder(gb18030, options) {
 | |
|   var fatal = options.fatal;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if (inRange(code_point, 0x0000, 0x007F)) {
 | |
|       return output_byte_stream.emit(code_point);
 | |
|     }
 | |
|     var pointer = indexPointerFor(code_point, indexes['gbk']);
 | |
|     if (pointer !== null) {
 | |
|       var lead = div(pointer, 190) + 0x81;
 | |
|       var trail = pointer % 190;
 | |
|       var offset = trail < 0x3F ? 0x40 : 0x41;
 | |
|       return output_byte_stream.emit(lead, trail + offset);
 | |
|     }
 | |
|     if (pointer === null && !gb18030) {
 | |
|       return encoderError(code_point);
 | |
|     }
 | |
|     pointer = indexGB18030PointerFor(code_point);
 | |
|     var byte1 = div(div(div(pointer, 10), 126), 10);
 | |
|     pointer = pointer - byte1 * 10 * 126 * 10;
 | |
|     var byte2 = div(div(pointer, 10), 126);
 | |
|     pointer = pointer - byte2 * 10 * 126;
 | |
|     var byte3 = div(pointer, 10);
 | |
|     var byte4 = pointer - byte3 * 10;
 | |
|     return output_byte_stream.emit(byte1 + 0x81,
 | |
|                                    byte2 + 0x30,
 | |
|                                    byte3 + 0x81,
 | |
|                                    byte4 + 0x30);
 | |
|   };
 | |
| }
 | |
| 
 | |
| name_to_encoding['gbk'].getEncoder = function(options) {
 | |
|   return new GBKEncoder(false, options);
 | |
| };
 | |
| name_to_encoding['gbk'].getDecoder = function(options) {
 | |
|   return new GBKDecoder(false, options);
 | |
| };
 | |
| 
 | |
| // 9.2 gb18030
 | |
| name_to_encoding['gb18030'].getEncoder = function(options) {
 | |
|   return new GBKEncoder(true, options);
 | |
| };
 | |
| name_to_encoding['gb18030'].getDecoder = function(options) {
 | |
|   return new GBKDecoder(true, options);
 | |
| };
 | |
| 
 | |
| // 10.2 hz-gb-2312
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function HZGB2312Decoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   var /** @type {boolean} */ hzgb2312 = false,
 | |
|       /** @type {number} */ hzgb2312_lead = 0x00;
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte stream to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite === EOF_byte && hzgb2312_lead === 0x00) {
 | |
|       return EOF_code_point;
 | |
|     }
 | |
|     if (bite === EOF_byte && hzgb2312_lead !== 0x00) {
 | |
|       hzgb2312_lead = 0x00;
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     byte_pointer.offset(1);
 | |
|     if (hzgb2312_lead === 0x7E) {
 | |
|       hzgb2312_lead = 0x00;
 | |
|       if (bite === 0x7B) {
 | |
|         hzgb2312 = true;
 | |
|         return null;
 | |
|       }
 | |
|       if (bite === 0x7D) {
 | |
|         hzgb2312 = false;
 | |
|         return null;
 | |
|       }
 | |
|       if (bite === 0x7E) {
 | |
|         return 0x007E;
 | |
|       }
 | |
|       if (bite === 0x0A) {
 | |
|         return null;
 | |
|       }
 | |
|       byte_pointer.offset(-1);
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     if (hzgb2312_lead !== 0x00) {
 | |
|       var lead = hzgb2312_lead;
 | |
|       hzgb2312_lead = 0x00;
 | |
|       var code_point = null;
 | |
|       if (inRange(bite, 0x21, 0x7E)) {
 | |
|         code_point = indexCodePointFor((lead - 1) * 190 +
 | |
|                                        (bite + 0x3F), indexes['gbk']);
 | |
|       }
 | |
|       if (bite === 0x0A) {
 | |
|         hzgb2312 = false;
 | |
|       }
 | |
|       if (code_point === null) {
 | |
|         return decoderError(fatal);
 | |
|       }
 | |
|       return code_point;
 | |
|     }
 | |
|     if (bite === 0x7E) {
 | |
|       hzgb2312_lead = 0x7E;
 | |
|       return null;
 | |
|     }
 | |
|     if (hzgb2312) {
 | |
|       if (inRange(bite, 0x20, 0x7F)) {
 | |
|         hzgb2312_lead = bite;
 | |
|         return null;
 | |
|       }
 | |
|       if (bite === 0x0A) {
 | |
|         hzgb2312 = false;
 | |
|       }
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     if (inRange(bite, 0x00, 0x7F)) {
 | |
|       return bite;
 | |
|     }
 | |
|     return decoderError(fatal);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function HZGB2312Encoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   /** @type {boolean} */
 | |
|   var hzgb2312 = false;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if (inRange(code_point, 0x0000, 0x007F) && hzgb2312) {
 | |
|       code_point_pointer.offset(-1);
 | |
|       hzgb2312 = false;
 | |
|       return output_byte_stream.emit(0x7E, 0x7D);
 | |
|     }
 | |
|     if (code_point === 0x007E) {
 | |
|       return output_byte_stream.emit(0x7E, 0x7E);
 | |
|     }
 | |
|     if (inRange(code_point, 0x0000, 0x007F)) {
 | |
|       return output_byte_stream.emit(code_point);
 | |
|     }
 | |
|     if (!hzgb2312) {
 | |
|       code_point_pointer.offset(-1);
 | |
|       hzgb2312 = true;
 | |
|       return output_byte_stream.emit(0x7E, 0x7B);
 | |
|     }
 | |
|     var pointer = indexPointerFor(code_point, indexes['gbk']);
 | |
|     if (pointer === null) {
 | |
|       return encoderError(code_point);
 | |
|     }
 | |
|     var lead = div(pointer, 190) + 1;
 | |
|     var trail = pointer % 190 - 0x3F;
 | |
|     if (!inRange(lead, 0x21, 0x7E) || !inRange(trail, 0x21, 0x7E)) {
 | |
|       return encoderError(code_point);
 | |
|     }
 | |
|     return output_byte_stream.emit(lead, trail);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['hz-gb-2312'].getEncoder = function(options) {
 | |
|   return new HZGB2312Encoder(options);
 | |
| };
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['hz-gb-2312'].getDecoder = function(options) {
 | |
|   return new HZGB2312Decoder(options);
 | |
| };
 | |
| 
 | |
| //
 | |
| // 11. Legacy multi-byte Chinese (traditional) encodings
 | |
| //
 | |
| 
 | |
| // 11.1 big5
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function Big5Decoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   var /** @type {number} */ big5_lead = 0x00,
 | |
|       /** @type {?number} */ big5_pending = null;
 | |
| 
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte steram to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     // NOTE: Hack to support emitting two code points
 | |
|     if (big5_pending !== null) {
 | |
|       var pending = big5_pending;
 | |
|       big5_pending = null;
 | |
|       return pending;
 | |
|     }
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite === EOF_byte && big5_lead === 0x00) {
 | |
|       return EOF_code_point;
 | |
|     }
 | |
|     if (bite === EOF_byte && big5_lead !== 0x00) {
 | |
|       big5_lead = 0x00;
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     byte_pointer.offset(1);
 | |
|     if (big5_lead !== 0x00) {
 | |
|       var lead = big5_lead;
 | |
|       var pointer = null;
 | |
|       big5_lead = 0x00;
 | |
|       var offset = bite < 0x7F ? 0x40 : 0x62;
 | |
|       if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE)) {
 | |
|         pointer = (lead - 0x81) * 157 + (bite - offset);
 | |
|       }
 | |
|       if (pointer === 1133) {
 | |
|         big5_pending = 0x0304;
 | |
|         return 0x00CA;
 | |
|       }
 | |
|       if (pointer === 1135) {
 | |
|         big5_pending = 0x030C;
 | |
|         return 0x00CA;
 | |
|       }
 | |
|       if (pointer === 1164) {
 | |
|         big5_pending = 0x0304;
 | |
|         return 0x00EA;
 | |
|       }
 | |
|       if (pointer === 1166) {
 | |
|         big5_pending = 0x030C;
 | |
|         return 0x00EA;
 | |
|       }
 | |
|       var code_point = (pointer === null) ? null :
 | |
|           indexCodePointFor(pointer, indexes['big5']);
 | |
|       if (pointer === null) {
 | |
|         byte_pointer.offset(-1);
 | |
|       }
 | |
|       if (code_point === null) {
 | |
|         return decoderError(fatal);
 | |
|       }
 | |
|       return code_point;
 | |
|     }
 | |
|     if (inRange(bite, 0x00, 0x7F)) {
 | |
|       return bite;
 | |
|     }
 | |
|     if (inRange(bite, 0x81, 0xFE)) {
 | |
|       big5_lead = bite;
 | |
|       return null;
 | |
|     }
 | |
|     return decoderError(fatal);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function Big5Encoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if (inRange(code_point, 0x0000, 0x007F)) {
 | |
|       return output_byte_stream.emit(code_point);
 | |
|     }
 | |
|     var pointer = indexPointerFor(code_point, indexes['big5']);
 | |
|     if (pointer === null) {
 | |
|       return encoderError(code_point);
 | |
|     }
 | |
|     var lead = div(pointer, 157) + 0x81;
 | |
|     //if (lead < 0xA1) {
 | |
|     //  return encoderError(code_point);
 | |
|     //}
 | |
|     var trail = pointer % 157;
 | |
|     var offset = trail < 0x3F ? 0x40 : 0x62;
 | |
|     return output_byte_stream.emit(lead, trail + offset);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['big5'].getEncoder = function(options) {
 | |
|   return new Big5Encoder(options);
 | |
| };
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['big5'].getDecoder = function(options) {
 | |
|   return new Big5Decoder(options);
 | |
| };
 | |
| 
 | |
| 
 | |
| //
 | |
| // 12. Legacy multi-byte Japanese encodings
 | |
| //
 | |
| 
 | |
| // 12.1 euc.jp
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function EUCJPDecoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   var /** @type {number} */ eucjp_first = 0x00,
 | |
|       /** @type {number} */ eucjp_second = 0x00;
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte stream to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite === EOF_byte) {
 | |
|       if (eucjp_first === 0x00 && eucjp_second === 0x00) {
 | |
|         return EOF_code_point;
 | |
|       }
 | |
|       eucjp_first = 0x00;
 | |
|       eucjp_second = 0x00;
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     byte_pointer.offset(1);
 | |
| 
 | |
|     var lead, code_point;
 | |
|     if (eucjp_second !== 0x00) {
 | |
|       lead = eucjp_second;
 | |
|       eucjp_second = 0x00;
 | |
|       code_point = null;
 | |
|       if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
 | |
|         code_point = indexCodePointFor((lead - 0xA1) * 94 + bite - 0xA1,
 | |
|                                        indexes['jis0212']);
 | |
|       }
 | |
|       if (!inRange(bite, 0xA1, 0xFE)) {
 | |
|         byte_pointer.offset(-1);
 | |
|       }
 | |
|       if (code_point === null) {
 | |
|         return decoderError(fatal);
 | |
|       }
 | |
|       return code_point;
 | |
|     }
 | |
|     if (eucjp_first === 0x8E && inRange(bite, 0xA1, 0xDF)) {
 | |
|       eucjp_first = 0x00;
 | |
|       return 0xFF61 + bite - 0xA1;
 | |
|     }
 | |
|     if (eucjp_first === 0x8F && inRange(bite, 0xA1, 0xFE)) {
 | |
|       eucjp_first = 0x00;
 | |
|       eucjp_second = bite;
 | |
|       return null;
 | |
|     }
 | |
|     if (eucjp_first !== 0x00) {
 | |
|       lead = eucjp_first;
 | |
|       eucjp_first = 0x00;
 | |
|       code_point = null;
 | |
|       if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
 | |
|         code_point = indexCodePointFor((lead - 0xA1) * 94 + bite - 0xA1,
 | |
|                                        indexes['jis0208']);
 | |
|       }
 | |
|       if (!inRange(bite, 0xA1, 0xFE)) {
 | |
|         byte_pointer.offset(-1);
 | |
|       }
 | |
|       if (code_point === null) {
 | |
|         return decoderError(fatal);
 | |
|       }
 | |
|       return code_point;
 | |
|     }
 | |
|     if (inRange(bite, 0x00, 0x7F)) {
 | |
|       return bite;
 | |
|     }
 | |
|     if (bite === 0x8E || bite === 0x8F || (inRange(bite, 0xA1, 0xFE))) {
 | |
|       eucjp_first = bite;
 | |
|       return null;
 | |
|     }
 | |
|     return decoderError(fatal);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function EUCJPEncoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if (inRange(code_point, 0x0000, 0x007F)) {
 | |
|       return output_byte_stream.emit(code_point);
 | |
|     }
 | |
|     if (code_point === 0x00A5) {
 | |
|       return output_byte_stream.emit(0x5C);
 | |
|     }
 | |
|     if (code_point === 0x203E) {
 | |
|       return output_byte_stream.emit(0x7E);
 | |
|     }
 | |
|     if (inRange(code_point, 0xFF61, 0xFF9F)) {
 | |
|       return output_byte_stream.emit(0x8E, code_point - 0xFF61 + 0xA1);
 | |
|     }
 | |
| 
 | |
|     var pointer = indexPointerFor(code_point, indexes['jis0208']);
 | |
|     if (pointer === null) {
 | |
|       return encoderError(code_point);
 | |
|     }
 | |
|     var lead = div(pointer, 94) + 0xA1;
 | |
|     var trail = pointer % 94 + 0xA1;
 | |
|     return output_byte_stream.emit(lead, trail);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['euc-jp'].getEncoder = function(options) {
 | |
|   return new EUCJPEncoder(options);
 | |
| };
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['euc-jp'].getDecoder = function(options) {
 | |
|   return new EUCJPDecoder(options);
 | |
| };
 | |
| 
 | |
| // 12.2 iso-2022-jp
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function ISO2022JPDecoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   /** @enum */
 | |
|   var state = {
 | |
|     ASCII: 0,
 | |
|     escape_start: 1,
 | |
|     escape_middle: 2,
 | |
|     escape_final: 3,
 | |
|     lead: 4,
 | |
|     trail: 5,
 | |
|     Katakana: 6
 | |
|   };
 | |
|   var /** @type {number} */ iso2022jp_state = state.ASCII,
 | |
|       /** @type {boolean} */ iso2022jp_jis0212 = false,
 | |
|       /** @type {number} */ iso2022jp_lead = 0x00;
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte stream to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite !== EOF_byte) {
 | |
|       byte_pointer.offset(1);
 | |
|     }
 | |
|     switch (iso2022jp_state) {
 | |
|       default:
 | |
|       case state.ASCII:
 | |
|         if (bite === 0x1B) {
 | |
|           iso2022jp_state = state.escape_start;
 | |
|           return null;
 | |
|         }
 | |
|         if (inRange(bite, 0x00, 0x7F)) {
 | |
|           return bite;
 | |
|         }
 | |
|         if (bite === EOF_byte) {
 | |
|           return EOF_code_point;
 | |
|         }
 | |
|         return decoderError(fatal);
 | |
| 
 | |
|       case state.escape_start:
 | |
|         if (bite === 0x24 || bite === 0x28) {
 | |
|           iso2022jp_lead = bite;
 | |
|           iso2022jp_state = state.escape_middle;
 | |
|           return null;
 | |
|         }
 | |
|         if (bite !== EOF_byte) {
 | |
|           byte_pointer.offset(-1);
 | |
|         }
 | |
|         iso2022jp_state = state.ASCII;
 | |
|         return decoderError(fatal);
 | |
| 
 | |
|       case state.escape_middle:
 | |
|         var lead = iso2022jp_lead;
 | |
|         iso2022jp_lead = 0x00;
 | |
|         if (lead === 0x24 && (bite === 0x40 || bite === 0x42)) {
 | |
|           iso2022jp_jis0212 = false;
 | |
|           iso2022jp_state = state.lead;
 | |
|           return null;
 | |
|         }
 | |
|         if (lead === 0x24 && bite === 0x28) {
 | |
|           iso2022jp_state = state.escape_final;
 | |
|           return null;
 | |
|         }
 | |
|         if (lead === 0x28 && (bite === 0x42 || bite === 0x4A)) {
 | |
|           iso2022jp_state = state.ASCII;
 | |
|           return null;
 | |
|         }
 | |
|         if (lead === 0x28 && bite === 0x49) {
 | |
|           iso2022jp_state = state.Katakana;
 | |
|           return null;
 | |
|         }
 | |
|         if (bite === EOF_byte) {
 | |
|           byte_pointer.offset(-1);
 | |
|         } else {
 | |
|           byte_pointer.offset(-2);
 | |
|         }
 | |
|         iso2022jp_state = state.ASCII;
 | |
|         return decoderError(fatal);
 | |
| 
 | |
|       case state.escape_final:
 | |
|         if (bite === 0x44) {
 | |
|           iso2022jp_jis0212 = true;
 | |
|           iso2022jp_state = state.lead;
 | |
|           return null;
 | |
|         }
 | |
|         if (bite === EOF_byte) {
 | |
|           byte_pointer.offset(-2);
 | |
|         } else {
 | |
|           byte_pointer.offset(-3);
 | |
|         }
 | |
|         iso2022jp_state = state.ASCII;
 | |
|         return decoderError(fatal);
 | |
| 
 | |
|       case state.lead:
 | |
|         if (bite === 0x0A) {
 | |
|           iso2022jp_state = state.ASCII;
 | |
|           return decoderError(fatal, 0x000A);
 | |
|         }
 | |
|         if (bite === 0x1B) {
 | |
|           iso2022jp_state = state.escape_start;
 | |
|           return null;
 | |
|         }
 | |
|         if (bite === EOF_byte) {
 | |
|           return EOF_code_point;
 | |
|         }
 | |
|         iso2022jp_lead = bite;
 | |
|         iso2022jp_state = state.trail;
 | |
|         return null;
 | |
| 
 | |
|       case state.trail:
 | |
|         iso2022jp_state = state.lead;
 | |
|         if (bite === EOF_byte) {
 | |
|           return decoderError(fatal);
 | |
|         }
 | |
|         var code_point = null;
 | |
|         var pointer = (iso2022jp_lead - 0x21) * 94 + bite - 0x21;
 | |
|         if (inRange(iso2022jp_lead, 0x21, 0x7E) &&
 | |
|             inRange(bite, 0x21, 0x7E)) {
 | |
|           code_point = (iso2022jp_jis0212 === false) ?
 | |
|               indexCodePointFor(pointer, indexes['jis0208']) :
 | |
|               indexCodePointFor(pointer, indexes['jis0212']);
 | |
|         }
 | |
|         if (code_point === null) {
 | |
|           return decoderError(fatal);
 | |
|         }
 | |
|         return code_point;
 | |
| 
 | |
|       case state.Katakana:
 | |
|         if (bite === 0x1B) {
 | |
|           iso2022jp_state = state.escape_start;
 | |
|           return null;
 | |
|         }
 | |
|         if (inRange(bite, 0x21, 0x5F)) {
 | |
|           return 0xFF61 + bite - 0x21;
 | |
|         }
 | |
|         if (bite === EOF_byte) {
 | |
|           return EOF_code_point;
 | |
|         }
 | |
|         return decoderError(fatal);
 | |
|     }
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function ISO2022JPEncoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   /** @enum */
 | |
|   var state = {
 | |
|     ASCII: 0,
 | |
|     lead: 1,
 | |
|     Katakana: 2
 | |
|   };
 | |
|   var /** @type {number} */ iso2022jp_state = state.ASCII;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if ((inRange(code_point, 0x0000, 0x007F) ||
 | |
|          code_point === 0x00A5 || code_point === 0x203E) &&
 | |
|         iso2022jp_state !== state.ASCII) {
 | |
|       code_point_pointer.offset(-1);
 | |
|       iso2022jp_state = state.ASCII;
 | |
|       return output_byte_stream.emit(0x1B, 0x28, 0x42);
 | |
|     }
 | |
|     if (inRange(code_point, 0x0000, 0x007F)) {
 | |
|       return output_byte_stream.emit(code_point);
 | |
|     }
 | |
|     if (code_point === 0x00A5) {
 | |
|       return output_byte_stream.emit(0x5C);
 | |
|     }
 | |
|     if (code_point === 0x203E) {
 | |
|       return output_byte_stream.emit(0x7E);
 | |
|     }
 | |
|     if (inRange(code_point, 0xFF61, 0xFF9F) &&
 | |
|         iso2022jp_state !== state.Katakana) {
 | |
|       code_point_pointer.offset(-1);
 | |
|       iso2022jp_state = state.Katakana;
 | |
|       return output_byte_stream.emit(0x1B, 0x28, 0x49);
 | |
|     }
 | |
|     if (inRange(code_point, 0xFF61, 0xFF9F)) {
 | |
|       return output_byte_stream.emit(code_point - 0xFF61 - 0x21);
 | |
|     }
 | |
|     if (iso2022jp_state !== state.lead) {
 | |
|       code_point_pointer.offset(-1);
 | |
|       iso2022jp_state = state.lead;
 | |
|       return output_byte_stream.emit(0x1B, 0x24, 0x42);
 | |
|     }
 | |
|     var pointer = indexPointerFor(code_point, indexes['jis0208']);
 | |
|     if (pointer === null) {
 | |
|       return encoderError(code_point);
 | |
|     }
 | |
|     var lead = div(pointer, 94) + 0x21;
 | |
|     var trail = pointer % 94 + 0x21;
 | |
|     return output_byte_stream.emit(lead, trail);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['iso-2022-jp'].getEncoder = function(options) {
 | |
|   return new ISO2022JPEncoder(options);
 | |
| };
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['iso-2022-jp'].getDecoder = function(options) {
 | |
|   return new ISO2022JPDecoder(options);
 | |
| };
 | |
| 
 | |
| // 12.3 shift_jis
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function ShiftJISDecoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   var /** @type {number} */ shiftjis_lead = 0x00;
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte stream to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite === EOF_byte && shiftjis_lead === 0x00) {
 | |
|       return EOF_code_point;
 | |
|     }
 | |
|     if (bite === EOF_byte && shiftjis_lead !== 0x00) {
 | |
|       shiftjis_lead = 0x00;
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     byte_pointer.offset(1);
 | |
|     if (shiftjis_lead !== 0x00) {
 | |
|       var lead = shiftjis_lead;
 | |
|       shiftjis_lead = 0x00;
 | |
|       if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC)) {
 | |
|         var offset = (bite < 0x7F) ? 0x40 : 0x41;
 | |
|         var lead_offset = (lead < 0xA0) ? 0x81 : 0xC1;
 | |
|         var code_point = indexCodePointFor((lead - lead_offset) * 188 +
 | |
|                                            bite - offset, indexes['jis0208']);
 | |
|         if (code_point === null) {
 | |
|           return decoderError(fatal);
 | |
|         }
 | |
|         return code_point;
 | |
|       }
 | |
|       byte_pointer.offset(-1);
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     if (inRange(bite, 0x00, 0x80)) {
 | |
|       return bite;
 | |
|     }
 | |
|     if (inRange(bite, 0xA1, 0xDF)) {
 | |
|       return 0xFF61 + bite - 0xA1;
 | |
|     }
 | |
|     if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) {
 | |
|       shiftjis_lead = bite;
 | |
|       return null;
 | |
|     }
 | |
|     return decoderError(fatal);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function ShiftJISEncoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if (inRange(code_point, 0x0000, 0x0080)) {
 | |
|       return output_byte_stream.emit(code_point);
 | |
|     }
 | |
|     if (code_point === 0x00A5) {
 | |
|       return output_byte_stream.emit(0x5C);
 | |
|     }
 | |
|     if (code_point === 0x203E) {
 | |
|       return output_byte_stream.emit(0x7E);
 | |
|     }
 | |
|     if (inRange(code_point, 0xFF61, 0xFF9F)) {
 | |
|       return output_byte_stream.emit(code_point - 0xFF61 + 0xA1);
 | |
|     }
 | |
|     var pointer = indexPointerFor(code_point, indexes['jis0208']);
 | |
|     if (pointer === null) {
 | |
|       return encoderError(code_point);
 | |
|     }
 | |
|     var lead = div(pointer, 188);
 | |
|     var lead_offset = lead < 0x1F ? 0x81 : 0xC1;
 | |
|     var trail = pointer % 188;
 | |
|     var offset = trail < 0x3F ? 0x40 : 0x41;
 | |
|     return output_byte_stream.emit(lead + lead_offset, trail + offset);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['shift_jis'].getEncoder = function(options) {
 | |
|   return new ShiftJISEncoder(options);
 | |
| };
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['shift_jis'].getDecoder = function(options) {
 | |
|   return new ShiftJISDecoder(options);
 | |
| };
 | |
| 
 | |
| //
 | |
| // 13. Legacy multi-byte Korean encodings
 | |
| //
 | |
| 
 | |
| // 13.1 euc-kr
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function EUCKRDecoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   var /** @type {number} */ euckr_lead = 0x00;
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte stream to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite === EOF_byte && euckr_lead === 0) {
 | |
|       return EOF_code_point;
 | |
|     }
 | |
|     if (bite === EOF_byte && euckr_lead !== 0) {
 | |
|       euckr_lead = 0x00;
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     byte_pointer.offset(1);
 | |
|     if (euckr_lead !== 0x00) {
 | |
|       var lead = euckr_lead;
 | |
|       var pointer = null;
 | |
|       euckr_lead = 0x00;
 | |
| 
 | |
|       if (inRange(lead, 0x81, 0xC6)) {
 | |
|         var temp = (26 + 26 + 126) * (lead - 0x81);
 | |
|         if (inRange(bite, 0x41, 0x5A)) {
 | |
|           pointer = temp + bite - 0x41;
 | |
|         } else if (inRange(bite, 0x61, 0x7A)) {
 | |
|           pointer = temp + 26 + bite - 0x61;
 | |
|         } else if (inRange(bite, 0x81, 0xFE)) {
 | |
|           pointer = temp + 26 + 26 + bite - 0x81;
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       if (inRange(lead, 0xC7, 0xFD) && inRange(bite, 0xA1, 0xFE)) {
 | |
|         pointer = (26 + 26 + 126) * (0xC7 - 0x81) + (lead - 0xC7) * 94 +
 | |
|             (bite - 0xA1);
 | |
|       }
 | |
| 
 | |
|       var code_point = (pointer === null) ? null :
 | |
|           indexCodePointFor(pointer, indexes['euc-kr']);
 | |
|       if (pointer === null) {
 | |
|         byte_pointer.offset(-1);
 | |
|       }
 | |
|       if (code_point === null) {
 | |
|         return decoderError(fatal);
 | |
|       }
 | |
|       return code_point;
 | |
|     }
 | |
| 
 | |
|     if (inRange(bite, 0x00, 0x7F)) {
 | |
|       return bite;
 | |
|     }
 | |
| 
 | |
|     if (inRange(bite, 0x81, 0xFD)) {
 | |
|       euckr_lead = bite;
 | |
|       return null;
 | |
|     }
 | |
| 
 | |
|     return decoderError(fatal);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function EUCKREncoder(options) {
 | |
|   var fatal = options.fatal;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if (inRange(code_point, 0x0000, 0x007F)) {
 | |
|       return output_byte_stream.emit(code_point);
 | |
|     }
 | |
|     var pointer = indexPointerFor(code_point, indexes['euc-kr']);
 | |
|     if (pointer === null) {
 | |
|       return encoderError(code_point);
 | |
|     }
 | |
|     var lead, trail;
 | |
|     if (pointer < ((26 + 26 + 126) * (0xC7 - 0x81))) {
 | |
|       lead = div(pointer, (26 + 26 + 126)) + 0x81;
 | |
|       trail = pointer % (26 + 26 + 126);
 | |
|       var offset = trail < 26 ? 0x41 : trail < 26 + 26 ? 0x47 : 0x4D;
 | |
|       return output_byte_stream.emit(lead, trail + offset);
 | |
|     }
 | |
|     pointer = pointer - (26 + 26 + 126) * (0xC7 - 0x81);
 | |
|     lead = div(pointer, 94) + 0xC7;
 | |
|     trail = pointer % 94 + 0xA1;
 | |
|     return output_byte_stream.emit(lead, trail);
 | |
|   };
 | |
| }
 | |
| 
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['euc-kr'].getEncoder = function(options) {
 | |
|   return new EUCKREncoder(options);
 | |
| };
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['euc-kr'].getDecoder = function(options) {
 | |
|   return new EUCKRDecoder(options);
 | |
| };
 | |
| 
 | |
| 
 | |
| //
 | |
| // 14. Legacy miscellaneous encodings
 | |
| //
 | |
| 
 | |
| // 14.1 replacement
 | |
| 
 | |
| // Not needed - API throws TypeError
 | |
| 
 | |
| // 14.2 utf-16
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {boolean} utf16_be True if big-endian, false if little-endian.
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function UTF16Decoder(utf16_be, options) {
 | |
|   var fatal = options.fatal;
 | |
|   var /** @type {?number} */ utf16_lead_byte = null,
 | |
|       /** @type {?number} */ utf16_lead_surrogate = null;
 | |
|   /**
 | |
|    * @param {ByteInputStream} byte_pointer The byte stream to decode.
 | |
|    * @return {?number} The next code point decoded, or null if not enough
 | |
|    *     data exists in the input stream to decode a complete code point.
 | |
|    */
 | |
|   this.decode = function(byte_pointer) {
 | |
|     var bite = byte_pointer.get();
 | |
|     if (bite === EOF_byte && utf16_lead_byte === null &&
 | |
|         utf16_lead_surrogate === null) {
 | |
|       return EOF_code_point;
 | |
|     }
 | |
|     if (bite === EOF_byte && (utf16_lead_byte !== null ||
 | |
|                               utf16_lead_surrogate !== null)) {
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     byte_pointer.offset(1);
 | |
|     if (utf16_lead_byte === null) {
 | |
|       utf16_lead_byte = bite;
 | |
|       return null;
 | |
|     }
 | |
|     var code_point;
 | |
|     if (utf16_be) {
 | |
|       code_point = (utf16_lead_byte << 8) + bite;
 | |
|     } else {
 | |
|       code_point = (bite << 8) + utf16_lead_byte;
 | |
|     }
 | |
|     utf16_lead_byte = null;
 | |
|     if (utf16_lead_surrogate !== null) {
 | |
|       var lead_surrogate = utf16_lead_surrogate;
 | |
|       utf16_lead_surrogate = null;
 | |
|       if (inRange(code_point, 0xDC00, 0xDFFF)) {
 | |
|         return 0x10000 + (lead_surrogate - 0xD800) * 0x400 +
 | |
|             (code_point - 0xDC00);
 | |
|       }
 | |
|       byte_pointer.offset(-2);
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     if (inRange(code_point, 0xD800, 0xDBFF)) {
 | |
|       utf16_lead_surrogate = code_point;
 | |
|       return null;
 | |
|     }
 | |
|     if (inRange(code_point, 0xDC00, 0xDFFF)) {
 | |
|       return decoderError(fatal);
 | |
|     }
 | |
|     return code_point;
 | |
|   };
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * @constructor
 | |
|  * @param {boolean} utf16_be True if big-endian, false if little-endian.
 | |
|  * @param {{fatal: boolean}} options
 | |
|  */
 | |
| function UTF16Encoder(utf16_be, options) {
 | |
|   var fatal = options.fatal;
 | |
|   /**
 | |
|    * @param {ByteOutputStream} output_byte_stream Output byte stream.
 | |
|    * @param {CodePointInputStream} code_point_pointer Input stream.
 | |
|    * @return {number} The last byte emitted.
 | |
|    */
 | |
|   this.encode = function(output_byte_stream, code_point_pointer) {
 | |
|     /**
 | |
|      * @param {number} code_unit
 | |
|      * @return {number} last byte emitted
 | |
|      */
 | |
|     function convert_to_bytes(code_unit) {
 | |
|       var byte1 = code_unit >> 8;
 | |
|       var byte2 = code_unit & 0x00FF;
 | |
|       if (utf16_be) {
 | |
|         return output_byte_stream.emit(byte1, byte2);
 | |
|       }
 | |
|       return output_byte_stream.emit(byte2, byte1);
 | |
|     }
 | |
|     var code_point = code_point_pointer.get();
 | |
|     if (code_point === EOF_code_point) {
 | |
|       return EOF_byte;
 | |
|     }
 | |
|     code_point_pointer.offset(1);
 | |
|     if (inRange(code_point, 0xD800, 0xDFFF)) {
 | |
|       encoderError(code_point);
 | |
|     }
 | |
|     if (code_point <= 0xFFFF) {
 | |
|       return convert_to_bytes(code_point);
 | |
|     }
 | |
|     var lead = div((code_point - 0x10000), 0x400) + 0xD800;
 | |
|     var trail = ((code_point - 0x10000) % 0x400) + 0xDC00;
 | |
|     convert_to_bytes(lead);
 | |
|     return convert_to_bytes(trail);
 | |
|   };
 | |
| }
 | |
| 
 | |
| // 14.3 utf-16be
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['utf-16be'].getEncoder = function(options) {
 | |
|   return new UTF16Encoder(true, options);
 | |
| };
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['utf-16be'].getDecoder = function(options) {
 | |
|   return new UTF16Decoder(true, options);
 | |
| };
 | |
| 
 | |
| // 14.4 utf-16le
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['utf-16le'].getEncoder = function(options) {
 | |
|   return new UTF16Encoder(false, options);
 | |
| };
 | |
| /** @param {{fatal: boolean}} options */
 | |
| name_to_encoding['utf-16le'].getDecoder = function(options) {
 | |
|   return new UTF16Decoder(false, options);
 | |
| };
 | |
| 
 | |
| // 14.5 x-user-defined
 | |
| // TODO: Implement this encoding.
 | |
| 
 | |
| // NOTE: currently unused
 | |
| /**
 | |
|  * @param {string} label The encoding label.
 | |
|  * @param {ByteInputStream} input_stream The byte stream to test.
 | |
|  */
 | |
| function detectEncoding(label, input_stream) {
 | |
|   if (input_stream.match([0xFF, 0xFE])) {
 | |
|     input_stream.offset(2);
 | |
|     return 'utf-16le';
 | |
|   }
 | |
|   if (input_stream.match([0xFE, 0xFF])) {
 | |
|     input_stream.offset(2);
 | |
|     return 'utf-16be';
 | |
|   }
 | |
|   if (input_stream.match([0xEF, 0xBB, 0xBF])) {
 | |
|     input_stream.offset(3);
 | |
|     return 'utf-8';
 | |
|   }
 | |
|   return label;
 | |
| }
 | |
| 
 | |
| exports.TextEncoder = TextEncoder;
 | |
| exports.TextDecoder = TextDecoder;
 | |
| exports.encodingExists = getEncoding;
 |