You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

157 lines
4.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

const { inRange, decoderError, encoderError, isASCIICodePoint,
end_of_stream, finished, isASCIIByte, floor } = require('../utils');
const index = require('../indexes'); const { indexBig5PointerFor, indexCodePointFor } = index;
//
// 12. Legacy multi-byte Chinese (traditional) encodings
//
// 12.1 Big5
// 12.1.1 Big5 decoder
/**
* @implements {Decoder}
*/
class Big5Decoder {
constructor(options) {
const { fatal } = options
this.fatal = fatal
// Big5's decoder has an associated Big5 lead (initially 0x00).
this.Big5_lead = 0x00
}
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
*/
handler(stream, bite) {
// 1. If byte is end-of-stream and Big5 lead is not 0x00, set
// Big5 lead to 0x00 and return error.
if (bite === end_of_stream && this.Big5_lead !== 0x00) {
this.Big5_lead = 0x00
return decoderError(this.fatal)
}
// 2. If byte is end-of-stream and Big5 lead is 0x00, return
// finished.
if (bite === end_of_stream && this.Big5_lead === 0x00)
return finished
// 3. If Big5 lead is not 0x00, let lead be Big5 lead, let
// pointer be null, set Big5 lead to 0x00, and then run these
// substeps:
if (this.Big5_lead !== 0x00) {
const lead = this.Big5_lead
let pointer = null
this.Big5_lead = 0x00
// 1. Let offset be 0x40 if byte is less than 0x7F and 0x62
// otherwise.
const offset = bite < 0x7F ? 0x40 : 0x62
// 2. If byte is in the range 0x40 to 0x7E, inclusive, or 0xA1
// to 0xFE, inclusive, set pointer to (lead 0x81) × 157 +
// (byte offset).
if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE))
pointer = (lead - 0x81) * 157 + (bite - offset)
// 3. If there is a row in the table below whose first column
// is pointer, return the two code points listed in its second
// column
// Pointer | Code points
// --------+--------------
// 1133 | U+00CA U+0304
// 1135 | U+00CA U+030C
// 1164 | U+00EA U+0304
// 1166 | U+00EA U+030C
switch (pointer) {
case 1133: return [0x00CA, 0x0304]
case 1135: return [0x00CA, 0x030C]
case 1164: return [0x00EA, 0x0304]
case 1166: return [0x00EA, 0x030C]
}
// 4. Let code point be null if pointer is null and the index
// code point for pointer in index Big5 otherwise.
const code_point = (pointer === null) ? null :
indexCodePointFor(pointer, index('big5'))
// 5. If code point is null and byte is an ASCII byte, prepend
// byte to stream.
if (code_point === null && isASCIIByte(bite))
stream.prepend(bite)
// 6. If code point is null, return error.
if (code_point === null)
return decoderError(this.fatal)
// 7. Return a code point whose value is code point.
return code_point
}
// 4. If byte is an ASCII byte, return a code point whose value
// is byte.
if (isASCIIByte(bite))
return bite
// 5. If byte is in the range 0x81 to 0xFE, inclusive, set Big5
// lead to byte and return continue.
if (inRange(bite, 0x81, 0xFE)) {
this.Big5_lead = bite
return null
}
// 6. Return error.
return decoderError(this.fatal)
}
}
// 12.1.2 Big5 encoder
/**
* @implements {Encoder}
*/
class Big5Encoder {
constructor() {
/**
* @param {Stream} stream Input stream.
* @param {number} code_point Next code point read from the stream.
*/
this.handler = function(stream, code_point) {
// 1. If code point is end-of-stream, return finished.
if (code_point === end_of_stream)
return finished
// 2. If code point is an ASCII code point, return a byte whose
// value is code point.
if (isASCIICodePoint(code_point))
return code_point
// 3. Let pointer be the index Big5 pointer for code point.
const pointer = indexBig5PointerFor(code_point)
// 4. If pointer is null, return error with code point.
if (pointer === null)
return encoderError(code_point)
// 5. Let lead be floor(pointer / 157) + 0x81.
const lead = floor(pointer / 157) + 0x81
// 6. If lead is less than 0xA1, return error with code point.
if (lead < 0xA1)
return encoderError(code_point)
// 7. Let trail be pointer % 157.
const trail = pointer % 157
// 8. Let offset be 0x40 if trail is less than 0x3F and 0x62
// otherwise.
const offset = trail < 0x3F ? 0x40 : 0x62
// Return two bytes whose values are lead and trail + offset.
return [lead, trail + offset]
}
}
}
module.exports.Big5Decoder = Big5Decoder
module.exports.Big5Encoder = Big5Encoder