// N3RD/JN/CATJS/lib/utf16.js (139 lines, 4.6 KiB, JavaScript)
// Repository page timestamp: 2024-05-22 19:54:50 +08:00
import { inRange, decoderError, end_of_stream, finished, convertCodeUnitToBytes } from './text_decoder_utils.js'
// 15.2.1 shared utf-16 decoder
/**
 * Shared UTF-16 decoder: consumes one byte per `handler` call and emits one
 * code point per complete code unit (or surrogate pair).
 *
 * State kept between calls:
 *  - `utf16_lead_byte`: first byte of a code unit whose second byte has not
 *    arrived yet, or null.
 *  - `utf16_lead_surrogate`: a lead surrogate (U+D800..U+DBFF) waiting for
 *    its trail surrogate, or null.
 *
 * @implements {Decoder}
 */
export class UTF16Decoder {
  /**
   * @param {boolean} utf16_be True if big-endian, false if little-endian.
   * @param {{fatal: boolean}} options `fatal` is forwarded to `decoderError`
   *     whenever malformed input is detected.
   */
  constructor(utf16_be, options) {
    const { fatal } = options
    this.utf16_be = utf16_be
    this.fatal = fatal
    this.utf16_lead_byte = null
    this.utf16_lead_surrogate = null
  }

  /**
   * Processes the next byte of the input.
   *
   * @param {Stream} stream The stream of bytes being decoded. Only used to
   *     push bytes back (`stream.prepend`) after an unpaired lead surrogate.
   * @param {number} bite The next byte read from the stream, or
   *     `end_of_stream`.
   * @return {?(number|!Array.<number>)} `finished` at a clean end of input,
   *     `null` when more bytes are needed ("continue"), a decoded code point,
   *     or the result of `decoderError(this.fatal)` for malformed input.
   *     NOTE(review): `decoderError` lives in text_decoder_utils.js;
   *     presumably it throws when `fatal` is true and otherwise returns a
   *     replacement code point - confirm there.
   */
  handler(stream, bite) {
    if (bite === end_of_stream) {
      // 2. If byte is end-of-stream and utf-16 lead byte and utf-16
      // lead surrogate are null, return finished.
      if (this.utf16_lead_byte === null &&
          this.utf16_lead_surrogate === null) {
        return finished
      }
      // 1. If byte is end-of-stream and either utf-16 lead byte or
      // utf-16 lead surrogate is not null, set utf-16 lead byte and
      // utf-16 lead surrogate to null, and return error.
      // The state is cleared *before* reporting the error so it is reset
      // even if decoderError does not return normally; otherwise every
      // later end-of-stream call would report the same truncation again
      // and never reach `finished`.
      this.utf16_lead_byte = null
      this.utf16_lead_surrogate = null
      return decoderError(this.fatal)
    }

    // 3. If utf-16 lead byte is null, set utf-16 lead byte to byte
    // and return continue.
    if (this.utf16_lead_byte === null) {
      this.utf16_lead_byte = bite
      return null
    }

    // 4. Let code unit be the result of:
    let code_unit
    if (this.utf16_be) {
      // utf-16be decoder flag is set
      // (utf-16 lead byte << 8) + byte.
      code_unit = (this.utf16_lead_byte << 8) + bite
    } else {
      // utf-16be decoder flag is unset
      // (byte << 8) + utf-16 lead byte.
      code_unit = (bite << 8) + this.utf16_lead_byte
    }
    // Then set utf-16 lead byte to null.
    this.utf16_lead_byte = null

    // 5. If utf-16 lead surrogate is not null, let lead surrogate
    // be utf-16 lead surrogate, set utf-16 lead surrogate to null,
    // and then run these substeps:
    if (this.utf16_lead_surrogate !== null) {
      const lead_surrogate = this.utf16_lead_surrogate
      this.utf16_lead_surrogate = null
      // 1. If code unit is in the range U+DC00 to U+DFFF,
      // inclusive, return a code point whose value is 0x10000 +
      // ((lead surrogate - 0xD800) << 10) + (code unit - 0xDC00).
      if (inRange(code_unit, 0xDC00, 0xDFFF)) {
        return 0x10000 + (lead_surrogate - 0xD800) * 0x400 +
          (code_unit - 0xDC00)
      }
      // 2. Prepend the sequence resulting of converting code unit
      // to bytes using utf-16be decoder flag to stream and return
      // error. The code unit is pushed back so it gets decoded again
      // on its own after the error for the unpaired lead surrogate.
      stream.prepend(convertCodeUnitToBytes(code_unit, this.utf16_be))
      return decoderError(this.fatal)
    }

    // 6. If code unit is in the range U+D800 to U+DBFF, inclusive,
    // set utf-16 lead surrogate to code unit and return continue.
    if (inRange(code_unit, 0xD800, 0xDBFF)) {
      this.utf16_lead_surrogate = code_unit
      return null
    }

    // 7. If code unit is in the range U+DC00 to U+DFFF, inclusive,
    // return error (a trail surrogate with no preceding lead).
    if (inRange(code_unit, 0xDC00, 0xDFFF)) {
      return decoderError(this.fatal)
    }

    // 8. Return code point code unit.
    return code_unit
  }
}
// 15.2.2 shared utf-16 encoder
/**
 * Shared UTF-16 encoder: turns one code point per `handler` call into its
 * UTF-16 byte sequence in the configured byte order.
 *
 * @implements {Encoder}
 */
export class UTF16Encoder {
  /**
   * @param {boolean} [utf16_be] True if big-endian, false if little-endian.
   *     Defaults to little-endian.
   */
  constructor(utf16_be = false) {
    this.utf16_be = utf16_be
  }

  /**
   * Encodes a single code point.
   *
   * @param {Stream} stream Input stream (not read by this handler).
   * @param {number} code_point Next code point read from the stream, or
   *     `end_of_stream`.
   * @return {(number|!Array.<number>)} `finished` at end of input, otherwise
   *     the bytes produced by `convertCodeUnitToBytes` - one code unit for
   *     U+0000..U+FFFF, lead followed by trail for anything above.
   */
  handler(stream, code_point) {
    // End of input: nothing left to encode.
    if (code_point === end_of_stream) {
      return finished
    }

    const bigEndian = this.utf16_be

    // Code points in U+0000..U+FFFF map to exactly one code unit.
    if (inRange(code_point, 0x0000, 0xFFFF)) {
      return convertCodeUnitToBytes(code_point, bigEndian)
    }

    // Everything else becomes a surrogate pair built from the 20-bit
    // offset above U+10000: the high 10 bits select the lead surrogate,
    // the low 10 bits the trail surrogate.
    const offset = code_point - 0x10000
    const leadUnit = (offset >> 10) + 0xD800
    const trailUnit = (offset & 0x3FF) + 0xDC00

    const leadBytes = convertCodeUnitToBytes(leadUnit, bigEndian)
    const trailBytes = convertCodeUnitToBytes(trailUnit, bigEndian)

    // Byte sequence of lead followed by trail.
    return leadBytes.concat(trailBytes)
  }
}