You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					102 lines
				
				3.6 KiB
			
		
		
			
		
	
	
					102 lines
				
				3.6 KiB
			| 
								 
											2 years ago
										 
									 | 
							
								import { EndOfStreamError } from 'peek-readable';
							 | 
						||
| 
								 | 
							
								import { Buffer } from 'node:buffer';
							 | 
						||
| 
								 | 
							
								/**
								 * Core tokenizer.
								 *
								 * Implements token and number reading on top of `readBuffer()` and
								 * `peekBuffer()`. Those two methods are called here but not defined in this
								 * class; they are expected to be provided elsewhere (presumably by a subclass).
								 */
								export class AbstractTokenizer {
								    /**
								     * @param fileInfo - Optional file information. When `fileInfo.size` is defined,
								     *   `ignore()` uses it to avoid advancing past the end of the data.
								     */
								    constructor(fileInfo) {
								        /**
								         * Tokenizer-stream position
								         */
								        this.position = 0;
								        // Scratch buffer shared by readNumber() and peekNumber().
								        this.numBuffer = new Uint8Array(8);
								        this.fileInfo = fileInfo || {};
								    }
								    /**
								     * Read a token from the tokenizer-stream.
								     * @param token - The token to read; must expose `len` and `get(buffer, offset)`
								     * @param position - If provided, the desired position in the tokenizer-stream
								     * @returns Promise with token data
								     * @throws EndOfStreamError when `readBuffer()` reports fewer than `token.len` bytes
								     */
								    async readToken(token, position = this.position) {
								        const uint8Array = Buffer.alloc(token.len);
								        const len = await this.readBuffer(uint8Array, { position });
								        if (len < token.len) {
								            throw new EndOfStreamError();
								        }
								        return token.get(uint8Array, 0);
								    }
								    /**
								     * Peek a token from the tokenizer-stream.
								     * @param token - Token to peek from the tokenizer-stream.
								     * @param position - Offset where to begin reading. Defaults to the current tokenizer position.
								     * @returns Promise with token data
								     * @throws EndOfStreamError when `peekBuffer()` reports fewer than `token.len` bytes
								     */
								    async peekToken(token, position = this.position) {
								        const uint8Array = Buffer.alloc(token.len);
								        const len = await this.peekBuffer(uint8Array, { position });
								        if (len < token.len) {
								            throw new EndOfStreamError();
								        }
								        return token.get(uint8Array, 0);
								    }
								    /**
								     * Read a numeric token from the stream.
								     * The bytes are read into the shared scratch buffer `this.numBuffer`.
								     * @param token - Numeric token
								     * @returns Promise with number
								     * @throws EndOfStreamError when `readBuffer()` reports fewer than `token.len` bytes
								     */
								    async readNumber(token) {
								        const len = await this.readBuffer(this.numBuffer, { length: token.len });
								        if (len < token.len) {
								            throw new EndOfStreamError();
								        }
								        return token.get(this.numBuffer, 0);
								    }
								    /**
								     * Peek a numeric token from the stream.
								     * The bytes are peeked into the shared scratch buffer `this.numBuffer`.
								     * @param token - Numeric token
								     * @returns Promise with number
								     * @throws EndOfStreamError when `peekBuffer()` reports fewer than `token.len` bytes
								     */
								    async peekNumber(token) {
								        const len = await this.peekBuffer(this.numBuffer, { length: token.len });
								        if (len < token.len) {
								            throw new EndOfStreamError();
								        }
								        return token.get(this.numBuffer, 0);
								    }
								    /**
								     * Ignore a number of bytes by advancing the tokenizer position.
								     * This base implementation only updates `this.position`.
								     * @param length - Number of bytes to ignore
								     * @return resolves the number of bytes ignored: `length` when that many bytes
								     *   are available (or the size is unknown), otherwise the number of bytes left
								     */
								    async ignore(length) {
								        if (this.fileInfo.size !== undefined) {
								            const bytesLeft = this.fileInfo.size - this.position;
								            if (length > bytesLeft) {
								                // Clamp so the position never moves past the known size.
								                this.position += bytesLeft;
								                return bytesLeft;
								            }
								        }
								        this.position += length;
								        return length;
								    }
								    /**
								     * Close hook; this base implementation does nothing.
								     */
								    async close() {
								        // empty
								    }
								    /**
								     * Fill in defaults for read/peek options.
								     * @param uint8Array - Target buffer; its length is used to derive the default `length`
								     * @param options - Optional `{ mayBeLess, offset, length, position }`
								     * @returns Options object with all four fields set
								     * @throws Error when `options.position` lies before the current tokenizer position
								     */
								    normalizeOptions(uint8Array, options) {
								        if (options && options.position !== undefined && options.position < this.position) {
								            throw new Error('`options.position` must be equal or greater than `tokenizer.position`');
								        }
								        if (!options) {
								            return {
								                mayBeLess: false,
								                offset: 0,
								                length: uint8Array.length,
								                position: this.position
								            };
								        }
								        // Use `??` rather than truthiness so an explicit `length: 0` is honoured
								        // instead of being replaced by the remaining buffer length.
								        const offset = options.offset ?? 0;
								        return {
								            mayBeLess: options.mayBeLess === true,
								            offset,
								            length: options.length ?? (uint8Array.length - offset),
								            position: options.position ?? this.position
								        };
								    }
								}
							 |