// Provenance: relax/url-pattern-pattern-string-parser.ts, recovered from the diff at
// https://git.ayoreis.com/relax.git/blobdiff_plain/d7baaea4f90491d77b416fbf53935e090bbf7f7f..18760afc494c80d43ef455b9ef21859069936963:/relax/url-pattern-pattern-string-parser.ts?ds=inline
//
// This module tokenizes a URL pattern string and parses the tokens into a list
// of `Part`s. Identifiers deliberately mirror the names of the algorithms and
// struct items of the specifications linked in each doc comment.

// https://tc39.es/ecma262/

/**
 * Matches a string that is exactly one identifier-start code point.
 *
 * https://tc39.es/ecma262/#prod-IdentifierStart
 */
const IDENTIFIER_START =
  /^(?:\p{ID_Start}|\$|_|\\u(?:\p{ASCII_Hex_Digit}{4}|\{(?:0?\p{ASCII_Hex_Digit}{1,5}|10\p{ASCII_Hex_Digit}{4})\}))$/u;

/**
 * Matches a string that is exactly one identifier-part code point.
 *
 * https://tc39.es/ecma262/#prod-IdentifierPart
 */
const IDENTIFIER_PART =
  /^(?:\p{ID_Continue}|\$|\\u(?:\p{ASCII_Hex_Digit}{4}|\{(?:0?\p{ASCII_Hex_Digit}{1,5}|10\p{ASCII_Hex_Digit}{4})\}))$/u;

// https://infra.spec.whatwg.org

/**
 * Matches a string that is exactly one ASCII code point.
 *
 * https://infra.spec.whatwg.org/#ascii-code-point
 */
const ASCII_CODE_POINT = /^\p{ASCII}$/u;

/** Returns true when `code_point` is a single ASCII code point (never for `null`). */
function is_an_ascii_code_point(code_point: string | null): boolean {
  return code_point !== null && ASCII_CODE_POINT.test(code_point);
}

// https://urlpattern.spec.whatwg.org

/**
 * One lexical unit produced by `tokenize`.
 *
 * https://urlpattern.spec.whatwg.org/#token
 */
class Token {
  /** https://urlpattern.spec.whatwg.org/#token-type */
  type: TokenType = "invalid-char";
  /** Code point index in the tokenizer input where the token starts. https://urlpattern.spec.whatwg.org/#token-index */
  index = 0;
  /** Slice of the input carried by the token. https://urlpattern.spec.whatwg.org/#token-value */
  value = "";
}

/** https://urlpattern.spec.whatwg.org/#token-type */
type TokenType =
  | "open"
  | "close"
  | "regexp"
  | "name"
  | "char"
  | "escaped-char"
  | "other-modifier"
  | "asterisk"
  | "end"
  | "invalid-char";

/**
 * "strict" makes tokenizing errors throw; "lenient" records them as
 * "invalid-char" tokens instead.
 *
 * https://urlpattern.spec.whatwg.org/#tokenize-policy
 */
type TokenizePolicy = "strict" | "lenient";

/**
 * Mutable state used while tokenizing a pattern string.
 *
 * https://urlpattern.spec.whatwg.org/#tokenizer
 */
class Tokenizer {
  /** The pattern string split into code points. https://urlpattern.spec.whatwg.org/#tokenizer-input */
  input: string[] = [];
  /** https://urlpattern.spec.whatwg.org/#tokenizer-policy */
  policy: TokenizePolicy = "strict";
  /** https://urlpattern.spec.whatwg.org/#tokenizer-token-list */
  readonly token_list: Token[] = [];
  /** Start of the token currently being read. https://urlpattern.spec.whatwg.org/#tokenizer-index */
  index = 0;
  /** Position of the next code point to read. https://urlpattern.spec.whatwg.org/#tokenizer-next-index */
  next_index = 0;
  /** Most recently read code point. https://urlpattern.spec.whatwg.org/#tokenizer-code-point */
  code_point: string | null = null;

  /**
   * Reads the code point at `next_index` into `code_point` and advances
   * `next_index` by one. Reading past the end of the input stores `null`.
   *
   * https://urlpattern.spec.whatwg.org/#get-the-next-code-point
   */
  get_the_next_code_point(): void {
    this.code_point = this.input[this.next_index] ?? null;
    this.next_index++;
  }

  /**
   * Moves `next_index` to `index`, then reads the code point there.
   *
   * https://urlpattern.spec.whatwg.org/#seek-and-get-the-next-code-point
   */
  seek_and_get_the_next_code_point(index: number): void {
    this.next_index = index;
    this.get_the_next_code_point();
  }

  /**
   * Appends a token that starts at the current `index` and whose value is the
   * `value_length` code points beginning at `value_position`; afterwards
   * `index` is moved to `next_position`.
   *
   * https://urlpattern.spec.whatwg.org/#add-a-token
   */
  add_a_token(
    type: TokenType,
    next_position: number,
    value_position: number,
    value_length: number,
  ): void {
    const token = new Token();
    token.type = type;
    token.index = this.index;
    token.value = this.input
      .slice(value_position, value_position + value_length)
      .join("");
    this.token_list.push(token);
    this.index = next_position;
  }

  /**
   * Like `add_a_token`, with the value running from `value_position` up to
   * (not including) `next_position`.
   *
   * https://urlpattern.spec.whatwg.org/#add-a-token-with-default-length
   */
  add_a_token_with_default_length(
    type: TokenType,
    next_position: number,
    value_position: number,
  ): void {
    const computed_length = next_position - value_position;
    this.add_a_token(type, next_position, value_position, computed_length);
  }

  /**
   * Adds a token covering everything from `index` up to `next_index`.
   *
   * https://urlpattern.spec.whatwg.org/#add-a-token-with-default-position-and-length
   */
  add_a_token_with_default_position_and_length(type: TokenType): void {
    this.add_a_token_with_default_length(type, this.next_index, this.index);
  }

  /**
   * Reports a tokenizing error. Under the "strict" policy this throws;
   * otherwise an "invalid-char" token is recorded and tokenizing resumes at
   * `next_position`.
   *
   * https://urlpattern.spec.whatwg.org/#process-a-tokenizing-error
   *
   * @throws {TypeError} When the policy is "strict".
   */
  process_a_tokenizing_error(
    next_position: number,
    value_position: number,
  ): void {
    if (this.policy === "strict") {
      throw new TypeError(
        `Invalid URL pattern: unexpected input at code point index ${value_position}`,
      );
    }

    this.add_a_token_with_default_length(
      "invalid-char",
      next_position,
      value_position,
    );
  }
}

/**
 * Handles the input after a ":" has been read: collects the longest run of
 * valid name code points and adds a "name" token, or reports a tokenizing
 * error when the run is empty.
 */
function tokenize_name(tokenizer: Tokenizer): void {
  const name_start = tokenizer.next_index;
  let name_position = name_start;

  while (name_position < tokenizer.input.length) {
    tokenizer.seek_and_get_the_next_code_point(name_position);
    const candidate = tokenizer.code_point;
    const first_code_point = name_position === name_start;
    if (
      candidate === null ||
      !is_a_valid_name_code_point(candidate, first_code_point)
    ) {
      break;
    }
    name_position = tokenizer.next_index;
  }

  if (name_position <= name_start) {
    tokenizer.process_a_tokenizing_error(name_start, tokenizer.index);
    return;
  }

  tokenizer.add_a_token_with_default_length("name", name_position, name_start);
}

/**
 * Handles the input after a "(" has been read: scans to the matching ")" and
 * adds a "regexp" token holding the text between the parentheses. A tokenizing
 * error is reported for non-ASCII input, a leading "?", a dangling "\", a
 * nested "(" that is not followed by "?", unbalanced parentheses, or an empty
 * group.
 */
function tokenize_regexp(tokenizer: Tokenizer): void {
  const length = tokenizer.input.length;
  const regexp_start = tokenizer.next_index;
  let regexp_position = regexp_start;
  let depth = 1;

  // `tokenizer.index` is not modified while scanning, so every error below is
  // reported against the opening "(".
  const fail = (): void =>
    tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index);

  while (regexp_position < length) {
    tokenizer.seek_and_get_the_next_code_point(regexp_position);
    const code_point = tokenizer.code_point;

    if (!is_an_ascii_code_point(code_point)) return fail();
    if (regexp_position === regexp_start && code_point === "?") return fail();

    if (code_point === "\\") {
      // A backslash needs a following ASCII code point to escape.
      if (regexp_position === length - 1) return fail();
      tokenizer.get_the_next_code_point();
      if (!is_an_ascii_code_point(tokenizer.code_point)) return fail();
      regexp_position = tokenizer.next_index;
      continue;
    }

    if (code_point === ")") {
      depth--;

      if (depth === 0) {
        regexp_position = tokenizer.next_index;
        break;
      }
    } else if (code_point === "(") {
      depth++;
      if (regexp_position === length - 1) return fail();

      // Peek at the code point after "(": nested groups are only accepted
      // when they start with "?".
      const temporary_position = tokenizer.next_index;
      tokenizer.get_the_next_code_point();
      if (tokenizer.code_point !== "?") return fail();
      tokenizer.next_index = temporary_position;
    }

    regexp_position = tokenizer.next_index;
  }

  if (depth !== 0) return fail();

  // Exclude the closing ")" from the token value.
  const regexp_length = regexp_position - regexp_start - 1;
  if (regexp_length === 0) return fail();

  tokenizer.add_a_token(
    "regexp",
    regexp_position,
    regexp_start,
    regexp_length,
  );
}

/**
 * Splits `input` into tokens. The returned list always ends with an "end"
 * token.
 *
 * https://urlpattern.spec.whatwg.org/#tokenize
 *
 * @param input Pattern string to tokenize.
 * @param policy "strict" throws on the first tokenizing error; "lenient"
 *   emits "invalid-char" tokens instead.
 * @returns The token list.
 * @throws {TypeError} On a tokenizing error when `policy` is "strict".
 */
function tokenize(input: string, policy: TokenizePolicy): Token[] {
  const tokenizer = new Tokenizer();
  tokenizer.input = Array.from(input);
  tokenizer.policy = policy;

  while (tokenizer.index < tokenizer.input.length) {
    tokenizer.seek_and_get_the_next_code_point(tokenizer.index);
    const code_point = tokenizer.code_point;

    if (code_point === "*") {
      tokenizer.add_a_token_with_default_position_and_length("asterisk");
      continue;
    }

    if (code_point === "+" || code_point === "?") {
      tokenizer.add_a_token_with_default_position_and_length("other-modifier");
      continue;
    }

    if (code_point === "\\") {
      // A trailing backslash has nothing to escape.
      if (tokenizer.index === tokenizer.input.length - 1) {
        tokenizer.process_a_tokenizing_error(
          tokenizer.next_index,
          tokenizer.index,
        );
        continue;
      }

      const escaped_index = tokenizer.next_index;
      tokenizer.get_the_next_code_point();
      tokenizer.add_a_token_with_default_length(
        "escaped-char",
        tokenizer.next_index,
        escaped_index,
      );
      continue;
    }

    if (code_point === "{") {
      tokenizer.add_a_token_with_default_position_and_length("open");
      continue;
    }

    if (code_point === "}") {
      tokenizer.add_a_token_with_default_position_and_length("close");
      continue;
    }

    if (code_point === ":") {
      tokenize_name(tokenizer);
      continue;
    }

    if (code_point === "(") {
      tokenize_regexp(tokenizer);
      continue;
    }

    tokenizer.add_a_token_with_default_position_and_length("char");
  }

  tokenizer.add_a_token_with_default_length(
    "end",
    tokenizer.index,
    tokenizer.index,
  );

  return tokenizer.token_list;
}

/**
 * Tests `code_point` against `IDENTIFIER_START` when `first` is true and
 * against `IDENTIFIER_PART` otherwise.
 *
 * https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point
 */
function is_a_valid_name_code_point(code_point: string, first: boolean): boolean {
  if (first) return IDENTIFIER_START.test(code_point);
  return IDENTIFIER_PART.test(code_point);
}

/**
 * One parsed piece of a pattern string.
 *
 * https://urlpattern.spec.whatwg.org/#part
 */
export class Part {
  /** https://urlpattern.spec.whatwg.org/#part-type */
  readonly type: PartType;
  /** https://urlpattern.spec.whatwg.org/#part-value */
  readonly value: string;
  /** https://urlpattern.spec.whatwg.org/#part-modifier */
  readonly modifier: PartModifier;
  /** https://urlpattern.spec.whatwg.org/#part-name */
  readonly name: string;
  /** https://urlpattern.spec.whatwg.org/#part-prefix */
  readonly prefix: string;
  /** https://urlpattern.spec.whatwg.org/#part-suffix */
  readonly suffix: string;

  constructor(
    type: PartType,
    value: string,
    modifier: PartModifier,
    name = "",
    prefix = "",
    suffix = "",
  ) {
    this.type = type;
    this.value = value;
    this.modifier = modifier;
    this.name = name;
    this.prefix = prefix;
    this.suffix = suffix;
  }
}

/** https://urlpattern.spec.whatwg.org/#part-type */
type PartType = "fixed-text" | "regexp" | "segment-wildcard" | "full-wildcard";
/** https://urlpattern.spec.whatwg.org/#part-modifier */
type PartModifier = "none" | "optional" | "zero-or-more" | "one-or-more";

/**
 * Settings that tailor parsing to one URL component.
 *
 * https://urlpattern.spec.whatwg.org/#options
 */
class Options {
  /** https://urlpattern.spec.whatwg.org/#options-delimiter-code-point */
  readonly delimiter_code_point: string;
  /** https://urlpattern.spec.whatwg.org/#options-prefix-code-point */
  readonly prefix_code_point: string;
  /** https://urlpattern.spec.whatwg.org/#options-ignore-case */
  readonly ignore_case: boolean;

  constructor(
    delimiter_code_point: string,
    prefix_code_point: string,
    ignore_case = false,
  ) {
    this.delimiter_code_point = delimiter_code_point;
    this.prefix_code_point = prefix_code_point;
    this.ignore_case = ignore_case;
  }
}

/** https://urlpattern.spec.whatwg.org/#default-options */
export const DEFAULT_OPTIONS = new Options("", "");
/** https://urlpattern.spec.whatwg.org/#hostname-options */
export const HOSTNAME_OPTIONS = new Options(".", "");
/** https://urlpattern.spec.whatwg.org/#pathname-options */
export const PATHNAME_OPTIONS = new Options("/", "/");

/**
 * Applied to fixed text, prefixes and suffixes before they are stored in a
 * `Part`.
 *
 * https://urlpattern.spec.whatwg.org/#encoding-callback
 */
type EncodingCallback = (input: string) => string;

/** https://urlpattern.spec.whatwg.org/#full-wildcard-regexp-value */
const FULL_WILDCARD_REGEXP_VALUE = ".*";

/**
 * Mutable state used while turning a token list into a part list.
 *
 * https://urlpattern.spec.whatwg.org/#pattern-parser
 */
class PatternParser {
  /** https://urlpattern.spec.whatwg.org/#pattern-parser-token-list */
  token_list: Token[] = [];
  /** https://urlpattern.spec.whatwg.org/#pattern-parser-encoding-callback */
  readonly encoding_callback: EncodingCallback;
  /** https://urlpattern.spec.whatwg.org/#pattern-parser-segment-wildcard-regexp */
  readonly segment_wildcard_regexp: string;
  /** https://urlpattern.spec.whatwg.org/#pattern-parser-part-list */
  readonly part_list: Part[] = [];
  /** Fixed text collected so far but not yet emitted as a part. https://urlpattern.spec.whatwg.org/#pattern-parser-pending-fixed-value */
  pending_fixed_value = "";
  /** Position of the next token to consume. https://urlpattern.spec.whatwg.org/#pattern-parser-index */
  index = 0;
  /** Name given to the next unnamed group. https://urlpattern.spec.whatwg.org/#pattern-parser-next-numeric-name */
  next_numeric_name = 0;

  constructor(
    encoding_callback: EncodingCallback,
    segment_wildcard_regexp: string,
  ) {
    this.encoding_callback = encoding_callback;
    this.segment_wildcard_regexp = segment_wildcard_regexp;
  }

  /**
   * Consumes and returns the next token when it has the given type; otherwise
   * returns `null` and leaves `index` untouched.
   *
   * https://urlpattern.spec.whatwg.org/#try-to-consume-a-token
   */
  try_to_consume_a_token(type: TokenType): Token | null {
    const next_token = this.token_list[this.index];
    if (next_token === undefined || next_token.type !== type) return null;
    this.index++;
    return next_token;
  }

  /**
   * Consumes an "other-modifier" token or, failing that, an "asterisk" token.
   *
   * https://urlpattern.spec.whatwg.org/#try-to-consume-a-modifier-token
   */
  try_to_consume_a_modifier_token(): Token | null {
    return (
      this.try_to_consume_a_token("other-modifier") ??
        this.try_to_consume_a_token("asterisk")
    );
  }

  /**
   * Consumes a "regexp" token. An "asterisk" token is accepted as a wildcard
   * only when no name token precedes it.
   *
   * https://urlpattern.spec.whatwg.org/#try-to-consume-a-regexp-or-wildcard-token
   */
  try_to_consume_a_regexp_or_wildcard_token(
    name_token: Token | null,
  ): Token | null {
    let token = this.try_to_consume_a_token("regexp");

    if (name_token === null && token === null) {
      token = this.try_to_consume_a_token("asterisk");
    }

    return token;
  }

  /**
   * Consumes the next token, which must have the given type.
   *
   * https://urlpattern.spec.whatwg.org/#consume-a-required-token
   *
   * @throws {TypeError} When the next token has a different type.
   */
  consume_a_required_token(type: TokenType): Token {
    const result = this.try_to_consume_a_token(type);

    if (result === null) {
      const found = this.token_list[this.index];
      throw new TypeError(
        `Invalid URL pattern: expected a "${type}" token but found ` +
          `"${found?.type ?? "nothing"}" at code point index ${found?.index ?? -1}`,
      );
    }

    return result;
  }

  /**
   * Consumes consecutive "char" and "escaped-char" tokens and returns their
   * values concatenated.
   *
   * https://urlpattern.spec.whatwg.org/#consume-text
   */
  consume_text(): string {
    let result = "";

    while (true) {
      const token = this.try_to_consume_a_token("char") ??
        this.try_to_consume_a_token("escaped-char");
      if (token === null) break;
      result += token.value;
    }

    return result;
  }

  /**
   * Emits the pending fixed value (if any) as an encoded "fixed-text" part
   * and clears it.
   *
   * https://urlpattern.spec.whatwg.org/#maybe-add-a-part-from-the-pending-fixed-value
   */
  maybe_add_a_part_from_the_pending_fixed_value(): void {
    if (this.pending_fixed_value === "") return;
    const encoded_value = this.encoding_callback(this.pending_fixed_value);
    this.pending_fixed_value = "";
    this.part_list.push(new Part("fixed-text", encoded_value, "none"));
  }

  /**
   * Builds a part from the tokens of one group and appends it to the part
   * list, or folds plain text without a modifier into the pending fixed value.
   *
   * https://urlpattern.spec.whatwg.org/#add-a-part
   *
   * @throws {TypeError} When the group's name is already used by another part.
   */
  add_a_part(
    prefix: string,
    name_token: Token | null,
    regexp_or_wildcard_token: Token | null,
    suffix: string,
    modifier_token: Token | null,
  ): void {
    let modifier: PartModifier = "none";
    if (modifier_token?.value === "?") modifier = "optional";
    else if (modifier_token?.value === "*") modifier = "zero-or-more";
    else if (modifier_token?.value === "+") modifier = "one-or-more";

    const is_plain_text = name_token === null &&
      regexp_or_wildcard_token === null;

    // Unmodified plain text such as "{foo}" just extends the pending text.
    if (is_plain_text && modifier === "none") {
      this.pending_fixed_value += prefix;
      return;
    }

    this.maybe_add_a_part_from_the_pending_fixed_value();

    // Modified plain text such as "{foo}?" becomes its own fixed-text part.
    if (is_plain_text) {
      if (prefix === "") return;
      const encoded_value = this.encoding_callback(prefix);
      this.part_list.push(new Part("fixed-text", encoded_value, modifier));
      return;
    }

    let regexp_value: string;

    if (regexp_or_wildcard_token === null) {
      regexp_value = this.segment_wildcard_regexp;
    } else if (regexp_or_wildcard_token.type === "asterisk") {
      regexp_value = FULL_WILDCARD_REGEXP_VALUE;
    } else {
      regexp_value = regexp_or_wildcard_token.value;
    }

    // A custom regexp that is spelled exactly like one of the wildcard
    // regexps is classified as that wildcard and carries no value.
    let type: PartType = "regexp";

    if (regexp_value === this.segment_wildcard_regexp) {
      type = "segment-wildcard";
      regexp_value = "";
    } else if (regexp_value === FULL_WILDCARD_REGEXP_VALUE) {
      type = "full-wildcard";
      regexp_value = "";
    }

    let name = "";

    if (name_token !== null) {
      name = name_token.value;
    } else if (regexp_or_wildcard_token !== null) {
      // Unnamed groups are numbered in order of appearance.
      name = String(this.next_numeric_name);
      this.next_numeric_name++;
    }

    if (this.is_a_duplicate_name(name)) {
      throw new TypeError(
        `Invalid URL pattern: duplicate group name "${name}"`,
      );
    }

    const encoded_prefix = this.encoding_callback(prefix);
    const encoded_suffix = this.encoding_callback(suffix);
    this.part_list.push(
      new Part(
        type,
        regexp_value,
        modifier,
        name,
        encoded_prefix,
        encoded_suffix,
      ),
    );
  }

  /**
   * Returns true when a part with the given name is already in the part list.
   *
   * https://urlpattern.spec.whatwg.org/#is-a-duplicate-name
   */
  is_a_duplicate_name(name: string): boolean {
    return this.part_list.some((part) => part.name === name);
  }
}

/**
 * Parses a pattern string into a list of parts.
 *
 * https://urlpattern.spec.whatwg.org/#parse-a-pattern-string
 *
 * @param input Pattern string to parse.
 * @param options Delimiter and prefix code points to parse with.
 * @param encoding_callback Applied to fixed text, prefixes and suffixes.
 * @returns The parsed parts, in input order.
 * @throws {TypeError} When the pattern cannot be tokenized (strict policy),
 *   a required token is missing, or a group name is used twice.
 */
export function parse_a_pattern_string(
  input: string,
  options: Options,
  encoding_callback: EncodingCallback,
): Part[] {
  const parser = new PatternParser(
    encoding_callback,
    generate_a_segment_wildcard_regexp(options),
  );
  parser.token_list = tokenize(input, "strict");

  while (parser.index < parser.token_list.length) {
    // Look for the sequence <prefix char><name><regexp><modifier>. Any of the
    // tokens may be absent.
    const char_token = parser.try_to_consume_a_token("char");
    let name_token = parser.try_to_consume_a_token("name");
    let regexp_or_wildcard_token = parser
      .try_to_consume_a_regexp_or_wildcard_token(name_token);

    if (name_token !== null || regexp_or_wildcard_token !== null) {
      let prefix = "";
      if (char_token !== null) prefix = char_token.value;

      // Only the configured prefix code point may act as an implicit group
      // prefix; any other preceding char is ordinary fixed text.
      if (prefix !== "" && prefix !== options.prefix_code_point) {
        parser.pending_fixed_value += prefix;
        prefix = "";
      }

      parser.maybe_add_a_part_from_the_pending_fixed_value();
      const modifier_token = parser.try_to_consume_a_modifier_token();
      parser.add_a_part(
        prefix,
        name_token,
        regexp_or_wildcard_token,
        "",
        modifier_token,
      );
      continue;
    }

    // No group matched: a char or escaped char is plain fixed text.
    const fixed_token = char_token ??
      parser.try_to_consume_a_token("escaped-char");

    if (fixed_token !== null) {
      parser.pending_fixed_value += fixed_token.value;
      continue;
    }

    // Explicit "{ ... }" group with optional prefix and suffix text.
    const open_token = parser.try_to_consume_a_token("open");

    if (open_token !== null) {
      const prefix = parser.consume_text();
      name_token = parser.try_to_consume_a_token("name");
      regexp_or_wildcard_token = parser
        .try_to_consume_a_regexp_or_wildcard_token(name_token);
      const suffix = parser.consume_text();
      parser.consume_a_required_token("close");
      const modifier_token = parser.try_to_consume_a_modifier_token();
      parser.add_a_part(
        prefix,
        name_token,
        regexp_or_wildcard_token,
        suffix,
        modifier_token,
      );
      continue;
    }

    // Nothing else matched, so this must be the end of the pattern.
    parser.maybe_add_a_part_from_the_pending_fixed_value();
    parser.consume_a_required_token("end");
  }

  return parser.part_list;
}

/**
 * Prefixes each of `. + * ? ^ $ { } ( ) [ ] | / \` in `input` with a
 * backslash. Implemented locally so the module does not depend on the ES2025
 * `RegExp.escape` builtin.
 *
 * https://urlpattern.spec.whatwg.org/#escape-a-regexp-string
 */
function escape_a_regexp_string(input: string): string {
  return input.replace(/[.+*?^${}()[\]|\/\\]/g, "\\$&");
}

/**
 * Returns the regexp source matching one or more code points (non-greedy)
 * other than the options' delimiter code point.
 *
 * https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
 */
function generate_a_segment_wildcard_regexp(options: Options): string {
  return `[^${escape_a_regexp_string(options.delimiter_code_point)}]+?`;
}