From a7e29ab3f614b0ff0abe81e0a34a89edbd33844d Mon Sep 17 00:00:00 2001 From: Sylvestre Bouchot Date: Sat, 10 May 2025 11:22:59 +0200 Subject: [PATCH 1/3] feat: support parameter patterns --- Readme.md | 4 +++ src/cases.spec.ts | 46 +++++++++++++++++++++++++++++++ src/index.bench.ts | 5 ++++ src/index.spec.ts | 8 ++++++ src/index.ts | 69 ++++++++++++++++++++++++++++++++++++++-------- 5 files changed, 121 insertions(+), 11 deletions(-) diff --git a/Readme.md b/Readme.md index 17bf028..566246b 100644 --- a/Readme.md +++ b/Readme.md @@ -192,6 +192,10 @@ Parameter names must be provided after `:` or `*`, and they must be a valid Java Parameter names can be wrapped in double quote characters, and this error means you forgot to close the quote character. +### Unterminated parameter pattern + +Parameter patterns must be wrapped in parentheses, and this error means you forgot to close the parentheses. + ### Express <= 4.x Path-To-RegExp breaks compatibility with Express <= `4.x` in the following ways: diff --git a/src/cases.spec.ts b/src/cases.spec.ts index 6a7aeec..2198933 100644 --- a/src/cases.spec.ts +++ b/src/cases.spec.ts @@ -100,6 +100,20 @@ export const PARSER_TESTS: ParserTestSet[] = [ { type: "text", value: "stuff" }, ]), }, + { + path: "/:locale(de|en)", + expected: new TokenData([ + { type: "text", value: "/" }, + { type: "param", name: "locale", pattern: "de|en" }, + ]), + }, + { + path: "/:foo(a|b|c)", + expected: new TokenData([ + { type: "text", value: "/" }, + { type: "param", name: "foo", pattern: "a|b|c" }, + ]), + }, ]; export const STRINGIFY_TESTS: StringifyTestSet[] = [ @@ -270,6 +284,16 @@ export const COMPILE_TESTS: CompileTestSet[] = [ { input: { test: "123/xyz" }, expected: "/123/xyz" }, ], }, + { + path: "/:locale(de|en)", + tests: [ + { input: undefined, expected: null }, + { input: {}, expected: null }, + { input: { locale: "de" }, expected: "/de" }, + { input: { locale: "en" }, expected: "/en" }, + { input: { locale: "fr" }, expected: "/fr" }, + ], + }, ]; /** @@ -376,6 +400,28 @@ export const MATCH_TESTS: MatchTestSet[] = [ ], }, + /** + * Parameter patterns. + */ + { + path: "/:locale(de|en)", + tests: [ + { input: "/de", expected: { path: "/de", params: { locale: "de" } } }, + { input: "/en", expected: { path: "/en", params: { locale: "en" } } }, + { input: "/fr", expected: false }, + { input: "/", expected: false }, + ], + }, + { + path: "/:foo(\\d)", + tests: [ + { input: "/1", expected: { path: "/1", params: { foo: "1" } } }, + { input: "/123", expected: false }, + { input: "/", expected: false }, + { input: "/foo", expected: false }, + ], + }, + /** * Case-sensitive paths. */ diff --git a/src/index.bench.ts b/src/index.bench.ts index 9d39219..1409efb 100644 --- a/src/index.bench.ts +++ b/src/index.bench.ts @@ -12,6 +12,7 @@ const PATHS: string[] = [ const STATIC_PATH_MATCH = match("/user"); const SIMPLE_PATH_MATCH = match("/user/:id"); +const SIMPLE_PATH_MATCH_WITH_PATTERN = match("/user/:id(\\d+)"); const MULTI_SEGMENT_MATCH = match("/:x/:y"); const MULTI_PATTERN_MATCH = match("/:x-:y"); const TRICKY_PATTERN_MATCH = match("/:foo|:bar|"); @@ -25,6 +26,10 @@ bench("simple path", () => { for (const path of PATHS) SIMPLE_PATH_MATCH(path); }); +bench("simple path with parameter pattern", () => { + for (const path of PATHS) SIMPLE_PATH_MATCH_WITH_PATTERN(path); +}); + bench("multi segment", () => { for (const path of PATHS) MULTI_SEGMENT_MATCH(path); }); diff --git a/src/index.spec.ts b/src/index.spec.ts index cef557f..5b5ac22 100644 --- a/src/index.spec.ts +++ b/src/index.spec.ts @@ -50,6 +50,14 @@ describe("path-to-regexp", () => { ), ); }); + + it("should throw on unterminated parameter pattern", () => { + expect(() => parse("/:foo((bar")).toThrow( + new TypeError( + "Unterminated parameter pattern at 10: https://git.new/pathToRegexpError", + ), + ); + }); }); describe("compile errors", () => { diff --git a/src/index.ts b/src/index.ts index c178797..6acb9d5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -81,7 +81,7 @@ type TokenType = interface LexToken { type: TokenType; index: number; - value: string; + value: string | { name: string; pattern?: string }; } const SIMPLE_TOKENS: Record = { @@ -119,7 +119,10 @@ function* lexer(str: string): Generator { const chars = [...str]; let i = 0; - function name() { + function name(options?: { pattern?: boolean }): { + name: string; + pattern?: string; + } { let value = ""; if (ID_START.test(chars[++i])) { @@ -153,7 +156,29 @@ function* lexer(str: string): Generator { throw new TypeError(`Missing parameter name at ${i}: ${DEBUG_URL}`); } - return value; + if (chars[i] === "(" && options?.pattern) { + let depth = 1; + let pattern = ""; + i++; + while (i < chars.length && depth > 0) { + if (chars[i] === "(") { + depth++; + } else if (chars[i] === ")") { + depth--; + } + if (depth > 0) { + pattern += chars[i++]; + } + } + if (depth !== 0) { + throw new TypeError( + `Unterminated parameter pattern at ${i}: ${DEBUG_URL}`, + ); + } + i++; + return { name: value, pattern }; + } + return { name: value }; } while (i < chars.length) { @@ -165,10 +190,14 @@ function* lexer(str: string): Generator { } else if (value === "\\") { yield { type: "ESCAPED", index: i++, value: chars[i++] }; } else if (value === ":") { - const value = name(); - yield { type: "PARAM", index: i, value }; + const value = name({ pattern: true }); + yield { + type: "PARAM", + index: i, + value, + }; } else if (value === "*") { - const value = name(); + const { name: value } = name(); yield { type: "WILDCARD", index: i, value }; } else { yield { type: "CHAR", index: i, value: chars[i++] }; @@ -191,15 +220,23 @@ class Iter { return this._peek; } - tryConsume(type: TokenType): string | undefined { + tryConsume(type: Extract): { + name: string; + pattern?: string; + }; + tryConsume(type: Exclude): string; + tryConsume( + type: TokenType, + ): string | { name: string; pattern?: string } | undefined { const token = this.peek(); if (token.type !== type) return; this._peek = undefined; // Reset after consumed. return token.value; } - consume(type: TokenType): string { - const value = this.tryConsume(type); + consume(type: TokenType): string | { name: string; pattern?: string } { + const value = + type === "PARAM" ? this.tryConsume(type) : this.tryConsume(type); if (value !== undefined) return value; const { type: nextType, index } = this.peek(); throw new TypeError( @@ -231,6 +268,7 @@ export interface Text { export interface Parameter { type: "param"; name: string; + pattern?: string; } /** @@ -287,9 +325,11 @@ export function parse(str: string, options: ParseOptions = {}): TokenData { const param = it.tryConsume("PARAM"); if (param) { + const { name, pattern } = param; tokens.push({ type: "param", - name: param, + name, + pattern, }); continue; } @@ -579,7 +619,14 @@ function toRegExp(tokens: Flattened[], delimiter: string, keys: Keys) { } if (token.type === "param") { - result += `(${negate(delimiter, isSafeSegmentParam ? "" : backtrack)}+)`; + if (token.pattern) { + result += `(${token.pattern})`; + } else { + result += `(${negate( + delimiter, + isSafeSegmentParam ? "" : backtrack, + )}+)`; + } } else { result += `([\\s\\S]+)`; } From 664eddb89d240eab5012b361057040d9de0edaff Mon Sep 17 00:00:00 2001 From: Sylvestre Bouchot Date: Sun, 11 May 2025 18:52:54 +0200 Subject: [PATCH 2/3] Re-add Pattern token, only add regex with pipe special char --- Readme.md | 10 ++++- src/cases.spec.ts | 9 ----- src/index.bench.ts | 5 --- src/index.spec.ts | 22 +++++++++-- src/index.ts | 95 +++++++++++++++++++++++++--------------------- 5 files changed, 79 insertions(+), 62 deletions(-) diff --git a/Readme.md b/Readme.md index 566246b..aa61966 100644 --- a/Readme.md +++ b/Readme.md @@ -192,10 +192,18 @@ Parameter names must be provided after `:` or `*`, and they must be a valid Java Parameter names can be wrapped in double quote characters, and this error means you forgot to close the quote character. -### Unterminated parameter pattern +### Unbalanced pattern Parameter patterns must be wrapped in parentheses, and this error means you forgot to close the parentheses. +### Only '|' is allowed as a special character in patterns + +When defining a custom pattern for a parameter (e.g., `:id()`), only the pipe character (`|`) is allowed as a special character inside the pattern. + +### Missing pattern + +When defining a custom pattern for a parameter (e.g., `:id()`), you must provide a pattern. + ### Express <= 4.x Path-To-RegExp breaks compatibility with Express <= `4.x` in the following ways: diff --git a/src/cases.spec.ts b/src/cases.spec.ts index 2198933..dcd91ca 100644 --- a/src/cases.spec.ts +++ b/src/cases.spec.ts @@ -412,15 +412,6 @@ export const MATCH_TESTS: MatchTestSet[] = [ { input: "/", expected: false }, ], }, - { - path: "/:foo(\\d)", - tests: [ - { input: "/1", expected: { path: "/1", params: { foo: "1" } } }, - { input: "/123", expected: false }, - { input: "/", expected: false }, - { input: "/foo", expected: false }, - ], - }, /** * Case-sensitive paths. diff --git a/src/index.bench.ts b/src/index.bench.ts index 1409efb..9d39219 100644 --- a/src/index.bench.ts +++ b/src/index.bench.ts @@ -12,7 +12,6 @@ const PATHS: string[] = [ const STATIC_PATH_MATCH = match("/user"); const SIMPLE_PATH_MATCH = match("/user/:id"); -const SIMPLE_PATH_MATCH_WITH_PATTERN = match("/user/:id(\\d+)"); const MULTI_SEGMENT_MATCH = match("/:x/:y"); const MULTI_PATTERN_MATCH = match("/:x-:y"); const TRICKY_PATTERN_MATCH = match("/:foo|:bar|"); @@ -26,10 +25,6 @@ bench("simple path", () => { for (const path of PATHS) SIMPLE_PATH_MATCH(path); }); -bench("simple path with parameter pattern", () => { - for (const path of PATHS) SIMPLE_PATH_MATCH_WITH_PATTERN(path); -}); - bench("multi segment", () => { for (const path of PATHS) MULTI_SEGMENT_MATCH(path); }); diff --git a/src/index.spec.ts b/src/index.spec.ts index 5b5ac22..ca59a80 100644 --- a/src/index.spec.ts +++ b/src/index.spec.ts @@ -51,10 +51,26 @@ describe("path-to-regexp", () => { ); }); - it("should throw on unterminated parameter pattern", () => { - expect(() => parse("/:foo((bar")).toThrow( + it("should throw on unbalanced pattern", () => { + expect(() => parse("/:foo((bar|sdfsdf)/")).toThrow( new TypeError( - "Unterminated parameter pattern at 10: https://git.new/pathToRegexpError", + "Unbalanced pattern at 5: https://git.new/pathToRegexpError", + ), + ); + }); + + it("should throw on not allowed characters in pattern", () => { + expect(() => parse("/:foo(\\d)")).toThrow( + new TypeError( + `Only "|" is allowed as a special character in patterns at 6: https://git.new/pathToRegexpError`, + ), + ); + }); + + it("should throw on missing pattern", () => { + expect(() => parse("//:foo()")).toThrow( + new TypeError( + "Missing pattern at 6: https://git.new/pathToRegexpError", ), ); }); diff --git a/src/index.ts b/src/index.ts index 6acb9d5..d4bf38d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,6 +3,7 @@ const NOOP_VALUE = (value: string) => value; const ID_START = /^[$_\p{ID_Start}]$/u; const ID_CONTINUE = /^[$\u200c\u200d\p{ID_Continue}]$/u; const DEBUG_URL = "https://git.new/pathToRegexpError"; +const INVALID_PATTERN_CHARS = "^$.+*?[]{}\\^"; /** * Encode a string into another string. @@ -63,6 +64,7 @@ type TokenType = | "}" | "WILDCARD" | "PARAM" + | "PATTERN" | "CHAR" | "ESCAPED" | "END" @@ -81,7 +83,7 @@ type TokenType = interface LexToken { type: TokenType; index: number; - value: string | { name: string; pattern?: string }; + value: string; } const SIMPLE_TOKENS: Record = { @@ -89,7 +91,7 @@ const SIMPLE_TOKENS: Record = { "{": "{", "}": "}", // Reserved. - "(": "(", + // "(": "(", ")": ")", "[": "[", "]": "]", @@ -119,10 +121,7 @@ function* lexer(str: string): Generator { const chars = [...str]; let i = 0; - function name(options?: { pattern?: boolean }): { - name: string; - pattern?: string; - } { + function name() { let value = ""; if (ID_START.test(chars[++i])) { @@ -156,29 +155,46 @@ function* lexer(str: string): Generator { throw new TypeError(`Missing parameter name at ${i}: ${DEBUG_URL}`); } - if (chars[i] === "(" && options?.pattern) { - let depth = 1; - let pattern = ""; - i++; - while (i < chars.length && depth > 0) { - if (chars[i] === "(") { - depth++; - } else if (chars[i] === ")") { - depth--; - } - if (depth > 0) { - pattern += chars[i++]; - } - } - if (depth !== 0) { + return value; + } + + function pattern() { + const pos = i++; + let depth = 1; + let pattern = ""; + + while (i < chars.length && depth > 0) { + const char = chars[i]; + + if (INVALID_PATTERN_CHARS.includes(char)) { throw new TypeError( - `Unterminated parameter pattern at ${i}: ${DEBUG_URL}`, + `Only "|" is allowed as a special character in patterns at ${i}: ${DEBUG_URL}`, ); } + + if (char === ")") { + depth--; + if (depth === 0) { + i++; + break; + } + } else if (char === "(") { + depth++; + } + + pattern += char; i++; - return { name: value, pattern }; } - return { name: value }; + + if (depth) { + throw new TypeError(`Unbalanced pattern at ${pos}: ${DEBUG_URL}`); + } + + if (!pattern) { + throw new TypeError(`Missing pattern at ${pos}: ${DEBUG_URL}`); + } + + return pattern; } while (i < chars.length) { @@ -190,14 +206,13 @@ function* lexer(str: string): Generator { } else if (value === "\\") { yield { type: "ESCAPED", index: i++, value: chars[i++] }; } else if (value === ":") { - const value = name({ pattern: true }); - yield { - type: "PARAM", - index: i, - value, - }; + const value = name(); + yield { type: "PARAM", index: i, value }; + } else if (value === "(") { + const value = pattern(); + yield { type: "PATTERN", index: i, value }; } else if (value === "*") { - const { name: value } = name(); + const value = name(); yield { type: "WILDCARD", index: i, value }; } else { yield { type: "CHAR", index: i, value: chars[i++] }; @@ -220,23 +235,15 @@ class Iter { return this._peek; } - tryConsume(type: Extract): { - name: string; - pattern?: string; - }; - tryConsume(type: Exclude): string; - tryConsume( - type: TokenType, - ): string | { name: string; pattern?: string } | undefined { + tryConsume(type: TokenType): string | undefined { const token = this.peek(); if (token.type !== type) return; this._peek = undefined; // Reset after consumed. return token.value; } - consume(type: TokenType): string | { name: string; pattern?: string } { - const value = - type === "PARAM" ? this.tryConsume(type) : this.tryConsume(type); + consume(type: TokenType): string { + const value = this.tryConsume(type); if (value !== undefined) return value; const { type: nextType, index } = this.peek(); throw new TypeError( @@ -325,10 +332,10 @@ export function parse(str: string, options: ParseOptions = {}): TokenData { const param = it.tryConsume("PARAM"); if (param) { - const { name, pattern } = param; + const pattern = it.tryConsume("PATTERN"); tokens.push({ type: "param", - name, + name: param, pattern, }); continue; From eaf19ce5dc89d9581a05564673f224bb4f1b98d2 Mon Sep 17 00:00:00 2001 From: Sylvestre Bouchot Date: Fri, 16 May 2025 19:48:21 +0200 Subject: [PATCH 3/3] suggestions: move validation inside toRegexp, basic regex parser with escaping support, support for wildcard --- Readme.md | 4 --- src/cases.spec.ts | 28 +++++++++++++++++++- src/index.spec.ts | 18 ++++++------- src/index.ts | 67 +++++++++++++++++++++++++++++++++++------------ 4 files changed, 86 insertions(+), 31 deletions(-) diff --git a/Readme.md b/Readme.md index aa61966..4af74bc 100644 --- a/Readme.md +++ b/Readme.md @@ -196,10 +196,6 @@ Parameter names can be wrapped in double quote characters, and this error means Parameter patterns must be wrapped in parentheses, and this error means you forgot to close the parentheses. -### Only '|' is allowed as a special character in patterns - -When defining a custom pattern for a parameter (e.g., `:id()`), only the pipe character (`|`) is allowed as a special character inside the pattern. - ### Missing pattern When defining a custom pattern for a parameter (e.g., `:id()`), you must provide a pattern. diff --git a/src/cases.spec.ts b/src/cases.spec.ts index dcd91ca..6c3b081 100644 --- a/src/cases.spec.ts +++ b/src/cases.spec.ts @@ -401,7 +401,7 @@ export const MATCH_TESTS: MatchTestSet[] = [ }, /** - * Parameter patterns. + * Patterns */ { path: "/:locale(de|en)", @@ -412,6 +412,32 @@ export const MATCH_TESTS: MatchTestSet[] = [ { input: "/", expected: false }, ], }, + { + path: "/:foo(\\\\d)", + tests: [ + { + input: "/\\d", + expected: { path: "/\\d", params: { foo: "\\d" } }, + }, + ], + }, + { + path: "/file.*ext(png|jpg)", + tests: [ + { + input: "/file.png", + expected: { path: "/file.png", params: { ext: ["png"] } }, + }, + { + input: "/file.webp", + expected: false, + }, + { + input: "/file.jpg", + expected: { path: "/file.jpg", params: { ext: ["jpg"] } }, + }, + ], + }, /** * Case-sensitive paths. diff --git a/src/index.spec.ts b/src/index.spec.ts index ca59a80..d5e8fc5 100644 --- a/src/index.spec.ts +++ b/src/index.spec.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { parse, compile, match, stringify } from "./index.js"; +import { parse, compile, match, stringify, pathToRegexp } from "./index.js"; import { PARSER_TESTS, COMPILE_TESTS, @@ -59,14 +59,6 @@ describe("path-to-regexp", () => { ); }); - it("should throw on not allowed characters in pattern", () => { - expect(() => parse("/:foo(\\d)")).toThrow( - new TypeError( - `Only "|" is allowed as a special character in patterns at 6: https://git.new/pathToRegexpError`, - ), - ); - }); - it("should throw on missing pattern", () => { expect(() => parse("//:foo()")).toThrow( new TypeError( @@ -118,6 +110,14 @@ describe("path-to-regexp", () => { }); }); + describe("pathToRegexp errors", () => { + it("should throw on not allowed characters in pattern", () => { + expect(() => pathToRegexp("/:foo(\\d)")).toThrow( + new TypeError(`Only "|" meta character is allowed in pattern: \\d`), + ); + }); + }); + describe.each(PARSER_TESTS)( "parse $path with $options", ({ path, options, expected }) => { diff --git a/src/index.ts b/src/index.ts index d4bf38d..cbcfda0 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,8 +3,23 @@ const NOOP_VALUE = (value: string) => value; const ID_START = /^[$_\p{ID_Start}]$/u; const ID_CONTINUE = /^[$\u200c\u200d\p{ID_Continue}]$/u; const DEBUG_URL = "https://git.new/pathToRegexpError"; -const INVALID_PATTERN_CHARS = "^$.+*?[]{}\\^"; - +const PATTERN_META_CHARS = new Set([ + "*", + "+", + "?", + ".", + "^", + "$", + "|", + "\\", + "(", + ")", + "[", + "]", + "{", + "}", +]); +const ALLOWED_PATTERN_META_CHARS = new Set(["|"]); /** * Encode a string into another string. */ @@ -166,12 +181,6 @@ function* lexer(str: string): Generator { while (i < chars.length && depth > 0) { const char = chars[i]; - if (INVALID_PATTERN_CHARS.includes(char)) { - throw new TypeError( - `Only "|" is allowed as a special character in patterns at ${i}: ${DEBUG_URL}`, - ); - } - if (char === ")") { depth--; if (depth === 0) { @@ -284,6 +293,7 @@ export interface Parameter { export interface Wildcard { type: "wildcard"; name: string; + pattern?: string; } /** @@ -343,9 +353,11 @@ export function parse(str: string, options: ParseOptions = {}): TokenData { const wildcard = it.tryConsume("WILDCARD"); if (wildcard) { + const pattern = it.tryConsume("PATTERN"); tokens.push({ type: "wildcard", name: wildcard, + pattern, }); continue; } @@ -625,17 +637,14 @@ function toRegExp(tokens: Flattened[], delimiter: string, keys: Keys) { throw new TypeError(`Missing text after "${token.name}": ${DEBUG_URL}`); } - if (token.type === "param") { - if (token.pattern) { - result += `(${token.pattern})`; + if (token.pattern && isPatternSafe(token.pattern)) { + result += `(${token.pattern})`; + } else { + if (token.type === "param") { + result += `(${negate(delimiter, isSafeSegmentParam ? "" : backtrack)}+)`; } else { - result += `(${negate( - delimiter, - isSafeSegmentParam ? "" : backtrack, - )}+)`; + result += `([\\s\\S]+)`; } - } else { - result += `([\\s\\S]+)`; } keys.push(token); @@ -700,3 +709,27 @@ function isNextNameSafe(token: Token | undefined) { if (!token || token.type !== "text") return true; return !ID_CONTINUE.test(token.value[0]); } + +/** + * Validate the pattern contains only allowed meta characters. + */ +function isPatternSafe(pattern: string) { + let i = 0; + while (i < pattern.length) { + const char = pattern[i]; + + if (char === "\\" && PATTERN_META_CHARS.has(pattern[i + 1])) { + i += 2; + } else if (PATTERN_META_CHARS.has(char)) { + if (!ALLOWED_PATTERN_META_CHARS.has(char)) { + throw new TypeError( + `Only "${[...ALLOWED_PATTERN_META_CHARS].join(", ")}" meta character is allowed in pattern: ${pattern}`, + ); + } + i++; + } else { + i++; + } + } + return true; +}