Skip to content

Commit ad6e663

Browse files
committed
feat(syntax/parser): add basic regexp literal support
1 parent 60f3e16 commit ad6e663

File tree

7 files changed

+104
-6
lines changed

7 files changed

+104
-6
lines changed

jscomp/syntax/src/res_core.ml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,6 +1891,22 @@ and parse_constrained_expr_region p =
18911891
| _ -> Some expr)
18921892
| _ -> None
18931893

1894+
(* Builds the [%re "/pattern/flags"] extension expression for a regex literal.
 * Reads the start position from the parser's current token, consumes that
 * token with [Parser.next], and uses the resulting span as the location of
 * every node it creates.
 * The string constant is tagged with the "js" delimiter only when parsing
 * for the type checker.
 * NOTE(review): callers appear to invoke this with [p.token] being the
 * [Regex] token produced by [Parser.next_regex_token] — confirm. *)
and parse_regex p pattern flags =
  let start_pos = p.Parser.start_pos in
  Parser.next p;
  let loc = mk_loc start_pos p.prev_end_pos in
  let delimiter = if p.mode = ParseForTypeChecker then Some "js" else None in
  let literal =
    Ast_helper.Exp.constant ~loc
      (Pconst_string ("/" ^ pattern ^ "/" ^ flags, delimiter))
  in
  let payload = Parsetree.PStr [Ast_helper.Str.eval ~loc literal] in
  (* Give both the extension node and its identifier a real location so that
   * later phases can report diagnostics against the literal instead of
   * against an empty location. *)
  Ast_helper.Exp.extension ~loc (Location.mkloc "re" loc, payload)
1909+
18941910
(* Atomic expressions represent unambiguous expressions.
18951911
* This means that regardless of the context, these expressions
18961912
* are always interpreted correctly. *)
@@ -1960,6 +1976,18 @@ and parse_atomic_expr p =
19601976
Parser.err ~start_pos:p.prev_end_pos p
19611977
(Diagnostics.unexpected p.Parser.token p.breadcrumbs);
19621978
Recover.default_expr ()
1979+
| Forwardslash -> (
1980+
Parser.next_regex_token p;
1981+
match p.token with
1982+
| Regex (pattern, flags) -> parse_regex p pattern flags
1983+
| _ -> Ast_helper.Exp.extension (Location.mknoloc "re", Parsetree.PStr [])
1984+
)
1985+
| ForwardslashDot -> (
1986+
Parser.next_regex_token p;
1987+
match p.token with
1988+
| Regex (pattern, flags) -> parse_regex p ("." ^ pattern) flags
1989+
| _ -> Ast_helper.Exp.extension (Location.mknoloc "re", Parsetree.PStr [])
1990+
)
19631991
| token -> (
19641992
let err_pos = p.prev_end_pos in
19651993
Parser.err ~start_pos:err_pos p

jscomp/syntax/src/res_grammar.ml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ let is_atomic_pattern_start = function
136136
let is_atomic_expr_start = function
137137
| Token.True | False | Int _ | String _ | Float _ | Codepoint _ | Backtick
138138
| Uident _ | Lident _ | Hash | Lparen | List | Lbracket | Lbrace | LessThan
139-
| Module | Percent ->
139+
| Module | Percent | Forwardslash | ForwardslashDot ->
140140
true
141141
| _ -> false
142142

@@ -151,7 +151,7 @@ let is_expr_start = function
151151
| For | Hash | If | Int _ | Lbrace | Lbracket | LessThan | Lident _ | List
152152
| Lparen | Minus | MinusDot | Module | Percent | Plus | PlusDot | String _
153153
| Switch | True | Try | Uident _ | Underscore (* _ => doThings() *)
154-
| While ->
154+
| While | Forwardslash | ForwardslashDot ->
155155
true
156156
| _ -> false
157157

@@ -257,10 +257,10 @@ let is_jsx_child_start = is_atomic_expr_start
257257

258258
let is_block_expr_start = function
259259
| Token.Assert | At | Await | Backtick | Bang | Codepoint _ | Exception
260-
| False | Float _ | For | Forwardslash | Hash | If | Int _ | Lbrace | Lbracket
261-
| LessThan | Let | Lident _ | List | Lparen | Minus | MinusDot | Module | Open
262-
| Percent | Plus | PlusDot | String _ | Switch | True | Try | Uident _
263-
| Underscore | While ->
260+
| False | Float _ | For | Forwardslash | ForwardslashDot | Hash | If | Int _
261+
| Lbrace | Lbracket | LessThan | Let | Lident _ | List | Lparen | Minus
262+
| MinusDot | Module | Open | Percent | Plus | PlusDot | String _ | Switch
263+
| True | Try | Uident _ | Underscore | While ->
264264
true
265265
| _ -> false
266266

jscomp/syntax/src/res_parser.ml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,13 @@ let next_template_literal_token p =
106106
p.start_pos <- start_pos;
107107
p.end_pos <- end_pos
108108

109+
(* Advances the parser by asking the scanner for a regex token.
 * The previous token's end is remembered in [prev_end_pos] before the
 * parser's token and positions are overwritten with the scan result. *)
let next_regex_token p =
  match Scanner.scan_regex p.scanner with
  | regex_start, regex_end, regex_token ->
    p.prev_end_pos <- p.end_pos;
    p.end_pos <- regex_end;
    p.start_pos <- regex_start;
    p.token <- regex_token
115+
109116
let check_progress ~prev_end_pos ~result p =
110117
if p.end_pos == prev_end_pos then None else Some result
111118

jscomp/syntax/src/res_parser.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ val optional : t -> Token.t -> bool
3131
val next : ?prev_end_pos:Lexing.position -> t -> unit
3232
val next_unsafe : t -> unit (* Does not assert on Eof, makes no progress *)
3333
val next_template_literal_token : t -> unit
34+
(* Replaces the current token with the one returned by [Scanner.scan_regex],
   updating [start_pos], [end_pos] and [prev_end_pos] accordingly. *)
val next_regex_token : t -> unit
3435
val lookahead : t -> (t -> 'a) -> 'a
3536
val err :
3637
?start_pos:Lexing.position ->

jscomp/syntax/src/res_scanner.ml

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,64 @@ let scan_escape scanner =
537537
(* TODO: do we know it's \' ? *)
538538
Token.Codepoint {c = codepoint; original = contents}
539539

540+
(* Scans the body of a regex literal from the scanner's current position
 * (presumably just after the opening '/', which the caller has already
 * consumed — confirm) up to and including the closing '/' and any trailing
 * flags (d, g, i, m, s, u, v, y).
 *
 * Returns [(start_pos, end_pos, Token.Regex (pattern, flags))], where
 * [pattern] is the source text before the closing '/'.
 *
 * A '/' inside a character class ([...]) does not terminate the literal,
 * and a backslash escapes the following character. A newline or the end of
 * the input before the closing '/' reports "unterminated regex" and yields
 * an empty pattern and empty flags. *)
let scan_regex scanner =
  let start_pos = position scanner in
  let first_char_offset = scanner.offset in
  let at_line_end () = scanner.ch == '\n' || scanner.ch == hacky_eof_char in
  (* Source text between two byte offsets. *)
  let slice ~from_offset ~to_offset =
    (String.sub [@doesNotRaise]) scanner.src from_offset
      (to_offset - from_offset)
  in
  let rec skip_flags () =
    match scanner.ch with
    | 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' ->
      next scanner;
      skip_flags ()
    | _ -> ()
  in
  let rec scan ~in_class =
    match scanner.ch with
    | '/' when not in_class ->
      let last_char_offset = scanner.offset in
      next scanner;
      let pattern =
        slice ~from_offset:first_char_offset ~to_offset:last_char_offset
      in
      let flags_offset = scanner.offset in
      skip_flags ();
      let flags = slice ~from_offset:flags_offset ~to_offset:scanner.offset in
      (pattern, flags)
    | _ when at_line_end () ->
      let end_pos = position scanner in
      scanner.err ~start_pos ~end_pos (Diagnostics.message "unterminated regex");
      ("", "")
    | '\\' ->
      next scanner;
      (* Never let an escape swallow the line end: a backslash right before
       * a newline or the end of input must still be reported as an
       * unterminated regex instead of scanning on into the next line. *)
      if not (at_line_end ()) then next scanner;
      scan ~in_class
    | '[' ->
      next scanner;
      scan ~in_class:true
    | ']' ->
      next scanner;
      scan ~in_class:false
    | _ ->
      next scanner;
      scan ~in_class
  in
  let pattern, flags = scan ~in_class:false in
  let end_pos = position scanner in
  (start_pos, end_pos, Token.Regex (pattern, flags))
597+
540598
let scan_single_line_comment scanner =
541599
let start_off = scanner.offset in
542600
let start_pos = position scanner in

jscomp/syntax/src/res_scanner.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,5 @@ val reconsider_less_than : t -> Res_token.t
3434

3535
val scan_template_literal_token :
3636
t -> Lexing.position * Lexing.position * Res_token.t
37+
38+
(* Scans a regex literal body up to its closing '/' and trailing flags,
   returning the start position, the end position and a [Regex] token. *)
val scan_regex : t -> Lexing.position * Lexing.position * Res_token.t

jscomp/syntax/src/res_token.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ type t =
3939
| Backslash [@live]
4040
| Forwardslash
4141
| ForwardslashDot
42+
| Regex of string * string
4243
| Asterisk
4344
| AsteriskDot
4445
| Exponentiation
@@ -153,6 +154,7 @@ let to_string = function
153154
| PlusPlus -> "++"
154155
| PlusEqual -> "+="
155156
| Backslash -> "\\"
157+
| Regex (pattern, flags) -> "regex: /" ^ pattern ^ "/" ^ flags
156158
| Forwardslash -> "/"
157159
| ForwardslashDot -> "/."
158160
| Exception -> "exception"

0 commit comments

Comments
 (0)