Skip to content

Commit 18a727a

Browse files
authored
Regex literal syntax (#6776)
* feat(syntax/parser): add basic regexp literal support * feat(syntax/printer): print regex extension in literal form * test(syntax/jsx): remove test for broken recovery case to be fixed later * test(syntax/parsing): add regex literal tests * test(syntax): use regex literal in test files necessary to pass syntax roundtrip test since printing has changed * refactor(others/js): use regex literals * docs: add changelog entry
1 parent 94ac53e commit 18a727a

33 files changed

+1303
-97
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
- Throws an instance of JavaScript's `new Error()` and adds the extension payload for `cause` option. https://github.com/rescript-lang/rescript-compiler/pull/6611
2020
- Allow free vars in types for type coercion `e :> t`. https://github.com/rescript-lang/rescript-compiler/pull/6828
2121
- Allow `private` in with constraints. https://github.com/rescript-lang/rescript-compiler/pull/6843
22+
- Add regex literals (`/.../flags`) as syntax sugar for the `%re("/.../flags")` extension. https://github.com/rescript-lang/rescript-compiler/pull/6776
2223

2324
#### :boom: Breaking Change
2425

jscomp/others/js_re.res

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ external input: result => string = "input"
5555

5656
/**
5757
Constructs a RegExp object (Js.Re.t) from a `string`.
58-
Regex literals `%re("/.../")` should generally be preferred, but `fromString`
58+
Regex literals `/.../` should generally be preferred, but `fromString`
5959
is useful when you need to dynamically construct a regex using strings,
6060
exactly like when you do so in JavaScript.
6161
@@ -112,7 +112,7 @@ set.
112112
## Examples
113113
114114
```rescript
115-
let re = %re("/ab*TODO/g")
115+
let re = /ab*TODO/g
116116
let str = "abbcdefabh"
117117
118118
let break = ref(false)
@@ -166,7 +166,7 @@ Returns `Some(Js.Re.result)` if a match is found, `None` otherwise.
166166
* Ignore case
167167
*/
168168
169-
let re = %re("/quick\s(brown).+?(jumps)/ig")
169+
let re = /quick\s(brown).+?(jumps)/ig
170170
let result = Js.Re.exec_(re, "The Quick Brown Fox Jumps Over The Lazy Dog")
171171
```
172172

jscomp/others/js_string.res

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -440,11 +440,11 @@ on MDN.
440440
## Examples
441441
442442
```rescript
443-
Js.String.match_(%re("/b[aeiou]t/"), "The better bats") == Some(["bet"])
444-
Js.String.match_(%re("/b[aeiou]t/g"), "The better bats") == Some(["bet", "bat"])
445-
Js.String.match_(%re("/(\d+)-(\d+)-(\d+)/"), "Today is 2018-04-05.") ==
443+
Js.String.match_(/b[aeiou]t/, "The better bats") == Some(["bet"])
444+
Js.String.match_(/b[aeiou]t/g, "The better bats") == Some(["bet", "bat"])
445+
Js.String.match_(/(\d+)-(\d+)-(\d+)/, "Today is 2018-04-05.") ==
446446
Some(["2018-04-05", "2018", "04", "05"])
447-
Js.String.match_(%re("/b[aeiou]g/"), "The large container.") == None
447+
Js.String.match_(/b[aeiou]g/, "The large container.") == None
448448
```
449449
*/
450450
@send
@@ -531,8 +531,8 @@ on MDN.
531531
## Examples
532532
533533
```rescript
534-
Js.String.replaceByRe(%re("/[aeiou]/g"), "x", "vowels be gone") == "vxwxls bx gxnx"
535-
Js.String.replaceByRe(%re("/(\w+) (\w+)/"), "$2, $1", "Juan Fulano") == "Fulano, Juan"
534+
Js.String.replaceByRe(/[aeiou]/g, "x", "vowels be gone") == "vxwxls bx gxnx"
535+
Js.String.replaceByRe(/(\w+) (\w+)/, "$2, $1", "Juan Fulano") == "Fulano, Juan"
536536
```
537537
*/
538538
@send
@@ -552,7 +552,7 @@ on MDN.
552552
553553
```rescript
554554
let str = "beautiful vowels"
555-
let re = %re("/[aeiou]/g")
555+
let re = /[aeiou]/g
556556
let matchFn = (matchPart, _offset, _wholeString) => Js.String.toUpperCase(matchPart)
557557
558558
Js.String.unsafeReplaceBy0(re, matchFn, str) == "bEAUtIfUl vOwEls"
@@ -576,7 +576,7 @@ on MDN.
576576
577577
```rescript
578578
let str = "Jony is 40"
579-
let re = %re("/(Jony is )\d+/g")
579+
let re = /(Jony is )\d+/g
580580
let matchFn = (_match, part1, _offset, _wholeString) => {
581581
part1 ++ "41"
582582
}
@@ -602,7 +602,7 @@ on MDN.
602602
603603
```rescript
604604
let str = "7 times 6"
605-
let re = %re("/(\d+) times (\d+)/")
605+
let re = /(\d+) times (\d+)/
606606
let matchFn = (_match, p1, p2, _offset, _wholeString) => {
607607
switch (Belt.Int.fromString(p1), Belt.Int.fromString(p2)) {
608608
| (Some(x), Some(y)) => Belt.Int.toString(x * y)
@@ -641,8 +641,8 @@ on MDN.
641641
## Examples
642642
643643
```rescript
644-
Js.String.search(%re("/\d+/"), "testing 1 2 3") == 8
645-
Js.String.search(%re("/\d+/"), "no numbers") == -1
644+
Js.String.search(/\d+/, "testing 1 2 3") == 8
645+
Js.String.search(/\d+/, "no numbers") == -1
646646
```
647647
*/
648648
@send

jscomp/others/js_string2.res

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -432,11 +432,11 @@ on MDN.
432432
## Examples
433433
434434
```rescript
435-
Js.String2.match_("The better bats", %re("/b[aeiou]t/")) == Some(["bet"])
436-
Js.String2.match_("The better bats", %re("/b[aeiou]t/g")) == Some(["bet", "bat"])
437-
Js.String2.match_("Today is 2018-04-05.", %re("/(\d+)-(\d+)-(\d+)/")) ==
435+
Js.String2.match_("The better bats", /b[aeiou]t/) == Some(["bet"])
436+
Js.String2.match_("The better bats", /b[aeiou]t/g) == Some(["bet", "bat"])
437+
Js.String2.match_("Today is 2018-04-05.", /(\d+)-(\d+)-(\d+)/) ==
438438
Some(["2018-04-05", "2018", "04", "05"])
439-
Js.String2.match_("The large container.", %re("/b[aeiou]g/")) == None
439+
Js.String2.match_("The large container.", /b[aeiou]g/) == None
440440
```
441441
*/
442442
external match_: (t, Js_re.t) => option<array<option<t>>> = "match"
@@ -516,8 +516,8 @@ on MDN.
516516
## Examples
517517
518518
```rescript
519-
Js.String2.replaceByRe("vowels be gone", %re("/[aeiou]/g"), "x") == "vxwxls bx gxnx"
520-
Js.String2.replaceByRe("Juan Fulano", %re("/(\w+) (\w+)/"), "$2, $1") == "Fulano, Juan"
519+
Js.String2.replaceByRe("vowels be gone", /[aeiou]/g, "x") == "vxwxls bx gxnx"
520+
Js.String2.replaceByRe("Juan Fulano", /(\w+) (\w+)/, "$2, $1") == "Fulano, Juan"
521521
```
522522
*/
523523
external replaceByRe: (t, Js_re.t, t) => t = "replace"
@@ -536,7 +536,7 @@ on MDN.
536536
537537
```rescript
538538
let str = "beautiful vowels"
539-
let re = %re("/[aeiou]/g")
539+
let re = /[aeiou]/g
540540
let matchFn = (matchPart, _offset, _wholeString) => Js.String2.toUpperCase(matchPart)
541541
542542
Js.String2.unsafeReplaceBy0(str, re, matchFn) == "bEAUtIfUl vOwEls"
@@ -559,7 +559,7 @@ on MDN.
559559
560560
```rescript
561561
let str = "Jony is 40"
562-
let re = %re("/(Jony is )\d+/g")
562+
let re = /(Jony is )\d+/g
563563
let matchFn = (_match, part1, _offset, _wholeString) => {
564564
part1 ++ "41"
565565
}
@@ -584,7 +584,7 @@ on MDN.
584584
585585
```rescript
586586
let str = "7 times 6"
587-
let re = %re("/(\d+) times (\d+)/")
587+
let re = /(\d+) times (\d+)/
588588
let matchFn = (_match, p1, p2, _offset, _wholeString) => {
589589
switch (Belt.Int.fromString(p1), Belt.Int.fromString(p2)) {
590590
| (Some(x), Some(y)) => Belt.Int.toString(x * y)
@@ -621,8 +621,8 @@ on MDN.
621621
## Examples
622622
623623
```rescript
624-
Js.String2.search("testing 1 2 3", %re("/\d+/")) == 8
625-
Js.String2.search("no numbers", %re("/\d+/")) == -1
624+
Js.String2.search("testing 1 2 3", /\d+/) == 8
625+
Js.String2.search("no numbers", /\d+/) == -1
626626
```
627627
*/
628628
external search: (t, Js_re.t) => int = "search"
@@ -709,7 +709,7 @@ on MDN.
709709
## Examples
710710
711711
```rescript
712-
Js.String2.splitByRe("art; bed , cog ;dad", %re("/\s*[,;]\s*TODO/")) == [
712+
Js.String2.splitByRe("art; bed , cog ;dad", /\s*[,;]\s*TODO/) == [
713713
Some("art"),
714714
Some("bed"),
715715
Some("cog"),
@@ -732,15 +732,15 @@ on MDN.
732732
## Examples
733733
734734
```rescript
735-
Js.String2.splitByReAtMost("one: two: three: four", %re("/\s*:\s*TODO/"), ~limit=3) == [
735+
Js.String2.splitByReAtMost("one: two: three: four", /\s*:\s*TODO/, ~limit=3) == [
736736
Some("one"),
737737
Some("two"),
738738
Some("three"),
739739
]
740740
741-
Js.String2.splitByReAtMost("one: two: three: four", %re("/\s*:\s*TODO/"), ~limit=0) == []
741+
Js.String2.splitByReAtMost("one: two: three: four", /\s*:\s*TODO/, ~limit=0) == []
742742
743-
Js.String2.splitByReAtMost("one: two: three: four", %re("/\s*:\s*TODO/"), ~limit=8) == [
743+
Js.String2.splitByReAtMost("one: two: three: four", /\s*:\s*TODO/, ~limit=8) == [
744744
Some("one"),
745745
Some("two"),
746746
Some("three"),

jscomp/syntax/src/res_core.ml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,6 +1891,22 @@ and parse_constrained_expr_region p =
18911891
| _ -> Some expr)
18921892
| _ -> None
18931893

1894+
(* [parse_regex p pattern flags] builds the expression for a regex literal.
 *
 * The result is an extension node named "re" whose payload is a single
 * evaluated string constant made of a slash, [pattern], a slash and [flags],
 * i.e. the same shape as a hand-written %re extension with a string payload.
 *
 * The parser is advanced by one token. The location covers the span from the
 * start of the token that was current on entry (expected to be the Regex
 * token) to the end of that token once it has been consumed. *)
and parse_regex p pattern flags =
1895+
let start_pos = p.Parser.start_pos in
1896+
Parser.next p;
1897+
let loc = mk_loc start_pos p.prev_end_pos in
1898+
let payload =
1899+
Parsetree.PStr
1900+
[
1901+
Ast_helper.Str.eval ~loc
1902+
(Ast_helper.Exp.constant ~loc
1903+
(Pconst_string
1904+
( "/" ^ pattern ^ "/" ^ flags,
1905+
(* The string delimiter is Some js only when parsing for the type
 * checker; in any other mode it is None. *)
if p.mode = ParseForTypeChecker then Some "js" else None )));
1906+
]
1907+
in
1908+
(* NOTE(review): the extension name is created with mknoloc and the
 * extension expression itself is built without ~loc, so only the payload
 * carries [loc] - confirm this is intended. *)
Ast_helper.Exp.extension (Location.mknoloc "re", payload)
1909+
18941910
(* Atomic expressions represent unambiguous expressions.
18951911
* This means that regardless of the context, these expressions
18961912
* are always interpreted correctly. *)
@@ -1960,6 +1976,18 @@ and parse_atomic_expr p =
19601976
Parser.err ~start_pos:p.prev_end_pos p
19611977
(Diagnostics.unexpected p.Parser.token p.breadcrumbs);
19621978
Recover.default_expr ()
1979+
| Forwardslash -> (
1980+
Parser.next_regex_token p;
1981+
match p.token with
1982+
| Regex (pattern, flags) -> parse_regex p pattern flags
1983+
| _ -> Ast_helper.Exp.extension (Location.mknoloc "re", Parsetree.PStr [])
1984+
)
1985+
| ForwardslashDot -> (
1986+
Parser.next_regex_token p;
1987+
match p.token with
1988+
| Regex (pattern, flags) -> parse_regex p ("." ^ pattern) flags
1989+
| _ -> Ast_helper.Exp.extension (Location.mknoloc "re", Parsetree.PStr [])
1990+
)
19631991
| token -> (
19641992
let err_pos = p.prev_end_pos in
19651993
Parser.err ~start_pos:err_pos p

jscomp/syntax/src/res_grammar.ml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ let is_atomic_pattern_start = function
136136
let is_atomic_expr_start = function
137137
| Token.True | False | Int _ | String _ | Float _ | Codepoint _ | Backtick
138138
| Uident _ | Lident _ | Hash | Lparen | List | Lbracket | Lbrace | LessThan
139-
| Module | Percent ->
139+
| Module | Percent | Forwardslash | ForwardslashDot ->
140140
true
141141
| _ -> false
142142

@@ -151,7 +151,7 @@ let is_expr_start = function
151151
| For | Hash | If | Int _ | Lbrace | Lbracket | LessThan | Lident _ | List
152152
| Lparen | Minus | MinusDot | Module | Percent | Plus | PlusDot | String _
153153
| Switch | True | Try | Uident _ | Underscore (* _ => doThings() *)
154-
| While ->
154+
| While | Forwardslash | ForwardslashDot ->
155155
true
156156
| _ -> false
157157

@@ -257,10 +257,10 @@ let is_jsx_child_start = is_atomic_expr_start
257257

258258
let is_block_expr_start = function
259259
| Token.Assert | At | Await | Backtick | Bang | Codepoint _ | Exception
260-
| False | Float _ | For | Forwardslash | Hash | If | Int _ | Lbrace | Lbracket
261-
| LessThan | Let | Lident _ | List | Lparen | Minus | MinusDot | Module | Open
262-
| Percent | Plus | PlusDot | String _ | Switch | True | Try | Uident _
263-
| Underscore | While ->
260+
| False | Float _ | For | Forwardslash | ForwardslashDot | Hash | If | Int _
261+
| Lbrace | Lbracket | LessThan | Let | Lident _ | List | Lparen | Minus
262+
| MinusDot | Module | Open | Percent | Plus | PlusDot | String _ | Switch
263+
| True | Try | Uident _ | Underscore | While ->
264264
true
265265
| _ -> false
266266

jscomp/syntax/src/res_parser.ml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,13 @@ let next_template_literal_token p =
106106
p.start_pos <- start_pos;
107107
p.end_pos <- end_pos
108108

109+
(* [next_regex_token p] replaces the parser's current token with the result
 * of [Scanner.scan_regex] run on the parser's scanner.
 *
 * The previous token's end position is saved in [p.prev_end_pos] before
 * [p.start_pos] and [p.end_pos] are overwritten with the positions reported
 * by the scanner. *)
let next_regex_token p =
110+
let start_pos, end_pos, token = Scanner.scan_regex p.scanner in
111+
p.token <- token;
112+
(* Must happen before p.end_pos is overwritten below. *)
p.prev_end_pos <- p.end_pos;
113+
p.start_pos <- start_pos;
114+
p.end_pos <- end_pos
115+
109116
let check_progress ~prev_end_pos ~result p =
110117
if p.end_pos == prev_end_pos then None else Some result
111118

jscomp/syntax/src/res_parser.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ val optional : t -> Token.t -> bool
3131
val next : ?prev_end_pos:Lexing.position -> t -> unit
3232
val next_unsafe : t -> unit (* Does not assert on Eof, makes no progress *)
3333
val next_template_literal_token : t -> unit
34+
val next_regex_token : t -> unit
3435
val lookahead : t -> (t -> 'a) -> 'a
3536
val err :
3637
?start_pos:Lexing.position ->

jscomp/syntax/src/res_printer.ml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3075,6 +3075,16 @@ and print_expression ~state (e : Parsetree.expression) cmt_tbl =
30753075
Doc.soft_line;
30763076
Doc.rbrace;
30773077
])
3078+
| ( {txt = "re"},
3079+
PStr
3080+
[
3081+
{
3082+
pstr_desc =
3083+
Pstr_eval
3084+
({pexp_desc = Pexp_constant (Pconst_string (expr, _))}, []);
3085+
};
3086+
] ) ->
3087+
Doc.text expr
30783088
| extension ->
30793089
print_extension ~state ~at_module_lvl:false extension cmt_tbl)
30803090
| Pexp_apply (e, [(Nolabel, {pexp_desc = Pexp_array sub_lists})])

jscomp/syntax/src/res_scanner.ml

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,64 @@ let scan_escape scanner =
537537
(* TODO: do we know it's \' ? *)
538538
Token.Codepoint {c = codepoint; original = contents}
539539

540+
(* [scan_regex scanner] scans the body and flags of a regex literal starting
 * at the scanner's current offset and returns
 * [(start_pos, end_pos, Token.Regex (pattern, flags))].
 *
 * The pattern is the source text from the offset on entry up to (not
 * including) the first forward slash that is not preceded by a backslash.
 * Presumably the caller has already consumed the opening slash - TODO confirm
 * against the parser. Flags are the run of characters from the set
 * d g i m s u v y that directly follows the closing slash.
 *
 * If a newline or the end-of-file character is reached before a closing
 * slash, an "unterminated regex" diagnostic is reported through [scanner.err]
 * and the token carries an empty pattern and empty flags. *)
let scan_regex scanner =
541+
let start_pos = position scanner in
542+
let buf = Buffer.create 0 in
543+
let first_char_offset = scanner.offset in
544+
let last_offset_in_buf = ref first_char_offset in
545+
546+
(* Appends the source text between [!last_offset_in_buf] and [start_offset]
 * to [buf] and moves the marker forward. *)
let bring_buf_up_to_date ~start_offset =
547+
let str_up_to_now =
548+
(String.sub scanner.src !last_offset_in_buf
549+
(start_offset - !last_offset_in_buf) [@doesNotRaise])
550+
in
551+
Buffer.add_string buf str_up_to_now;
552+
last_offset_in_buf := start_offset
553+
in
554+
555+
(* Returns the pattern text. When [buf] is empty it is sliced directly out of
 * the source between the two offsets.
 * NOTE(review): within this function nothing writes to [buf] before
 * [result] is called (the only writer is [bring_buf_up_to_date], which is
 * only invoked from the else-branch below), so the else-branch looks
 * unreachable - confirm and consider removing the buffer. *)
let result ~first_char_offset ~last_char_offset =
556+
if Buffer.length buf = 0 then
557+
(String.sub [@doesNotRaise]) scanner.src first_char_offset
558+
(last_char_offset - first_char_offset)
559+
else (
560+
bring_buf_up_to_date ~start_offset:last_char_offset;
561+
Buffer.contents buf)
562+
in
563+
let rec scan () =
564+
match scanner.ch with
565+
(* Closing slash: the pattern ends just before it; then read the flags.
 * NOTE(review): a slash inside a [...] character class is not
 * special-cased here, so it also terminates the pattern - confirm whether
 * that is acceptable. *)
| '/' ->
566+
let last_char_offset = scanner.offset in
567+
next scanner;
568+
let pattern = result ~first_char_offset ~last_char_offset in
569+
let flags =
570+
let flags_buf = Buffer.create 0 in
571+
let rec scan_flags () =
572+
match scanner.ch with
573+
| 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' ->
574+
Buffer.add_char flags_buf scanner.ch;
575+
next scanner;
576+
scan_flags ()
577+
(* Any other character ends the flag run and is left unconsumed. *)
| _ -> Buffer.contents flags_buf
578+
in
579+
scan_flags ()
580+
in
581+
(pattern, flags)
582+
| ch when ch == '\n' || ch == hacky_eof_char ->
583+
let end_pos = position scanner in
584+
scanner.err ~start_pos ~end_pos (Diagnostics.message "unterminated regex");
585+
("", "")
586+
(* Escape sequence: skip the backslash and the character after it, so an
 * escaped slash does not close the literal.
 * NOTE(review): the skipped character is not inspected, so a backslash
 * directly followed by a newline or end of file is stepped over without
 * reaching the unterminated-regex case on that character - confirm. *)
| '\\' ->
587+
next scanner;
588+
next scanner;
589+
scan ()
590+
| _ ->
591+
next scanner;
592+
scan ()
593+
in
594+
let pattern, flags = scan () in
595+
let end_pos = position scanner in
596+
(start_pos, end_pos, Token.Regex (pattern, flags))
597+
540598
let scan_single_line_comment scanner =
541599
let start_off = scanner.offset in
542600
let start_pos = position scanner in

jscomp/syntax/src/res_scanner.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,5 @@ val reconsider_less_than : t -> Res_token.t
3434

3535
val scan_template_literal_token :
3636
t -> Lexing.position * Lexing.position * Res_token.t
37+
38+
val scan_regex : t -> Lexing.position * Lexing.position * Res_token.t

jscomp/syntax/src/res_token.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ type t =
3939
| Backslash [@live]
4040
| Forwardslash
4141
| ForwardslashDot
42+
| Regex of string * string
4243
| Asterisk
4344
| AsteriskDot
4445
| Exponentiation
@@ -153,6 +154,7 @@ let to_string = function
153154
| PlusPlus -> "++"
154155
| PlusEqual -> "+="
155156
| Backslash -> "\\"
157+
| Regex (pattern, flags) -> "regex: /" ^ pattern ^ "/" ^ flags
156158
| Forwardslash -> "/"
157159
| ForwardslashDot -> "/."
158160
| Exception -> "exception"

0 commit comments

Comments
 (0)