Skip to content
This repository was archived by the owner on Jun 15, 2023. It is now read-only.

Commit dc71166

Browse files
committed
Hide the hacky eof char implementation
Makes the scanner a bit safer
1 parent 570f07e commit dc71166

File tree

4 files changed

+25
-19
lines changed

4 files changed

+25
-19
lines changed

src/res_character_codes.ml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
let eof = Char.unsafe_chr (-1)
2-
31
let lineSeparator = Char.unsafe_chr 0x2028
42
let paragraphSeparator = Char.unsafe_chr 0x2029
53

src/res_scanner.ml

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,15 @@ module CharacterCodes = Res_character_codes
22
module Diagnostics = Res_diagnostics
33
module Token = Res_token
44
module Comment = Res_comment
5+
56
type mode = Jsx | Diamond
67

8+
(* We hide the implementation detail of how the scanner reads characters. Our char
9+
will also contain the special -1 value to indicate end-of-file. This isn't
10+
ideal; we should clean this up *)
11+
let hackyEOFChar = Char.unsafe_chr (-1)
12+
type charEncoding = Char.t
13+
714
type t = {
815
filename: string;
916
src: bytes;
@@ -12,7 +19,7 @@ type t = {
1219
-> endPos: Lexing.position
1320
-> Diagnostics.category
1421
-> unit;
15-
mutable ch: Char.t; (* current character *)
22+
mutable ch: charEncoding; (* current character *)
1623
mutable offset: int; (* character offset *)
1724
mutable rdOffset: int; (* reading offset (position after current character) *)
1825
mutable lineOffset: int; (* current line offset *)
@@ -61,14 +68,14 @@ let next scanner =
6168
scanner.ch <- ch
6269
) else (
6370
scanner.offset <- Bytes.length scanner.src;
64-
scanner.ch <- CharacterCodes.eof
71+
scanner.ch <- hackyEOFChar
6572
)
6673

6774
let peek scanner =
6875
if scanner.rdOffset < Bytes.length scanner.src then
6976
Bytes.unsafe_get scanner.src scanner.rdOffset
7077
else
71-
CharacterCodes.eof
78+
hackyEOFChar
7279

7380
let make ?(line=1) ~filename b =
7481
let scanner = {
@@ -199,7 +206,7 @@ let scanNumber scanner =
199206
(Diagnostics.message msg)
200207
);
201208
next scanner;
202-
Some ( ch)
209+
Some ch
203210
| _ ->
204211
None
205212
in
@@ -214,7 +221,7 @@ let scanExoticIdentifier scanner =
214221
let startPos = position scanner in
215222

216223
let rec scan () =
217-
if scanner.ch == CharacterCodes.eof then
224+
if scanner.ch == hackyEOFChar then
218225
let endPos = position scanner in
219226
scanner.err ~startPos ~endPos (Diagnostics.message "Did you forget a \" here?")
220227
else if scanner.ch == '"' then (
@@ -273,7 +280,7 @@ let scanStringEscapeSequence ~startPos scanner =
273280
let d = CharacterCodes.digitValue scanner.ch in
274281
if d >= base then
275282
let pos = position scanner in
276-
let msg = if scanner.ch == CharacterCodes.eof then
283+
let msg = if scanner.ch == hackyEOFChar then
277284
"unclosed escape sequence"
278285
else "unknown escape sequence"
279286
in
@@ -295,7 +302,7 @@ let scanString scanner =
295302

296303
let startPos = position scanner in
297304
let rec scan () =
298-
if scanner.ch == CharacterCodes.eof then
305+
if scanner.ch == hackyEOFChar then
299306
let endPos = position scanner in
300307
scanner.err ~startPos ~endPos Diagnostics.unclosedString
301308
else if scanner.ch == '"' then (
@@ -352,7 +359,7 @@ let scanEscape scanner =
352359
let scanSingleLineComment scanner =
353360
let startOff = scanner.offset in
354361
let startPos = position scanner in
355-
while scanner.ch != CharacterCodes.eof && not (CharacterCodes.isLineBreak scanner.ch) do
362+
while scanner.ch != hackyEOFChar && not (CharacterCodes.isLineBreak scanner.ch) do
356363
next scanner
357364
done;
358365
let endPos = position scanner in
@@ -371,7 +378,7 @@ let scanMultiLineComment scanner =
371378
next scanner;
372379
next scanner;
373380
if depth > 0 then scan ~depth:(depth - 1) () else ()
374-
) else if scanner.ch == CharacterCodes.eof then (
381+
) else if scanner.ch == hackyEOFChar then (
375382
let endPos = position scanner in
376383
scanner.err ~startPos ~endPos Diagnostics.unclosedComment
377384
) else if scanner.ch == '/'
@@ -405,7 +412,7 @@ let scanTemplateLiteralToken scanner =
405412
let startPos = position scanner in
406413

407414
let rec scan () =
408-
if scanner.ch == CharacterCodes.eof then (
415+
if scanner.ch == hackyEOFChar then (
409416
let endPos = position scanner in
410417
scanner.err ~startPos ~endPos Diagnostics.unclosedTemplate;
411418
Token.TemplateTail(
@@ -465,7 +472,7 @@ let rec scan scanner =
465472
)
466473
else begin
467474
next scanner;
468-
if ch == CharacterCodes.eof then Token.Eof
475+
if ch == hackyEOFChar then Token.Eof
469476
else match ch with
470477
| '.' ->
471478
if scanner.ch == '.' then (
@@ -698,21 +705,21 @@ let isBinaryOp src startCnum endCnum =
698705
in
699706
let rightOk =
700707
let c =
701-
if endCnum == Bytes.length src then CharacterCodes.eof
708+
if endCnum == Bytes.length src then hackyEOFChar
702709
else endCnum |> (Bytes.get [@doesNotRaise]) src
703710
in
704711
c == ' ' ||
705712
c == '\t' ||
706713
CharacterCodes.isLineBreak c ||
707-
c == CharacterCodes.eof
714+
c == hackyEOFChar
708715
in
709716
leftOk && rightOk
710717

711718
(* Assumes `{` has been consumed; advances the scanner to the end of a Reason quoted string (for conversion).
712719
* In {| foo bar |} the scanner will be advanced until after the `|}` *)
713720
let tryAdvanceQuotedString scanner =
714721
let rec scanContents tag () =
715-
if scanner.ch == CharacterCodes.eof then (
722+
if scanner.ch == hackyEOFChar then (
716723
()
717724
) else if scanner.ch == '|' then (
718725
next scanner;

src/res_scanner.mli

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
type mode = Jsx | Diamond
22

3+
type charEncoding
4+
35
type t = {
46
filename: string;
57
src: bytes;
@@ -8,7 +10,7 @@ type t = {
810
-> endPos: Lexing.position
911
-> Res_diagnostics.category
1012
-> unit;
11-
mutable ch: Char.t; (* current character *)
13+
mutable ch: charEncoding; (* current character *)
1214
mutable offset: int; (* character offset *)
1315
mutable rdOffset: int; (* reading offset (position after current character) *)
1416
mutable lineOffset: int; (* current line offset *)

src/res_token.ml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
module Comment = Res_comment
2-
module CharacterCodes = Res_character_codes
32

43
type t =
54
| Open
@@ -212,7 +211,7 @@ let isKeyword = function
212211
let lookupKeyword str =
213212
try keywordTable str with
214213
| Not_found ->
215-
if CharacterCodes.isUpperCase ((str.[0] [@doesNotRaise])) then
214+
if Res_character_codes.isUpperCase ((str.[0] [@doesNotRaise])) then
216215
Uident str
217216
else Lident str
218217

0 commit comments

Comments
 (0)