@@ -28,11 +28,7 @@ class ScriptScanner {
28
28
29
29
private final Pattern whitespace = Pattern .compile ("\\ s+" );
30
30
31
- private final Pattern identifier = Pattern .compile ("[a-z][a-z0-9_]*" , Pattern .CASE_INSENSITIVE );
32
-
33
- private final Pattern singleQuotedString = Pattern .compile ("'(\\ \\ '|[^'])*'" );
34
-
35
- private final Pattern ansiQuotedString = Pattern .compile ("\" (\\ \\ \" |[^\" ])*\" " );
31
+ private final Pattern identifier = Pattern .compile ("[a-z][a-z0-9_$]*" , Pattern .CASE_INSENSITIVE );
36
32
37
33
private final Pattern dollarQuotedStringDelimiter = Pattern .compile ("\\ $\\ w*\\ $" );
38
34
@@ -54,7 +50,8 @@ private boolean matches(String substring) {
54
50
55
51
private boolean matches (Pattern regexp ) {
56
52
Matcher m = regexp .matcher (script );
57
- if (m .find (offset ) && m .start () == offset ) {
53
+ m .region (offset , script .length ());
54
+ if (m .lookingAt ()) {
58
55
currentMatch = m .group ();
59
56
offset = m .end ();
60
57
return true ;
@@ -99,6 +96,26 @@ private boolean matchesMultilineComment() {
99
96
return false ;
100
97
}
101
98
99
+ private boolean matchesQuotedString (final char quote ) {
100
+ if (script .charAt (offset ) == quote ) {
101
+ boolean escaped = false ;
102
+ for (int i = offset + 1 ; i < script .length (); i ++) {
103
+ char c = script .charAt (i );
104
+ if (escaped ) {
105
+ //just skip the escaped character and drop the flag
106
+ escaped = false ;
107
+ } else if (c == '\\' ) {
108
+ escaped = true ;
109
+ } else if (c == quote ) {
110
+ currentMatch = script .substring (offset , i + 1 );
111
+ offset = i + 1 ;
112
+ return true ;
113
+ }
114
+ }
115
+ }
116
+ return false ;
117
+ }
118
+
102
119
private boolean matchesDollarQuotedString () {
103
120
//Matches $<tag>$ .... $<tag>$
104
121
if (matches (dollarQuotedStringDelimiter )) {
@@ -124,7 +141,7 @@ Lexem next() {
124
141
return Lexem .SEPARATOR ;
125
142
} else if (matchesSingleLineComment () || matchesMultilineComment ()) {
126
143
return Lexem .COMMENT ;
127
- } else if (matches ( singleQuotedString ) || matches ( ansiQuotedString ) || matchesDollarQuotedString ()) {
144
+ } else if (matchesQuotedString ( '\'' ) || matchesQuotedString ( '"' ) || matchesDollarQuotedString ()) {
128
145
return Lexem .QUOTED_STRING ;
129
146
} else if (matches (identifier )) {
130
147
return Lexem .IDENTIFIER ;
0 commit comments