@@ -74,6 +74,9 @@ fileprivate extension Compiler.ByteCodeGen {
74
74
emitMatchScalar ( s)
75
75
}
76
76
77
+ case let . characterClass( cc) :
78
+ emitCharacterClass ( cc)
79
+
77
80
case let . assertion( kind) :
78
81
try emitAssertion ( kind)
79
82
@@ -148,147 +151,24 @@ fileprivate extension Compiler.ByteCodeGen {
148
151
}
149
152
}
150
153
151
- mutating func emitStartOfLine( ) {
152
- builder. buildAssert { [ semanticLevel = options. semanticLevel]
153
- ( _, _, input, pos, subjectBounds) in
154
- if pos == subjectBounds. lowerBound { return true }
155
- switch semanticLevel {
156
- case . graphemeCluster:
157
- return input [ input. index ( before: pos) ] . isNewline
158
- case . unicodeScalar:
159
- return input. unicodeScalars [ input. unicodeScalars. index ( before: pos) ] . isNewline
160
- }
161
- }
162
- }
163
-
164
- mutating func emitEndOfLine( ) {
165
- builder. buildAssert { [ semanticLevel = options. semanticLevel]
166
- ( _, _, input, pos, subjectBounds) in
167
- if pos == subjectBounds. upperBound { return true }
168
- switch semanticLevel {
169
- case . graphemeCluster:
170
- return input [ pos] . isNewline
171
- case . unicodeScalar:
172
- return input. unicodeScalars [ pos] . isNewline
173
- }
174
- }
175
- }
176
-
177
154
mutating func emitAssertion(
178
155
_ kind: DSLTree . Atom . Assertion
179
156
) throws {
180
- // FIXME: Depends on API model we have... We may want to
181
- // think through some of these with API interactions in mind
182
- //
183
- // This might break how we use `bounds` for both slicing
184
- // and things like `firstIndex`, that is `firstIndex` may
185
- // need to supply both a slice bounds and a per-search bounds.
186
- switch kind {
187
- case . startOfSubject:
188
- builder. buildAssert { ( _, _, input, pos, subjectBounds) in
189
- pos == subjectBounds. lowerBound
190
- }
191
-
192
- case . endOfSubjectBeforeNewline:
193
- builder. buildAssert { [ semanticLevel = options. semanticLevel]
194
- ( _, _, input, pos, subjectBounds) in
195
- if pos == subjectBounds. upperBound { return true }
196
- switch semanticLevel {
197
- case . graphemeCluster:
198
- return input. index ( after: pos) == subjectBounds. upperBound
199
- && input [ pos] . isNewline
200
- case . unicodeScalar:
201
- return input. unicodeScalars. index ( after: pos) == subjectBounds. upperBound
202
- && input. unicodeScalars [ pos] . isNewline
203
- }
204
- }
205
-
206
- case . endOfSubject:
207
- builder. buildAssert { ( _, _, input, pos, subjectBounds) in
208
- pos == subjectBounds. upperBound
209
- }
210
-
211
- case . resetStartOfMatch:
212
- // FIXME: Figure out how to communicate this out
157
+ if kind == . resetStartOfMatch {
213
158
throw Unsupported ( #"\K (reset/keep assertion)"# )
214
-
215
- case . firstMatchingPositionInSubject:
216
- // TODO: We can probably build a nice model with API here
217
-
218
- // FIXME: This needs to be based on `searchBounds`,
219
- // not the `subjectBounds` given as an argument here
220
- builder. buildAssert { ( _, _, input, pos, subjectBounds) in false }
221
-
222
- case . textSegment:
223
- builder. buildAssert { ( _, _, input, pos, _) in
224
- // FIXME: Grapheme or word based on options
225
- input. isOnGraphemeClusterBoundary ( pos)
226
- }
227
-
228
- case . notTextSegment:
229
- builder. buildAssert { ( _, _, input, pos, _) in
230
- // FIXME: Grapheme or word based on options
231
- !input. isOnGraphemeClusterBoundary ( pos)
232
- }
233
-
234
- case . startOfLine:
235
- emitStartOfLine ( )
236
-
237
- case . endOfLine:
238
- emitEndOfLine ( )
239
-
240
- case . caretAnchor:
241
- if options. anchorsMatchNewlines {
242
- emitStartOfLine ( )
243
- } else {
244
- builder. buildAssert { ( _, _, input, pos, subjectBounds) in
245
- pos == subjectBounds. lowerBound
246
- }
247
- }
248
-
249
- case . dollarAnchor:
250
- if options. anchorsMatchNewlines {
251
- emitEndOfLine ( )
252
- } else {
253
- builder. buildAssert { ( _, _, input, pos, subjectBounds) in
254
- pos == subjectBounds. upperBound
255
- }
256
- }
257
-
258
- case . wordBoundary:
259
- builder. buildAssert { [ options]
260
- ( cache, maxIndex, input, pos, subjectBounds) in
261
- if options. usesSimpleUnicodeBoundaries {
262
- // TODO: How should we handle bounds?
263
- return _CharacterClassModel. word. isBoundary (
264
- input,
265
- at: pos,
266
- bounds: subjectBounds,
267
- with: options
268
- )
269
- } else {
270
- return input. isOnWordBoundary ( at: pos, using: & cache, & maxIndex)
271
- }
272
- }
273
-
274
- case . notWordBoundary:
275
- builder. buildAssert { [ options]
276
- ( cache, maxIndex, input, pos, subjectBounds) in
277
- if options. usesSimpleUnicodeBoundaries {
278
- // TODO: How should we handle bounds?
279
- return !_CharacterClassModel. word. isBoundary (
280
- input,
281
- at: pos,
282
- bounds: subjectBounds,
283
- with: options
284
- )
285
- } else {
286
- return !input. isOnWordBoundary ( at: pos, using: & cache, & maxIndex)
287
- }
288
- }
289
159
}
160
+ builder. buildAssert (
161
+ by: kind,
162
+ options. anchorsMatchNewlines,
163
+ options. usesSimpleUnicodeBoundaries,
164
+ options. usesASCIIWord,
165
+ options. semanticLevel)
290
166
}
291
-
167
+
168
+ mutating func emitCharacterClass( _ cc: DSLTree . Atom . CharacterClass ) {
169
+ builder. buildMatchBuiltin ( model: cc. asRuntimeModel ( options) )
170
+ }
171
+
292
172
mutating func emitMatchScalar( _ s: UnicodeScalar ) {
293
173
assert ( options. semanticLevel == . unicodeScalar)
294
174
if options. isCaseInsensitive && s. properties. isCased {
@@ -907,10 +787,10 @@ fileprivate extension Compiler.ByteCodeGen {
907
787
} else {
908
788
builder. buildMatchAsciiBitset ( asciiBitset)
909
789
}
910
- } else {
911
- let consumer = try ccc. generateConsumer ( options)
912
- builder. buildConsume ( by: consumer)
790
+ return
913
791
}
792
+ let consumer = try ccc. generateConsumer ( options)
793
+ builder. buildConsume ( by: consumer)
914
794
}
915
795
916
796
mutating func emitConcatenation( _ children: [ DSLTree . Node ] ) throws {
0 commit comments