Skip to content

Commit 2d7a391

Browse files
committed
wip
1 parent e5ee845 commit 2d7a391

File tree

1 file changed

+302
-37
lines changed

1 file changed

+302
-37
lines changed

Sources/_StringProcessing/Engine/MEQuantify.swift

Lines changed: 302 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,49 +9,120 @@ extension Processor {
99
let produceSavePointRange = payload.quantKind == .eager
1010
let isScalarSemantics = payload.isScalarSemantics
1111

12+
let isZeroOrMore = payload.minTrips == 0 && payload.maxExtraTrips == nil
13+
let isOneOrMore = payload.minTrips == 1 && payload.maxExtraTrips == nil
14+
1215
let matchResult: (next: String.Index, savePointRange: Range<Position>?)?
1316
switch payload.type {
1417
case .asciiBitset:
15-
matchResult = input.matchQuantifiedASCIIBitset(
16-
registers[payload.bitset],
17-
at: currentPosition,
18-
limitedBy: end,
19-
minMatches: minMatches,
20-
maxMatches: maxMatches,
21-
produceSavePointRange: produceSavePointRange,
22-
isScalarSemantics: isScalarSemantics)
18+
if isZeroOrMore {
19+
matchResult = input.matchZeroOrMoreASCIIBitset(
20+
registers[payload.bitset],
21+
at: currentPosition,
22+
limitedBy: end,
23+
produceSavePointRange: produceSavePointRange,
24+
isScalarSemantics: isScalarSemantics)
25+
} else if isOneOrMore {
26+
matchResult = input.matchOneOrMoreASCIIBitset(
27+
registers[payload.bitset],
28+
at: currentPosition,
29+
limitedBy: end,
30+
produceSavePointRange: produceSavePointRange,
31+
isScalarSemantics: isScalarSemantics)
32+
} else {
33+
matchResult = input.matchQuantifiedASCIIBitset(
34+
registers[payload.bitset],
35+
at: currentPosition,
36+
limitedBy: end,
37+
minMatches: minMatches,
38+
maxMatches: maxMatches,
39+
produceSavePointRange: produceSavePointRange,
40+
isScalarSemantics: isScalarSemantics)
41+
}
2342

2443
case .asciiChar:
25-
matchResult = input.matchQuantifiedScalar(
26-
Unicode.Scalar(payload.asciiChar),
27-
at: currentPosition,
28-
limitedBy: end,
29-
minMatches: minMatches,
30-
maxMatches: maxMatches,
31-
produceSavePointRange: produceSavePointRange,
32-
isScalarSemantics: isScalarSemantics)
44+
if isZeroOrMore {
45+
matchResult = input.matchZeroOrMoreScalar(
46+
Unicode.Scalar(payload.asciiChar),
47+
at: currentPosition,
48+
limitedBy: end,
49+
produceSavePointRange: produceSavePointRange,
50+
isScalarSemantics: isScalarSemantics)
51+
} else if isOneOrMore {
52+
matchResult = input.matchOneOrMoreScalar(
53+
Unicode.Scalar(payload.asciiChar),
54+
at: currentPosition,
55+
limitedBy: end,
56+
produceSavePointRange: produceSavePointRange,
57+
isScalarSemantics: isScalarSemantics)
58+
} else {
59+
matchResult = input.matchQuantifiedScalar(
60+
Unicode.Scalar(payload.asciiChar),
61+
at: currentPosition,
62+
limitedBy: end,
63+
minMatches: minMatches,
64+
maxMatches: maxMatches,
65+
produceSavePointRange: produceSavePointRange,
66+
isScalarSemantics: isScalarSemantics)
67+
}
3368

3469
case .any:
35-
matchResult = input.matchQuantifiedRegexDot(
36-
at: currentPosition,
37-
limitedBy: end,
38-
minMatches: minMatches,
39-
maxMatches: maxMatches,
40-
produceSavePointRange: produceSavePointRange,
41-
isScalarSemantics: isScalarSemantics,
42-
anyMatchesNewline: payload.anyMatchesNewline)
70+
if isZeroOrMore {
71+
matchResult = input.matchZeroOrMoreRegexDot(
72+
at: currentPosition,
73+
limitedBy: end,
74+
produceSavePointRange: produceSavePointRange,
75+
anyMatchesNewline: payload.anyMatchesNewline,
76+
isScalarSemantics: isScalarSemantics)
77+
} else if isOneOrMore {
78+
matchResult = input.matchOneOrMoreRegexDot(
79+
at: currentPosition,
80+
limitedBy: end,
81+
produceSavePointRange: produceSavePointRange,
82+
anyMatchesNewline: payload.anyMatchesNewline,
83+
isScalarSemantics: isScalarSemantics)
84+
} else {
85+
matchResult = input.matchQuantifiedRegexDot(
86+
at: currentPosition,
87+
limitedBy: end,
88+
minMatches: minMatches,
89+
maxMatches: maxMatches,
90+
produceSavePointRange: produceSavePointRange,
91+
anyMatchesNewline: payload.anyMatchesNewline,
92+
isScalarSemantics: isScalarSemantics)
93+
}
4394

4495
case .builtin:
45-
matchResult = input.matchQuantifiedBuiltinCC(
46-
payload.builtin,
47-
at: currentPosition,
48-
limitedBy: end,
49-
minMatches: minMatches,
50-
maxMatches: maxMatches,
51-
produceSavePointRange: produceSavePointRange,
52-
isInverted: payload.builtinIsInverted,
53-
isStrictASCII: payload.builtinIsStrict,
54-
isScalarSemantics: isScalarSemantics)
96+
if isZeroOrMore {
97+
matchResult = input.matchZeroOrMoreBuiltinCC(
98+
payload.builtin,
99+
at: currentPosition,
100+
limitedBy: end,
101+
produceSavePointRange: produceSavePointRange,
102+
isInverted: payload.builtinIsInverted,
103+
isStrictASCII: payload.builtinIsStrict,
104+
isScalarSemantics: isScalarSemantics)
105+
} else if isOneOrMore {
106+
matchResult = input.matchOneOrMoreBuiltinCC(
107+
payload.builtin,
108+
at: currentPosition,
109+
limitedBy: end,
110+
produceSavePointRange: produceSavePointRange,
111+
isInverted: payload.builtinIsInverted,
112+
isStrictASCII: payload.builtinIsStrict,
113+
isScalarSemantics: isScalarSemantics)
114+
} else {
115+
matchResult = input.matchQuantifiedBuiltinCC(
116+
payload.builtin,
117+
at: currentPosition,
118+
limitedBy: end,
119+
minMatches: minMatches,
120+
maxMatches: maxMatches,
121+
produceSavePointRange: produceSavePointRange,
122+
isInverted: payload.builtinIsInverted,
123+
isStrictASCII: payload.builtinIsStrict,
124+
isScalarSemantics: isScalarSemantics)
125+
}
55126
}
56127

57128
guard let (next, savePointRange) = matchResult else {
@@ -121,7 +192,55 @@ extension String {
121192
// position, because newline-sequence in scalar semantic mode still
122193
// matches two scalars
123194

124-
return (currentPosition, rangeStart..<rangeEnd)
195+
return (currentPosition, rangeStart..<rangeEnd)
196+
}
197+
198+
/// NOTE: The [Zero|One]OrMore overloads exist to specialize the inlined run loop,
199+
/// which has a substantive perf impact (especially for zero-or-more)
200+
201+
/// Zero-or-more quantification over an ASCII bitset.
///
/// Forwards to `_runQuantLoop` with `minMatches` fixed at 0 and `maxMatches`
/// fixed at `UInt64.max`; each iteration is one `matchASCIIBitset` call.
///
/// - Parameters:
///   - asciiBitset: Bitset handed to `matchASCIIBitset` on every iteration.
///   - currentPosition: Index at which the run loop starts.
///   - end: Index limit forwarded to the run loop.
///   - produceSavePointRange: Forwarded unchanged to `_runQuantLoop`.
///   - isScalarSemantics: Forwarded unchanged to `_runQuantLoop`; the
///     per-iteration closure uses the flag the run loop hands it.
/// - Returns: Whatever `_runQuantLoop` returns, unmodified.
fileprivate func matchZeroOrMoreASCIIBitset(
  _ asciiBitset: ASCIIBitset,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 0,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    return matchASCIIBitset(
      asciiBitset,
      at: position,
      limitedBy: limit,
      isScalarSemantics: scalarSemantics)
  }
}
223+
/// One-or-more quantification over an ASCII bitset.
///
/// Forwards to `_runQuantLoop` with `minMatches` fixed at 1 and `maxMatches`
/// fixed at `UInt64.max`; each iteration is one `matchASCIIBitset` call.
///
/// - Parameters:
///   - asciiBitset: Bitset handed to `matchASCIIBitset` on every iteration.
///   - currentPosition: Index at which the run loop starts.
///   - end: Index limit forwarded to the run loop.
///   - produceSavePointRange: Forwarded unchanged to `_runQuantLoop`.
///   - isScalarSemantics: Forwarded unchanged to `_runQuantLoop`; the
///     per-iteration closure uses the flag the run loop hands it.
/// - Returns: Whatever `_runQuantLoop` returns, unmodified.
fileprivate func matchOneOrMoreASCIIBitset(
  _ asciiBitset: ASCIIBitset,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 1,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    return matchASCIIBitset(
      asciiBitset,
      at: position,
      limitedBy: limit,
      isScalarSemantics: scalarSemantics)
  }
}
126245

127246
fileprivate func matchQuantifiedASCIIBitset(
@@ -149,6 +268,54 @@ extension String {
149268
}
150269
}
151270

271+
/// Zero-or-more quantification over a single Unicode scalar.
///
/// Forwards to `_runQuantLoop` with `minMatches` fixed at 0 and `maxMatches`
/// fixed at `UInt64.max`; each iteration is one `matchScalar` call.
///
/// - Parameters:
///   - scalar: Scalar handed to `matchScalar` on every iteration.
///   - currentPosition: Index at which the run loop starts.
///   - end: Index limit forwarded to the run loop.
///   - produceSavePointRange: Forwarded unchanged to `_runQuantLoop`.
///   - isScalarSemantics: Forwarded unchanged to `_runQuantLoop`; the
///     per-iteration closure negates the flag the run loop hands it to
///     produce `matchScalar`'s `boundaryCheck` argument.
/// - Returns: Whatever `_runQuantLoop` returns, unmodified.
fileprivate func matchZeroOrMoreScalar(
  _ scalar: Unicode.Scalar,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 0,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // Case-insensitive matching is always disabled on this path.
    return matchScalar(
      scalar,
      at: position,
      limitedBy: limit,
      boundaryCheck: !scalarSemantics,
      isCaseInsensitive: false)
  }
}
294+
/// One-or-more quantification over a single Unicode scalar.
///
/// Forwards to `_runQuantLoop` with `minMatches` fixed at 1 and `maxMatches`
/// fixed at `UInt64.max`; each iteration is one `matchScalar` call.
///
/// - Parameters:
///   - scalar: Scalar handed to `matchScalar` on every iteration.
///   - currentPosition: Index at which the run loop starts.
///   - end: Index limit forwarded to the run loop.
///   - produceSavePointRange: Forwarded unchanged to `_runQuantLoop`.
///   - isScalarSemantics: Forwarded unchanged to `_runQuantLoop`; the
///     per-iteration closure negates the flag the run loop hands it to
///     produce `matchScalar`'s `boundaryCheck` argument.
/// - Returns: Whatever `_runQuantLoop` returns, unmodified.
fileprivate func matchOneOrMoreScalar(
  _ scalar: Unicode.Scalar,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 1,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    // Case-insensitive matching is always disabled on this path.
    return matchScalar(
      scalar,
      at: position,
      limitedBy: limit,
      boundaryCheck: !scalarSemantics,
      isCaseInsensitive: false)
  }
}
318+
152319
fileprivate func matchQuantifiedScalar(
153320
_ scalar: Unicode.Scalar,
154321
at currentPosition: Index,
@@ -176,6 +343,59 @@ extension String {
176343
}
177344
}
178345

346+
/// Zero-or-more quantification over a built-in character class.
///
/// Forwards to `_runQuantLoop` with `minMatches` fixed at 0 and `maxMatches`
/// fixed at `UInt64.max`; each iteration is one `matchBuiltinCC` call.
///
/// - Parameters:
///   - builtinCC: Character-class representation handed to `matchBuiltinCC`.
///   - currentPosition: Index at which the run loop starts.
///   - end: Index limit forwarded to the run loop.
///   - produceSavePointRange: Forwarded unchanged to `_runQuantLoop`.
///   - isInverted: Captured and passed to every `matchBuiltinCC` call.
///   - isStrictASCII: Captured and passed to every `matchBuiltinCC` call.
///   - isScalarSemantics: Forwarded unchanged to `_runQuantLoop`; the
///     per-iteration closure uses the flag the run loop hands it.
/// - Returns: Whatever `_runQuantLoop` returns, unmodified.
fileprivate func matchZeroOrMoreBuiltinCC(
  _ builtinCC: _CharacterClassModel.Representation,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 0,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    return matchBuiltinCC(
      builtinCC,
      at: position,
      limitedBy: limit,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: scalarSemantics)
  }
}
372+
/// One-or-more quantification over a built-in character class.
///
/// Forwards to `_runQuantLoop` with `minMatches` fixed at 1 and `maxMatches`
/// fixed at `UInt64.max`; each iteration is one `matchBuiltinCC` call.
///
/// - Parameters:
///   - builtinCC: Character-class representation handed to `matchBuiltinCC`.
///   - currentPosition: Index at which the run loop starts.
///   - end: Index limit forwarded to the run loop.
///   - produceSavePointRange: Forwarded unchanged to `_runQuantLoop`.
///   - isInverted: Captured and passed to every `matchBuiltinCC` call.
///   - isStrictASCII: Captured and passed to every `matchBuiltinCC` call.
///   - isScalarSemantics: Forwarded unchanged to `_runQuantLoop`; the
///     per-iteration closure uses the flag the run loop hands it.
/// - Returns: Whatever `_runQuantLoop` returns, unmodified.
fileprivate func matchOneOrMoreBuiltinCC(
  _ builtinCC: _CharacterClassModel.Representation,
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 1,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    return matchBuiltinCC(
      builtinCC,
      at: position,
      limitedBy: limit,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: scalarSemantics)
  }
}
398+
179399
fileprivate func matchQuantifiedBuiltinCC(
180400
_ builtinCC: _CharacterClassModel.Representation,
181401
at currentPosition: Index,
@@ -205,14 +425,59 @@ extension String {
205425
}
206426
}
207427

428+
/// Zero-or-more quantification over the regex dot.
///
/// Forwards to `_runQuantLoop` with `minMatches` fixed at 0 and `maxMatches`
/// fixed at `UInt64.max`; each iteration is one `matchRegexDot` call.
///
/// - Parameters:
///   - currentPosition: Index at which the run loop starts.
///   - end: Index limit forwarded to the run loop.
///   - produceSavePointRange: Forwarded unchanged to `_runQuantLoop`.
///   - anyMatchesNewline: Captured and passed to every `matchRegexDot` call.
///   - isScalarSemantics: Forwarded unchanged to `_runQuantLoop`; the
///     per-iteration closure uses the flag the run loop hands it.
/// - Returns: Whatever `_runQuantLoop` returns, unmodified.
fileprivate func matchZeroOrMoreRegexDot(
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  anyMatchesNewline: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 0,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    return matchRegexDot(
      at: position,
      limitedBy: limit,
      anyMatchesNewline: anyMatchesNewline,
      isScalarSemantics: scalarSemantics)
  }
}
450+
/// One-or-more quantification over the regex dot.
///
/// Forwards to `_runQuantLoop` with `minMatches` fixed at 1 and `maxMatches`
/// fixed at `UInt64.max`; each iteration is one `matchRegexDot` call.
///
/// - Parameters:
///   - currentPosition: Index at which the run loop starts.
///   - end: Index limit forwarded to the run loop.
///   - produceSavePointRange: Forwarded unchanged to `_runQuantLoop`.
///   - anyMatchesNewline: Captured and passed to every `matchRegexDot` call.
///   - isScalarSemantics: Forwarded unchanged to `_runQuantLoop`; the
///     per-iteration closure uses the flag the run loop hands it.
/// - Returns: Whatever `_runQuantLoop` returns, unmodified.
fileprivate func matchOneOrMoreRegexDot(
  at currentPosition: Index,
  limitedBy end: Index,
  produceSavePointRange: Bool,
  anyMatchesNewline: Bool,
  isScalarSemantics: Bool
) -> (next: Index, savePointRange: Range<Index>?)? {
  return _runQuantLoop(
    at: currentPosition,
    limitedBy: end,
    minMatches: 1,
    maxMatches: UInt64.max,
    produceSavePointRange: produceSavePointRange,
    isScalarSemantics: isScalarSemantics
  ) { position, limit, scalarSemantics in
    return matchRegexDot(
      at: position,
      limitedBy: limit,
      anyMatchesNewline: anyMatchesNewline,
      isScalarSemantics: scalarSemantics)
  }
}
472+
208473
fileprivate func matchQuantifiedRegexDot(
209474
at currentPosition: Index,
210475
limitedBy end: Index,
211476
minMatches: UInt64,
212477
maxMatches: UInt64,
213478
produceSavePointRange: Bool,
214-
isScalarSemantics: Bool,
215-
anyMatchesNewline: Bool
479+
anyMatchesNewline: Bool,
480+
isScalarSemantics: Bool
216481
) -> (next: Index, savePointRange: Range<Index>?)? {
217482
_runQuantLoop(
218483
at: currentPosition,

0 commit comments

Comments
 (0)