Skip to content

Commit fbf561c

Browse files
authored
Merge pull request #109 from hamishknight/ast-quota
2 parents c4ec0b0 + 432d998 commit fbf561c

File tree

12 files changed

+81
-7
lines changed

12 files changed

+81
-7
lines changed

Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ extension AST {
3838
/// A single character or escape
3939
case atom(Atom)
4040

41+
/// A quoted sequence. Inside a custom character class this just means
42+
/// the contents should be interpreted literally.
43+
case quote(Quote)
44+
4145
/// A binary operator applied to sets of members `abc&&def`
4246
case setOperation([Member], Located<SetOp>, [Member])
4347
}
@@ -76,6 +80,7 @@ extension CustomCC.Member {
7680
case .custom(let c): return c
7781
case .range(let r): return r
7882
case .atom(let a): return a
83+
case .quote(let q): return q
7984
case .setOperation(let lhs, let op, let rhs): return (lhs, op, rhs)
8085
}
8186
}

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,8 @@ extension Source {
453453
///
454454
/// TODO: Need to support some escapes
455455
///
456-
mutating func lexQuote() throws -> Located<String>? {
457-
try recordLoc { src in
456+
mutating func lexQuote() throws -> AST.Quote? {
457+
let str = try recordLoc { src -> String? in
458458
if src.tryEat(sequence: #"\Q"#) {
459459
return try src.expectQuoted(endingWith: #"\E"#).value
460460
}
@@ -463,6 +463,8 @@ extension Source {
463463
}
464464
return nil
465465
}
466+
guard let str = str else { return nil }
467+
return AST.Quote(str.value, str.location)
466468
}
467469

468470
/// Try to consume a comment

Sources/_MatchingEngine/Regex/Parse/Parse.swift

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ extension Parser {
146146

147147
// Quote -> `lexQuote`
148148
if let quote = try source.lexQuote() {
149-
result.append(.quote(.init(quote.value, loc(_start))))
149+
result.append(.quote(quote))
150150
continue
151151
}
152152
// Quantification -> QuantOperand Quantifier?
@@ -273,6 +273,12 @@ extension Parser {
273273
continue
274274
}
275275

276+
// Quoted sequence.
277+
if let quote = try source.lexQuote() {
278+
members.append(.quote(quote))
279+
continue
280+
}
281+
276282
guard let atom = try source.lexAtom(
277283
isInCustomCharacterClass: true, priorGroupCount: priorGroupCount)
278284
else { break }

Sources/_MatchingEngine/Regex/Printing/DumpAST.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ extension AST.Concatenation {
7171
}
7272

7373
/// Dump-printing support for quoted sequences.
extension AST.Quote {
74-
public var _dumpBase: String { "quote" }
74+
// The replacement line embeds the quoted literal (wrapped in double quotes)
// in the dump label, so two quotes with different contents produce different
// dump strings instead of the constant "quote".
public var _dumpBase: String { "quote \"\(literal)\"" }
7575
}
7676

7777
extension AST.Trivia {
@@ -203,6 +203,7 @@ extension AST.CustomCharacterClass.Member: _ASTPrintable {
203203
case .custom(let cc): return "\(cc)"
204204
case .atom(let a): return "\(a)"
205205
case .range(let r): return "\(r)"
206+
case .quote(let q): return "\(q)"
206207
case .setOperation(let lhs, let op, let rhs):
207208
return "op \(lhs) \(op.value) \(rhs)"
208209
}

Sources/_MatchingEngine/Regex/Printing/PrintAsCanonical.swift

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@ extension PrettyPrinter {
6767
output(q.kind.value._canonicalBase)
6868

6969
case let .quote(q):
70-
// TODO: Is this really what we want?
71-
output("\\Q\(q.literal)\\E")
70+
output(q._canonicalBase)
7271

7372
case let .trivia(t):
7473
// TODO: We might want to output comments...
@@ -110,12 +109,21 @@ extension PrettyPrinter {
110109
output(r.rhs._canonicalBase)
111110
case .atom(let a):
112111
output(a._canonicalBase)
112+
case .quote(let q):
113+
output(q._canonicalBase)
113114
case .setOperation:
114115
output("/* TODO: set operation \(self) */")
115116
}
116117
}
117118
}
118119

120+
/// Canonical-printing support for quoted sequences.
extension AST.Quote {
121+
/// The quote rendered as `\Q` + `literal` + `\E`. The literal is
/// interpolated verbatim; no escaping is applied to it.
var _canonicalBase: String {
122+
// TODO: Is this really what we want?
// NOTE(review): a literal that itself contains `\E` (presumably possible
// when the quote came from the experimental `"..."` syntax) would likely not
// round-trip through this form — confirm.
123+
"\\Q\(literal)\\E"
124+
}
125+
}
126+
119127
extension AST.Group.Kind {
120128
var _canonicalBase: String {
121129
switch self {

Sources/_MatchingEngine/Regex/Printing/PrintAsPattern.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ extension PrettyPrinter {
167167
} else {
168168
print(a._patternBase)
169169
}
170+
case .quote(let q):
171+
print("// TODO: quote \(q.literal._quoted) in custom character classes (should we split it?)")
170172
case .setOperation:
171173
print("// TODO: Set operation: \(member)")
172174
}

Sources/_StringProcessing/ASTBuilder.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ func charClass(
214214
/// Builds a top-level quote AST node whose literal is `s`.
///
/// The node is created with `.fake` in the location position, so it carries
/// no real source location.
func quote(_ s: String) -> AST {
215215
.quote(.init(s, .fake))
216216
}
217+
/// Builds a quote as a custom character class member whose literal is `s`.
///
/// Member-typed counterpart of `quote(_:)`; the node is created with `.fake`
/// in the location position, so it carries no real source location.
func quote_m(_ s: String) -> AST.CustomCharacterClass.Member {
218+
.quote(.init(s, .fake))
219+
}
217220

218221
// MARK: - Atoms
219222

Sources/_StringProcessing/CharacterClass.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,11 @@ extension AST.CustomCharacterClass {
408408
} else {
409409
return nil
410410
}
411+
412+
case .quote(let q):
413+
// Decompose quoted literal into literal characters.
414+
result += q.literal.map { .character($0) }
415+
411416
case .setOperation(let lhs, let op, let rhs):
412417
// FIXME: CharacterClass wasn't designed for set operations with
413418
// multiple components in each operand, we should fix that. For now,

Sources/_StringProcessing/Compiler.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ class Compiler {
477477
/// Parses `regex`, compiles the resulting AST into a program, and wraps that
/// program in an `Executor`.
///
/// - Parameters:
///   - regex: The regular expression source text.
///   - syntax: Syntax options passed to the parser; defaults to `.traditional`.
/// - Returns: An `Executor` for the compiled program.
/// - Throws: Whatever `parse` or `Compiler.emit()` throws.
public func _compileRegex(
478478
_ regex: String, _ syntax: SyntaxOptions = .traditional
479479
) throws -> Executor {
480-
let ast = try parse(regex, .traditional)
480+
// The replacement line forwards the caller's `syntax`; the removed line
// ignored the parameter and always parsed with `.traditional`.
let ast = try parse(regex, syntax)
481481
let program = try Compiler(ast: ast).emit()
482482
return Executor(program: program)
483483
}

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,20 @@ extension AST.CustomCharacterClass.Member {
151151
}
152152
return gen
153153

154+
case .quote(let q):
155+
// TODO: Not optimal: builds a separate consumer for every character of the literal and tries them one by one.
156+
let consumers = try q.literal.map {
157+
try AST.Atom(.char($0), .fake).generateConsumer(opts)!
158+
}
159+
return { input, bounds in
160+
for consumer in consumers {
161+
if let idx = consumer(input, bounds) {
162+
return idx
163+
}
164+
}
165+
return nil
166+
}
167+
154168
case .setOperation(let lhs, let op, let rhs):
155169
// TODO: We should probably have a component type
156170
// instead of a members array... for now we reconstruct

Tests/RegexTests/MatchTests.swift

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,21 @@ extension RegexTests {
592592
"--+", input: "123---xyz", match: "---")
593593
firstMatchTest(
594594
"~~*", input: "123~~~xyz", match: "~~~")
595+
596+
597+
// Quotes in character classes.
598+
firstMatchTest(#"[\Qabc\E]"#, input: "QEa", match: "a")
599+
firstMatchTest(#"[\Qabc\E]"#, input: "cxx", match: "c")
600+
firstMatchTest(#"[\Qabc\E]+"#, input: "cba", match: "cba")
601+
firstMatchTest(#"[\Qa-c\E]+"#, input: "a-c", match: "a-c")
602+
603+
firstMatchTest(#"["a-c"]+"#, input: "abc", match: "a",
604+
syntax: .experimental)
605+
firstMatchTest(#"["abc"]+"#, input: "cba", match: "cba",
606+
syntax: .experimental)
607+
firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: "abc",
608+
syntax: .experimental)
609+
firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: #""abc""#)
595610
}
596611

597612
func testCharacterProperties() {

Tests/RegexTests/ParseTests.swift

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,16 @@ extension RegexTests {
433433
syntax: .experimental)
434434
parseTest(#""\"""#, quote("\""), syntax: .experimental)
435435

436+
// Quotes in character classes.
437+
parseTest(#"[\Q-\E]"#, charClass(quote_m("-")))
438+
parseTest(#"[\Qa-b[[*+\\E]"#, charClass(quote_m(#"a-b[[*+\"#)))
439+
440+
parseTest(#"["-"]"#, charClass(quote_m("-")), syntax: .experimental)
441+
parseTest(#"["a-b[[*+\""]"#, charClass(quote_m(#"a-b[[*+""#)),
442+
syntax: .experimental)
443+
444+
parseTest(#"["-"]"#, charClass(range_m("\"", "\"")))
445+
436446
// MARK: Comments
437447

438448
parseTest(
@@ -979,6 +989,9 @@ extension RegexTests {
979989
parseNotEqualTest(#"(?+1)"#, #"(?1)"#)
980990
parseNotEqualTest(#"(?&a)"#, #"(?&b)"#)
981991

992+
parseNotEqualTest(#"\Qabc\E"#, #"\Qdef\E"#)
993+
parseNotEqualTest(#""abc""#, #""def""#)
994+
982995
// TODO: failure tests
983996
}
984997

0 commit comments

Comments
 (0)