Skip to content

Commit 927d38e

Browse files
committed
Parse Oniguruma callouts
Parse named and 'of-contents' Oniguruma callouts. This requires generalizing the group-name handling to cover arbitrary identifiers, each of which may carry a specific kind that is used in diagnostics.
1 parent adf4e97 commit 927d38e

File tree

7 files changed

+528
-78
lines changed

7 files changed

+528
-78
lines changed

Sources/_MatchingEngine/Regex/AST/Atom.swift

Lines changed: 110 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -476,14 +476,117 @@ extension AST.Atom {
476476
}
477477

478478
extension AST.Atom {
479-
public struct Callout: Hashable {
480-
public enum Argument: Hashable {
481-
case number(Int)
482-
case string(String)
479+
public enum Callout: Hashable {
480+
/// A PCRE callout written `(?C...)`
481+
public struct PCRE: Hashable {
482+
public enum Argument: Hashable {
483+
case number(Int)
484+
case string(String)
485+
}
486+
public var arg: AST.Located<Argument>
487+
488+
public init(_ arg: AST.Located<Argument>) {
489+
self.arg = arg
490+
}
491+
492+
/// Whether the argument isn't written explicitly in the source, e.g.
493+
/// `(?C)` which is implicitly `(?C0)`.
494+
public var isImplicit: Bool { arg.location.isEmpty }
483495
}
484-
public var arg: AST.Located<Argument>
485-
public init(_ arg: AST.Located<Argument>) {
486-
self.arg = arg
496+
497+
/// A named Oniguruma callout written `(*name[tag]{args, ...})`
498+
public struct OnigurumaNamed: Hashable {
499+
public struct ArgList: Hashable {
500+
public var leftBrace: SourceLocation
501+
public var args: [AST.Located<String>]
502+
public var rightBrace: SourceLocation
503+
504+
public init(
505+
_ leftBrace: SourceLocation,
506+
_ args: [AST.Located<String>],
507+
_ rightBrace: SourceLocation
508+
) {
509+
self.leftBrace = leftBrace
510+
self.args = args
511+
self.rightBrace = rightBrace
512+
}
513+
}
514+
515+
public var name: AST.Located<String>
516+
public var tag: OnigurumaTag?
517+
public var args: ArgList?
518+
519+
public init(
520+
_ name: AST.Located<String>, tag: OnigurumaTag?, args: ArgList?
521+
) {
522+
self.name = name
523+
self.tag = tag
524+
self.args = args
525+
}
526+
}
527+
528+
/// An Oniguruma callout 'of contents', written `(?{...}[tag]D)`
529+
public struct OnigurumaOfContents: Hashable {
530+
public enum Direction: Hashable {
531+
case inProgress // > (the default)
532+
case inRetraction // <
533+
case both // X
534+
}
535+
public var openBraces: SourceLocation
536+
public var contents: AST.Located<String>
537+
public var closeBraces: SourceLocation
538+
public var tag: OnigurumaTag?
539+
public var direction: AST.Located<Direction>
540+
541+
public init(
542+
_ openBraces: SourceLocation, _ contents: AST.Located<String>,
543+
_ closeBraces: SourceLocation, tag: OnigurumaTag?,
544+
direction: AST.Located<Direction>
545+
) {
546+
self.openBraces = openBraces
547+
self.contents = contents
548+
self.closeBraces = closeBraces
549+
self.tag = tag
550+
self.direction = direction
551+
}
552+
553+
/// Whether the direction flag isn't written explicitly in the
554+
/// source, e.g. `(?{x})` which is implicitly `(?{x}>)`.
555+
public var isDirectionImplicit: Bool { direction.location.isEmpty }
556+
}
557+
case pcre(PCRE)
558+
case onigurumaNamed(OnigurumaNamed)
559+
case onigurumaOfContents(OnigurumaOfContents)
560+
561+
private var _associatedValue: Any {
562+
switch self {
563+
case .pcre(let v): return v
564+
case .onigurumaNamed(let v): return v
565+
case .onigurumaOfContents(let v): return v
566+
}
567+
}
568+
569+
func `as`<T>(_ t: T.Type = T.self) -> T? {
570+
_associatedValue as? T
571+
}
572+
}
573+
}
574+
575+
extension AST.Atom.Callout {
576+
/// A tag specifier `[...]` which may appear in an Oniguruma callout.
577+
public struct OnigurumaTag: Hashable {
578+
public var leftBracket: SourceLocation
579+
public var name: AST.Located<String>
580+
public var rightBracket: SourceLocation
581+
582+
public init(
583+
_ leftBracket: SourceLocation,
584+
_ name: AST.Located<String>,
585+
_ rightBracket: SourceLocation
586+
) {
587+
self.leftBracket = leftBracket
588+
self.name = name
589+
self.rightBracket = rightBracket
487590
}
488591
}
489592
}

Sources/_MatchingEngine/Regex/Parse/Diagnostics.swift

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,24 @@ enum ParseError: Error, Hashable {
5656

5757
case expectedGroupSpecifier
5858
case unbalancedEndOfGroup
59-
case expectedGroupName
60-
case groupNameMustBeAlphaNumeric
61-
case groupNameCannotStartWithNumber
59+
60+
// Identifier diagnostics.
61+
case expectedIdentifier(IdentifierKind)
62+
case identifierMustBeAlphaNumeric(IdentifierKind)
63+
case identifierCannotStartWithNumber(IdentifierKind)
6264

6365
case cannotRemoveTextSegmentOptions
66+
case expectedCalloutArgument
67+
}
68+
69+
extension IdentifierKind {
70+
fileprivate var diagDescription: String {
71+
switch self {
72+
case .groupName: return "group name"
73+
case .onigurumaCalloutName: return "callout name"
74+
case .onigurumaCalloutTag: return "callout tag"
75+
}
76+
}
6477
}
6578

6679
extension ParseError: CustomStringConvertible {
@@ -120,14 +133,16 @@ extension ParseError: CustomStringConvertible {
120133
return "expected group specifier"
121134
case .unbalancedEndOfGroup:
122135
return "closing ')' does not balance any groups openings"
123-
case .expectedGroupName:
124-
return "expected group name"
125-
case .groupNameMustBeAlphaNumeric:
126-
return "group name must only contain alphanumeric characters"
127-
case .groupNameCannotStartWithNumber:
128-
return "group name must not start with number"
136+
case .expectedIdentifier(let i):
137+
return "expected \(i.diagDescription)"
138+
case .identifierMustBeAlphaNumeric(let i):
139+
return "\(i.diagDescription) must only contain alphanumeric characters"
140+
case .identifierCannotStartWithNumber(let i):
141+
return "\(i.diagDescription) must not start with number"
129142
case .cannotRemoveTextSegmentOptions:
130143
return "text segment mode cannot be unset, only changed"
144+
case .expectedCalloutArgument:
145+
return "expected argument to callout"
131146
}
132147
}
133148
}

0 commit comments

Comments
 (0)