-
Notifications
You must be signed in to change notification settings - Fork 50
Parse PCRE callouts, backtracking directives, and .NET balanced captures #117
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,6 +66,12 @@ extension AST { | |
// References | ||
case backreference(Reference) | ||
case subpattern(Reference) | ||
|
||
// (?C) | ||
case callout(Callout) | ||
|
||
// (*ACCEPT), (*FAIL), ... | ||
case backtrackingDirective(BacktrackingDirective) | ||
} | ||
} | ||
} | ||
|
@@ -443,6 +449,59 @@ extension AST.Atom { | |
} | ||
} | ||
|
||
extension AST.Atom { | ||
/// A PCRE callout atom, written `(?C)`, carrying the single argument given | ||
/// in the pattern. | ||
public struct Callout: Hashable { | ||
/// The argument of a callout: either a number or a string. | ||
public enum Argument: Hashable { | ||
case number(Int) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these references? That is, could they be relative or are they always absolute? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No they're just plain arguments to the callout function. As I understand it, PCRE expects you to give it a single callout function, and it gets called with the argument specified to let you differentiate which call it is. |
||
case string(String) | ||
} | ||
/// The callout's argument, wrapped in `AST.Located`. | ||
public var arg: AST.Located<Argument> | ||
/// Creates a callout that stores `arg` unchanged. | ||
public init(_ arg: AST.Located<Argument>) { | ||
self.arg = arg | ||
} | ||
} | ||
} | ||
|
||
extension AST.Atom { | ||
/// A backtracking directive atom such as `(*ACCEPT)` or `(*FAIL)`, made up | ||
/// of a directive kind and an optional name. | ||
public struct BacktrackingDirective: Hashable { | ||
/// The directive being requested; each case is annotated with its | ||
/// pattern spelling. | ||
public enum Kind: Hashable { | ||
/// (*ACCEPT) | ||
case accept | ||
|
||
/// (*FAIL) | ||
case fail | ||
|
||
/// (*MARK:NAME) | ||
case mark | ||
|
||
/// (*COMMIT) | ||
case commit | ||
|
||
/// (*PRUNE) | ||
case prune | ||
|
||
/// (*SKIP) | ||
case skip | ||
|
||
/// (*THEN) | ||
case then | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a newline to separate cases? Is there anywhere in the repo where these terms or concepts are defined or have a quick blurb? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, and no there currently is not. Should I elaborate on the comments here? Or split out a separate document to explain some of the more obscure features? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking the comment at first, but I do think a document sounds like a really good idea. That can serve as fodder for pitches too. I suspect the very first fully-realized pitch will be the literal-interior syntax. |
||
} | ||
/// The kind of directive, wrapped in `AST.Located`. | ||
public var kind: AST.Located<Kind> | ||
/// The optional name attached to the directive, or `nil` when absent. | ||
/// NOTE(review): presumably the `NAME` part of forms like `(*MARK:NAME)` — | ||
/// confirm against the parser. | ||
public var name: AST.Located<String>? | ||
hamishknight marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/// Creates a directive from its kind and optional name, storing both | ||
/// unchanged. | ||
public init(_ kind: AST.Located<Kind>, name: AST.Located<String>?) { | ||
self.kind = kind | ||
self.name = name | ||
} | ||
|
||
/// Whether this directive may be quantified; `true` only when the kind is | ||
/// `.accept`. | ||
public var isQuantifiable: Bool { | ||
// As per http://pcre.org/current/doc/html/pcre2pattern.html#SEC29, only | ||
// (*ACCEPT) is quantifiable. | ||
kind.value == .accept | ||
} | ||
} | ||
} | ||
|
||
extension AST.Atom { | ||
/// Retrieve the character value of the atom if it represents a literal | ||
/// character or unicode scalar, nil otherwise. | ||
|
@@ -458,7 +517,8 @@ extension AST.Atom { | |
fallthrough | ||
|
||
case .property, .escaped, .any, .startOfLine, .endOfLine, | ||
.backreference, .subpattern, .namedCharacter: | ||
.backreference, .subpattern, .namedCharacter, .callout, | ||
.backtrackingDirective: | ||
return nil | ||
} | ||
} | ||
|
@@ -483,10 +543,21 @@ extension AST.Atom { | |
return "\\M-\\C-\(x)" | ||
|
||
case .property, .escaped, .any, .startOfLine, .endOfLine, | ||
.backreference, .subpattern, .namedCharacter: | ||
.backreference, .subpattern, .namedCharacter, .callout, | ||
.backtrackingDirective: | ||
return nil | ||
} | ||
} | ||
|
||
/// Whether this atom may be quantified. A backtracking directive defers to | ||
/// its own `isQuantifiable`; every other atom kind currently reports `true`. | ||
public var isQuantifiable: Bool { | ||
switch kind { | ||
case .backtrackingDirective(let b): | ||
return b.isQuantifiable | ||
// TODO: Are callouts quantifiable? | ||
default: | ||
return true | ||
} | ||
} | ||
} | ||
|
||
extension AST { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,7 +44,10 @@ extension AST { | |
return .atom() + innerCaptures | ||
case .namedCapture(let name): | ||
return .atom(name: name.value) + innerCaptures | ||
case .balancedCapture(let b): | ||
return .atom(name: b.name?.value) + innerCaptures | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CC @rxwei who is fixing some bugs around this area |
||
default: | ||
precondition(!group.kind.value.isCapturing) | ||
return innerCaptures | ||
} | ||
case .conditional(let c): | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this used for?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's used to emit a diagnostic for backtracking directives that aren't quantifiable:
aea6d9d#diff-4f8c3a04c147c57dc383b837f459ee51c8bec6de78d061d54ac7e228f5228dc5R1328-R1334