From 69962bf67c04ae62dff85c2b10b3c30f730da6cf Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Tue, 4 Apr 2023 13:00:05 -0500 Subject: [PATCH] Remove the unsupported `anyScalar` case We decided not to support the `anyScalar` character class, which would match a single Unicode scalar regardless of matching mode. However, its representation was still included in the various character class types in the regex engine, leading to unreachable code and unclear requirements when changing or adding new code. This change removes that representation where possible. The `DSLTree.Atom.CharacterClass` enum is left unchanged, since it is marked `@_spi(RegexBuilder) public`. Any use of that enum case is handled with a `fatalError("Unsupported")`, and it isn't produced on any code path. --- Sources/_StringProcessing/ByteCodeGen.swift | 3 --- Sources/_StringProcessing/Engine/MEBuiltins.swift | 8 -------- Sources/_StringProcessing/PrintAsPattern.swift | 4 ++-- .../_StringProcessing/Regex/ASTConversion.swift | 1 - Sources/_StringProcessing/Regex/DSLTree.swift | 3 ++- Sources/_StringProcessing/Unicode/ASCII.swift | 3 +-- .../_StringProcessing/_CharacterClassModel.swift | 14 +------------- 7 files changed, 6 insertions(+), 30 deletions(-) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 15e052901..d4c91bd63 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -702,9 +702,6 @@ fileprivate extension Compiler.ByteCodeGen { case .characterClass(let cc): // Custom character class that consumes a single grapheme let model = cc.asRuntimeModel(options) - guard model.consumesSingleGrapheme else { - return false - } builder.buildQuantify( model: model, kind, diff --git a/Sources/_StringProcessing/Engine/MEBuiltins.swift b/Sources/_StringProcessing/Engine/MEBuiltins.swift index fa4f8fa1a..a3d864165 100644 --- a/Sources/_StringProcessing/Engine/MEBuiltins.swift +++ b/Sources/_StringProcessing/Engine/MEBuiltins.swift @@ -193,8 +193,6 @@ extension String { switch (isScalarSemantics, cc) { case (_, .anyGrapheme): next = index(after: currentPosition) - case (_, .anyScalar): - next = unicodeScalars.index(after: currentPosition) case (true, _): next = unicodeScalars.index(after: currentPosition) case (false, _): @@ -204,12 +202,6 @@ extension String { switch cc { case .any, .anyGrapheme: matched = true - case .anyScalar: - if isScalarSemantics { - matched = true - } else { - matched = isOnGraphemeClusterBoundary(next) - } case .digit: if isScalarSemantics { matched = scalar.properties.numericType != nil && asciiCheck diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index 3d2d38842..08c40157a 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -760,8 +760,6 @@ extension DSLTree.Atom.CharacterClass { switch self { case .anyGrapheme: return ".anyGraphemeCluster" - case .anyUnicodeScalar: - return ".anyUnicodeScalar" case .digit: return ".digit" case .notDigit: @@ -786,6 +784,8 @@ extension DSLTree.Atom.CharacterClass { return ".whitespace" case .notWhitespace: return ".whitespace.inverted" + case .anyUnicodeScalar: + fatalError("Unsupported") } } } diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index f5b08dd6d..6af0924cb 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -183,7 +183,6 @@ extension AST.Atom.EscapedBuiltin { case .wordCharacter: return .word case .notWordCharacter: return .notWord case .graphemeCluster: return .anyGrapheme - case .trueAnychar: return .anyUnicodeScalar default: return nil } } diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 93e86c607..b784e2382 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -260,7 +260,6 @@ extension DSLTree.Atom.CharacterClass { public var inverted: DSLTree.Atom.CharacterClass? { switch self { case .anyGrapheme: return nil - case .anyUnicodeScalar: return nil case .digit: return .notDigit case .notDigit: return .digit case .word: return .notWord @@ -273,6 +272,8 @@ extension DSLTree.Atom.CharacterClass { case .notVerticalWhitespace: return .verticalWhitespace case .whitespace: return .notWhitespace case .notWhitespace: return .whitespace + case .anyUnicodeScalar: + fatalError("Unsupported") } } } diff --git a/Sources/_StringProcessing/Unicode/ASCII.swift b/Sources/_StringProcessing/Unicode/ASCII.swift index f27584893..5150e18cc 100644 --- a/Sources/_StringProcessing/Unicode/ASCII.swift +++ b/Sources/_StringProcessing/Unicode/ASCII.swift @@ -134,8 +134,7 @@ extension String { // TODO: bitvectors switch cc { - case .any, .anyGrapheme, .anyScalar: - // TODO: should any scalar not consume CR-LF in scalar semantic mode? + case .any, .anyGrapheme: return (next, true) case .digit: diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift index 35a11a7ef..cdee66ddb 100644 --- a/Sources/_StringProcessing/_CharacterClassModel.swift +++ b/Sources/_StringProcessing/_CharacterClassModel.swift @@ -45,8 +45,6 @@ struct _CharacterClassModel: Hashable { case any = 0 /// Any grapheme cluster case anyGrapheme - /// Any Unicode scalar - case anyScalar /// Character.isDigit case digit /// Horizontal whitespace: `[:blank:]`, i.e @@ -90,15 +88,6 @@ struct _CharacterClassModel: Hashable { } } -extension _CharacterClassModel { - var consumesSingleGrapheme: Bool { - switch self.cc { - case .anyScalar: return false - default: return true - } - } -} - extension _CharacterClassModel.Representation { /// Returns true if this CharacterClass should be matched by strict ascii under the given options func isStrictAscii(options: MatchingOptions) -> Bool { @@ -119,7 +108,6 @@ extension _CharacterClassModel.Representation: CustomStringConvertible { switch self { case .any: return "" case .anyGrapheme: return "" - case .anyScalar: return "" case .digit: return "" case .horizontalWhitespace: return "" case .newlineSequence: return "" @@ -185,7 +173,7 @@ extension DSLTree.Atom.CharacterClass { case .anyGrapheme: cc = .anyGrapheme case .anyUnicodeScalar: - cc = .anyScalar + fatalError("Unsupported") } return _CharacterClassModel(cc: cc, options: options, isInverted: inverted) }