Skip to content

Commit c2e79ce

Browse files
authored
[RFC 9651] Add support for Display String type to RawStructuredFieldValues (#43)
Motivation: RFC 9651 added the Display String Structured Type. Modifications: Implement the parser and serializer for Display String in the RawStructuredFieldValues module. Result: The RawStructuredFieldValues module will support the Display String type.
1 parent 960eccb commit c2e79ce

File tree

9 files changed

+299
-0
lines changed

9 files changed

+299
-0
lines changed

Sources/RawStructuredFieldValues/ASCII.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,12 @@ let asciiSlash = UInt8(ascii: "/")
4545
let asciiPeriod = UInt8(ascii: ".")
4646
let asciiComma = UInt8(ascii: ",")
4747
let asciiCapitalA = UInt8(ascii: "A")
48+
let asciiCapitalF = UInt8(ascii: "F")
4849
let asciiCapitalZ = UInt8(ascii: "Z")
4950
let asciiLowerA = UInt8(ascii: "a")
51+
let asciiLowerF = UInt8(ascii: "f")
5052
let asciiLowerZ = UInt8(ascii: "z")
5153
let asciiCapitals = asciiCapitalA...asciiCapitalZ
5254
let asciiLowercases = asciiLowerA...asciiLowerZ
55+
let asciiHexCapitals = asciiCapitalA...asciiCapitalF
56+
let asciiHexLowercases = asciiLowerA...asciiLowerF

Sources/RawStructuredFieldValues/ComponentTypes.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ extension BareItem {
110110

111111
case .date:
112112
throw StructuredHeaderError.invalidItem
113+
case .displayString:
114+
throw StructuredHeaderError.invalidItem
113115
}
114116
}
115117
}
@@ -141,6 +143,9 @@ public enum RFC9651BareItem: Sendable {
141143

142144
/// A date item.
143145
case date(Int)
146+
147+
/// A display string item.
148+
case displayString(String)
144149
}
145150

146151
extension RFC9651BareItem: ExpressibleByBooleanLiteral {

Sources/RawStructuredFieldValues/Errors.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ public struct StructuredHeaderError: Error, Sendable {
2727
case invalidBoolean
2828
case invalidToken
2929
case invalidDate
30+
case invalidDisplayString
3031
case invalidList
3132
case invalidDictionary
3233
case missingKey
@@ -53,6 +54,7 @@ extension StructuredHeaderError {
5354
public static let invalidBoolean = StructuredHeaderError(.invalidBoolean)
5455
public static let invalidToken = StructuredHeaderError(.invalidToken)
5556
public static let invalidDate = StructuredHeaderError(.invalidDate)
57+
public static let invalidDisplayString = StructuredHeaderError(.invalidDisplayString)
5658
public static let invalidList = StructuredHeaderError(.invalidList)
5759
public static let invalidDictionary = StructuredHeaderError(.invalidDictionary)
5860
public static let missingKey = StructuredHeaderError(.missingKey)

Sources/RawStructuredFieldValues/FieldParser.swift

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ extension StructuredFieldValueParser {
224224
return try self._parseAToken()
225225
case asciiAt:
226226
return try self._parseADate()
227+
case asciiPercent:
228+
return try self._parseADisplayString()
227229
default:
228230
throw StructuredHeaderError.invalidItem
229231
}
@@ -491,6 +493,84 @@ extension StructuredFieldValueParser {
491493
return try self._parseAnIntegerOrDecimal(isDate: true)
492494
}
493495

496+
/// Parses a Display String item of the form `%"..."` from the front of `underlyingData`.
///
/// The caller must have verified that the next byte is `%`. Unescaped bytes are copied
/// verbatim, `%xx` escapes are decoded through `EncodedHex`, and the collected bytes are
/// validated as UTF-8 once the closing double quote is reached.
///
/// - Returns: A `.displayString` item holding the decoded text.
/// - Throws: `StructuredHeaderError.invalidDisplayString` if the opening quote is missing,
///   a byte in `0x00...0x1F` or at/above `0x7F` appears unescaped, an escape is truncated
///   or not valid hex, the decoded bytes are not valid UTF-8, or the input ends before a
///   closing double quote.
private mutating func _parseADisplayString() throws -> RFC9651BareItem {
    assert(underlyingData.first == asciiPercent)
    underlyingData.consumeFirst()

    // The leading "%" must be immediately followed by the opening quote.
    guard underlyingData.first == asciiDquote else {
        throw StructuredHeaderError.invalidDisplayString
    }
    underlyingData.consumeFirst()

    var decodedBytes: [UInt8] = []

    while let byte = underlyingData.first {
        underlyingData.consumeFirst()

        switch byte {
        case asciiDquote:
            // Closing quote: everything collected so far must form valid UTF-8.
            #if compiler(>=6.0)
            if #available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *) {
                guard let text = String(validating: decodedBytes, as: UTF8.self) else {
                    throw StructuredHeaderError.invalidDisplayString
                }
                return .displayString(text)
            } else {
                return try _decodeDisplayString(byteArray: &decodedBytes)
            }
            #else
            return try _decodeDisplayString(byteArray: &decodedBytes)
            #endif

        case asciiPercent:
            // An escape needs exactly two more bytes after the "%".
            guard underlyingData.count >= 2 else {
                throw StructuredHeaderError.invalidDisplayString
            }

            // Consume the two escape bytes before validating them.
            let escapeDigits = underlyingData.prefix(2)
            underlyingData = underlyingData.dropFirst(2)

            guard let octet = EncodedHex(escapeDigits).decode() else {
                throw StructuredHeaderError.invalidDisplayString
            }
            decodedBytes.append(octet)

        case 0x00...0x1F, 0x7F...:
            // Control characters and non-ASCII bytes may only appear percent-encoded.
            throw StructuredHeaderError.invalidDisplayString

        default:
            decodedBytes.append(byte)
        }
    }

    // Ran out of input without ever seeing the closing DQUOTE.
    throw StructuredHeaderError.invalidDisplayString
}
552+
553+
/// Validates the percent-decoded bytes of a Display String as UTF-8 and wraps them in a
/// `.displayString` item.
///
/// This method is used in environments where `String(validating:as:)` is unavailable.
/// It deliberately avoids `String(validatingUTF8:)`: that initializer reads a
/// null-terminated C string, so a decoded `%00` escape (a 0x00 byte in `byteArray`) would
/// silently truncate the result at the first NUL instead of preserving it.
///
/// - Parameter byteArray: The decoded bytes. The parameter stays `inout` for call-site
///   compatibility; the array is not modified.
/// - Returns: A `.displayString` item containing exactly the text encoded by `byteArray`.
/// - Throws: `StructuredHeaderError.invalidDisplayString` if `byteArray` is not
///   well-formed UTF-8.
private func _decodeDisplayString(byteArray: inout [UInt8]) throws -> RFC9651BareItem {
    // `String(decoding:as:)` never fails: ill-formed sequences are repaired by substituting
    // U+FFFD. The result is always valid UTF-8, so it can only re-encode to the very same
    // bytes when the input was already well-formed.
    let unicodeSequence = String(decoding: byteArray, as: UTF8.self)

    guard unicodeSequence.utf8.elementsEqual(byteArray) else {
        throw StructuredHeaderError.invalidDisplayString
    }

    return .displayString(unicodeSequence)
}
573+
494574
private mutating func _parseParameters() throws -> OrderedMap<Key, RFC9651BareItem> {
495575
var parameters = OrderedMap<Key, RFC9651BareItem>()
496576

@@ -643,3 +723,39 @@ extension StrippingStringEscapesCollection.Index: Comparable {
643723
lhs._baseIndex < rhs._baseIndex
644724
}
645725
}
726+
727+
/// Two ASCII bytes that are meant to spell a two-digit hexadecimal number.
///
/// The bytes are stored unvalidated; use `decode()` to check them and obtain the value.
struct EncodedHex {
    /// The byte carrying the high (most significant) hex digit.
    private(set) var firstChar: UInt8
    /// The byte carrying the low (least significant) hex digit.
    private(set) var secondChar: UInt8

    /// Creates an `EncodedHex` from a collection holding exactly two bytes.
    ///
    /// - Precondition: `bytes.count == 2`.
    init<Bytes: RandomAccessCollection>(_ bytes: Bytes) where Bytes.Element == UInt8 {
        precondition(bytes.count == 2)

        let highIndex = bytes.startIndex
        let lowIndex = bytes.index(after: highIndex)

        self.firstChar = bytes[highIndex]
        self.secondChar = bytes[lowIndex]
    }

    /// Returns the numeric value spelled by the two stored hex digits.
    ///
    /// The result is `nil` when either byte is not one of `0`-`9` or lowercase `a`-`f`.
    fileprivate func decode() -> UInt8? {
        guard let highNibble = self.hexDigitValue(of: self.firstChar) else { return nil }
        guard let lowNibble = self.hexDigitValue(of: self.secondChar) else { return nil }

        // Both nibbles are at most 15, so combining them cannot overflow a UInt8.
        return (highNibble << 4) | lowNibble
    }

    /// Maps a single ASCII hex digit to its value in `0...15`, or `nil` for any other byte.
    private func hexDigitValue(of asciiChar: UInt8) -> UInt8? {
        if (asciiZero...asciiNine).contains(asciiChar) {
            return asciiChar - asciiZero
        }

        if (asciiLowerA...asciiLowerF).contains(asciiChar) {
            return asciiChar - asciiLowerA + 10
        }

        return nil
    }
}

Sources/RawStructuredFieldValues/FieldSerializer.swift

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,29 @@ extension StructuredFieldValueSerializer {
213213
}
214214

215215
self.data.append(contentsOf: String(date, radix: 10).utf8)
216+
case .displayString(let displayString):
217+
let bytes = displayString.utf8
218+
219+
self.data.append(asciiPercent)
220+
self.data.append(asciiDquote)
221+
222+
for byte in bytes {
223+
if byte == asciiPercent
224+
|| byte == asciiDquote
225+
|| (0x00...0x1F).contains(byte)
226+
|| (0x7F...).contains(byte)
227+
{
228+
self.data.append(asciiPercent)
229+
230+
let encodedByte = UInt8.encodeToHex(byte)
231+
self.data.append(encodedByte.firstChar)
232+
self.data.append(encodedByte.secondChar)
233+
} else {
234+
self.data.append(byte)
235+
}
236+
}
237+
238+
self.data.append(asciiDquote)
216239
}
217240
}
218241
}
@@ -245,3 +268,18 @@ extension String {
245268
}
246269
}
247270
}
271+
272+
extension UInt8 {
    /// Spells `int` as two lowercase hexadecimal ASCII digits, packaged as an `EncodedHex`.
    ///
    /// The high nibble becomes the first character and the low nibble the second.
    fileprivate static func encodeToHex(_ int: Self) -> EncodedHex {
        let highNibble = int >> 4
        let lowNibble = int & 0x0F

        let hexDigits = [Self.itoh(highNibble), Self.itoh(lowNibble)]
        return EncodedHex(hexDigits)
    }

    /// Returns the ASCII byte of the lowercase hex digit for `int`.
    ///
    /// Values above 9 are mapped onto the letters starting at `a`.
    private static func itoh(_ int: Self) -> Self {
        if int > 9 {
            return asciiLowerA + int - 10
        }

        return asciiZero + int
    }
}

Sources/sh-parser/main.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ extension RFC9651BareItem {
171171
return "decimal \(d)"
172172
case .date(let date):
173173
return "date \(date)"
174+
case .displayString(let displayString):
175+
return "display string \(displayString)"
174176
}
175177
}
176178
}

Tests/StructuredFieldValuesTests/StructuredFieldParserTests.swift

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,24 @@ final class StructuredFieldParserTests: XCTestCase {
8787

8888
XCTAssertEqual(typeName, "date", "\(fixtureName): Expected type date, got type \(typeName)")
8989
XCTAssertEqual(typeValue, baseDate, "\(fixtureName): Got \(baseDate), expected \(typeValue)")
90+
case (.displayString(let baseDisplayString), .dictionary(let typeDictionary)):
91+
guard typeDictionary.count == 2, case .string(let typeName) = typeDictionary["__type"],
92+
case .string(let typeValue) = typeDictionary["value"]
93+
else {
94+
XCTFail("\(fixtureName): Unexpected type dict \(typeDictionary)")
95+
return
96+
}
97+
98+
XCTAssertEqual(
99+
typeName,
100+
"displaystring",
101+
"\(fixtureName): Expected type displaystring, got type \(typeName)"
102+
)
103+
XCTAssertEqual(
104+
typeValue,
105+
baseDisplayString,
106+
"\(fixtureName): Got \(baseDisplayString), expected \(typeValue)"
107+
)
90108
default:
91109
XCTFail("\(fixtureName): Got \(bareItem), expected \(schema)")
92110
}

Tests/StructuredFieldValuesTests/StructuredFieldSerializerTests.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ extension RFC9651BareItem {
214214
case (.some(.string("date")), .some(.integer(let value))):
215215
self = .date(value)
216216

217+
case (.some(.string("displaystring")), .some(.string(let value))):
218+
self = .displayString(value)
219+
217220
default:
218221
preconditionFailure("Unexpected type object \(typeObject)")
219222
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
[
2+
{
3+
"name": "basic display string (ascii content)",
4+
"raw": ["%\"foo bar\""],
5+
"header_type": "item",
6+
"expected": [{"__type": "displaystring", "value": "foo bar"}, {}]
7+
},
8+
{
9+
"name": "all printable ascii",
10+
"raw": ["%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\""],
11+
"header_type": "item",
12+
"expected": [{"__type": "displaystring", "value": " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"}, {}]
13+
},
14+
{
15+
"name": "non-ascii display string (uppercase escaping)",
16+
"raw": ["%\"f%C3%BC%C3%BC\""],
17+
"canonical": ["%\"f%c3%bc%c3%bc\""],
18+
"header_type": "item",
19+
"must_fail": true
20+
},
21+
{
22+
"name": "non-ascii display string (lowercase escaping)",
23+
"raw": ["%\"f%c3%bc%c3%bc\""],
24+
"header_type": "item",
25+
"expected": [{"__type": "displaystring", "value": "füü"}, {}]
26+
},
27+
{
28+
"name": "tab in display string",
29+
"raw": ["%\"\t\""],
30+
"header_type": "item",
31+
"must_fail": true
32+
},
33+
{
34+
"name": "newline in display string",
35+
"raw": ["%\"\n\""],
36+
"header_type": "item",
37+
"must_fail": true
38+
},
39+
{
40+
"name": "single quoted display string",
41+
"raw": ["%'foo'"],
42+
"header_type": "item",
43+
"must_fail": true
44+
},
45+
{
46+
"name": "unquoted display string",
47+
"raw": ["%foo"],
48+
"header_type": "item",
49+
"must_fail": true
50+
},
51+
{
52+
"name": "display string missing initial quote",
53+
"raw": ["%foo\""],
54+
"header_type": "item",
55+
"must_fail": true
56+
},
57+
{
58+
"name": "unbalanced display string",
59+
"raw": ["%\"foo"],
60+
"header_type": "item",
61+
"must_fail": true
62+
},
63+
{
64+
"name": "display string quoting",
65+
"raw": ["%\"foo %22bar%22 \\ baz\""],
66+
"header_type": "item",
67+
"expected": [{"__type": "displaystring", "value": "foo \"bar\" \\ baz"}, {}]
68+
},
69+
{
70+
"name": "bad display string escaping",
71+
"raw": ["%\"foo %a"],
72+
"header_type": "item",
73+
"must_fail": true
74+
},
75+
{
76+
"name": "bad display string utf-8 (invalid 2-byte seq)",
77+
"raw": ["%\"%c3%28\""],
78+
"header_type": "item",
79+
"must_fail": true
80+
},
81+
{
82+
"name": "bad display string utf-8 (invalid sequence id)",
83+
"raw": ["%\"%a0%a1\""],
84+
"header_type": "item",
85+
"must_fail": true
86+
},
87+
{
88+
"name": "bad display string utf-8 (invalid hex)",
89+
"raw": ["%\"%g0%1w\""],
90+
"header_type": "item",
91+
"must_fail": true
92+
},
93+
{
94+
"name": "bad display string utf-8 (invalid 3-byte seq)",
95+
"raw": ["%\"%e2%28%a1\""],
96+
"header_type": "item",
97+
"must_fail": true
98+
},
99+
{
100+
"name": "bad display string utf-8 (invalid 4-byte seq)",
101+
"raw": ["%\"%f0%28%8c%28\""],
102+
"header_type": "item",
103+
"must_fail": true
104+
},
105+
{
106+
"name": "BOM in display string",
107+
"raw": ["%\"BOM: %ef%bb%bf\""],
108+
"header_type": "item",
109+
"expected": [{"__type": "displaystring", "value": "BOM: \uFEFF"}, {}]
110+
}
111+
]

0 commit comments

Comments
 (0)