Skip to content

Commit 2cc34da

Browse files
committed
Base64: Speedup decoding fix swiftlang#1
- Use a lookup table to convert input ASCII to byte values. This is faster and simpler than scanning through multiple ranges in a loop for each input byte.
- The padding character '=' and invalid inputs both have bit 6 set in their decoded value, so both cases can be tested together.
- Fix the estimate of the output buffer size, which was 2/3 of the input buffer size instead of 3/4.
1 parent 4a8a22c commit 2cc34da

File tree

1 file changed

+61
-62
lines changed

1 file changed

+61
-62
lines changed

Sources/Foundation/NSData.swift

Lines changed: 61 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -619,90 +619,88 @@ open class NSData : NSObject, NSCopying, NSMutableCopying, NSSecureCoding {
619619
}))
620620
}
621621

622-
/// The ranges of ASCII characters that are used to encode data in Base64.
623-
private static let base64ByteMappings: [Range<UInt8>] = [
624-
65 ..< 91, // A-Z
625-
97 ..< 123, // a-z
626-
48 ..< 58, // 0-9
627-
43 ..< 44, // +
628-
47 ..< 48, // /
629-
]
630622
/**
631623
Padding character used when the number of bytes to encode is not divisible by 3
632624
*/
633625
private static let base64Padding : UInt8 = 61 // =
634-
635-
/**
636-
This method takes a byte with a character from Base64-encoded string
637-
and gets the binary value that the character corresponds to.
638-
639-
- parameter byte: The byte with the Base64 character.
640-
- returns: Base64DecodedByte value containing the result (Valid, Invalid, Padding)
641-
*/
642-
private enum Base64DecodedByte {
643-
case valid(UInt8)
644-
case invalid
645-
case padding
646-
}
647-
648-
private static func base64DecodeByte(_ byte: UInt8) -> Base64DecodedByte {
649-
guard byte != base64Padding else {return .padding}
650-
var decodedStart: UInt8 = 0
651-
for range in base64ByteMappings {
652-
if range.contains(byte) {
653-
let result = decodedStart + (byte - range.lowerBound)
654-
return .valid(result)
655-
}
656-
decodedStart += range.upperBound - range.lowerBound
657-
}
658-
return .invalid
659-
}
660626

661627
/**
662628
This method decodes Base64-encoded data.
663-
629+
664630
If the input contains any bytes that are not valid Base64 characters,
665631
this will return nil unless `options` contains `.ignoreUnknownCharacters`, in which case those bytes are skipped.
666-
632+
667633
- parameter bytes: The Base64 bytes
668634
- parameter options: Options for handling invalid input
669635
- returns: The decoded bytes.
670636
*/
671637
private static func base64DecodeBytes<T: Collection>(_ bytes: T, options: Base64DecodingOptions = []) -> [UInt8]? where T.Element == UInt8 {
672-
var decodedBytes = [UInt8]()
673-
decodedBytes.reserveCapacity((bytes.count/3)*2)
674-
675-
var currentByte : UInt8 = 0
638+
639+
// This table maps byte values 0-127; input bytes >127 are always invalid.
640+
// Map the ASCII characters "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" -> 0...63
641+
// Map '=' (ASCII 61) to 0x40.
642+
// All other values map to 0x7f. This allows '=' and invalid bytes to be checked together by testing bit 6 (0x40).
643+
let base64Decode: StaticString = """
644+
\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\
645+
\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\
646+
\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\u{3e}\u{7f}\u{7f}\u{7f}\u{3f}\
647+
\u{34}\u{35}\u{36}\u{37}\u{38}\u{39}\u{3a}\u{3b}\u{3c}\u{3d}\u{7f}\u{7f}\u{7f}\u{40}\u{7f}\u{7f}\
648+
\u{7f}\u{00}\u{01}\u{02}\u{03}\u{04}\u{05}\u{06}\u{07}\u{08}\u{09}\u{0a}\u{0b}\u{0c}\u{0d}\u{0e}\
649+
\u{0f}\u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\u{18}\u{19}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}\
650+
\u{7f}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}\u{20}\u{21}\u{22}\u{23}\u{24}\u{25}\u{26}\u{27}\u{28}\
651+
\u{29}\u{2a}\u{2b}\u{2c}\u{2d}\u{2e}\u{2f}\u{30}\u{31}\u{32}\u{33}\u{7f}\u{7f}\u{7f}\u{7f}\u{7f}
652+
"""
653+
assert(base64Decode.isASCII)
654+
assert(base64Decode.utf8CodeUnitCount == 128)
655+
assert(base64Decode.hasPointerRepresentation)
656+
657+
let ignoreUnknown = options.contains(.ignoreUnknownCharacters)
658+
if !ignoreUnknown && !bytes.count.isMultiple(of: 4) {
659+
return nil
660+
}
661+
662+
var decodedBytes: [UInt8] = []
663+
let capacity = (bytes.count * 3) / 4 // Every 4 valid ASCII bytes maps to 3 output bytes.
664+
decodedBytes.reserveCapacity(capacity)
665+
666+
var currentByte: UInt8 = 0
676667
var validCharacterCount = 0
677668
var paddingCount = 0
678669
var index = 0
679-
680-
670+
681671
for base64Char in bytes {
682-
683-
let value : UInt8
684-
685-
switch base64DecodeByte(base64Char) {
686-
case .valid(let v):
687-
value = v
688-
validCharacterCount += 1
689-
case .invalid:
690-
if options.contains(.ignoreUnknownCharacters) {
672+
var value: UInt8 = 0
673+
674+
var invalid = false
675+
if base64Char >= base64Decode.utf8CodeUnitCount {
676+
invalid = true
677+
} else {
678+
value = base64Decode.utf8Start[Int(base64Char)]
679+
if value & 0x40 == 0x40 { // Input byte is either '=' or an invalid value.
680+
if value == 0x7f {
681+
invalid = true
682+
} else if value == 0x40 { // '=' padding at end of input.
683+
paddingCount += 1
684+
continue
685+
}
686+
}
687+
}
688+
689+
if invalid {
690+
if ignoreUnknown {
691691
continue
692692
} else {
693693
return nil
694694
}
695-
case .padding:
696-
paddingCount += 1
697-
continue
698695
}
699-
700-
//padding found in the middle of the sequence is invalid
696+
validCharacterCount += 1
697+
698+
// Padding found in the middle of the sequence is invalid.
701699
if paddingCount > 0 {
702700
return nil
703701
}
704-
705-
switch index%4 {
702+
703+
switch index {
706704
case 0:
707705
currentByte = (value << 2)
708706
case 1:
@@ -716,15 +714,16 @@ open class NSData : NSObject, NSCopying, NSMutableCopying, NSSecureCoding {
716714
case 3:
717715
currentByte |= value
718716
decodedBytes.append(currentByte)
717+
index = -1
719718
default:
720719
fatalError()
721720
}
722-
721+
723722
index += 1
724723
}
725-
726-
guard (validCharacterCount + paddingCount)%4 == 0 else {
727-
//invalid character count
724+
725+
guard (validCharacterCount + paddingCount) % 4 == 0 else {
726+
// The combined count of valid and padding characters must be a multiple of 4.
728727
return nil
729728
}
730729
return decodedBytes

0 commit comments

Comments
 (0)