// Sources/SwiftProtobuf/TextFormatScanner.swift - Text format decoding
//
// Copyright (c) 2014 - 2019 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------
///
/// Text format decoding engine.
///
// -----------------------------------------------------------------------------

import Foundation

private let asciiBell = UInt8(7)
private let asciiBackspace = UInt8(8)
private let asciiTab = UInt8(9)
private let asciiNewLine = UInt8(10)
private let asciiVerticalTab = UInt8(11)
private let asciiFormFeed = UInt8(12)
private let asciiCarriageReturn = UInt8(13)
private let asciiZero = UInt8(ascii: "0")
private let asciiOne = UInt8(ascii: "1")
private let asciiThree = UInt8(ascii: "3")
private let asciiSeven = UInt8(ascii: "7")
private let asciiNine = UInt8(ascii: "9")
private let asciiColon = UInt8(ascii: ":")
private let asciiPeriod = UInt8(ascii: ".")
private let asciiPlus = UInt8(ascii: "+")
private let asciiComma = UInt8(ascii: ",")
private let asciiSemicolon = UInt8(ascii: ";")
private let asciiDoubleQuote = UInt8(ascii: "\"")
private let asciiSingleQuote = UInt8(ascii: "\'")
private let asciiBackslash = UInt8(ascii: "\\")
private let asciiForwardSlash = UInt8(ascii: "/")
private let asciiHash = UInt8(ascii: "#")
private let asciiUnderscore = UInt8(ascii: "_")
private let asciiQuestionMark = UInt8(ascii: "?")
private let asciiSpace = UInt8(ascii: " ")
private let asciiOpenSquareBracket = UInt8(ascii: "[")
private let asciiCloseSquareBracket = UInt8(ascii: "]")
private let asciiOpenCurlyBracket = UInt8(ascii: "{")
private let asciiCloseCurlyBracket = UInt8(ascii: "}")
private let asciiOpenAngleBracket = UInt8(ascii: "<")
private let asciiCloseAngleBracket = UInt8(ascii: ">")
private let asciiMinus = UInt8(ascii: "-")
private let asciiLowerA = UInt8(ascii: "a")
private let asciiUpperA = UInt8(ascii: "A")
private let asciiLowerB = UInt8(ascii: "b")
private let asciiLowerE = UInt8(ascii: "e")
private let asciiUpperE = UInt8(ascii: "E")
private let asciiLowerF = UInt8(ascii: "f")
private let asciiUpperF = UInt8(ascii: "F")
private let asciiLowerI = UInt8(ascii: "i")
private let asciiLowerL = UInt8(ascii: "l")
private let asciiLowerN = UInt8(ascii: "n")
private let asciiLowerR = UInt8(ascii: "r")
private let asciiLowerS = UInt8(ascii: "s")
private let asciiLowerT = UInt8(ascii: "t")
private let asciiUpperT = UInt8(ascii: "T")
private let asciiLowerU = UInt8(ascii: "u")
private let asciiUpperU = UInt8(ascii: "U")
private let asciiLowerV = UInt8(ascii: "v")
private let asciiLowerX = UInt8(ascii: "x")
private let asciiLowerY = UInt8(ascii: "y")
private let asciiLowerZ = UInt8(ascii: "z")
private let asciiUpperZ = UInt8(ascii: "Z")

// https://protobuf.dev/programming-guides/proto2/#assigning
// Fields can be between 1 and 536,870,911. So we can stop parsing
// a raw number if we go over this (it also avoids rollover).
private let maxFieldNumLength: Int = 9

/// Returns the numeric value (0...15) of an ASCII hexadecimal digit,
/// accepting both upper and lower case, or nil if `c` is not a hex digit.
private func fromHexDigit(_ c: UInt8) -> UInt8? {
    if c >= asciiZero && c <= asciiNine {
        return c - asciiZero
    }
    if c >= asciiUpperA && c <= asciiUpperF {
        return c - asciiUpperA + UInt8(10)
    }
    if c >= asciiLowerA && c <= asciiLowerF {
        return c - asciiLowerA + UInt8(10)
    }
    return nil
}

/// Same as `fromHexDigit(_:)`, but widened to `UInt32` for code point math.
private func uint32FromHexDigit(_ c: UInt8) -> UInt32? {
    guard let u8 = fromHexDigit(c) else {
        return nil
    }
    return UInt32(u8)
}

// Protobuf Text encoding assumes that you're working directly
// in UTF-8.  So this implementation converts the string to UTF8,
// then decodes it into a sequence of bytes, then converts
// it back into a string.
//
// Returns nil if the input contains an unrecognized or truncated escape,
// a `\u`/`\U` escape outside 0...0x10FFFF, or if `utf8ToString` rejects
// the decoded bytes.
private func decodeString(_ s: String) -> String? {

    // Helper to read 4 hex digits as a UInt32
    func read4HexDigits(_ i: inout String.UTF8View.Iterator) -> UInt32? {
        if let digit1 = i.next(),
            let d1 = uint32FromHexDigit(digit1),
            let digit2 = i.next(),
            let d2 = uint32FromHexDigit(digit2),
            let digit3 = i.next(),
            let d3 = uint32FromHexDigit(digit3),
            let digit4 = i.next(),
            let d4 = uint32FromHexDigit(digit4)
        {
            return (d1 << 12) + (d2 << 8) + (d3 << 4) + d4
        }
        return nil
    }

    var out = [UInt8]()
    var bytes = s.utf8.makeIterator()
    while let byte = bytes.next() {
        switch byte {
        case asciiBackslash:  // backslash
            if let escaped = bytes.next() {
                switch escaped {
                case asciiZero...asciiSeven:  // 0...7
                    // C standard allows 1, 2, or 3 octal digits.
                    // The iterator is a value type, so copying it saves the
                    // position and lets us "un-read" a non-octal byte.
                    let savedPosition = bytes
                    let digit1 = escaped
                    let digit1Value = digit1 - asciiZero
                    if let digit2 = bytes.next(),
                        digit2 >= asciiZero && digit2 <= asciiSeven
                    {
                        let digit2Value = digit2 - asciiZero
                        let innerSavedPosition = bytes
                        if let digit3 = bytes.next(),
                            digit3 >= asciiZero && digit3 <= asciiSeven
                        {
                            let digit3Value = digit3 - asciiZero
                            // The max octal digit is actually \377, but looking at the C++
                            // protobuf code in strutil.cc:UnescapeCEscapeSequences(), it
                            // decodes with rollover, so just duplicate that behavior for
                            // consistency between languages.
                            let n = digit1Value &* 64 &+ digit2Value &* 8 &+ digit3Value
                            out.append(n)
                        } else {
                            let n = digit1Value * 8 + digit2Value
                            out.append(n)
                            bytes = innerSavedPosition
                        }
                    } else {
                        let n = digit1Value
                        out.append(n)
                        bytes = savedPosition
                    }
                case asciiLowerU, asciiUpperU:  // "u"
                    // \u - 4 hex digits, \U 8 hex digits:
                    guard let first = read4HexDigits(&bytes) else { return nil }
                    var codePoint = first
                    if escaped == asciiUpperU {
                        guard let second = read4HexDigits(&bytes) else { return nil }
                        codePoint = (codePoint << 16) + second
                    }
                    switch codePoint {
                    case 0...0x7f:
                        // 1 byte encoding
                        out.append(UInt8(truncatingIfNeeded: codePoint))
                    case 0x80...0x7ff:
                        // 2 byte encoding
                        out.append(0xC0 + UInt8(truncatingIfNeeded: codePoint >> 6))
                        out.append(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
                    case 0x800...0xffff:
                        // 3 byte encoding
                        out.append(0xE0 + UInt8(truncatingIfNeeded: codePoint >> 12))
                        out.append(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                        out.append(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
                    case 0x10000...0x10FFFF:
                        // 4 byte encoding
                        out.append(0xF0 + UInt8(truncatingIfNeeded: codePoint >> 18))
                        out.append(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F))
                        out.append(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                        out.append(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
                    default:
                        return nil
                    }
                case asciiLowerX:  // "x"
                    // Unlike C/C++, protobuf only allows 1 or 2 digits here:
                    if let byte = bytes.next(), let digit = fromHexDigit(byte) {
                        var n = digit
                        let savedPosition = bytes
                        if let byte = bytes.next(), let digit = fromHexDigit(byte) {
                            n = n &* 16 + digit
                        } else {
                            // No second digit; reset the iterator
                            bytes = savedPosition
                        }
                        out.append(n)
                    } else {
                        return nil  // Hex escape must have at least 1 digit
                    }
                case asciiLowerA:  // \a
                    out.append(asciiBell)
                case asciiLowerB:  // \b
                    out.append(asciiBackspace)
                case asciiLowerF:  // \f
                    out.append(asciiFormFeed)
                case asciiLowerN:  // \n
                    out.append(asciiNewLine)
                case asciiLowerR:  // \r
                    out.append(asciiCarriageReturn)
                case asciiLowerT:  // \t
                    out.append(asciiTab)
                case asciiLowerV:  // \v
                    out.append(asciiVerticalTab)
                case asciiDoubleQuote,
                    asciiSingleQuote,
                    asciiQuestionMark,
                    asciiBackslash:  // " ' ? \
                    out.append(escaped)
                default:
                    return nil  // Unrecognized escape
                }
            } else {
                return nil  // Input ends with backslash
            }
        default:
            out.append(byte)
        }
    }
    // There has got to be an easier way to convert a [UInt8] into a String.
    return out.withUnsafeBufferPointer { ptr in
        if let addr = ptr.baseAddress {
            return utf8ToString(bytes: addr, count: ptr.count)
        } else {
            // An empty array may have no base address; the result is "".
            return String()
        }
    }
}

///
/// TextFormatScanner has no public members.
///
/// The scanner walks a caller-provided UTF-8 buffer with a raw cursor `p`
/// bounded by `end`.  Most token-level methods leave `p` positioned after
/// any trailing whitespace/comments that follow the token they consumed.
///
internal struct TextFormatScanner {
    internal let extensions: (any ExtensionMap)?
    /// Current read position; always in `start...end`.
    private var p: UnsafeRawPointer
    /// One past the last readable byte.
    private let end: UnsafeRawPointer
    private let doubleParser = DoubleParser()

    internal let options: TextFormatDecodingOptions
    /// Remaining nesting depth before `messageDepthLimit` is hit.
    internal var recursionBudget: Int

    /// True once every byte of the input has been consumed.
    internal var complete: Bool { p == end }

    /// Creates a scanner over `count` bytes starting at `utf8Pointer` and
    /// skips any leading whitespace/comments.
    internal init(
        utf8Pointer: UnsafeRawPointer,
        count: Int,
        options: TextFormatDecodingOptions,
        extensions: (any ExtensionMap)? = nil
    ) {
        p = utf8Pointer
        end = p + count
        self.extensions = extensions
        self.options = options
        // Since the root message doesn't start with a `skipObjectStart`, the
        // budget starts with one less depth to cover that top message.
        recursionBudget = options.messageDepthLimit - 1
        skipWhitespace()
    }

    /// Consumes one level of nesting budget.
    /// - Throws: `TextFormatDecodingError.messageDepthLimit` when the budget
    ///   goes negative.
    private mutating func incrementRecursionDepth() throws {
        recursionBudget -= 1
        if recursionBudget < 0 {
            throw TextFormatDecodingError.messageDepthLimit
        }
    }

    /// Returns one level of nesting budget.
    private mutating func decrementRecursionDepth() {
        recursionBudget += 1
        // This should never happen, if it does, something is probably
        // corrupting memory, and simply throwing doesn't make much sense.
        if recursionBudget > options.messageDepthLimit {
            fatalError("Somehow TextFormatDecoding unwound more objects than it started")
        }
    }

    /// Skip whitespace (space, tab, NL, CR) and `#` comments, which run
    /// through the next NL or CR.
    private mutating func skipWhitespace() {
        while p != end {
            let u = p[0]
            switch u {
            case asciiSpace,
                asciiTab,
                asciiNewLine,
                asciiCarriageReturn:  // space, tab, NL, CR
                p += 1
            case asciiHash:  // # comment
                p += 1
                while p != end {
                    // Skip until end of line
                    let c = p[0]
                    p += 1
                    if c == asciiNewLine || c == asciiCarriageReturn {
                        break
                    }
                }
            default:
                return
            }
        }
    }

    /// Return a buffer containing the raw UTF8 for an identifier.
    /// Assumes that you already know the current byte is a valid
    /// start of identifier.
    ///
    /// Trailing whitespace after the identifier is skipped.
    private mutating func parseUTF8Identifier() -> UnsafeRawBufferPointer {
        let start = p
        loop: while p != end {
            let c = p[0]
            switch c {
            case asciiLowerA...asciiLowerZ,
                asciiUpperA...asciiUpperZ,
                asciiZero...asciiNine,
                asciiUnderscore:
                p += 1
            default:
                break loop
            }
        }
        let s = UnsafeRawBufferPointer(start: start, count: p - start)
        skipWhitespace()
        return s
    }

    /// Return a String containing the next identifier.
    private mutating func parseIdentifier() -> String {
        let buff = parseUTF8Identifier()
        let s = utf8ToString(bytes: buff.baseAddress!, count: buff.count)
        // Force-unwrap is OK:  we never have invalid UTF8 at this point.
        return s!
    }

    /// Scan a string that encodes a byte field, return a count of
    /// the number of bytes that should be decoded from it.
    ///
    /// On success `p` is rewound to the first byte after the opening quote
    /// so the caller can decode (or copy) the body.
    ///
    /// - Parameters:
    ///   - terminator: The quote character that closes the string.
    ///   - sawBackslash: Set to true if the body contains any escape.
    /// - Throws: `TextFormatDecodingError.malformedText` for a raw newline,
    ///   an invalid escape, or an unterminated string.
    private mutating func validateAndCountBytesFromString(terminator: UInt8, sawBackslash: inout Bool) throws -> Int {
        var count = 0
        let start = p
        sawBackslash = false
        while p != end {
            let byte = p[0]
            p += 1
            if byte == terminator {
                p = start
                return count
            }
            switch byte {
            case asciiNewLine, asciiCarriageReturn:
                // Can't have a newline in the middle of a bytes string.
                throw TextFormatDecodingError.malformedText
            case asciiBackslash:  //  "\\"
                sawBackslash = true
                if p != end {
                    let escaped = p[0]
                    p += 1
                    switch escaped {
                    case asciiZero...asciiSeven:  // '0'...'7'
                        // C standard allows 1, 2, or 3 octal digits.
                        if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                            p += 1
                            if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                                if escaped > asciiThree {
                                    // Out of range octal: three digits and first digit is greater than 3
                                    throw TextFormatDecodingError.malformedText
                                }
                                p += 1
                            }
                        }
                        count += 1
                    case asciiLowerU, asciiUpperU:  // 'u' or 'U' unicode escape
                        let numDigits = (escaped == asciiLowerU) ? 4 : 8
                        guard (end - p) >= numDigits else {
                            throw TextFormatDecodingError.malformedText  // unicode escape must have 4/8 digits
                        }
                        var codePoint: UInt32 = 0
                        for i in 0..<numDigits {
                            guard let digit = uint32FromHexDigit(p[i]) else {
                                throw TextFormatDecodingError.malformedText  // wasn't a hex digit
                            }
                            codePoint = (codePoint << 4) + digit
                        }
                        p += numDigits
                        // Count the bytes of the UTF-8 encoding of the code point.
                        switch codePoint {
                        case 0...0x7f:
                            // 1 byte encoding
                            count += 1
                        case 0x80...0x7ff:
                            // 2 byte encoding
                            count += 2
                        case 0x800...0xffff:
                            // 3 byte encoding
                            count += 3
                        case 0x10000...0x10FFFF:
                            // 4 byte encoding
                            count += 4
                        default:
                            throw TextFormatDecodingError.malformedText  // Isn't a valid unicode character
                        }
                    case asciiLowerX:  // 'x' hexadecimal escape
                        // Protobuf allows 1 or 2 hex digits here.
                        if p != end && fromHexDigit(p[0]) != nil {
                            p += 1
                            if p != end && fromHexDigit(p[0]) != nil {
                                p += 1
                            }
                        } else {
                            throw TextFormatDecodingError.malformedText  // Hex escape must have at least 1 digit
                        }
                        count += 1
                    case asciiLowerA,  // \a ("alert")
                        asciiLowerB,  // \b
                        asciiLowerF,  // \f
                        asciiLowerN,  // \n
                        asciiLowerR,  // \r
                        asciiLowerT,  // \t
                        asciiLowerV,  // \v
                        asciiSingleQuote,  // \'
                        asciiDoubleQuote,  // \"
                        asciiQuestionMark,  // \?
                        asciiBackslash:  // \\
                        count += 1
                    default:
                        throw TextFormatDecodingError.malformedText  // Unrecognized escape
                    }
                }
            default:
                count += 1
            }
        }
        // Ran off the end of the input without seeing the terminator.
        throw TextFormatDecodingError.malformedText
    }

    /// Protobuf Text format uses C ASCII conventions for
    /// encoding byte sequences, including the use of octal
    /// and hexadecimal escapes.
    ///
    /// Assumes that validateAndCountBytesFromString() has already
    /// verified the correctness.  So we get to avoid error checks here.
    ///
    /// `data` must already be sized to the count returned by the validation
    /// pass.  On return `p` is positioned just after the closing quote.
    private mutating func parseBytesFromString(terminator: UInt8, into data: inout Data) {
        data.withUnsafeMutableBytes {
            (body: UnsafeMutableRawBufferPointer) in
            // `out` is the write cursor into `data`, viewed as plain bytes.
            if var out = body.bindMemory(to: UInt8.self).baseAddress, body.count > 0 {
                while p[0] != terminator {
                    let byte = p[0]
                    p += 1
                    switch byte {
                    case asciiBackslash:  //  "\\"
                        let escaped = p[0]
                        p += 1
                        switch escaped {
                        case asciiZero...asciiSeven:  // '0'...'7'
                            // C standard allows 1, 2, or 3 octal digits.
                            let digit1Value = escaped - asciiZero
                            let digit2 = p[0]
                            if digit2 >= asciiZero, digit2 <= asciiSeven {
                                p += 1
                                let digit2Value = digit2 - asciiZero
                                let digit3 = p[0]
                                if digit3 >= asciiZero, digit3 <= asciiSeven {
                                    p += 1
                                    let digit3Value = digit3 - asciiZero
                                    out[0] = digit1Value &* 64 + digit2Value * 8 + digit3Value
                                    out += 1
                                } else {
                                    out[0] = digit1Value * 8 + digit2Value
                                    out += 1
                                }
                            } else {
                                out[0] = digit1Value
                                out += 1
                            }
                        case asciiLowerU, asciiUpperU:
                            let numDigits = (escaped == asciiLowerU) ? 4 : 8
                            var codePoint: UInt32 = 0
                            for i in 0..<numDigits {
                                // Already validated, so every byte is a hex digit.
                                codePoint = (codePoint << 4) + uint32FromHexDigit(p[i])!
                            }
                            p += numDigits
                            switch codePoint {
                            case 0...0x7f:
                                // 1 byte encoding
                                out[0] = UInt8(truncatingIfNeeded: codePoint)
                                out += 1
                            case 0x80...0x7ff:
                                // 2 byte encoding
                                out[0] = 0xC0 + UInt8(truncatingIfNeeded: codePoint >> 6)
                                out[1] = 0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F)
                                out += 2
                            case 0x800...0xffff:
                                // 3 byte encoding
                                out[0] = 0xE0 + UInt8(truncatingIfNeeded: codePoint >> 12)
                                out[1] = 0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F)
                                out[2] = 0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F)
                                out += 3
                            case 0x10000...0x10FFFF:
                                // 4 byte encoding
                                out[0] = 0xF0 + UInt8(truncatingIfNeeded: codePoint >> 18)
                                out[1] = 0x80 + UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F)
                                out[2] = 0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F)
                                out[3] = 0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F)
                                out += 4
                            default:
                                preconditionFailure()  // Already validated, can't happen
                            }
                        case asciiLowerX:  // 'x' hexadecimal escape
                            // We already validated, so we know there's at least one digit:
                            var n = fromHexDigit(p[0])!
                            p += 1
                            if let digit = fromHexDigit(p[0]) {
                                n = n &* 16 &+ digit
                                p += 1
                            }
                            out[0] = n
                            out += 1
                        case asciiLowerA:  // \a ("alert")
                            out[0] = asciiBell
                            out += 1
                        case asciiLowerB:  // \b
                            out[0] = asciiBackspace
                            out += 1
                        case asciiLowerF:  // \f
                            out[0] = asciiFormFeed
                            out += 1
                        case asciiLowerN:  // \n
                            out[0] = asciiNewLine
                            out += 1
                        case asciiLowerR:  // \r
                            out[0] = asciiCarriageReturn
                            out += 1
                        case asciiLowerT:  // \t
                            out[0] = asciiTab
                            out += 1
                        case asciiLowerV:  // \v
                            out[0] = asciiVerticalTab
                            out += 1
                        default:
                            // Validation only lets ' " ? \ reach here; they
                            // stand for themselves.
                            out[0] = escaped
                            out += 1
                        }
                    default:
                        out[0] = byte
                        out += 1
                    }
                }
                p += 1  // Consume terminator
            }
        }
    }

    /// Assumes the leading quote has already been consumed.
    ///
    /// Returns the decoded contents of one quoted segment (skipping trailing
    /// whitespace), or nil if the segment is unterminated, contains a raw
    /// newline, or fails to decode.
    private mutating func parseStringSegment(terminator: UInt8) -> String? {
        let start = p
        var sawBackslash = false
        while p != end {
            let c = p[0]
            if c == terminator {
                let s = utf8ToString(bytes: start, count: p - start)
                p += 1
                skipWhitespace()
                if let s = s, sawBackslash {
                    return decodeString(s)
                } else {
                    return s
                }
            }
            p += 1
            if c == asciiBackslash {  //  \
                if p == end {
                    return nil
                }
                sawBackslash = true
                // Skip the escaped byte so an escaped quote doesn't end the segment.
                p += 1
            }
            if c == asciiNewLine || c == asciiCarriageReturn {
                // Can't have a newline in the middle of a raw string.
                return nil
            }
        }
        return nil  // Unterminated quoted string
    }

    /// Parses an unsigned integer written in decimal, octal (leading `0`),
    /// or hexadecimal (leading `0x`), then skips trailing whitespace.
    /// - Throws: `TextFormatDecodingError.malformedNumber` at end of input,
    ///   on a non-digit first byte, or on overflow of `UInt64`.
    internal mutating func nextUInt() throws -> UInt64 {
        if p == end {
            throw TextFormatDecodingError.malformedNumber
        }
        let c = p[0]
        p += 1
        if c == asciiZero {  // leading '0' precedes octal or hex
            if p == end {
                // The TextFormat ended with a field value of zero.
                return 0
            }
            if p[0] == asciiLowerX {  // 'x' => hex
                p += 1
                var n: UInt64 = 0
                while p != end {
                    let digit = p[0]
                    let val: UInt64
                    switch digit {
                    case asciiZero...asciiNine:  // 0...9
                        val = UInt64(digit - asciiZero)
                    case asciiLowerA...asciiLowerF:  // a...f
                        val = UInt64(digit - asciiLowerA + 10)
                    case asciiUpperA...asciiUpperF:
                        val = UInt64(digit - asciiUpperA + 10)
                    default:
                        skipWhitespace()
                        return n
                    }
                    if n > UInt64.max / 16 {
                        throw TextFormatDecodingError.malformedNumber
                    }
                    p += 1
                    n = n * 16 + val
                }
                skipWhitespace()
                return n
            } else {  // octal
                var n: UInt64 = 0
                while p != end {
                    let digit = p[0]
                    if digit < asciiZero || digit > asciiSeven {
                        skipWhitespace()
                        return n  // not octal digit
                    }
                    let val = UInt64(digit - asciiZero)
                    if n > UInt64.max / 8 {
                        throw TextFormatDecodingError.malformedNumber
                    }
                    p += 1
                    n = n * 8 + val
                }
                skipWhitespace()
                return n
            }
        } else if c > asciiZero && c <= asciiNine {  // 1...9
            var n = UInt64(c - asciiZero)
            while p != end {
                let digit = p[0]
                if digit < asciiZero || digit > asciiNine {
                    skipWhitespace()
                    return n  // not a digit
                }
                let val = UInt64(digit - asciiZero)
                if n > UInt64.max / 10 || n * 10 > UInt64.max - val {
                    throw TextFormatDecodingError.malformedNumber
                }
                p += 1
                n = n * 10 + val
            }
            skipWhitespace()
            return n
        }
        throw TextFormatDecodingError.malformedNumber
    }

    /// Parses a signed integer: an optional `-` followed by anything
    /// `nextUInt()` accepts.
    /// - Throws: `TextFormatDecodingError.malformedNumber` if the value does
    ///   not fit in `Int64` or is not a number.
    internal mutating func nextSInt() throws -> Int64 {
        if p == end {
            throw TextFormatDecodingError.malformedNumber
        }
        let c = p[0]
        if c == asciiMinus {  // -
            p += 1
            if p == end {
                throw TextFormatDecodingError.malformedNumber
            }
            // character after '-' must be digit
            let digit = p[0]
            if digit < asciiZero || digit > asciiNine {
                throw TextFormatDecodingError.malformedNumber
            }
            let n = try nextUInt()
            let limit: UInt64 = 0x8000_0000_0000_0000  // -Int64.min
            if n >= limit {
                if n > limit {
                    // Too large negative number
                    throw TextFormatDecodingError.malformedNumber
                } else {
                    return Int64.min  // Special case for Int64.min
                }
            }
            return -Int64(bitPattern: n)
        } else {
            let n = try nextUInt()
            if n > UInt64(bitPattern: Int64.max) {
                throw TextFormatDecodingError.malformedNumber
            }
            return Int64(bitPattern: n)
        }
    }

    /// Parses one or more adjacent quoted string segments and returns their
    /// concatenation.
    /// - Throws: `TextFormatDecodingError.malformedText` if the next token is
    ///   not a quoted string or any segment fails to parse.
    internal mutating func nextStringValue() throws -> String {
        var result: String
        skipWhitespace()
        if p == end {
            throw TextFormatDecodingError.malformedText
        }
        let c = p[0]
        if c != asciiSingleQuote && c != asciiDoubleQuote {
            throw TextFormatDecodingError.malformedText
        }
        p += 1
        if let s = parseStringSegment(terminator: c) {
            result = s
        } else {
            throw TextFormatDecodingError.malformedText
        }

        // Any further adjacent quoted segments are appended.
        while true {
            if p == end {
                return result
            }
            let c = p[0]
            if c != asciiSingleQuote && c != asciiDoubleQuote {
                return result
            }
            p += 1
            if let s = parseStringSegment(terminator: c) {
                result.append(s)
            } else {
                throw TextFormatDecodingError.malformedText
            }
        }
    }

    /// Protobuf Text Format allows a single bytes field to
    /// contain multiple quoted strings.  The values
    /// are separately decoded and then concatenated:
    ///  field1: "bytes" 'more bytes'
    ///        "and even more bytes"
    internal mutating func nextBytesValue() throws -> Data {
        // Get the first string's contents
        var result: Data
        skipWhitespace()
        if p == end {
            throw TextFormatDecodingError.malformedText
        }
        let c = p[0]
        if c != asciiSingleQuote && c != asciiDoubleQuote {
            throw TextFormatDecodingError.malformedText
        }
        p += 1
        var sawBackslash = false
        let n = try validateAndCountBytesFromString(terminator: c, sawBackslash: &sawBackslash)
        if sawBackslash {
            result = Data(count: n)
            parseBytesFromString(terminator: c, into: &result)
        } else {
            // No escapes: the body can be copied verbatim.
            result = Data(bytes: p, count: n)
            p += n + 1  // Skip string body + close quote
        }

        // If there are more strings, decode them
        // and append to the result:
        while true {
            skipWhitespace()
            if p == end {
                return result
            }
            let c = p[0]
            if c != asciiSingleQuote && c != asciiDoubleQuote {
                return result
            }
            p += 1
            var sawBackslash = false
            let n = try validateAndCountBytesFromString(terminator: c, sawBackslash: &sawBackslash)
            if sawBackslash {
                var b = Data(count: n)
                parseBytesFromString(terminator: c, into: &b)
                result.append(b)
            } else {
                result.append(Data(bytes: p, count: n))
                p += n + 1  // Skip string body + close quote
            }
        }
    }

    // Tries to identify a sequence of UTF8 characters
    // that represent a numeric floating-point value.
    //
    // Returns nil (with `p` restored) when the text cannot start a number;
    // otherwise returns whatever `doubleParser` makes of the scanned span.
    private mutating func tryParseFloatString() -> Double? {
        guard p != end else { return nil }
        let start = p
        var c = p[0]
        if c == asciiMinus {
            p += 1
            guard p != end else {
                p = start
                return nil
            }
            c = p[0]
        }
        switch c {
        case asciiZero:  // '0' as first character is not allowed followed by digit
            p += 1
            guard p != end else { break }
            c = p[0]
            if c >= asciiZero && c <= asciiNine {
                p = start
                return nil
            }
        case asciiPeriod:  // '.' as first char only if followed by digit
            p += 1
            guard p != end else {
                p = start
                return nil
            }
            c = p[0]
            if c < asciiZero || c > asciiNine {
                p = start
                return nil
            }
        case asciiOne...asciiNine:
            break
        default:
            p = start
            return nil
        }
        loop: while p != end {
            let c = p[0]
            switch c {
            case asciiZero...asciiNine,
                asciiPeriod,
                asciiPlus,
                asciiMinus,
                asciiLowerE,
                asciiUpperE:  // 0...9, ., +, -, e, E
                p += 1
            case asciiLowerF, asciiUpperF:  // f or F
                let d = doubleParser.utf8ToDouble(
                    bytes: UnsafeRawBufferPointer(
                        start: start,
                        count: p - start
                    ),
                    finiteOnly: false
                )
                // Just skip the 'f'/'F'
                p += 1
                skipWhitespace()
                return d
            default:
                break loop
            }
        }
        let d = doubleParser.utf8ToDouble(
            bytes: UnsafeRawBufferPointer(
                start: start,
                count: p - start
            ),
            finiteOnly: false
        )
        skipWhitespace()
        return d
    }

    // Skip specified characters if they all match
    // (otherwise `p` is left unchanged).
    private mutating func skipOptionalCharacters(bytes: [UInt8]) {
        let start = p
        for b in bytes {
            if p == end || p[0] != b {
                p = start
                return
            }
            p += 1
        }
    }

    // Skip following keyword if it matches (case-insensitively)
    // the given keyword (specified as a series of bytes).
    //
    // `bytes` must be lower case.  The match fails if the keyword is
    // immediately followed by another ASCII letter.
    private mutating func skipOptionalKeyword(bytes: [UInt8]) -> Bool {
        let start = p
        for b in bytes {
            if p == end {
                p = start
                return false
            }
            var c = p[0]
            if c >= asciiUpperA && c <= asciiUpperZ {
                // Convert to lower case
                // (Protobuf text keywords are case insensitive)
                c += asciiLowerA - asciiUpperA
            }
            if c != b {
                p = start
                return false
            }
            p += 1
        }
        if p == end {
            return true
        }
        let c = p[0]
        if (c >= asciiUpperA && c <= asciiUpperZ) || (c >= asciiLowerA && c <= asciiLowerZ) {
            p = start
            return false
        }
        skipWhitespace()
        return true
    }

    // If the next token is the identifier "nan", return true.
    private mutating func skipOptionalNaN() -> Bool {
        let start = p
        // "-nan" doesn't mean anything, but upstream handles it, so skip
        // over any leading minus when checking for "nan".
        if p != end && p[0] == asciiMinus {
            p += 1
        }
        if skipOptionalKeyword(bytes: [asciiLowerN, asciiLowerA, asciiLowerN]) {
            return true
        } else {
            p = start  // It wasn't "nan", rewind in case we skipped a minus sign.
            return false
        }
    }

    // If the next token is a recognized spelling of "infinity",
    // return Float.infinity or -Float.infinity
    private mutating func skipOptionalInfinity() -> Float? {
        if p == end {
            return nil
        }
        let start = p
        let c = p[0]
        let negated: Bool
        if c == asciiMinus {
            negated = true
            p += 1
        } else {
            negated = false
        }
        let inf = [asciiLowerI, asciiLowerN, asciiLowerF]
        let infinity = [
            asciiLowerI, asciiLowerN, asciiLowerF, asciiLowerI,
            asciiLowerN, asciiLowerI, asciiLowerT, asciiLowerY,
        ]
        if skipOptionalKeyword(bytes: inf)
            || skipOptionalKeyword(bytes: infinity)
        {
            return negated ? -Float.infinity : Float.infinity
        }
        p = start
        return nil
    }

    /// Parses a float: a numeric literal, "nan", or "inf"/"infinity".
    /// - Throws: `TextFormatDecodingError.malformedNumber` if none match.
    internal mutating func nextFloat() throws -> Float {
        if let d = tryParseFloatString() {
            return Float(d)
        }
        if skipOptionalNaN() {
            return Float.nan
        }
        if let inf = skipOptionalInfinity() {
            return inf
        }
        throw TextFormatDecodingError.malformedNumber
    }

    /// Parses a double: a numeric literal, "nan", or "inf"/"infinity".
    /// - Throws: `TextFormatDecodingError.malformedNumber` if none match.
    internal mutating func nextDouble() throws -> Double {
        if let d = tryParseFloatString() {
            return d
        }
        if skipOptionalNaN() {
            return Double.nan
        }
        if let inf = skipOptionalInfinity() {
            return Double(inf)
        }
        throw TextFormatDecodingError.malformedNumber
    }

    /// Parses a boolean: `0`, `1`, `t`/`T`, `f`/`F`, or those letters
    /// followed by lower-case "rue"/"alse".  The value must be followed by
    /// end of input, whitespace, a comment, a separator, or a closing bracket.
    /// - Throws: `TextFormatDecodingError.malformedText` otherwise.
    internal mutating func nextBool() throws -> Bool {
        skipWhitespace()
        if p == end {
            throw TextFormatDecodingError.malformedText
        }
        let c = p[0]
        p += 1
        let result: Bool
        switch c {
        case asciiZero:
            result = false
        case asciiOne:
            result = true
        case asciiLowerF, asciiUpperF:
            if p != end {
                let alse = [asciiLowerA, asciiLowerL, asciiLowerS, asciiLowerE]
                skipOptionalCharacters(bytes: alse)
            }
            result = false
        case asciiLowerT, asciiUpperT:
            if p != end {
                let rue = [asciiLowerR, asciiLowerU, asciiLowerE]
                skipOptionalCharacters(bytes: rue)
            }
            result = true
        default:
            throw TextFormatDecodingError.malformedText
        }
        if p == end {
            return result
        }
        switch p[0] {
        case asciiSpace,
            asciiTab,
            asciiNewLine,
            asciiCarriageReturn,
            asciiHash,
            asciiComma,
            asciiSemicolon,
            asciiCloseSquareBracket,
            asciiCloseCurlyBracket,
            asciiCloseAngleBracket:
            skipWhitespace()
            return result
        default:
            throw TextFormatDecodingError.malformedText
        }
    }

    /// Returns the raw UTF-8 of the next identifier if the next token starts
    /// with an ASCII letter, or nil if it does not.
    /// - Throws: `TextFormatDecodingError.malformedText` at end of input.
    internal mutating func nextOptionalEnumName() throws -> UnsafeRawBufferPointer? {
        skipWhitespace()
        if p == end {
            throw TextFormatDecodingError.malformedText
        }
        switch p[0] {
        case asciiLowerA...asciiLowerZ, asciiUpperA...asciiUpperZ:
            return parseUTF8Identifier()
        default:
            return nil
        }
    }

    /// Any URLs are syntactically (almost) identical to extension
    /// keys, so we share the code for those.
    internal mutating func nextOptionalAnyURL() throws -> String? {
        try nextOptionalExtensionKey()
    }

    /// Returns next extension key or nil if end-of-input or
    /// if next token is not an extension key.
    ///
    /// Throws an error if the next token starts with '[' but
    /// cannot be parsed as an extension key.
    ///
    /// Note: This accepts / characters to support Any URL parsing.
    /// Technically, Any URLs can contain / characters and extension
    /// key names cannot.  But in practice, accepting / characters for
    /// extension keys works fine, since the result just gets rejected
    /// when the key is looked up.
    internal mutating func nextOptionalExtensionKey() throws -> String? {
        skipWhitespace()
        if p == end {
            return nil
        }
        guard p[0] == asciiOpenSquareBracket else {  // [
            return nil
        }
        return try parseExtensionKey()
    }

    /// Parse the rest of an [extension_field_name] in the input, assuming the
    /// current byte is the initial "[" character (it is consumed here).
    /// This is also used for AnyURL, so we include "/".
    ///
    /// Returns the name without the surrounding brackets.
    private mutating func parseExtensionKey() throws -> String {
        assert(p[0] == asciiOpenSquareBracket)
        p += 1
        if p == end {
            throw TextFormatDecodingError.malformedText
        }
        let start = p
        // The first character must be a letter.
        switch p[0] {
        case asciiLowerA...asciiLowerZ, asciiUpperA...asciiUpperZ:
            p += 1
        default:
            throw TextFormatDecodingError.malformedText
        }
        loop: while p != end {
            switch p[0] {
            case asciiLowerA...asciiLowerZ,
                asciiUpperA...asciiUpperZ,
                asciiZero...asciiNine,
                asciiUnderscore,
                asciiPeriod,
                asciiForwardSlash:
                p += 1
            case asciiCloseSquareBracket:  // ]
                break loop
            default:
                throw TextFormatDecodingError.malformedText
            }
        }
        if p == end || p[0] != asciiCloseSquareBracket {
            throw TextFormatDecodingError.malformedText
        }
        guard let extensionName = utf8ToString(bytes: start, count: p - start) else {
            throw TextFormatDecodingError.malformedText
        }
        p += 1  // Skip ]
        skipWhitespace()
        return extensionName
    }

    /// Returns text of next regular key or nil if end-of-input.
    ///
    /// Extension keys are returned with their surrounding brackets; raw
    /// field numbers are returned as their decimal text.
    internal mutating func nextKey(allowExtensions: Bool) throws -> String? {
        skipWhitespace()
        if p == end {
            return nil
        }
        let c = p[0]
        switch c {
        case asciiOpenSquareBracket:  // [
            if allowExtensions {
                return "[\(try parseExtensionKey())]"
            }
            throw TextFormatDecodingError.unknownField
        case asciiLowerA...asciiLowerZ,
            asciiUpperA...asciiUpperZ:  // a...z, A...Z
            return parseIdentifier()
        case asciiOne...asciiNine:  // 1...9 (field numbers are 123, not 0123)
            let start = p
            p += 1
            while p != end {
                let c = p[0]
                if c < asciiZero || c > asciiNine {
                    break
                }
                p += 1
                if p - start > maxFieldNumLength {
                    throw TextFormatDecodingError.malformedText
                }
            }
            let buff = UnsafeRawBufferPointer(start: start, count: p - start)
            skipWhitespace()
            let s = utf8ToString(bytes: buff.baseAddress!, count: buff.count)
            // Safe, can't be invalid UTF-8 given the input.
            return s!
        default:
            throw TextFormatDecodingError.malformedText
        }
    }

    /// Parse a field name, look it up, and return the corresponding
    /// field number.
    ///
    /// returns nil at end-of-input
    ///
    /// Throws if field name cannot be parsed or if field name is
    /// unknown.
    ///
    /// Unknown fields that the options allow to be ignored, and reserved
    /// names/numbers, are skipped along with their values and the scan
    /// continues with the next field.
    ///
    /// This function accounts for as much as 2/3 of the total run
    /// time of the entire parse.
    internal mutating func nextFieldNumber(
        names: _NameMap,
        messageType: any Message.Type,
        terminator: UInt8?
    ) throws -> Int? {
        while true {
            skipWhitespace()
            if p == end {
                if terminator == nil {
                    return nil
                } else {
                    // Never got the terminator.
                    throw TextFormatDecodingError.malformedText
                }
            }
            var isReserved = false
            let c = p[0]
            switch c {
            case asciiLowerA...asciiLowerZ,
                asciiUpperA...asciiUpperZ:  // a...z, A...Z
                let key = parseUTF8Identifier()
                if let fieldNumber = names.number(forProtoName: key) {
                    return fieldNumber
                }
                if !options.ignoreUnknownFields {
                    if names.isReserved(name: key) {
                        isReserved = true
                    } else {
                        throw TextFormatDecodingError.unknownField
                    }
                }
                // Unknown field name or reserved, break and skip
                break
            case asciiOpenSquareBracket:  // Start of an extension field
                let key = try parseExtensionKey()
                if let fieldNumber = extensions?.fieldNumberForProto(messageType: messageType, protoFieldName: key) {
                    return fieldNumber
                }
                if !options.ignoreUnknownExtensionFields {
                    throw TextFormatDecodingError.unknownField
                }
                // Unknown field name, break and skip
                break
            case asciiOne...asciiNine:  // 1-9 (field numbers are 123, not 0123)
                let start = p
                var fieldNum = Int(c) - Int(asciiZero)
                p += 1
                while p != end {
                    let c = p[0]
                    if c >= asciiZero && c <= asciiNine {
                        fieldNum = fieldNum &* 10 &+ (Int(c) - Int(asciiZero))
                    } else {
                        break
                    }
                    p += 1
                    if p - start > maxFieldNumLength {
                        throw TextFormatDecodingError.malformedText
                    }
                }
                skipWhitespace()
                if names.names(for: fieldNum) != nil {
                    return fieldNum
                }
                if !options.ignoreUnknownFields {
                    // fieldNum is range checked while parsing, so it is safe to truncate.
                    if names.isReserved(number: Int32(truncatingIfNeeded: fieldNum)) {
                        isReserved = true
                    } else {
                        throw TextFormatDecodingError.unknownField
                    }
                }
                // Unknown field name or reserved, break and skip
                break
            default:
                if c == terminator {
                    let _ = skipOptionalObjectEnd(c)
                    return nil
                }
                throw TextFormatDecodingError.malformedText
            }

            assert(options.ignoreUnknownFields || options.ignoreUnknownExtensionFields || isReserved)
            try skipUnknownFieldValue()
            // Skip any separator before looping around to try for another field.
            skipOptionalSeparator()
        }
    }

    // Helper to skip past an unknown field value, when called `p` will be pointing
    // at the first character after the unknown field name.
    internal mutating func skipUnknownFieldValue() throws {
        // This is modeled after the C++ text_format.cpp `ConsumeField()`
        //
        // Guess the type of this field:
        // - If this field is not a message, there should be a ":" between the
        //   field name and the field value and also the field value should not
        //   start with "{" or "<" which indicates the beginning of a message body.
        // - If there is no ":" or there is a "{" or "<" after ":", this field has
        //   to be a message or the input is ill-formed.

        skipWhitespace()
        if skipOptionalColon() {
            if p == end {
                // Nothing after the ':'?
                throw TextFormatDecodingError.malformedText
            }
            let c = p[0]
            if c != asciiOpenAngleBracket && c != asciiOpenCurlyBracket {
                try skipUnknownPrimativeFieldValue()
            } else {
                try skipUnknownMessageFieldValue()
            }
        } else {
            try skipUnknownMessageFieldValue()
        }
    }

    /// Helper to see if this could be the start of a hex or octal number so unknown field
    /// value parsing can decide how to parse/validate.
    ///
    /// Returns true when the value must go through the integer parsers
    /// (`nextSInt()`/`nextUInt()`) rather than `nextDouble()`.
    private func mustParseNumberAsDecimal() -> Bool {
        // NOTE: If we run out of characters/can't tell; then just say it doesn't have
        // to be decimal, and let the other code error handle it.
        var scan = p
        // Don't read past the end of the buffer.
        if scan == end { return false }
        var c = scan[0]

        // Floats or decimals can have leading '-'
        if c == asciiMinus {
            scan += 1
            if scan == end { return false }
            c = scan[0]
        }

        if c == asciiPeriod {
            return false  // "(-)." : clearly a float
        }

        if c == asciiZero {
            scan += 1
            if scan == end { return true }  // "(-)0[end]" : parse it as decimal
            c = scan[0]
            if c == asciiLowerX  // "(-)0x" : hex - must parse as decimal
                || (c >= asciiZero && c <= asciiSeven)
            {  // "(-)0[0-7]" : octal - must parse as decimal
                return true
            }
            if c == asciiPeriod {
                return false  // "(-)0." : clearly a float
            }
        }

        // At this point, it doesn't really matter what comes next. We'll call it a floating
        // point value since even if it was a decimal, it might be too large for a UInt64 but
        // would still be valid for a float/double field.
        return false
    }

    /// Skips the value of an unknown non-message field: a quoted string, a
    /// list, an identifier, or a number.  Callers ensure `p != end`.
    ///
    /// - Parameter canBeList: false while already inside a list, since lists
    ///   cannot nest.
    private mutating func skipUnknownPrimativeFieldValue(canBeList: Bool = true) throws {
        // This is modeled after the C++ text_format.cpp `SkipFieldValue()`

        let c = p[0]

        if c == asciiSingleQuote || c == asciiDoubleQuote {
            // Note: the field could be 'bytes', so we can't parse that as a string
            // as it might fail.
            let _ = try nextBytesValue()
            return
        }

        if skipOptionalBeginArray() {
            guard canBeList else {
                // Have encountered an array as an element in an array, that isn't legal.
                throw TextFormatDecodingError.malformedText
            }
            if skipOptionalEndArray() {
                return
            }
            while true {
                if p == end {
                    throw TextFormatDecodingError.malformedText
                }
                let c = p[0]
                if c != asciiOpenAngleBracket && c != asciiOpenCurlyBracket {
                    try skipUnknownPrimativeFieldValue(canBeList: false)
                } else {
                    try skipUnknownMessageFieldValue()
                }
                if skipOptionalEndArray() {
                    return
                }
                try skipRequiredComma()
            }
        }

        // NOTE: This will also cover "true", "false" for booleans, "nan"/"inf" for floats.
        if let _ = try nextOptionalEnumName() {
            skipWhitespace()  // `nextOptionalEnumName()` doesn't skip trailing whitespace
            return
        }

        // NOTE: We don't need to special case "-nan"/"-inf", as they won't be forced
        // to parse as decimal, and `nextDouble()` already supports them.
        if mustParseNumberAsDecimal() {
            if c == asciiMinus {
                let _ = try nextSInt()
            } else {
                let _ = try nextUInt()
            }
        } else {
            let _ = try nextDouble()
        }
    }

    /// Skips the body of an unknown message field, including its opening
    /// and closing brackets.
    private mutating func skipUnknownMessageFieldValue() throws {
        // This is modeled after the C++ text_format.cpp `SkipFieldMessage()`

        let terminator = try skipObjectStart()
        while !skipOptionalObjectEnd(terminator) {
            if p == end {
                throw TextFormatDecodingError.malformedText
            }
            if let _ = try nextKey(allowExtensions: true) {
                // Got a valid field name or extension name ("[ext.name]")
            } else {
                throw TextFormatDecodingError.malformedText
            }
            try skipUnknownFieldValue()
            skipOptionalSeparator()
        }
    }

    /// Consumes `c` (with surrounding whitespace) or throws
    /// `TextFormatDecodingError.malformedText` if it is not next.
    private mutating func skipRequiredCharacter(_ c: UInt8) throws {
        skipWhitespace()
        if p != end && p[0] == c {
            p += 1
            skipWhitespace()
        } else {
            throw TextFormatDecodingError.malformedText
        }
    }

    internal mutating func skipRequiredComma() throws {
        try skipRequiredCharacter(asciiComma)
    }

    internal mutating func skipRequiredColon() throws {
        try skipRequiredCharacter(asciiColon)
    }

    /// Consumes `c` and trailing whitespace if `c` is the current byte;
    /// returns whether it did.  Leading whitespace is not skipped.
    private mutating func skipOptionalCharacter(_ c: UInt8) -> Bool {
        if p != end && p[0] == c {
            p += 1
            skipWhitespace()
            return true
        }
        return false
    }

    internal mutating func skipOptionalColon() -> Bool {
        skipOptionalCharacter(asciiColon)
    }

    internal mutating func skipOptionalEndArray() -> Bool {
        skipOptionalCharacter(asciiCloseSquareBracket)
    }

    internal mutating func skipOptionalBeginArray() -> Bool {
        skipOptionalCharacter(asciiOpenSquareBracket)
    }

    /// Consumes the object terminator `c` if present, returning one level
    /// of recursion budget when it does.
    internal mutating func skipOptionalObjectEnd(_ c: UInt8) -> Bool {
        let result = skipOptionalCharacter(c)
        if result {
            decrementRecursionDepth()
        }
        return result
    }

    /// Consumes a single `,` or `;` field separator if present.
    internal mutating func skipOptionalSeparator() {
        if p != end {
            let c = p[0]
            if c == asciiComma || c == asciiSemicolon {  // comma or semicolon
                p += 1
                skipWhitespace()
            }
        }
    }

    /// Returns the character that should end this field.
    /// E.g., if object starts with "{", returns "}"
    ///
    /// - Throws: `TextFormatDecodingError.messageDepthLimit` if nesting is
    ///   too deep, or `malformedText` if the next byte is not `{` or `<`.
    internal mutating func skipObjectStart() throws -> UInt8 {
        try incrementRecursionDepth()
        if p != end {
            let c = p[0]
            p += 1
            skipWhitespace()
            switch c {
            case asciiOpenCurlyBracket:  // {
                return asciiCloseCurlyBracket  // }
            case asciiOpenAngleBracket:  // <
                return asciiCloseAngleBracket  // >
            default:
                break
            }
        }
        throw TextFormatDecodingError.malformedText
    }
}