| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469 |
- // Sources/SwiftProtobuf/TextFormatScanner.swift - Text format decoding
- //
- // Copyright (c) 2014 - 2019 Apple Inc. and the project authors
- // Licensed under Apache License v2.0 with Runtime Library Exception
- //
- // See LICENSE.txt for license information:
- // https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
- //
- // -----------------------------------------------------------------------------
- ///
- /// Text format decoding engine.
- ///
- // -----------------------------------------------------------------------------
- import Foundation
// MARK: - ASCII byte constants

// Control characters (these have no printable literal, so they are given
// by numeric value).
private let asciiBell: UInt8 = 7
private let asciiBackspace: UInt8 = 8
private let asciiTab: UInt8 = 9
private let asciiNewLine: UInt8 = 10
private let asciiVerticalTab: UInt8 = 11
private let asciiFormFeed: UInt8 = 12
private let asciiCarriageReturn: UInt8 = 13

// Digits.
private let asciiZero = UInt8(ascii: "0")
private let asciiOne = UInt8(ascii: "1")
private let asciiThree = UInt8(ascii: "3")
private let asciiSeven = UInt8(ascii: "7")
private let asciiNine = UInt8(ascii: "9")

// Punctuation.
private let asciiColon = UInt8(ascii: ":")
private let asciiPeriod = UInt8(ascii: ".")
private let asciiPlus = UInt8(ascii: "+")
private let asciiComma = UInt8(ascii: ",")
private let asciiSemicolon = UInt8(ascii: ";")
private let asciiDoubleQuote = UInt8(ascii: "\"")
private let asciiSingleQuote = UInt8(ascii: "'")
private let asciiBackslash = UInt8(ascii: "\\")
private let asciiForwardSlash = UInt8(ascii: "/")
private let asciiHash = UInt8(ascii: "#")
private let asciiUnderscore = UInt8(ascii: "_")
private let asciiQuestionMark = UInt8(ascii: "?")
private let asciiSpace = UInt8(ascii: " ")
private let asciiMinus = UInt8(ascii: "-")

// Brackets.
private let asciiOpenSquareBracket = UInt8(ascii: "[")
private let asciiCloseSquareBracket = UInt8(ascii: "]")
private let asciiOpenCurlyBracket = UInt8(ascii: "{")
private let asciiCloseCurlyBracket = UInt8(ascii: "}")
private let asciiOpenAngleBracket = UInt8(ascii: "<")
private let asciiCloseAngleBracket = UInt8(ascii: ">")

// Letters.
private let asciiLowerA = UInt8(ascii: "a")
private let asciiUpperA = UInt8(ascii: "A")
private let asciiLowerB = UInt8(ascii: "b")
private let asciiLowerE = UInt8(ascii: "e")
private let asciiUpperE = UInt8(ascii: "E")
private let asciiLowerF = UInt8(ascii: "f")
private let asciiUpperF = UInt8(ascii: "F")
private let asciiLowerI = UInt8(ascii: "i")
private let asciiLowerL = UInt8(ascii: "l")
private let asciiLowerN = UInt8(ascii: "n")
private let asciiLowerR = UInt8(ascii: "r")
private let asciiLowerS = UInt8(ascii: "s")
private let asciiLowerT = UInt8(ascii: "t")
private let asciiUpperT = UInt8(ascii: "T")
private let asciiLowerU = UInt8(ascii: "u")
private let asciiUpperU = UInt8(ascii: "U")
private let asciiLowerV = UInt8(ascii: "v")
private let asciiLowerX = UInt8(ascii: "x")
private let asciiLowerY = UInt8(ascii: "y")
private let asciiLowerZ = UInt8(ascii: "z")
private let asciiUpperZ = UInt8(ascii: "Z")

// https://protobuf.dev/programming-guides/proto2/#assigning
// Field numbers range from 1 to 536,870,911, which is at most nine decimal
// digits. Parsing of a raw field number can therefore stop once it exceeds
// this length, which also avoids integer rollover.
private let maxFieldNumLength = 9
/// Converts one ASCII hexadecimal digit into its numeric value.
///
/// - Parameter c: The byte to interpret.
/// - Returns: `0...15` when `c` is `0`-`9`, `A`-`F` or `a`-`f`;
///   `nil` for any other byte.
private func fromHexDigit(_ c: UInt8) -> UInt8? {
    switch c {
    case asciiZero...asciiNine:
        return c - asciiZero
    case asciiUpperA...asciiUpperF:
        return c - asciiUpperA + 10
    case asciiLowerA...asciiLowerF:
        return c - asciiLowerA + 10
    default:
        return nil
    }
}
/// Same as `fromHexDigit(_:)`, but widens the result to `UInt32` so it can
/// be accumulated directly into a code point.
///
/// - Parameter c: The byte to interpret.
/// - Returns: The digit's value, or `nil` if `c` is not a hex digit.
private func uint32FromHexDigit(_ c: UInt8) -> UInt32? {
    fromHexDigit(c).map { UInt32($0) }
}
// Protobuf Text encoding assumes that you're working directly
// in UTF-8. So this implementation walks the string's UTF-8 bytes,
// expands every backslash escape into raw bytes, and then converts
// the resulting byte sequence back into a string.
//
// Returns nil when an escape is malformed (trailing backslash, unknown
// escape letter, missing hex digits, code point above 0x10FFFF) or when
// `utf8ToString` yields nil for the decoded bytes.
private func decodeString(_ s: String) -> String? {
    // Reads exactly four hex digits and returns their combined value,
    // or nil if the input runs out or a non-hex byte is found.
    func read4HexDigits(_ i: inout String.UTF8View.Iterator) -> UInt32? {
        var value: UInt32 = 0
        for _ in 0..<4 {
            guard let raw = i.next(), let nibble = uint32FromHexDigit(raw) else {
                return nil
            }
            value = (value << 4) + nibble
        }
        return value
    }

    var decoded = [UInt8]()
    var input = s.utf8.makeIterator()

    while let current = input.next() {
        // Anything that is not a backslash is copied through untouched.
        guard current == asciiBackslash else {
            decoded.append(current)
            continue
        }
        guard let escaped = input.next() else {
            return nil  // Input ends with backslash
        }

        switch escaped {
        case asciiZero...asciiSeven:
            // C standard allows 1, 2, or 3 octal digits.
            //
            // The max octal value is actually \377, but the C++ protobuf
            // code in strutil.cc:UnescapeCEscapeSequences() decodes with
            // rollover, so wrapping arithmetic duplicates that behavior
            // for consistency between languages.
            var value = escaped - asciiZero
            var digitsRead = 1
            octal: while digitsRead < 3 {
                // Iterators are values, so a copy is a cheap bookmark.
                let rewind = input
                guard let next = input.next(), next >= asciiZero, next <= asciiSeven else {
                    // Not an octal digit: un-read it and stop.
                    input = rewind
                    break octal
                }
                value = value &* 8 &+ (next - asciiZero)
                digitsRead += 1
            }
            decoded.append(value)

        case asciiLowerU, asciiUpperU:
            // \u takes 4 hex digits, \U takes 8.
            guard var codePoint = read4HexDigits(&input) else { return nil }
            if escaped == asciiUpperU {
                guard let low = read4HexDigits(&input) else { return nil }
                codePoint = (codePoint << 16) + low
            }
            // Emit the code point as UTF-8.
            switch codePoint {
            case 0...0x7f:
                // 1 byte encoding
                decoded.append(UInt8(truncatingIfNeeded: codePoint))
            case 0x80...0x7ff:
                // 2 byte encoding
                decoded.append(0xC0 | UInt8(truncatingIfNeeded: codePoint >> 6))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: codePoint & 0x3F))
            case 0x800...0xffff:
                // 3 byte encoding
                decoded.append(0xE0 | UInt8(truncatingIfNeeded: codePoint >> 12))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: codePoint & 0x3F))
            case 0x10000...0x10FFFF:
                // 4 byte encoding
                decoded.append(0xF0 | UInt8(truncatingIfNeeded: codePoint >> 18))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                decoded.append(0x80 | UInt8(truncatingIfNeeded: codePoint & 0x3F))
            default:
                return nil
            }

        case asciiLowerX:
            // Unlike C/C++, protobuf only allows 1 or 2 digits here.
            guard let first = input.next(), var value = fromHexDigit(first) else {
                return nil  // Hex escape must have at least 1 digit
            }
            let rewind = input
            if let second = input.next(), let low = fromHexDigit(second) {
                value = value &* 16 + low
            } else {
                // No second digit; reset the iterator
                input = rewind
            }
            decoded.append(value)

        case asciiLowerA:  // \a
            decoded.append(asciiBell)
        case asciiLowerB:  // \b
            decoded.append(asciiBackspace)
        case asciiLowerF:  // \f
            decoded.append(asciiFormFeed)
        case asciiLowerN:  // \n
            decoded.append(asciiNewLine)
        case asciiLowerR:  // \r
            decoded.append(asciiCarriageReturn)
        case asciiLowerT:  // \t
            decoded.append(asciiTab)
        case asciiLowerV:  // \v
            decoded.append(asciiVerticalTab)
        case asciiDoubleQuote, asciiSingleQuote, asciiQuestionMark, asciiBackslash:
            // \" \' \? \\ stand for themselves.
            decoded.append(escaped)
        default:
            return nil  // Unrecognized escape
        }
    }

    // Hand the decoded bytes to the shared UTF-8 -> String helper.
    return decoded.withUnsafeBufferPointer { buffer -> String? in
        guard let base = buffer.baseAddress else {
            return String()
        }
        return utf8ToString(bytes: base, count: buffer.count)
    }
}
- ///
- /// TextFormatScanner has no public members.
- ///
- internal struct TextFormatScanner {
- internal let extensions: (any ExtensionMap)?
- private var p: UnsafeRawPointer
- private let end: UnsafeRawPointer
- private let doubleParser = DoubleParser()
- internal let options: TextFormatDecodingOptions
- internal var recursionBudget: Int
- internal var complete: Bool { p == end }
/// Creates a scanner over `count` bytes of UTF-8 text starting at
/// `utf8Pointer`, then skips any leading whitespace and comments.
///
/// - Parameters:
///   - utf8Pointer: First byte of the text to scan.
///   - count: Number of bytes available from `utf8Pointer`.
///   - options: Decoding options; `messageDepthLimit` seeds the recursion budget.
///   - extensions: Optional extension map kept for use while decoding.
internal init(
    utf8Pointer: UnsafeRawPointer,
    count: Int,
    options: TextFormatDecodingOptions,
    extensions: (any ExtensionMap)? = nil
) {
    self.p = utf8Pointer
    self.end = utf8Pointer + count
    self.options = options
    self.extensions = extensions
    // Since the root message doesn't start with a `skipObjectStart`, the
    // budget starts with one less depth to cover that top message.
    self.recursionBudget = options.messageDepthLimit - 1
    skipWhitespace()
}
/// Consumes one level of the recursion budget.
///
/// - Throws: `TextFormatDecodingError.messageDepthLimit` once the budget
///   drops below zero.
private mutating func incrementRecursionDepth() throws {
    recursionBudget -= 1
    guard recursionBudget >= 0 else {
        throw TextFormatDecodingError.messageDepthLimit
    }
}
/// Returns one level to the recursion budget.
///
/// Traps if the budget would exceed the configured depth limit. That
/// should never happen; if it does, something is probably corrupting
/// memory, and simply throwing doesn't make much sense.
private mutating func decrementRecursionDepth() {
    recursionBudget += 1
    guard recursionBudget <= options.messageDepthLimit else {
        fatalError("Somehow TextFormatDecoding unwound more objects than it started")
    }
}
/// Advances the cursor past spaces, tabs, newlines, carriage returns and
/// `#` comments. A comment runs up to and including the next newline or
/// carriage return (or to the end of input).
private mutating func skipWhitespace() {
    while p != end {
        let byte = p[0]
        if byte == asciiHash {
            // Consume the '#', then the rest of the line with its terminator.
            p += 1
            while p != end {
                let c = p[0]
                p += 1
                if c == asciiNewLine || c == asciiCarriageReturn {
                    break
                }
            }
        } else if byte == asciiSpace
            || byte == asciiTab
            || byte == asciiNewLine
            || byte == asciiCarriageReturn
        {
            p += 1
        } else {
            // First byte of a real token: stop here.
            return
        }
    }
}
/// Returns the raw UTF-8 bytes of the identifier at the cursor and then
/// skips trailing whitespace.
///
/// An identifier is a run of ASCII letters, digits and underscores. The
/// caller is expected to have already checked that the current byte is a
/// valid identifier start; this method does not verify it.
private mutating func parseUTF8Identifier() -> UnsafeRawBufferPointer {
    let start = p
    while p != end {
        let c = p[0]
        let isLetter = (c >= asciiLowerA && c <= asciiLowerZ) || (c >= asciiUpperA && c <= asciiUpperZ)
        let isDigit = c >= asciiZero && c <= asciiNine
        guard isLetter || isDigit || c == asciiUnderscore else {
            break
        }
        p += 1
    }
    let identifier = UnsafeRawBufferPointer(start: start, count: p - start)
    skipWhitespace()
    return identifier
}
/// Returns the next identifier as a `String`.
private mutating func parseIdentifier() -> String {
    let bytes = parseUTF8Identifier()
    // Force-unwraps are OK: the identifier consists solely of ASCII
    // letters, digits and underscores, so it is never invalid UTF-8.
    return utf8ToString(bytes: bytes.baseAddress!, count: bytes.count)!
}
/// Validates the body of a quoted bytes literal and reports how many
/// bytes it will decode to.
///
/// The cursor must sit just after the opening quote. On success it is
/// moved back to that position so the body can be decoded in a second pass.
///
/// - Parameters:
///   - terminator: The quote byte that closes the literal.
///   - sawBackslash: Set to `true` if the body contains any escape.
/// - Returns: The number of decoded bytes.
/// - Throws: `TextFormatDecodingError.malformedText` for a raw newline or
///   carriage return, a malformed escape, or a missing closing quote.
private mutating func validateAndCountBytesFromString(terminator: UInt8, sawBackslash: inout Bool) throws -> Int {
    let bodyStart = p
    var decodedLength = 0
    sawBackslash = false

    while p != end {
        let current = p[0]
        p += 1

        if current == terminator {
            // Rewind so the caller can decode the validated body.
            p = bodyStart
            return decodedLength
        }
        if current == asciiNewLine || current == asciiCarriageReturn {
            // Can't have a newline in the middle of a bytes string.
            throw TextFormatDecodingError.malformedText
        }
        guard current == asciiBackslash else {
            decodedLength += 1
            continue
        }

        sawBackslash = true
        // A backslash as the very last byte leaves the loop and is
        // reported as an unterminated literal below.
        guard p != end else { break }
        let escaped = p[0]
        p += 1

        switch escaped {
        case asciiZero...asciiSeven:  // '0'...'7'
            // C standard allows 1, 2, or 3 octal digits.
            if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                p += 1
                if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                    // Out of range octal: three digits and first digit is greater than 3
                    guard escaped <= asciiThree else {
                        throw TextFormatDecodingError.malformedText
                    }
                    p += 1
                }
            }
            decodedLength += 1

        case asciiLowerU, asciiUpperU:  // 'u' or 'U' unicode escape
            let digitCount = (escaped == asciiLowerU) ? 4 : 8
            guard (end - p) >= digitCount else {
                throw TextFormatDecodingError.malformedText  // unicode escape needs 4/8 digits
            }
            var codePoint: UInt32 = 0
            for offset in 0..<digitCount {
                guard let nibble = uint32FromHexDigit(p[offset]) else {
                    throw TextFormatDecodingError.malformedText  // wasn't a hex digit
                }
                codePoint = (codePoint << 4) + nibble
            }
            p += digitCount

            // Number of UTF-8 bytes the code point expands to. The
            // surrogate range must be tested before the 3-byte range
            // that contains it.
            let encodedLength: Int
            switch codePoint {
            case 0...0x7f:
                encodedLength = 1
            case 0x80...0x7ff:
                encodedLength = 2
            case 0xD800...0xDFFF:
                // Surrogate pair (low or high), shouldn't get a unicode literal of those.
                throw TextFormatDecodingError.malformedText
            case 0x800...0xffff:
                encodedLength = 3
            case 0x10000...0x10FFFF:
                encodedLength = 4
            default:
                throw TextFormatDecodingError.malformedText  // Isn't a valid unicode character
            }
            decodedLength += encodedLength

        case asciiLowerX:  // 'x' hexadecimal escape
            guard p != end, fromHexDigit(p[0]) != nil else {
                throw TextFormatDecodingError.malformedText  // Hex escape must have at least 1 digit
            }
            p += 1
            if p != end, fromHexDigit(p[0]) != nil {
                p += 1
            }
            decodedLength += 1

        case asciiLowerA,  // \a ("alert")
            asciiLowerB,  // \b
            asciiLowerF,  // \f
            asciiLowerN,  // \n
            asciiLowerR,  // \r
            asciiLowerT,  // \t
            asciiLowerV,  // \v
            asciiSingleQuote,  // \'
            asciiDoubleQuote,  // \"
            asciiQuestionMark,  // \?
            asciiBackslash:  // \\
            decodedLength += 1

        default:
            throw TextFormatDecodingError.malformedText  // Unrecognized escape
        }
    }

    // Ran out of input before finding the closing quote.
    throw TextFormatDecodingError.malformedText
}
/// Decodes the body of a quoted bytes literal into `data`.
///
/// Protobuf Text format uses C ASCII conventions for encoding byte
/// sequences, including the use of octal and hexadecimal escapes.
///
/// Assumes that validateAndCountBytesFromString() has already verified
/// the literal and that `data` was sized with the count it returned, so
/// no error or bounds checks are performed here. On return the cursor is
/// just past the closing quote. Nothing happens if `data` is empty.
///
/// - Parameters:
///   - terminator: The quote byte that closes the literal.
///   - data: Pre-sized destination buffer.
private mutating func parseBytesFromString(terminator: UInt8, into data: inout Data) {
    data.withUnsafeMutableBytes { (destination: UnsafeMutableRawBufferPointer) -> Void in
        guard var out = destination.baseAddress, destination.count > 0 else {
            return
        }

        while p[0] != terminator {
            let current = p[0]
            p += 1

            // Plain bytes are copied as-is.
            guard current == asciiBackslash else {
                out[0] = current
                out += 1
                continue
            }

            let escaped = p[0]
            p += 1

            // Every escape except \u / \U produces exactly one byte, which
            // is stored after the switch.
            let decoded: UInt8
            switch escaped {
            case asciiZero...asciiSeven:  // '0'...'7'
                // C standard allows 1, 2, or 3 octal digits.
                var value = escaped - asciiZero
                let second = p[0]
                if second >= asciiZero, second <= asciiSeven {
                    p += 1
                    let secondValue = second - asciiZero
                    let third = p[0]
                    if third >= asciiZero, third <= asciiSeven {
                        p += 1
                        let thirdValue = third - asciiZero
                        value = value &* 64 + secondValue * 8 + thirdValue
                    } else {
                        value = value * 8 + secondValue
                    }
                }
                decoded = value

            case asciiLowerU, asciiUpperU:
                let digitCount = (escaped == asciiLowerU) ? 4 : 8
                var codePoint: UInt32 = 0
                for offset in 0..<digitCount {
                    codePoint = (codePoint << 4) + uint32FromHexDigit(p[offset])!
                }
                p += digitCount
                // Write the code point out as UTF-8.
                switch codePoint {
                case 0...0x7f:
                    // 1 byte encoding
                    out[0] = UInt8(truncatingIfNeeded: codePoint)
                    out += 1
                case 0x80...0x7ff:
                    // 2 byte encoding
                    out[0] = 0xC0 | UInt8(truncatingIfNeeded: codePoint >> 6)
                    out[1] = 0x80 | UInt8(truncatingIfNeeded: codePoint & 0x3F)
                    out += 2
                case 0x800...0xffff:
                    // 3 byte encoding
                    out[0] = 0xE0 | UInt8(truncatingIfNeeded: codePoint >> 12)
                    out[1] = 0x80 | UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F)
                    out[2] = 0x80 | UInt8(truncatingIfNeeded: codePoint & 0x3F)
                    out += 3
                case 0x10000...0x10FFFF:
                    // 4 byte encoding
                    out[0] = 0xF0 | UInt8(truncatingIfNeeded: codePoint >> 18)
                    out[1] = 0x80 | UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F)
                    out[2] = 0x80 | UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F)
                    out[3] = 0x80 | UInt8(truncatingIfNeeded: codePoint & 0x3F)
                    out += 4
                default:
                    preconditionFailure()  // Already validated, can't happen
                }
                continue

            case asciiLowerX:  // 'x' hexadecimal escape
                // We already validated, so we know there's at least one digit:
                var value = fromHexDigit(p[0])!
                p += 1
                if let low = fromHexDigit(p[0]) {
                    value = value &* 16 &+ low
                    p += 1
                }
                decoded = value

            case asciiLowerA:  // \a ("alert")
                decoded = asciiBell
            case asciiLowerB:  // \b
                decoded = asciiBackspace
            case asciiLowerF:  // \f
                decoded = asciiFormFeed
            case asciiLowerN:  // \n
                decoded = asciiNewLine
            case asciiLowerR:  // \r
                decoded = asciiCarriageReturn
            case asciiLowerT:  // \t
                decoded = asciiTab
            case asciiLowerV:  // \v
                decoded = asciiVerticalTab
            default:
                // Any other escaped byte stands for itself.
                decoded = escaped
            }

            out[0] = decoded
            out += 1
        }
        p += 1  // Consume terminator
    }
}
/// Parses one quoted string segment and skips the whitespace after it.
///
/// Assumes the leading quote has already been consumed.
///
/// - Parameter terminator: The quote byte that closes the segment.
/// - Returns: The segment's text with escapes decoded, or `nil` if the
///   segment contains a raw newline or carriage return, is unterminated,
///   or cannot be converted/decoded.
private mutating func parseStringSegment(terminator: UInt8) -> String? {
    let start = p
    var sawBackslash = false

    while p != end {
        let c = p[0]
        if c == terminator {
            let raw = utf8ToString(bytes: start, count: p - start)
            p += 1
            skipWhitespace()
            // Only pay for escape decoding when an escape was seen.
            guard sawBackslash, let escapedText = raw else {
                return raw
            }
            return decodeString(escapedText)
        }

        p += 1
        switch c {
        case asciiBackslash:  // \
            guard p != end else {
                return nil
            }
            sawBackslash = true
            // Step over the escaped byte so an escaped quote does not
            // end the segment.
            p += 1
        case asciiNewLine, asciiCarriageReturn:
            // Can't have a newline in the middle of a raw string.
            return nil
        default:
            break
        }
    }
    return nil  // Unterminated quoted string
}
/// Parses an unsigned integer at the cursor and skips trailing whitespace.
///
/// A leading `0x` selects hexadecimal, any other leading `0` selects
/// octal, and a leading `1`...`9` selects decimal. Scanning stops at the
/// first byte that is not a digit of the selected base.
///
/// - Returns: The parsed value.
/// - Throws: `TextFormatDecodingError.malformedNumber` if the input is
///   exhausted, does not start with a digit, or overflows `UInt64`.
internal mutating func nextUInt() throws -> UInt64 {
    guard p != end else {
        throw TextFormatDecodingError.malformedNumber
    }
    let first = p[0]
    p += 1

    switch first {
    case asciiZero:  // leading '0' precedes octal or hex
        guard p != end else {
            // The TextFormat ended with a field value of zero.
            return 0
        }
        var value: UInt64 = 0
        if p[0] == asciiLowerX {  // 'x' => hex
            p += 1
            while p != end, let nibble = fromHexDigit(p[0]) {
                guard value <= UInt64.max / 16 else {
                    throw TextFormatDecodingError.malformedNumber
                }
                p += 1
                value = value * 16 + UInt64(nibble)
            }
        } else {  // octal
            while p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                let digit = UInt64(p[0] - asciiZero)
                guard value <= UInt64.max / 8 else {
                    throw TextFormatDecodingError.malformedNumber
                }
                p += 1
                value = value * 8 + digit
            }
        }
        skipWhitespace()
        return value

    case asciiOne...asciiNine:  // 1...9
        var value = UInt64(first - asciiZero)
        while p != end, p[0] >= asciiZero, p[0] <= asciiNine {
            let digit = UInt64(p[0] - asciiZero)
            // The first test also keeps `value * 10` from overflowing.
            guard value <= UInt64.max / 10, value * 10 <= UInt64.max - digit else {
                throw TextFormatDecodingError.malformedNumber
            }
            p += 1
            value = value * 10 + digit
        }
        skipWhitespace()
        return value

    default:
        throw TextFormatDecodingError.malformedNumber
    }
}
/// Parses a signed integer at the cursor: an optional `-` followed by a
/// magnitude in the syntax accepted by `nextUInt()`.
///
/// - Returns: The parsed value.
/// - Throws: `TextFormatDecodingError.malformedNumber` if the input is
///   exhausted, a `-` is not followed by a digit, or the value does not
///   fit in `Int64`.
internal mutating func nextSInt() throws -> Int64 {
    guard p != end else {
        throw TextFormatDecodingError.malformedNumber
    }

    if p[0] != asciiMinus {
        let magnitude = try nextUInt()
        guard magnitude <= UInt64(bitPattern: Int64.max) else {
            throw TextFormatDecodingError.malformedNumber
        }
        return Int64(bitPattern: magnitude)
    }

    p += 1
    // character after '-' must be digit
    guard p != end, p[0] >= asciiZero, p[0] <= asciiNine else {
        throw TextFormatDecodingError.malformedNumber
    }
    let magnitude = try nextUInt()
    let minMagnitude: UInt64 = 0x8000_0000_0000_0000  // -Int64.min
    if magnitude == minMagnitude {
        // Special case for Int64.min, whose magnitude is not a valid Int64.
        return Int64.min
    }
    guard magnitude < minMagnitude else {
        // Too large negative number
        throw TextFormatDecodingError.malformedNumber
    }
    return -Int64(bitPattern: magnitude)
}
/// Parses a string value made of one or more adjacent quoted segments
/// (single- or double-quoted) and returns their concatenation.
///
/// - Throws: `TextFormatDecodingError.malformedText` if no quoted segment
///   starts at the cursor or any segment fails to parse.
internal mutating func nextStringValue() throws -> String {
    skipWhitespace()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    let opening = p[0]
    guard opening == asciiSingleQuote || opening == asciiDoubleQuote else {
        throw TextFormatDecodingError.malformedText
    }
    p += 1
    guard var text = parseStringSegment(terminator: opening) else {
        throw TextFormatDecodingError.malformedText
    }

    // parseStringSegment leaves the cursor after any trailing whitespace,
    // so another quote here starts a continuation segment.
    while p != end {
        let quote = p[0]
        guard quote == asciiSingleQuote || quote == asciiDoubleQuote else {
            break
        }
        p += 1
        guard let more = parseStringSegment(terminator: quote) else {
            throw TextFormatDecodingError.malformedText
        }
        text.append(more)
    }
    return text
}
/// Parses a bytes value.
///
/// Protobuf Text Format allows a single bytes field to
/// contain multiple quoted strings. The values
/// are separately decoded and then concatenated:
///   field1: "bytes" 'more bytes'
///           "and even more bytes"
///
/// - Throws: `TextFormatDecodingError.malformedText` if no quoted string
///   starts at the cursor or any segment fails validation.
internal mutating func nextBytesValue() throws -> Data {
    skipWhitespace()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    let opening = p[0]
    guard opening == asciiSingleQuote || opening == asciiDoubleQuote else {
        throw TextFormatDecodingError.malformedText
    }
    p += 1

    // First segment: validate, then either decode escapes or copy verbatim.
    var firstHasEscapes = false
    let firstLength = try validateAndCountBytesFromString(terminator: opening, sawBackslash: &firstHasEscapes)
    var bytes: Data
    if firstHasEscapes {
        bytes = Data(count: firstLength)
        parseBytesFromString(terminator: opening, into: &bytes)
    } else {
        bytes = Data(bytes: p, count: firstLength)
        p += firstLength + 1  // Skip string body + close quote
    }

    // Any further quoted segments are decoded and appended.
    while true {
        skipWhitespace()
        guard p != end else {
            return bytes
        }
        let quote = p[0]
        guard quote == asciiSingleQuote || quote == asciiDoubleQuote else {
            return bytes
        }
        p += 1
        var hasEscapes = false
        let length = try validateAndCountBytesFromString(terminator: quote, sawBackslash: &hasEscapes)
        if hasEscapes {
            var segment = Data(count: length)
            parseBytesFromString(terminator: quote, into: &segment)
            bytes.append(segment)
        } else {
            bytes.append(Data(bytes: p, count: length))
            p += length + 1  // Skip string body + close quote
        }
    }
}
// Tries to identify a sequence of UTF8 characters
// that represent a numeric floating-point value.
//
// The candidate text is an optional '-', then either a '0' not followed
// by another digit, a '.' followed by a digit, or a digit '1'...'9',
// followed by any run of digits, '.', '+', '-', 'e' and 'E'. A single
// trailing 'f' or 'F' is accepted and skipped. The candidate is handed
// to `doubleParser` for the actual conversion.
//
// Returns the parsed value with the cursor moved past the number and any
// trailing whitespace. Returns nil with the cursor restored to where it
// was on entry if there is no number here. That includes the case where
// the converter rejects the candidate text, so that callers which go on
// to try other spellings (such as NaN or infinity) always examine the
// token from its first byte rather than from somewhere inside it.
private mutating func tryParseFloatString() -> Double? {
    guard p != end else { return nil }
    let start = p
    var c = p[0]
    if c == asciiMinus {
        p += 1
        guard p != end else {
            p = start
            return nil
        }
        c = p[0]
    }

    switch c {
    case asciiZero:  // '0' as first character is not allowed followed by digit
        p += 1
        guard p != end else { break }
        let next = p[0]
        if next >= asciiZero && next <= asciiNine {
            p = start
            return nil
        }
    case asciiPeriod:  // '.' as first char only if followed by digit
        p += 1
        guard p != end else {
            p = start
            return nil
        }
        let next = p[0]
        if next < asciiZero || next > asciiNine {
            p = start
            return nil
        }
    case asciiOne...asciiNine:
        break
    default:
        p = start
        return nil
    }

    // Collect every byte that could belong to the number. Stop at an
    // 'f'/'F' suffix (remembering it) or at any other byte.
    var hasFloatSuffix = false
    loop: while p != end {
        switch p[0] {
        case asciiZero...asciiNine,
            asciiPeriod,
            asciiPlus,
            asciiMinus,
            asciiLowerE,
            asciiUpperE:  // 0...9, ., +, -, e, E
            p += 1
        case asciiLowerF, asciiUpperF:  // f or F
            hasFloatSuffix = true
            break loop
        default:
            break loop
        }
    }

    let parsed: Double? = doubleParser.utf8ToDouble(
        bytes: UnsafeRawBufferPointer(
            start: start,
            count: p - start
        ),
        finiteOnly: false
    )
    guard let value = parsed else {
        // Not a number after all: leave the input untouched.
        p = start
        return nil
    }
    if hasFloatSuffix {
        // Just skip the 'f'/'F'
        p += 1
    }
    skipWhitespace()
    return value
}
// Skips the given bytes if all of them appear, in order, at the cursor.
// If any byte differs or the input ends first, the cursor is left where
// it was on entry.
private mutating func skipOptionalCharacters(bytes: [UInt8]) {
    let rewind = p
    for expected in bytes {
        guard p != end, p[0] == expected else {
            p = rewind
            return
        }
        p += 1
    }
}
// Skips the following keyword if it matches (case-insensitively)
// the given keyword, which must be specified as lower-case bytes.
//
// The match is rejected if the keyword is immediately followed by
// another ASCII letter. Returns true (after skipping trailing whitespace)
// on a match; otherwise returns false with the cursor unchanged.
private mutating func skipOptionalKeyword(bytes: [UInt8]) -> Bool {
    let rewind = p
    for expected in bytes {
        guard p != end else {
            p = rewind
            return false
        }
        let raw = p[0]
        // Fold upper case to lower case
        // (Protobuf text keywords are case insensitive)
        let isUpper = raw >= asciiUpperA && raw <= asciiUpperZ
        let folded = isUpper ? raw + (asciiLowerA - asciiUpperA) : raw
        guard folded == expected else {
            p = rewind
            return false
        }
        p += 1
    }

    guard p != end else {
        return true
    }
    let following = p[0]
    let followedByLetter =
        (following >= asciiUpperA && following <= asciiUpperZ)
        || (following >= asciiLowerA && following <= asciiLowerZ)
    if followedByLetter {
        // The keyword is only a prefix of a longer word.
        p = rewind
        return false
    }
    skipWhitespace()
    return true
}
// If the next token is the identifier "nan" (optionally preceded by a
// minus sign), skips it and returns true. Otherwise returns false with
// the cursor unchanged.
private mutating func skipOptionalNaN() -> Bool {
    let rewind = p
    // "-nan" doesn't mean anything, but upstream handles it, so skip
    // over any leading minus when checking for "nan".
    if p != end && p[0] == asciiMinus {
        p += 1
    }
    guard skipOptionalKeyword(bytes: [asciiLowerN, asciiLowerA, asciiLowerN]) else {
        // It wasn't "nan"; rewind in case we skipped a minus sign.
        p = rewind
        return false
    }
    return true
}
// If the next token is a recognized spelling of "infinity" ("inf" or
// "infinity", optionally preceded by a minus sign), skips it and returns
// Float.infinity or -Float.infinity. Otherwise returns nil with the
// cursor unchanged.
private mutating func skipOptionalInfinity() -> Float? {
    guard p != end else {
        return nil
    }
    let rewind = p
    let negated = p[0] == asciiMinus
    if negated {
        p += 1
    }

    // Tried in order; the short form first.
    let spellings: [[UInt8]] = [
        [asciiLowerI, asciiLowerN, asciiLowerF],
        [
            asciiLowerI, asciiLowerN, asciiLowerF, asciiLowerI,
            asciiLowerN, asciiLowerI, asciiLowerT, asciiLowerY,
        ],
    ]
    for spelling in spellings where skipOptionalKeyword(bytes: spelling) {
        return negated ? -Float.infinity : Float.infinity
    }

    p = rewind
    return nil
}
/// Parses a `Float` value: a numeric literal, "nan", or an infinity
/// spelling, tried in that order.
///
/// - Throws: `TextFormatDecodingError.malformedNumber` if none match.
internal mutating func nextFloat() throws -> Float {
    if let parsed = tryParseFloatString() {
        return Float(parsed)
    }
    if skipOptionalNaN() {
        return .nan
    }
    guard let infinity = skipOptionalInfinity() else {
        throw TextFormatDecodingError.malformedNumber
    }
    return infinity
}
/// Parses a `Double` value: a numeric literal, "nan", or an infinity
/// spelling, tried in that order.
///
/// - Throws: `TextFormatDecodingError.malformedNumber` if none match.
internal mutating func nextDouble() throws -> Double {
    if let parsed = tryParseFloatString() {
        return parsed
    }
    if skipOptionalNaN() {
        return .nan
    }
    guard let infinity = skipOptionalInfinity() else {
        throw TextFormatDecodingError.malformedNumber
    }
    return Double(infinity)
}
/// Parses a boolean value.
///
/// Accepted spellings are `0`, `1`, `f`/`F` optionally followed by
/// "alse", and `t`/`T` optionally followed by "rue". The value must be
/// followed by end of input, whitespace, a `#` comment, `,`, `;`, `]`,
/// `}` or `>`.
///
/// - Throws: `TextFormatDecodingError.malformedText` if the input is
///   exhausted, the spelling is not recognized, or the value is followed
///   by any other byte.
internal mutating func nextBool() throws -> Bool {
    skipWhitespace()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    let first = p[0]
    p += 1

    let value: Bool
    switch first {
    case asciiZero:
        value = false
    case asciiOne:
        value = true
    case asciiLowerF, asciiUpperF:
        // skipOptionalCharacters is a no-op at end of input.
        skipOptionalCharacters(bytes: [asciiLowerA, asciiLowerL, asciiLowerS, asciiLowerE])
        value = false
    case asciiLowerT, asciiUpperT:
        skipOptionalCharacters(bytes: [asciiLowerR, asciiLowerU, asciiLowerE])
        value = true
    default:
        throw TextFormatDecodingError.malformedText
    }

    guard p != end else {
        return value
    }
    // Bytes that may legally follow a boolean token.
    let delimiters: [UInt8] = [
        asciiSpace,
        asciiTab,
        asciiNewLine,
        asciiCarriageReturn,
        asciiHash,
        asciiComma,
        asciiSemicolon,
        asciiCloseSquareBracket,
        asciiCloseCurlyBracket,
        asciiCloseAngleBracket,
    ]
    guard delimiters.contains(p[0]) else {
        throw TextFormatDecodingError.malformedText
    }
    skipWhitespace()
    return value
}
/// Returns the raw UTF-8 bytes of the identifier at the cursor if the
/// next token starts with an ASCII letter, or `nil` otherwise.
///
/// - Throws: `TextFormatDecodingError.malformedText` if the input is
///   exhausted.
internal mutating func nextOptionalEnumName() throws -> UnsafeRawBufferPointer? {
    skipWhitespace()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    let c = p[0]
    let startsWithLetter =
        (c >= asciiLowerA && c <= asciiLowerZ)
        || (c >= asciiUpperA && c <= asciiUpperZ)
    guard startsWithLetter else {
        return nil
    }
    return parseUTF8Identifier()
}
/// Reads an optional bracketed `Any` type URL.
///
/// Any URLs are syntactically (almost) identical to extension keys, so
/// this simply forwards to `nextOptionalExtensionKey()`.
internal mutating func nextOptionalAnyURL() throws -> String? {
    let url = try nextOptionalExtensionKey()
    return url
}
/// Reads a bracketed extension key if one starts at the next
/// non-whitespace position.
///
/// Note: `/` characters are accepted so the same code can parse Any
/// URLs. Technically, Any URLs can contain `/` characters and extension
/// key names cannot. In practice, accepting `/` characters for
/// extension keys works fine, since the result just gets rejected when
/// the key is looked up.
///
/// - Returns: The key text without the surrounding brackets, or `nil`
///   at end-of-input or when the next byte is not `[`.
/// - Throws: If the next token starts with `[` but cannot be parsed as
///   an extension key.
internal mutating func nextOptionalExtensionKey() throws -> String? {
    skipWhitespace()
    guard p != end, p[0] == asciiOpenSquareBracket else {
        return nil
    }
    return try parseExtensionKey()
}
/// Parses an `[extension_field_name]` token.
///
/// On entry `p` must point at the opening `[` (checked with `assert`);
/// the bracket is consumed here. The name must begin with an ASCII
/// letter and may continue with letters, digits, `_`, `.` and `/`.
/// The `/` is allowed because this is also used for Any URLs. The
/// closing `]` and any whitespace after it are consumed.
///
/// - Returns: The text between the brackets.
/// - Throws: `TextFormatDecodingError.malformedText` if the input ends
///   early, an unexpected byte is found, or the name cannot be
///   converted to a `String`.
private mutating func parseExtensionKey() throws -> String {
    assert(p[0] == asciiOpenSquareBracket)
    p += 1
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }

    let nameStart = p
    let first = p[0]
    let startsWithLetter =
        (first >= asciiLowerA && first <= asciiLowerZ)
        || (first >= asciiUpperA && first <= asciiUpperZ)
    guard startsWithLetter else {
        throw TextFormatDecodingError.malformedText
    }
    p += 1

    // Advance over the remaining name bytes, stopping at the closing bracket.
    scan: while p != end {
        let byte = p[0]
        if byte == asciiCloseSquareBracket {  // ]
            break scan
        }
        switch byte {
        case asciiLowerA...asciiLowerZ,
            asciiUpperA...asciiUpperZ,
            asciiZero...asciiNine,
            asciiUnderscore,
            asciiPeriod,
            asciiForwardSlash:
            p += 1
        default:
            throw TextFormatDecodingError.malformedText
        }
    }

    // The scan ends either on "]" or because the input ran out.
    guard p != end, p[0] == asciiCloseSquareBracket else {
        throw TextFormatDecodingError.malformedText
    }
    guard let name = utf8ToString(bytes: nameStart, count: p - nameStart) else {
        throw TextFormatDecodingError.malformedText
    }
    p += 1  // Skip ]
    skipWhitespace()
    return name
}
/// Reads the next field key as text.
///
/// Three key forms are recognized:
/// - `[name]`: an extension key, returned with its brackets restored.
/// - An identifier starting with an ASCII letter.
/// - A field number starting with `1`...`9` (field numbers are `123`,
///   not `0123`), returned as its digit string.
///
/// - Parameter allowExtensions: Whether a bracketed extension key is
///   acceptable at this position.
/// - Returns: The key text, or `nil` at end-of-input.
/// - Throws: `TextFormatDecodingError.unknownField` for an extension
///   key when `allowExtensions` is false;
///   `TextFormatDecodingError.malformedText` for any other leading
///   byte or for a field number longer than `maxFieldNumLength` digits.
internal mutating func nextKey(allowExtensions: Bool) throws -> String? {
    skipWhitespace()
    guard p != end else {
        return nil
    }
    switch p[0] {
    case asciiOpenSquareBracket:  // [
        guard allowExtensions else {
            throw TextFormatDecodingError.unknownField
        }
        let extensionName = try parseExtensionKey()
        return "[\(extensionName)]"
    case asciiLowerA...asciiLowerZ,
        asciiUpperA...asciiUpperZ:  // a...z, A...Z
        return parseIdentifier()
    case asciiOne...asciiNine:  // 1...9
        let digitsStart = p
        p += 1
        while p != end {
            let digit = p[0]
            guard digit >= asciiZero && digit <= asciiNine else {
                break
            }
            p += 1
            if p - digitsStart > maxFieldNumLength {
                throw TextFormatDecodingError.malformedText
            }
        }
        // Capture the length before trailing whitespace moves `p`.
        let digitCount = p - digitsStart
        skipWhitespace()
        // Safe to unwrap: the span holds only ASCII digits, so it
        // cannot be invalid UTF-8.
        return utf8ToString(bytes: digitsStart, count: digitCount)!
    default:
        throw TextFormatDecodingError.malformedText
    }
}
/// Parses the next field key, resolves it, and returns the field number.
///
/// Keys may be a field name, a bracketed extension name, or a literal
/// field number. Keys that cannot be resolved are either rejected or,
/// when the decoding options allow it (or the name/number is reserved),
/// skipped together with their value, after which the next key is tried.
///
/// This function accounts for as much as 2/3 of the total run time of
/// the entire parse.
///
/// - Parameters:
///   - names: Name map used to resolve field names and numbers.
///   - messageType: Message type used for extension lookups.
///   - terminator: The byte that closes the current message, or `nil`
///     when parsing at the top level.
/// - Returns: The field number, or `nil` at end-of-input (top level
///   only) or once the terminator has been consumed.
/// - Throws: `TextFormatDecodingError.unknownField` for a key that is
///   unknown and may not be ignored;
///   `TextFormatDecodingError.malformedText` for unparsable input or
///   when the input ends before the terminator.
internal mutating func nextFieldNumber(
    names: _NameMap,
    messageType: any Message.Type,
    terminator: UInt8?
) throws -> Int? {
    while true {
        skipWhitespace()
        guard p != end else {
            if terminator != nil {
                // Never got the terminator.
                throw TextFormatDecodingError.malformedText
            }
            return nil
        }

        var sawReservedField = false
        let lead = p[0]
        switch lead {
        case asciiLowerA...asciiLowerZ,
            asciiUpperA...asciiUpperZ:  // a...z, A...Z
            let name = parseUTF8Identifier()
            if let number = names.number(forProtoName: name) {
                return number
            }
            if !options.ignoreUnknownFields {
                guard names.isReserved(name: name) else {
                    throw TextFormatDecodingError.unknownField
                }
                sawReservedField = true
            }
        // Unknown or reserved field name: fall out of the switch and skip it.

        case asciiOpenSquareBracket:  // Start of an extension field
            let extensionName = try parseExtensionKey()
            if let number = extensions?.fieldNumberForProto(
                messageType: messageType,
                protoFieldName: extensionName
            ) {
                return number
            }
            guard options.ignoreUnknownExtensionFields else {
                throw TextFormatDecodingError.unknownField
            }
        // Unknown extension: fall out of the switch and skip it.

        case asciiOne...asciiNine:  // 1-9 (field numbers are 123, not 0123)
            let digitsStart = p
            var number = Int(lead) - Int(asciiZero)
            p += 1
            while p != end {
                let digit = p[0]
                guard digit >= asciiZero && digit <= asciiNine else {
                    break
                }
                number = number &* 10 &+ (Int(digit) - Int(asciiZero))
                p += 1
                if p - digitsStart > maxFieldNumLength {
                    throw TextFormatDecodingError.malformedText
                }
            }
            skipWhitespace()
            if names.names(for: number) != nil {
                return number
            }
            if !options.ignoreUnknownFields {
                // The digit count was capped while parsing, which is what
                // makes truncating to Int32 acceptable here.
                guard names.isReserved(number: Int32(truncatingIfNeeded: number)) else {
                    throw TextFormatDecodingError.unknownField
                }
                sawReservedField = true
            }
        // Unknown or reserved field number: fall out of the switch and skip it.

        default:
            guard lead == terminator else {
                throw TextFormatDecodingError.malformedText
            }
            _ = skipOptionalObjectEnd(lead)
            return nil
        }

        assert(options.ignoreUnknownFields || options.ignoreUnknownExtensionFields || sawReservedField)
        try skipUnknownFieldValue()
        // Skip any separator before looping around to try for another field.
        skipOptionalSeparator()
    }
}
/// Skips the value of an unknown field. When called, `p` points at the
/// first character after the unknown field name.
///
/// This is modeled after the C++ text_format.cpp `ConsumeField()`. The
/// kind of value is guessed from the syntax:
/// - A scalar (or list) value must be introduced by ":" and must not
///   start with "{" or "<", which indicate the beginning of a message
///   body.
/// - If there is no ":", or a "{" or "<" follows the ":", the value has
///   to be a message or the input is ill-formed.
///
/// - Throws: `TextFormatDecodingError.malformedText` if nothing follows
///   the ":"; otherwise whatever the delegated skip helpers throw.
internal mutating func skipUnknownFieldValue() throws {
    skipWhitespace()
    guard skipOptionalColon() else {
        // No colon at all: only a message body is legal here.
        try skipUnknownMessageFieldValue()
        return
    }
    guard p != end else {
        // Nothing after the ':'?
        throw TextFormatDecodingError.malformedText
    }
    let next = p[0]
    if next == asciiOpenAngleBracket || next == asciiOpenCurlyBracket {
        try skipUnknownMessageFieldValue()
    } else {
        try skipUnknownPrimativeFieldValue()
    }
}
/// Looks ahead (without consuming anything) to decide how an unknown
/// numeric value should be parsed and validated.
///
/// Returns `true` only when, after an optional leading `-`, the text is
/// a `0` that is followed by end-of-input, by `x` (hex), or by a digit
/// `0`...`7` (octal). The caller then uses the integer parsers. Every
/// other shape returns `false` and is handed to the floating-point
/// parser, including plain decimal digits: even if the text is an
/// integer, it might be too large for a `UInt64` yet still valid for a
/// float/double field.
///
/// NOTE: If the input runs out right after a `-`, this answers `false`
/// and lets the parser that runs next report the error.
///
/// Reads `p[0]` unconditionally, so `p` must not be at end-of-input.
private func mustParseNumberAsDecimal() -> Bool {
    var cursor = p
    var byte = cursor[0]

    // Floats and integers alike can have a leading '-'.
    if byte == asciiMinus {
        cursor += 1
        guard cursor != end else { return false }
        byte = cursor[0]
    }

    // Only a leading zero can force integer parsing; "(-)." and any
    // other first character are treated as floating point.
    guard byte == asciiZero else {
        return false
    }

    cursor += 1
    guard cursor != end else {
        return true  // "(-)0[end]" : a bare zero
    }
    byte = cursor[0]

    // "(-)0x" is hex and "(-)0[0-7]" is octal; "(-)0." and everything
    // else is left to the floating-point parser.
    return byte == asciiLowerX || (byte >= asciiZero && byte <= asciiSeven)
}
/// Skips one non-message value of an unknown field: a quoted
/// string/bytes literal, a bracketed list, an identifier, or a number.
///
/// This is modeled after the C++ text_format.cpp `SkipFieldValue()`.
/// Reads `p[0]` unconditionally, so `p` must not be at end-of-input.
///
/// - Parameter canBeList: Whether a `[...]` list is acceptable here.
///   It is passed as `false` for list elements, because a list nested
///   directly inside a list is not legal.
/// - Throws: `TextFormatDecodingError.malformedText` for a nested or
///   unterminated list; otherwise whatever the value parsers throw.
private mutating func skipUnknownPrimativeFieldValue(canBeList: Bool = true) throws {
    let first = p[0]

    // Quoted literal. The field could be 'bytes', so it is not parsed
    // as a string, since that might fail.
    if first == asciiSingleQuote || first == asciiDoubleQuote {
        _ = try nextBytesValue()
        return
    }

    if skipOptionalBeginArray() {
        guard canBeList else {
            // Have encountered an array as an element in an array, that isn't legal.
            throw TextFormatDecodingError.malformedText
        }
        var listClosed = skipOptionalEndArray()
        while !listClosed {
            guard p != end else {
                throw TextFormatDecodingError.malformedText
            }
            let elementStart = p[0]
            if elementStart == asciiOpenAngleBracket || elementStart == asciiOpenCurlyBracket {
                try skipUnknownMessageFieldValue()
            } else {
                try skipUnknownPrimativeFieldValue(canBeList: false)
            }
            listClosed = skipOptionalEndArray()
            if !listClosed {
                try skipRequiredComma()
            }
        }
        return
    }

    // NOTE: This will also cover "true", "false" for booleans, "nan"/"inf" for floats.
    if try nextOptionalEnumName() != nil {
        skipWhitespace()  // `nextOptionalEnumName()` doesn't skip trailing whitespace
        return
    }

    // NOTE: "-nan"/"-inf" need no special case: they are not forced
    // down the integer path, and `nextDouble()` already supports them.
    if mustParseNumberAsDecimal() {
        if first == asciiMinus {
            _ = try nextSInt()
        } else {
            _ = try nextUInt()
        }
    } else {
        _ = try nextDouble()
    }
}
/// Skips a whole message-valued unknown field, from its opening `{` or
/// `<` through the matching closer.
///
/// This is modeled after the C++ text_format.cpp `SkipFieldMessage()`.
/// Each nested entry is read as a key (extension keys allowed), then
/// its value and an optional separator are skipped.
///
/// - Throws: `TextFormatDecodingError.malformedText` if the input ends
///   before the closer or no key can be read; otherwise whatever the
///   delegated helpers throw.
private mutating func skipUnknownMessageFieldValue() throws {
    let closer = try skipObjectStart()
    while true {
        if skipOptionalObjectEnd(closer) {
            return
        }
        guard p != end else {
            throw TextFormatDecodingError.malformedText
        }
        // Must be a valid field name or extension name ("[ext.name]").
        guard try nextKey(allowExtensions: true) != nil else {
            throw TextFormatDecodingError.malformedText
        }
        try skipUnknownFieldValue()
        skipOptionalSeparator()
    }
}
/// Consumes the byte `c`, together with any whitespace before and
/// after it.
///
/// - Throws: `TextFormatDecodingError.malformedText` if the next
///   non-whitespace byte is not `c` or the input has ended.
private mutating func skipRequiredCharacter(_ c: UInt8) throws {
    skipWhitespace()
    guard p != end, p[0] == c else {
        throw TextFormatDecodingError.malformedText
    }
    p += 1
    skipWhitespace()
}
/// Consumes a mandatory `,` with its surrounding whitespace.
///
/// - Throws: Whatever `skipRequiredCharacter(_:)` throws when the comma
///   is missing.
internal mutating func skipRequiredComma() throws {
    let comma = asciiComma
    try skipRequiredCharacter(comma)
}
/// Consumes a mandatory `:` with its surrounding whitespace.
///
/// - Throws: Whatever `skipRequiredCharacter(_:)` throws when the colon
///   is missing.
internal mutating func skipRequiredColon() throws {
    let colon = asciiColon
    try skipRequiredCharacter(colon)
}
/// Consumes the byte `c` and any whitespace after it, if `c` is the
/// byte at the current position. Leading whitespace is not skipped.
///
/// - Returns: `true` if the byte was consumed; `false` (with nothing
///   consumed) otherwise, including at end-of-input.
private mutating func skipOptionalCharacter(_ c: UInt8) -> Bool {
    guard p != end, p[0] == c else {
        return false
    }
    p += 1
    skipWhitespace()
    return true
}
/// Consumes a `:` (and trailing whitespace) if one is at the current
/// position.
///
/// - Returns: `true` if a colon was consumed.
internal mutating func skipOptionalColon() -> Bool {
    let consumed = skipOptionalCharacter(asciiColon)
    return consumed
}
/// Consumes a `]` (and trailing whitespace) if one is at the current
/// position.
///
/// - Returns: `true` if the closing bracket was consumed.
internal mutating func skipOptionalEndArray() -> Bool {
    let consumed = skipOptionalCharacter(asciiCloseSquareBracket)
    return consumed
}
/// Consumes a `[` (and trailing whitespace) if one is at the current
/// position.
///
/// - Returns: `true` if the opening bracket was consumed.
internal mutating func skipOptionalBeginArray() -> Bool {
    let consumed = skipOptionalCharacter(asciiOpenSquareBracket)
    return consumed
}
/// Consumes the object terminator `c` (and trailing whitespace) if it
/// is at the current position, and on success calls
/// `decrementRecursionDepth()`.
///
/// - Parameter c: The closing byte expected for the current object.
/// - Returns: `true` if the terminator was consumed.
internal mutating func skipOptionalObjectEnd(_ c: UInt8) -> Bool {
    guard skipOptionalCharacter(c) else {
        return false
    }
    decrementRecursionDepth()
    return true
}
/// Consumes one field separator (`,` or `;`) and the whitespace after
/// it, if a separator is at the current position. Does nothing
/// otherwise, including at end-of-input.
internal mutating func skipOptionalSeparator() {
    guard p != end else {
        return
    }
    switch p[0] {
    case asciiComma, asciiSemicolon:
        p += 1
        skipWhitespace()
    default:
        break
    }
}
/// Consumes the opening delimiter of a message body and returns the
/// byte that must close it: `}` for `{`, `>` for `<`.
///
/// `incrementRecursionDepth()` is called first, before the input is
/// inspected. The next byte and any whitespace after it are consumed
/// even when that byte turns out not to be a valid opener.
///
/// - Throws: Whatever `incrementRecursionDepth()` throws;
///   `TextFormatDecodingError.malformedText` at end-of-input or when
///   the next byte is neither `{` nor `<`.
internal mutating func skipObjectStart() throws -> UInt8 {
    try incrementRecursionDepth()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    let opener = p[0]
    p += 1
    skipWhitespace()
    if opener == asciiOpenCurlyBracket {  // {
        return asciiCloseCurlyBracket  // }
    }
    if opener == asciiOpenAngleBracket {  // <
        return asciiCloseAngleBracket  // >
    }
    throw TextFormatDecodingError.malformedText
}
- }
|