TextFormatScanner.swift 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469
  1. // Sources/SwiftProtobuf/TextFormatScanner.swift - Text format decoding
  2. //
  3. // Copyright (c) 2014 - 2019 Apple Inc. and the project authors
  4. // Licensed under Apache License v2.0 with Runtime Library Exception
  5. //
  6. // See LICENSE.txt for license information:
  7. // https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
  8. //
  9. // -----------------------------------------------------------------------------
  10. ///
  11. /// Text format decoding engine.
  12. ///
  13. // -----------------------------------------------------------------------------
  14. import Foundation
  // ASCII byte values referenced by the text-format scanner.

  // Control characters (written numerically).
  private let asciiBell: UInt8 = 7
  private let asciiBackspace: UInt8 = 8
  private let asciiTab: UInt8 = 9
  private let asciiNewLine: UInt8 = 10
  private let asciiVerticalTab: UInt8 = 11
  private let asciiFormFeed: UInt8 = 12
  private let asciiCarriageReturn: UInt8 = 13

  // Digits.
  private let asciiZero = UInt8(ascii: "0")
  private let asciiOne = UInt8(ascii: "1")
  private let asciiThree = UInt8(ascii: "3")
  private let asciiSeven = UInt8(ascii: "7")
  private let asciiNine = UInt8(ascii: "9")

  // Punctuation.
  private let asciiSpace = UInt8(ascii: " ")
  private let asciiColon = UInt8(ascii: ":")
  private let asciiSemicolon = UInt8(ascii: ";")
  private let asciiComma = UInt8(ascii: ",")
  private let asciiPeriod = UInt8(ascii: ".")
  private let asciiPlus = UInt8(ascii: "+")
  private let asciiMinus = UInt8(ascii: "-")
  private let asciiHash = UInt8(ascii: "#")
  private let asciiUnderscore = UInt8(ascii: "_")
  private let asciiQuestionMark = UInt8(ascii: "?")
  private let asciiDoubleQuote = UInt8(ascii: "\"")
  private let asciiSingleQuote = UInt8(ascii: "'")
  private let asciiBackslash = UInt8(ascii: "\\")
  private let asciiForwardSlash = UInt8(ascii: "/")

  // Brackets.
  private let asciiOpenSquareBracket = UInt8(ascii: "[")
  private let asciiCloseSquareBracket = UInt8(ascii: "]")
  private let asciiOpenCurlyBracket = UInt8(ascii: "{")
  private let asciiCloseCurlyBracket = UInt8(ascii: "}")
  private let asciiOpenAngleBracket = UInt8(ascii: "<")
  private let asciiCloseAngleBracket = UInt8(ascii: ">")

  // Upper-case letters.
  private let asciiUpperA = UInt8(ascii: "A")
  private let asciiUpperE = UInt8(ascii: "E")
  private let asciiUpperF = UInt8(ascii: "F")
  private let asciiUpperT = UInt8(ascii: "T")
  private let asciiUpperU = UInt8(ascii: "U")
  private let asciiUpperZ = UInt8(ascii: "Z")

  // Lower-case letters.
  private let asciiLowerA = UInt8(ascii: "a")
  private let asciiLowerB = UInt8(ascii: "b")
  private let asciiLowerE = UInt8(ascii: "e")
  private let asciiLowerF = UInt8(ascii: "f")
  private let asciiLowerI = UInt8(ascii: "i")
  private let asciiLowerL = UInt8(ascii: "l")
  private let asciiLowerN = UInt8(ascii: "n")
  private let asciiLowerR = UInt8(ascii: "r")
  private let asciiLowerS = UInt8(ascii: "s")
  private let asciiLowerT = UInt8(ascii: "t")
  private let asciiLowerU = UInt8(ascii: "u")
  private let asciiLowerV = UInt8(ascii: "v")
  private let asciiLowerX = UInt8(ascii: "x")
  private let asciiLowerY = UInt8(ascii: "y")
  private let asciiLowerZ = UInt8(ascii: "z")
  // https://protobuf.dev/programming-guides/proto2/#assigning
  // Field numbers can be between 1 and 536,870,911, so a raw field number
  // never needs more than nine decimal digits. Parsing can stop once it goes
  // past this length (which also avoids rollover).
  private let maxFieldNumLength = 9
  /// Converts one ASCII hexadecimal digit into its numeric value.
  ///
  /// - Parameter c: The ASCII byte to interpret.
  /// - Returns: 0...15 for `0-9`, `A-F` or `a-f`; `nil` for any other byte.
  private func fromHexDigit(_ c: UInt8) -> UInt8? {
      switch c {
      case asciiZero...asciiNine:
          return c - asciiZero
      case asciiUpperA...asciiUpperF:
          return c - asciiUpperA + 10
      case asciiLowerA...asciiLowerF:
          return c - asciiLowerA + 10
      default:
          return nil
      }
  }
  /// Same as `fromHexDigit(_:)`, but widens the digit value to `UInt32`.
  ///
  /// - Parameter c: The ASCII byte to interpret.
  /// - Returns: The digit value, or `nil` if `c` is not a hex digit.
  private func uint32FromHexDigit(_ c: UInt8) -> UInt32? {
      fromHexDigit(c).map { UInt32($0) }
  }
  // Protobuf Text encoding assumes that you're working directly
  // in UTF-8. So this implementation walks the string's UTF-8 bytes,
  // resolves the backslash escapes into a sequence of bytes, then
  // converts those bytes back into a string.
  //
  // Returns nil when the input ends in a lone backslash, contains an
  // unrecognized or incomplete escape, names a code point that is not a
  // Unicode scalar value (a surrogate, or anything above U+10FFFF), or
  // when `utf8ToString` does not produce a string for the decoded bytes.
  private func decodeString(_ s: String) -> String? {
      // Reads exactly four hex digits (most significant first) as a UInt32.
      func read4HexDigits(_ i: inout String.UTF8View.Iterator) -> UInt32? {
          var value: UInt32 = 0
          for _ in 0..<4 {
              guard let byte = i.next(), let digit = uint32FromHexDigit(byte) else {
                  return nil
              }
              value = (value << 4) | digit
          }
          return value
      }

      var out = [UInt8]()
      // Every escape is at least as long as the bytes it decodes to, so the
      // input length is an upper bound on the output length.
      out.reserveCapacity(s.utf8.count)
      var bytes = s.utf8.makeIterator()
      while let byte = bytes.next() {
          guard byte == asciiBackslash else {
              out.append(byte)
              continue
          }
          guard let escaped = bytes.next() else {
              return nil  // Input ends with backslash
          }
          switch escaped {
          case asciiZero...asciiSeven:  // 0...7
              // C standard allows 1, 2, or 3 octal digits.
              // The max octal value is actually \377, but the C++ protobuf code
              // in strutil.cc:UnescapeCEscapeSequences() decodes with rollover,
              // so the wrapping arithmetic duplicates that behavior for
              // consistency between languages.
              var n = escaped - asciiZero
              var digitsRead = 1
              while digitsRead < 3 {
                  let savedPosition = bytes
                  guard let next = bytes.next(), next >= asciiZero, next <= asciiSeven else {
                      // Not an octal digit: leave it for the main loop.
                      bytes = savedPosition
                      break
                  }
                  n = n &* 8 &+ (next - asciiZero)
                  digitsRead += 1
              }
              out.append(n)
          case asciiLowerU, asciiUpperU:  // "u" or "U"
              // \u - 4 hex digits, \U - 8 hex digits:
              guard var codePoint = read4HexDigits(&bytes) else { return nil }
              if escaped == asciiUpperU {
                  guard let low = read4HexDigits(&bytes) else { return nil }
                  codePoint = (codePoint << 16) | low
              }
              // `Unicode.Scalar(_:)` fails for surrogates and for values above
              // U+10FFFF, so only real scalar values are ever encoded.
              guard let scalar = Unicode.Scalar(codePoint) else { return nil }
              Unicode.UTF8.encode(scalar) { out.append($0) }
          case asciiLowerX:  // "x"
              // Unlike C/C++, protobuf only allows 1 or 2 digits here:
              guard let first = bytes.next(), var n = fromHexDigit(first) else {
                  return nil  // Hex escape must have at least 1 digit
              }
              let savedPosition = bytes
              if let second = bytes.next(), let low = fromHexDigit(second) {
                  n = n &* 16 + low
              } else {
                  // No second digit; reset the iterator
                  bytes = savedPosition
              }
              out.append(n)
          case asciiLowerA:  // \a
              out.append(asciiBell)
          case asciiLowerB:  // \b
              out.append(asciiBackspace)
          case asciiLowerF:  // \f
              out.append(asciiFormFeed)
          case asciiLowerN:  // \n
              out.append(asciiNewLine)
          case asciiLowerR:  // \r
              out.append(asciiCarriageReturn)
          case asciiLowerT:  // \t
              out.append(asciiTab)
          case asciiLowerV:  // \v
              out.append(asciiVerticalTab)
          case asciiDoubleQuote,
              asciiSingleQuote,
              asciiQuestionMark,
              asciiBackslash:  // " ' ? \
              out.append(escaped)
          default:
              return nil  // Unrecognized escape
          }
      }

      if out.isEmpty {
          return String()
      }
      return out.withUnsafeBufferPointer { ptr -> String? in
          guard let addr = ptr.baseAddress else {
              return String()
          }
          return utf8ToString(bytes: addr, count: ptr.count)
      }
  }
  230. ///
  231. /// TextFormatScanner has no public members.
  232. ///
  233. internal struct TextFormatScanner {
  /// Optional extension map supplied at initialization.
  internal let extensions: (any ExtensionMap)?
  /// Current read position within the input bytes.
  private var p: UnsafeRawPointer
  /// Position one past the last input byte (`utf8Pointer + count`).
  private let end: UnsafeRawPointer
  /// Parser used by `tryParseFloatString()` to convert UTF-8 digits to a `Double`.
  private let doubleParser = DoubleParser()
  /// Decoding options supplied at initialization.
  internal let options: TextFormatDecodingOptions
  /// Remaining nesting depth; starts at `options.messageDepthLimit - 1`.
  internal var recursionBudget: Int
  /// True once the read position has reached the end of the input.
  internal var complete: Bool {
      return p == end
  }
  /// Creates a scanner over `count` bytes starting at `utf8Pointer` and
  /// advances past any leading whitespace and comments.
  ///
  /// - Parameters:
  ///   - utf8Pointer: Start of the input bytes.
  ///   - count: Number of input bytes.
  ///   - options: Decoding options; `messageDepthLimit` seeds the recursion budget.
  ///   - extensions: Optional extension map, `nil` by default.
  internal init(
      utf8Pointer: UnsafeRawPointer,
      count: Int,
      options: TextFormatDecodingOptions,
      extensions: (any ExtensionMap)? = nil
  ) {
      self.p = utf8Pointer
      self.end = utf8Pointer + count
      self.extensions = extensions
      self.options = options
      // The root message is never entered through `skipObjectStart`, so one
      // level of the budget is spent up front to account for it.
      self.recursionBudget = options.messageDepthLimit - 1
      skipWhitespace()
  }
  /// Spends one level of the recursion budget.
  ///
  /// - Throws: `TextFormatDecodingError.messageDepthLimit` once the budget
  ///   drops below zero.
  private mutating func incrementRecursionDepth() throws {
      recursionBudget -= 1
      guard recursionBudget >= 0 else {
          throw TextFormatDecodingError.messageDepthLimit
      }
  }
  /// Returns one level to the recursion budget.
  ///
  /// Traps if the budget ends up above `options.messageDepthLimit`.
  private mutating func decrementRecursionDepth() {
      recursionBudget += 1
      // Exceeding the limit should be impossible; if it happens, something is
      // probably corrupting memory, so throwing would not make much sense.
      guard recursionBudget <= options.messageDepthLimit else {
          fatalError("Somehow TextFormatDecoding unwound more objects than it started")
      }
  }
  /// Advances past spaces, tabs, newlines, carriage returns and `#` comments.
  ///
  /// A comment runs from `#` through the next newline or carriage return
  /// (inclusive), or to the end of the input.
  private mutating func skipWhitespace() {
      while p != end {
          let current = p[0]
          if current == asciiSpace || current == asciiTab
              || current == asciiNewLine || current == asciiCarriageReturn
          {
              p += 1
          } else if current == asciiHash {
              p += 1
              // Consume the rest of the comment, including its line terminator.
              while p != end {
                  let commentByte = p[0]
                  p += 1
                  if commentByte == asciiNewLine || commentByte == asciiCarriageReturn {
                      break
                  }
              }
          } else {
              return
          }
      }
  }
  /// Returns the raw UTF-8 bytes of the identifier at the current position,
  /// then skips any trailing whitespace.
  ///
  /// The identifier is the longest run of ASCII letters, digits and
  /// underscores. The caller is expected to have checked that the current
  /// byte is a valid identifier start.
  private mutating func parseUTF8Identifier() -> UnsafeRawBufferPointer {
      let begin = p
      while p != end {
          let c = p[0]
          let isLetter =
              (c >= asciiLowerA && c <= asciiLowerZ)
              || (c >= asciiUpperA && c <= asciiUpperZ)
          let isDigit = c >= asciiZero && c <= asciiNine
          guard isLetter || isDigit || c == asciiUnderscore else {
              break
          }
          p += 1
      }
      let identifier = UnsafeRawBufferPointer(start: begin, count: p - begin)
      skipWhitespace()
      return identifier
  }
  /// Returns the next identifier as a `String`.
  private mutating func parseIdentifier() -> String {
      let raw = parseUTF8Identifier()
      // Force-unwrap is OK: we never have invalid UTF8 at this point.
      return utf8ToString(bytes: raw.baseAddress!, count: raw.count)!
  }
  /// Validates the quoted body of a bytes literal and counts the bytes it
  /// decodes to.
  ///
  /// On success the read position is restored to the start of the body, so
  /// the caller can decode it in a second pass.
  ///
  /// - Parameters:
  ///   - terminator: The quote byte that closes the string.
  ///   - sawBackslash: Set to `true` if the body contains any escape.
  /// - Returns: The number of decoded bytes.
  /// - Throws: `TextFormatDecodingError.malformedText` for an embedded
  ///   newline or carriage return, a bad escape, or an unterminated string.
  private mutating func validateAndCountBytesFromString(terminator: UInt8, sawBackslash: inout Bool) throws -> Int {
      let bodyStart = p
      var decodedLength = 0
      sawBackslash = false
      while p != end {
          let byte = p[0]
          p += 1
          if byte == terminator {
              // Rewind so the caller can decode the body.
              p = bodyStart
              return decodedLength
          }
          if byte == asciiNewLine || byte == asciiCarriageReturn {
              // Can't have a newline in the middle of a bytes string.
              throw TextFormatDecodingError.malformedText
          }
          guard byte == asciiBackslash else {
              decodedLength += 1
              continue
          }
          sawBackslash = true
          guard p != end else {
              // Trailing backslash: leave the loop and report an unterminated string.
              break
          }
          let escaped = p[0]
          p += 1
          switch escaped {
          case asciiZero...asciiSeven:  // '0'...'7'
              // C standard allows 1, 2, or 3 octal digits.
              if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                  p += 1
                  if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                      guard escaped <= asciiThree else {
                          // Out of range octal: three digits and first digit is greater than 3
                          throw TextFormatDecodingError.malformedText
                      }
                      p += 1
                  }
              }
              decodedLength += 1
          case asciiLowerU, asciiUpperU:  // 'u' or 'U' unicode escape
              let digitCount = (escaped == asciiLowerU) ? 4 : 8
              guard (end - p) >= digitCount else {
                  throw TextFormatDecodingError.malformedText  // unicode escape must 4/8 digits
              }
              var codePoint: UInt32 = 0
              for offset in 0..<digitCount {
                  guard let digit = uint32FromHexDigit(p[offset]) else {
                      throw TextFormatDecodingError.malformedText  // wasn't a hex digit
                  }
                  codePoint = (codePoint << 4) + digit
              }
              p += digitCount
              switch codePoint {
              case 0xD800...0xDFFF:
                  // Surrogate pair (low or high), shouldn't get a unicode literal of those.
                  throw TextFormatDecodingError.malformedText
              case 0...0x7f:
                  decodedLength += 1  // 1 byte encoding
              case 0x80...0x7ff:
                  decodedLength += 2  // 2 byte encoding
              case 0x800...0xffff:
                  decodedLength += 3  // 3 byte encoding
              case 0x10000...0x10FFFF:
                  decodedLength += 4  // 4 byte encoding
              default:
                  throw TextFormatDecodingError.malformedText  // Isn't a valid unicode character
              }
          case asciiLowerX:  // 'x' hexadecimal escape
              guard p != end, fromHexDigit(p[0]) != nil else {
                  throw TextFormatDecodingError.malformedText  // Hex escape must have at least 1 digit
              }
              p += 1
              if p != end, fromHexDigit(p[0]) != nil {
                  p += 1
              }
              decodedLength += 1
          case asciiLowerA,  // \a ("alert")
              asciiLowerB,  // \b
              asciiLowerF,  // \f
              asciiLowerN,  // \n
              asciiLowerR,  // \r
              asciiLowerT,  // \t
              asciiLowerV,  // \v
              asciiSingleQuote,  // \'
              asciiDoubleQuote,  // \"
              asciiQuestionMark,  // \?
              asciiBackslash:  // \\
              decodedLength += 1
          default:
              throw TextFormatDecodingError.malformedText  // Unrecognized escape
          }
      }
      // Ran out of input before the closing quote.
      throw TextFormatDecodingError.malformedText
  }
  /// Decodes the body of a quoted bytes literal into `data`.
  ///
  /// Protobuf Text format uses C ASCII conventions for encoding byte
  /// sequences, including octal and hexadecimal escapes.
  ///
  /// Assumes that validateAndCountBytesFromString() has already verified
  /// the body and that `data` has been sized to the count it returned, so
  /// no error checks are performed here. The closing terminator is consumed.
  ///
  /// - Parameters:
  ///   - terminator: The quote byte that closes the string.
  ///   - data: Pre-sized buffer that receives the decoded bytes.
  private mutating func parseBytesFromString(terminator: UInt8, into data: inout Data) {
      data.withUnsafeMutableBytes { (body: UnsafeMutableRawBufferPointer) in
          guard var out = body.baseAddress, body.count > 0 else {
              return
          }
          // Writes one decoded byte and advances the output cursor.
          func emit(_ value: UInt8) {
              out[0] = value
              out += 1
          }
          while p[0] != terminator {
              let byte = p[0]
              p += 1
              guard byte == asciiBackslash else {
                  emit(byte)
                  continue
              }
              let escaped = p[0]
              p += 1
              switch escaped {
              case asciiZero...asciiSeven:  // '0'...'7'
                  // C standard allows 1, 2, or 3 octal digits.
                  let firstValue = escaped - asciiZero
                  let second = p[0]
                  if second >= asciiZero, second <= asciiSeven {
                      p += 1
                      let secondValue = second - asciiZero
                      let third = p[0]
                      if third >= asciiZero, third <= asciiSeven {
                          p += 1
                          let thirdValue = third - asciiZero
                          emit(firstValue &* 64 + secondValue * 8 + thirdValue)
                      } else {
                          emit(firstValue * 8 + secondValue)
                      }
                  } else {
                      emit(firstValue)
                  }
              case asciiLowerU, asciiUpperU:
                  let digitCount = (escaped == asciiLowerU) ? 4 : 8
                  var codePoint: UInt32 = 0
                  for offset in 0..<digitCount {
                      codePoint = (codePoint << 4) + uint32FromHexDigit(p[offset])!
                  }
                  p += digitCount
                  switch codePoint {
                  case 0...0x7f:
                      // 1 byte encoding
                      emit(UInt8(truncatingIfNeeded: codePoint))
                  case 0x80...0x7ff:
                      // 2 byte encoding
                      emit(0xC0 + UInt8(truncatingIfNeeded: codePoint >> 6))
                      emit(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
                  case 0x800...0xffff:
                      // 3 byte encoding
                      emit(0xE0 + UInt8(truncatingIfNeeded: codePoint >> 12))
                      emit(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                      emit(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
                  case 0x10000...0x10FFFF:
                      // 4 byte encoding
                      emit(0xF0 + UInt8(truncatingIfNeeded: codePoint >> 18))
                      emit(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F))
                      emit(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                      emit(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
                  default:
                      preconditionFailure()  // Already validated, can't happen
                  }
              case asciiLowerX:  // 'x' hexadecimal escape
                  // We already validated, so we know there's at least one digit:
                  var value = fromHexDigit(p[0])!
                  p += 1
                  if let low = fromHexDigit(p[0]) {
                      value = value &* 16 &+ low
                      p += 1
                  }
                  emit(value)
              case asciiLowerA:  // \a ("alert")
                  emit(asciiBell)
              case asciiLowerB:  // \b
                  emit(asciiBackspace)
              case asciiLowerF:  // \f
                  emit(asciiFormFeed)
              case asciiLowerN:  // \n
                  emit(asciiNewLine)
              case asciiLowerR:  // \r
                  emit(asciiCarriageReturn)
              case asciiLowerT:  // \t
                  emit(asciiTab)
              case asciiLowerV:  // \v
                  emit(asciiVerticalTab)
              default:
                  // Any other escaped byte stands for itself.
                  emit(escaped)
              }
          }
          p += 1  // Consume terminator
      }
  }
  /// Parses one quoted string segment; the opening quote must already have
  /// been consumed.
  ///
  /// On success the closing quote and any following whitespace are consumed.
  ///
  /// - Parameter terminator: The quote byte that closes the segment.
  /// - Returns: The segment text, with escapes resolved by `decodeString`
  ///   if any backslash was seen. `nil` if the segment is unterminated, ends
  ///   right after a backslash, contains a raw newline or carriage return,
  ///   or cannot be converted to a string.
  private mutating func parseStringSegment(terminator: UInt8) -> String? {
      let bodyStart = p
      var needsUnescape = false
      while p != end {
          let c = p[0]
          if c == terminator {
              let raw = utf8ToString(bytes: bodyStart, count: p - bodyStart)
              p += 1
              skipWhitespace()
              guard let text = raw, needsUnescape else {
                  return raw
              }
              return decodeString(text)
          }
          p += 1
          switch c {
          case asciiBackslash:  // \
              if p == end {
                  return nil
              }
              needsUnescape = true
              // Step over the escaped byte so an escaped quote does not end the segment.
              p += 1
          case asciiNewLine, asciiCarriageReturn:
              // Can't have a newline in the middle of a raw string.
              return nil
          default:
              break
          }
      }
      return nil  // Unterminated quoted string
  }
  /// Parses an unsigned integer literal at the current position and skips
  /// trailing whitespace.
  ///
  /// Accepted forms are decimal (`1`...`9` followed by digits), hexadecimal
  /// (`0x` followed by at least one hex digit) and octal (`0` followed by
  /// zero or more octal digits). Parsing stops at the first byte that cannot
  /// continue the literal.
  ///
  /// - Returns: The parsed value.
  /// - Throws: `TextFormatDecodingError.malformedNumber` if the input is
  ///   exhausted, does not start with a digit, has no hex digits after `0x`,
  ///   or does not fit in a `UInt64`.
  internal mutating func nextUInt() throws -> UInt64 {
      if p == end {
          throw TextFormatDecodingError.malformedNumber
      }
      let c = p[0]
      p += 1
      if c == asciiZero {  // leading '0' precedes octal or hex
          if p == end {
              // The TextFormat ended with a field value of zero.
              return 0
          }
          if p[0] == asciiLowerX {  // 'x' => hex
              p += 1
              let digitsStart = p
              var n: UInt64 = 0
              while p != end {
                  guard let digit = fromHexDigit(p[0]) else {
                      break
                  }
                  if n > UInt64.max / 16 {
                      throw TextFormatDecodingError.malformedNumber
                  }
                  p += 1
                  n = n * 16 + UInt64(digit)
              }
              // A bare "0x" is not a number: the prefix needs at least one digit.
              guard p != digitsStart else {
                  throw TextFormatDecodingError.malformedNumber
              }
              skipWhitespace()
              return n
          } else {  // octal
              var n: UInt64 = 0
              while p != end {
                  let digit = p[0]
                  if digit < asciiZero || digit > asciiSeven {
                      skipWhitespace()
                      return n  // not octal digit
                  }
                  let val = UInt64(digit - asciiZero)
                  if n > UInt64.max / 8 {
                      throw TextFormatDecodingError.malformedNumber
                  }
                  p += 1
                  n = n * 8 + val
              }
              skipWhitespace()
              return n
          }
      } else if c > asciiZero && c <= asciiNine {  // 1...9
          var n = UInt64(c - asciiZero)
          while p != end {
              let digit = p[0]
              if digit < asciiZero || digit > asciiNine {
                  skipWhitespace()
                  return n  // not a digit
              }
              let val = UInt64(digit - asciiZero)
              // Check before multiplying so the arithmetic below cannot trap.
              if n > UInt64.max / 10 || n * 10 > UInt64.max - val {
                  throw TextFormatDecodingError.malformedNumber
              }
              p += 1
              n = n * 10 + val
          }
          skipWhitespace()
          return n
      }
      throw TextFormatDecodingError.malformedNumber
  }
  /// Parses an optionally negative integer literal at the current position.
  ///
  /// The magnitude is parsed by `nextUInt()`. A leading `-` must be followed
  /// directly by a decimal digit.
  ///
  /// - Returns: The parsed value.
  /// - Throws: `TextFormatDecodingError.malformedNumber` if the input is
  ///   exhausted, the sign is not followed by a digit, or the value does not
  ///   fit in an `Int64`.
  internal mutating func nextSInt() throws -> Int64 {
      guard p != end else {
          throw TextFormatDecodingError.malformedNumber
      }
      guard p[0] == asciiMinus else {
          // Non-negative value: the magnitude must not exceed Int64.max.
          let magnitude = try nextUInt()
          guard magnitude <= UInt64(bitPattern: Int64.max) else {
              throw TextFormatDecodingError.malformedNumber
          }
          return Int64(bitPattern: magnitude)
      }
      p += 1  // consume '-'
      guard p != end else {
          throw TextFormatDecodingError.malformedNumber
      }
      // character after '-' must be digit
      let firstDigit = p[0]
      guard firstDigit >= asciiZero, firstDigit <= asciiNine else {
          throw TextFormatDecodingError.malformedNumber
      }
      let magnitude = try nextUInt()
      let minMagnitude: UInt64 = 0x8000_0000_0000_0000  // -Int64.min
      if magnitude == minMagnitude {
          return Int64.min  // Special case for Int64.min
      }
      if magnitude > minMagnitude {
          // Too large negative number
          throw TextFormatDecodingError.malformedNumber
      }
      return -Int64(bitPattern: magnitude)
  }
  /// Parses a string value made of one or more adjacent quoted segments and
  /// returns their concatenation.
  ///
  /// Each segment may use single or double quotes.
  ///
  /// - Throws: `TextFormatDecodingError.malformedText` if the input is
  ///   exhausted, no quote is present, or a segment fails to parse.
  internal mutating func nextStringValue() throws -> String {
      skipWhitespace()
      guard p != end else {
          throw TextFormatDecodingError.malformedText
      }
      let opener = p[0]
      guard opener == asciiSingleQuote || opener == asciiDoubleQuote else {
          throw TextFormatDecodingError.malformedText
      }
      p += 1
      guard var result = parseStringSegment(terminator: opener) else {
          throw TextFormatDecodingError.malformedText
      }
      // Keep appending for as long as another quoted segment follows.
      while p != end {
          let quote = p[0]
          guard quote == asciiSingleQuote || quote == asciiDoubleQuote else {
              break
          }
          p += 1
          guard let segment = parseStringSegment(terminator: quote) else {
              throw TextFormatDecodingError.malformedText
          }
          result += segment
      }
      return result
  }
  /// Parses a bytes value.
  ///
  /// Protobuf Text Format allows a single bytes field to contain multiple
  /// quoted strings. The values are separately decoded and then
  /// concatenated:
  ///   field1: "bytes" 'more bytes'
  ///           "and even more bytes"
  ///
  /// - Throws: `TextFormatDecodingError.malformedText` if the input is
  ///   exhausted, no quote is present, or a segment fails validation.
  internal mutating func nextBytesValue() throws -> Data {
      skipWhitespace()
      guard p != end else {
          throw TextFormatDecodingError.malformedText
      }
      let opener = p[0]
      guard opener == asciiSingleQuote || opener == asciiDoubleQuote else {
          throw TextFormatDecodingError.malformedText
      }
      p += 1

      // The first segment is required.
      var hasEscapes = false
      let firstCount = try validateAndCountBytesFromString(terminator: opener, sawBackslash: &hasEscapes)
      var result: Data
      if hasEscapes {
          result = Data(count: firstCount)
          parseBytesFromString(terminator: opener, into: &result)
      } else {
          // No escapes: the body can be copied verbatim.
          result = Data(bytes: p, count: firstCount)
          p += firstCount + 1  // Skip string body + close quote
      }

      // If there are more strings, decode them and append to the result.
      while true {
          skipWhitespace()
          guard p != end else {
              return result
          }
          let quote = p[0]
          guard quote == asciiSingleQuote || quote == asciiDoubleQuote else {
              return result
          }
          p += 1
          var segmentHasEscapes = false
          let segmentCount = try validateAndCountBytesFromString(
              terminator: quote,
              sawBackslash: &segmentHasEscapes
          )
          if segmentHasEscapes {
              var segment = Data(count: segmentCount)
              parseBytesFromString(terminator: quote, into: &segment)
              result.append(segment)
          } else {
              result.append(Data(bytes: p, count: segmentCount))
              p += segmentCount + 1  // Skip string body + close quote
          }
      }
  }
  // Tries to identify a sequence of UTF8 characters that represents a
  // numeric floating-point value and hands it to the double parser.
  //
  // Returns nil, with the position restored, when the text cannot start a
  // number: a lone '-', a '0' followed by another digit, a '.' not followed
  // by a digit, or any other non-digit start. Otherwise it returns whatever
  // the double parser produces for the scanned bytes; a single trailing
  // 'f'/'F' is consumed but not passed to the parser.
  private mutating func tryParseFloatString() -> Double? {
      guard p != end else { return nil }
      let start = p
      var lead = p[0]
      if lead == asciiMinus {
          p += 1
          if p == end {
              p = start
              return nil
          }
          lead = p[0]
      }
      if lead == asciiZero {
          // '0' as first character is not allowed followed by digit
          p += 1
          if p != end {
              let next = p[0]
              if next >= asciiZero && next <= asciiNine {
                  p = start
                  return nil
              }
          }
      } else if lead == asciiPeriod {
          // '.' as first char only if followed by digit
          p += 1
          guard p != end, p[0] >= asciiZero, p[0] <= asciiNine else {
              p = start
              return nil
          }
      } else if lead < asciiOne || lead > asciiNine {
          p = start
          return nil
      }

      // Scan the body of the number; stop at an 'f'/'F' suffix or at the
      // first byte that cannot be part of a float literal.
      var hasFloatSuffix = false
      scan: while p != end {
          switch p[0] {
          case asciiZero...asciiNine,
              asciiPeriod,
              asciiPlus,
              asciiMinus,
              asciiLowerE,
              asciiUpperE:  // 0...9, ., +, -, e, E
              p += 1
          case asciiLowerF, asciiUpperF:  // f or F
              hasFloatSuffix = true
              break scan
          default:
              break scan
          }
      }
      let value = doubleParser.utf8ToDouble(
          bytes: UnsafeRawBufferPointer(
              start: start,
              count: p - start
          ),
          finiteOnly: false
      )
      if hasFloatSuffix {
          // Just skip the 'f'/'F'
          p += 1
      }
      skipWhitespace()
      return value
  }
  /// Advances past `bytes` only if the input at the cursor matches all of
  /// them exactly, in order; otherwise the cursor is left untouched.
  private mutating func skipOptionalCharacters(bytes: [UInt8]) {
    // Probe with a scratch cursor and commit only after a full match.
    var cursor = p
    for expected in bytes {
      guard cursor != end, cursor[0] == expected else {
        return
      }
      cursor += 1
    }
    p = cursor
  }
  /// Consumes the keyword given by `bytes` if it appears at the cursor.
  ///
  /// Upper-case ASCII letters in the input are folded to lower case before
  /// comparing, so `bytes` is matched case-insensitively when it is spelled
  /// in lower case. The keyword must not be immediately followed by another
  /// ASCII letter.
  ///
  /// - Returns: `true` if the keyword was consumed (followed by a call to
  ///   `skipWhitespace()` unless the input ended right after it); `false`
  ///   with the cursor unchanged otherwise.
  private mutating func skipOptionalKeyword(bytes: [UInt8]) -> Bool {
    var cursor = p
    for expected in bytes {
      guard cursor != end else {
        return false
      }
      var candidate = cursor[0]
      if case asciiUpperA...asciiUpperZ = candidate {
        // Protobuf text keywords are case insensitive: fold to lower case
        candidate += asciiLowerA - asciiUpperA
      }
      guard candidate == expected else {
        return false
      }
      cursor += 1
    }
    guard cursor != end else {
      p = cursor
      return true
    }
    switch cursor[0] {
    case asciiUpperA...asciiUpperZ, asciiLowerA...asciiLowerZ:
      // The match is only a prefix of a longer word
      return false
    default:
      p = cursor
      skipWhitespace()
      return true
    }
  }
  /// Consumes the keyword "nan" (optionally preceded by `-`) if it is the
  /// next token.
  ///
  /// - Returns: `true` if it was consumed; `false` with the cursor unchanged
  ///   otherwise.
  private mutating func skipOptionalNaN() -> Bool {
    let rewindPoint = p
    // "-nan" doesn't mean anything, but upstream handles it, so step over
    // a leading minus before looking for the keyword.
    if p != end && p[0] == asciiMinus {
      p += 1
    }
    guard skipOptionalKeyword(bytes: [asciiLowerN, asciiLowerA, asciiLowerN]) else {
      // Not "nan": rewind in case a minus sign was skipped.
      p = rewindPoint
      return false
    }
    return true
  }
  /// Consumes a recognized spelling of infinity ("inf" or "infinity",
  /// optionally preceded by `-`) if it is the next token.
  ///
  /// - Returns: `Float.infinity` or `-Float.infinity` when a spelling was
  ///   consumed; `nil` with the cursor unchanged otherwise.
  private mutating func skipOptionalInfinity() -> Float? {
    guard p != end else {
      return nil
    }
    let rewindPoint = p
    let negated = p[0] == asciiMinus
    if negated {
      p += 1
    }
    // Tried in this order: the short form first, then the long form.
    let spellings: [[UInt8]] = [
      [asciiLowerI, asciiLowerN, asciiLowerF],
      [
        asciiLowerI, asciiLowerN, asciiLowerF, asciiLowerI,
        asciiLowerN, asciiLowerI, asciiLowerT, asciiLowerY,
      ],
    ]
    for spelling in spellings {
      if skipOptionalKeyword(bytes: spelling) {
        return negated ? -Float.infinity : Float.infinity
      }
    }
    p = rewindPoint
    return nil
  }
  /// Reads the next value as a `Float`.
  ///
  /// Tries, in order: a numeric literal (converted from `Double`), the
  /// "nan" keyword, and the infinity keywords.
  ///
  /// - Throws: `TextFormatDecodingError.malformedNumber` if none of them
  ///   matches.
  internal mutating func nextFloat() throws -> Float {
    if let parsed = tryParseFloatString() {
      return Float(parsed)
    }
    guard !skipOptionalNaN() else {
      return Float.nan
    }
    guard let infinity = skipOptionalInfinity() else {
      throw TextFormatDecodingError.malformedNumber
    }
    return infinity
  }
  /// Reads the next value as a `Double`.
  ///
  /// Tries, in order: a numeric literal, the "nan" keyword, and the
  /// infinity keywords.
  ///
  /// - Throws: `TextFormatDecodingError.malformedNumber` if none of them
  ///   matches.
  internal mutating func nextDouble() throws -> Double {
    if let parsed = tryParseFloatString() {
      return parsed
    }
    guard !skipOptionalNaN() else {
      return Double.nan
    }
    guard let infinity = skipOptionalInfinity() else {
      throw TextFormatDecodingError.malformedNumber
    }
    return Double(infinity)
  }
  /// Reads the next value as a `Bool`.
  ///
  /// Accepted spellings are `0`, `1`, `f`/`F` optionally followed by
  /// "alse", and `t`/`T` optionally followed by "rue". The value must be
  /// followed by end of input or by one of: space, tab, newline, carriage
  /// return, `#`, `,`, `;`, `]`, `}`, `>`.
  ///
  /// - Throws: `TextFormatDecodingError.malformedText` at end of input, on
  ///   an unrecognized spelling, or when the value is followed by any other
  ///   character.
  internal mutating func nextBool() throws -> Bool {
    skipWhitespace()
    guard p != end else {
      throw TextFormatDecodingError.malformedText
    }
    let lead = p[0]
    p += 1

    let value: Bool
    // Remainder of the long spelling that may follow the first letter.
    let wordTail: [UInt8]?
    switch lead {
    case asciiZero:
      value = false
      wordTail = nil
    case asciiOne:
      value = true
      wordTail = nil
    case asciiLowerF, asciiUpperF:
      value = false
      wordTail = [asciiLowerA, asciiLowerL, asciiLowerS, asciiLowerE]
    case asciiLowerT, asciiUpperT:
      value = true
      wordTail = [asciiLowerR, asciiLowerU, asciiLowerE]
    default:
      throw TextFormatDecodingError.malformedText
    }
    if p != end, let tail = wordTail {
      skipOptionalCharacters(bytes: tail)
    }

    guard p != end else {
      return value
    }
    // Only a delimiter may follow; anything else means a malformed token.
    switch p[0] {
    case asciiSpace,
      asciiTab,
      asciiNewLine,
      asciiCarriageReturn,
      asciiHash,
      asciiComma,
      asciiSemicolon,
      asciiCloseSquareBracket,
      asciiCloseCurlyBracket,
      asciiCloseAngleBracket:
      skipWhitespace()
      return value
    default:
      throw TextFormatDecodingError.malformedText
    }
  }
  /// Returns the bytes of the next identifier if the next token starts with
  /// an ASCII letter, or `nil` (without consuming the token) if it does not.
  ///
  /// - Throws: `TextFormatDecodingError.malformedText` at end of input.
  internal mutating func nextOptionalEnumName() throws -> UnsafeRawBufferPointer? {
    skipWhitespace()
    guard p != end else {
      throw TextFormatDecodingError.malformedText
    }
    let lead = p[0]
    let startsWithLetter =
      (lead >= asciiLowerA && lead <= asciiLowerZ)
      || (lead >= asciiUpperA && lead <= asciiUpperZ)
    guard startsWithLetter else {
      return nil
    }
    return parseUTF8Identifier()
  }
  /// Returns the next `Any` type URL, if one is present.
  ///
  /// Any URLs are syntactically (almost) identical to extension keys, so
  /// this simply forwards to `nextOptionalExtensionKey()`.
  internal mutating func nextOptionalAnyURL() throws -> String? {
    let url = try nextOptionalExtensionKey()
    return url
  }
  /// Returns the next extension key, or `nil` at end of input or when the
  /// next token does not start with `[`.
  ///
  /// Throws an error if the next token starts with `[` but cannot be parsed
  /// as an extension key.
  ///
  /// Note: This accepts `/` characters to support Any URL parsing.
  /// Technically, Any URLs can contain `/` characters and extension key
  /// names cannot. But in practice, accepting `/` characters for extension
  /// keys works fine, since the result just gets rejected when the key is
  /// looked up.
  internal mutating func nextOptionalExtensionKey() throws -> String? {
    skipWhitespace()
    guard p != end, p[0] == asciiOpenSquareBracket else {  // [
      return nil
    }
    return try parseExtensionKey()
  }
  /// Parses an `[extension_field_name]` token and returns the text between
  /// the brackets.
  ///
  /// The cursor must be on the opening `[` when this is called (checked by
  /// an assertion); the bracket is consumed here. The name must start with
  /// an ASCII letter and may continue with letters, digits, `_`, `.` and
  /// `/`. The `/` is accepted because this is also used for Any URLs.
  ///
  /// - Throws: `TextFormatDecodingError.malformedText` if the input ends
  ///   early, an unexpected character is found, or the name cannot be
  ///   converted to a `String`.
  private mutating func parseExtensionKey() throws -> String {
    assert(p[0] == asciiOpenSquareBracket)
    p += 1
    guard p != end else {
      throw TextFormatDecodingError.malformedText
    }
    let nameStart = p

    // First character: letters only.
    switch p[0] {
    case asciiLowerA...asciiLowerZ, asciiUpperA...asciiUpperZ:
      p += 1
    default:
      throw TextFormatDecodingError.malformedText
    }

    // Remaining characters, up to (not including) the closing bracket.
    scan: while p != end {
      switch p[0] {
      case asciiCloseSquareBracket:  // ]
        break scan
      case asciiLowerA...asciiLowerZ,
        asciiUpperA...asciiUpperZ,
        asciiZero...asciiNine,
        asciiUnderscore,
        asciiPeriod,
        asciiForwardSlash:
        p += 1
      default:
        throw TextFormatDecodingError.malformedText
      }
    }
    guard p != end, p[0] == asciiCloseSquareBracket else {
      throw TextFormatDecodingError.malformedText
    }
    guard let name = utf8ToString(bytes: nameStart, count: p - nameStart) else {
      throw TextFormatDecodingError.malformedText
    }
    p += 1  // Skip ]
    skipWhitespace()
    return name
  }
  /// Returns the text of the next key, or `nil` at end of input.
  ///
  /// Three key forms are recognized: an extension key in square brackets
  /// (returned with the brackets re-added), an identifier, and a decimal
  /// field number starting with `1`...`9`.
  ///
  /// - Parameter allowExtensions: Whether a `[`-prefixed key is acceptable.
  /// - Throws: `TextFormatDecodingError.unknownField` for an extension key
  ///   when `allowExtensions` is false; `malformedText` for any other
  ///   leading character or for a field number longer than
  ///   `maxFieldNumLength` digits.
  internal mutating func nextKey(allowExtensions: Bool) throws -> String? {
    skipWhitespace()
    guard p != end else {
      return nil
    }
    switch p[0] {
    case asciiOpenSquareBracket:  // [
      guard allowExtensions else {
        throw TextFormatDecodingError.unknownField
      }
      let extensionName = try parseExtensionKey()
      return "[\(extensionName)]"
    case asciiLowerA...asciiLowerZ,
      asciiUpperA...asciiUpperZ:  // a...z, A...Z
      return parseIdentifier()
    case asciiOne...asciiNine:  // 1...9 (field numbers are 123, not 0123)
      let digitsStart = p
      p += 1
      while p != end, p[0] >= asciiZero, p[0] <= asciiNine {
        p += 1
        if p - digitsStart > maxFieldNumLength {
          throw TextFormatDecodingError.malformedText
        }
      }
      // Measure the digits before whitespace skipping moves the cursor.
      let digitCount = p - digitsStart
      skipWhitespace()
      // Safe to force-unwrap: a run of ASCII digits can't be invalid UTF-8.
      return utf8ToString(bytes: digitsStart, count: digitCount)!
    default:
      throw TextFormatDecodingError.malformedText
    }
  }
  /// Parses the next field key, looks it up, and returns the corresponding
  /// field number.
  ///
  /// A key may be a field name, an `[extension.name]`, or a decimal field
  /// number. Keys that are unknown but tolerated (because the matching
  /// `ignoreUnknown…` option is set, or because the name/number is
  /// reserved) are skipped together with their value, and scanning
  /// continues with the following field.
  ///
  /// - Parameters:
  ///   - names: Name map used to resolve field names and numbers.
  ///   - messageType: Message type used for extension lookups.
  ///   - terminator: Byte that closes the enclosing message, or `nil` when
  ///     no terminator is expected.
  /// - Returns: The field number, or `nil` at end of input (only when
  ///   `terminator` is `nil`) or once the terminator has been consumed.
  /// - Throws: `TextFormatDecodingError.unknownField` for an unknown key
  ///   that is not tolerated; `malformedText` if the key cannot be parsed
  ///   or the input ends before the terminator.
  ///
  /// Note from the original authors: this function accounts for as much as
  /// 2/3 of the total run time of the entire parse.
  internal mutating func nextFieldNumber(
    names: _NameMap,
    messageType: any Message.Type,
    terminator: UInt8?
  ) throws -> Int? {
    while true {
      skipWhitespace()
      guard p != end else {
        if terminator != nil {
          // Never got the terminator.
          throw TextFormatDecodingError.malformedText
        }
        return nil
      }

      var skippingReserved = false
      let lead = p[0]
      switch lead {
      case asciiLowerA...asciiLowerZ,
        asciiUpperA...asciiUpperZ:  // a...z, A...Z
        let name = parseUTF8Identifier()
        if let number = names.number(forProtoName: name) {
          return number
        }
        if !options.ignoreUnknownFields {
          guard names.isReserved(name: name) else {
            throw TextFormatDecodingError.unknownField
          }
          skippingReserved = true
        }
      // Unknown or reserved field name: fall out of the switch and skip it.

      case asciiOpenSquareBracket:  // Start of an extension field
        let name = try parseExtensionKey()
        if let number = extensions?.fieldNumberForProto(messageType: messageType, protoFieldName: name) {
          return number
        }
        guard options.ignoreUnknownExtensionFields else {
          throw TextFormatDecodingError.unknownField
        }
      // Unknown extension name: fall out of the switch and skip it.

      case asciiOne...asciiNine:  // 1-9 (field numbers are 123, not 0123)
        let digitsStart = p
        var number = Int(lead) - Int(asciiZero)
        p += 1
        while p != end {
          let digit = p[0]
          guard digit >= asciiZero && digit <= asciiNine else {
            break
          }
          number = number &* 10 &+ (Int(digit) - Int(asciiZero))
          p += 1
          if p - digitsStart > maxFieldNumLength {
            throw TextFormatDecodingError.malformedText
          }
        }
        skipWhitespace()
        if names.names(for: number) != nil {
          return number
        }
        if !options.ignoreUnknownFields {
          // The digit count was bounded while parsing, so truncating is safe.
          guard names.isReserved(number: Int32(truncatingIfNeeded: number)) else {
            throw TextFormatDecodingError.unknownField
          }
          skippingReserved = true
        }
      // Unknown or reserved field number: fall out of the switch and skip it.

      default:
        guard lead == terminator else {
          throw TextFormatDecodingError.malformedText
        }
        let _ = skipOptionalObjectEnd(lead)
        return nil
      }

      assert(options.ignoreUnknownFields || options.ignoreUnknownExtensionFields || skippingReserved)
      try skipUnknownFieldValue()
      // Skip any separator before looping around to try for another field.
      skipOptionalSeparator()
    }
  }
  /// Skips past the value of an unknown field. On entry `p` points at the
  /// first character after the unknown field's name.
  ///
  /// This is modeled after the C++ text_format.cpp `ConsumeField()`. The
  /// kind of value is guessed from the syntax:
  /// - With a ":" that is not followed by "{" or "<", the value is treated
  ///   as a primitive (or a list).
  /// - Without a ":", or with "{" or "<" after the ":", the value has to be
  ///   a message body or the input is ill-formed.
  ///
  /// - Throws: `TextFormatDecodingError.malformedText` if nothing follows
  ///   the ":"; otherwise whatever the value skippers throw.
  internal mutating func skipUnknownFieldValue() throws {
    skipWhitespace()
    guard skipOptionalColon() else {
      try skipUnknownMessageFieldValue()
      return
    }
    guard p != end else {
      // Nothing after the ':'?
      throw TextFormatDecodingError.malformedText
    }
    switch p[0] {
    case asciiOpenAngleBracket, asciiOpenCurlyBracket:
      try skipUnknownMessageFieldValue()
    default:
      try skipUnknownPrimativeFieldValue()
    }
  }
  /// Peeks at the upcoming number (without moving `p`) to decide how an
  /// unknown field's numeric value should be parsed and validated.
  ///
  /// - Returns: `true` when the text is `0` at end of input, or starts
  ///   (after an optional `-`) with `0x` or with `0` followed by an octal
  ///   digit, meaning it must go through the integer parsers. `false` in
  ///   every other case, including when the input runs out right after the
  ///   `-`; the floating-point parser then handles it or reports the error.
  ///
  /// The caller must ensure `p` is not at end of input.
  private func mustParseNumberAsDecimal() -> Bool {
    var cursor = p
    // Floats and decimals can both have a leading '-'
    if cursor[0] == asciiMinus {
      cursor += 1
      guard cursor != end else { return false }
    }
    guard cursor[0] == asciiZero else {
      // "(-)." is clearly a float. For anything else it doesn't really
      // matter: even a plain decimal might be too large for a UInt64 yet
      // still be valid for a float/double field, so treat it as a float.
      return false
    }
    cursor += 1
    guard cursor != end else {
      return true  // "(-)0[end]" : parse it as decimal
    }
    let next = cursor[0]
    // "(-)0x" is hex and "(-)0[0-7]" is octal; both need the integer parser.
    // "(-)0." and everything else is left to the float parser.
    return next == asciiLowerX || (next >= asciiZero && next <= asciiSeven)
  }
  /// Skips a non-message value of an unknown field: a quoted string/bytes
  /// value, a bracketed list, an identifier, or a number.
  ///
  /// This is modeled after the C++ text_format.cpp `SkipFieldValue()`.
  /// The caller must ensure `p` is not at end of input.
  ///
  /// - Parameter canBeList: Whether a `[`…`]` list is acceptable here. It is
  ///   passed as `false` for list elements, since lists cannot be nested.
  /// - Throws: `TextFormatDecodingError.malformedText` for a nested or
  ///   unterminated list; otherwise whatever the value parsers throw.
  private mutating func skipUnknownPrimativeFieldValue(canBeList: Bool = true) throws {
    let lead = p[0]
    if lead == asciiSingleQuote || lead == asciiDoubleQuote {
      // Note: the field could be 'bytes', so it can't be parsed as a string
      // because that might fail.
      _ = try nextBytesValue()
      return
    }

    if skipOptionalBeginArray() {
      guard canBeList else {
        // Have encountered an array as an element in an array; not legal.
        throw TextFormatDecodingError.malformedText
      }
      var closed = skipOptionalEndArray()
      while !closed {
        guard p != end else {
          throw TextFormatDecodingError.malformedText
        }
        switch p[0] {
        case asciiOpenAngleBracket, asciiOpenCurlyBracket:
          try skipUnknownMessageFieldValue()
        default:
          try skipUnknownPrimativeFieldValue(canBeList: false)
        }
        closed = skipOptionalEndArray()
        if !closed {
          try skipRequiredComma()
        }
      }
      return
    }

    // NOTE: This also covers "true"/"false" for booleans and "nan"/"inf"
    // for floats.
    if try nextOptionalEnumName() != nil {
      // Skip trailing whitespace here rather than relying on
      // `nextOptionalEnumName()` to have done so.
      skipWhitespace()
      return
    }

    // NOTE: "-nan"/"-inf" need no special case: they are not forced to
    // parse as decimal, and `nextDouble()` already supports them.
    if mustParseNumberAsDecimal() {
      if lead == asciiMinus {
        _ = try nextSInt()
      } else {
        _ = try nextUInt()
      }
    } else {
      _ = try nextDouble()
    }
  }
  /// Skips a message-valued unknown field: the opening bracket, every
  /// `key value` pair inside, and the matching closing bracket.
  ///
  /// This is modeled after the C++ text_format.cpp `SkipFieldMessage()`.
  ///
  /// - Throws: `TextFormatDecodingError.malformedText` if the input ends
  ///   before the closing bracket or no key can be read; otherwise whatever
  ///   the nested parsers throw.
  private mutating func skipUnknownMessageFieldValue() throws {
    let closer = try skipObjectStart()
    while !skipOptionalObjectEnd(closer) {
      guard p != end else {
        throw TextFormatDecodingError.malformedText
      }
      // Need a valid field name or extension name ("[ext.name]")
      guard try nextKey(allowExtensions: true) != nil else {
        throw TextFormatDecodingError.malformedText
      }
      try skipUnknownFieldValue()
      skipOptionalSeparator()
    }
  }
  /// Consumes the byte `c`, calling `skipWhitespace()` before and after it.
  ///
  /// - Throws: `TextFormatDecodingError.malformedText` if the next byte is
  ///   not `c` or the input has ended.
  private mutating func skipRequiredCharacter(_ c: UInt8) throws {
    skipWhitespace()
    guard p != end, p[0] == c else {
      throw TextFormatDecodingError.malformedText
    }
    p += 1
    skipWhitespace()
  }
  /// Consumes a `,` via `skipRequiredCharacter(_:)`; throws if it is
  /// missing.
  internal mutating func skipRequiredComma() throws {
    let comma = asciiComma
    try skipRequiredCharacter(comma)
  }
  /// Consumes a `:` via `skipRequiredCharacter(_:)`; throws if it is
  /// missing.
  internal mutating func skipRequiredColon() throws {
    let colon = asciiColon
    try skipRequiredCharacter(colon)
  }
  /// Consumes the byte `c` if it is the next byte, then calls
  /// `skipWhitespace()`.
  ///
  /// - Returns: `true` if `c` was consumed; `false` with the cursor
  ///   unchanged otherwise.
  private mutating func skipOptionalCharacter(_ c: UInt8) -> Bool {
    guard p != end, p[0] == c else {
      return false
    }
    p += 1
    skipWhitespace()
    return true
  }
  /// Consumes a `:` if it is the next byte; returns whether it did.
  internal mutating func skipOptionalColon() -> Bool {
    return skipOptionalCharacter(asciiColon)
  }
  /// Consumes a `]` if it is the next byte; returns whether it did.
  internal mutating func skipOptionalEndArray() -> Bool {
    return skipOptionalCharacter(asciiCloseSquareBracket)
  }
  /// Consumes a `[` if it is the next byte; returns whether it did.
  internal mutating func skipOptionalBeginArray() -> Bool {
    return skipOptionalCharacter(asciiOpenSquareBracket)
  }
  /// Consumes the object terminator `c` if it is the next byte and, when it
  /// was consumed, calls `decrementRecursionDepth()`.
  ///
  /// - Returns: `true` if the terminator was consumed.
  internal mutating func skipOptionalObjectEnd(_ c: UInt8) -> Bool {
    guard skipOptionalCharacter(c) else {
      return false
    }
    decrementRecursionDepth()
    return true
  }
  /// Consumes a single field separator (`,` or `;`) if one is next, then
  /// calls `skipWhitespace()`. Does nothing otherwise.
  internal mutating func skipOptionalSeparator() {
    guard p != end else {
      return
    }
    switch p[0] {
    case asciiComma, asciiSemicolon:  // comma or semicolon
      p += 1
      skipWhitespace()
    default:
      break
    }
  }
  /// Consumes the opening bracket of a message body and returns the
  /// character that should end it.
  /// E.g., if the object starts with "{", returns "}"; for "<", returns ">".
  ///
  /// `incrementRecursionDepth()` is called first, before the opening
  /// bracket is examined.
  ///
  /// - Throws: Whatever `incrementRecursionDepth()` throws;
  ///   `TextFormatDecodingError.malformedText` if the input has ended or the
  ///   next byte is neither "{" nor "<".
  internal mutating func skipObjectStart() throws -> UInt8 {
    try incrementRecursionDepth()
    guard p != end else {
      throw TextFormatDecodingError.malformedText
    }
    let opener = p[0]
    p += 1
    skipWhitespace()
    if opener == asciiOpenCurlyBracket {  // {
      return asciiCloseCurlyBracket  // }
    }
    if opener == asciiOpenAngleBracket {  // <
      return asciiCloseAngleBracket  // >
    }
    throw TextFormatDecodingError.malformedText
  }
  1404. }