BytecodeReader.swift 7.5 KB

// Sources/SwiftProtobuf/BytecodeReader.swift - Internal bytecode reader
//
// Copyright (c) 2014 - 2025 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------
  10. /// Reads values encoded in a SwiftProtobuf bytecode stream.
  11. package struct BytecodeReader<Instruction: RawRepresentable> where Instruction.RawValue == UInt64 {
  12. /// The remaining slice of the program that has not yet been read.
  13. private var remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence
  14. /// Indicates whether or not there is still data that hasn't yet been read in the bytecode
  15. /// stream.
  16. package var hasData: Bool {
  17. !remainingProgram.isEmpty
  18. }
  19. /// Creates a new bytecode reader that reads the given bytecode stream.
  20. package init(remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence) {
  21. self.remainingProgram = remainingProgram
  22. // We reserve the first integer of the program text as a "format specifier". This
  23. // future-proofs us if we ever want to change the way programs themselves are encoded
  24. // (for example, compressing them).
  25. Self.checkProgramFormat(nextUInt64())
  26. }
  27. /// Checks that the given program format is valid (i.e., not greater than the runtime supports),
  28. /// trapping if it is invalid.
  29. static func checkProgramFormat(_ programFormat: UInt64) {
  30. if programFormat > latestBytecodeProgramFormat {
  31. fatalError("Unexpected bytecode program format \(programFormat)")
  32. }
  33. }
  34. /// Reads and returns the next instruction from the bytecode stream.
  35. ///
  36. /// - Precondition: The reader must not be at the end of the bytecode stream, and the next
  37. /// opcode must not be zero.
  38. ///
  39. /// - Returns: The instruction that was read from the bytecode stream.
  40. package mutating func nextInstruction() -> Instruction {
  41. precondition(hasData, "Unexpected end of bytecode stream")
  42. let opcode = nextUInt64()
  43. precondition(opcode != 0, "Opcode 0 is reserved; do not use it in your own instructions")
  44. guard let instruction = Instruction(rawValue: opcode) else {
  45. fatalError("Unexpected opcode \(opcode) for instruction set \(Instruction.self)")
  46. }
  47. return instruction
  48. }
  49. /// Reads and returns the next signed 32-bit integer from the bytecode stream.
  50. ///
  51. /// This is provided as its own primitive operation because 32-bit values are extremely common
  52. /// as field numbers (0 to 2^29-1) and enum cases (-2^31 to 2^31-1). In particular for enum
  53. /// cases, using this function specifically for those cases avoids making mistakes involving
  54. /// sign- vs. zero-extension between differently-sized integers.
  55. ///
  56. /// - Precondition: The reader must not be at the end of the bytecode stream.
  57. ///
  58. /// - Returns: The signed 32-bit integer that was read from the bytecode stream.
  59. package mutating func nextInt32() -> Int32 {
  60. // `Int32`s are stored by converting them bit-wise to a `UInt32` and then zero-extended to
  61. // `UInt64`, since this representation is smaller than sign-extending them to 64 bits.
  62. let uint64Value = nextUInt64()
  63. assert(uint64Value < UInt64(0x1_0000_0000), "nextInt32() read a value larger than 32 bits")
  64. return Int32(bitPattern: UInt32(truncatingIfNeeded: uint64Value))
  65. }
  66. /// Reads and returns the next unsigned 64-bit integer from the bytecode stream.
  67. ///
  68. /// - Precondition: The reader must not be at the end of the bytecode stream.
  69. ///
  70. /// - Returns: The unsigned 64-bit integer that was read from the bytecode stream.
  71. package mutating func nextUInt64() -> UInt64 {
  72. precondition(hasData, "Unexpected end of bytecode stream")
  73. // We store our programs as `StaticString`s, but those are still required to be UTF-8
  74. // encoded. This means we can't use a standard varint encoding for integers (because we
  75. // cannot arbitrarily use the most significant bit), but we can use a slightly modified
  76. // version that always keeps the MSB clear and uses the next-to-MSB as the continuation bit.
  77. let byte = UInt64(remainingProgram.first!)
  78. remainingProgram = remainingProgram.dropFirst()
  79. precondition(byte & 0x80 == 0, "Invalid integer leading byte \(byte)")
  80. if byte & 0x40 == 0 {
  81. return byte
  82. }
  83. var value: UInt64 = byte & 0x3f
  84. var shift: UInt64 = 6
  85. while true {
  86. let byte = remainingProgram.first!
  87. remainingProgram = remainingProgram.dropFirst()
  88. value |= UInt64(byte & 0x3f) &<< shift
  89. precondition(byte & 0x80 == 0, "Invalid integer leading byte \(byte)")
  90. if byte & 0x40 == 0 {
  91. return value
  92. }
  93. shift &+= 6
  94. guard shift < 64 else {
  95. fatalError("Bytecode value too large to fit into UInt64")
  96. }
  97. }
  98. }
  99. /// Reads and returns the next null-terminated string from the bytecode stream.
  100. ///
  101. /// - Precondition: The reader must not be at the end of the bytecode stream.
  102. ///
  103. /// - Returns: An `UnsafeBufferPointer` containing the string that was read from the bytecode
  104. /// stream. This pointer is rebased -- its base address is the start of the string that was
  105. /// just read, not the start of the entire stream -- but its lifetime is still tied to that of
  106. /// the original bytecode stream (which is immortal if it originated from a static string).
  107. package mutating func nextNullTerminatedString() -> UnsafeBufferPointer<UInt8> {
  108. precondition(hasData, "Unexpected end of bytecode stream")
  109. guard let nullIndex = remainingProgram.firstIndex(of: 0) else {
  110. preconditionFailure("Unexpected end of bytecode stream while looking for end of string")
  111. }
  112. let endIndex = remainingProgram.index(after: nullIndex)
  113. defer { remainingProgram = remainingProgram[endIndex...] }
  114. return .init(rebasing: remainingProgram[..<nullIndex])
  115. }
  116. /// Reads and returns the next array of length-delimited strings from the bytecode stream.
  117. ///
  118. /// - Precondition: The reader must not be at the end of the bytecode stream.
  119. ///
  120. /// - Returns: An array of `UnsafeBufferPointer`s containing the strings that were read from the
  121. /// bytecode stream. See the documentation of `nextString()` for details on the lifetimes of
  122. /// these pointers.
  123. package mutating func nextNullTerminatedStringArray() -> [UnsafeBufferPointer<UInt8>] {
  124. precondition(hasData, "Unexpected end of bytecode stream")
  125. let count = Int(nextUInt64())
  126. return [UnsafeBufferPointer<UInt8>](unsafeUninitializedCapacity: count) {
  127. (buffer, initializedCount) in
  128. for index in 0..<count {
  129. buffer.initializeElement(at: index, to: nextNullTerminatedString())
  130. }
  131. initializedCount = count
  132. }
  133. }
  134. }
  135. /// Indicates the latest bytecode program format supported by `BytecodeReader`.
  136. ///
  137. /// Programs written by a `BytecodeWriter` (see protoc-gen-swift) should *only* support this
  138. /// version; there is no reason to generate an older version than the latest that the runtime
  139. /// supports. Readers, on the other hand, must support the latest and all previous formats (unless
  140. /// making breaking changes).
  141. package let latestBytecodeProgramFormat: UInt64 = 0