NameMap.swift 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. // Sources/SwiftProtobuf/NameMap.swift - Bidirectional number/name mapping
  2. //
  3. // Copyright (c) 2014 - 2016 Apple Inc. and the project authors
  4. // Licensed under Apache License v2.0 with Runtime Library Exception
  5. //
  6. // See LICENSE.txt for license information:
  7. // https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
  8. //
  9. // -----------------------------------------------------------------------------
  10. /// TODO: Right now, only the NameMap and the NameDescription enum
  11. /// (which are directly used by the generated code) are public.
  12. /// This means that code outside the library has no way to actually
  13. /// use this data. We should develop and publicize a suitable API
  14. /// for that purpose. (Which might be the same as the internal API.)
  15. /// This must produce exactly the same outputs as the corresponding
  16. /// code in the protoc-gen-swift code generator. Changing it will
  17. /// break compatibility of the library with older generated code.
  18. ///
  19. /// It does not necessarily need to match protoc's JSON field naming
  20. /// logic, however.
  21. private func toJSONFieldName(_ s: UnsafeBufferPointer<UInt8>) -> String {
  22. var result = String.UnicodeScalarView()
  23. var capitalizeNext = false
  24. for c in s {
  25. if c == UInt8(ascii: "_") {
  26. capitalizeNext = true
  27. } else if capitalizeNext {
  28. result.append(Unicode.Scalar(c).uppercasedAssumingASCII)
  29. capitalizeNext = false
  30. } else {
  31. result.append(Unicode.Scalar(c))
  32. }
  33. }
  34. return String(result)
  35. }
  36. #if !REMOVE_LEGACY_NAMEMAP_INITIALIZERS
  37. private func toJSONFieldName(_ s: StaticString) -> String {
  38. guard s.hasPointerRepresentation else {
  39. // If it's a single code point, it wouldn't be changed by the above algorithm.
  40. // Return it as-is.
  41. return s.description
  42. }
  43. return toJSONFieldName(UnsafeBufferPointer(start: s.utf8Start, count: s.utf8CodeUnitCount))
  44. }
  45. #endif // !REMOVE_LEGACY_NAMEMAP_INITIALIZERS
  46. /// Allocate static memory buffers to intern UTF-8
  47. /// string data. Track the buffers and release all of those buffers
  48. /// in case we ever get deallocated.
  49. private class InternPool {
  50. private var interned = [UnsafeRawBufferPointer]()
  51. func intern(utf8: String.UTF8View) -> UnsafeRawBufferPointer {
  52. let mutable = UnsafeMutableRawBufferPointer.allocate(
  53. byteCount: utf8.count,
  54. alignment: MemoryLayout<UInt8>.alignment
  55. )
  56. mutable.copyBytes(from: utf8)
  57. let immutable = UnsafeRawBufferPointer(mutable)
  58. interned.append(immutable)
  59. return immutable
  60. }
  61. func intern(utf8Ptr: UnsafeBufferPointer<UInt8>) -> UnsafeRawBufferPointer {
  62. let mutable = UnsafeMutableRawBufferPointer.allocate(
  63. byteCount: utf8Ptr.count,
  64. alignment: MemoryLayout<UInt8>.alignment
  65. )
  66. mutable.copyBytes(from: utf8Ptr)
  67. let immutable = UnsafeRawBufferPointer(mutable)
  68. interned.append(immutable)
  69. return immutable
  70. }
  71. deinit {
  72. for buff in interned {
  73. buff.deallocate()
  74. }
  75. }
  76. }
  77. /// Instructions used in bytecode streams that define proto name mappings.
  78. ///
  79. /// Since field and enum case names are encoded in numeric order, field and case number operands in
  80. /// the bytecode are stored as adjacent differences. Most messages/enums use densely packed
  81. /// numbers, so we've optimized the opcodes for that; each instruction that takes a single
  82. /// field/case number has two forms: one that assumes the next number is +1 from the previous
  83. /// number, and a second form that takes an arbitrary delta from the previous number.
  84. ///
  85. /// This has package visibility so that it is also visible to the generator.
  86. package enum ProtoNameInstruction: UInt64, CaseIterable {
  87. /// The proto (text format) name and the JSON name are the same string.
  88. ///
  89. /// ## Operands
  90. /// * (Delta only) An integer representing the (delta from the previous) field or enum case
  91. /// number.
  92. /// * A string containing the single text format and JSON name.
  93. case sameNext = 1
  94. case sameDelta = 2
  95. /// The JSON name can be computed from the proto string.
  96. ///
  97. /// ## Operands
  98. /// * (Delta only) An integer representing the (delta from the previous) field or enum case
  99. /// number.
  100. /// * A string containing the single text format name, from which the JSON name will be
  101. /// dynamically computed.
  102. case standardNext = 3
  103. case standardDelta = 4
  104. /// The JSON and text format names are just different.
  105. ///
  106. /// ## Operands
  107. /// * (Delta only) An integer representing the (delta from the previous) field or enum case
  108. /// number.
  109. /// * A string containing the text format name.
  110. /// * A string containing the JSON name.
  111. case uniqueNext = 5
  112. case uniqueDelta = 6
  113. /// Used for group fields only to represent the message type name of a group.
  114. ///
  115. /// ## Operands
  116. /// * (Delta only) An integer representing the (delta from the previous) field number.
  117. /// * A string containing the (UpperCamelCase by convention) message type name, from which the
  118. /// text format and JSON names can be derived (lowercase).
  119. case groupNext = 7
  120. case groupDelta = 8
  121. /// Used for enum cases only to represent a value's primary proto name (the first defined case)
  122. /// and its aliases. The JSON and text format names for enums are always the same.
  123. ///
  124. /// ## Operands
  125. /// * (Delta only) An integer representing the (delta from the previous) enum case number.
  126. /// * An integer `aliasCount` representing the number of aliases.
  127. /// * A string containing the text format/JSON name (the first defined case with this number).
  128. /// * `aliasCount` strings containing other text format/JSON names that are aliases.
  129. case aliasNext = 9
  130. case aliasDelta = 10
  131. /// Represents a reserved name in a proto message.
  132. ///
  133. /// ## Operands
  134. /// * The name of a reserved field.
  135. case reservedName = 11
  136. /// Represents a range of reserved field numbers or enum case numbers in a proto message.
  137. ///
  138. /// ## Operands
  139. /// * An integer representing the lower bound (inclusive) of the reserved number range.
  140. /// * An integer representing the delta between the upper bound (exclusive) and the lower bound
  141. /// of the reserved number range.
  142. case reservedNumbers = 12
  143. /// Indicates whether the opcode represents an instruction that has an explicit delta encoded
  144. /// as its first operand.
  145. var hasExplicitDelta: Bool {
  146. switch self {
  147. case .sameDelta, .standardDelta, .uniqueDelta, .groupDelta, .aliasDelta: return true
  148. default: return false
  149. }
  150. }
  151. }
  152. /// An immutable bidirectional mapping between field/enum-case names
  153. /// and numbers, used to record field names for text-based
  154. /// serialization (JSON and text). These maps are lazily instantiated
  155. /// for each message as needed, so there is no run-time overhead for
  156. /// users who do not use text-based serialization formats.
  157. public struct _NameMap: ExpressibleByDictionaryLiteral {
  158. /// An immutable interned string container. The `utf8Start` pointer
  159. /// is guaranteed valid for the lifetime of the `NameMap` that you
  160. /// fetched it from. Since `NameMap`s are only instantiated as
  161. /// immutable static values, that should be the lifetime of the
  162. /// program.
  163. ///
  164. /// Internally, this uses `StaticString` (which refers to a fixed
  165. /// block of UTF-8 data) where possible. In cases where the string
  166. /// has to be computed, it caches the UTF-8 bytes in an
  167. /// unmovable and immutable heap area.
  168. package struct Name: Hashable, CustomStringConvertible {
  169. #if !REMOVE_LEGACY_NAMEMAP_INITIALIZERS
  170. // This should not be used outside of this file, as it requires
  171. // coordinating the lifecycle with the lifecycle of the pool
  172. // where the raw UTF8 gets interned.
  173. fileprivate init(staticString: StaticString, pool: InternPool) {
  174. if staticString.hasPointerRepresentation {
  175. self.utf8Buffer = UnsafeRawBufferPointer(
  176. start: staticString.utf8Start,
  177. count: staticString.utf8CodeUnitCount
  178. )
  179. } else {
  180. self.utf8Buffer = staticString.withUTF8Buffer { pool.intern(utf8Ptr: $0) }
  181. }
  182. }
  183. #endif // !REMOVE_LEGACY_NAMEMAP_INITIALIZERS
  184. // This should not be used outside of this file, as it requires
  185. // coordinating the lifecycle with the lifecycle of the pool
  186. // where the raw UTF8 gets interned.
  187. fileprivate init(string: String, pool: InternPool) {
  188. let utf8 = string.utf8
  189. self.utf8Buffer = pool.intern(utf8: utf8)
  190. }
  191. // This is for building a transient `Name` object sufficient for lookup purposes.
  192. // It MUST NOT be exposed outside of this file.
  193. fileprivate init(transientUtf8Buffer: UnsafeRawBufferPointer) {
  194. self.utf8Buffer = transientUtf8Buffer
  195. }
  196. // This is for building a `Name` object from a slice of a bytecode `StaticString`.
  197. // It MUST NOT be exposed outside of this file.
  198. fileprivate init(bytecodeUTF8Buffer: UnsafeBufferPointer<UInt8>) {
  199. self.utf8Buffer = UnsafeRawBufferPointer(bytecodeUTF8Buffer)
  200. }
  201. internal let utf8Buffer: UnsafeRawBufferPointer
  202. public var description: String {
  203. String(decoding: self.utf8Buffer, as: UTF8.self)
  204. }
  205. public func hash(into hasher: inout Hasher) {
  206. for byte in utf8Buffer {
  207. hasher.combine(byte)
  208. }
  209. }
  210. public static func == (lhs: Name, rhs: Name) -> Bool {
  211. if lhs.utf8Buffer.count != rhs.utf8Buffer.count {
  212. return false
  213. }
  214. return lhs.utf8Buffer.elementsEqual(rhs.utf8Buffer)
  215. }
  216. }
  217. /// The JSON and proto names for a particular field, enum case, or extension.
  218. internal struct Names {
  219. private(set) var json: Name?
  220. private(set) var proto: Name
  221. }
  222. #if !REMOVE_LEGACY_NAMEMAP_INITIALIZERS
  223. /// A description of the names for a particular field or enum case.
  224. /// The different forms here let us minimize the amount of string
  225. /// data that we store in the binary.
  226. ///
  227. /// These are only used in the generated code to initialize a NameMap.
  228. public enum NameDescription {
  229. /// The proto (text format) name and the JSON name are the same string.
  230. case same(proto: StaticString)
  231. /// The JSON name can be computed from the proto string
  232. case standard(proto: StaticString)
  233. /// The JSON and text format names are just different.
  234. case unique(proto: StaticString, json: StaticString)
  235. /// Used for enum cases only to represent a value's primary proto name (the
  236. /// first defined case) and its aliases. The JSON and text format names for
  237. /// enums are always the same.
  238. case aliased(proto: StaticString, aliases: [StaticString])
  239. }
  240. #endif // !REMOVE_LEGACY_NAMEMAP_INITIALIZERS
  241. private var internPool = InternPool()
  242. /// The mapping from field/enum-case numbers to names.
  243. private var numberToNameMap: [Int: Names] = [:]
  244. /// The mapping from proto/text names to field/enum-case numbers.
  245. private var protoToNumberMap: [Name: Int] = [:]
  246. /// The mapping from JSON names to field/enum-case numbers.
  247. /// Note that this also contains all of the proto/text names,
  248. /// as required by Google's spec for protobuf JSON.
  249. private var jsonToNumberMap: [Name: Int] = [:]
    /// The reserved names for this object. Currently only used for Message to
    /// support TextFormat's requirement to skip these names in all cases.
  252. private var reservedNames: [String] = []
    /// The reserved numbers for this object. Currently only used for Message to
    /// support TextFormat's requirement to skip these numbers in all cases.
  255. private var reservedRanges: [Range<Int32>] = []
  256. /// Creates a new empty field/enum-case name/number mapping.
  257. public init() {}
  258. #if REMOVE_LEGACY_NAMEMAP_INITIALIZERS
  259. // Provide a dummy for ExpressibleByDictionaryLiteral conformance.
  260. public init(dictionaryLiteral elements: (Int, Int)...) {
  261. fatalError("Support compiled out removed")
  262. }
  263. #else // !REMOVE_LEGACY_NAMEMAP_INITIALIZERS
  264. /// Build the bidirectional maps between numbers and proto/JSON names.
  265. @available(
  266. *,
  267. deprecated,
  268. message: "Please regenerate your .pb.swift files with the current version of the SwiftProtobuf protoc plugin."
  269. )
  270. public init(
  271. reservedNames: [String],
  272. reservedRanges: [Range<Int32>],
  273. numberNameMappings: KeyValuePairs<Int, NameDescription>
  274. ) {
  275. self.reservedNames = reservedNames
  276. self.reservedRanges = reservedRanges
  277. initHelper(numberNameMappings)
  278. }
  279. /// Build the bidirectional maps between numbers and proto/JSON names.
  280. @available(
  281. *,
  282. deprecated,
  283. message: "Please regenerate your .pb.swift files with the current version of the SwiftProtobuf protoc plugin."
  284. )
  285. public init(dictionaryLiteral elements: (Int, NameDescription)...) {
  286. initHelper(elements)
  287. }
  288. /// Helper to share the building of mappings between the two initializers.
  289. private mutating func initHelper<Pairs: Collection>(
  290. _ elements: Pairs
  291. ) where Pairs.Element == (key: Int, value: NameDescription) {
  292. for (number, description) in elements {
  293. switch description {
  294. case .same(proto: let p):
  295. let protoName = Name(staticString: p, pool: internPool)
  296. let names = Names(json: protoName, proto: protoName)
  297. numberToNameMap[number] = names
  298. protoToNumberMap[protoName] = number
  299. jsonToNumberMap[protoName] = number
  300. case .standard(proto: let p):
  301. let protoName = Name(staticString: p, pool: internPool)
  302. let jsonString = toJSONFieldName(p)
  303. let jsonName = Name(string: jsonString, pool: internPool)
  304. let names = Names(json: jsonName, proto: protoName)
  305. numberToNameMap[number] = names
  306. protoToNumberMap[protoName] = number
  307. jsonToNumberMap[protoName] = number
  308. jsonToNumberMap[jsonName] = number
  309. case .unique(proto: let p, json: let j):
  310. let jsonName = Name(staticString: j, pool: internPool)
  311. let protoName = Name(staticString: p, pool: internPool)
  312. let names = Names(json: jsonName, proto: protoName)
  313. numberToNameMap[number] = names
  314. protoToNumberMap[protoName] = number
  315. jsonToNumberMap[protoName] = number
  316. jsonToNumberMap[jsonName] = number
  317. case .aliased(proto: let p, let aliases):
  318. let protoName = Name(staticString: p, pool: internPool)
  319. let names = Names(json: protoName, proto: protoName)
  320. numberToNameMap[number] = names
  321. protoToNumberMap[protoName] = number
  322. jsonToNumberMap[protoName] = number
  323. for alias in aliases {
  324. let protoName = Name(staticString: alias, pool: internPool)
  325. protoToNumberMap[protoName] = number
  326. jsonToNumberMap[protoName] = number
  327. }
  328. }
  329. }
  330. }
  331. #endif // !REMOVE_LEGACY_NAMEMAP_INITIALIZERS
  332. public init(bytecode: StaticString) {
  333. var previousNumber = 0
  334. BytecodeInterpreter<ProtoNameInstruction>(program: bytecode).execute { instruction, reader in
  335. func nextNumber() -> Int {
  336. let next: Int
  337. if instruction.hasExplicitDelta {
  338. next = previousNumber + Int(reader.nextInt32())
  339. } else {
  340. next = previousNumber + 1
  341. }
  342. previousNumber = next
  343. return next
  344. }
  345. switch instruction {
  346. case .sameNext, .sameDelta:
  347. let number = nextNumber()
  348. let protoName = Name(bytecodeUTF8Buffer: reader.nextNullTerminatedString())
  349. numberToNameMap[number] = Names(json: protoName, proto: protoName)
  350. protoToNumberMap[protoName] = number
  351. jsonToNumberMap[protoName] = number
  352. case .standardNext, .standardDelta:
  353. let number = nextNumber()
  354. let protoNameBuffer = reader.nextNullTerminatedString()
  355. let protoName = Name(bytecodeUTF8Buffer: protoNameBuffer)
  356. let jsonString = toJSONFieldName(protoNameBuffer)
  357. let jsonName = Name(string: jsonString, pool: internPool)
  358. numberToNameMap[number] = Names(json: jsonName, proto: protoName)
  359. protoToNumberMap[protoName] = number
  360. jsonToNumberMap[protoName] = number
  361. jsonToNumberMap[jsonName] = number
  362. case .uniqueNext, .uniqueDelta:
  363. let number = nextNumber()
  364. let protoName = Name(bytecodeUTF8Buffer: reader.nextNullTerminatedString())
  365. let jsonName = Name(bytecodeUTF8Buffer: reader.nextNullTerminatedString())
  366. numberToNameMap[number] = Names(json: jsonName, proto: protoName)
  367. protoToNumberMap[protoName] = number
  368. jsonToNumberMap[protoName] = number
  369. jsonToNumberMap[jsonName] = number
  370. case .groupNext, .groupDelta:
  371. let number = nextNumber()
  372. let protoNameBuffer = reader.nextNullTerminatedString()
  373. let protoName = Name(bytecodeUTF8Buffer: protoNameBuffer)
  374. protoToNumberMap[protoName] = number
  375. jsonToNumberMap[protoName] = number
  376. let lowercaseName: Name
  377. let hasUppercase = protoNameBuffer.contains { (UInt8(ascii: "A")...UInt8(ascii: "Z")).contains($0) }
  378. if hasUppercase {
  379. lowercaseName = Name(
  380. string: String(decoding: protoNameBuffer, as: UTF8.self).lowercased(),
  381. pool: internPool
  382. )
  383. protoToNumberMap[lowercaseName] = number
  384. jsonToNumberMap[lowercaseName] = number
  385. } else {
  386. // No need to convert and intern a separate copy of the string
  387. // if it would be identical.
  388. lowercaseName = protoName
  389. }
  390. numberToNameMap[number] = Names(json: lowercaseName, proto: protoName)
  391. case .aliasNext, .aliasDelta:
  392. let number = nextNumber()
  393. let protoName = Name(bytecodeUTF8Buffer: reader.nextNullTerminatedString())
  394. numberToNameMap[number] = Names(json: protoName, proto: protoName)
  395. protoToNumberMap[protoName] = number
  396. jsonToNumberMap[protoName] = number
  397. for alias in reader.nextNullTerminatedStringArray() {
  398. let protoName = Name(bytecodeUTF8Buffer: alias)
  399. protoToNumberMap[protoName] = number
  400. jsonToNumberMap[protoName] = number
  401. }
  402. case .reservedName:
  403. let name = String(decoding: reader.nextNullTerminatedString(), as: UTF8.self)
  404. reservedNames.append(name)
  405. case .reservedNumbers:
  406. let lowerBound = reader.nextInt32()
  407. let upperBound = lowerBound + reader.nextInt32()
  408. reservedRanges.append(lowerBound..<upperBound)
  409. }
  410. }
  411. }
  412. /// Returns the name bundle for the field/enum-case with the given number, or
  413. /// `nil` if there is no match.
  414. internal func names(for number: Int) -> Names? {
  415. numberToNameMap[number]
  416. }
  417. /// Returns the field/enum-case number that has the given JSON name,
  418. /// or `nil` if there is no match.
  419. ///
  420. /// This is used by the Text format parser to look up field or enum
  421. /// names using a direct reference to the un-decoded UTF8 bytes.
  422. internal func number(forProtoName raw: UnsafeRawBufferPointer) -> Int? {
  423. let n = Name(transientUtf8Buffer: raw)
  424. return protoToNumberMap[n]
  425. }
  426. /// Returns the field/enum-case number that has the given JSON name,
  427. /// or `nil` if there is no match.
  428. ///
  429. /// This accepts a regular `String` and is used in JSON parsing
  430. /// only when a field name or enum name was decoded from a string
  431. /// containing backslash escapes.
  432. ///
  433. /// JSON parsing must interpret *both* the JSON name of the
  434. /// field/enum-case provided by the descriptor *as well as* its
  435. /// original proto/text name.
  436. internal func number(forJSONName name: String) -> Int? {
  437. let utf8 = Array(name.utf8)
  438. return utf8.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in
  439. let n = Name(transientUtf8Buffer: buffer)
  440. return jsonToNumberMap[n]
  441. }
  442. }
  443. /// Returns the field/enum-case number that has the given JSON name,
  444. /// or `nil` if there is no match.
  445. ///
  446. /// This is used by the JSON parser when a field name or enum name
  447. /// required no special processing. As a result, we can avoid
  448. /// copying the name and look up the number using a direct reference
  449. /// to the un-decoded UTF8 bytes.
  450. internal func number(forJSONName raw: UnsafeRawBufferPointer) -> Int? {
  451. let n = Name(transientUtf8Buffer: raw)
  452. return jsonToNumberMap[n]
  453. }
  454. /// Returns all proto names
  455. internal var names: [Name] {
  456. numberToNameMap.map(\.value.proto)
  457. }
  458. /// Returns if the given name was reserved.
  459. internal func isReserved(name: UnsafeRawBufferPointer) -> Bool {
  460. guard !reservedNames.isEmpty,
  461. let baseAddress = name.baseAddress,
  462. let s = utf8ToString(bytes: baseAddress, count: name.count)
  463. else {
  464. return false
  465. }
  466. return reservedNames.contains(s)
  467. }
  468. /// Returns if the given number was reserved.
  469. internal func isReserved(number: Int32) -> Bool {
  470. for range in reservedRanges {
  471. if range.contains(number) {
  472. return true
  473. }
  474. }
  475. return false
  476. }
  477. }
// The `_NameMap` (and supporting types) are only mutated during their initial
// creation; after that they are constant for the lifetime of the process. Swift
// 5.10 flags the generated `_protobuf_nameMap` usages as a problem
// (https://github.com/apple/swift-protobuf/issues/1560), so this silences those
// warnings since the usage has been deemed safe.
//
// https://github.com/apple/swift-protobuf/issues/1561 is also open to revisit
// the `_NameMap` generally, as it dates back to the days before Swift preferred
// the UTF-8 internal encoding.
  487. extension _NameMap: Sendable {}
  488. extension _NameMap.Name: @unchecked Sendable {}
  489. extension InternPool: @unchecked Sendable {}