AnyMessageStorage.swift 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. // Sources/SwiftProtobuf/AnyMessageStorage.swift - Custom storage for Any WKT
  2. //
  3. // Copyright (c) 2014 - 2017 Apple Inc. and the project authors
  4. // Licensed under Apache License v2.0 with Runtime Library Exception
  5. //
  6. // See LICENSE.txt for license information:
  7. // https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
  8. //
  9. // -----------------------------------------------------------------------------
  10. ///
  11. /// Hand written storage class for Google_Protobuf_Any to support on demand
  12. /// transforms between the formats.
  13. ///
  14. // -----------------------------------------------------------------------------
  15. import Foundation
  /// Renders `message` in the JSON object form used for an `Any`.
  ///
  /// The object always starts with an `"@type"` member carrying `typeURL`.
  /// Messages that conform to `_CustomJSONCodable` then contribute a single
  /// `"value"` member holding their custom JSON; any other message has its
  /// fields traversed straight into the same object.
  ///
  /// - Parameters:
  ///   - message: The message to encode.
  ///   - typeURL: The value written for the `"@type"` member.
  ///   - options: JSON encoding options, forwarded to the visitor and to any
  ///     custom encoder.
  /// - Returns: The finished JSON text.
  /// - Throws: Whatever the visitor construction, the custom encoder, or the
  ///   traversal throws.
  private func serializeAnyJSON(
    for message: any Message,
    typeURL: String,
    options: JSONEncodingOptions
  ) throws -> String {
    var encoder = try JSONEncodingVisitor(type: type(of: message), options: options)
    encoder.startObject(message: message)
    encoder.encodeField(name: "@type", stringValue: typeURL)
    switch message {
    case let custom as (any _CustomJSONCodable):
      // Custom-coded types are nested under a "value" key.
      let customJSON = try custom.encodedJSONString(options: options)
      encoder.encodeField(name: "value", jsonText: customJSON)
    default:
      // Everything else writes its fields next to "@type".
      try message.traverse(visitor: &encoder)
    }
    encoder.endObject()
    return encoder.stringResult
  }
  /// Writes `message` to `visitor` using the verbose text form of an `Any`.
  ///
  /// If `typeURL` is empty, a URL is built for `message` from
  /// `defaultAnyTypeURLPrefix`; otherwise `typeURL` is used unchanged.
  private func emitVerboseTextForm(visitor: inout TextFormatEncodingVisitor, message: any Message, typeURL: String) {
    let effectiveURL =
      typeURL.isEmpty
      ? buildTypeURL(forMessage: message, typePrefix: defaultAnyTypeURLPrefix)
      : typeURL
    visitor.visitAnyVerbose(value: message, typeURL: effectiveURL)
  }
  /// Wraps `body` (the bytes found between the braces of a JSON object) in
  /// `{` and `}` and returns the result as `Data`.
  private func asJSONObject(body: [UInt8]) -> Data {
    var wrapped = [UInt8]()
    // Two extra bytes for the surrounding braces.
    wrapped.reserveCapacity(body.count + 2)
    wrapped.append(UInt8(ascii: "{"))
    wrapped += body
    wrapped.append(UInt8(ascii: "}"))
    return Data(wrapped)
  }
  /// Decodes the brace-less JSON content captured for an `Any` into an
  /// instance of `messageType`.
  ///
  /// Types that are not `_CustomJSONCodable` are decoded by re-wrapping the
  /// content in braces and parsing it as a regular JSON object. Types that
  /// are `_CustomJSONCodable` are expected to carry their payload under a
  /// `"value"` key; that payload is extracted and parsed on its own.
  ///
  /// - Parameters:
  ///   - contentJSON: The JSON members of the `Any`, without the `"@type"`
  ///     member and without the enclosing `{`/`}`.
  ///   - extensions: Extension map forwarded to the decoders.
  ///   - options: JSON decoding options; `ignoreUnknownFields` controls
  ///     whether keys other than `"value"` are tolerated for custom types.
  ///   - messageType: The concrete message type to instantiate.
  /// - Returns: The decoded message.
  /// - Throws: `AnyUnpackError.malformedWellKnownTypeJSON` for an unexpected
  ///   key when unknown fields are not ignored, plus any scanner or decoding
  ///   error.
  private func unpack(
    contentJSON: [UInt8],
    extensions: any ExtensionMap,
    options: JSONDecodingOptions,
    as messageType: any Message.Type
  ) throws -> any Message {
    if !(messageType is any _CustomJSONCodable.Type) {
      let wrapped = asJSONObject(body: contentJSON)
      return try messageType.init(jsonUTF8Bytes: wrapped, extensions: extensions, options: options)
    }

    let rejectUnknownKeys = !options.ignoreUnknownFields
    var valueJSON = String()
    try contentJSON.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) in
      guard buffer.count > 0 else { return }
      // contentJSON is the valid JSON found inside an object (everything
      // except the '{' and '}'), so only minimal validation is needed.
      var scanner = JSONScanner(source: buffer, options: options, extensions: extensions)
      while !scanner.complete {
        let name = try scanner.nextQuotedString()
        try scanner.skipRequiredColon()
        guard name != "value" else {
          valueJSON = try scanner.skip()
          break
        }
        if rejectUnknownKeys {
          // The only thing within a WKT should be "value".
          throw AnyUnpackError.malformedWellKnownTypeJSON
        }
        _ = try scanner.skip()
        try scanner.skipRequiredComma()
      }
      if rejectUnknownKeys && !scanner.complete {
        // Input remaining after "value" means there was another key, which
        // a WKT may not have when unknown fields are not being skipped.
        throw AnyUnpackError.malformedWellKnownTypeJSON
      }
    }
    return try messageType.init(jsonString: valueJSON, extensions: extensions, options: options)
  }
  /// Hand-written storage class backing `Google_Protobuf_Any`.
  ///
  /// The payload is held in `state` in one of three forms (serialized binary,
  /// a message object, or JSON content) and is converted to another form only
  /// when a caller asks for it.
  internal class AnyMessageStorage {
    // The two properties generated Google_Protobuf_Any will reference.
    var _typeURL = String()

    /// The payload as serialized binary, derived from `state` on every read.
    ///
    /// The getter cannot throw, so a serialization or transcoding failure
    /// (including an unregistered type for JSON content) yields empty `Data`.
    /// `preTraverse()` is the hook that reports such failures ahead of time.
    /// Assigning a value replaces `state` with `.binary`.
    var _value: Data {
      // Remapped to the internal `state`.
      get {
        switch state {
        case .binary(let value):
          return Data(value)
        case .message(let message):
          do {
            return try message.serializedBytes(partial: true)
          } catch {
            return Data()
          }
        case .contentJSON(let contentJSON, let options):
          guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) else {
            return Data()
          }
          do {
            let m = try unpack(
              contentJSON: contentJSON,
              extensions: SimpleExtensionMap(),
              options: options,
              as: messageType
            )
            return try m.serializedBytes(partial: true)
          } catch {
            return Data()
          }
        }
      }
      set {
        state = .binary(newValue)
      }
    }

    /// The form in which the payload is currently held.
    enum InternalState {
      // a serialized binary
      // Note: Unlike contentJSON below, binary does not bother to capture the
      // decoding options. This is because the actual binary format is the binary
      // blob, i.e. - when decoding from binary, the spec doesn't include decoding
      // the binary blob, it is pass through. Instead there is a public api for
      // unpacking that takes new options when a developer decides to decode it.
      case binary(Data)
      // a message
      case message(any Message)
      // parsed JSON with the @type removed and the decoding options.
      case contentJSON([UInt8], JSONDecodingOptions)
    }
    var state: InternalState = .binary(Data())

    #if swift(>=5.10)
    // This property is used as the initial default value for new instances of the type.
    // The type itself is protecting the reference to its storage via CoW semantics.
    // This will force a copy to be made of this reference when the first mutation occurs;
    // hence, it is safe to mark this as `nonisolated(unsafe)`.
    static nonisolated(unsafe) let defaultInstance = AnyMessageStorage()
    #else
    static let defaultInstance = AnyMessageStorage()
    #endif

    private init() {}

    /// Creates a new storage holding the same type URL and state as `source`.
    init(copying source: AnyMessageStorage) {
      _typeURL = source._typeURL
      state = source.state
    }

    /// Returns `true` when the type name taken from the stored type URL
    /// equals `M.protoMessageName`. An empty type URL never matches.
    func isA<M: Message>(_ type: M.Type) -> Bool {
      if _typeURL.isEmpty {
        return false
      }
      let encodedType = typeName(fromURL: _typeURL)
      return encodedType == M.protoMessageName
    }

    // This is only ever called with the expectation that target will be fully
    // replaced during the unpacking and never as a merge.
    /// Decodes the stored payload into `target`.
    ///
    /// - Parameters:
    ///   - target: Replaced with the decoded message on success.
    ///   - extensions: Optional extension map used while decoding.
    ///   - options: Binary decoding options, applied whenever the payload is
    ///     parsed from binary. JSON content is decoded with the JSON options
    ///     captured alongside it instead.
    /// - Throws: `AnyUnpackError.typeMismatch` when the stored type URL does
    ///   not name `M`, plus any serialization or decoding error.
    func unpackTo<M: Message>(
      target: inout M,
      extensions: (any ExtensionMap)?,
      options: BinaryDecodingOptions
    ) throws {
      guard isA(M.self) else {
        throw AnyUnpackError.typeMismatch
      }

      switch state {
      case .binary(let data):
        target = try M(serializedBytes: data, extensions: extensions, partial: true, options: options)

      case .message(let msg):
        if let message = msg as? M {
          // Already right type, copy it over.
          target = message
        } else {
          // The stored message is not an `M` even though the type names
          // matched: serialize and parse, honoring the caller's decoding
          // options exactly as the `.binary` path does.
          let bytes: [UInt8] = try msg.serializedBytes(partial: true)
          target = try M(serializedBytes: bytes, extensions: extensions, partial: true, options: options)
        }

      case .contentJSON(let contentJSON, let jsonOptions):
        let decoded = try unpack(
          contentJSON: contentJSON,
          extensions: extensions ?? SimpleExtensionMap(),
          options: jsonOptions,
          as: M.self
        )
        // `unpack` was asked for an `M`; report a mismatch instead of
        // trapping should that ever not hold.
        guard let message = decoded as? M else {
          throw AnyUnpackError.typeMismatch
        }
        target = message
      }
    }

    // Called before the message is traversed to do any error preflights.
    // Since traverse() will use _value, this is our chance to throw
    // when _value can't.
    /// - Throws: `BinaryEncodingError.anyTranscodeFailure` when JSON content
    ///   is held but its type is not registered or it fails to decode.
    func preTraverse() throws {
      switch state {
      case .binary:
        // Nothing to be checked.
        break

      case .message:
        // When set from a developer provided message, partial support
        // is done. Any message that comes in from another format isn't
        // checked, and transcoding the isInitialized requirement is
        // never inserted.
        break

      case .contentJSON(let contentJSON, let options):
        // contentJSON requires we have the type available for decoding.
        guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) else {
          throw BinaryEncodingError.anyTranscodeFailure
        }
        do {
          // Decodes the full JSON and then discard the result.
          // The regular traversal will decode this again by querying the
          // `value` field, but that has no way to fail. As a result,
          // we need this to accurately handle decode errors.
          _ = try unpack(
            contentJSON: contentJSON,
            extensions: SimpleExtensionMap(),
            options: options,
            as: messageType
          )
        } catch {
          throw BinaryEncodingError.anyTranscodeFailure
        }
      }
    }
  }
  /// Custom handling for Text format.
  extension AnyMessageStorage {
    /// Decodes the verbose text form of an `Any` whose type URL has already
    /// been read.
    ///
    /// The type URL is stored first, the nested message is then parsed with a
    /// sub-decoder, and the result is kept as a `.message` state.
    ///
    /// - Parameters:
    ///   - url: The type URL read from the text.
    ///   - decoder: The decoder positioned at the start of the nested object;
    ///     its scanner is replaced by the sub-decoder's scanner afterwards.
    /// - Throws: `TextFormatDecodingError.malformedText` when the type is not
    ///   registered or another field follows the nested object, plus any
    ///   error from parsing the nested message.
    func decodeTextFormat(typeURL url: String, decoder: inout TextFormatDecoder) throws {
      _typeURL = url
      // Decoding the verbose form requires knowing the type; an unregistered
      // type cannot be parsed.
      guard let payloadType = Google_Protobuf_Any.messageType(forTypeURL: url) else {
        throw TextFormatDecodingError.malformedText
      }

      let closer = try decoder.scanner.skipObjectStart()
      var nested = try TextFormatDecoder(
        messageType: payloadType,
        scanner: decoder.scanner,
        terminator: closer
      )

      let decoded: any Message
      if payloadType == Google_Protobuf_Any.self {
        // A nested Any goes through its own text-format entry point.
        var innerAny = Google_Protobuf_Any()
        try innerAny.decodeTextFormat(decoder: &nested)
        decoded = innerAny
      } else {
        var inner = payloadType.init()
        try inner.decodeMessage(decoder: &nested)
        decoded = inner
      }
      state = .message(decoded)

      // Continue from wherever the sub-decoder stopped.
      decoder.scanner = nested.scanner
      guard try decoder.nextFieldNumber() == nil else {
        // Verbose any can never have additional keys.
        throw TextFormatDecodingError.malformedText
      }
    }

    // Specialized traverse for writing out a Text form of the Any.
    // This prefers the more-legible "verbose" format if it can
    // use it, otherwise will fall back to simpler forms.
    internal func textTraverse(visitor: inout TextFormatEncodingVisitor) {
      switch state {
      case .binary(let payload):
        // If the payload can be decoded, write the readable verbose form.
        if let payloadType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL),
          let decoded = try? payloadType.init(serializedBytes: payload, partial: true)
        {
          emitVerboseTextForm(visitor: &visitor, message: decoded, typeURL: _typeURL)
          return
        }
        // Otherwise just print the type and the raw binary data.
        if !_typeURL.isEmpty {
          try! visitor.visitSingularStringField(value: _typeURL, fieldNumber: 1)
        }
        if !payload.isEmpty {
          try! visitor.visitSingularBytesField(value: payload, fieldNumber: 2)
        }

      case .message(let held):
        emitVerboseTextForm(visitor: &visitor, message: held, typeURL: _typeURL)

      case .contentJSON(let jsonBody, let jsonOptions):
        // If the content can be decoded, write the readable verbose form.
        if let payloadType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL),
          let decoded = try? unpack(
            contentJSON: jsonBody,
            extensions: SimpleExtensionMap(),
            options: jsonOptions,
            as: payloadType
          )
        {
          emitVerboseTextForm(visitor: &visitor, message: decoded, typeURL: _typeURL)
          return
        }
        // Otherwise print the type and the raw JSON data.
        if !_typeURL.isEmpty {
          try! visitor.visitSingularStringField(value: _typeURL, fieldNumber: 1)
        }
        // Build a readable form of the JSON:
        let jsonObject = asJSONObject(body: jsonBody)
        visitor.visitAnyJSONBytesField(value: jsonObject)
      }
    }
  }
  /// The obvious goal for Hashable/Equatable conformance would be for
  /// hash and equality to behave as if we always decoded the inner
  /// object and hashed or compared that. Unfortunately, Any typically
  /// stores serialized contents and we don't always have the ability to
  /// deserialize it. Since none of our supported serializations are
  /// fully deterministic, we can't even ensure that equality will
  /// behave this way when the Any contents are in the same
  /// serialization.
  ///
  /// As a result, we can only really perform a "best effort" equality
  /// test. Of course, regardless of the above, we must guarantee that
  /// hashValue is compatible with equality.
  extension AnyMessageStorage {
    // The value data can't be used for hashing, for a few reasons:
    //  1. Since decode is done on demand, two objects could be equal
    //     but created differently (one from JSON, one from a Message, etc.),
    //     and the hash values have to be equal even if we don't have data
    //     yet.
    //  2. map<> serialization order is undefined. At the time of writing
    //     the Swift, Objective-C, and Go runtimes all tend to have random
    //     orders, so the messages could be identical, but in binary form
    //     they could differ.
    /// Feeds only the type URL into `hasher`, and nothing at all when the
    /// type URL is empty.
    public func hash(into hasher: inout Hasher) {
      guard !_typeURL.isEmpty else { return }
      hasher.combine(_typeURL)
    }

    /// Best-effort equality between two storages.
    ///
    /// Since the library decodes an Any lazily, equality is a hard problem.
    /// When things match exactly the answer is easy; otherwise this errs on
    /// the side of reporting "not equal". Comparing decoded messages would
    /// avoid issues such as map<> serialization order, but converting states
    /// just to compare would slow equality down considerably, and equality
    /// can be triggered far from the developer (Set members, Dictionary keys).
    ///
    /// - Returns: `false` when the type URLs differ. Otherwise the result of
    ///   comparing the payloads when both sides hold the same kind of state
    ///   (messages of the same type, binary data, or JSON content), and
    ///   `false` for every other combination.
    func isEqualTo(other: AnyMessageStorage) -> Bool {
      guard _typeURL == other._typeURL else {
        return false
      }

      switch (state, other.state) {
      case (.message(let mine), .message(let theirs)) where type(of: mine) == type(of: theirs):
        // The messages are known to be the same type, so their own equality
        // comparison decides both "equal" and "not equal".
        return mine.isEqualTo(message: theirs)

      case (.binary(let mine), .binary(let theirs)):
        // Identical bytes mean equal messages. Differing bytes do not
        // strictly prove inequality (map ordering, unordered fields, appended
        // binary), but no further comparison is attempted.
        return mine == theirs

      case (.contentJSON(let mine, _), .contentJSON(let theirs, _)):
        // Identical JSON means equal messages. Differing JSON could still
        // describe equal messages (maps, member order), but no further
        // comparison is attempted.
        return mine == theirs

      default:
        // Mixed states, or messages of different types: comparing further
        // would require converting states, so give up and say "not equal".
        return false
      }
    }
  }
  // _CustomJSONCodable support for Google_Protobuf_Any
  extension AnyMessageStorage {
    // Override the traversal-based JSON encoding
    // This builds an Any JSON representation from one of:
    //  * The message we were initialized with,
    //  * The JSON fields we last deserialized, or
    //  * The protobuf field we were deserialized from.
    // The last case requires locating the type, deserializing
    // into an object, then reserializing back to JSON.
    /// Returns the JSON object text for this Any.
    ///
    /// - Parameter options: JSON encoding options. They are not applied when
    ///   the stored state is JSON content, which is written back as received.
    /// - Throws: `JSONEncodingError.anyTranscodeFailure` when non-empty binary
    ///   data is held but its type is not registered, plus any decoding or
    ///   encoding error raised while transcoding.
    func encodedJSONString(options: JSONEncodingOptions) throws -> String {
      switch state {
      case .binary(let valueData):
        // Follow the C++ protostream_objectsource.cc's
        // ProtoStreamObjectSource::RenderAny() special casing of an empty value.
        guard !valueData.isEmpty else {
          if _typeURL.isEmpty {
            return "{}"
          }
          var jsonEncoder = JSONEncoder()
          // Open the object explicitly: a fresh encoder's `startField` does
          // not write the leading brace, which would otherwise leave the
          // closing brace from `endObject()` unbalanced.
          jsonEncoder.startObject()
          jsonEncoder.startField(name: "@type")
          jsonEncoder.putStringValue(value: _typeURL)
          jsonEncoder.endObject()
          return jsonEncoder.stringResult
        }
        // Transcode by decoding the binary data to a message object
        // and then recode back into JSON.
        guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) else {
          // If we don't have the type available, we can't decode the
          // binary value, so we're stuck.  (The Google spec does not
          // provide a way to just package the binary value for someone
          // else to decode later.)
          throw JSONEncodingError.anyTranscodeFailure
        }
        let m = try messageType.init(serializedBytes: valueData, partial: true)
        return try serializeAnyJSON(for: m, typeURL: _typeURL, options: options)

      case .message(let msg):
        // We should have been initialized with a typeURL, but
        // ensure it wasn't cleared.
        let url = !_typeURL.isEmpty ? _typeURL : buildTypeURL(forMessage: msg, typePrefix: defaultAnyTypeURLPrefix)
        return try serializeAnyJSON(for: msg, typeURL: url, options: options)

      case .contentJSON(let contentJSON, _):
        var jsonEncoder = JSONEncoder()
        jsonEncoder.startObject()
        jsonEncoder.startField(name: "@type")
        jsonEncoder.putStringValue(value: _typeURL)
        if !contentJSON.isEmpty {
          jsonEncoder.append(staticText: ",")
          // NOTE: This doesn't really take `options` into account since it is
          // just reflecting out what was taken in originally.
          jsonEncoder.append(utf8Bytes: contentJSON)
        }
        jsonEncoder.endObject()
        return jsonEncoder.stringResult
      }
    }

    // TODO: If the type is well-known or has already been registered,
    // we should consider decoding eagerly.  Eager decoding would
    // catch certain errors earlier (good) but would probably be
    // a performance hit if the Any contents were never accessed (bad).
    // Of course, we can't always decode eagerly (we don't always have the
    // message type available), so the deferred logic here is still needed.
    /// Reads one JSON object from `decoder` into this storage without
    /// decoding the payload.
    ///
    /// The `"@type"` member becomes the type URL. Every other member is
    /// copied, key and raw value, into a brace-less JSON buffer that is stored
    /// as `.contentJSON` together with the decoder's options. An empty object
    /// leaves the storage with an empty type URL and empty binary data.
    ///
    /// - Throws: Any scanner error raised for malformed input.
    func decodeJSON(from decoder: inout JSONDecoder) throws {
      try decoder.scanner.skipRequiredObjectStart()
      // Reset state
      _typeURL = String()
      state = .binary(Data())
      if decoder.scanner.skipOptionalObjectEnd() {
        return
      }

      var jsonEncoder = JSONEncoder()
      while true {
        let key = try decoder.scanner.nextQuotedString()
        try decoder.scanner.skipRequiredColon()
        if key == "@type" {
          _typeURL = try decoder.scanner.nextQuotedString()
        } else {
          // Copy the member through untouched; it is parsed later on demand.
          jsonEncoder.startField(name: key)
          let keyValueJSON = try decoder.scanner.skip()
          jsonEncoder.append(text: keyValueJSON)
        }
        if decoder.scanner.skipOptionalObjectEnd() {
          // Capture the options, but set the messageDepthLimit to be what
          // was left right now, as that is the limit when the JSON is finally
          // parsed.
          var updatedOptions = decoder.options
          updatedOptions.messageDepthLimit = decoder.scanner.recursionBudget
          state = .contentJSON(Array(jsonEncoder.dataResult), updatedOptions)
          return
        }
        try decoder.scanner.skipRequiredComma()
      }
    }
  }