AnyMessageStorage.swift 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. // Sources/SwiftProtobuf/AnyMessageStorage.swift - Custom storage for Any WKT
  2. //
  3. // Copyright (c) 2014 - 2017 Apple Inc. and the project authors
  4. // Licensed under Apache License v2.0 with Runtime Library Exception
  5. //
  6. // See LICENSE.txt for license information:
  7. // https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
  8. //
  9. // -----------------------------------------------------------------------------
  10. ///
  11. /// Hand written storage class for Google_Protobuf_Any to support on demand
  12. /// transforms between the formats.
  13. ///
  14. // -----------------------------------------------------------------------------
  15. import Foundation
  16. private func serializeAnyJSON(
  17. for message: any Message,
  18. typeURL: String,
  19. options: JSONEncodingOptions
  20. ) throws -> String {
  21. var visitor = try JSONEncodingVisitor(type: type(of: message), options: options)
  22. visitor.startObject(message: message)
  23. visitor.encodeField(name: "@type", stringValue: typeURL)
  24. if let m = message as? (any _CustomJSONCodable) {
  25. let value = try m.encodedJSONString(options: options)
  26. visitor.encodeField(name: "value", jsonText: value)
  27. } else {
  28. try message.traverse(visitor: &visitor)
  29. }
  30. visitor.endObject()
  31. return visitor.stringResult
  32. }
  33. private func emitVerboseTextForm(visitor: inout TextFormatEncodingVisitor, message: any Message, typeURL: String) {
  34. let url: String
  35. if typeURL.isEmpty {
  36. url = buildTypeURL(forMessage: message, typePrefix: defaultAnyTypeURLPrefix)
  37. } else {
  38. url = typeURL
  39. }
  40. visitor.visitAnyVerbose(value: message, typeURL: url)
  41. }
  42. private func asJSONObject(body: [UInt8]) -> Data {
  43. let asciiOpenCurlyBracket = UInt8(ascii: "{")
  44. let asciiCloseCurlyBracket = UInt8(ascii: "}")
  45. var result = [asciiOpenCurlyBracket]
  46. result.append(contentsOf: body)
  47. result.append(asciiCloseCurlyBracket)
  48. return Data(result)
  49. }
  50. private func unpack(
  51. contentJSON: [UInt8],
  52. extensions: any ExtensionMap,
  53. options: JSONDecodingOptions,
  54. as messageType: any Message.Type
  55. ) throws -> any Message {
  56. guard messageType is any _CustomJSONCodable.Type else {
  57. let contentJSONAsObject = asJSONObject(body: contentJSON)
  58. return try messageType.init(jsonUTF8Bytes: contentJSONAsObject, extensions: extensions, options: options)
  59. }
  60. var value = String()
  61. try contentJSON.withUnsafeBytes { (body: UnsafeRawBufferPointer) in
  62. if body.count > 0 {
  63. // contentJSON will be the valid JSON for inside an object (everything but
  64. // the '{' and '}', so minimal validation is needed.
  65. var scanner = JSONScanner(source: body, options: options, extensions: extensions)
  66. while !scanner.complete {
  67. let key = try scanner.nextQuotedString()
  68. try scanner.skipRequiredColon()
  69. if key == "value" {
  70. value = try scanner.skip()
  71. break
  72. }
  73. if !options.ignoreUnknownFields {
  74. // The only thing within a WKT should be "value".
  75. throw AnyUnpackError.malformedWellKnownTypeJSON
  76. }
  77. let _ = try scanner.skip()
  78. try scanner.skipRequiredComma()
  79. }
  80. if !options.ignoreUnknownFields && !scanner.complete {
  81. // If that wasn't the end, then there was another key, and WKTs should
  82. // only have the one when not skipping unknowns.
  83. throw AnyUnpackError.malformedWellKnownTypeJSON
  84. }
  85. }
  86. }
  87. return try messageType.init(jsonString: value, extensions: extensions, options: options)
  88. }
  89. internal class AnyMessageStorage {
  90. // The two properties generated Google_Protobuf_Any will reference.
  91. var _typeURL = String()
  92. var _value: Data {
  93. // Remapped to the internal `state`.
  94. get {
  95. switch state {
  96. case .binary(let value):
  97. return Data(value)
  98. case .message(let message):
  99. do {
  100. return try message.serializedBytes(partial: true)
  101. } catch {
  102. return Data()
  103. }
  104. case .contentJSON(let contentJSON, let options):
  105. guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) else {
  106. return Data()
  107. }
  108. do {
  109. let m = try unpack(
  110. contentJSON: contentJSON,
  111. extensions: SimpleExtensionMap(),
  112. options: options,
  113. as: messageType
  114. )
  115. return try m.serializedBytes(partial: true)
  116. } catch {
  117. return Data()
  118. }
  119. }
  120. }
  121. set {
  122. state = .binary(newValue)
  123. }
  124. }
  125. enum InternalState {
  126. // a serialized binary
  127. // Note: Unlike contentJSON below, binary does not bother to capture the
  128. // decoding options. This is because the actual binary format is the binary
  129. // blob, i.e. - when decoding from binary, the spec doesn't include decoding
  130. // the binary blob, it is pass through. Instead there is a public api for
  131. // unpacking that takes new options when a developer decides to decode it.
  132. case binary(Data)
  133. // a message
  134. case message(any Message)
  135. // parsed JSON with the @type removed and the decoding options.
  136. case contentJSON([UInt8], JSONDecodingOptions)
  137. }
  138. var state: InternalState = .binary(Data())
  139. // This property is used as the initial default value for new instances of the type.
  140. // The type itself is protecting the reference to its storage via CoW semantics.
  141. // This will force a copy to be made of this reference when the first mutation occurs;
  142. // hence, it is safe to mark this as `nonisolated(unsafe)`.
  143. static nonisolated(unsafe) let defaultInstance = AnyMessageStorage()
  144. private init() {}
  145. init(copying source: AnyMessageStorage) {
  146. _typeURL = source._typeURL
  147. state = source.state
  148. }
  149. func isA<M: Message>(_ type: M.Type) -> Bool {
  150. if _typeURL.isEmpty {
  151. return false
  152. }
  153. let encodedType = typeName(fromURL: _typeURL)
  154. return encodedType == M.protoMessageName
  155. }
  156. // This is only ever called with the expectation that target will be fully
  157. // replaced during the unpacking and never as a merge.
  158. func unpackTo<M: Message>(
  159. target: inout M,
  160. extensions: (any ExtensionMap)?,
  161. options: BinaryDecodingOptions
  162. ) throws {
  163. guard isA(M.self) else {
  164. throw AnyUnpackError.typeMismatch
  165. }
  166. switch state {
  167. case .binary(let data):
  168. target = try M(serializedBytes: data, extensions: extensions, partial: true, options: options)
  169. case .message(let msg):
  170. if let message = msg as? M {
  171. // Already right type, copy it over.
  172. target = message
  173. } else {
  174. // Different type, serialize and parse.
  175. let bytes: [UInt8] = try msg.serializedBytes(partial: true)
  176. target = try M(serializedBytes: bytes, extensions: extensions, partial: true)
  177. }
  178. case .contentJSON(let contentJSON, let options):
  179. target =
  180. try unpack(
  181. contentJSON: contentJSON,
  182. extensions: extensions ?? SimpleExtensionMap(),
  183. options: options,
  184. as: M.self
  185. ) as! M
  186. }
  187. }
  188. // Called before the message is traversed to do any error preflights.
  189. // Since traverse() will use _value, this is our chance to throw
  190. // when _value can't.
  191. func preTraverse() throws {
  192. switch state {
  193. case .binary:
  194. // Nothing to be checked.
  195. break
  196. case .message:
  197. // When set from a developer provided message, partial support
  198. // is done. Any message that comes in from another format isn't
  199. // checked, and transcoding the isInitialized requirement is
  200. // never inserted.
  201. break
  202. case .contentJSON(let contentJSON, let options):
  203. // contentJSON requires we have the type available for decoding.
  204. guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) else {
  205. throw BinaryEncodingError.anyTranscodeFailure
  206. }
  207. do {
  208. // Decodes the full JSON and then discard the result.
  209. // The regular traversal will decode this again by querying the
  210. // `value` field, but that has no way to fail. As a result,
  211. // we need this to accurately handle decode errors.
  212. _ = try unpack(
  213. contentJSON: contentJSON,
  214. extensions: SimpleExtensionMap(),
  215. options: options,
  216. as: messageType
  217. )
  218. } catch {
  219. throw BinaryEncodingError.anyTranscodeFailure
  220. }
  221. }
  222. }
  223. }
  224. /// Custom handling for Text format.
  225. extension AnyMessageStorage {
  226. func decodeTextFormat(typeURL url: String, decoder: inout TextFormatDecoder) throws {
  227. // Decoding the verbose form requires knowing the type.
  228. _typeURL = url
  229. guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: url) else {
  230. // The type wasn't registered, can't parse it.
  231. throw TextFormatDecodingError.malformedText
  232. }
  233. let terminator = try decoder.scanner.skipObjectStart()
  234. var subDecoder = try TextFormatDecoder(
  235. messageType: messageType,
  236. scanner: decoder.scanner,
  237. terminator: terminator
  238. )
  239. if messageType == Google_Protobuf_Any.self {
  240. var any = Google_Protobuf_Any()
  241. try any.decodeTextFormat(decoder: &subDecoder)
  242. state = .message(any)
  243. } else {
  244. var m = messageType.init()
  245. try m.decodeMessage(decoder: &subDecoder)
  246. state = .message(m)
  247. }
  248. decoder.scanner = subDecoder.scanner
  249. if try decoder.nextFieldNumber() != nil {
  250. // Verbose any can never have additional keys.
  251. throw TextFormatDecodingError.malformedText
  252. }
  253. }
  254. // Specialized traverse for writing out a Text form of the Any.
  255. // This prefers the more-legible "verbose" format if it can
  256. // use it, otherwise will fall back to simpler forms.
  257. internal func textTraverse(visitor: inout TextFormatEncodingVisitor) {
  258. switch state {
  259. case .binary(let valueData):
  260. if let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) {
  261. // If we can decode it, we can write the readable verbose form:
  262. do {
  263. let m = try messageType.init(serializedBytes: valueData, partial: true)
  264. emitVerboseTextForm(visitor: &visitor, message: m, typeURL: _typeURL)
  265. return
  266. } catch {
  267. // Fall through to just print the type and raw binary data.
  268. }
  269. }
  270. if !_typeURL.isEmpty {
  271. try! visitor.visitSingularStringField(value: _typeURL, fieldNumber: 1)
  272. }
  273. if !valueData.isEmpty {
  274. try! visitor.visitSingularBytesField(value: valueData, fieldNumber: 2)
  275. }
  276. case .message(let msg):
  277. emitVerboseTextForm(visitor: &visitor, message: msg, typeURL: _typeURL)
  278. case .contentJSON(let contentJSON, let options):
  279. // If we can decode it, we can write the readable verbose form:
  280. if let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) {
  281. do {
  282. let m = try unpack(
  283. contentJSON: contentJSON,
  284. extensions: SimpleExtensionMap(),
  285. options: options,
  286. as: messageType
  287. )
  288. emitVerboseTextForm(visitor: &visitor, message: m, typeURL: _typeURL)
  289. return
  290. } catch {
  291. // Fall through to just print the raw JSON data
  292. }
  293. }
  294. if !_typeURL.isEmpty {
  295. try! visitor.visitSingularStringField(value: _typeURL, fieldNumber: 1)
  296. }
  297. // Build a readable form of the JSON:
  298. let contentJSONAsObject = asJSONObject(body: contentJSON)
  299. visitor.visitAnyJSONBytesField(value: contentJSONAsObject)
  300. }
  301. }
  302. }
  303. /// The obvious goal for Hashable/Equatable conformance would be for
  304. /// hash and equality to behave as if we always decoded the inner
  305. /// object and hashed or compared that. Unfortunately, Any typically
  306. /// stores serialized contents and we don't always have the ability to
  307. /// deserialize it. Since none of our supported serializations are
  308. /// fully deterministic, we can't even ensure that equality will
  309. /// behave this way when the Any contents are in the same
  310. /// serialization.
  311. ///
  312. /// As a result, we can only really perform a "best effort" equality
  313. /// test. Of course, regardless of the above, we must guarantee that
  314. /// hashValue is compatible with equality.
  315. extension AnyMessageStorage {
  316. // Can't use _valueData for a few reasons:
  317. // 1. Since decode is done on demand, two objects could be equal
  318. // but created differently (one from JSON, one for Message, etc.),
  319. // and the hash values have to be equal even if we don't have data
  320. // yet.
  321. // 2. map<> serialization order is undefined. At the time of writing
  322. // the Swift, Objective-C, and Go runtimes all tend to have random
  323. // orders, so the messages could be identical, but in binary form
  324. // they could differ.
  325. public func hash(into hasher: inout Hasher) {
  326. if !_typeURL.isEmpty {
  327. hasher.combine(_typeURL)
  328. }
  329. }
  330. func isEqualTo(other: AnyMessageStorage) -> Bool {
  331. if _typeURL != other._typeURL {
  332. return false
  333. }
  334. // Since the library does lazy Any decode, equality is a very hard problem.
  335. // It things exactly match, that's pretty easy, otherwise, one ends up having
  336. // to error on saying they aren't equal.
  337. //
  338. // The best option would be to have Message forms and compare those, as that
  339. // removes issues like map<> serialization order, some other protocol buffer
  340. // implementation details/bugs around serialized form order, etc.; but that
  341. // would also greatly slow down equality tests.
  342. //
  343. // Do our best to compare what is present have...
  344. // If both have messages, check if they are the same.
  345. if case .message(let myMsg) = state, case .message(let otherMsg) = other.state,
  346. type(of: myMsg) == type(of: otherMsg)
  347. {
  348. // Since the messages are known to be same type, we can claim both equal and
  349. // not equal based on the equality comparison.
  350. return myMsg.isEqualTo(message: otherMsg)
  351. }
  352. // If both have serialized data, and they exactly match; the messages are equal.
  353. // Because there could be map in the message, the fact that the data isn't the
  354. // same doesn't always mean the messages aren't equal. Likewise, the binary could
  355. // have been created by a library that doesn't order the fields, or the binary was
  356. // created using the appending ability in of the binary format.
  357. if case .binary(let myValue) = state, case .binary(let otherValue) = other.state, myValue == otherValue {
  358. return true
  359. }
  360. // If both have contentJSON, and they exactly match; the messages are equal.
  361. // Because there could be map in the message (or the JSON could just be in a different
  362. // order), the fact that the JSON isn't the same doesn't always mean the messages
  363. // aren't equal.
  364. if case .contentJSON(let myJSON, _) = state,
  365. case .contentJSON(let otherJSON, _) = other.state,
  366. myJSON == otherJSON
  367. {
  368. return true
  369. }
  370. // Out of options. To do more compares, the states conversions would have to be
  371. // done to do comparisons; and since equality can be used somewhat removed from
  372. // a developer (if they put protos in a Set, use them as keys to a Dictionary, etc),
  373. // the conversion cost might be to high for those uses. Give up and say they aren't equal.
  374. return false
  375. }
  376. }
  377. // _CustomJSONCodable support for Google_Protobuf_Any
  378. extension AnyMessageStorage {
  379. // Spec for Any says this should contain atleast one slash. Looking at upstream languages, most
  380. // actually look up the value in their runtime registries, but since we do deferred parsing
  381. // we can't assume the registry is complete, thus just do this minimal validation check.
  382. fileprivate func isTypeURLValid() -> Bool {
  383. _typeURL.contains("/")
  384. }
  385. // Override the traversal-based JSON encoding
  386. // This builds an Any JSON representation from one of:
  387. // * The message we were initialized with,
  388. // * The JSON fields we last deserialized, or
  389. // * The protobuf field we were deserialized from.
  390. // The last case requires locating the type, deserializing
  391. // into an object, then reserializing back to JSON.
  392. func encodedJSONString(options: JSONEncodingOptions) throws -> String {
  393. switch state {
  394. case .binary(let valueData):
  395. // Follow the C++ protostream_objectsource.cc's
  396. // ProtoStreamObjectSource::RenderAny() special casing of an empty value.
  397. if valueData.isEmpty && _typeURL.isEmpty {
  398. return "{}"
  399. }
  400. guard isTypeURLValid() else {
  401. if _typeURL.isEmpty {
  402. throw SwiftProtobufError.JSONEncoding.emptyAnyTypeURL()
  403. }
  404. throw SwiftProtobufError.JSONEncoding.invalidAnyTypeURL(type_url: _typeURL)
  405. }
  406. if valueData.isEmpty {
  407. var jsonEncoder = JSONEncoder()
  408. jsonEncoder.startObject()
  409. jsonEncoder.startField(name: "@type")
  410. jsonEncoder.putStringValue(value: _typeURL)
  411. jsonEncoder.endObject()
  412. return jsonEncoder.stringResult
  413. }
  414. // Transcode by decoding the binary data to a message object
  415. // and then recode back into JSON.
  416. guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) else {
  417. // If we don't have the type available, we can't decode the
  418. // binary value, so we're stuck. (The Google spec does not
  419. // provide a way to just package the binary value for someone
  420. // else to decode later.)
  421. throw JSONEncodingError.anyTranscodeFailure
  422. }
  423. let m = try messageType.init(serializedBytes: valueData, partial: true)
  424. return try serializeAnyJSON(for: m, typeURL: _typeURL, options: options)
  425. case .message(let msg):
  426. // We should have been initialized with a typeURL, make sure it is valid.
  427. if !_typeURL.isEmpty && !isTypeURLValid() {
  428. throw SwiftProtobufError.JSONEncoding.invalidAnyTypeURL(type_url: _typeURL)
  429. }
  430. // If it was cleared, default it.
  431. let url = !_typeURL.isEmpty ? _typeURL : buildTypeURL(forMessage: msg, typePrefix: defaultAnyTypeURLPrefix)
  432. return try serializeAnyJSON(for: msg, typeURL: url, options: options)
  433. case .contentJSON(let contentJSON, _):
  434. guard isTypeURLValid() else {
  435. if _typeURL.isEmpty {
  436. throw SwiftProtobufError.JSONEncoding.emptyAnyTypeURL()
  437. }
  438. throw SwiftProtobufError.JSONEncoding.invalidAnyTypeURL(type_url: _typeURL)
  439. }
  440. var jsonEncoder = JSONEncoder()
  441. jsonEncoder.startObject()
  442. jsonEncoder.startField(name: "@type")
  443. jsonEncoder.putStringValue(value: _typeURL)
  444. if !contentJSON.isEmpty {
  445. jsonEncoder.append(staticText: ",")
  446. // NOTE: This doesn't really take `options` into account since it is
  447. // just reflecting out what was taken in originally.
  448. jsonEncoder.append(utf8Bytes: contentJSON)
  449. }
  450. jsonEncoder.endObject()
  451. return jsonEncoder.stringResult
  452. }
  453. }
  454. // TODO: If the type is well-known or has already been registered,
  455. // we should consider decoding eagerly. Eager decoding would
  456. // catch certain errors earlier (good) but would probably be
  457. // a performance hit if the Any contents were never accessed (bad).
  458. // Of course, we can't always decode eagerly (we don't always have the
  459. // message type available), so the deferred logic here is still needed.
  460. func decodeJSON(from decoder: inout JSONDecoder) throws {
  461. try decoder.scanner.skipRequiredObjectStart()
  462. // Reset state
  463. _typeURL = String()
  464. state = .binary(Data())
  465. if decoder.scanner.skipOptionalObjectEnd() {
  466. return
  467. }
  468. var jsonEncoder = JSONEncoder()
  469. while true {
  470. let key = try decoder.scanner.nextQuotedString()
  471. try decoder.scanner.skipRequiredColon()
  472. if key == "@type" {
  473. _typeURL = try decoder.scanner.nextQuotedString()
  474. guard isTypeURLValid() else {
  475. throw SwiftProtobufError.JSONDecoding.invalidAnyTypeURL(type_url: _typeURL)
  476. }
  477. } else {
  478. jsonEncoder.startField(name: key)
  479. let keyValueJSON = try decoder.scanner.skip()
  480. jsonEncoder.append(text: keyValueJSON)
  481. }
  482. if decoder.scanner.skipOptionalObjectEnd() {
  483. if _typeURL.isEmpty {
  484. throw SwiftProtobufError.JSONDecoding.emptyAnyTypeURL()
  485. }
  486. // Capture the options, but set the messageDepthLimit to be what
  487. // was left right now, as that is the limit when the JSON is finally
  488. // parsed.
  489. var updatedOptions = decoder.options
  490. updatedOptions.messageDepthLimit = decoder.scanner.recursionBudget
  491. state = .contentJSON(Array(jsonEncoder.dataResult), updatedOptions)
  492. return
  493. }
  494. try decoder.scanner.skipRequiredComma()
  495. }
  496. }
  497. }