ModelContent.swift 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. // Copyright 2023 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. import Foundation
  /// A type describing data in media formats interpretable by an AI model. Each generative AI
  /// request or response contains an `Array` of ``ModelContent``s, and each ``ModelContent`` value
  /// may comprise multiple heterogeneous ``ModelContent/Part``s.
  @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  public struct ModelContent: Equatable, Sendable {
    /// A discrete piece of data in a media format interpretable by an AI model. Within a single value
    /// of ``Part``, different data types may not mix.
    public enum Part: Equatable, Sendable {
      /// Text value.
      case text(String)

      /// Data with a specified media type. Not all media types may be supported by the AI model.
      case data(mimetype: String, Data)

      /// File data stored in Cloud Storage for Firebase, referenced by URI.
      ///
      /// > Note: Supported media types depend on the model; see [media requirements
      /// > ](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#media_requirements)
      /// > for details.
      ///
      /// - Parameters:
      ///   - mimetype: The IANA standard MIME type of the uploaded file, for example, `"image/jpeg"`
      ///     or `"video/mp4"`; see [media requirements
      ///     ](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#media_requirements)
      ///     for supported values.
      ///   - uri: The `"gs://"`-prefixed URI of the file in Cloud Storage for Firebase, for example,
      ///     `"gs://bucket-name/path/image.jpg"`.
      case fileData(mimetype: String, uri: String)

      /// A predicted function call returned from the model.
      case functionCall(FunctionCall)

      /// A response to a function call.
      case functionResponse(FunctionResponse)

      // MARK: Convenience Initializers

      /// Convenience function for populating a Part with JPEG data.
      ///
      /// - Parameter data: The image bytes; they are stored as-is and tagged `"image/jpeg"`.
      /// - Returns: A ``Part/data(mimetype:_:)`` value wrapping `data`.
      public static func jpeg(_ data: Data) -> Self {
        return .data(mimetype: "image/jpeg", data)
      }

      /// Convenience function for populating a Part with PNG data.
      ///
      /// - Parameter data: The image bytes; they are stored as-is and tagged `"image/png"`.
      /// - Returns: A ``Part/data(mimetype:_:)`` value wrapping `data`.
      public static func png(_ data: Data) -> Self {
        return .data(mimetype: "image/png", data)
      }

      /// Returns the text contents of this ``Part``, if it contains text; otherwise `nil`.
      public var text: String? {
        // Exhaustive on purpose (no `default`): adding a new case must force a decision here.
        switch self {
        case let .text(contents):
          return contents
        case .data, .fileData, .functionCall, .functionResponse:
          return nil
        }
      }
    }

    /// The role of the entity creating the ``ModelContent``. For user-generated client requests,
    /// for example, the role is `user`.
    public let role: String?

    /// The data parts comprising this ``ModelContent`` value.
    public let parts: [Part]

    /// Creates a new value from any data or `Array` of data whose conversion to ``Part``s may
    /// fail. See ``ThrowingPartsRepresentable`` for types that can be interpreted as `Part`s.
    ///
    /// - Parameters:
    ///   - role: The role of the entity creating the content; defaults to `"user"`.
    ///   - parts: The value to convert via `tryPartsValue()`.
    /// - Throws: Any error thrown by `parts.tryPartsValue()`.
    public init(role: String? = "user", parts: some ThrowingPartsRepresentable) throws {
      self.role = role
      self.parts = try parts.tryPartsValue()
    }

    /// Creates a new value from any data or `Array` of data interpretable as a
    /// ``Part``. See ``PartsRepresentable`` for types that can be interpreted as `Part`s.
    ///
    /// - Parameters:
    ///   - role: The role of the entity creating the content; defaults to `"user"`.
    ///   - parts: The value whose `partsValue` becomes ``parts``.
    public init(role: String? = "user", parts: some PartsRepresentable) {
      self.role = role
      self.parts = parts.partsValue
    }

    /// Creates a new value from a list of ``Part``s.
    ///
    /// - Parameters:
    ///   - role: The role of the entity creating the content; defaults to `"user"`.
    ///   - parts: The parts to store, unchanged and in the given order.
    public init(role: String? = "user", parts: [Part]) {
      self.role = role
      self.parts = parts
    }

    /// Creates a new value from a variadic list of data whose conversion to ``Part``s may fail.
    /// See ``ThrowingPartsRepresentable`` for types that can be interpreted as `Part`s.
    ///
    /// - Parameters:
    ///   - role: The role of the entity creating the content; defaults to `"user"`.
    ///   - parts: The values to convert; each argument's parts are concatenated in argument order.
    /// - Throws: The first error thrown by any argument's `tryPartsValue()`.
    public init(role: String? = "user", _ parts: any ThrowingPartsRepresentable...) throws {
      let content = try parts.flatMap { try $0.tryPartsValue() }
      self.init(role: role, parts: content)
    }

    /// Creates a new value from an `Array` of data interpretable as ``Part``s. See
    /// ``PartsRepresentable`` for types that can be interpreted as `Part`s.
    ///
    /// - Parameters:
    ///   - role: The role of the entity creating the content; defaults to `"user"`.
    ///   - parts: The values to convert; each element's `partsValue` is concatenated in array
    ///     order.
    public init(role: String? = "user", _ parts: [any PartsRepresentable]) {
      let content = parts.flatMap { $0.partsValue }
      self.init(role: role, parts: content)
    }
  }
  99. // MARK: Codable Conformances
  // `ModelContent` adds no members here: the `Codable` conformance is synthesized by the
  // compiler from the stored properties declared on the struct.
  @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  extension ModelContent: Codable {
  }
  @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  extension ModelContent.Part: Codable {
    /// Top-level keys of an encoded part. `encode(to:)` writes exactly one of them; the key that
    /// is present identifies which case of ``ModelContent/Part`` the payload represents.
    enum CodingKeys: String, CodingKey {
      case text
      case inlineData
      case fileData
      case functionCall
      case functionResponse
    }

    /// Keys of the object nested under `inlineData`.
    enum InlineDataKeys: String, CodingKey {
      case mimeType = "mime_type"
      case bytes = "data"
    }

    /// Keys of the object nested under `fileData`.
    enum FileDataKeys: String, CodingKey {
      case mimeType = "mime_type"
      case uri = "file_uri"
    }

    /// Encodes this part as a keyed container holding a single entry named after its case.
    ///
    /// - Parameter encoder: The encoder to write to.
    /// - Throws: Any error raised by the encoder while writing a value.
    public func encode(to encoder: Encoder) throws {
      var container = encoder.container(keyedBy: CodingKeys.self)
      switch self {
      case let .text(text):
        try container.encode(text, forKey: .text)
      case let .data(mimetype, bytes):
        var inlineDataContainer = container.nestedContainer(
          keyedBy: InlineDataKeys.self,
          forKey: .inlineData
        )
        try inlineDataContainer.encode(mimetype, forKey: .mimeType)
        try inlineDataContainer.encode(bytes, forKey: .bytes)
      case let .fileData(mimetype, uri):
        var fileDataContainer = container.nestedContainer(
          keyedBy: FileDataKeys.self,
          forKey: .fileData
        )
        try fileDataContainer.encode(mimetype, forKey: .mimeType)
        try fileDataContainer.encode(uri, forKey: .uri)
      case let .functionCall(functionCall):
        try container.encode(functionCall, forKey: .functionCall)
      case let .functionResponse(functionResponse):
        try container.encode(functionResponse, forKey: .functionResponse)
      }
    }

    /// Decodes a part by probing for the known top-level keys and decoding the first one found.
    ///
    /// Keys are checked in the order `text`, `inlineData`, `functionCall`, `functionResponse`,
    /// `fileData`.
    ///
    /// - Parameter decoder: The decoder to read from.
    /// - Throws: `DecodingError.dataCorrupted` if none of the known keys is present, or any error
    ///   raised while decoding the selected payload.
    public init(from decoder: Decoder) throws {
      let values = try decoder.container(keyedBy: CodingKeys.self)
      if values.contains(.text) {
        self = try .text(values.decode(String.self, forKey: .text))
      } else if values.contains(.inlineData) {
        let dataContainer = try values.nestedContainer(
          keyedBy: InlineDataKeys.self,
          forKey: .inlineData
        )
        let mimetype = try dataContainer.decode(String.self, forKey: .mimeType)
        let bytes = try dataContainer.decode(Data.self, forKey: .bytes)
        self = .data(mimetype: mimetype, bytes)
      } else if values.contains(.functionCall) {
        self = try .functionCall(values.decode(FunctionCall.self, forKey: .functionCall))
      } else if values.contains(.functionResponse) {
        self = try .functionResponse(values.decode(FunctionResponse.self, forKey: .functionResponse))
      } else if values.contains(.fileData) {
        // Mirrors the `.fileData` branch of `encode(to:)` so encoded parts round-trip. Checked
        // last so that every payload that decoded before this branch existed still decodes to the
        // same case.
        let fileDataContainer = try values.nestedContainer(
          keyedBy: FileDataKeys.self,
          forKey: .fileData
        )
        let mimetype = try fileDataContainer.decode(String.self, forKey: .mimeType)
        let uri = try fileDataContainer.decode(String.self, forKey: .uri)
        self = .fileData(mimetype: mimetype, uri: uri)
      } else {
        let unexpectedKeys = values.allKeys.map { $0.stringValue }
        throw DecodingError.dataCorrupted(DecodingError.Context(
          codingPath: values.codingPath,
          debugDescription: "Unexpected ModelContent.Part type(s): \(unexpectedKeys)"
        ))
      }
    }
  }