GenerationConfig.swift 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. // Copyright 2023 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. import Foundation
  15. #if canImport(FoundationModels)
  16. import FoundationModels
  17. #endif // canImport(FoundationModels)
  /// A struct defining model parameters to be used when sending generative AI
  /// requests to the backend model.
  @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  public struct GenerationConfig: Sendable {
    /// The output schema passed to the `ResponseSchema`-based `GenerationConfig` initializer.
    ///
    /// A value holds either an OpenAPI-style ``Schema`` or a JSON Schema payload. Instances can
    /// only be created through the static factory methods, each of which populates exactly one of
    /// the two.
    public struct ResponseSchema {
      /// The OpenAPI-style schema; `nil` unless the value was created with ``openAPI(_:)``.
      let openAPISchema: Schema?

      /// The JSON Schema payload; `nil` when the value was created with ``openAPI(_:)``.
      fileprivate let jsonSchema: (any ResponseJSONSchema)?

      // Kept `fileprivate` so that callers must go through the static factories below.
      fileprivate init(openAPISchema: Schema?, jsonSchema: (any ResponseJSONSchema)?) {
        self.openAPISchema = openAPISchema
        self.jsonSchema = jsonSchema
      }

      /// Creates a response schema from an OpenAPI-style ``Schema``.
      ///
      /// - Parameter schema: The schema to store as the OpenAPI schema.
      public static func openAPI(_ schema: Schema) -> Self {
        return ResponseSchema(openAPISchema: schema, jsonSchema: nil)
      }

      // The FoundationModels-based factories are only compiled when the framework can be
      // imported, matching the guarded `import FoundationModels` at the top of the file and the
      // guarded `GenerationConfig` initializers below.
      #if canImport(FoundationModels)
        /// Creates a response schema from a FoundationModels `GenerationSchema`.
        ///
        /// - Parameter schema: The schema to store as the JSON Schema payload.
        @available(iOS 26.0, macOS 26.0, *)
        @available(tvOS, unavailable)
        @available(watchOS, unavailable)
        public static func foundationModels(_ schema: GenerationSchema) -> Self {
          return ResponseSchema(openAPISchema: nil, jsonSchema: schema)
        }

        /// Creates a response schema from the `generationSchema` of a FoundationModels
        /// `Generable` type.
        ///
        /// - Parameter type: The `Generable` type whose `generationSchema` is stored as the JSON
        ///   Schema payload.
        @available(iOS 26.0, macOS 26.0, *)
        @available(tvOS, unavailable)
        @available(watchOS, unavailable)
        public static func foundationModels(generating type: any FoundationModels.Generable
          .Type) -> Self {
          return ResponseSchema(openAPISchema: nil, jsonSchema: type.generationSchema)
        }
      #endif // canImport(FoundationModels)

      /// Creates a response schema from a ``FirebaseGenerationSchema``.
      ///
      /// - Parameter schema: The schema to store as the JSON Schema payload.
      public static func jsonSchema(_ schema: FirebaseGenerationSchema) -> Self {
        return ResponseSchema(openAPISchema: nil, jsonSchema: schema)
      }

      /// Creates a response schema from the `firebaseGenerationSchema` of a
      /// ``FirebaseGenerable`` type.
      ///
      /// - Parameter type: The type whose `firebaseGenerationSchema` is stored as the JSON Schema
      ///   payload.
      public static func jsonSchema(generating type: any FirebaseGenerable.Type) -> Self {
        return ResponseSchema(openAPISchema: nil, jsonSchema: type.firebaseGenerationSchema)
      }
    }

    /// Controls the degree of randomness in token selection.
    let temperature: Float?

    /// Controls diversity of generated text.
    let topP: Float?

    /// Limits the number of highest probability words considered.
    let topK: Int?

    /// The number of response variations to return.
    let candidateCount: Int?

    /// Maximum number of tokens that can be generated in the response.
    let maxOutputTokens: Int?

    /// Controls the likelihood of repeating the same words or phrases already generated in the
    /// text.
    let presencePenalty: Float?

    /// Controls the likelihood of repeating words, with the penalty increasing for each
    /// repetition.
    let frequencyPenalty: Float?

    /// A set of up to 5 `String`s that will stop output generation.
    let stopSequences: [String]?

    /// Output response MIME type of the generated candidate text.
    let responseMIMEType: String?

    /// Output schema of the generated candidate text.
    let responseSchema: Schema?

    /// Output schema of the generated candidate text, expressed as a JSON Schema payload. The
    /// initializers in this file never set this together with `responseSchema`.
    fileprivate let responseJSONSchema: (any ResponseJSONSchema)?

    /// Supported modalities of the response.
    let responseModalities: [ResponseModality]?

    /// Configuration for controlling the "thinking" behavior of compatible Gemini models.
    let thinkingConfig: ThinkingConfig?

    /// Memberwise initializer that every public initializer delegates to, so the stored
    /// properties are assigned in exactly one place.
    private init(temperature: Float?, topP: Float?, topK: Int?, candidateCount: Int?,
                 maxOutputTokens: Int?, presencePenalty: Float?, frequencyPenalty: Float?,
                 stopSequences: [String]?, responseMIMEType: String?, responseSchema: Schema?,
                 responseJSONSchema: (any ResponseJSONSchema)?,
                 responseModalities: [ResponseModality]?, thinkingConfig: ThinkingConfig?) {
      self.temperature = temperature
      self.topP = topP
      self.topK = topK
      self.candidateCount = candidateCount
      self.maxOutputTokens = maxOutputTokens
      self.presencePenalty = presencePenalty
      self.frequencyPenalty = frequencyPenalty
      self.stopSequences = stopSequences
      self.responseMIMEType = responseMIMEType
      self.responseSchema = responseSchema
      self.responseJSONSchema = responseJSONSchema
      self.responseModalities = responseModalities
      self.thinkingConfig = thinkingConfig
    }

    /// Creates a new `GenerationConfig` value.
    ///
    /// See the
    /// [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters)
    /// guide and the
    /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
    /// for more details.
    ///
    /// - Parameters:
    ///   - temperature: Controls the randomness of the language model's output. Higher values (for
    ///     example, 1.0) make the text more random and creative, while lower values (for example,
    ///     0.1) make it more focused and deterministic.
    ///
    ///     > Note: A temperature of 0 means that the highest probability tokens are always selected.
    ///     > In this case, responses for a given prompt are mostly deterministic, but a small amount
    ///     > of variation is still possible.
    ///
    ///     > Important: The range of supported temperature values depends on the model; see the
    ///     > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#temperature)
    ///     > for more details.
    ///   - topP: Controls diversity of generated text. Higher values (e.g., 0.9) produce more diverse
    ///     text, while lower values (e.g., 0.5) make the output more focused.
    ///
    ///     The supported range is 0.0 to 1.0.
    ///
    ///     > Important: The default `topP` value depends on the model; see the
    ///     > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-p)
    ///     > for more details.
    ///   - topK: Limits the number of highest probability words the model considers when generating
    ///     text. For example, a topK of 40 means only the 40 most likely words are considered for the
    ///     next token. A higher value increases diversity, while a lower value makes the output more
    ///     deterministic.
    ///
    ///     The supported range is 1 to 40.
    ///
    ///     > Important: Support for `topK` and the default value depends on the model; see the
    ///     > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-k)
    ///     > for more details.
    ///   - candidateCount: The number of response variations to return; defaults to 1 if not set.
    ///     Support for multiple candidates depends on the model; see the
    ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
    ///     for more details.
    ///   - maxOutputTokens: Maximum number of tokens that can be generated in the response.
    ///     See the configure model parameters
    ///     [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#max-output-tokens)
    ///     for more details.
    ///   - presencePenalty: Controls the likelihood of repeating the same words or phrases already
    ///     generated in the text. Higher values increase the penalty of repetition, resulting in more
    ///     diverse output.
    ///
    ///     > Note: While both `presencePenalty` and `frequencyPenalty` discourage repetition,
    ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
    ///     > has already appeared, whereas `frequencyPenalty` increases the penalty for *each*
    ///     > repetition of a word/phrase.
    ///
    ///     > Important: The range of supported `presencePenalty` values depends on the model; see the
    ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
    ///     > for more details.
    ///   - frequencyPenalty: Controls the likelihood of repeating words or phrases, with the penalty
    ///     increasing for each repetition. Higher values increase the penalty of repetition,
    ///     resulting in more diverse output.
    ///
    ///     > Note: While both `frequencyPenalty` and `presencePenalty` discourage repetition,
    ///     > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas
    ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
    ///     > has already appeared.
    ///
    ///     > Important: The range of supported `frequencyPenalty` values depends on the model; see
    ///     > the
    ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
    ///     > for more details.
    ///   - stopSequences: A set of up to 5 `String`s that will stop output generation. If specified,
    ///     the API will stop at the first appearance of a stop sequence. The stop sequence will not
    ///     be included as part of the response. See the
    ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
    ///     for more details.
    ///   - responseMIMEType: Output response MIME type of the generated candidate text.
    ///
    ///     Supported MIME types:
    ///     - `text/plain`: Text output; the default behavior if unspecified.
    ///     - `application/json`: JSON response in the candidates.
    ///     - `text/x.enum`: For classification tasks, output an enum value as defined in the
    ///       `responseSchema`.
    ///   - responseSchema: Output schema of the generated candidate text. If set, a compatible
    ///     `responseMIMEType` must also be set.
    ///
    ///     Compatible MIME types:
    ///     - `application/json`: Schema for JSON response.
    ///
    ///     Refer to the
    ///     [Generate structured
    ///     output](https://firebase.google.com/docs/vertex-ai/structured-output?platform=ios) guide
    ///     for more details.
    ///   - responseModalities: The data types (modalities) that may be returned in model responses.
    ///
    ///     See the [multimodal
    ///     responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
    ///     documentation for more details.
    ///
    ///     > Warning: Specifying response modalities is a **Public Preview** feature, which means
    ///     > that it is not subject to any SLA or deprecation policy and could change in
    ///     > backwards-incompatible ways.
    ///   - thinkingConfig: Configuration for controlling the "thinking" behavior of compatible Gemini
    ///     models; see ``ThinkingConfig`` for more details.
    public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
                candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
                presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
                stopSequences: [String]? = nil, responseMIMEType: String? = nil,
                responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil,
                thinkingConfig: ThinkingConfig? = nil) {
      // Explicit init because otherwise if we re-arrange the above variables it changes the API
      // surface.
      self.init(
        temperature: temperature,
        topP: topP,
        topK: topK,
        candidateCount: candidateCount,
        maxOutputTokens: maxOutputTokens,
        presencePenalty: presencePenalty,
        frequencyPenalty: frequencyPenalty,
        stopSequences: stopSequences,
        responseMIMEType: responseMIMEType,
        responseSchema: responseSchema,
        responseJSONSchema: nil,
        responseModalities: responseModalities,
        thinkingConfig: thinkingConfig
      )
    }

    #if canImport(FoundationModels)
      /// Option 1: Overload with support for specifying a `GenerationSchema`.
      ///
      /// The given schema is stored as the JSON Schema payload and the OpenAPI-style
      /// `responseSchema` property is left `nil`. `responseMIMEType` defaults to
      /// `"application/json"`; all other parameters are stored as passed.
      @available(iOS 26.0, macOS 26.0, *)
      @available(tvOS, unavailable)
      @available(watchOS, unavailable)
      public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
                  candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
                  presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
                  stopSequences: [String]? = nil, responseMIMEType: String? = "application/json",
                  responseSchema: FoundationModels.GenerationSchema,
                  responseModalities: [ResponseModality]? = nil,
                  thinkingConfig: ThinkingConfig? = nil) {
        self.init(
          temperature: temperature,
          topP: topP,
          topK: topK,
          candidateCount: candidateCount,
          maxOutputTokens: maxOutputTokens,
          presencePenalty: presencePenalty,
          frequencyPenalty: frequencyPenalty,
          stopSequences: stopSequences,
          responseMIMEType: responseMIMEType,
          responseSchema: nil,
          responseJSONSchema: responseSchema,
          responseModalities: responseModalities,
          thinkingConfig: thinkingConfig
        )
      }

      /// Option 2: Overload with support for specifying a `Generable` type.
      ///
      /// The `generationSchema` of `type` is stored as the JSON Schema payload and the
      /// OpenAPI-style `responseSchema` property is left `nil`. `responseMIMEType` defaults to
      /// `"application/json"`; all other parameters are stored as passed.
      @available(iOS 26.0, macOS 26.0, *)
      @available(tvOS, unavailable)
      @available(watchOS, unavailable)
      public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
                  candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
                  presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
                  stopSequences: [String]? = nil, responseMIMEType: String? = "application/json",
                  generating type: any FoundationModels.Generable.Type,
                  responseModalities: [ResponseModality]? = nil,
                  thinkingConfig: ThinkingConfig? = nil) {
        self.init(
          temperature: temperature,
          topP: topP,
          topK: topK,
          candidateCount: candidateCount,
          maxOutputTokens: maxOutputTokens,
          presencePenalty: presencePenalty,
          frequencyPenalty: frequencyPenalty,
          stopSequences: stopSequences,
          responseMIMEType: responseMIMEType,
          responseSchema: nil,
          responseJSONSchema: type.generationSchema,
          responseModalities: responseModalities,
          thinkingConfig: thinkingConfig
        )
      }
    #endif // canImport(FoundationModels)

    /// Option 3: New `ResponseSchema` type with static methods.
    ///
    /// If `responseSchema` carries an OpenAPI schema it is stored in the `responseSchema`
    /// property; otherwise its JSON Schema payload, if any, is stored instead. The two are never
    /// stored together. `responseMIMEType` defaults to `"application/json"`; all other parameters
    /// are stored as passed.
    public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
                candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
                presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
                stopSequences: [String]? = nil, responseMIMEType: String? = "application/json",
                responseSchema: ResponseSchema, responseModalities: [ResponseModality]? = nil,
                thinkingConfig: ThinkingConfig? = nil) {
      let openAPISchema = responseSchema.openAPISchema
      // The OpenAPI schema takes precedence: the JSON Schema payload is only forwarded when no
      // OpenAPI schema is present.
      let jsonSchema: (any ResponseJSONSchema)? =
        openAPISchema == nil ? responseSchema.jsonSchema : nil
      self.init(
        temperature: temperature,
        topP: topP,
        topK: topK,
        candidateCount: candidateCount,
        maxOutputTokens: maxOutputTokens,
        presencePenalty: presencePenalty,
        frequencyPenalty: frequencyPenalty,
        stopSequences: stopSequences,
        responseMIMEType: responseMIMEType,
        responseSchema: openAPISchema,
        responseJSONSchema: jsonSchema,
        responseModalities: responseModalities,
        thinkingConfig: thinkingConfig
      )
    }
  }
  283. // MARK: - Codable Conformances
  @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  extension GenerationConfig: Encodable {
    /// Keys used when encoding a `GenerationConfig`. Only the MIME type and JSON schema keys
    /// differ from their property names.
    enum CodingKeys: String, CodingKey {
      case temperature, topP, topK
      case candidateCount, maxOutputTokens
      case presencePenalty, frequencyPenalty
      case stopSequences
      case responseMIMEType = "responseMimeType"
      case responseSchema
      case responseJSONSchema = "responseJsonSchema"
      case responseModalities, thinkingConfig
    }

    /// Encodes every non-`nil` property into a keyed container; `nil` properties are omitted.
    ///
    /// - Parameter encoder: The encoder to write the keyed values to.
    /// - Throws: Any error thrown while encoding one of the values.
    public func encode(to encoder: any Encoder) throws {
      var fields = encoder.container(keyedBy: CodingKeys.self)

      // Sampling and length parameters.
      try fields.encodeIfPresent(temperature, forKey: .temperature)
      try fields.encodeIfPresent(topP, forKey: .topP)
      try fields.encodeIfPresent(topK, forKey: .topK)
      try fields.encodeIfPresent(candidateCount, forKey: .candidateCount)
      try fields.encodeIfPresent(maxOutputTokens, forKey: .maxOutputTokens)
      try fields.encodeIfPresent(presencePenalty, forKey: .presencePenalty)
      try fields.encodeIfPresent(frequencyPenalty, forKey: .frequencyPenalty)
      try fields.encodeIfPresent(stopSequences, forKey: .stopSequences)

      // Response format.
      try fields.encodeIfPresent(responseMIMEType, forKey: .responseMIMEType)
      try fields.encodeIfPresent(responseSchema, forKey: .responseSchema)
      // The JSON schema is stored as an existential, so it is unwrapped by hand and handed to
      // the generic `encode(_:forKey:)` rather than to `encodeIfPresent`.
      if let jsonSchema = responseJSONSchema {
        try fields.encode(jsonSchema, forKey: .responseJSONSchema)
      }

      try fields.encodeIfPresent(responseModalities, forKey: .responseModalities)
      try fields.encodeIfPresent(thinkingConfig, forKey: .thinkingConfig)
    }
  }
  /// Marker protocol for the values `GenerationConfig` can store as its JSON response schema.
  ///
  /// It adds no requirements beyond `Encodable` and `Sendable`, and is `fileprivate` so that
  /// conformances can only be declared in this file.
  @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  fileprivate protocol ResponseJSONSchema: Encodable, Sendable {}

  @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  extension JSONValue: ResponseJSONSchema {}

  // `GenerationSchema` comes from FoundationModels, which is only imported when available (see
  // the guarded import at the top of the file), so the conformance needs the same guard.
  #if canImport(FoundationModels)
    @available(iOS 26.0, macOS 26.0, *)
    @available(tvOS, unavailable)
    @available(watchOS, unavailable)
    extension GenerationSchema: ResponseJSONSchema {}
  #endif // canImport(FoundationModels)

  /// A type that can describe itself with a ``FirebaseGenerationSchema``.
  // The availability matches `FirebaseGenerationSchema`, which the requirement below refers to.
  @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  public protocol FirebaseGenerable {
    /// The schema describing this type.
    static var firebaseGenerationSchema: FirebaseGenerationSchema { get }
  }

  /// Placeholder schema type accepted by `GenerationConfig.ResponseSchema.jsonSchema(_:)`.
  @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  public struct FirebaseGenerationSchema {
    // This is just a mock
  }

  @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  extension FirebaseGenerationSchema: ResponseJSONSchema {}