// GenerationConfig.swift
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import Foundation
  15. /// A struct defining model parameters to be used when sending generative AI
  16. /// requests to the backend model.
  17. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  18. public struct GenerationConfig: Sendable {
  19. /// Controls the degree of randomness in token selection.
  20. let temperature: Float?
  21. /// Controls diversity of generated text.
  22. let topP: Float?
  23. /// Limits the number of highest probability words considered.
  24. let topK: Int?
  25. /// The number of response variations to return.
  26. let candidateCount: Int?
  27. /// Maximum number of tokens that can be generated in the response.
  28. let maxOutputTokens: Int?
  29. /// Controls the likelihood of repeating the same words or phrases already generated in the text.
  30. let presencePenalty: Float?
  31. /// Controls the likelihood of repeating words, with the penalty increasing for each repetition.
  32. let frequencyPenalty: Float?
  33. /// A set of up to 5 `String`s that will stop output generation.
  34. let stopSequences: [String]?
  35. /// Output response MIME type of the generated candidate text.
  36. let responseMIMEType: String?
  37. /// Output schema of the generated candidate text.
  38. let responseSchema: Schema?
  39. /// Output schema of the generated response in [JSON Schema](https://json-schema.org/) format.
  40. ///
  41. /// If set, `responseSchema` must be omitted and `responseMIMEType` is required.
  42. let responseJSONSchema: JSONObject?
  43. /// Supported modalities of the response.
  44. let responseModalities: [ResponseModality]?
  45. /// Configuration for controlling the "thinking" behavior of compatible Gemini models.
  46. let thinkingConfig: ThinkingConfig?
  47. /// Creates a new `GenerationConfig` value.
  48. ///
  49. /// See the
  50. /// [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters)
  51. /// guide and the
  52. /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  53. /// for more details.
  54. ///
  55. /// - Parameters:
  56. /// - temperature:Controls the randomness of the language model's output. Higher values (for
  57. /// example, 1.0) make the text more random and creative, while lower values (for example,
  58. /// 0.1) make it more focused and deterministic.
  59. ///
  60. /// > Note: A temperature of 0 means that the highest probability tokens are always selected.
  61. /// > In this case, responses for a given prompt are mostly deterministic, but a small amount
  62. /// > of variation is still possible.
  63. ///
  64. /// > Important: The range of supported temperature values depends on the model; see the
  65. /// > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#temperature)
  66. /// > for more details.
  67. /// - topP: Controls diversity of generated text. Higher values (e.g., 0.9) produce more diverse
  68. /// text, while lower values (e.g., 0.5) make the output more focused.
  69. ///
  70. /// The supported range is 0.0 to 1.0.
  71. ///
  72. /// > Important: The default `topP` value depends on the model; see the
  73. /// > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-p)
  74. /// > for more details.
  75. /// - topK: Limits the number of highest probability words the model considers when generating
  76. /// text. For example, a topK of 40 means only the 40 most likely words are considered for the
  77. /// next token. A higher value increases diversity, while a lower value makes the output more
  78. /// deterministic.
  79. ///
  80. /// The supported range is 1 to 40.
  81. ///
  82. /// > Important: Support for `topK` and the default value depends on the model; see the
  83. /// [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-k)
  84. /// for more details.
  85. /// - candidateCount: The number of response variations to return; defaults to 1 if not set.
  86. /// Support for multiple candidates depends on the model; see the
  87. /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  88. /// for more details.
  89. /// - maxOutputTokens: Maximum number of tokens that can be generated in the response.
  90. /// See the configure model parameters [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#max-output-tokens)
  91. /// for more details.
  92. /// - presencePenalty: Controls the likelihood of repeating the same words or phrases already
  93. /// generated in the text. Higher values increase the penalty of repetition, resulting in more
  94. /// diverse output.
  95. ///
  96. /// > Note: While both `presencePenalty` and `frequencyPenalty` discourage repetition,
  97. /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
  98. /// > has already appeared, whereas `frequencyPenalty` increases the penalty for *each*
  99. /// > repetition of a word/phrase.
  100. ///
  101. /// > Important: The range of supported `presencePenalty` values depends on the model; see the
  102. /// > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  103. /// > for more details
  104. /// - frequencyPenalty: Controls the likelihood of repeating words or phrases, with the penalty
  105. /// increasing for each repetition. Higher values increase the penalty of repetition,
  106. /// resulting in more diverse output.
  107. ///
  108. /// > Note: While both `frequencyPenalty` and `presencePenalty` discourage repetition,
  109. /// > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas
  110. /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
  111. /// > has already appeared.
  112. ///
  113. /// > Important: The range of supported `frequencyPenalty` values depends on the model; see
  114. /// > the
  115. /// > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  116. /// > for more details
  117. /// - stopSequences: A set of up to 5 `String`s that will stop output generation. If specified,
  118. /// the API will stop at the first appearance of a stop sequence. The stop sequence will not
  119. /// be included as part of the response. See the
  120. /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  121. /// for more details.
  122. /// - responseMIMEType: Output response MIME type of the generated candidate text.
  123. ///
  124. /// Supported MIME types:
  125. /// - `text/plain`: Text output; the default behavior if unspecified.
  126. /// - `application/json`: JSON response in the candidates.
  127. /// - `text/x.enum`: For classification tasks, output an enum value as defined in the
  128. /// `responseSchema`.
  129. /// - responseSchema: Output schema of the generated candidate text. If set, a compatible
  130. /// `responseMIMEType` must also be set.
  131. ///
  132. /// Compatible MIME types:
  133. /// - `application/json`: Schema for JSON response.
  134. ///
  135. /// Refer to the
  136. /// [Generate structured
  137. /// output](https://firebase.google.com/docs/vertex-ai/structured-output?platform=ios) guide
  138. /// for more details.
  139. /// - responseModalities: The data types (modalities) that may be returned in model responses.
  140. ///
  141. /// See the [multimodal
  142. /// responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
  143. /// documentation for more details.
  144. ///
  145. /// > Warning: Specifying response modalities is a **Public Preview** feature, which means
  146. /// > that it is not subject to any SLA or deprecation policy and could change in
  147. /// > backwards-incompatible ways.
  148. /// - thinkingConfig: Configuration for controlling the "thinking" behavior of compatible Gemini
  149. /// models; see ``ThinkingConfig`` for more details.
  150. public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
  151. candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
  152. presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
  153. stopSequences: [String]? = nil, responseMIMEType: String? = nil,
  154. responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil,
  155. thinkingConfig: ThinkingConfig? = nil) {
  156. // Explicit init because otherwise if we re-arrange the above variables it changes the API
  157. // surface.
  158. self.temperature = temperature
  159. self.topP = topP
  160. self.topK = topK
  161. self.candidateCount = candidateCount
  162. self.maxOutputTokens = maxOutputTokens
  163. self.presencePenalty = presencePenalty
  164. self.frequencyPenalty = frequencyPenalty
  165. self.stopSequences = stopSequences
  166. self.responseMIMEType = responseMIMEType
  167. self.responseSchema = responseSchema
  168. responseJSONSchema = nil
  169. self.responseModalities = responseModalities
  170. self.thinkingConfig = thinkingConfig
  171. }
  172. init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, candidateCount: Int? = nil,
  173. maxOutputTokens: Int? = nil, presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
  174. stopSequences: [String]? = nil, responseMIMEType: String, responseJSONSchema: JSONObject,
  175. responseModalities: [ResponseModality]? = nil, thinkingConfig: ThinkingConfig? = nil) {
  176. self.temperature = temperature
  177. self.topP = topP
  178. self.topK = topK
  179. self.candidateCount = candidateCount
  180. self.maxOutputTokens = maxOutputTokens
  181. self.presencePenalty = presencePenalty
  182. self.frequencyPenalty = frequencyPenalty
  183. self.stopSequences = stopSequences
  184. self.responseMIMEType = responseMIMEType
  185. responseSchema = nil
  186. self.responseJSONSchema = responseJSONSchema
  187. self.responseModalities = responseModalities
  188. self.thinkingConfig = thinkingConfig
  189. }
  190. }
  191. // MARK: - Codable Conformances
  192. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  193. extension GenerationConfig: Encodable {
  194. enum CodingKeys: String, CodingKey {
  195. case temperature
  196. case topP
  197. case topK
  198. case candidateCount
  199. case maxOutputTokens
  200. case presencePenalty
  201. case frequencyPenalty
  202. case stopSequences
  203. case responseMIMEType = "responseMimeType"
  204. case responseSchema
  205. case responseJSONSchema = "responseJsonSchema"
  206. case responseModalities
  207. case thinkingConfig
  208. }
  209. }