// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

/// A struct defining model parameters to be used when sending generative AI
/// requests to the backend model.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct GenerationConfig: Sendable {
  /// Controls the degree of randomness in token selection.
  let temperature: Float?

  /// Controls diversity of generated text.
  let topP: Float?

  /// Limits the number of highest probability words considered.
  let topK: Int?

  /// The number of response variations to return.
  let candidateCount: Int?

  /// Maximum number of tokens that can be generated in the response.
  let maxOutputTokens: Int?

  /// Controls the likelihood of repeating the same words or phrases already generated in the text.
  let presencePenalty: Float?

  /// Controls the likelihood of repeating words, with the penalty increasing for each repetition.
  let frequencyPenalty: Float?

  /// A set of up to 5 `String`s that will stop output generation.
  let stopSequences: [String]?

  /// Output response MIME type of the generated candidate text.
  let responseMIMEType: String?

  /// Output schema of the generated candidate text.
  let responseSchema: Schema?

  /// Output schema of the generated response in [JSON Schema](https://json-schema.org/) format.
  ///
  /// If set, `responseSchema` must be omitted and `responseMIMEType` is required.
  let responseJSONSchema: JSONObject?

  /// Supported modalities of the response.
  let responseModalities: [ResponseModality]?

  /// Configuration for controlling the "thinking" behavior of compatible Gemini models.
  let thinkingConfig: ThinkingConfig?

  /// Creates a new `GenerationConfig` value.
  ///
  /// See the
  /// [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters)
  /// guide and the
  /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  /// for more details.
  ///
  /// - Parameters:
  ///   - temperature:Controls the randomness of the language model's output. Higher values (for
  ///     example, 1.0) make the text more random and creative, while lower values (for example,
  ///     0.1) make it more focused and deterministic.
  ///
  ///     > Note: A temperature of 0 means that the highest probability tokens are always selected.
  ///     > In this case, responses for a given prompt are mostly deterministic, but a small amount
  ///     > of variation is still possible.
  ///
  ///     > Important: The range of supported temperature values depends on the model; see the
  ///     > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#temperature)
  ///     > for more details.
  ///   - topP: Controls diversity of generated text. Higher values (e.g., 0.9) produce more diverse
  ///     text, while lower values (e.g., 0.5) make the output more focused.
  ///
  ///     The supported range is 0.0 to 1.0.
  ///
  ///     > Important: The default `topP` value depends on the model; see the
  ///     > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-p)
  ///     > for more details.
  ///   - topK: Limits the number of highest probability words the model considers when generating
  ///     text. For example, a topK of 40 means only the 40 most likely words are considered for the
  ///     next token. A higher value increases diversity, while a lower value makes the output more
  ///     deterministic.
  ///
  ///     The supported range is 1 to 40.
  ///
  ///     > Important: Support for `topK` and the default value depends on the model; see the
  ///     [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-k)
  ///     for more details.
  ///   - candidateCount: The number of response variations to return; defaults to 1 if not set.
  ///     Support for multiple candidates depends on the model; see the
  ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  ///     for more details.
  ///   - maxOutputTokens: Maximum number of tokens that can be generated in the response.
  ///     See the configure model parameters [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#max-output-tokens)
  ///     for more details.
  ///   - presencePenalty: Controls the likelihood of repeating the same words or phrases already
  ///     generated in the text. Higher values increase the penalty of repetition, resulting in more
  ///     diverse output.
  ///
  ///     > Note: While both `presencePenalty` and `frequencyPenalty` discourage repetition,
  ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
  ///     > has already appeared, whereas `frequencyPenalty` increases the penalty for *each*
  ///     > repetition of a word/phrase.
  ///
  ///     > Important: The range of supported `presencePenalty` values depends on the model; see the
  ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  ///     > for more details
  ///   - frequencyPenalty: Controls the likelihood of repeating words or phrases, with the penalty
  ///     increasing for each repetition. Higher values increase the penalty of repetition,
  ///     resulting in more diverse output.
  ///
  ///     > Note: While both `frequencyPenalty` and `presencePenalty` discourage repetition,
  ///     > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas
  ///     > `presencePenalty` applies the same penalty regardless of how many times the word/phrase
  ///     > has already appeared.
  ///
  ///     > Important: The range of supported `frequencyPenalty` values depends on the model; see
  ///     > the
  ///     > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  ///     > for more details
  ///   - stopSequences: A set of up to 5 `String`s that will stop output generation. If specified,
  ///     the API will stop at the first appearance of a stop sequence. The stop sequence will not
  ///     be included as part of the response. See the
  ///     [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig)
  ///     for more details.
  ///   - responseMIMEType: Output response MIME type of the generated candidate text.
  ///
  ///     Supported MIME types:
  ///     - `text/plain`: Text output; the default behavior if unspecified.
  ///     - `application/json`: JSON response in the candidates.
  ///     - `text/x.enum`: For classification tasks, output an enum value as defined in the
  ///       `responseSchema`.
  ///   - responseSchema: Output schema of the generated candidate text. If set, a compatible
  ///     `responseMIMEType` must also be set.
  ///
  ///     Compatible MIME types:
  ///     - `application/json`: Schema for JSON response.
  ///
  ///     Refer to the
  ///     [Generate structured
  ///     output](https://firebase.google.com/docs/vertex-ai/structured-output?platform=ios) guide
  ///     for more details.
  ///   - responseModalities: The data types (modalities) that may be returned in model responses.
  ///
  ///     See the [multimodal
  ///     responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
  ///     documentation for more details.
  ///
  ///     > Warning: Specifying response modalities is a **Public Preview** feature, which means
  ///     > that it is not subject to any SLA or deprecation policy and could change in
  ///     > backwards-incompatible ways.
  ///   - thinkingConfig: Configuration for controlling the "thinking" behavior of compatible Gemini
  ///     models; see ``ThinkingConfig`` for more details.
  public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
              candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
              presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
              stopSequences: [String]? = nil, responseMIMEType: String? = nil,
              responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil,
              thinkingConfig: ThinkingConfig? = nil) {
    // Explicit init because otherwise if we re-arrange the above variables it changes the API
    // surface.
    self.temperature = temperature
    self.topP = topP
    self.topK = topK
    self.candidateCount = candidateCount
    self.maxOutputTokens = maxOutputTokens
    self.presencePenalty = presencePenalty
    self.frequencyPenalty = frequencyPenalty
    self.stopSequences = stopSequences
    self.responseMIMEType = responseMIMEType
    self.responseSchema = responseSchema
    responseJSONSchema = nil
    self.responseModalities = responseModalities
    self.thinkingConfig = thinkingConfig
  }

  init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, candidateCount: Int? = nil,
       maxOutputTokens: Int? = nil, presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
       stopSequences: [String]? = nil, responseMIMEType: String, responseJSONSchema: JSONObject,
       responseModalities: [ResponseModality]? = nil, thinkingConfig: ThinkingConfig? = nil) {
    self.temperature = temperature
    self.topP = topP
    self.topK = topK
    self.candidateCount = candidateCount
    self.maxOutputTokens = maxOutputTokens
    self.presencePenalty = presencePenalty
    self.frequencyPenalty = frequencyPenalty
    self.stopSequences = stopSequences
    self.responseMIMEType = responseMIMEType
    responseSchema = nil
    self.responseJSONSchema = responseJSONSchema
    self.responseModalities = responseModalities
    self.thinkingConfig = thinkingConfig
  }
}

// MARK: - Codable Conformances

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension GenerationConfig: Encodable {
  enum CodingKeys: String, CodingKey {
    case temperature
    case topP
    case topK
    case candidateCount
    case maxOutputTokens
    case presencePenalty
    case frequencyPenalty
    case stopSequences
    case responseMIMEType = "responseMimeType"
    case responseSchema
    case responseJSONSchema = "responseJsonSchema"
    case responseModalities
    case thinkingConfig
  }
}