@@ -76,6 +76,15 @@ struct LiveSessionTests {
       role: "system",
       parts: "When you receive a message, if the message is a single word, assume it's the first name of a person, and call the getLastName tool to get the last name of said person. Only respond with the last name."
     )
+
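+    /// Asks the model to name, in a single word, the animal shown in the streamed video.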
+    static let animalInVideo = ModelContent(
+      role: "system",
+      parts: """
+      Send a one word response of what ANIMAL is in the video. \
+      If you don't receive a video, send "Test is broken, I didn't receive a video.".
+      """.trimmingCharacters(in: .whitespacesAndNewlines)
+    )
   }
 
   @Test(arguments: arguments)
@@ -181,6 +190,53 @@ struct LiveSessionTests {
     #expect(modelResponse == "goodbye")
   }
 
+  // gemini-2.0-flash-live-001 is buggy and tends to respond to the audio or the system instruction
+  // (e.g., it will say 'okay' or 'hello' instead of following the instructions).
+  @Test(arguments: arguments.filter { $0.1 != ModelNames.gemini2FlashLive })
+  func sendVideoRealtime_receiveText(_ config: InstanceConfig, modelName: String) async throws {
+    let model = FirebaseAI.componentInstance(config).liveModel(
+      modelName: modelName,
+      generationConfig: textConfig,
+      systemInstruction: SystemInstructions.animalInVideo
+    )
+
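+    // Open a live, bidirectional streaming session with the model.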
+    let session = try await model.connect()
+    guard let videoFile = NSDataAsset(name: "cat") else {
+      Issue.record("Missing video file 'cat' in Assets")
+      return
+    }
+
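+    // Decode the bundled video into individual frames and stream each one as a PNG image.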
+    let frames = try await videoFile.videoFrames()
+    for frame in frames {
+      await session.sendVideoRealtime(frame, mimeType: "image/png")
+    }
+
+    // The model doesn't respond unless we send some audio too.
+    // Vertex AI also responds if you send text, but Google AI doesn't
+    // (sending audio gets a response from both, though).
+    guard let audioFile = NSDataAsset(name: "hello") else {
+      Issue.record("Missing audio file 'hello' in Assets")
+      return
+    }
+    await session.sendAudioRealtime(audioFile.data)
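+    // Pad with an equal-length buffer of silence so the model can detect that the speaker has finished.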
+    await session.sendAudioRealtime(Data(repeating: 0, count: audioFile.data.count))
+
+    let text = try await session.collectNextTextResponse()
+
+    await session.close()
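+    // Normalize the response: strip surrounding whitespace and punctuation, then lowercase.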
+    let modelResponse = text
+      .trimmingCharacters(in: .whitespacesAndNewlines)
+      .trimmingCharacters(in: .punctuationCharacters)
+      .lowercased()
+
+    // The model's exact wording varies from run to run.
+    #expect(["kitten", "cat", "kitty"].contains(modelResponse))
+  }
+
   @Test(arguments: arguments)
   func realtime_functionCalling(_ config: InstanceConfig, modelName: String) async throws {
     let model = FirebaseAI.componentInstance(config).liveModel(