MultimodalSnippets.swift 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  // Copyright 2024 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  14. import FirebaseAILogic
  15. import FirebaseCore
  16. import XCTest
  17. #if canImport(UIKit)
  18. import UIKit
  19. #endif // canImport(UIKit)
  // These snippet tests are intentionally skipped in CI jobs; see the README file in this directory
  // for instructions on running them manually.
  /// Snippet tests showing how to combine a text prompt with image, video, audio, or PDF input
  /// when calling `generateContent` / `generateContentStream` on a generative model.
  ///
  /// Each test only prints the text it receives; none of them assert on the generated content.
  @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  final class MultimodalSnippets: XCTestCase {
    /// Bundle that the media resources used below are looked up in.
    let bundle = BundleTestUtil.bundle()

    /// Model shared by all snippets; created on first access.
    lazy var model = FirebaseAI.firebaseAI(backend: .vertexAI()).generativeModel(
      modelName: "gemini-2.0-flash"
    )

    /// URL of the `animals.mp4` resource in `bundle`.
    lazy var videoURL: URL = resourceURL(kind: "Video", name: "animals", fileExtension: "mp4")

    /// URL of the `hello-world.mp3` resource in `bundle`.
    lazy var audioURL: URL = resourceURL(kind: "Audio", name: "hello-world", fileExtension: "mp3")

    /// URL of the `gemini-report.pdf` resource in `bundle`.
    lazy var pdfURL: URL = resourceURL(kind: "PDF", name: "gemini-report", fileExtension: "pdf")

    /// Returns the URL of a resource in `bundle`.
    ///
    /// A missing resource is treated as a programmer error and stops execution with a message
    /// naming the file, because the URL properties that use this helper are non-optional.
    ///
    /// - Parameters:
    ///   - kind: Human-readable media kind used in the failure message (for example "Video").
    ///   - name: Resource name without its extension.
    ///   - fileExtension: Resource file extension without the leading dot.
    /// - Returns: The URL of the resource inside `bundle`.
    private func resourceURL(kind: String, name: String, fileExtension: String) -> URL {
      guard let url = bundle.url(forResource: name, withExtension: fileExtension) else {
        fatalError("\(kind) file \(name).\(fileExtension) not found in Resources.")
      }
      return url
    }

    /// Configures the default `FirebaseApp` before each test.
    override func setUpWithError() throws {
      try FirebaseApp.configureDefaultAppForSnippets()
    }

    /// Deletes the default `FirebaseApp` after each test.
    override func tearDown() async throws {
      await FirebaseApp.deleteDefaultAppForSnippets()
    }

    // MARK: - Image Input

    #if canImport(UIKit)

      /// Sends one image plus a text prompt and prints the complete response.
      func testMultimodalOneImageNonStreaming() async throws {
        // Fail this test (rather than crash the test process) if the symbol is unavailable.
        let image = try XCTUnwrap(
          UIImage(systemName: "bicycle"),
          "System image \"bicycle\" is unavailable."
        )

        // Provide a text prompt to include with the image
        let prompt = "What's in this picture?"

        // To generate text output, call generateContent and pass in the prompt
        let response = try await model.generateContent(image, prompt)
        print(response.text ?? "No text in response.")
      }

      /// Sends one image plus a text prompt and prints the response chunk by chunk.
      func testMultimodalOneImageStreaming() async throws {
        // Fail this test (rather than crash the test process) if the symbol is unavailable.
        let image = try XCTUnwrap(
          UIImage(systemName: "bicycle"),
          "System image \"bicycle\" is unavailable."
        )

        // Provide a text prompt to include with the image
        let prompt = "What's in this picture?"

        // To stream generated text output, call generateContentStream and pass in the prompt
        let contentStream = try model.generateContentStream(image, prompt)
        for try await chunk in contentStream {
          // A chunk may carry no text; print only the ones that do.
          if let text = chunk.text {
            print(text)
          }
        }
      }

      /// Sends two images plus a text prompt and prints the complete response.
      func testMultimodalMultiImagesNonStreaming() async throws {
        // Fail this test (rather than crash the test process) if a symbol is unavailable.
        let image1 = try XCTUnwrap(
          UIImage(systemName: "car"),
          "System image \"car\" is unavailable."
        )
        let image2 = try XCTUnwrap(
          UIImage(systemName: "car.2"),
          "System image \"car.2\" is unavailable."
        )

        // Provide a text prompt to include with the images
        let prompt = "What's different between these pictures?"

        // To generate text output, call generateContent and pass in the prompt
        let response = try await model.generateContent(image1, image2, prompt)
        print(response.text ?? "No text in response.")
      }

      /// Sends two images plus a text prompt and prints the response chunk by chunk.
      func testMultimodalMultiImagesStreaming() async throws {
        // Fail this test (rather than crash the test process) if a symbol is unavailable.
        let image1 = try XCTUnwrap(
          UIImage(systemName: "car"),
          "System image \"car\" is unavailable."
        )
        let image2 = try XCTUnwrap(
          UIImage(systemName: "car.2"),
          "System image \"car.2\" is unavailable."
        )

        // Provide a text prompt to include with the images
        let prompt = "What's different between these pictures?"

        // To stream generated text output, call generateContentStream and pass in the prompt
        let contentStream = try model.generateContentStream(image1, image2, prompt)
        for try await chunk in contentStream {
          // A chunk may carry no text; print only the ones that do.
          if let text = chunk.text {
            print(text)
          }
        }
      }

    #endif // canImport(UIKit)

    // MARK: - Video Input

    /// Sends the bundled video plus a text prompt and prints the complete response.
    func testMultimodalVideoNonStreaming() async throws {
      // Provide the video as `Data` with the appropriate MIME type
      let video = try InlineDataPart(data: Data(contentsOf: videoURL), mimeType: "video/mp4")

      // Provide a text prompt to include with the video
      let prompt = "What is in the video?"

      // To generate text output, call generateContent with the text and video
      let response = try await model.generateContent(video, prompt)
      print(response.text ?? "No text in response.")
    }

    /// Sends the bundled video plus a text prompt and prints the response chunk by chunk.
    func testMultimodalVideoStreaming() async throws {
      // Provide the video as `Data` with the appropriate MIME type
      let video = try InlineDataPart(data: Data(contentsOf: videoURL), mimeType: "video/mp4")

      // Provide a text prompt to include with the video
      let prompt = "What is in the video?"

      // To stream generated text output, call generateContentStream with the text and video
      let contentStream = try model.generateContentStream(video, prompt)
      for try await chunk in contentStream {
        // A chunk may carry no text; print only the ones that do.
        if let text = chunk.text {
          print(text)
        }
      }
    }

    // MARK: - Audio Input

    /// Sends the bundled audio clip plus a text prompt and prints the complete response.
    func testMultiModalAudioNonStreaming() async throws {
      // Provide the audio as `Data` with the appropriate MIME type
      let audio = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")

      // Provide a text prompt to include with the audio
      let prompt = "Transcribe what's said in this audio recording."

      // To generate text output, call `generateContent` with the audio and text prompt
      let response = try await model.generateContent(audio, prompt)

      // Print the generated text, handling the case where it might be nil
      print(response.text ?? "No text in response.")
    }

    /// Sends the bundled audio clip plus a text prompt and prints the response chunk by chunk.
    func testMultiModalAudioStreaming() async throws {
      // Provide the audio as `Data` with the appropriate MIME type
      let audio = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")

      // Provide a text prompt to include with the audio
      let prompt = "Transcribe what's said in this audio recording."

      // To stream generated text output, call `generateContentStream` with the audio and text
      // prompt
      let contentStream = try model.generateContentStream(audio, prompt)

      // Print the generated text, handling the case where it might be nil
      for try await chunk in contentStream {
        if let text = chunk.text {
          print(text)
        }
      }
    }

    // MARK: - Document Input

    /// Sends the bundled PDF plus a text prompt and prints the response chunk by chunk.
    func testMultiModalPDFStreaming() async throws {
      // Provide the PDF as `Data` with the appropriate MIME type
      let pdf = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")

      // Provide a text prompt to include with the PDF file
      let prompt = "Summarize the important results in this report."

      // To stream generated text output, call `generateContentStream` with the PDF file and text
      // prompt
      let contentStream = try model.generateContentStream(pdf, prompt)

      // Print the generated text, handling the case where it might be nil
      for try await chunk in contentStream {
        if let text = chunk.text {
          print(text)
        }
      }
    }

    /// Sends the bundled PDF plus a text prompt and prints the complete response.
    func testMultiModalPDFNonStreaming() async throws {
      // Provide the PDF as `Data` with the appropriate MIME type
      let pdf = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")

      // Provide a text prompt to include with the PDF file
      let prompt = "Summarize the important results in this report."

      // To generate text output, call `generateContent` with the PDF file and text prompt
      let response = try await model.generateContent(pdf, prompt)

      // Print the generated text, handling the case where it might be nil
      print(response.text ?? "No text in response.")
    }
  }