  // MultimodalSnippets.swift
  // Copyright 2024 Google LLC
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  // http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  14. import FirebaseCore
  15. import FirebaseVertexAI
  16. import XCTest
  17. #if canImport(UIKit)
  18. import UIKit
  19. #endif // canImport(UIKit)
  // These snippet tests are intentionally skipped in CI jobs; see the README file in this directory
  // for instructions on running them manually.

  /// Snippets that send a media part (image, video, audio, or PDF) together with a text prompt to
  /// a generative model, once with `generateContent` and once with `generateContentStream`.
  ///
  /// Every test prints the generated text; none of them asserts on the model output.
  @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
  final class MultimodalSnippets: XCTestCase {
    /// Bundle from which the media resources used by these snippets are looked up.
    let bundle = BundleTestUtil.bundle()

    /// Model that every snippet sends its prompt to.
    lazy var model = VertexAI.vertexAI().generativeModel(modelName: "gemini-2.0-flash")

    /// URL of `animals.mp4` inside `bundle`. Traps if the resource is missing.
    lazy var videoURL: URL = {
      guard let url = bundle.url(forResource: "animals", withExtension: "mp4") else {
        fatalError("Video file animals.mp4 not found in Resources.")
      }
      return url
    }()

    /// URL of `hello-world.mp3` inside `bundle`. Traps if the resource is missing.
    lazy var audioURL: URL = {
      guard let url = bundle.url(forResource: "hello-world", withExtension: "mp3") else {
        fatalError("Audio file hello-world.mp3 not found in Resources.")
      }
      return url
    }()

    /// URL of `gemini-report.pdf` inside `bundle`. Traps if the resource is missing.
    lazy var pdfURL: URL = {
      guard let url = bundle.url(forResource: "gemini-report", withExtension: "pdf") else {
        fatalError("PDF file gemini-report.pdf not found in Resources.")
      }
      return url
    }()

    /// Calls `FirebaseApp.configureDefaultAppForSnippets()` before each test.
    override func setUpWithError() throws {
      try FirebaseApp.configureDefaultAppForSnippets()
    }

    /// Calls `FirebaseApp.deleteDefaultAppForSnippets()` after each test.
    override func tearDown() async throws {
      await FirebaseApp.deleteDefaultAppForSnippets()
    }

    // MARK: - Image Input

    #if canImport(UIKit)

      /// Sends one image plus a text prompt and prints the complete response.
      func testMultimodalOneImageNonStreaming() async throws {
        // Fail this test (instead of crashing the whole test run) if the symbol is unavailable.
        let image = try XCTUnwrap(
          UIImage(systemName: "bicycle"),
          "System image 'bicycle' could not be loaded."
        )

        // Provide a text prompt to include with the image
        let prompt = "What's in this picture?"

        // To generate text output, call generateContent and pass in the prompt
        let response = try await model.generateContent(image, prompt)
        print(response.text ?? "No text in response.")
      }

      /// Sends one image plus a text prompt and prints each streamed chunk that contains text.
      func testMultimodalOneImageStreaming() async throws {
        // Fail this test (instead of crashing the whole test run) if the symbol is unavailable.
        let image = try XCTUnwrap(
          UIImage(systemName: "bicycle"),
          "System image 'bicycle' could not be loaded."
        )

        // Provide a text prompt to include with the image
        let prompt = "What's in this picture?"

        // To stream generated text output, call generateContentStream and pass in the prompt
        let contentStream = try model.generateContentStream(image, prompt)
        for try await chunk in contentStream {
          if let text = chunk.text {
            print(text)
          }
        }
      }

      /// Sends two images plus a text prompt and prints the complete response.
      func testMultimodalMultiImagesNonStreaming() async throws {
        // Fail this test (instead of crashing the whole test run) if a symbol is unavailable.
        let image1 = try XCTUnwrap(
          UIImage(systemName: "car"),
          "System image 'car' could not be loaded."
        )
        let image2 = try XCTUnwrap(
          UIImage(systemName: "car.2"),
          "System image 'car.2' could not be loaded."
        )

        // Provide a text prompt to include with the images
        let prompt = "What's different between these pictures?"

        // To generate text output, call generateContent and pass in the prompt
        let response = try await model.generateContent(image1, image2, prompt)
        print(response.text ?? "No text in response.")
      }

      /// Sends two images plus a text prompt and prints each streamed chunk that contains text.
      func testMultimodalMultiImagesStreaming() async throws {
        // Fail this test (instead of crashing the whole test run) if a symbol is unavailable.
        let image1 = try XCTUnwrap(
          UIImage(systemName: "car"),
          "System image 'car' could not be loaded."
        )
        let image2 = try XCTUnwrap(
          UIImage(systemName: "car.2"),
          "System image 'car.2' could not be loaded."
        )

        // Provide a text prompt to include with the images
        let prompt = "What's different between these pictures?"

        // To stream generated text output, call generateContentStream and pass in the prompt
        let contentStream = try model.generateContentStream(image1, image2, prompt)
        for try await chunk in contentStream {
          if let text = chunk.text {
            print(text)
          }
        }
      }

    #endif // canImport(UIKit)

    // MARK: - Video Input

    /// Sends the bundled video plus a text prompt and prints the complete response.
    func testMultimodalVideoNonStreaming() async throws {
      // Provide the video as `Data` with the appropriate MIME type
      let video = try InlineDataPart(data: Data(contentsOf: videoURL), mimeType: "video/mp4")

      // Provide a text prompt to include with the video
      let prompt = "What is in the video?"

      // To generate text output, call generateContent with the text and video
      let response = try await model.generateContent(video, prompt)
      print(response.text ?? "No text in response.")
    }

    /// Sends the bundled video plus a text prompt and prints each streamed chunk that contains
    /// text.
    func testMultimodalVideoStreaming() async throws {
      // Provide the video as `Data` with the appropriate MIME type
      let video = try InlineDataPart(data: Data(contentsOf: videoURL), mimeType: "video/mp4")

      // Provide a text prompt to include with the video
      let prompt = "What is in the video?"

      // To stream generated text output, call generateContentStream with the text and video
      let contentStream = try model.generateContentStream(video, prompt)
      for try await chunk in contentStream {
        if let text = chunk.text {
          print(text)
        }
      }
    }

    // MARK: - Audio Input

    /// Sends the bundled audio clip plus a text prompt and prints the complete response.
    func testMultiModalAudioNonStreaming() async throws {
      // Provide the audio as `Data` with the appropriate MIME type
      let audio = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")

      // Provide a text prompt to include with the audio
      let prompt = "Transcribe what's said in this audio recording."

      // To generate text output, call `generateContent` with the audio and text prompt
      let response = try await model.generateContent(audio, prompt)

      // Print the generated text, handling the case where it might be nil
      print(response.text ?? "No text in response.")
    }

    /// Sends the bundled audio clip plus a text prompt and prints each streamed chunk that
    /// contains text.
    func testMultiModalAudioStreaming() async throws {
      // Provide the audio as `Data` with the appropriate MIME type
      let audio = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")

      // Provide a text prompt to include with the audio
      let prompt = "Transcribe what's said in this audio recording."

      // To stream generated text output, call `generateContentStream` with the audio and text
      // prompt
      let contentStream = try model.generateContentStream(audio, prompt)

      // Print the generated text, handling the case where it might be nil
      for try await chunk in contentStream {
        if let text = chunk.text {
          print(text)
        }
      }
    }

    // MARK: - Document Input

    /// Sends the bundled PDF plus a text prompt and prints each streamed chunk that contains text.
    func testMultiModalPDFStreaming() async throws {
      // Provide the PDF as `Data` with the appropriate MIME type
      let pdf = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")

      // Provide a text prompt to include with the PDF file
      let prompt = "Summarize the important results in this report."

      // To stream generated text output, call `generateContentStream` with the PDF file and text
      // prompt
      let contentStream = try model.generateContentStream(pdf, prompt)

      // Print the generated text, handling the case where it might be nil
      for try await chunk in contentStream {
        if let text = chunk.text {
          print(text)
        }
      }
    }

    /// Sends the bundled PDF plus a text prompt and prints the complete response.
    func testMultiModalPDFNonStreaming() async throws {
      // Provide the PDF as `Data` with the appropriate MIME type
      let pdf = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")

      // Provide a text prompt to include with the PDF file
      let prompt = "Summarize the important results in this report."

      // To generate text output, call `generateContent` with the PDF file and text prompt
      let response = try await model.generateContent(pdf, prompt)

      // Print the generated text, handling the case where it might be nil
      print(response.text ?? "No text in response.")
    }
  }