From 936ec3f496c8812b8bf3280a1e516f160e71e744 Mon Sep 17 00:00:00 2001
From: Zach Nagengast <zacharynagengast@gmail.com>
Date: Wed, 15 Jan 2025 18:57:05 -0800
Subject: [PATCH] Add repo option to regression test matrix (#293)

* Add repo and token option to regression test matrix

* Add default Debug.xcconfig file

* Update fastlane to run on repo from benchmark config

* Formatting
---
 Examples/WhisperAX/Debug.xcconfig             |  2 +
 .../xcshareddata/xcschemes/WhisperAX.xcscheme |  5 ++
 Sources/WhisperKit/Core/Configurations.swift  |  4 +
 Sources/WhisperKit/Core/WhisperKit.swift      |  6 +-
 .../WhisperKitTests/RegressionTestUtils.swift |  6 ++
 Tests/WhisperKitTests/RegressionTests.swift   | 78 +++++++++++++------
 fastlane/Fastfile                             | 16 +++-
 7 files changed, 89 insertions(+), 28 deletions(-)
 create mode 100644 Examples/WhisperAX/Debug.xcconfig
diff --git a/Examples/WhisperAX/Debug.xcconfig b/Examples/WhisperAX/Debug.xcconfig
new file mode 100644
index 0000000..be6bbdd
--- /dev/null
+++ b/Examples/WhisperAX/Debug.xcconfig
@@ -0,0 +1,2 @@
+// Run `make setup` to add your team here
+DEVELOPMENT_TEAM=
diff --git a/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme b/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme
index 236ed0e..48d9731 100644
--- a/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme
+++ b/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme
@@ -79,6 +79,11 @@
             value = "$(MODEL_NAME)"
             isEnabled = "YES">
          </EnvironmentVariable>
+         <EnvironmentVariable
+            key = "MODEL_REPO"
+            value = "$(MODEL_REPO)"
+            isEnabled = "YES">
+         </EnvironmentVariable>
       </EnvironmentVariables>
    </LaunchAction>
    <ProfileAction
diff --git a/Sources/WhisperKit/Core/Configurations.swift b/Sources/WhisperKit/Core/Configurations.swift
index 1547b1f..65d899e 100644
--- a/Sources/WhisperKit/Core/Configurations.swift
+++ b/Sources/WhisperKit/Core/Configurations.swift
@@ -12,6 +12,8 @@ open class WhisperKitConfig {
     public var downloadBase: URL?
     /// Repository for downloading models
     public var modelRepo: String?
+    /// Token for downloading models from repo (if required)
+    public var modelToken: String?
 
     /// Folder to store models
     public var modelFolder: String?
@@ -47,6 +49,7 @@ open class WhisperKitConfig {
     public init(model: String? = nil,
                 downloadBase: URL? = nil,
                 modelRepo: String? = nil,
+                modelToken: String? = nil,
                 modelFolder: String? = nil,
                 tokenizerFolder: URL? = nil,
                 computeOptions: ModelComputeOptions? = nil,
@@ -67,6 +70,7 @@ open class WhisperKitConfig {
         self.model = model
         self.downloadBase = downloadBase
         self.modelRepo = modelRepo
+        self.modelToken = modelToken
         self.modelFolder = modelFolder
         self.tokenizerFolder = tokenizerFolder
         self.computeOptions = computeOptions
diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift
index 6cccf01..26b8961 100644
--- a/Sources/WhisperKit/Core/WhisperKit.swift
+++ b/Sources/WhisperKit/Core/WhisperKit.swift
@@ -68,10 +68,10 @@ open class WhisperKit {
             model: config.model,
             downloadBase: config.downloadBase,
             modelRepo: config.modelRepo,
+            modelToken: config.modelToken,
             modelFolder: config.modelFolder,
             download: config.download
         )
-        
 
         if let prewarm = config.prewarm, prewarm {
             Logging.info("Prewarming models...")
@@ -295,6 +295,7 @@ open class WhisperKit {
         model: String?,
         downloadBase: URL? = nil,
         modelRepo: String?,
+        modelToken: String? = nil,
         modelFolder: String?,
         download: Bool
     ) async throws {
@@ -312,7 +313,8 @@ open class WhisperKit {
                     variant: modelVariant,
                     downloadBase: downloadBase,
                     useBackgroundSession: useBackgroundDownloadSession,
-                    from: repo
+                    from: repo,
+                    token: modelToken
                 )
             } catch {
                 // Handle errors related to model downloading
diff --git a/Tests/WhisperKitTests/RegressionTestUtils.swift b/Tests/WhisperKitTests/RegressionTestUtils.swift
index 07a25db..f3aac3e 100644
--- a/Tests/WhisperKitTests/RegressionTestUtils.swift
+++ b/Tests/WhisperKitTests/RegressionTestUtils.swift
@@ -54,6 +54,7 @@ class TestInfo: JSONCodable {
     let datasetDir: String
     let datasetRepo: String
     let model: String
+    let modelRepo: String
     let modelSizeMB: Double
     let date: String
     let timeElapsedInSeconds: TimeInterval
@@ -69,6 +70,7 @@ class TestInfo: JSONCodable {
         datasetDir: String,
         datasetRepo: String,
         model: String,
+        modelRepo: String,
         modelSizeMB: Double,
         date: String,
         timeElapsedInSeconds: TimeInterval,
@@ -83,6 +85,7 @@ class TestInfo: JSONCodable {
         self.datasetDir = datasetDir
         self.datasetRepo = datasetRepo
         self.model = model
+        self.modelRepo = modelRepo
         self.modelSizeMB = modelSizeMB
         self.date = date
         self.timeElapsedInSeconds = timeElapsedInSeconds
@@ -101,6 +104,7 @@ struct TestReport: JSONCodable {
     let osType: String
     let osVersion: String
     let modelsTested: [String]
+    let modelReposTested: [String]
     let failureInfo: [String: String]
     let attachments: [String: String]
 
@@ -109,6 +113,7 @@ struct TestReport: JSONCodable {
         osType: String,
         osVersion: String,
         modelsTested: [String],
+        modelReposTested: [String],
         failureInfo: [String: String],
         attachments: [String: String]
     ) {
@@ -116,6 +121,7 @@ struct TestReport: JSONCodable {
         self.osType = osType
         self.osVersion = osVersion
         self.modelsTested = modelsTested
+        self.modelReposTested = modelReposTested
         self.failureInfo = failureInfo
         self.attachments = attachments
     }
diff --git a/Tests/WhisperKitTests/RegressionTests.swift b/Tests/WhisperKitTests/RegressionTests.swift
index 5f5c095..33047c3 100644
--- a/Tests/WhisperKitTests/RegressionTests.swift
+++ b/Tests/WhisperKitTests/RegressionTests.swift
@@ -13,19 +13,22 @@ import WatchKit
 #endif
 
 @available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
-final class RegressionTests: XCTestCase {
+class RegressionTests: XCTestCase {
     var audioFileURLs: [URL]?
     var remoteFileURLs: [URL]?
     var metadataURL: URL?
     var testWERURLs: [URL]?
     var modelsToTest: [String] = []
+    var modelReposToTest: [String] = []
     var modelsTested: [String] = []
+    var modelReposTested: [String] = []
     var optionsToTest: [DecodingOptions] = [DecodingOptions()]
 
     struct TestConfig {
         let dataset: String
         let modelComputeOptions: ModelComputeOptions
         var model: String
+        var modelRepo: String
         let decodingOptions: DecodingOptions
     }
 
@@ -34,6 +37,7 @@ final class RegressionTests: XCTestCase {
     var datasets = ["librispeech-10mins", "earnings22-10mins"]
     let debugDataset = ["earnings22-10mins"]
     let debugModels = ["tiny"]
+    let debugRepos = ["argmaxinc/whisperkit-coreml"]
 
     var computeOptions: [ModelComputeOptions] = [
         ModelComputeOptions(audioEncoderCompute: .cpuAndNeuralEngine, textDecoderCompute: .cpuAndNeuralEngine),
@@ -71,16 +75,29 @@ final class RegressionTests: XCTestCase {
         Logging.debug("Max memory before warning: \(maxMemory)")
     }
 
-    func testEnvConfigurations(defaultModels: [String]? = nil) {
+    class func getModelToken() -> String? {
+        // No token by default: add one here, or override in a subclass, when the model repo requires it
+        return nil
+    }
+
+    func testEnvConfigurations(defaultModels: [String]? = nil, defaultRepos: [String]? = nil) {
         if let modelSizeEnv = ProcessInfo.processInfo.environment["MODEL_NAME"], !modelSizeEnv.isEmpty {
             modelsToTest = [modelSizeEnv]
             Logging.debug("Model size: \(modelSizeEnv)")
+
+            // An unset or empty MODEL_REPO falls back to the default repos; an empty repo list would yield an empty test matrix
+            let repoEnv = ProcessInfo.processInfo.environment["MODEL_REPO"] ?? ""
+            modelReposToTest = repoEnv.isEmpty ? (defaultRepos ?? debugRepos) : [repoEnv]
+            Logging.debug("Using repo: \(modelReposToTest.joined(separator: ", "))")
+
             XCTAssertTrue(modelsToTest.count > 0, "Invalid model size: \(modelSizeEnv)")
+
             if modelSizeEnv == "crash_test" {
                 fatalError("Crash test triggered")
             }
         } else {
             modelsToTest = defaultModels ?? debugModels
+            modelReposToTest = defaultRepos ?? debugRepos
             Logging.debug("Model size not set by env")
         }
     }
@@ -116,7 +133,7 @@ final class RegressionTests: XCTestCase {
 
     // MARK: - Test Pipeline
 
-    private func runRegressionTests(with testMatrix: [TestConfig]) async throws {
+    public func runRegressionTests(with testMatrix: [TestConfig]) async throws {
         var failureInfo: [String: String] = [:]
         var attachments: [String: String] = [:]
         let device = getCurrentDevice()
@@ -159,8 +176,7 @@ final class RegressionTests: XCTestCase {
 
         // Create WhisperKit instance with checks for memory usage
         let whisperKit = try await createWithMemoryCheck(
-            model: config.model,
-            computeOptions: config.modelComputeOptions,
+            testConfig: config,
             verbose: true,
             logLevel: .debug
         )
@@ -169,6 +185,8 @@ final class RegressionTests: XCTestCase {
             config.model = modelFile
             modelsTested.append(modelFile)
             modelsTested = Array(Set(modelsTested))
+            modelReposTested.append(config.modelRepo)
+            modelReposTested = Array(Set(modelReposTested))
         }
 
         for audioFilePath in audioFilePaths {
@@ -295,6 +313,7 @@ final class RegressionTests: XCTestCase {
             datasetDir: config.dataset,
             datasetRepo: datasetRepo,
             model: config.model,
+            modelRepo: config.modelRepo,
             modelSizeMB: modelSizeMB ?? -1,
             date: startTime.formatted(Date.ISO8601FormatStyle().dateSeparator(.dash)),
             timeElapsedInSeconds: Date().timeIntervalSince(startTime),
@@ -432,20 +451,23 @@ final class RegressionTests: XCTestCase {
         }
     }
 
-    private func getTestMatrix() -> [TestConfig] {
+    public func getTestMatrix() -> [TestConfig] {
         var regressionTestConfigMatrix: [TestConfig] = []
         for dataset in datasets {
             for computeOption in computeOptions {
                 for options in optionsToTest {
-                    for model in modelsToTest {
-                        regressionTestConfigMatrix.append(
-                            TestConfig(
-                                dataset: dataset,
-                                modelComputeOptions: computeOption,
-                                model: model,
-                                decodingOptions: options
+                    for repo in modelReposToTest {
+                        for model in modelsToTest {
+                            regressionTestConfigMatrix.append(
+                                TestConfig(
+                                    dataset: dataset,
+                                    modelComputeOptions: computeOption,
+                                    model: model,
+                                    modelRepo: repo,
+                                    decodingOptions: options
+                                )
                             )
-                        )
+                        }
                     }
                 }
             }
@@ -555,6 +577,7 @@ final class RegressionTests: XCTestCase {
             osType: osDetails.osType,
             osVersion: osDetails.osVersion,
             modelsTested: modelsTested,
+            modelReposTested: modelReposTested,
             failureInfo: failureInfo,
             attachments: attachments
         )
@@ -610,17 +633,14 @@ final class RegressionTests: XCTestCase {
         return Double(modelSize / (1024 * 1024)) // Convert to MB
     }
 
-    func createWithMemoryCheck(
-        model: String,
-        computeOptions: ModelComputeOptions,
-        verbose: Bool,
-        logLevel: Logging.LogLevel
-    ) async throws -> WhisperKit {
+    public func initWhisperKitTask(testConfig config: TestConfig, verbose: Bool, logLevel: Logging.LogLevel) -> Task<WhisperKit, Error> {
         // Create the initialization task
         let initializationTask = Task { () -> WhisperKit in
             let whisperKit = try await WhisperKit(WhisperKitConfig(
-                model: model,
-                computeOptions: computeOptions,
+                model: config.model,
+                modelRepo: config.modelRepo,
+                modelToken: Self.getModelToken(),
+                computeOptions: config.modelComputeOptions,
                 verbose: verbose,
                 logLevel: logLevel,
                 prewarm: true,
@@ -629,6 +649,20 @@ final class RegressionTests: XCTestCase {
             try Task.checkCancellation()
             return whisperKit
         }
+        return initializationTask
+    }
+
+    func createWithMemoryCheck(
+        testConfig: TestConfig,
+        verbose: Bool,
+        logLevel: Logging.LogLevel
+    ) async throws -> WhisperKit {
+        // Create the initialization task
+        let initializationTask = initWhisperKitTask(
+            testConfig: testConfig,
+            verbose: verbose,
+            logLevel: logLevel
+        )
 
         // Start the memory monitoring task
         let monitorTask = Task {
diff --git a/fastlane/Fastfile b/fastlane/Fastfile
index 6325ecb..1059bc2 100644
--- a/fastlane/Fastfile
+++ b/fastlane/Fastfile
@@ -23,7 +23,7 @@ BASE_BENCHMARK_PATH = "#{WORKING_DIR}/benchmark_data".freeze
 BASE_UPLOAD_PATH = "#{WORKING_DIR}/upload_folder".freeze
 XCRESULT_PATH = File.expand_path("#{BASE_BENCHMARK_PATH}/#{COMMIT_TIMESTAMP}_#{COMMIT_HASH}/")
 BENCHMARK_REPO = 'argmaxinc/whisperkit-evals-dataset'.freeze
-BENCHMARK_CONFIGS = {
+BENCHMARK_CONFIGS ||= {
   full: {
     test_identifier: 'WhisperAXTests/RegressionTests/testModelPerformance',
     name: 'full',
@@ -50,12 +50,14 @@ BENCHMARK_CONFIGS = {
       'openai_whisper-large-v3-v20240930_turbo',
       'openai_whisper-large-v3-v20240930_626MB',
       'openai_whisper-large-v3-v20240930_turbo_632MB'
-    ]
+    ],
+    repo: 'argmaxinc/whisperkit-coreml'
   },
   debug: {
     test_identifier: 'WhisperAXTests/RegressionTests/testModelPerformanceWithDebugConfig',
     name: 'debug',
-    models: ['tiny', 'crash_test', 'unknown_model', 'small.en']
+    models: ['tiny', 'crash_test', 'unknown_model', 'small.en'],
+    repo: 'argmaxinc/whisperkit-coreml'
   }
 }.freeze
 
@@ -200,7 +202,9 @@ end
 
 def run_benchmark(devices, config)
   summaries = []
-  BENCHMARK_CONFIGS[config][:models].each do |model|
+  config_data = BENCHMARK_CONFIGS[config]
+
+  config_data[:models].each do |model|
     begin
       # Sanitize device name for use in file path
       devices_to_test = devices.map { |device_info| device_info[:name] }.compact
@@ -228,8 +232,12 @@ def run_benchmark(devices, config)
       UI.message "Running in #{BENCHMARK_CONFIGS[config][:name]} mode"
 
       UI.message "Running benchmark for model: #{model}"
+      UI.message 'Using Hugging Face:'
+      UI.message "  • Repository: #{config_data[:repo]}"
+
       xcargs = [
         "MODEL_NAME=#{model}",
+        "MODEL_REPO=#{config_data[:repo]}",
         '-allowProvisioningUpdates',
         '-allowProvisioningDeviceRegistration'
       ].join(' ')