From 936ec3f496c8812b8bf3280a1e516f160e71e744 Mon Sep 17 00:00:00 2001 From: Zach Nagengast Date: Wed, 15 Jan 2025 18:57:05 -0800 Subject: [PATCH] Add repo option to regression test matrix (#293) * Add repo and token option to regression test matrix * Add default Debug.xcconfig file * Update fastlane to run on repo from benchmark config * Formatting --- Examples/WhisperAX/Debug.xcconfig | 2 + .../xcshareddata/xcschemes/WhisperAX.xcscheme | 5 ++ Sources/WhisperKit/Core/Configurations.swift | 4 + Sources/WhisperKit/Core/WhisperKit.swift | 6 +- .../WhisperKitTests/RegressionTestUtils.swift | 6 ++ Tests/WhisperKitTests/RegressionTests.swift | 78 +++++++++++++------ fastlane/Fastfile | 16 +++- 7 files changed, 89 insertions(+), 28 deletions(-) create mode 100644 Examples/WhisperAX/Debug.xcconfig diff --git a/Examples/WhisperAX/Debug.xcconfig b/Examples/WhisperAX/Debug.xcconfig new file mode 100644 index 0000000..be6bbdd --- /dev/null +++ b/Examples/WhisperAX/Debug.xcconfig @@ -0,0 +1,2 @@ +// Run `make setup` to add your team here +DEVELOPMENT_TEAM= diff --git a/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme b/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme index 236ed0e..48d9731 100644 --- a/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme +++ b/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme @@ -79,6 +79,11 @@ value = "$(MODEL_NAME)" isEnabled = "YES"> + + String? { + // Add token here or override + return nil + } + + func testEnvConfigurations(defaultModels: [String]? = nil, defaultRepos: [String]? = nil) { if let modelSizeEnv = ProcessInfo.processInfo.environment["MODEL_NAME"], !modelSizeEnv.isEmpty { modelsToTest = [modelSizeEnv] Logging.debug("Model size: \(modelSizeEnv)") + + if let repoEnv = ProcessInfo.processInfo.environment["MODEL_REPO"] { + modelReposToTest = [repoEnv] + Logging.debug("Using repo: \(repoEnv)") + } + XCTAssertTrue(modelsToTest.count > 0, "Invalid model size: \(modelSizeEnv)") + if modelSizeEnv == "crash_test" { fatalError("Crash test triggered") } } else { modelsToTest = defaultModels ?? debugModels + modelReposToTest = defaultRepos ?? debugRepos Logging.debug("Model size not set by env") } } @@ -116,7 +133,7 @@ final class RegressionTests: XCTestCase { // MARK: - Test Pipeline - private func runRegressionTests(with testMatrix: [TestConfig]) async throws { + public func runRegressionTests(with testMatrix: [TestConfig]) async throws { var failureInfo: [String: String] = [:] var attachments: [String: String] = [:] let device = getCurrentDevice() @@ -159,8 +176,7 @@ final class RegressionTests: XCTestCase { // Create WhisperKit instance with checks for memory usage let whisperKit = try await createWithMemoryCheck( - model: config.model, - computeOptions: config.modelComputeOptions, + testConfig: config, verbose: true, logLevel: .debug ) @@ -169,6 +185,8 @@ final class RegressionTests: XCTestCase { config.model = modelFile modelsTested.append(modelFile) modelsTested = Array(Set(modelsTested)) + modelReposTested.append(config.modelRepo) + modelReposTested = Array(Set(modelReposTested)) } for audioFilePath in audioFilePaths { @@ -295,6 +313,7 @@ final class RegressionTests: XCTestCase { datasetDir: config.dataset, datasetRepo: datasetRepo, model: config.model, + modelRepo: config.modelRepo, modelSizeMB: modelSizeMB ?? -1, date: startTime.formatted(Date.ISO8601FormatStyle().dateSeparator(.dash)), timeElapsedInSeconds: Date().timeIntervalSince(startTime), @@ -432,20 +451,23 @@ final class RegressionTests: XCTestCase { } } - private func getTestMatrix() -> [TestConfig] { + public func getTestMatrix() -> [TestConfig] { var regressionTestConfigMatrix: [TestConfig] = [] for dataset in datasets { for computeOption in computeOptions { for options in optionsToTest { - for model in modelsToTest { - regressionTestConfigMatrix.append( - TestConfig( - dataset: dataset, - modelComputeOptions: computeOption, - model: model, - decodingOptions: options + for repo in modelReposToTest { + for model in modelsToTest { + regressionTestConfigMatrix.append( + TestConfig( + dataset: dataset, + modelComputeOptions: computeOption, + model: model, + modelRepo: repo, + decodingOptions: options + ) ) - ) + } } } } @@ -555,6 +577,7 @@ final class RegressionTests: XCTestCase { osType: osDetails.osType, osVersion: osDetails.osVersion, modelsTested: modelsTested, + modelReposTested: modelReposTested, failureInfo: failureInfo, attachments: attachments ) @@ -610,17 +633,14 @@ final class RegressionTests: XCTestCase { return Double(modelSize / (1024 * 1024)) // Convert to MB } - func createWithMemoryCheck( - model: String, - computeOptions: ModelComputeOptions, - verbose: Bool, - logLevel: Logging.LogLevel - ) async throws -> WhisperKit { + public func initWhisperKitTask(testConfig config: TestConfig, verbose: Bool, logLevel: Logging.LogLevel) -> Task { // Create the initialization task let initializationTask = Task { () -> WhisperKit in let whisperKit = try await WhisperKit(WhisperKitConfig( - model: model, - computeOptions: computeOptions, + model: config.model, + modelRepo: config.modelRepo, + modelToken: Self.getModelToken(), + computeOptions: config.modelComputeOptions, verbose: verbose, logLevel: logLevel, prewarm: true, @@ -629,6 +649,20 @@ final class RegressionTests: XCTestCase { try Task.checkCancellation() return whisperKit } + return initializationTask + } + + func createWithMemoryCheck( + testConfig: TestConfig, + verbose: Bool, + logLevel: Logging.LogLevel + ) async throws -> WhisperKit { + // Create the initialization task + let initializationTask = initWhisperKitTask( + testConfig: testConfig, + verbose: verbose, + logLevel: logLevel + ) // Start the memory monitoring task let monitorTask = Task { diff --git a/fastlane/Fastfile b/fastlane/Fastfile index 6325ecb..1059bc2 100644 --- a/fastlane/Fastfile +++ b/fastlane/Fastfile @@ -23,7 +23,7 @@ BASE_BENCHMARK_PATH = "#{WORKING_DIR}/benchmark_data".freeze BASE_UPLOAD_PATH = "#{WORKING_DIR}/upload_folder".freeze XCRESULT_PATH = File.expand_path("#{BASE_BENCHMARK_PATH}/#{COMMIT_TIMESTAMP}_#{COMMIT_HASH}/") BENCHMARK_REPO = 'argmaxinc/whisperkit-evals-dataset'.freeze -BENCHMARK_CONFIGS = { +BENCHMARK_CONFIGS ||= { full: { test_identifier: 'WhisperAXTests/RegressionTests/testModelPerformance', name: 'full', @@ -50,12 +50,14 @@ BENCHMARK_CONFIGS = { 'openai_whisper-large-v3-v20240930_turbo', 'openai_whisper-large-v3-v20240930_626MB', 'openai_whisper-large-v3-v20240930_turbo_632MB' - ] + ], + repo: 'argmaxinc/whisperkit-coreml' }, debug: { test_identifier: 'WhisperAXTests/RegressionTests/testModelPerformanceWithDebugConfig', name: 'debug', - models: ['tiny', 'crash_test', 'unknown_model', 'small.en'] + models: ['tiny', 'crash_test', 'unknown_model', 'small.en'], + repo: 'argmaxinc/whisperkit-coreml' } }.freeze @@ -200,7 +202,9 @@ end def run_benchmark(devices, config) summaries = [] - BENCHMARK_CONFIGS[config][:models].each do |model| + config_data = BENCHMARK_CONFIGS[config] + + config_data[:models].each do |model| begin # Sanitize device name for use in file path devices_to_test = devices.map { |device_info| device_info[:name] }.compact @@ -228,8 +232,12 @@ def run_benchmark(devices, config) UI.message "Running in #{BENCHMARK_CONFIGS[config][:name]} mode" UI.message "Running benchmark for model: #{model}" + UI.message 'Using Hugging Face:' + UI.message " • Repository: #{config_data[:repo]}" + xcargs = [ "MODEL_NAME=#{model}", + "MODEL_REPO=#{config_data[:repo]}", '-allowProvisioningUpdates', '-allowProvisioningDeviceRegistration' ].join(' ')