Initialise test resources #1

Merged: 3 commits, Jan 20, 2025
18 changes: 7 additions & 11 deletions _viash.yaml
@@ -2,7 +2,7 @@ viash_version: 0.9.0

 # Step 1: Change the name of the task.
 # example: task_name_of_this_task
-name: task_template
+name: task_foundation_models
 organization: openproblems-bio
 version: dev

@@ -11,8 +11,8 @@ license: MIT
 keywords: [single-cell, openproblems, benchmark]
 # Step 3: Update the `task_template` to the name of the task from step 1.
 links:
-  issue_tracker: https://github.com/openproblems-bio/task_template/issues
-  repository: https://github.com/openproblems-bio/task_template
+  issue_tracker: https://github.com/openproblems-bio/task_foundation_models/issues
+  repository: https://github.com/openproblems-bio/task_foundation_models
   docker_registry: ghcr.io

@@ -50,11 +50,8 @@ info:
 # Step 5: Replace the task_template to the name of the task.
 test_resources:
   - type: s3
-    path: s3://openproblems-data/resources_test/common/
-    dest: resources_test/common
-  - type: s3
-    path: s3://openproblems-data/resources_test/task_template/
-    dest: resources_test/task_template
+    path: s3://openproblems-data/resources_test/task_foundation_models/
+    dest: resources_test/task_foundation_models

 # Step 6: Update the authors of the task.
 authors:

@@ -81,8 +78,7 @@ config_mods: |
   .runners[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h" }

 repositories:
-  - name: core
+  - name: openproblems
     type: github
-    repo: openproblems-bio/core
+    repo: openproblems-bio/openproblems
     tag: build/main
     path: viash/core
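The renamed `openproblems` repository entry is what component configs elsewhere in the repo would refer to by name. A hypothetical dependency stanza in a component's `config.vsh.yaml` might look like the fragment below; the component path `utils/subset_h5ad` is made up for illustration, and the `dependencies` syntax is assumed from viash 0.9 conventions, not taken from this PR:

```yaml
dependencies:
  # 'repository' must match a name declared under 'repositories'
  # in _viash.yaml; the component path here is hypothetical.
  - name: utils/subset_h5ad
    repository: openproblems
```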
68 changes: 29 additions & 39 deletions scripts/create_resources/test_resources.sh
@@ -6,48 +6,38 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
 # ensure that the command below is run from the root of the repository
 cd "$REPO_ROOT"

-# # remove this when you have implemented the script
-# echo "TODO: replace the commands in this script with the sequence of components that you need to run to generate test_resources."
-# echo "  Inside this script, you will need to place commands to generate example files for each of the 'src/api/file_*.yaml' files."
-# exit 1
-
 set -e

-RAW_DATA=resources_test/common
-DATASET_DIR=resources_test/task_template
-
-mkdir -p $DATASET_DIR
-
-# process dataset
-viash run src/data_processors/process_dataset/config.vsh.yaml -- \
-  --input $RAW_DATA/cxg_mouse_pancreas_atlas/dataset.h5ad \
-  --output_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \
-  --output_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad \
-  --output_solution $DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad
-
-# run one method
-viash run src/methods/logistic_regression/config.vsh.yaml -- \
-  --input_train $DATASET_DIR/cxg_mouse_pancreas_atlas/train.h5ad \
-  --input_test $DATASET_DIR/cxg_mouse_pancreas_atlas/test.h5ad \
-  --output $DATASET_DIR/cxg_mouse_pancreas_atlas/prediction.h5ad
-
-# run one metric
-viash run src/metrics/accuracy/config.vsh.yaml -- \
-  --input_prediction $DATASET_DIR/cxg_mouse_pancreas_atlas/prediction.h5ad \
-  --input_solution $DATASET_DIR/cxg_mouse_pancreas_atlas/solution.h5ad \
-  --output $DATASET_DIR/cxg_mouse_pancreas_atlas/score.h5ad
-
-# write manual state.yaml. this is not actually necessary but you never know it might be useful
-cat > $DATASET_DIR/cxg_mouse_pancreas_atlas/state.yaml << HERE
-id: cxg_mouse_pancreas_atlas
-train: !file train.h5ad
-test: !file test.h5ad
-solution: !file solution.h5ad
-prediction: !file prediction.h5ad
-score: !file score.h5ad
-HERE
+OUT_DIR=resources_test/task_foundation_models/results
+
+TASKS=(
+  "task_label_projection"
+  "task_batch_integration"
+)
+
+if [ -d "$OUT_DIR" ]; then
+  echo "Removing existing directory '$OUT_DIR'"
+  rm -rf "$OUT_DIR"
+fi
+
+mkdir -p "$OUT_DIR"
+
+for TASK in "${TASKS[@]}"; do
+  BASE_DIR="s3://openproblems-data/resources/$TASK/results"
+
+  # find subdir in bucket with latest date which has a 'task_info.yaml' file
+  DATE=$(aws s3 ls "$BASE_DIR/" --recursive --no-sign-request | awk '{print $4}' | grep 'task_info.yaml' | sort -r | head -n 1 | sed 's#.*/run_\(.*\)/[^/]*$#\1#')
+
+  INPUT_DIR="$BASE_DIR/run_$DATE"
+  TASK_STRIP_PREFIX=$(echo $TASK | sed 's/task_//')
+  OUTPUT_DIR="$OUT_DIR/$TASK_STRIP_PREFIX"
+
+  echo "Syncing '$INPUT_DIR' to '$OUTPUT_DIR'"
+  aws s3 sync "$INPUT_DIR" "$OUTPUT_DIR" --delete --no-sign-request
+done

 # only run this if you have access to the openproblems-data bucket
 aws s3 sync --profile op \
-  "$DATASET_DIR" s3://openproblems-data/resources_test/task_template \
+  "resources_test/task_foundation_models" \
+  s3://openproblems-data/resources_test/task_foundation_models \
   --delete --dryrun
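The `DATE=` extraction in the loop above hinges on one sed expression. A minimal sketch of that step in isolation, using a made-up object key of the shape the script expects (`.../run_<DATE>/task_info.yaml`):

```shell
#!/usr/bin/env bash
# Made-up key; only the run_<DATE>/ segment matters to the regex.
KEY="resources/task_label_projection/results/run_2025-01-20_12-00-00/task_info.yaml"

# Greedy .*/run_ anchors at the last 'run_' directory in the key;
# the capture group keeps everything up to the final path component.
DATE=$(echo "$KEY" | sed 's#.*/run_\(.*\)/[^/]*$#\1#')
echo "$DATE"   # prints: 2025-01-20_12-00-00
```

As an aside, a pure-bash `${TASK#task_}` would do the same job as the script's `sed 's/task_//'` prefix strip without spawning a subshell, though the sed form matches the file's existing style.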
59 changes: 0 additions & 59 deletions src/control_methods/true_labels/config.vsh.yaml

This file was deleted.

45 changes: 0 additions & 45 deletions src/control_methods/true_labels/script.py

This file was deleted.

34 changes: 0 additions & 34 deletions src/data_processors/process_dataset/config.vsh.yaml

This file was deleted.

86 changes: 0 additions & 86 deletions src/data_processors/process_dataset/script.py

This file was deleted.
