diff --git a/.dockerignore b/.dockerignore
index 70465c8..d0420cb 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -10,3 +10,4 @@
 *.html
 *.DS_Store
 /usecase_data/
+/usecase/data/
diff --git a/.gitignore b/.gitignore
index f450658..01c7604 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 /site_libs/
 *.html
 *.DS_Store
+/usecase/data/
 /usecase_data/
 
 # Created by https://www.toptal.com/developers/gitignore/api/python,r
diff --git a/README.md b/README.md
index e692cbc..0c6569c 100644
--- a/README.md
+++ b/README.md
@@ -118,11 +118,11 @@ pixi run pipeline
 
 ### Docker
 
-To run the pipeline with Docker, use the following command. The image is ~5GB and the pipeline can require a lot of working memory ~20GB, so make sure to increase the RAM allocated to Docker in your settings. Note that the usecase_data/ and scripts/ folders are mounted to the Docker container, so you can edit the scripts and access the data.
+To run the pipeline with Docker, use the following command. The image is ~5GB and the pipeline can require a lot of working memory ~20GB, so make sure to increase the RAM allocated to Docker in your settings. Note that the usecase/data/ and scripts/ folders are mounted to the Docker container, so you can edit the scripts and access the data.
 
 ```bash
 docker pull berombau/polygloty-docker:latest
-docker run -it -v $(pwd)/usecase_data:/app/usecase_data -v $(pwd)/scripts:/app/scripts berombau/polygloty-docker:latest pixi run pipeline
+docker run -it -v $(pwd)/usecase/data:/app/usecase/data -v $(pwd)/scripts:/app/scripts berombau/polygloty-docker:latest pixi run pipeline
 ```
 
 ### Extra: building the Docker image yourself
@@ -131,7 +131,7 @@ To edit and build the Docker image yourself, you can use the following command:
 
 ```bash
 docker build -t polygloty-docker .
-docker run -it -v $(pwd)/usecase_data:/app/usecase_data -v $(pwd)/scripts:/app/scripts polygloty-docker pixi run pipeline
+docker run -it -v $(pwd)/usecase/data:/app/usecase/data -v $(pwd)/scripts:/app/scripts polygloty-docker pixi run pipeline
 ```
 
 To publish it to Docker Hub, use the following command:
diff --git a/book/in_memory2.qmd b/book/in_memory2.qmd
index 83a9baf..b99f826 100644
--- a/book/in_memory2.qmd
+++ b/book/in_memory2.qmd
@@ -16,7 +16,7 @@ Read in the anndata object
 
 ```{r read_in}
 library(anndata)
-adata_path <- "notebooks/usecase_data/sc_counts_reannotated_with_counts.h5ad"
+adata_path <- "notebooks/usecase/data/sc_counts_reannotated_with_counts.h5ad"
 adata <- anndata::read_h5ad(adata_path)
 ```
 
@@ -89,5 +89,5 @@ pb_adata = ad.AnnData(
 
 Store to disk:
 ```{python store_pseudobulk}
-pb_adata.write_h5ad("usecase_data/pseudobulk.h5ad")
+pb_adata.write_h5ad("usecase/data/pseudobulk.h5ad")
 ```
\ No newline at end of file
diff --git a/book/in_memory_interoperability.qmd b/book/in_memory_interoperability.qmd
index 5c13cd2..1566ad5 100644
--- a/book/in_memory_interoperability.qmd
+++ b/book/in_memory_interoperability.qmd
@@ -203,7 +203,7 @@ with (robjects.default_converter + pandas2ri.converter).context():
 
 ```{r read_in}
 library(anndata)
-adata_path <- "notebooks/usecase_data/sc_counts_reannotated_with_counts.h5ad"
+adata_path <- "notebooks/usecase/data/sc_counts_reannotated_with_counts.h5ad"
 adata <- anndata::read_h5ad(adata_path)
 ```
 
@@ -270,6 +270,6 @@ pb_adata <- anndata::AnnData(
 
 Store to disk:
 
 ```{r store_pseudobulk}
-write_h5ad(pb_adata, "notebooks/usecase_data/pseudobulk.h5ad")
+write_h5ad(pb_adata, "notebooks/usecase/data/pseudobulk.h5ad")
 ```
\ No newline at end of file
diff --git a/book/on_disk_interoperability.qmd b/book/on_disk_interoperability.qmd
index c2afc9e..d060750 100644
--- a/book/on_disk_interoperability.qmd
+++ b/book/on_disk_interoperability.qmd
@@ -122,9 +122,9 @@ With the Pixi task runner, you can define these tasks in their respective enviro
 pixi run pipeline
 ```
 
-You can create a Docker image with all the `pixi` environments and run the pipeline in one containerized environment. The image is ~5GB and the pipeline can require a lot of working memory ~20GB, so make sure to increase the RAM allocated to Docker in your settings. Note that the `usecase_data/` and `scripts/` folders are mounted to the Docker container, so you can interactively edit the scripts and access the data.
+You can create a Docker image with all the `pixi` environments and run the pipeline in one containerized environment. The image is ~5GB and the pipeline can require a lot of working memory ~20GB, so make sure to increase the RAM allocated to Docker in your settings. Note that the `usecase/data/` and `scripts/` folders are mounted to the Docker container, so you can interactively edit the scripts and access the data.
 
 ```bash
 docker pull berombau/polygloty-docker:latest
-docker run -it -v $(pwd)/usecase_data:/app/usecase_data -v $(pwd)/scripts:/app/scripts berombau/polygloty-docker:latest pixi run pipeline
+docker run -it -v $(pwd)/usecase/data:/app/usecase/data -v $(pwd)/scripts:/app/scripts berombau/polygloty-docker:latest pixi run pipeline
 ```
\ No newline at end of file
diff --git a/scripts/1_load_data.sh b/scripts/1_load_data.sh
index 0ca5f12..5d5fd42 100644
--- a/scripts/1_load_data.sh
+++ b/scripts/1_load_data.sh
@@ -1,6 +1,6 @@
-if [[ ! -f usecase_data/sc_counts_reannotated_with_counts.h5ad ]]; then
+if [[ ! -f usecase/data/sc_counts_reannotated_with_counts.h5ad ]]; then
   aws s3 cp \
     --no-sign-request \
     s3://openproblems-bio/public/neurips-2023-competition/sc_counts_reannotated_with_counts.h5ad \
-    usecase_data/sc_counts_reannotated_with_counts.h5ad
+    usecase/data/sc_counts_reannotated_with_counts.h5ad
 fi
diff --git a/scripts/2_compute_pseudobulk.py b/scripts/2_compute_pseudobulk.py
index ab126a2..430d453 100644
--- a/scripts/2_compute_pseudobulk.py
+++ b/scripts/2_compute_pseudobulk.py
@@ -2,7 +2,7 @@ import anndata as ad
 
 print("Load data")
-adata = ad.read_h5ad("usecase_data/sc_counts_reannotated_with_counts.h5ad")
+adata = ad.read_h5ad("usecase/data/sc_counts_reannotated_with_counts.h5ad")
 
 sm_name = "Belinostat"
 control_name = "Dimethyl Sulfoxide"
 
@@ -44,4 +44,4 @@
 )
 
 print("Store to disk")
-pb_adata.write_h5ad("usecase_data/pseudobulk.h5ad")
+pb_adata.write_h5ad("usecase/data/pseudobulk.h5ad")
diff --git a/scripts/3_analysis_de.R b/scripts/3_analysis_de.R
index 8f07a6b..fa3f3e1 100644
--- a/scripts/3_analysis_de.R
+++ b/scripts/3_analysis_de.R
@@ -3,7 +3,7 @@ library(anndata)
 library(dplyr, warn.conflicts = FALSE)
 
 print("Reading data...")
-pb_adata <- read_h5ad("usecase_data/pseudobulk.h5ad")
+pb_adata <- read_h5ad("usecase/data/pseudobulk.h5ad")
 
 # Select small molecule and control:
 sm_name <- "Belinostat"
@@ -34,4 +34,4 @@ res |>
   head(10)
 
 # Write to disk:
-write.csv(res, "usecase_data/de_contrasts.csv")
\ No newline at end of file
+write.csv(res, "usecase/data/de_contrasts.csv")
\ No newline at end of file
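
After applying the patch, a quick sanity check can catch any `usecase_data` stragglers the rename missed. The sketch below is an assumed verification step, not part of the patch: it creates the new data directory (so the `docker run -v` mount binds a real host path) and greps for leftover references, excluding the ignore files where the old `/usecase_data/` entry is intentionally kept alongside the new one.

```bash
# Post-rename sanity check (assumed workflow, not part of the patch).

# Ensure the new data directory exists so the Docker volume mount
# binds a real host path instead of letting Docker create it.
mkdir -p usecase/data

# Search for leftover references to the old path. The ignore files
# still list /usecase_data/ on purpose, so they are excluded.
grep -rn "usecase_data" . \
    --exclude-dir=.git \
    --exclude=.gitignore \
    --exclude=.dockerignore \
    && echo "Stale usecase_data references found above." \
    || echo "No stale usecase_data references."
```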