diff --git a/_quarto.yml b/_quarto.yml
index 0c713d9..c94da1e 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -16,7 +16,7 @@ book:
     - index.qmd
     - book/introduction.qmd
     - book/usecase/index.qmd
-    - book/inmemory/in_memory_interoperability.qmd
+    - book/in_memory/index.qmd
     - book/on_disk_interoperability.qmd
     - text: Workflow frameworks
       part: book/workflow_frameworks/index.qmd
diff --git a/book/in_memory/.gitignore b/book/in_memory/.gitignore
new file mode 100644
index 0000000..249cda9
--- /dev/null
+++ b/book/in_memory/.gitignore
@@ -0,0 +1 @@
+/data
\ No newline at end of file
diff --git a/book/inmemory/images/inmemorymatrix.png b/book/in_memory/images/inmemorymatrix.png
similarity index 100%
rename from book/inmemory/images/inmemorymatrix.png
rename to book/in_memory/images/inmemorymatrix.png
diff --git a/book/inmemory/in_memory_interoperability.qmd b/book/in_memory/index.qmd
similarity index 95%
rename from book/inmemory/in_memory_interoperability.qmd
rename to book/in_memory/index.qmd
index fce4c6c..a300adb 100644
--- a/book/inmemory/in_memory_interoperability.qmd
+++ b/book/in_memory/index.qmd
@@ -86,7 +86,8 @@ with (default_converter + pandas2ri.converter).context():
 
 One big limitation of rpy2 is the inability to convert sparse matrices: there is no built-in conversion module for scipy. The `anndata2ri` package provides, apart from functionality to convert SingleCellExperiment objects to an anndata objects, functions to convert sparse matrices.
 
-```{r include=FALSE}
+```{r import_sce, include=FALSE}
+# this is added so renv knows that singlecellexperiment needs to be installed
 library(SingleCellExperiment)
 ```
 
@@ -146,7 +147,7 @@ The pseudobulked data is read in:
 
 ```{python load_data}
 import anndata as ad
-pd_adata = ad.read_h5ad("usecase/data/pseudobulk.h5ad")
+pd_adata = ad.read_h5ad("../usecase/data/pseudobulk.h5ad")
 ```
 
 Select small molecule and control:
@@ -216,11 +217,11 @@ res = utils.head(dplyr.arrange(res, 'padj'), 10)
 ```
 
 Write to disk: this again requires the pandas2ri converter to convert the results to a pandas dataframe.
-```{python write_results}
+```{python write_results, eval=FALSE}
 with (robjects.default_converter + pandas2ri.converter).context():
     res_pd = robjects.conversion.get_conversion().rpy2py(res)
 
-    res_pd.to_csv("usecase/data/de_contrasts.csv")
+    res_pd.to_csv("../usecase/data/de_contrasts.csv")
 ```
 
 ### Usecase: ran in R
@@ -228,7 +229,7 @@ with (robjects.default_converter + pandas2ri.converter).context():
 
 ```{r read_in}
 library(anndata)
-adata_path <- "usecase/data/sc_counts_subset.h5ad"
+adata_path <- "../usecase/data/sc_counts_subset.h5ad"
 adata <- anndata::read_h5ad(adata_path)
 ```
 
@@ -280,12 +281,12 @@ pb_X <- group_by(cr, celltype) %>% summarise(across(where(is.numeric), sum))
 
 Construct obs for pseudobulk.
 Use 'plate_well_celltype_reannotated' as index and make sure to retain the columns 'sm_name', 'cell_type', and 'plate_name':
-```{r pb_obs, eval = FALSE}
+```{r pb_obs_r, eval = FALSE}
 pb_obs <- adata$obs[c("sm_name", "cell_type", "plate_name", "well", "plate_well_celltype_reannotated")]
 pb_obs <- pb_obs[!duplicated(pb_obs), ]
 ```
 
-```{python pb_obs, eval = FALSE}
+```{python pb_obs_py, eval = FALSE}
 pb_obs = adata.obs[["sm_name", "cell_type", "plate_name", "well"]].copy()
 pb_obs.index = adata.obs["plate_well_celltype_reannotated"]
 pb_obs = pb_obs.drop_duplicates()
@@ -309,6 +310,6 @@ Store to disk:
 
 ```{r store_pseudobulk, eval=FALSE}
-write_h5ad(pb_adata, "notebooks/usecase_data/pseudobulk.h5ad")
+write_h5ad(pb_adata, "../usecase/data/pseudobulk.h5ad")
 ```
\ No newline at end of file
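
Several of the chunks patched above (`write_results`, for example) rely on rpy2's scoped converter idiom, where the pandas converter is only active inside a `with ... .context():` block. A minimal, self-contained sketch of that pattern, assuming rpy2 >= 3.5 and pandas are installed; the toy DataFrame and variable names are illustrative, not taken from the book:

```python
# Sketch of the scoped rpy2 conversion pattern (assumes rpy2 >= 3.5, pandas, and an R installation).
import pandas as pd
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri

# Toy data; stands in for the `res` results table used in the book.
df = pd.DataFrame({"gene": ["A", "B", "C"], "padj": [0.01, 0.2, 0.5]})

# Combine the default converter with the pandas converter and activate it
# only for this block, mirroring the write_results chunk.
with (robjects.default_converter + pandas2ri.converter).context():
    r_df = robjects.conversion.get_conversion().py2rpy(df)       # pandas -> R data.frame
    df_back = robjects.conversion.get_conversion().rpy2py(r_df)  # R data.frame -> pandas

print(type(r_df).__name__, df_back.shape)
```

On exit from the `with` block the previous conversion rules are restored, so pandas objects are not converted implicitly elsewhere in the session.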