From 4c21d2b04cb7b2c21f0720a57f9a3b028a023bf9 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Sat, 7 Sep 2024 09:13:46 +0200
Subject: [PATCH] change names, add some content to the intro

---
 _quarto.yml                                   |  8 ++---
 book/file_formats.qmd                         | 23 --------------
 ...ory.qmd => in_memory_interoperability.qmd} |  2 +-
 book/intro.qmd                                | 10 -------
 book/introduction.qmd                         | 30 +++++++++++++++++++
 ...dataR.qmd => on_disk_interoperability.qmd} | 22 ++++++++++----
 book/references.bib                           | 14 +++++++++
 .../data/criteria.csv                         |  0
 .../data/wf_aggregated_scores.csv             |  0
 .../data/wf_metadata.csv                      |  0
 .../examples/nextflow/nf-core/environment.yml |  0
 .../examples/nextflow/nf-core/main.nf         |  0
 .../examples/nextflow/nf-core/meta.yml        |  0
 .../nextflow/nf-core/tests/main.nf.test       |  0
 .../examples/nextflow/wratten2021_poc/main.nf |  0
 .../snakemake-wrappers/environment.yaml       |  0
 .../snakemake/snakemake-wrappers/meta.yaml    |  0
 .../snakemake-wrappers/test/Snakefile         |  0
 .../snakemake/snakemake-wrappers/wrapper.py   |  0
 .../snakemake/wratten2021_poc/fastqc.smk      |  0
 .../examples/wdl/biowdl/fastqc.wdl            |  0
 .../examples/wdl/biowdl/fastqc_full.wdl       |  0
 .../examples/wdl/wratten2021_poc/fastqc.wdl   |  0
 .../images/productionization.svg              |  0
 .../images/qualities.svg                      |  0
 .../images/technologies.svg                   |  0
 .../index.qmd                                 |  2 --
 27 files changed, 66 insertions(+), 45 deletions(-)
 delete mode 100644 book/file_formats.qmd
 rename book/{in_memory.qmd => in_memory_interoperability.qmd} (98%)
 delete mode 100644 book/intro.qmd
 create mode 100644 book/introduction.qmd
 rename book/{anndataR.qmd => on_disk_interoperability.qmd} (53%)
 rename book/{workflows => workflow_frameworks}/data/criteria.csv (100%)
 rename book/{workflows => workflow_frameworks}/data/wf_aggregated_scores.csv (100%)
 rename book/{workflows => workflow_frameworks}/data/wf_metadata.csv (100%)
 rename book/{workflows => workflow_frameworks}/examples/nextflow/nf-core/environment.yml (100%)
 rename book/{workflows => workflow_frameworks}/examples/nextflow/nf-core/main.nf (100%)
 rename book/{workflows => workflow_frameworks}/examples/nextflow/nf-core/meta.yml (100%)
 rename book/{workflows => workflow_frameworks}/examples/nextflow/nf-core/tests/main.nf.test (100%)
 rename book/{workflows => workflow_frameworks}/examples/nextflow/wratten2021_poc/main.nf (100%)
 rename book/{workflows => workflow_frameworks}/examples/snakemake/snakemake-wrappers/environment.yaml (100%)
 rename book/{workflows => workflow_frameworks}/examples/snakemake/snakemake-wrappers/meta.yaml (100%)
 rename book/{workflows => workflow_frameworks}/examples/snakemake/snakemake-wrappers/test/Snakefile (100%)
 rename book/{workflows => workflow_frameworks}/examples/snakemake/snakemake-wrappers/wrapper.py (100%)
 rename book/{workflows => workflow_frameworks}/examples/snakemake/wratten2021_poc/fastqc.smk (100%)
 rename book/{workflows => workflow_frameworks}/examples/wdl/biowdl/fastqc.wdl (100%)
 rename book/{workflows => workflow_frameworks}/examples/wdl/biowdl/fastqc_full.wdl (100%)
 rename book/{workflows => workflow_frameworks}/examples/wdl/wratten2021_poc/fastqc.wdl (100%)
 rename book/{workflows => workflow_frameworks}/images/productionization.svg (100%)
 rename book/{workflows => workflow_frameworks}/images/qualities.svg (100%)
 rename book/{workflows => workflow_frameworks}/images/technologies.svg (100%)
 rename book/{workflows => workflow_frameworks}/index.qmd (97%)

diff --git a/_quarto.yml b/_quarto.yml
index 6a356ca..56da45d 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -14,11 +14,11 @@ book:
   repo-actions: [edit, issue, source]
   chapters:
     - index.qmd
-    - book/intro.qmd
+    - book/introduction.qmd
     - book/usecase/index.qmd
-    - book/file_formats.qmd
-    - book/in_memory.qmd
-    - book/workflows/index.qmd
+    - book/in_memory_interoperability.qmd
+    - book/on_disk_interoperability.qmd
+    - book/workflow_frameworks/index.qmd
     - book/book_slides.qmd
     - book/references.qmd
 
diff --git a/book/file_formats.qmd b/book/file_formats.qmd
deleted file mode 100644
index e6d76d2..0000000
--- a/book/file_formats.qmd
+++ /dev/null
@@ -1,23 +0,0 @@
----
-title: File formats
-engine: knitr
----
-
-# File formats
-
-Data format based interoperability
-
-1. h5ad / zarr / Apache Arrow
-2. Reading and writing these formats
-
-## Setup
-
-```{python}
-import anndata
-import numpy
-import scanpy
-```
-
-```{python}
-anndata.__version__
-```
diff --git a/book/in_memory.qmd b/book/in_memory_interoperability.qmd
similarity index 98%
rename from book/in_memory.qmd
rename to book/in_memory_interoperability.qmd
index 484fd5e..75f53cf 100644
--- a/book/in_memory.qmd
+++ b/book/in_memory_interoperability.qmd
@@ -1,5 +1,5 @@
 ---
-title: In memory interoperability (from Python)
+title: In-memory interoperability
 engine: knitr
 ---
 
diff --git a/book/intro.qmd b/book/intro.qmd
deleted file mode 100644
index 4a9b827..0000000
--- a/book/intro.qmd
+++ /dev/null
@@ -1,10 +0,0 @@
-# Introduction
-
-Any bioinformatician that has analysed a single-cell dataset knows that using methods developed for different ecosystems or programming languages is necessary but painful.
-Any package developer has asked themselves the question on how to best provide access to their tool or method.
-
-We will give an overview of the interoperability tools you can use when analysing a single-cell dataset: do you want to convert your data to a different data format, or is just calling one R function in your Jupyter notebook sufficient? Do you want fine-grained control over each step in the analysis pipeline or do you run a series of scripts that you really should convert to a workflow system?
-
-We will give information on different options for package developers to provide better interoperability. Should you reimplement your package in a new language? How do you ensure that the results are the same?
-
-In order to follow this workshop, we expect the participants to have some Python or R programming knowledge.
diff --git a/book/introduction.qmd b/book/introduction.qmd
new file mode 100644
index 0000000..a878f6f
--- /dev/null
+++ b/book/introduction.qmd
@@ -0,0 +1,30 @@
+---
+title: Introduction
+engine: knitr
+---
+
+Single-cell analysis has emerged as a transformative force in biology,
+providing unprecedented insights into cellular heterogeneity and complex biological processes. The rapid advancement in this field has led to a proliferation of specialized tools and methods [@Zappia2021], often developed in different programming languages and software ecosystems. While this diversity empowers researchers to leverage the best tools for each analysis step [@Heumos2023], it also presents a significant challenge: how to seamlessly integrate and execute analyses across these disparate languages and frameworks.
+
+The need to utilize tools from different programming ecosystems creates a "polyglot" landscape in single-cell analysis, where researchers must navigate the complexities of interoperability, data exchange, and workflow management. This fragmentation can hinder productivity, introduce errors, and impede reproducibility.
+
+Researchers can approach this challenge in various ways, each with its own trade-offs and considerations. The sections below outline the main strategies for achieving interoperability in single-cell analysis; all but code porting are explored in more detail in the following chapters.
+
+## Code porting
+
+Porting tools from one language to another can offer complete control and eliminate interoperability concerns. However, one should not underestimate the effort required to reimplement complex algorithms, and the risk of introducing errors.
+
+Furthermore, work is not done after the initial port -- in order for the researcher's work to be useful to others, the ported code must be maintained and kept up to date with the original implementation. For this reason, we don't consider reimplementation a viable option for most use cases and will not discuss it further in this book.
+
+## In-memory interoperability
+
+Tools like rpy2 and reticulate allow for direct communication between languages within a single analysis session. This approach provides flexibility and avoids intermediate file I/O, but can introduce complexity in managing dependencies and environments.
+
+
+## File-based interoperability
+
+Storing intermediate results in standardized, language-agnostic file formats (e.g., HDF5, Parquet) allows for sequential execution of scripts written in different languages. This approach is relatively simple but can lead to increased storage requirements and I/O overhead.
+
+## Workflow frameworks
+
+Workflow management systems (e.g., Nextflow, Snakemake) provide a structured approach to orchestrate complex, multi-language pipelines, enhancing reproducibility and automation. However, they come with a learning curve and may require additional configuration.
diff --git a/book/anndataR.qmd b/book/on_disk_interoperability.qmd
similarity index 53%
rename from book/anndataR.qmd
rename to book/on_disk_interoperability.qmd
index 3b972e5..50a9d62 100644
--- a/book/anndataR.qmd
+++ b/book/on_disk_interoperability.qmd
@@ -1,15 +1,27 @@
 ---
-title: "WIP: In memory interoperability (Python side)"
+title: On-disk interoperability
 engine: knitr
 ---
 
+Data-format-based interoperability
 
-# anndataR
+1. h5ad / zarr / Apache Arrow
+2. Reading and writing these formats
 
-Calling python from R and vice versa
+## Setup
 
-1. rpy2 & reticulate
-2. How to do this in jupyter notebooks and rmarkdown scripts
+```{python}
+import anndata
+import numpy
+import scanpy
+```
+
+```{python}
+anndata.__version__
+```
+
+
+## anndataR
 
 ```{r}
 library(anndataR)
diff --git a/book/references.bib b/book/references.bib
index c131410..868e557 100644
--- a/book/references.bib
+++ b/book/references.bib
@@ -51,3 +51,17 @@ @article{Wratten2021
   month = sep,
   pages = {1161–1168}
 }
+
+@article{Zappia2021,
+  title = {Over 1000 tools reveal trends in the single-cell RNA-seq analysis landscape},
+  volume = {22},
+  ISSN = {1474-760X},
+  url = {http://dx.doi.org/10.1186/s13059-021-02519-4},
+  DOI = {10.1186/s13059-021-02519-4},
+  number = {1},
+  journal = {Genome Biology},
+  publisher = {Springer Science and Business Media LLC},
+  author = {Zappia,  Luke and Theis,  Fabian J.},
+  year = {2021},
+  month = oct
+}
diff --git a/book/workflows/data/criteria.csv b/book/workflow_frameworks/data/criteria.csv
similarity index 100%
rename from book/workflows/data/criteria.csv
rename to book/workflow_frameworks/data/criteria.csv
diff --git a/book/workflows/data/wf_aggregated_scores.csv b/book/workflow_frameworks/data/wf_aggregated_scores.csv
similarity index 100%
rename from book/workflows/data/wf_aggregated_scores.csv
rename to book/workflow_frameworks/data/wf_aggregated_scores.csv
diff --git a/book/workflows/data/wf_metadata.csv b/book/workflow_frameworks/data/wf_metadata.csv
similarity index 100%
rename from book/workflows/data/wf_metadata.csv
rename to book/workflow_frameworks/data/wf_metadata.csv
diff --git a/book/workflows/examples/nextflow/nf-core/environment.yml b/book/workflow_frameworks/examples/nextflow/nf-core/environment.yml
similarity index 100%
rename from book/workflows/examples/nextflow/nf-core/environment.yml
rename to book/workflow_frameworks/examples/nextflow/nf-core/environment.yml
diff --git a/book/workflows/examples/nextflow/nf-core/main.nf b/book/workflow_frameworks/examples/nextflow/nf-core/main.nf
similarity index 100%
rename from book/workflows/examples/nextflow/nf-core/main.nf
rename to book/workflow_frameworks/examples/nextflow/nf-core/main.nf
diff --git a/book/workflows/examples/nextflow/nf-core/meta.yml b/book/workflow_frameworks/examples/nextflow/nf-core/meta.yml
similarity index 100%
rename from book/workflows/examples/nextflow/nf-core/meta.yml
rename to book/workflow_frameworks/examples/nextflow/nf-core/meta.yml
diff --git a/book/workflows/examples/nextflow/nf-core/tests/main.nf.test b/book/workflow_frameworks/examples/nextflow/nf-core/tests/main.nf.test
similarity index 100%
rename from book/workflows/examples/nextflow/nf-core/tests/main.nf.test
rename to book/workflow_frameworks/examples/nextflow/nf-core/tests/main.nf.test
diff --git a/book/workflows/examples/nextflow/wratten2021_poc/main.nf b/book/workflow_frameworks/examples/nextflow/wratten2021_poc/main.nf
similarity index 100%
rename from book/workflows/examples/nextflow/wratten2021_poc/main.nf
rename to book/workflow_frameworks/examples/nextflow/wratten2021_poc/main.nf
diff --git a/book/workflows/examples/snakemake/snakemake-wrappers/environment.yaml b/book/workflow_frameworks/examples/snakemake/snakemake-wrappers/environment.yaml
similarity index 100%
rename from book/workflows/examples/snakemake/snakemake-wrappers/environment.yaml
rename to book/workflow_frameworks/examples/snakemake/snakemake-wrappers/environment.yaml
diff --git a/book/workflows/examples/snakemake/snakemake-wrappers/meta.yaml b/book/workflow_frameworks/examples/snakemake/snakemake-wrappers/meta.yaml
similarity index 100%
rename from book/workflows/examples/snakemake/snakemake-wrappers/meta.yaml
rename to book/workflow_frameworks/examples/snakemake/snakemake-wrappers/meta.yaml
diff --git a/book/workflows/examples/snakemake/snakemake-wrappers/test/Snakefile b/book/workflow_frameworks/examples/snakemake/snakemake-wrappers/test/Snakefile
similarity index 100%
rename from book/workflows/examples/snakemake/snakemake-wrappers/test/Snakefile
rename to book/workflow_frameworks/examples/snakemake/snakemake-wrappers/test/Snakefile
diff --git a/book/workflows/examples/snakemake/snakemake-wrappers/wrapper.py b/book/workflow_frameworks/examples/snakemake/snakemake-wrappers/wrapper.py
similarity index 100%
rename from book/workflows/examples/snakemake/snakemake-wrappers/wrapper.py
rename to book/workflow_frameworks/examples/snakemake/snakemake-wrappers/wrapper.py
diff --git a/book/workflows/examples/snakemake/wratten2021_poc/fastqc.smk b/book/workflow_frameworks/examples/snakemake/wratten2021_poc/fastqc.smk
similarity index 100%
rename from book/workflows/examples/snakemake/wratten2021_poc/fastqc.smk
rename to book/workflow_frameworks/examples/snakemake/wratten2021_poc/fastqc.smk
diff --git a/book/workflows/examples/wdl/biowdl/fastqc.wdl b/book/workflow_frameworks/examples/wdl/biowdl/fastqc.wdl
similarity index 100%
rename from book/workflows/examples/wdl/biowdl/fastqc.wdl
rename to book/workflow_frameworks/examples/wdl/biowdl/fastqc.wdl
diff --git a/book/workflows/examples/wdl/biowdl/fastqc_full.wdl b/book/workflow_frameworks/examples/wdl/biowdl/fastqc_full.wdl
similarity index 100%
rename from book/workflows/examples/wdl/biowdl/fastqc_full.wdl
rename to book/workflow_frameworks/examples/wdl/biowdl/fastqc_full.wdl
diff --git a/book/workflows/examples/wdl/wratten2021_poc/fastqc.wdl b/book/workflow_frameworks/examples/wdl/wratten2021_poc/fastqc.wdl
similarity index 100%
rename from book/workflows/examples/wdl/wratten2021_poc/fastqc.wdl
rename to book/workflow_frameworks/examples/wdl/wratten2021_poc/fastqc.wdl
diff --git a/book/workflows/images/productionization.svg b/book/workflow_frameworks/images/productionization.svg
similarity index 100%
rename from book/workflows/images/productionization.svg
rename to book/workflow_frameworks/images/productionization.svg
diff --git a/book/workflows/images/qualities.svg b/book/workflow_frameworks/images/qualities.svg
similarity index 100%
rename from book/workflows/images/qualities.svg
rename to book/workflow_frameworks/images/qualities.svg
diff --git a/book/workflows/images/technologies.svg b/book/workflow_frameworks/images/technologies.svg
similarity index 100%
rename from book/workflows/images/technologies.svg
rename to book/workflow_frameworks/images/technologies.svg
diff --git a/book/workflows/index.qmd b/book/workflow_frameworks/index.qmd
similarity index 97%
rename from book/workflows/index.qmd
rename to book/workflow_frameworks/index.qmd
index 7da7c93..6f590b5 100644
--- a/book/workflows/index.qmd
+++ b/book/workflow_frameworks/index.qmd
@@ -3,8 +3,6 @@ title: Workflows
 author: Robrecht Cannoodt, Data Intuitive
 ---
 
-Single-cell analysis has revolutionized our understanding of cellular heterogeneity and complex biological processes. However, this cutting-edge field often demands the use of multiple programming languages and frameworks, each with its strengths and specialized tools [@Heumos2023]. This polyglot approach, while powerful, introduces significant technical challenges in terms of interoperability, usability, and reproducibility.
-
 In the previous chapters, we've explored strategies for supporting data operability across programming language. Now, we turn our attention to how to effectively integrate these tools and languages into a cohesive and scalable analysis workflow.
 
 ## Productionization