diff --git a/.github/workflows/test-mac-arm.yml b/.github/workflows/test-mac-arm.yml deleted file mode 100644 index 18a658c..0000000 --- a/.github/workflows/test-mac-arm.yml +++ /dev/null @@ -1,55 +0,0 @@ -on: - pull_request: - push: - branches: main - -name: Test on mac arm - -jobs: - build-deploy: - runs-on: macos-latest - - permissions: - contents: write - - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Set up specific version of Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Install R - uses: r-lib/actions/setup-r@v2 - - - name: Set up Quarto - uses: quarto-dev/quarto-actions/setup@v2 - - - name: Make sure data is available - run: | - AWS_EC2_METADATA_DISABLED=true \ - aws s3 cp \ - --no-sign-request \ - s3://openproblems-bio/public/neurips-2023-competition/sc_counts_reannotated_with_counts.h5ad \ - book/usecase/data/sc_counts_reannotated_with_counts.h5ad - - # attempt with renv - - name: Set up renv - uses: r-lib/actions/setup-renv@v2 - - - name: Cache certain directories to speed up build - uses: actions/cache@v2 - with: - path: | - book/usecase/data - key: quarto_cache_osx_${{ github.ref_name }} - restore-keys: | - quarto_cache_osx_main - - - name: Render book - run: | - Rscript -e "renv::restore()" - source renv/python/virtualenvs/renv-python-3.12/bin/activate - quarto render diff --git a/book/in_memory_interoperability.qmd b/book/in_memory_interoperability.qmd index 64e1ec0..8bb2142 100644 --- a/book/in_memory_interoperability.qmd +++ b/book/in_memory_interoperability.qmd @@ -62,6 +62,10 @@ The `anndata2ri` package provides, apart from functionality to convert SingleCel TODO: how to subscript sparse matrix? Is it possible? +```{r include=FALSE} +library(SingleCellExperiment) +``` + ```{python rpy2_sparse} import scipy as sp diff --git a/book/on_disk_interoperability.qmd b/book/on_disk_interoperability.qmd index d934563..c2afc9e 100644 --- a/book/on_disk_interoperability.qmd +++ b/book/on_disk_interoperability.qmd @@ -11,29 +11,65 @@ The downside is that it can lead to increased storage requirements and I/O overh Debugging is only possible for individual scripts and the workflow is not as interactive and explorative as the in-memory approach. -## Different files +## Different file formats -Data format based interoperability +It's important to differentiate between language-specific and language-agnostic file formats. For example, most languages can serialize objects to disk. R has the `.RDS` file format and Python has the `.pickle/.pkl` file format, but these are not interoperable. Older versions of a language can also have problems reading serialized objects created by the latest version. + +For a file format to be language-agnostic, it should have a mature standard describing how the data is stored on disk. This standard should be implemented in multiple languages, documented, and tested for compatibility. Some file formats have a reference implementation in C, which can be used to create bindings for other languages. Most also have a status page that lists the available implementations and which version of the standard (or how much of it) each supports. 
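+ +For instance, a minimal sketch of exchanging a data frame through a language-agnostic format (the file name and columns below are hypothetical): a table written to Parquet from Python with pandas can be read back in R with `arrow::read_parquet()`. + +```python +import pandas as pd + +# Hypothetical summary table; any columnar data frame works the same way +df = pd.DataFrame({"gene": ["CD3E", "MS4A1"], "mean_expr": [1.2, 0.8]}) + +# Writing Parquet requires an engine such as pyarrow to be installed +df.to_parquet("expression_summary.parquet") +# In R: df <- arrow::read_parquet("expression_summary.parquet") +```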
+ + + +### Dataframes + +| File Format | Support in Python | Support in R | Text-Based | Binary | High-Performance | Ease-of-Use | Compression | Suitable for Single-Cell | Suitable for Spatial | +|-------------|-------------------|--------------|------------|--------|------------------|-------------|-------------|--------------------------|----------------------| +| CSV | Yes | Yes | Yes | No | No | High | No | Limited | No | +| JSON | Yes | Yes | Yes | No | No | Medium | No | Limited | No | +| Parquet | Yes | Yes | No | Yes | Yes | High | Yes | Limited | No | +| Feather | Yes | Yes | No | Yes | Yes | High | Yes | Yes | No | +| HDF5 | Yes | Yes | No | Yes | Yes | Medium | Yes | Yes | Limited | + + +### n-dimensional arrays + +| File Format | Support in Python | Support in R | Text-Based | Binary | High-Performance | Ease-of-Use | Compression | Suitable for Single-Cell | Suitable for Spatial | +|-------------|-------------------|--------------|------------|--------|------------------|-------------|-------------|--------------------------|----------------------| +| h5ad | Yes | Limited | No | Yes | Yes | Medium | Yes | Yes | Yes | +| Zarr | Yes | Yes | No | Yes | Yes | Medium | Yes | Yes | Yes | +| NumPy (npy) | Yes | Limited | No | Yes | Yes | High | No | Yes | No | +| NetCDF | Yes | Yes | No | Yes | Yes | Medium | Yes | Yes | Yes | +| TIFF | Yes | Limited | No | Yes | Medium | Medium | Yes | No | Yes | -1. h5ad / zarr / Apache Arrow -2. Reading and writing these formats -TODO: comparison table ## Different on-disk pipelines You can use a shell script to run the pipeline in a sequential manner. This requires all the dependencies to be installed in one large environment. -TODO: compare notebook and script pipelines +Usually you start in a notebook with an exploratory analysis, then move to a script for reproducibility, and finally to a pipeline for scalability. + +The scripts in such a script pipeline are a collection of the code snippets from the notebooks and can be written in different languages and executed in sequence. + +Alternatively, there are frameworks that keep the notebooks and create a pipeline from them. The upside is that you can avoid converting the code snippets in the notebooks to scripts. The downside is that you have to use a specific framework and the notebooks can become very large and unwieldy. ## Notebook pipelines -TODO: talk about Quarto, nb and papermill +You can use [Quarto](https://quarto.org/) to run code snippets in different languages in the same `.qmd` notebook. Our [Use-case chapter](./usecase/) is one example of this. + +For example, [Papermill](https://github.com/nteract/papermill) can execute Jupyter notebooks in sequence and pass variables between them. [Ploomber](https://github.com/ploomber/ploomber) is another example. 
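+ +A minimal sketch of parameterized execution with Papermill's Python API (the notebook names and parameters are hypothetical): + +```python +import papermill as pm + +# Executes the notebook top to bottom and injects the given values +# after its "parameters"-tagged cell, writing the executed copy to a new file +pm.execute_notebook( +    "preprocess.ipynb", +    "outputs/preprocess_run.ipynb", +    parameters={"input_path": "data/sc_counts.h5ad", "n_top_genes": 2000}, +) +```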
+ +### Execute notebooks via the CLI +Jupyter notebooks can be executed via [nbconvert](https://nbconvert.readthedocs.io/en/latest/#): ```bash jupyter nbconvert --to notebook --execute my_notebook.ipynb --allow-errors --output-dir outputs/ ``` +[RMarkdown](https://rmarkdown.rstudio.com/): +```bash +Rscript -e "rmarkdown::render('my_notebook.Rmd',params=list(args = myarg))" +``` + ## Script pipelines ### Calling scripts in the same environment diff --git a/book/workflow_frameworks/examples/viash_nextflow/.gitignore b/book/workflow_frameworks/examples/viash_nextflow/.gitignore new file mode 100644 index 0000000..401d616 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/.gitignore @@ -0,0 +1,3 @@ +/work +/target +/.nextflow* \ No newline at end of file diff --git a/book/workflow_frameworks/examples/viash_nextflow/_viash.yaml b/book/workflow_frameworks/examples/viash_nextflow/_viash.yaml new file mode 100644 index 0000000..6654ed6 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/_viash.yaml @@ -0,0 +1,3 @@ +name: polygloty_usecase +version: 0.1.0 +viash_version: 0.9.0 \ No newline at end of file diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/compute_pseudobulk/config.vsh.yaml b/book/workflow_frameworks/examples/viash_nextflow/src/compute_pseudobulk/config.vsh.yaml new file mode 100644 index 0000000..7e486af --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/compute_pseudobulk/config.vsh.yaml @@ -0,0 +1,56 @@ +name: compute_pseudobulk +description: Compute pseudobulk expression from an AnnData object + +argument_groups: + - name: Inputs + arguments: + - type: file + name: --input + description: Path to the input h5ad file + example: /path/to/input.h5ad + required: true + - name: Pseudobulk arguments + arguments: + - type: string + name: --obs_column_index + description: Name of the column to pseudobulk on + example: cell_type + required: true + - type: string + name: --obs_column_values + description: List of column names for the new obs data frame + example: ["batch", "sample"] + multiple: true + required: true + - name: Outputs + arguments: + - type: file + name: --output + description: Path to the output h5ad file + example: /path/to/output.h5ad + required: true + direction: output + +resources: + - type: python_script + path: script.py + +test_resources: + - type: python_script + path: test.py + +engines: + - type: docker + image: python:3.10 + setup: + - type: python + pypi: + - anndata + test_setup: + - type: python + pypi: + - viashpy + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/compute_pseudobulk/script.py b/book/workflow_frameworks/examples/viash_nextflow/src/compute_pseudobulk/script.py new file mode 100644 index 0000000..90a6e07 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/compute_pseudobulk/script.py @@ -0,0 +1,41 @@ +import anndata as ad +import pandas as pd +import numpy as np + +## VIASH START +par = {"input": "", "obs_column_index": "", "obs_column_values": [], "output": ""} +## VIASH END + +print("Load data", flush=True) +adata = ad.read_h5ad(par["input"]) + +print(f"Format of input data: {adata}", flush=True) +assert par["obs_column_index"] in adata.obs.columns, f"Column '{par['obs_column_index']}' not found in obs." +for col in par["obs_column_values"]: + assert col in adata.obs.columns, f"Column '{col}' not found in obs." 
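+ +# The block below builds a data frame of adata.X indexed by the `obs_column_index` column +# and sums over that index, so each unique value becomes one pseudobulk sample.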
+ +print("Compute pseudobulk", flush=True) +X = adata.X +if not isinstance(X, np.ndarray): + X = X.toarray() +combined = pd.DataFrame( + X, + index=adata.obs[par["obs_column_index"]], +) +combined.columns = adata.var_names +pb_X = combined.groupby(level=0).sum() + +print("Construct obs for pseudobulk") +pb_obs = adata.obs[par["obs_column_values"]].copy() +pb_obs.index = adata.obs[par["obs_column_index"]] +pb_obs = pb_obs.drop_duplicates() + +print("Create AnnData object") +pb_adata = ad.AnnData( + X=pb_X.loc[pb_obs.index].values, + obs=pb_obs, + var=adata.var, +) + +print("Store to disk") +pb_adata.write_h5ad(par["output"], compression="gzip") diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/compute_pseudobulk/test.py b/book/workflow_frameworks/examples/viash_nextflow/src/compute_pseudobulk/test.py new file mode 100644 index 0000000..02cd7e9 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/compute_pseudobulk/test.py @@ -0,0 +1,43 @@ +import sys +import anndata as ad +import pytest +import numpy as np + +def test_subset_var(run_component, tmp_path): + input_path = tmp_path / "input.h5ad" + output_path = tmp_path / "output.h5ad" + + # create data + adata_in = ad.AnnData( + X=np.array([[1, 2], [3, 4], [5, 6], [7, 8]]), + obs={ + "cell_type": ["A", "A", "B", "B"], + "time": [1, 2, 1, 2], + "condition": ["ctrl", "ctrl", "trt", "trt"], + }, + var={"highly_variable": [True, False]}, + ) + + adata_in.write_h5ad(input_path) + + # run component + run_component([ + "--input", str(input_path), + "--obs_column_index", "cell_type", + "--obs_column_values", "condition", + "--output", str(output_path), + ]) + + # load output + adata_out = ad.read_h5ad(output_path) + + # check output + assert adata_out.X.shape == (2, 2) + assert np.all(adata_out.X == np.array([[4, 6], [12, 14]])) + assert adata_out.obs.index.tolist() == ["A", "B"] + assert adata_out.obs["condition"].tolist() == ["ctrl", "trt"] + assert adata_out.var["highly_variable"].tolist() == [True, False] + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__])) diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/differential_expression/config.vsh.yaml b/book/workflow_frameworks/examples/viash_nextflow/src/differential_expression/config.vsh.yaml new file mode 100644 index 0000000..c7785fe --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/differential_expression/config.vsh.yaml @@ -0,0 +1,68 @@ +name: differential_expression +description: Compute differential expression between two observation types + +argument_groups: + - name: Inputs + arguments: + - type: file + name: --input + description: Path to the input h5ad file + example: /path/to/input.h5ad + required: true + - name: Differential expression arguments + arguments: + - type: string + name: --contrast + description: | + Contrast to compute. Must be of length 3: + + 1. The name of the column to contrast on + 2. The name of the first observation type + 3. 
The name of the second observation type + example: ["cell_type", "batch", "sample"] + multiple: true + required: true + - type: string + name: --design_formula + description: Design formula for the differential expression model + example: ~ batch + cell_type + - name: Outputs + arguments: + - type: file + name: --output + description: Path to the output h5ad file + example: /path/to/output.h5ad + required: true + direction: output + +resources: + - type: r_script + path: script.R + +test_resources: + - type: r_script + path: test.R + +engines: + - type: docker + image: rocker/r2u:22.04 + setup: + - type: apt + packages: + - python3 + - python3-pip + - python3-dev + - python-is-python3 + - type: python + pypi: + - anndata + - type: r + cran: + - anndata + - processx + bioc: + - DESeq2 + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/differential_expression/script.R b/book/workflow_frameworks/examples/viash_nextflow/src/differential_expression/script.R new file mode 100644 index 0000000..f91a50d --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/differential_expression/script.R @@ -0,0 +1,42 @@ +library(anndata) +requireNamespace("DESeq2", quietly = TRUE) + +## VIASH START +par <- list(input = "", contrast = c(), design_formula = "", output = "") +## VIASH END + +cat("Reading data\n") +adata <- read_h5ad(par$input) + +cat("Parse formula\n") +formula <- as.formula(par$design_formula) + +cat("Create DESeq dataset\n") +# transform counts matrix +count_data <- t(as.matrix(adata$X)) +storage.mode(count_data) <- "integer" + +# create dataset +dds <- DESeq2::DESeqDataSetFromMatrix( + countData = count_data, + colData = adata$obs, + design = formula +) + +cat("Run DESeq2\n") +dds <- DESeq2::DESeq(dds) + +res <- DESeq2::results(dds, contrast = par$contrast) |> + as.data.frame() + +cat("Write to disk\n") +contrast_names <- gsub(" ", "_", par$contrast) +contrast_names <- gsub("[^[:alnum:]]", "_", contrast_names) +contrast_names <- gsub("__", "_", contrast_names) +contrast_names <- tolower(contrast_names) + +varm_name <- paste0("de_", paste(contrast_names, collapse = "_")) +adata$varm[[varm_name]] <- res + +# Save adata +zzz <- adata$write_h5ad(par$output, compression = "gzip") diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/differential_expression/test.R b/book/workflow_frameworks/examples/viash_nextflow/src/differential_expression/test.R new file mode 100644 index 0000000..cfd135c --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/differential_expression/test.R @@ -0,0 +1,61 @@ +library(anndata) + +cat("Create input data\n") +X <- matrix(runif(100, 10, 100), nrow = 10, ncol = 10) +for (i in 1:10) { + X[1:5, i] <- X[1:5, i] + i * 10 +} +adata_in <- AnnData( + X = X, + obs = data.frame( + row.names = paste0("cell", 1:10), + sm_name = rep(c("Belinostat", "Dimethyl Sulfoxide"), each = 5), + plate_name = rep(c("plate1", "plate2"), times = 5) + ), + var = data.frame( + row.names = paste0("gene", 1:10) + ) +) + +cat("Write input data to file\n") +input_path <- "input.h5ad" +output_path <- "output.h5ad" +zzz <- adata_in$write_h5ad(input_path) + +cat("Run component\n") +zzz <- processx::run( + command = meta$executable, + args = c( + "--input", input_path, + "--contrast", "sm_name;Dimethyl Sulfoxide;Belinostat", + "--design_formula", "~ sm_name + plate_name", + "--output", output_path + ), + error_on_status = TRUE, + echo = TRUE +) + +cat("Read output 
data\n") +adata_out <- read_h5ad(output_path) + +cat("Preview output data\n") +print(adata_out) + +cat("Check DE results:\n") +de_out <- adata_out$varm$de_sm_name_dimethyl_sulfoxide_belinostat +if (is.null(de_out)) { + stop("No DE results found") +} + +print(de_out) + +expected_colnames <- c("baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj") +if (!all(colnames(de_out) == expected_colnames)) { + stop(paste0( + "Column names do not match.\n", + "Expected: ", paste(expected_colnames, collapse = ", "), "\n", + "Actual: ", paste(colnames(de_out), collapse = ", ") + )) +} + +cat("Done\n") \ No newline at end of file diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/load_data/config.vsh.yaml b/book/workflow_frameworks/examples/viash_nextflow/src/load_data/config.vsh.yaml new file mode 100644 index 0000000..683168a --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/load_data/config.vsh.yaml @@ -0,0 +1,31 @@ +name: load_data +description: Load data from an S3 bucket + +arguments: + - type: string + name: --url + description: URL to the data + example: s3://my-bucket/my-data.csv + required: true + - type: file + name: --output + description: Path to the output file + example: /path/to/output.csv + required: true + direction: output + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: amazon/aws-cli + +runners: + - type: executable + - type: nextflow diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/load_data/script.sh b/book/workflow_frameworks/examples/viash_nextflow/src/load_data/script.sh new file mode 100644 index 0000000..c0aca9d --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/load_data/script.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +aws s3 cp \ + --no-sign-request \ + "$par_url" \ + "$par_output" diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/load_data/test.sh b/book/workflow_frameworks/examples/viash_nextflow/src/load_data/test.sh new file mode 100644 index 0000000..5c0e486 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/load_data/test.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Run the executable +"$meta_executable" \ + --url s3://openproblems-bio/public/neurips-2023-competition/moa_annotations.csv \ + --output moa_annotations.csv + +# Check if the output file exists +if [[ ! -f moa_annotations.csv ]]; then + echo "File not found!" + exit 1 +fi + +# Check if the output file has the correct MD5 sum +if [[ "$(md5sum moa_annotations.csv | cut -d ' ' -f 1)" != "80ebe44ce6b8d73f31dbc653787089f9" ]]; then + echo "MD5 sum does not match!" + exit 1 +fi + +echo "All tests passed!" 
\ No newline at end of file diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/subset_obs/config.vsh.yaml b/book/workflow_frameworks/examples/viash_nextflow/src/subset_obs/config.vsh.yaml new file mode 100644 index 0000000..43ee865 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/subset_obs/config.vsh.yaml @@ -0,0 +1,58 @@ +name: subset_obs +description: Subset the observations of an AnnData object + +argument_groups: + - name: Inputs + arguments: + - type: file + name: --input + description: Path to the input h5ad file + example: /path/to/input.h5ad + required: true + - name: Subsetting arguments + arguments: + - type: string + name: --obs_column + description: Name of the column to subset on + example: cell_type + required: true + - type: string + name: --obs_values + description: List of values to subset on. If column is a boolean, do not pass any values to this argument. + multiple: true + example: ["B cell", "T cell"] + - type: boolean_true + name: --invert + description: Invert the subset + - name: Outputs + arguments: + - type: file + name: --output + description: Path to the output h5ad file + example: /path/to/output.h5ad + required: true + direction: output + +resources: + - type: python_script + path: script.py + +test_resources: + - type: python_script + path: test.py + +engines: + - type: docker + image: python:3.10 + setup: + - type: python + pypi: + - anndata + test_setup: + - type: python + pypi: + - viashpy + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/subset_obs/script.py b/book/workflow_frameworks/examples/viash_nextflow/src/subset_obs/script.py new file mode 100644 index 0000000..3dcdcd4 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/subset_obs/script.py @@ -0,0 +1,31 @@ +import anndata as ad + +## VIASH START +par = {"input": "", "obs_column": "", "obs_values": [], "invert": False, "output": ""} +## VIASH END + +print("Load data", flush=True) +adata = ad.read_h5ad(par["input"]) + +print(f"Format of input data: {adata}", flush=True) + +print("Subset data", flush=True) +filt = adata.obs[par["obs_column"]] + +# if filt is a list of booleans +assert (filt.dtype == bool) == (not par["obs_values"]), \ + f"If column '{par['obs_column']}' is boolean, 'obs_values' must be empty, and vice versa." 
+ +if filt.dtype != bool: + # if filt is a list of strings + filt = filt.isin(par["obs_values"]) + +if par["invert"]: + filt = ~filt + +adata = adata[filt].copy() + +print(f"Format of output data: {adata}", flush=True) + +print("Store to disk", flush=True) +adata.write_h5ad(par["output"], compression="gzip") diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/subset_obs/test.py b/book/workflow_frameworks/examples/viash_nextflow/src/subset_obs/test.py new file mode 100644 index 0000000..5892fa1 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/subset_obs/test.py @@ -0,0 +1,37 @@ +import sys +import anndata as ad +import pytest +import numpy as np + +def test_subset_var(run_component, tmp_path): + input_path = tmp_path / "input.h5ad" + output_path = tmp_path / "output.h5ad" + + # create data + adata_in = ad.AnnData( + X=np.random.rand(4, 2), + obs={"cell_type": ["A", "B", "C", "D"]}, + var={"highly_variable": [True, False]}, + ) + + adata_in.write_h5ad(input_path) + + # run component + run_component([ + "--input", str(input_path), + "--obs_column", "cell_type", + "--obs_values", "A;B", + "--output", str(output_path), + ]) + + # load output + adata_out = ad.read_h5ad(output_path) + + # check output + assert adata_out.X.shape == (2, 2) + assert adata_out.obs["cell_type"].tolist() == ["A", "B"] + assert adata_out.var["highly_variable"].tolist() == [True, False] + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__])) diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/subset_var/config.vsh.yaml b/book/workflow_frameworks/examples/viash_nextflow/src/subset_var/config.vsh.yaml new file mode 100644 index 0000000..1c8cdc9 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/subset_var/config.vsh.yaml @@ -0,0 +1,58 @@ +name: subset_var +description: Subset the variables of an AnnData object + +argument_groups: + - name: Inputs + arguments: + - type: file + name: --input + description: Path to the input h5ad file + example: /path/to/input.h5ad + required: true + - name: Subsetting arguments + arguments: + - type: string + name: --var_column + description: Name of the column to subset on + example: highly_variable + required: true + - type: string + name: --var_values + description: List of values to subset on. If column is a boolean, do not pass any values to this argument. 
+ multiple: true + example: [] + - type: boolean_true + name: --invert + description: Invert the subset + - name: Outputs + arguments: + - type: file + name: --output + description: Path to the output h5ad file + example: /path/to/output.h5ad + required: true + direction: output + +resources: + - type: python_script + path: script.py + +test_resources: + - type: python_script + path: test.py + +engines: + - type: docker + image: python:3.10 + setup: + - type: python + pypi: + - anndata + test_setup: + - type: python + pypi: + - viashpy + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/subset_var/script.py b/book/workflow_frameworks/examples/viash_nextflow/src/subset_var/script.py new file mode 100644 index 0000000..8086273 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/subset_var/script.py @@ -0,0 +1,31 @@ +import anndata as ad + +## VIASH START +par = {"input": "", "var_column": "", "var_values": [], "invert": False, "output": ""} +## VIASH END + +print("Load data", flush=True) +adata = ad.read_h5ad(par["input"]) + +print(f"Format of input data: {adata}", flush=True) + +print("Subset data", flush=True) +filt = adata.var[par["var_column"]] + +# if filt is a list of booleans +assert (filt.dtype == bool) == (not par["var_values"]), \ + f"If column '{par['var_column']}' is boolean, 'var_values' must be empty, and vice versa." + +if filt.dtype != bool: + # if filt is a list of strings + filt = filt.isin(par["var_values"]) + +if par["invert"]: + filt = ~filt + +adata = adata[:, filt].copy() + +print(f"Format of output data: {adata}", flush=True) + +print("Store to disk", flush=True) +adata.write_h5ad(par["output"], compression="gzip") diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/subset_var/test.py b/book/workflow_frameworks/examples/viash_nextflow/src/subset_var/test.py new file mode 100644 index 0000000..0c4db92 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/subset_var/test.py @@ -0,0 +1,36 @@ +import sys +import anndata as ad +import pytest +import numpy as np + +def test_subset_var(run_component, tmp_path): + input_path = tmp_path / "input.h5ad" + output_path = tmp_path / "output.h5ad" + + # create data + adata_in = ad.AnnData( + X=np.random.rand(4, 2), + obs={"cell_type": ["A", "B", "C", "D"]}, + var={"highly_variable": [True, False]}, + ) + + adata_in.write_h5ad(input_path) + + # run component + run_component([ + "--input", str(input_path), + "--var_column", "highly_variable", + "--output", str(output_path), + ]) + + # load output + adata_out = ad.read_h5ad(output_path) + + # check output + assert adata_out.X.shape == (4, 1) + assert adata_out.obs["cell_type"].tolist() == ["A", "B", "C", "D"] + assert adata_out.var["highly_variable"].tolist() == [True] + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__])) diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/workflow/config.vsh.yaml b/book/workflow_frameworks/examples/viash_nextflow/src/workflow/config.vsh.yaml new file mode 100644 index 0000000..e589477 --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/workflow/config.vsh.yaml @@ -0,0 +1,51 @@ +name: workflow +description: | + A workflow to compute differential expression between two groups of cells in an AnnData object. 
+ +argument_groups: + - name: Inputs + arguments: + - type: file + name: --input + description: Path to the input h5ad file + example: s3://my-bucket/my-data.h5ad + required: true + - name: Differential expression arguments + arguments: + - type: string + name: --contrast + description: | + Contrast to compute. Must be of length 3: + + 1. The name of the column to contrast on + 2. The name of the first observation type + 3. The name of the second observation type + example: ["cell_type", "batch", "sample"] + multiple: true + required: true + - type: string + name: --design_formula + description: Design formula for the differential expression model + example: ~ batch + cell_type + - name: Outputs + arguments: + - type: file + name: --output + description: Path to the output h5ad file + example: /path/to/output.h5ad + required: true + direction: output + +resources: + - type: nextflow_script + path: main.nf + entrypoint: wf + +dependencies: + - name: subset_obs + - name: subset_var + - name: compute_pseudobulk + - name: differential_expression + +runners: + - type: nextflow diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/workflow/main.nf b/book/workflow_frameworks/examples/viash_nextflow/src/workflow/main.nf new file mode 100644 index 0000000..584d78e --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/workflow/main.nf @@ -0,0 +1,58 @@ +workflow wf { + take: + ch_in + + main: + ch_out = ch_in + + | subset_obs.run( + key: "subset_sm_name", + fromState: ["input": "input"], + args: [ + "obs_column": "sm_name", + "obs_values": ["Belinostat", "Dimethyl Sulfoxide"] + ], + toState: ["data": "output"] + ) + + | subset_obs.run( + key: "subset_cell_type", + fromState: ["input": "data"], + args: [ + "obs_column": "cell_type", + "obs_values": ["T cells"] + ], + toState: ["data": "output"] + ) + + | subset_var.run( + fromState: ["input": "data"], + args: [ + "var_column": "highly_variable", + ], + toState: ["data": "output"] + ) + + | compute_pseudobulk.run( + fromState: ["input": "data"], + args: [ + "obs_column_index": "plate_well_celltype_reannotated", + "obs_column_values": ["sm_name", "cell_type", "plate_name", "well"], + ], + toState: ["data": "output"] + ) + + | differential_expression.run( + fromState: [ + "input": "data", + "contrast": "contrast", + "design_formula": "design_formula" + ], + toState: ["data": "output"] + ) + + | setState(["output": "data"]) + + emit: + ch_out +} \ No newline at end of file diff --git a/book/workflow_frameworks/examples/viash_nextflow/src/workflow/run.sh b/book/workflow_frameworks/examples/viash_nextflow/src/workflow/run.sh new file mode 100755 index 0000000..6e624ef --- /dev/null +++ b/book/workflow_frameworks/examples/viash_nextflow/src/workflow/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +nextflow run \ + target/nextflow/workflow/main.nf \ + -with-docker \ + --id dataset \ + --input s3://openproblems-bio/public/neurips-2023-competition/sc_counts_reannotated_with_counts.h5ad \ + --contrast 'sm_name;Belinostat;Dimethyl Sulfoxide' \ + --design_formula '~ sm_name + plate_name' \ + --publish_dir output \ No newline at end of file diff --git a/renv.lock b/renv.lock index 95fa381..6d1ba6b 100644 --- a/renv.lock +++ b/renv.lock @@ -353,6 +353,22 @@ ], "Hash": "1716e201f81ced0f456dd5ec85fe20f8" }, + "SingleCellExperiment": { + "Package": "SingleCellExperiment", + "Version": "1.24.0", + "Source": "Bioconductor", + "Requirements": [ + "BiocGenerics", + "DelayedArray", + "GenomicRanges", + "S4Vectors", + "SummarizedExperiment", + 
"methods", + "stats", + "utils" + ], + "Hash": "e21b3571ce76eb4dfe6bf7900960aa6a" + }, "SparseArray": { "Package": "SparseArray", "Version": "1.2.4",