From a91bba75df83ae4b2c820e1c034639d32151f2a6 Mon Sep 17 00:00:00 2001 From: LouiseDck Date: Thu, 12 Sep 2024 08:47:29 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20saeyslab?= =?UTF-8?q?/polygloty@2b52247422c36a01dcb1550431523c66c19039f2=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- assets/slides.html | 128 +++++++++++++++++++++++++++------ book/in_memory/reticulate.html | 2 +- book/in_memory/rpy2.html | 25 ++++--- search.json | 4 +- sitemap.xml | 36 +++++----- 5 files changed, 139 insertions(+), 56 deletions(-) diff --git a/assets/slides.html b/assets/slides.html index d9f024d..5fbbdb3 100644 --- a/assets/slides.html +++ b/assets/slides.html @@ -1262,7 +1262,6 @@

Polyglot programming for single-cell analysis

2024-09-12

-

Introduction

    @@ -1271,14 +1270,16 @@

    Introduction

We will be focusing on R & Python

-
-

Summary

+ +
+

Summary

Interoperability between languages allows analysts to take advantage of the strengths of different ecosystems

On-disk interoperability uses standard file formats to transfer data and is typically more reliable

In-memory interoperability transfers data directly between parallel sessions and is convenient for interactive analysis

While interoperability is currently possible, developers continue to improve the experience

Single-cell best practices: Interoperability

-
+
+

How do you interact with a package in another language?

    @@ -1410,7 +1411,7 @@

    Rpy2: basics

  1. rpy2.robjects, the high-level interface
  2. -
    +
    import rpy2
     import rpy2.robjects as robjects
     
    @@ -1437,7 +1438,7 @@ 

    Rpy2: basics

    Rpy2: basics

    -
    +
    str_vector = robjects.StrVector(['abc', 'def', 'ghi'])
     flt_vector = robjects.FloatVector([0.3, 0.8, 0.7])
     int_vector = robjects.IntVector([1, 2, 3])
    @@ -1457,7 +1458,7 @@ 

    Rpy2: basics

    Rpy2: numpy

    -
    +
    import numpy as np
     
     from rpy2.robjects import numpy2ri
    @@ -1469,18 +1470,18 @@ 

    Rpy2: numpy

    mtx = robjects.r.matrix(rd_m, nrow = 5) print(mtx)
    -
    [[0.69525594 0.29780005 0.41267065 0.25871805]
    - [0.88313251 0.79471121 0.5369112  0.24752835]
    - [0.68812232 0.24265455 0.51419239 0.80029227]
    - [0.43218943 0.37441082 0.05505875 0.23599726]
    - [0.58236939 0.34859652 0.14651556 0.24370712]]
    +
    [[0.73294749 0.55953375 0.69944132 0.52744075]
    + [0.09756794 0.39535684 0.80669803 0.10540606]
    + [0.35662206 0.70148737 0.12002733 0.28026677]
    + [0.19947608 0.84421019 0.82702188 0.82531633]
    + [0.56938249 0.04640811 0.34178679 0.3285883 ]]

    Rpy2: pandas

    -
    +
    import pandas as pd
     
     from rpy2.robjects import pandas2ri
    @@ -1503,7 +1504,7 @@ 

    Rpy2: pandas

    Rpy2: sparse matrices

    -
    +
    import scipy as sp
     
     from anndata2ri import scipy2ri
    @@ -1515,12 +1516,12 @@ 

    Rpy2: sparse matrices

    print(sp_r)
    5 x 4 sparse Matrix of class "dgCMatrix"
    -                                             
    -[1,] 0.6952559 0.2978000 0.41267065 0.2587180
    -[2,] 0.8831325 0.7947112 0.53691120 0.2475283
    -[3,] 0.6881223 0.2426546 0.51419239 0.8002923
    -[4,] 0.4321894 0.3744108 0.05505875 0.2359973
    -[5,] 0.5823694 0.3485965 0.14651556 0.2437071
    +                                              
    +[1,] 0.73294749 0.55953375 0.6994413 0.5274408
    +[2,] 0.09756794 0.39535684 0.8066980 0.1054061
    +[3,] 0.35662206 0.70148737 0.1200273 0.2802668
    +[4,] 0.19947608 0.84421019 0.8270219 0.8253163
    +[5,] 0.56938249 0.04640811 0.3417868 0.3285883
     
    @@ -1641,10 +1642,33 @@

    Reticulate scanpy

    # obsp: 'connectivities', 'distances'
    -

    Disk-based interoperability

    +

    Disk-based interoperability is a strategy for achieving interoperability between tools written in different programming languages by storing intermediate results in standardized, language-agnostic file formats.

    +
      +
    • Upside: +
        +
      • Simple, just add reading and writing lines
      • +
      • Modular scripts
      • +
    • +
    • Downside: +
        +
      • increased disk usage
      • +
      • less direct interaction, debugging…
      • +
    • +
    +
    +
    +
    +

    Important features of interoperable file formats

    +
      +
    • Compression
    • +
    • Sparse matrix support
    • +
    • Large images
    • +
    • Lazy chunk loading
    • +
    • Remote storage
    • +

    General single cell file formats of interest for Python and R

    @@ -1871,9 +1895,69 @@

    Specialized single cell file formats of interest for Python and R

    +
    +
    +

    Disk-based pipelines

    +

    Script pipeline:

    +
    #!/bin/bash
    +
    +bash scripts/1_load_data.sh
    +python scripts/2_compute_pseudobulk.py
    +Rscript scripts/3_analysis_de.R
    +

    Notebook pipeline:

    +
    # Every step can be a new notebook execution with inspectable output
    +jupyter nbconvert --to notebook --execute my_notebook.ipynb --allow-errors --output-dir outputs/
    +
    +
    +

    Just stay in your language and call scripts

    +
    import subprocess
    +
    +subprocess.run("bash scripts/1_load_data.sh", shell=True)
    +# Alternatively you can run Python code here instead of calling a Python script
    +subprocess.run("python scripts/2_compute_pseudobulk.py", shell=True)
    +subprocess.run("Rscript scripts/3_analysis_de.R", shell=True)
    +
    +
    +
    +

    Pipelines with different environments

    +
      +
    1. interleave with environment (de)activation functions
    2. +
    3. use rvenv
    4. +
    5. use Pixi
    6. +
    +
    +
    +

    Pixi to manage different environments

    +
    pixi run -e bash scripts/1_load_data.sh
    +pixi run -e scverse scripts/2_compute_pseudobulk.py
    +pixi run -e rverse scripts/3_analysis_de.R
    +
    +
    +

    Define tasks in Pixi

    +
    ...
    +[feature.bash.tasks]
    +load_data = "bash book/disk_based/scripts/1_load_data.sh"
    +...
    +[feature.scverse.tasks]
    +compute_pseudobulk = "python book/disk_based/scripts/2_compute_pseudobulk.py"
    +...
    +[feature.rverse.tasks]
    +analysis_de = "Rscript --no-init-file book/disk_based/scripts/3_analysis_de.R"
    +...
    +[tasks]
    +pipeline = { depends-on = ["load_data", "compute_pseudobulk", "analysis_de"] }
    +
    pixi run pipeline
    +
    +
    +

    Also possible to use containers

    +
    docker pull berombau/polygloty-docker:latest
    +docker run -it -v $(pwd)/usecase:/app/usecase -v $(pwd)/book:/app/book berombau/polygloty-docker:latest pixi run pipeline
    +

    Another approach is to use multi-package containers to create custom combinations of packages. - Multi-Package BioContainers - Seqera Containers

    +

    Workflows

    - +

    You can go a long way with a folder of notebooks or scripts and the right tools. But as your project grows more bespoke, it can be worth the effort to use a workflow framework like Viash, Nextflow or Snakemake to manage the pipeline for you.

    +

    See https://saeyslab.github.io/polygloty/book/workflow_frameworks/

    diff --git a/book/in_memory/reticulate.html b/book/in_memory/reticulate.html index 508f61e..3f3fee3 100644 --- a/book/in_memory/reticulate.html +++ b/book/in_memory/reticulate.html @@ -322,7 +322,7 @@

    6 Reticulate: bas

    rd$choice(example)
    -
    [1] 3
    +
    [1] 2
    bi$list(bi$reversed(example))
    diff --git a/book/in_memory/rpy2.html b/book/in_memory/rpy2.html index 2c5dc7a..36c1428 100644 --- a/book/in_memory/rpy2.html +++ b/book/in_memory/rpy2.html @@ -387,19 +387,18 @@

    
       0%|          | 0.00/9.82M [00:00<?, ?B/s]
    -  0%|          | 8.00k/9.82M [00:00<03:13, 53.3kB/s]
    -  0%|          | 32.0k/9.82M [00:00<01:30, 114kB/s] 
    -  1%|          | 96.0k/9.82M [00:00<00:40, 250kB/s]
    -  2%|1         | 200k/9.82M [00:00<00:24, 416kB/s] 
    -  4%|4         | 408k/9.82M [00:00<00:13, 749kB/s]
    -  8%|8         | 840k/9.82M [00:00<00:06, 1.44MB/s]
    - 17%|#6        | 1.65M/9.82M [00:01<00:03, 2.75MB/s]
    - 30%|##9       | 2.91M/9.82M [00:01<00:01, 4.35MB/s]
    - 46%|####6     | 4.55M/9.82M [00:01<00:00, 6.55MB/s]
    - 67%|######6   | 6.55M/9.82M [00:01<00:00, 8.60MB/s]
    - 88%|########7 | 8.64M/9.82M [00:01<00:00, 9.21MB/s]
    - 93%|#########2| 9.09M/9.82M [00:01<00:00, 8.04MB/s]
    -100%|##########| 9.82M/9.82M [00:01<00:00, 5.35MB/s]
    + 0%| | 8.00k/9.82M [00:00<02:11, 78.3kB/s] + 0%| | 32.0k/9.82M [00:00<01:02, 165kB/s] + 1%| | 96.0k/9.82M [00:00<00:27, 365kB/s] + 2%|1 | 200k/9.82M [00:00<00:16, 606kB/s] + 4%|4 | 408k/9.82M [00:00<00:09, 1.09MB/s] + 8%|8 | 840k/9.82M [00:00<00:04, 2.09MB/s] + 17%|#6 | 1.65M/9.82M [00:00<00:02, 4.00MB/s] + 34%|###3 | 3.33M/9.82M [00:00<00:00, 7.85MB/s] + 46%|####6 | 4.55M/9.82M [00:00<00:00, 8.71MB/s] + 73%|#######2 | 7.16M/9.82M [00:01<00:00, 13.5MB/s] + 93%|#########2| 9.11M/9.82M [00:01<00:00, 15.1MB/s] +100%|##########| 9.82M/9.82M [00:01<00:00, 8.41MB/s]

    
     with anndata2ri.converter.context():
    diff --git a/search.json b/search.json
    index 0e9c651..6cd6eb8 100644
    --- a/search.json
    +++ b/search.json
    @@ -159,7 +159,7 @@
         "href": "book/in_memory/rpy2.html",
         "title": "4  Rpy2",
         "section": "",
    -    "text": "4.1 Rpy2: basic functionality\nRpy2 is a foreign function interface to R. It can be used in the following way:\nimport rpy2\nimport rpy2.robjects as robjects\n\n/home/runner/work/polygloty/polygloty/renv/python/virtualenvs/renv-python-3.12/lib/python3.12/site-packages/rpy2/rinterface_lib/embedded.py:276: UserWarning: R was initialized outside of rpy2 (R_NilValue != NULL). Trying to use it nevertheless.\n  warnings.warn(msg)\nR was initialized outside of rpy2 (R_NilValue != NULL). Trying to use it nevertheless.\n\nvector = robjects.IntVector([1,2,3])\nrsum = robjects.r['sum']\n\nrsum(vector)\n\n\n        IntVector with 1 elements.\n        \n\n\n\n6\nLuckily, we’re not restricted to just calling R functions and creating R objects. The real power of this in-memory interoperability lies in the conversion of Python objects to R objects to call R functions on, and then to the conversion of the results back to Python objects.\nRpy2 requires specific conversion rules for different Python objects. It is straightforward to create R vectors from corresponding Python lists:\nstr_vector = robjects.StrVector(['abc', 'def', 'ghi'])\nflt_vector = robjects.FloatVector([0.3, 0.8, 0.7])\nint_vector = robjects.IntVector([1, 2, 3])\nmtx = robjects.r.matrix(robjects.IntVector(range(10)), nrow=5)\nHowever, for single cell biology, the objects that are most interesting to convert are (count) matrices, arrays and dataframes. 
In order to do this, you need to import the corresponding rpy2 modules and specify the conversion context.\nimport numpy as np\n\nfrom rpy2.robjects import numpy2ri\nfrom rpy2.robjects import default_converter\n\nrd_m = np.random.random((10, 7))\n\nwith (default_converter + numpy2ri.converter).context():\n    mtx2 = robjects.r.matrix(rd_m, nrow = 10)\nimport pandas as pd\n\nfrom rpy2.robjects import pandas2ri\n\npd_df = pd.DataFrame({'int_values': [1,2,3],\n                      'str_values': ['abc', 'def', 'ghi']})\n\nwith (default_converter + pandas2ri.converter).context():\n    pd_df_r = robjects.DataFrame(pd_df)\nOne big limitation of rpy2 is the inability to convert sparse matrices: there is no built-in conversion module for scipy. The anndata2ri package provides, apart from functionality to convert SingleCellExperiment objects to an anndata objects, functions to convert sparse matrices.\nimport scipy as sp\n\nfrom anndata2ri import scipy2ri\n\nsparse_matrix = sp.sparse.csc_matrix(rd_m)\n\nwith (default_converter + scipy2ri.converter).context():\n    sp_r = scipy2ri.py2rpy(sparse_matrix)\nWe will showcase how to use anndata2ri to convert an anndata object to a SingleCellExperiment object and vice versa as well:\nimport anndata as ad\nimport scanpy.datasets as scd\n\nimport anndata2ri\n\nadata_paul = scd.paul15()\n\n\n  0%|          | 0.00/9.82M [00:00<?, ?B/s]\n  0%|          | 8.00k/9.82M [00:00<03:13, 53.3kB/s]\n  0%|          | 32.0k/9.82M [00:00<01:30, 114kB/s] \n  1%|          | 96.0k/9.82M [00:00<00:40, 250kB/s]\n  2%|1         | 200k/9.82M [00:00<00:24, 416kB/s] \n  4%|4         | 408k/9.82M [00:00<00:13, 749kB/s]\n  8%|8         | 840k/9.82M [00:00<00:06, 1.44MB/s]\n 17%|#6        | 1.65M/9.82M [00:01<00:03, 2.75MB/s]\n 30%|##9       | 2.91M/9.82M [00:01<00:01, 4.35MB/s]\n 46%|####6     | 4.55M/9.82M [00:01<00:00, 6.55MB/s]\n 67%|######6   | 6.55M/9.82M [00:01<00:00, 8.60MB/s]\n 88%|########7 | 8.64M/9.82M [00:01<00:00, 9.21MB/s]\n 93%|#########2| 
9.09M/9.82M [00:01<00:00, 8.04MB/s]\n100%|##########| 9.82M/9.82M [00:01<00:00, 5.35MB/s]\n\n\nwith anndata2ri.converter.context():\n    sce = anndata2ri.py2rpy(adata_paul)\n    ad2 = anndata2ri.rpy2py(sce)",
    +    "text": "4.1 Rpy2: basic functionality\nRpy2 is a foreign function interface to R. It can be used in the following way:\nimport rpy2\nimport rpy2.robjects as robjects\n\n/home/runner/work/polygloty/polygloty/renv/python/virtualenvs/renv-python-3.12/lib/python3.12/site-packages/rpy2/rinterface_lib/embedded.py:276: UserWarning: R was initialized outside of rpy2 (R_NilValue != NULL). Trying to use it nevertheless.\n  warnings.warn(msg)\nR was initialized outside of rpy2 (R_NilValue != NULL). Trying to use it nevertheless.\n\nvector = robjects.IntVector([1,2,3])\nrsum = robjects.r['sum']\n\nrsum(vector)\n\n\n        IntVector with 1 elements.\n        \n\n\n\n6\nLuckily, we’re not restricted to just calling R functions and creating R objects. The real power of this in-memory interoperability lies in the conversion of Python objects to R objects to call R functions on, and then to the conversion of the results back to Python objects.\nRpy2 requires specific conversion rules for different Python objects. It is straightforward to create R vectors from corresponding Python lists:\nstr_vector = robjects.StrVector(['abc', 'def', 'ghi'])\nflt_vector = robjects.FloatVector([0.3, 0.8, 0.7])\nint_vector = robjects.IntVector([1, 2, 3])\nmtx = robjects.r.matrix(robjects.IntVector(range(10)), nrow=5)\nHowever, for single cell biology, the objects that are most interesting to convert are (count) matrices, arrays and dataframes. 
In order to do this, you need to import the corresponding rpy2 modules and specify the conversion context.\nimport numpy as np\n\nfrom rpy2.robjects import numpy2ri\nfrom rpy2.robjects import default_converter\n\nrd_m = np.random.random((10, 7))\n\nwith (default_converter + numpy2ri.converter).context():\n    mtx2 = robjects.r.matrix(rd_m, nrow = 10)\nimport pandas as pd\n\nfrom rpy2.robjects import pandas2ri\n\npd_df = pd.DataFrame({'int_values': [1,2,3],\n                      'str_values': ['abc', 'def', 'ghi']})\n\nwith (default_converter + pandas2ri.converter).context():\n    pd_df_r = robjects.DataFrame(pd_df)\nOne big limitation of rpy2 is the inability to convert sparse matrices: there is no built-in conversion module for scipy. The anndata2ri package provides, apart from functionality to convert SingleCellExperiment objects to an anndata objects, functions to convert sparse matrices.\nimport scipy as sp\n\nfrom anndata2ri import scipy2ri\n\nsparse_matrix = sp.sparse.csc_matrix(rd_m)\n\nwith (default_converter + scipy2ri.converter).context():\n    sp_r = scipy2ri.py2rpy(sparse_matrix)\nWe will showcase how to use anndata2ri to convert an anndata object to a SingleCellExperiment object and vice versa as well:\nimport anndata as ad\nimport scanpy.datasets as scd\n\nimport anndata2ri\n\nadata_paul = scd.paul15()\n\n\n  0%|          | 0.00/9.82M [00:00<?, ?B/s]\n  0%|          | 8.00k/9.82M [00:00<02:11, 78.3kB/s]\n  0%|          | 32.0k/9.82M [00:00<01:02, 165kB/s] \n  1%|          | 96.0k/9.82M [00:00<00:27, 365kB/s]\n  2%|1         | 200k/9.82M [00:00<00:16, 606kB/s] \n  4%|4         | 408k/9.82M [00:00<00:09, 1.09MB/s]\n  8%|8         | 840k/9.82M [00:00<00:04, 2.09MB/s]\n 17%|#6        | 1.65M/9.82M [00:00<00:02, 4.00MB/s]\n 34%|###3      | 3.33M/9.82M [00:00<00:00, 7.85MB/s]\n 46%|####6     | 4.55M/9.82M [00:00<00:00, 8.71MB/s]\n 73%|#######2  | 7.16M/9.82M [00:01<00:00, 13.5MB/s]\n 93%|#########2| 9.11M/9.82M [00:01<00:00, 15.1MB/s]\n100%|##########| 
9.82M/9.82M [00:01<00:00, 8.41MB/s]\n\n\nwith anndata2ri.converter.context():\n    sce = anndata2ri.py2rpy(adata_paul)\n    ad2 = anndata2ri.rpy2py(sce)",
         "crumbs": [
           "In-memory interoperability",
           "4  Rpy2"
    @@ -192,7 +192,7 @@
         "href": "book/in_memory/reticulate.html",
         "title": "5  Reticulate",
         "section": "",
    -    "text": "Reticulate is a foreign function interface in R to Python.\n\n6 Reticulate: basic functionality\nData types are automatically converted from Python to R and vice versa. A useful table of automatic conversions can be found here.\nYou can easily import python modules, and call the functions in the following way:\n\nlibrary(reticulate)\n\nbi <- reticulate::import_builtins()\nrd <- reticulate::import(\"random\")\n\nexample <- c(1,2,3)\nbi$max(example)\n\n[1] 3\n\nrd$choice(example)\n\n[1] 3\n\nbi$list(bi$reversed(example))\n\n[1] 3 2 1\n\n\nNumpy is also easily used:\n\nnp <- reticulate::import(\"numpy\")\n\na <- np$asarray(tuple(list(1,2), list(3, 4)))\nb <- np$asarray(list(5,6))\nb <- np$reshape(b, newshape = tuple(1L,2L))\n\nnp$concatenate(tuple(a, b), axis=0L)\n\n     [,1] [,2]\n[1,]    1    2\n[2,]    3    4\n[3,]    5    6\n\n\nIf you want more finegrained control over conversion, you can specify in the import statement that you do not want results of functions of that package to be converted to R data types.\n\nnp <- reticulate::import(\"numpy\", convert = FALSE)\n\na <- np$asarray(tuple(list(1,2), list(3, 4)))\nb <- np$asarray(list(5,6))\nb <- np$reshape(b, newshape = tuple(1L,2L))\n\nnp$concatenate(tuple(a, b), axis=0L)\n\narray([[1., 2.],\n       [3., 4.],\n       [5., 6.]])\n\n\nYou can explicitly convert data types:\n\nresult <- np$concatenate(tuple(a, b), axis=0L)\n\npy_to_r(result)\n\n     [,1] [,2]\n[1,]    1    2\n[2,]    3    4\n[3,]    5    6\n\nresult_r <- py_to_r(result)\nr_to_py(result_r)\n\narray([[1., 2.],\n       [3., 4.],\n       [5., 6.]])\n\n\n\n\n7 Interactivity\nYou can easily include Python chunks in Rmarkdown notebooks using the Python engine in knitr.\n\n\n8 Usecase\nWe will not showcase the usefulness of reticulate by using the DE analysis: it would involve loading in pandas to create a Python dataframe, adding rownames and columnnames and then grouping them, but that is easier to do natively in R.\nA more interesting 
thing you can do using reticulate is interacting with anndata-based Python packages, such as scanpy!\n\nlibrary(anndata)\nlibrary(reticulate)\nsc <- import(\"scanpy\")\n\nadata_path <- \"../usecase/data/sc_counts_subset.h5ad\"\nadata <- anndata::read_h5ad(adata_path)\n\nWe can preprocess the data:\n\nsc$pp$filter_cells(adata, min_genes = 200)\nsc$pp$filter_genes(adata, min_cells = 3)\n\n\nsc$pp$pca(adata)\nsc$pp$neighbors(adata)\nsc$tl$umap(adata)\n\nadata\n\nAnnData object with n_obs × n_vars = 32727 × 20542\n    obs: 'dose_uM', 'timepoint_hr', 'well', 'row', 'col', 'plate_name', 'cell_id', 'cell_type', 'split', 'donor_id', 'sm_name', 'control', 'SMILES', 'sm_lincs_id', 'library_id', 'leiden_res1', 'group', 'cell_type_orig', 'plate_well_celltype_reannotated', 'cell_count_by_well_celltype', 'cell_count_by_plate_well', 'n_genes'\n    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'n_cells'\n    uns: 'cell_type_colors', 'celltypist_celltype_colors', 'donor_id_colors', 'hvg', 'leiden_res1_colors', 'log1p', 'neighbors', 'over_clustering', 'rank_genes_groups', 'pca', 'umap'\n    obsm: 'HTO_clr', 'X_pca', 'X_umap', 'protein_counts'\n    varm: 'PCs'\n    obsp: 'connectivities', 'distances'\n\n\nWe can’t easily show the result of the plot in this Quarto notebook, but we can save it and show it:\n\npath <- \"umap.png\"\nsc$pl$umap(adata, color=\"leiden_res1\", save=path)\n\n\n\n\n\n\n\nFigure 8.1: UMAP plot of the adata object",
    +    "text": "Reticulate is a foreign function interface in R to Python.\n\n6 Reticulate: basic functionality\nData types are automatically converted from Python to R and vice versa. A useful table of automatic conversions can be found here.\nYou can easily import python modules, and call the functions in the following way:\n\nlibrary(reticulate)\n\nbi <- reticulate::import_builtins()\nrd <- reticulate::import(\"random\")\n\nexample <- c(1,2,3)\nbi$max(example)\n\n[1] 3\n\nrd$choice(example)\n\n[1] 2\n\nbi$list(bi$reversed(example))\n\n[1] 3 2 1\n\n\nNumpy is also easily used:\n\nnp <- reticulate::import(\"numpy\")\n\na <- np$asarray(tuple(list(1,2), list(3, 4)))\nb <- np$asarray(list(5,6))\nb <- np$reshape(b, newshape = tuple(1L,2L))\n\nnp$concatenate(tuple(a, b), axis=0L)\n\n     [,1] [,2]\n[1,]    1    2\n[2,]    3    4\n[3,]    5    6\n\n\nIf you want more finegrained control over conversion, you can specify in the import statement that you do not want results of functions of that package to be converted to R data types.\n\nnp <- reticulate::import(\"numpy\", convert = FALSE)\n\na <- np$asarray(tuple(list(1,2), list(3, 4)))\nb <- np$asarray(list(5,6))\nb <- np$reshape(b, newshape = tuple(1L,2L))\n\nnp$concatenate(tuple(a, b), axis=0L)\n\narray([[1., 2.],\n       [3., 4.],\n       [5., 6.]])\n\n\nYou can explicitly convert data types:\n\nresult <- np$concatenate(tuple(a, b), axis=0L)\n\npy_to_r(result)\n\n     [,1] [,2]\n[1,]    1    2\n[2,]    3    4\n[3,]    5    6\n\nresult_r <- py_to_r(result)\nr_to_py(result_r)\n\narray([[1., 2.],\n       [3., 4.],\n       [5., 6.]])\n\n\n\n\n7 Interactivity\nYou can easily include Python chunks in Rmarkdown notebooks using the Python engine in knitr.\n\n\n8 Usecase\nWe will not showcase the usefulness of reticulate by using the DE analysis: it would involve loading in pandas to create a Python dataframe, adding rownames and columnnames and then grouping them, but that is easier to do natively in R.\nA more interesting 
thing you can do using reticulate is interacting with anndata-based Python packages, such as scanpy!\n\nlibrary(anndata)\nlibrary(reticulate)\nsc <- import(\"scanpy\")\n\nadata_path <- \"../usecase/data/sc_counts_subset.h5ad\"\nadata <- anndata::read_h5ad(adata_path)\n\nWe can preprocess the data:\n\nsc$pp$filter_cells(adata, min_genes = 200)\nsc$pp$filter_genes(adata, min_cells = 3)\n\n\nsc$pp$pca(adata)\nsc$pp$neighbors(adata)\nsc$tl$umap(adata)\n\nadata\n\nAnnData object with n_obs × n_vars = 32727 × 20542\n    obs: 'dose_uM', 'timepoint_hr', 'well', 'row', 'col', 'plate_name', 'cell_id', 'cell_type', 'split', 'donor_id', 'sm_name', 'control', 'SMILES', 'sm_lincs_id', 'library_id', 'leiden_res1', 'group', 'cell_type_orig', 'plate_well_celltype_reannotated', 'cell_count_by_well_celltype', 'cell_count_by_plate_well', 'n_genes'\n    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'n_cells'\n    uns: 'cell_type_colors', 'celltypist_celltype_colors', 'donor_id_colors', 'hvg', 'leiden_res1_colors', 'log1p', 'neighbors', 'over_clustering', 'rank_genes_groups', 'pca', 'umap'\n    obsm: 'HTO_clr', 'X_pca', 'X_umap', 'protein_counts'\n    varm: 'PCs'\n    obsp: 'connectivities', 'distances'\n\n\nWe can’t easily show the result of the plot in this Quarto notebook, but we can save it and show it:\n\npath <- \"umap.png\"\nsc$pl$umap(adata, color=\"leiden_res1\", save=path)\n\n\n\n\n\n\n\nFigure 8.1: UMAP plot of the adata object",
         "crumbs": [
           "In-memory interoperability",
           "5  Reticulate"
    diff --git a/sitemap.xml b/sitemap.xml
    index 5646682..858e3fd 100644
    --- a/sitemap.xml
    +++ b/sitemap.xml
    @@ -2,74 +2,74 @@
     
       
         https://saeyslab.github.io/polygloty/index.html
    -    2024-09-12T08:33:57.968Z
    +    2024-09-12T08:40:23.433Z
       
       
         https://saeyslab.github.io/polygloty/book/introduction.html
    -    2024-09-12T08:33:57.964Z
    +    2024-09-12T08:40:23.425Z
       
       
         https://saeyslab.github.io/polygloty/book/usecase/index.html
    -    2024-09-12T08:33:57.964Z
    +    2024-09-12T08:40:23.425Z
       
       
         https://saeyslab.github.io/polygloty/book/in_memory/pitfalls.html
    -    2024-09-12T08:33:57.964Z
    +    2024-09-12T08:40:23.425Z
       
       
         https://saeyslab.github.io/polygloty/book/in_memory/rpy2.html
    -    2024-09-12T08:33:57.964Z
    +    2024-09-12T08:40:23.425Z
       
       
         https://saeyslab.github.io/polygloty/book/in_memory/reticulate.html
    -    2024-09-12T08:33:57.964Z
    +    2024-09-12T08:40:23.425Z
       
       
         https://saeyslab.github.io/polygloty/book/disk_based/file_formats.html
    -    2024-09-12T08:33:57.960Z
    +    2024-09-12T08:40:23.421Z
       
       
         https://saeyslab.github.io/polygloty/book/disk_based/disk_based_pipelines.html
    -    2024-09-12T08:33:57.960Z
    +    2024-09-12T08:40:23.421Z
       
       
         https://saeyslab.github.io/polygloty/book/workflow_frameworks/review.html
    -    2024-09-12T08:33:57.968Z
    +    2024-09-12T08:40:23.429Z
       
       
         https://saeyslab.github.io/polygloty/book/workflow_frameworks/qualities.html
    -    2024-09-12T08:33:57.968Z
    +    2024-09-12T08:40:23.429Z
       
       
         https://saeyslab.github.io/polygloty/book/workflow_frameworks/quality_assessment.html
    -    2024-09-12T08:33:57.968Z
    +    2024-09-12T08:40:23.429Z
       
       
         https://saeyslab.github.io/polygloty/book/workflow_frameworks/viash_nextflow.html
    -    2024-09-12T08:33:57.968Z
    +    2024-09-12T08:40:23.429Z
       
       
         https://saeyslab.github.io/polygloty/book/workflow_frameworks/best_practices.html
    -    2024-09-12T08:33:57.964Z
    +    2024-09-12T08:40:23.425Z
       
       
         https://saeyslab.github.io/polygloty/book/book_slides.html
    -    2024-09-12T08:33:57.960Z
    +    2024-09-12T08:40:23.421Z
       
       
         https://saeyslab.github.io/polygloty/book/references.html
    -    2024-09-12T08:33:57.964Z
    +    2024-09-12T08:40:23.425Z
       
       
         https://saeyslab.github.io/polygloty/book/in_memory/index.html
    -    2024-09-12T08:33:57.964Z
    +    2024-09-12T08:40:23.425Z
       
       
         https://saeyslab.github.io/polygloty/book/disk_based/index.html
    -    2024-09-12T08:33:57.960Z
    +    2024-09-12T08:40:23.421Z
       
       
         https://saeyslab.github.io/polygloty/book/workflow_frameworks/index.html
    -    2024-09-12T08:33:57.968Z
    +    2024-09-12T08:40:23.429Z