Skip to content

Commit

Permalink
Add reticulate slides
Browse files Browse the repository at this point in the history
  • Loading branch information
LouiseDck committed Sep 11, 2024
1 parent dde4a1d commit b41480e
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 9 deletions.
Binary file added slides/data/paul15/paul15.h5
Binary file not shown.
Binary file added slides/images/reticulate_table.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
123 changes: 114 additions & 9 deletions slides/slides.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ format:
history: true
link-external-newwindow: true
jupyter: python3
exectute:
execute:
echo: true
---

Expand Down Expand Up @@ -123,7 +123,7 @@ tools = importr('tools', robject_translations = d)
# Pitfalls when using Python and R
**Integers**

```r
```{r}
library(reticulate)
bi <- reticulate::import_builtins()
Expand All @@ -144,7 +144,8 @@ bi$list(bi$range(0L, 5L))
- `rpy2.rinterface`, the low-level interface
- `rpy2.robjects`, the high-level interface

```python
```{python}
#| echo: true
import rpy2
import rpy2.robjects as robjects
Expand All @@ -156,29 +157,34 @@ rsum(vector)

# Rpy2: basics

```python
```{python}
#| echo: true
str_vector = robjects.StrVector(['abc', 'def', 'ghi'])
flt_vector = robjects.FloatVector([0.3, 0.8, 0.7])
int_vector = robjects.IntVector([1, 2, 3])
mtx = robjects.r.matrix(robjects.IntVector(range(10)), nrow=5)
print(mtx)
```

# Rpy2: numpy

```python
```{python}
#| echo: true
import numpy as np
from rpy2.robjects import numpy2ri
from rpy2.robjects import default_converter
rd_m = np.random.random((10, 7))
rd_m = np.random.random((5, 4))
with (default_converter + numpy2ri.converter).context():
mtx2 = robjects.r.matrix(rd_m, nrow = 10)
mtx = robjects.r.matrix(rd_m, nrow = 5)
print(mtx)
```

# Rpy2: pandas
```python
```{python}
#| echo: true
import pandas as pd
from rpy2.robjects import pandas2ri
Expand All @@ -188,11 +194,13 @@ pd_df = pd.DataFrame({'int_values': [1,2,3],
with (default_converter + pandas2ri.converter).context():
pd_df_r = robjects.DataFrame(pd_df)
print(pd_df_r)
```

# Rpy2: sparse matrices

```python
```{python}
#| echo: true
import scipy as sp
from anndata2ri import scipy2ri
Expand All @@ -201,6 +209,7 @@ sparse_matrix = sp.sparse.csc_matrix(rd_m)
with (default_converter + scipy2ri.converter).context():
sp_r = scipy2ri.py2rpy(sparse_matrix)
print(sp_r)
```

# Rpy2: anndata
Expand Down Expand Up @@ -233,6 +242,102 @@ with anndata2ri.converter.context():

# Reticulate

![](images/reticulate_table.png)

# Reticulate

```r
library(reticulate)

# Handles to Python's builtins and to the standard-library `random` module.
bi <- reticulate::import_builtins()
rd <- reticulate::import("random")

# The R vector is converted to a Python list when passed to a Python function.
example <- c(1, 2, 3)
bi$max(example)
# [1] 3
rd$choice(example)
# [1] 2   (random pick: the value varies between runs)
# `reversed()` returns an iterator, so it is materialised with `list()` first.
cat(bi$list(bi$reversed(example)))
# 3 2 1
```

# Reticulate numpy

```r
# Import NumPy with reticulate's default automatic conversion, so results
# of NumPy calls come back as native R objects.
np <- reticulate::import("numpy")

# A 2 x 2 array and a length-2 array; the latter is reshaped to 1 x 2 so the
# two can be stacked row-wise.
a <- np$asarray(tuple(list(1, 2), list(3, 4)))
b <- np$asarray(list(5, 6))
# The shape is passed positionally: the `newshape` keyword is deprecated
# since NumPy 2.1, while the positional form works on every version.
b <- np$reshape(b, tuple(1L, 2L))

np$concatenate(tuple(a, b), axis = 0L)
#      [,1] [,2]
# [1,]    1    2
# [2,]    3    4
# [3,]    5    6
```

# Reticulate conversion

```r
# With `convert = FALSE`, results of NumPy calls stay Python objects instead
# of being converted to R automatically.
np <- reticulate::import("numpy", convert = FALSE)

a <- np$asarray(tuple(list(1, 2), list(3, 4)))
b <- np$asarray(list(5, 6))
# The shape is passed positionally: the `newshape` keyword is deprecated
# since NumPy 2.1, while the positional form works on every version.
b <- np$reshape(b, tuple(1L, 2L))

np$concatenate(tuple(a, b), axis = 0L)
# array([[1., 2.],
#        [3., 4.],
#        [5., 6.]])
```

You can explicitly convert data types:
```r
# Stack the two arrays row-wise. With `np` imported using `convert = FALSE`,
# `result` remains a Python object.
stacked <- tuple(a, b)
result <- np$concatenate(stacked, axis = 0L)

# Python -> R: explicit conversion yields an R matrix.
py_to_r(result)
# [,1] [,2]
# [1,] 1 2
# [2,] 3 4
# [3,] 5 6

# R -> Python: converting the R matrix back gives a NumPy array again.
result_r <- py_to_r(result)
r_to_py(result_r)
# array([[1., 2.],
# [3., 4.],
# [5., 6.]])
```

# Reticulate scanpy

```r
# Attach the R anndata bindings and reticulate, then get a handle to the
# Python scanpy module.
library(anndata)
library(reticulate)
sc <- import("scanpy")

# Read the example .h5ad file (path relative to the slides directory) into
# an AnnData object.
adata_path <- "../usecase/data/sc_counts_subset.h5ad"
adata <- anndata::read_h5ad(adata_path)
```

We can preprocess & analyse the data:
```r
# Thresholds are written as integer literals (`200L`, `3L`): a bare R number
# is a double and would reach Python as a float.
sc$pp$filter_cells(adata, min_genes = 200L)
sc$pp$filter_genes(adata, min_cells = 3L)
# Dimensionality reduction, neighbourhood graph, and UMAP embedding; scanpy
# stores the results on `adata` itself.
sc$pp$pca(adata)
sc$pp$neighbors(adata)
sc$tl$umap(adata)

adata
# AnnData object with n_obs × n_vars = 32727 × 20542
# obs: 'dose_uM', 'timepoint_hr', 'well', 'row', 'col', 'plate_name', 'cell_id', 'cell_type', 'split', 'donor_id', 'sm_name', 'control', 'SMILES', 'sm_lincs_id', 'library_id', 'leiden_res1', 'group', 'cell_type_orig', 'plate_well_celltype_reannotated', 'cell_count_by_well_celltype', 'cell_count_by_plate_well', 'n_genes'
# var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'n_cells'
# uns: 'cell_type_colors', 'celltypist_celltype_colors', 'donor_id_colors', 'hvg', 'leiden_res1_colors', 'log1p', 'neighbors', 'over_clustering', 'rank_genes_groups', 'pca', 'umap'
# obsm: 'HTO_clr', 'X_pca', 'X_umap', 'protein_counts'
# varm: 'PCs'
# obsp: 'connectivities', 'distances'
```

# Disk-based interoperability

## General single cell file formats of interest for Python and R
Expand Down

0 comments on commit b41480e

Please sign in to comment.