Skip to content

Commit

Permalink
scgpt added/workflow updated
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Nov 18, 2024
1 parent 662b882 commit 403533f
Show file tree
Hide file tree
Showing 13 changed files with 117 additions and 266 deletions.
84 changes: 82 additions & 2 deletions runs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,14 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Submitted batch job 7838763\n"
"Submitted batch job 7838786\n"
]
}
],
Expand All @@ -194,6 +194,86 @@
" calculate_scores()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"--2024-11-18 22:11:00-- https://drive.google.com/uc?export=download&id=1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9\n",
"Resolving drive.google.com (drive.google.com)... 173.194.79.113, 173.194.79.139, 173.194.79.138, ...\n",
"Connecting to drive.google.com (drive.google.com)|173.194.79.113|:443... connected.\n",
"HTTP request sent, awaiting response... 303 See Other\n",
"Location: https://drive.usercontent.google.com/download?id=1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9&export=download [following]\n",
"--2024-11-18 22:11:00-- https://drive.usercontent.google.com/download?id=1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9&export=download\n",
"Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 173.194.69.132, 2a00:1450:4013:c04::84\n",
"Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|173.194.69.132|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 2425 (2.4K) [text/html]\n",
"Saving to: ‘output/best_model.pt’\n",
"\n",
" 0K .. 100% 44.8M=0s\n",
"\n",
"2024-11-18 22:11:00 (44.8 MB/s) - ‘output/best_model.pt’ saved [2425/2425]\n",
"\n",
"--2024-11-18 22:11:00-- https://drive.google.com/file/d/1Qzb6Y9UB342a2QxmY-BCubSvcmYZ5jw3/view?usp=drive_link\n",
"Resolving drive.google.com (drive.google.com)... 173.194.79.139, 173.194.79.138, 173.194.79.102, ...\n",
"Connecting to drive.google.com (drive.google.com)|173.194.79.139|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: unspecified [text/html]\n",
"Saving to: ‘output/vocab.json’\n",
"\n",
" 0K .......... .......... .......... .......... .......... 2.51M\n",
" 50K .......... .......... .......... .......... . 5.45M=0.03s\n",
"\n",
"2024-11-18 22:11:01 (3.32 MB/s) - ‘output/vocab.json’ saved [93749]\n",
"\n",
"--2024-11-18 22:11:01-- https://drive.google.com/file/d/1VwPGHuSorVAXyTreMFI1yzMougtUDeUt/view?usp=drive_link\n",
"Resolving drive.google.com (drive.google.com)... 173.194.79.138, 173.194.79.102, 173.194.79.100, ...\n",
"Connecting to drive.google.com (drive.google.com)|173.194.79.138|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: unspecified [text/html]\n",
"Saving to: ‘output/args.json’\n",
"\n",
" 0K .......... .......... .......... .......... .......... 2.49M\n",
" 50K .......... .......... .......... .......... . 5.16M=0.03s\n",
"\n",
"2024-11-18 22:11:01 (3.25 MB/s) - ‘output/args.json’ saved [93798]\n",
"\n"
]
},
{
"data": {
"text/plain": [
"CompletedProcess(args=\"wget --no-check-certificate 'https://drive.google.com/file/d/1VwPGHuSorVAXyTreMFI1yzMougtUDeUt/view?usp=drive_link' -O output/args.json\", returncode=0)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"par = {'temp_dir': 'output'}\n",
"\n",
"par['model_file'] = f\"{par['temp_dir']}/best_model.pt\"\n",
"par['model_config_file'] = f\"{par['temp_dir']}/args.json\"\n",
"par['vocab_file'] = f\"{par['temp_dir']}/vocab.json\"\n",
"\n",
"\n",
"command = f\"wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9' -O {par['model_file']}\"\n",
"subprocess.run(command, shell=True, check=True)\n",
"\n",
"command = f\"wget --no-check-certificate 'https://drive.google.com/file/d/1Qzb6Y9UB342a2QxmY-BCubSvcmYZ5jw3/view?usp=drive_link' -O {par['vocab_file']}\"\n",
"subprocess.run(command, shell=True, check=True)\n",
"\n",
"command = f\"wget --no-check-certificate 'https://drive.google.com/file/d/1VwPGHuSorVAXyTreMFI1yzMougtUDeUt/view?usp=drive_link' -O {par['model_config_file']}\"\n",
"subprocess.run(command, shell=True, check=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,6 @@ functionality:
GRN inference using scGPT.
documentation_url: https://github.com/bowang-lab/scGPT/blob/main/tutorials/Tutorial_Attention_GRN.ipynb
arguments:
- name: --model_file
type: file
direction: input
example: resources_test/supplementary/finetuned_scGPT_adamson/best_model.pt
default: resources_test/supplementary/finetuned_scGPT_adamson/best_model.pt
- name: --model_config_file
type: file
direction: input
example: resources_test/supplementary/finetuned_scGPT_adamson/args.json
default: resources_test/supplementary/finetuned_scGPT_adamson/args.json
- name: --vocab_file
type: file
direction: input
example: resources_test/supplementary/finetuned_scGPT_adamson/vocab.json
default: resources_test/supplementary/finetuned_scGPT_adamson/vocab.json
- name: --n_bins
type: integer
direction: input
Expand Down
17 changes: 17 additions & 0 deletions src/methods/single_omics/scgpt/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
import sys
import warnings
import subprocess

import torch
from anndata import AnnData
Expand Down Expand Up @@ -57,6 +58,22 @@
}
## VIASH END

# Download the pretrained scGPT model artifacts (weights, config, vocabulary)
# into par['temp_dir'] and point the corresponding `par` entries at them.
# NOTE(review): this unconditionally overrides any model_file /
# model_config_file / vocab_file values already present in `par`.
Path(par['temp_dir']).mkdir(parents=True, exist_ok=True)

par['model_file'] = f"{par['temp_dir']}/best_model.pt"
par['model_config_file'] = f"{par['temp_dir']}/args.json"
par['vocab_file'] = f"{par['temp_dir']}/vocab.json"

# Google Drive file ids, keyed by the `par` entry that holds the target path.
_GDRIVE_FILE_IDS = {
    'model_file': '1CPVtpWUJ2nkI9jGignlHLcefBe6Gk-F9',
    'vocab_file': '1Qzb6Y9UB342a2QxmY-BCubSvcmYZ5jw3',
    'model_config_file': '1VwPGHuSorVAXyTreMFI1yzMougtUDeUt',
}

for _par_key, _file_id in _GDRIVE_FILE_IDS.items():
    # Use the direct-download endpoint for every file. The share links of the
    # form ".../file/d/<id>/view?usp=drive_link" return the HTML viewer page,
    # so wget would store a web page under the .json file names.
    # NOTE(review): `confirm=t` is intended to skip Drive's interstitial page
    # for large files; confirm that it still behaves this way.
    _url = (
        "https://drive.usercontent.google.com/download"
        f"?id={_file_id}&export=download&confirm=t"
    )
    # Argument list instead of a shell string: no quoting issues if the
    # output path contains spaces or shell metacharacters.
    # NOTE(review): --no-check-certificate disables TLS verification; it is
    # kept from the original command, drop it if the environment allows.
    subprocess.run(
        ['wget', '--no-check-certificate', _url, '-O', par[_par_key]],
        check=True,
    )

    # wget exits with status 0 even when Drive answers with an HTML page, so
    # check=True alone cannot detect a bad download. Fail here with a clear
    # message rather than later when the file is parsed.
    with open(par[_par_key], 'rb') as _downloaded:
        _head = _downloaded.read(512).lstrip().lower()
    if _head.startswith((b'<!doctype html', b'<html')):
        raise RuntimeError(
            f"Download of {par[_par_key]} from Google Drive (id={_file_id}) "
            "returned an HTML page instead of the file."
        )


# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:50"
initial_memory = torch.cuda.memory_allocated()
def monitor_memory():
Expand Down
4 changes: 2 additions & 2 deletions src/metrics/script_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def define_par(dataset):
raise ValueError('define first')

par = {
'reg_type': 'ridge',
'reg_type': 'GB',
'models_dir': f"resources/grn_models/{dataset}",
'scores_dir': f"output/temp/{dataset}",

Expand Down Expand Up @@ -77,7 +77,7 @@ def define_par(dataset):
global_models = False

# - run metrics
for dataset in ['norman', 'adamson']: #'replogle2', 'nakatake', norman
for dataset in ['op','replogle2', 'nakatake', 'norman', 'adamson']: #'replogle2', 'nakatake', norman
print('------ ', dataset, '------')
par = define_par(dataset)
os.makedirs(par['scores_dir'], exist_ok=True)
Expand Down
8 changes: 4 additions & 4 deletions src/workflows/process_perturbation/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ functionality:
type: file
required: true
direction: input
default: resources/datasets_raw/perturbation_counts.h5ad
example: resources_test/datasets_raw/op_perturbation_counts.h5ad
description: single cell perturbation data

- name: --perturbation_data_bc
- name: --perturbation_data_n
__merge__: ../../api/file_evaluation_h5ad.yaml
required: false
direction: output
Expand All @@ -28,8 +28,8 @@ functionality:
dependencies:
- name: perturbation/sc_counts
- name: perturbation/normalization
- name: perturbation/batch_correction_scgen
- name: perturbation/batch_correction_seurat
# - name: perturbation/batch_correction_scgen
# - name: perturbation/batch_correction_seurat

platforms:
- type: nextflow
Expand Down
11 changes: 1 addition & 10 deletions src/workflows/process_perturbation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,9 @@ workflow run_wf {
fromState: [pseudobulked_data_f: "pseudobulked_data_f"],
toState: [perturbation_data_n: "perturbation_data_n"]
)

| batch_correction_scgen.run(
fromState: [perturbation_data_n: "perturbation_data_n"],
toState: [perturbation_data_bc: "perturbation_data_bc"]
)

| batch_correction_seurat.run(
fromState: [perturbation_data_n: "perturbation_data_bc"],
toState: [perturbation_data_bc: "perturbation_data_bc"]
)

| setState(["perturbation_data_bc"])
| setState(["perturbation_data_n"])

emit:
output_ch
Expand Down
21 changes: 7 additions & 14 deletions src/workflows/run_benchmark/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ functionality:
argument_groups:
- name: Inputs
arguments:
- name: --multiomics_rna
- name: --rna
type: file
direction: input
- name: --multiomics_atac
- name: --atac
type: file
direction: input
- name: --perturbation_data
- name: --evaluation_data
type: file
direction: input
- name: --prediction
Expand All @@ -25,7 +25,7 @@ functionality:
- name: --subsample
type: integer
direction: input
default: 2
default: -1
- name: --reg_type
type: string
direction: input
Expand All @@ -49,15 +49,7 @@ functionality:
required: false
direction: input
default: pearson
- name: --cell_type_specific
type: boolean
required: false
direction: input
default: true
- name: --normalize
type: boolean
required: false
direction: input

- name: Outputs
arguments:
- name: "--scores"
Expand Down Expand Up @@ -96,10 +88,11 @@ functionality:
- name: grn_methods/portia
- name: grn_methods/grnboost2
- name: grn_methods/scenic

# - name: grn_methods/genie3

- name: grn_methods/ppcor #needs docker image
# - name: grn_methods/scgpt
- name: grn_methods/scgpt

# ---- multiomics
- name: grn_methods/celloracle
Expand Down
11 changes: 4 additions & 7 deletions src/workflows/run_benchmark/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ workflow run_wf {
grnboost2,
ppcor,
scenic,
scglue,

pearson_corr,
negative_control,
Expand Down Expand Up @@ -77,12 +78,9 @@ workflow run_wf {
},
// use 'fromState' to fetch the arguments the component requires from the overall state
fromState: [
multiomics_rna: "multiomics_rna",
multiomics_atac: "multiomics_atac",
rna: "rna",
atac: "atac",
tf_all: "tf_all",
perturbation_data:"perturbation_data",
cell_type_specific:"cell_type_specific",
normalize:"normalize",
num_workers:"num_workers"

],
Expand All @@ -107,7 +105,7 @@ workflow run_wf {
},
// use 'fromState' to fetch the arguments the component requires from the overall state
fromState: [
perturbation_data: "perturbation_data",
evaluation_data: "evaluation_data",
prediction: "prediction",
method_id: "method_id",
subsample: "subsample",
Expand All @@ -116,7 +114,6 @@ workflow run_wf {
consensus: "consensus",
tf_all: "tf_all",
layer:"layer",
cell_type_specific:"cell_type_specific"
],
// use 'toState' to publish that component's outputs to the overall state
toState: { id, output, state, comp ->
Expand Down
Loading

0 comments on commit 403533f

Please sign in to comment.