diff --git a/runs.ipynb b/runs.ipynb
index 1978ab80..75855173 100644
--- a/runs.ipynb
+++ b/runs.ipynb
@@ -43,7 +43,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -74,7 +74,18 @@
" 'methods': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'scglue', 'celloracle', 'scenicplus'],\n",
" 'models_dir': 'resources/grn_models/',\n",
" 'scores_dir': 'resources/scores'\n",
- "}"
+ "}\n",
+ "\n",
+ "datasets = ['op', 'replogle2', 'nakatake', 'norman', 'adamson']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ad.read('resources/')  # NOTE(review): argument looks like a directory rather than a data file and the result is discarded; presumably a leftover scratch cell - confirm or remove"
]
},
{
@@ -1174,6 +1185,13 @@
"![image.png](attachment:image.png)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### nakatake"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 7,
@@ -1405,6 +1423,13 @@
"![image-3.png](attachment:image-3.png)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### norman"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 8,
@@ -1662,6 +1687,13 @@
"!ls output/temp/adamson/"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### adamson"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 12,
@@ -1872,6 +1904,623 @@
"![image.png](attachment:image.png)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Global models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 119,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "50000-skeleton_False-binarize_True-GB.csv\n",
+ "50000-skeleton_False-binarize_True-ridge.csv\n",
+ "lognorm-50000-skeleton_False-binarize_True-ridge-global-False.csv\n",
+ "nets\n",
+ "X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\n",
+ "X_norm-50000-skeleton_False-binarize_True-ridge-global-True.csv\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls output/temp/op/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " S1 | \n",
+ " S2 | \n",
+ " static-theta-0.0 | \n",
+ " static-theta-0.5 | \n",
+ " static-theta-1.0 | \n",
+ " rank | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " collectri | \n",
+ " 0.058315 | \n",
+ " 0.126899 | \n",
+ " 0.229779 | \n",
+ " 0.272409 | \n",
+ " 0.290769 | \n",
+ " 13 | \n",
+ "
\n",
+ " \n",
+ " negative_control | \n",
+ " -0.000923 | \n",
+ " -0.000961 | \n",
+ " 0.193640 | \n",
+ " 0.260667 | \n",
+ " 0.290877 | \n",
+ " 19 | \n",
+ "
\n",
+ " \n",
+ " positive_control | \n",
+ " 0.721750 | \n",
+ " 1.188803 | \n",
+ " 0.654297 | \n",
+ " 0.411675 | \n",
+ " 0.310124 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " pearson_corr | \n",
+ " 0.570333 | \n",
+ " 1.048355 | \n",
+ " 0.580583 | \n",
+ " 0.379891 | \n",
+ " 0.303594 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " portia | \n",
+ " 0.465527 | \n",
+ " 0.674097 | \n",
+ " 0.513885 | \n",
+ " 0.317206 | \n",
+ " 0.298095 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " ppcor | \n",
+ " 0.196689 | \n",
+ " 0.238486 | \n",
+ " 0.362190 | \n",
+ " 0.289743 | \n",
+ " 0.291190 | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " grnboost2 | \n",
+ " 0.735540 | \n",
+ " 0.923076 | \n",
+ " 0.581265 | \n",
+ " 0.471491 | \n",
+ " 0.329940 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " scenic | \n",
+ " 0.318873 | \n",
+ " 0.467008 | \n",
+ " 0.519103 | \n",
+ " 0.406525 | \n",
+ " 0.317489 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " granie | \n",
+ " 0.147522 | \n",
+ " 0.193977 | \n",
+ " 0.165400 | \n",
+ " 0.211497 | \n",
+ " 0.283239 | \n",
+ " 21 | \n",
+ "
\n",
+ " \n",
+ " scglue | \n",
+ " 0.155642 | \n",
+ " 0.678908 | \n",
+ " 0.499250 | \n",
+ " 0.293863 | \n",
+ " 0.293939 | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " celloracle | \n",
+ " 0.466119 | \n",
+ " 0.762438 | \n",
+ " 0.577843 | \n",
+ " 0.412485 | \n",
+ " 0.309474 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " figr | \n",
+ " 0.225030 | \n",
+ " 0.534416 | \n",
+ " 0.288675 | \n",
+ " 0.301267 | \n",
+ " 0.295626 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " scenicplus | \n",
+ " 0.588578 | \n",
+ " 0.753705 | \n",
+ " 0.612103 | \n",
+ " 0.467226 | \n",
+ " 0.324725 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " ANANSE_tissue_networks_lung.parquet | \n",
+ " 0.011492 | \n",
+ " 0.042744 | \n",
+ " 0.190339 | \n",
+ " 0.264441 | \n",
+ " 0.290627 | \n",
+ " 17 | \n",
+ "
\n",
+ " \n",
+ " ANANSE_tissue_networks_stomach.parquet | \n",
+ " 0.000624 | \n",
+ " 0.003118 | \n",
+ " 0.177599 | \n",
+ " 0.255215 | \n",
+ " 0.288802 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " ANANSE_tissue_networks_heart.parquet | \n",
+ " 0.002072 | \n",
+ " 0.010872 | \n",
+ " 0.185617 | \n",
+ " 0.260697 | \n",
+ " 0.289334 | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " ANANSE_tissue_networks_bone_marrow.parquet | \n",
+ " 0.008027 | \n",
+ " 0.034896 | \n",
+ " 0.163763 | \n",
+ " 0.266824 | \n",
+ " 0.291476 | \n",
+ " 18 | \n",
+ "
\n",
+ " \n",
+ " gtex_rna_networks_Whole_Blood.parquet | \n",
+ " 0.099496 | \n",
+ " 0.379754 | \n",
+ " 0.223288 | \n",
+ " 0.269420 | \n",
+ " 0.290558 | \n",
+ " 11 | \n",
+ "
\n",
+ " \n",
+ " gtex_rna_networks_Brain_Amygdala.parquet | \n",
+ " 0.029852 | \n",
+ " 0.143870 | \n",
+ " 0.217035 | \n",
+ " 0.254217 | \n",
+ " 0.287616 | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " gtex_rna_networks_Breast_Mammary_Tissue.parquet | \n",
+ " 0.056548 | \n",
+ " 0.237685 | \n",
+ " 0.206502 | \n",
+ " 0.259042 | \n",
+ " 0.288375 | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " gtex_rna_networks_Lung.parquet | \n",
+ " 0.065444 | \n",
+ " 0.286007 | \n",
+ " 0.216973 | \n",
+ " 0.265278 | \n",
+ " 0.290077 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " gtex_rna_networks_Stomach.parquet | \n",
+ " 0.057147 | \n",
+ " 0.236925 | \n",
+ " 0.184944 | \n",
+ " 0.260095 | \n",
+ " 0.287619 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 121,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# - op scores: rows of the global-False file followed by the rows of the global-True file\n",
+ "df_scores_gb = pd.read_csv(\"output/temp/op/X_norm-50000-skeleton_False-binarize_True-ridge-global-True.csv\", index_col=0)\n",
+ "df_scores = pd.concat([pd.read_csv(\"output/temp/op/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0), df_scores_gb])\n",
+ "# - min-max scale each column, then rank the rows by their mean scaled score (highest mean gets rank 1)\n",
+ "col_min, col_max = df_scores.min(axis=0), df_scores.max(axis=0)\n",
+ "df_all_n = (df_scores - col_min) / (col_max - col_min)\n",
+ "df_scores['rank'] = df_all_n.mean(axis=1).rank(ascending=False).astype(int)\n",
+ "df_scores.style.background_gradient()"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -1925,41 +2574,75 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 112,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "50000-skeleton_False-binarize_True-GB.csv\n",
- "50000-skeleton_False-binarize_True-ridge.csv\n",
- "lognorm-50000-skeleton_False-binarize_True-ridge-global-False.csv\n",
- "nets\n",
- "X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "!ls output/temp/op/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv"
+ "# - collect the normalized scores of every dataset in long format (model, metric, r2score, dataset)\n",
+ "# - frames are gathered in a list and concatenated once instead of re-concatenating df_all on every iteration\n",
+ "score_frames = []\n",
+ "for dataset in datasets:\n",
+ "    df_scores = pd.read_csv(f\"output/temp/{dataset}/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
+ "    # - normalize scores: missing and negative values count as 0, then each column is min-max scaled\n",
+ "    df_scores = df_scores.fillna(0)\n",
+ "    df_scores[df_scores < 0] = 0\n",
+ "    df_scores = (df_scores-df_scores.min(axis=0))/(df_scores.max(axis=0)-df_scores.min(axis=0))\n",
+ "    df_scores = df_scores.reset_index().melt(id_vars='index', var_name='metric', value_name='r2score').rename(columns={'index':'model'})\n",
+ "    df_scores['dataset'] = dataset\n",
+ "    score_frames.append(df_scores)\n",
+ "df_all = pd.concat(score_frames, axis=0)\n"
]
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
- "for dataset in datasets:\n",
- " df_scores = pd.read_csv(f\"output/temp/{dataset}/X_norm-50000-skeleton_False-binarize_True-ridge-global-False.csv\", index_col=0)\n",
- " # - normalize per method\n",
+ "df_all = df_all[df_all['model'] != 'collectri']  # keep every row whose model is not collectri"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 116,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/vol/tmp/users/jnourisa/ipykernel_1636782/208795827.py:9: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
+ " df_metrics = df_all.groupby(['model','metric']).apply(lambda df: mean_for_metrics(df)).reset_index().pivot(index='model', columns='metric', values='r2score')\n",
+ "/vol/tmp/users/jnourisa/ipykernel_1636782/208795827.py:19: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
+ " df_datasets = df_all.groupby(['model','dataset']).apply(lambda df: mean_for_datasets(df)).reset_index().pivot(index='model', columns='dataset', values='r2score')\n"
+ ]
+ }
+ ],
+ "source": [
+ "# - mean scores for metrics\n",
+ "def mean_for_metrics(df):\n",
+ "    \"\"\"Mean r2score of one (model, metric) group; for S1 and S2 only the rows of the 'op' dataset are averaged.\"\"\"\n",
+ "    metric = df['metric'].values.flatten()[0]\n",
+ "    if metric in ['S1','S2']:\n",
+ "        df = df[df['dataset']=='op']\n",
+ "    return df[['r2score']].mean()\n",
+ "# - columns are selected explicitly so the function still receives the grouping column 'metric' (implicit passing is deprecated in pandas)\n",
+ "df_metrics = df_all.groupby(['model','metric'])[['metric','dataset','r2score']].apply(mean_for_metrics).reset_index().pivot(index='model', columns='metric', values='r2score')\n",
"\n",
- " # - normalize per dataset\n"
+ "# - mean scores for datasets\n",
+ "def mean_for_datasets(df):\n",
+ "    \"\"\"Mean r2score of one (model, dataset) group; for every dataset other than 'op' the S1 and S2 rows are left out.\"\"\"\n",
+ "    dataset = df['dataset'].values.flatten()[0]\n",
+ "    if dataset != 'op':\n",
+ "        df = df[~df['metric'].isin(['S1','S2'])]\n",
+ "    return df[['r2score']].mean()\n",
+ "# - columns are selected explicitly so the function still receives the grouping column 'dataset' (implicit passing is deprecated in pandas)\n",
+ "df_datasets = df_all.groupby(['model','dataset'])[['metric','dataset','r2score']].apply(mean_for_datasets).reset_index().pivot(index='model', columns='dataset', values='r2score')"
]
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 117,
"metadata": {},
"outputs": [
{
@@ -1989,167 +2672,246 @@
" static-theta-0.0 | \n",
" static-theta-0.5 | \n",
" static-theta-1.0 | \n",
+ " adamson | \n",
+ " nakatake | \n",
+ " norman | \n",
+ " op | \n",
+ " replogle2 | \n",
" overall_score | \n",
" Duration (hour) | \n",
" Peak memory (GB) | \n",
+ " User-friendly | \n",
+ " Complexity | \n",
" \n",
" \n",
" \n",
" \n",
- " 6 | \n",
+ " 3 | \n",
" GRNBoost2 | \n",
" 1.000000 | \n",
" 0.776475 | \n",
- " 0.850620 | \n",
+ " 0.826432 | \n",
+ " 0.980936 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
+ " 0.787585 | \n",
+ " 0.941155 | \n",
" 0.925419 | \n",
+ " 1.000000 | \n",
+ " 0.923800 | \n",
" 7.510556 | \n",
" 7.378796 | \n",
+ " 8 | \n",
+ " 2 | \n",
"
\n",
" \n",
- " 2 | \n",
+ " 7 | \n",
" Positive Control | \n",
" 0.981252 | \n",
" 1.000000 | \n",
- " 1.000000 | \n",
- " 0.769936 | \n",
- " 0.575695 | \n",
+ " 0.958346 | \n",
+ " 0.860857 | \n",
+ " 0.633513 | \n",
+ " 0.832683 | \n",
+ " 0.857764 | \n",
+ " 0.886458 | \n",
" 0.865377 | \n",
+ " 0.729077 | \n",
+ " 0.860533 | \n",
" 0.015000 | \n",
" 11.601669 | \n",
+ " 10 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 12 | \n",
+ " 10 | \n",
" Scenic+ | \n",
" 0.800199 | \n",
" 0.634004 | \n",
" 0.913697 | \n",
" 0.983595 | \n",
" 0.888349 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.843969 | \n",
+ " 0.000000 | \n",
" 0.843969 | \n",
" 11.740556 | \n",
" 131.342854 | \n",
+ " 1 | \n",
+ " 9 | \n",
"
\n",
" \n",
- " 3 | \n",
+ " 5 | \n",
" Pearson corr. | \n",
" 0.775394 | \n",
" 0.881858 | \n",
- " 0.849224 | \n",
- " 0.647685 | \n",
- " 0.435855 | \n",
+ " 0.880661 | \n",
+ " 0.792117 | \n",
+ " 0.556792 | \n",
+ " 0.814796 | \n",
+ " 0.729421 | \n",
+ " 0.834992 | \n",
" 0.718003 | \n",
+ " 0.692485 | \n",
+ " 0.767652 | \n",
" 0.041389 | \n",
" 23.801899 | \n",
+ " 10 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 10 | \n",
+ " 0 | \n",
" CellOracle | \n",
" 0.633710 | \n",
" 0.641349 | \n",
" 0.843619 | \n",
" 0.773048 | \n",
" 0.561766 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.690699 | \n",
+ " 0.000000 | \n",
" 0.690699 | \n",
" 3.765000 | \n",
" 41.601166 | \n",
+ " 6 | \n",
+ " 4 | \n",
"
\n",
" \n",
- " 7 | \n",
- " Scenic | \n",
- " 0.433522 | \n",
- " 0.392839 | \n",
- " 0.723472 | \n",
- " 0.750125 | \n",
- " 0.733396 | \n",
- " 0.606671 | \n",
- " 24.008611 | \n",
- " 35.954300 | \n",
- "
\n",
- " \n",
- " 4 | \n",
+ " 6 | \n",
" Portia | \n",
" 0.632905 | \n",
" 0.567038 | \n",
- " 0.712799 | \n",
- " 0.406583 | \n",
- " 0.318112 | \n",
+ " 0.523610 | \n",
+ " 0.337081 | \n",
+ " 0.235700 | \n",
+ " 0.000000 | \n",
+ " 0.511919 | \n",
+ " 0.266277 | \n",
" 0.527488 | \n",
+ " 0.569957 | \n",
+ " 0.417198 | \n",
" 2.491111 | \n",
" 55.685230 | \n",
+ " 9 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " 9 | \n",
+ " 11 | \n",
" scGLUE | \n",
" 0.211603 | \n",
" 0.571086 | \n",
" 0.682864 | \n",
" 0.316799 | \n",
" 0.229125 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.402295 | \n",
+ " 0.000000 | \n",
" 0.402295 | \n",
" 11.097500 | \n",
" 61.677879 | \n",
+ " 6 | \n",
+ " 4 | \n",
"
\n",
" \n",
- " 11 | \n",
+ " 9 | \n",
+ " Scenic | \n",
+ " 0.433522 | \n",
+ " 0.392839 | \n",
+ " 0.437852 | \n",
+ " 0.350487 | \n",
+ " 0.345024 | \n",
+ " 0.000000 | \n",
+ " 0.289197 | \n",
+ " 0.240945 | \n",
+ " 0.606671 | \n",
+ " 0.245343 | \n",
+ " 0.371320 | \n",
+ " 24.008611 | \n",
+ " 35.954300 | \n",
+ " 7 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
" FigR | \n",
" 0.305938 | \n",
" 0.449542 | \n",
" 0.252149 | \n",
" 0.345278 | \n",
" 0.265239 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.323629 | \n",
+ " 0.000000 | \n",
" 0.323629 | \n",
" 6.731667 | \n",
" 225.208725 | \n",
+ " 6 | \n",
+ " 4 | \n",
"
\n",
" \n",
- " 5 | \n",
+ " 8 | \n",
" PPCOR | \n",
" 0.267408 | \n",
" 0.200610 | \n",
- " 0.402517 | \n",
- " 0.300952 | \n",
- " 0.170267 | \n",
+ " 0.360093 | \n",
+ " 0.285175 | \n",
+ " 0.165767 | \n",
+ " 0.566048 | \n",
+ " 0.041881 | \n",
+ " 0.418685 | \n",
" 0.268351 | \n",
+ " 0.033865 | \n",
+ " 0.260788 | \n",
" 13.425833 | \n",
" 64.136433 | \n",
+ " 7 | \n",
+ " 3 | \n",
"
\n",
" \n",
- " 0 | \n",
- " CollectRI | \n",
- " 0.079282 | \n",
- " 0.106745 | \n",
- " 0.131682 | \n",
- " 0.234284 | \n",
- " 0.161236 | \n",
- " 0.142646 | \n",
+ " 4 | \n",
+ " Negative Control | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " Negative Control | \n",
+ " 0.073181 | \n",
+ " 0.139694 | \n",
+ " 0.080763 | \n",
+ " 0.352581 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
- " 0.057763 | \n",
- " 0.189121 | \n",
- " 0.163558 | \n",
" 0.082089 | \n",
+ " 0.000000 | \n",
+ " 0.072831 | \n",
" 0.003889 | \n",
" 2.216045 | \n",
+ " 10 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 8 | \n",
+ " 2 | \n",
" GRaNIE | \n",
" 0.200563 | \n",
" 0.163170 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.072747 | \n",
+ " 0.000000 | \n",
" 0.072747 | \n",
" 1.012038 | \n",
" 41.000000 | \n",
+ " 6 | \n",
+ " 4 | \n",
"
\n",
" \n",
"\n",
@@ -2157,59 +2919,99 @@
],
"text/plain": [
" method_name S1 S2 static-theta-0.0 static-theta-0.5 \\\n",
- "6 GRNBoost2 1.000000 0.776475 0.850620 1.000000 \n",
- "2 Positive Control 0.981252 1.000000 1.000000 0.769936 \n",
- "12 Scenic+ 0.800199 0.634004 0.913697 0.983595 \n",
- "3 Pearson corr. 0.775394 0.881858 0.849224 0.647685 \n",
- "10 CellOracle 0.633710 0.641349 0.843619 0.773048 \n",
- "7 Scenic 0.433522 0.392839 0.723472 0.750125 \n",
- "4 Portia 0.632905 0.567038 0.712799 0.406583 \n",
- "9 scGLUE 0.211603 0.571086 0.682864 0.316799 \n",
- "11 FigR 0.305938 0.449542 0.252149 0.345278 \n",
- "5 PPCOR 0.267408 0.200610 0.402517 0.300952 \n",
- "0 CollectRI 0.079282 0.106745 0.131682 0.234284 \n",
- "1 Negative Control 0.000000 0.000000 0.057763 0.189121 \n",
- "8 GRaNIE 0.200563 0.163170 0.000000 0.000000 \n",
+ "3 GRNBoost2 1.000000 0.776475 0.826432 0.980936 \n",
+ "7 Positive Control 0.981252 1.000000 0.958346 0.860857 \n",
+ "10 Scenic+ 0.800199 0.634004 0.913697 0.983595 \n",
+ "5 Pearson corr. 0.775394 0.881858 0.880661 0.792117 \n",
+ "0 CellOracle 0.633710 0.641349 0.843619 0.773048 \n",
+ "6 Portia 0.632905 0.567038 0.523610 0.337081 \n",
+ "11 scGLUE 0.211603 0.571086 0.682864 0.316799 \n",
+ "9 Scenic 0.433522 0.392839 0.437852 0.350487 \n",
+ "1 FigR 0.305938 0.449542 0.252149 0.345278 \n",
+ "8 PPCOR 0.267408 0.200610 0.360093 0.285175 \n",
+ "4 Negative Control 0.000000 0.000000 0.073181 0.139694 \n",
+ "2 GRaNIE 0.200563 0.163170 0.000000 0.000000 \n",
+ "\n",
+ " static-theta-1.0 adamson nakatake norman op replogle2 \\\n",
+ "3 1.000000 1.000000 0.787585 0.941155 0.925419 1.000000 \n",
+ "7 0.633513 0.832683 0.857764 0.886458 0.865377 0.729077 \n",
+ "10 0.888349 0.000000 0.000000 0.000000 0.843969 0.000000 \n",
+ "5 0.556792 0.814796 0.729421 0.834992 0.718003 0.692485 \n",
+ "0 0.561766 0.000000 0.000000 0.000000 0.690699 0.000000 \n",
+ "6 0.235700 0.000000 0.511919 0.266277 0.527488 0.569957 \n",
+ "11 0.229125 0.000000 0.000000 0.000000 0.402295 0.000000 \n",
+ "9 0.345024 0.000000 0.289197 0.240945 0.606671 0.245343 \n",
+ "1 0.265239 0.000000 0.000000 0.000000 0.323629 0.000000 \n",
+ "8 0.165767 0.566048 0.041881 0.418685 0.268351 0.033865 \n",
+ "4 0.080763 0.352581 0.000000 0.000000 0.082089 0.000000 \n",
+ "2 0.000000 0.000000 0.000000 0.000000 0.072747 0.000000 \n",
+ "\n",
+ " overall_score Duration (hour) Peak memory (GB) User-friendly \\\n",
+ "3 0.923800 7.510556 7.378796 8 \n",
+ "7 0.860533 0.015000 11.601669 10 \n",
+ "10 0.843969 11.740556 131.342854 1 \n",
+ "5 0.767652 0.041389 23.801899 10 \n",
+ "0 0.690699 3.765000 41.601166 6 \n",
+ "6 0.417198 2.491111 55.685230 9 \n",
+ "11 0.402295 11.097500 61.677879 6 \n",
+ "9 0.371320 24.008611 35.954300 7 \n",
+ "1 0.323629 6.731667 225.208725 6 \n",
+ "8 0.260788 13.425833 64.136433 7 \n",
+ "4 0.072831 0.003889 2.216045 10 \n",
+ "2 0.072747 1.012038 41.000000 6 \n",
"\n",
- " static-theta-1.0 overall_score Duration (hour) Peak memory (GB) \n",
- "6 1.000000 0.925419 7.510556 7.378796 \n",
- "2 0.575695 0.865377 0.015000 11.601669 \n",
- "12 0.888349 0.843969 11.740556 131.342854 \n",
- "3 0.435855 0.718003 0.041389 23.801899 \n",
- "10 0.561766 0.690699 3.765000 41.601166 \n",
- "7 0.733396 0.606671 24.008611 35.954300 \n",
- "4 0.318112 0.527488 2.491111 55.685230 \n",
- "9 0.229125 0.402295 11.097500 61.677879 \n",
- "11 0.265239 0.323629 6.731667 225.208725 \n",
- "5 0.170267 0.268351 13.425833 64.136433 \n",
- "0 0.161236 0.142646 0.000000 0.000000 \n",
- "1 0.163558 0.082089 0.003889 2.216045 \n",
- "8 0.000000 0.072747 1.012038 41.000000 "
+ " Complexity \n",
+ "3 2 \n",
+ "7 0 \n",
+ "10 9 \n",
+ "5 0 \n",
+ "0 4 \n",
+ "6 1 \n",
+ "11 4 \n",
+ "9 3 \n",
+ "1 4 \n",
+ "8 3 \n",
+ "4 0 \n",
+ "2 4 "
]
},
- "execution_count": 21,
+ "execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "# create ranking \n",
- "df_scores = df_scores.fillna(0)\n",
- "df_scores[df_scores < 0] = 0\n",
- "df_scores = (df_scores-df_scores.min(axis=0))/(df_scores.max(axis=0)-df_scores.min(axis=0))\n",
+ "# - calculate overall scores\n",
+ "df_scores = pd.concat([df_metrics, df_datasets], axis=1)\n",
+ "# df_scores = df_metrics\n",
"df_scores['overall_score'] = df_scores.mean(axis=1)\n",
- "df_scores\n",
- "# df_scores['rank'] = df_scores.mean(axis=1).rank(ascending=False).astype(int)\n",
"\n",
- "df_all = pd.concat([df_scores, df_res], axis=1)\n",
- "df_all = df_all.fillna(0)\n",
- "df_all.index.name = 'method_name' \n",
- "df_all = df_all.reset_index()\n",
+ "# - merge scores with resources \n",
+ "df_summary = pd.concat([df_scores, df_res], axis=1)\n",
+ "df_summary = df_summary.fillna(0)\n",
+ "df_summary.index.name = 'method_name' \n",
+ "df_summary = df_summary.reset_index()\n",
"\n",
- "df_all = df_all.sort_values(by='overall_score', ascending=False) \n",
+ "df_summary = df_summary.sort_values(by='overall_score', ascending=False) \n",
"\n",
- "df_all.method_name = df_all.method_name.map(surragate_names)\n",
- "df_all"
+ "df_summary['method_name'] = df_summary['method_name'].map(surragate_names)\n",
+ "# - add user complexity: how far each method's User-friendly score lies below the highest one\n",
+ "user_friendly_scores = {\n",
+ "    'Scenic+': 1,\n",
+ "    'GRNBoost2': 8,\n",
+ "    'Positive Control': 10,\n",
+ "    'Pearson corr.': 10,\n",
+ "    'CellOracle': 6,\n",
+ "    'Portia': 9,\n",
+ "    'scGLUE': 6,\n",
+ "    'Scenic': 7,\n",
+ "    'FigR': 6,\n",
+ "    'PPCOR': 7,\n",
+ "    'Negative Control': 10,\n",
+ "    'GRaNIE': 6}\n",
+ "df_summary['User-friendly'] = df_summary['method_name'].map(user_friendly_scores)\n",
+ "df_summary['Complexity'] = df_summary['User-friendly'].max() - df_summary['User-friendly']\n",
+ "df_summary"
]
},
{
@@ -2221,7 +3023,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 118,
"metadata": {},
"outputs": [
{
@@ -2229,9 +3031,9 @@
"output_type": "stream",
"text": [
"── \u001b[1mAttaching packages\u001b[22m ─────────────────────────────────────── tidyverse 1.3.1 ──\n",
- "\u001b[32m✔\u001b[39m \u001b[34mggplot2\u001b[39m 3.5.1 \u001b[32m✔\u001b[39m \u001b[34mpurrr \u001b[39m 0.3.4\n",
+ "\u001b[32m✔\u001b[39m \u001b[34mggplot2\u001b[39m 3.5.1 \u001b[32m✔\u001b[39m \u001b[34mpurrr \u001b[39m 1.0.2\n",
"\u001b[32m✔\u001b[39m \u001b[34mtibble \u001b[39m 3.2.1 \u001b[32m✔\u001b[39m \u001b[34mdplyr \u001b[39m 1.1.4\n",
- "\u001b[32m✔\u001b[39m \u001b[34mtidyr \u001b[39m 1.2.0 \u001b[32m✔\u001b[39m \u001b[34mstringr\u001b[39m 1.4.0\n",
+ "\u001b[32m✔\u001b[39m \u001b[34mtidyr \u001b[39m 1.3.1 \u001b[32m✔\u001b[39m \u001b[34mstringr\u001b[39m 1.5.1\n",
"\u001b[32m✔\u001b[39m \u001b[34mreadr \u001b[39m 2.1.2 \u001b[32m✔\u001b[39m \u001b[34mforcats\u001b[39m 0.5.1\n",
"── \u001b[1mConflicts\u001b[22m ────────────────────────────────────────── tidyverse_conflicts() ──\n",
"\u001b[31m✖\u001b[39m \u001b[34mdplyr\u001b[39m::\u001b[32mfilter()\u001b[39m masks \u001b[34mstats\u001b[39m::filter()\n",
@@ -2241,15 +3043,15 @@
"\u001b[36mℹ\u001b[39m Please use `whereami::thisfile()` instead. \n",
"\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[1m\u001b[22mNew names:\n",
"\u001b[36m•\u001b[39m `` -> `...1`\n",
- "\u001b[1mRows: \u001b[22m\u001b[34m13\u001b[39m \u001b[1mColumns: \u001b[22m\u001b[34m13\u001b[39m\n",
+ "\u001b[1mRows: \u001b[22m\u001b[34m12\u001b[39m \u001b[1mColumns: \u001b[22m\u001b[34m22\u001b[39m\n",
"\u001b[36m──\u001b[39m \u001b[1mColumn specification\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n",
"\u001b[1mDelimiter:\u001b[22m \"\\t\"\n",
"\u001b[31mchr\u001b[39m (1): method_name\n",
- "\u001b[32mdbl\u001b[39m (12): ...1, S1, S2, static-theta-0.0, static-theta-0.5, overall_score, D...\n",
+ "\u001b[32mdbl\u001b[39m (21): ...1, S1, S2, static-theta-0.0, static-theta-0.5, static-theta-1.0...\n",
"\n",
"\u001b[36mℹ\u001b[39m Use `spec()` to retrieve the full column specification for this data.\n",
"\u001b[36mℹ\u001b[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.\n",
- "\u001b[?25h\u001b[?25h\u001b[90m# A tibble: 10 × 7\u001b[39m\n",
+ "\u001b[?25h\u001b[?25h\u001b[90m# A tibble: 18 × 7\u001b[39m\n",
" id id_color name group geom palette options \n",
" \u001b[3m\u001b[90m\u001b[39m\u001b[23m \u001b[3m\u001b[90m\u001b[39m\u001b[23m \u001b[3m\u001b[90m\u001b[39m\u001b[23m \u001b[3m\u001b[90m\u001b[39m\u001b[23m \u001b[3m\u001b[90m\u001b[39m\u001b[23m \u001b[3m\u001b[90m\u001b[39m\u001b[23m \u001b[3m\u001b[90m\u001b[39m\u001b[23m \n",
"\u001b[90m 1\u001b[39m method_name \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39mName\u001b[90m\"\u001b[39m meth… text \u001b[31mNA\u001b[39m \u001b[90m\u001b[39m\n",
@@ -2258,16 +3060,30 @@
"\u001b[90m 4\u001b[39m S2 S2 \u001b[90m\"\u001b[39mS2\u001b[90m\"\u001b[39m metr… funk… metric… \u001b[90m\u001b[39m\n",
"\u001b[90m 5\u001b[39m static-theta-0.0 static-theta-0.0 \u001b[90m\"\u001b[39mTheta (m… metr… funk… metric… \u001b[90m\u001b[39m\n",
"\u001b[90m 6\u001b[39m static-theta-0.5 static-theta-0.5 \u001b[90m\"\u001b[39mTheta (m… metr… funk… metric… \u001b[90m\u001b[39m\n",
- "\u001b[90m 7\u001b[39m memory_log \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39mPeak mem… reso… rect resour… \u001b[90m\u001b[39m\n",
- "\u001b[90m 8\u001b[39m memory_str \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39m\u001b[90m\"\u001b[39m reso… text \u001b[31mNA\u001b[39m \u001b[90m\u001b[39m\n",
- "\u001b[90m 9\u001b[39m duration_log \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39mDuration… reso… rect resour… \u001b[90m\u001b[39m\n",
- "\u001b[90m10\u001b[39m duration_str \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39m\u001b[90m\"\u001b[39m reso… text \u001b[31mNA\u001b[39m \u001b[90m\u001b[39m\n",
+ "\u001b[90m 7\u001b[39m static-theta-1.0 static-theta-1.0 \u001b[90m\"\u001b[39mTheta (m… metr… funk… metric… \u001b[90m\u001b[39m\n",
+ "\u001b[90m 8\u001b[39m op op \u001b[90m\"\u001b[39mOPSCA\u001b[90m\"\u001b[39m data… funk… dataset \u001b[90m\u001b[39m\n",
+ "\u001b[90m 9\u001b[39m adamson adamson \u001b[90m\"\u001b[39mAdamson\u001b[90m\"\u001b[39m data… funk… dataset \u001b[90m\u001b[39m\n",
+ "\u001b[90m10\u001b[39m nakatake nakatake \u001b[90m\"\u001b[39mNakatake\u001b[90m\"\u001b[39m data… funk… dataset \u001b[90m\u001b[39m\n",
+ "\u001b[90m11\u001b[39m norman norman \u001b[90m\"\u001b[39mNorman\u001b[90m\"\u001b[39m data… funk… dataset \u001b[90m\u001b[39m\n",
+ "\u001b[90m12\u001b[39m replogle2 replogle2 \u001b[90m\"\u001b[39mReplogle\u001b[90m\"\u001b[39m data… funk… dataset \u001b[90m\u001b[39m\n",
+ "\u001b[90m13\u001b[39m memory_log \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39mPeak mem… reso… rect resour… \u001b[90m\u001b[39m\n",
+ "\u001b[90m14\u001b[39m memory_str \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39m\u001b[90m\"\u001b[39m reso… text \u001b[31mNA\u001b[39m \u001b[90m\u001b[39m\n",
+ "\u001b[90m15\u001b[39m duration_log \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39mDuration… reso… rect resour… \u001b[90m\u001b[39m\n",
+ "\u001b[90m16\u001b[39m duration_str \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39m\u001b[90m\"\u001b[39m reso… text \u001b[31mNA\u001b[39m \u001b[90m\u001b[39m\n",
+ "\u001b[90m17\u001b[39m complexity_log \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39mComplexi… reso… rect resour… \u001b[90m\u001b[39m\n",
+ "\u001b[90m18\u001b[39m Complexity \u001b[31mNA\u001b[39m \u001b[90m\"\u001b[39m\u001b[90m\"\u001b[39m reso… text \u001b[31mNA\u001b[39m \u001b[90m\u001b[39m\n",
"\u001b[?25h\u001b[?25h\u001b[?25h\u001b[?25h\u001b[36mℹ\u001b[39m Could not find column 'id' in data. Using rownames as 'id'.\n",
"\u001b[36mℹ\u001b[39m Column info did not contain a column called 'legend', generating options based on the 'geom' column.\n",
"\u001b[36mℹ\u001b[39m No row info was provided, assuming all rows in `data` are to be plotted.\n",
"\u001b[36mℹ\u001b[39m Row info did not contain group information, assuming rows are ungrouped.\n",
+ "\u001b[36mℹ\u001b[39m Palette named 'dataset' was not defined. Assuming palette is numerical. Automatically selected palette 'Blues'.\n",
+ "\u001b[36mℹ\u001b[39m Some palettes were not used in the column info, adding legends for them.\n",
"\u001b[36mℹ\u001b[39m Legend 1 did not contain color, inferring from the palette.\n",
"\u001b[36mℹ\u001b[39m Legend 2 did not contain color, inferring from the palette.\n",
+ "\u001b[36mℹ\u001b[39m Legend 6 did not contain a geom, inferring from the column info.\n",
+ "\u001b[36mℹ\u001b[39m Legend 6 did not contain labels, inferring from the geom.\n",
+ "\u001b[36mℹ\u001b[39m Legend 6 did not contain size, inferring from the labels.\n",
+ "\u001b[36mℹ\u001b[39m Legend 6 did not contain color, inferring from the palette.\n",
"\u001b[?25h\u001b[?25h\u001b[?25h"
]
}
@@ -2277,17 +3093,19 @@
"summary_file = \"output/summary.tsv\"\n",
"summary_figure = \"output/summary_figure.pdf\"\n",
"\n",
- "df_all['memory_log'] = np.log(df_all['Peak memory (GB)']+1)\n",
- "df_all['memory_log'] = np.max(df_all['memory_log'])-df_all['memory_log']\n",
+ "df_summary['memory_log'] = np.log(df_summary['Peak memory (GB)']+1)\n",
+ "df_summary['memory_log'] = np.max(df_summary['memory_log'])-df_summary['memory_log']\n",
"\n",
+ "df_summary['complexity_log'] = np.log(df_summary['Complexity']+1)\n",
+ "df_summary['complexity_log'] = np.max(df_summary['complexity_log'])-df_summary['complexity_log']\n",
"\n",
- "df_all[\"duration_log\"] = np.log(df_all['Duration (hour)']+1)\n",
- "df_all['duration_log'] = np.max(df_all['duration_log'])-df_all['duration_log']\n",
+ "df_summary[\"duration_log\"] = np.log(df_summary['Duration (hour)']+1)\n",
+ "df_summary['duration_log'] = np.max(df_summary['duration_log'])-df_summary['duration_log']\n",
"\n",
- "df_all[\"duration_str\"] = df_all['Duration (hour)'].round(1).astype(str)\n",
- "df_all['memory_str'] = df_all['Peak memory (GB)'].round(1).astype(str)\n",
+ "df_summary[\"duration_str\"] = df_summary['Duration (hour)'].round(1).astype(str)\n",
+ "df_summary['memory_str'] = df_summary['Peak memory (GB)'].round(1).astype(str)\n",
"\n",
- "df_all.to_csv(summary_file, sep='\\t')\n",
+ "df_summary.to_csv(summary_file, sep='\\t')\n",
"\n",
"!Rscript ../grn_benchmark/src/summary_figure.R {summary_file} {summary_figure}"
]
diff --git a/src/api/file_evaluation_h5ad.yaml b/src/api/file_evaluation_h5ad.yaml
index 6376cff6..8ccc8d82 100644
--- a/src/api/file_evaluation_h5ad.yaml
+++ b/src/api/file_evaluation_h5ad.yaml
@@ -17,11 +17,11 @@ info:
- name: donor_id
type: string
description: "Donor id"
- required: true
+ required: false
- name: perturbation_type
type: string
description: "Name of the column indicating perturbation type"
- required: true
+ required: false
layers:
- name: X_norm
diff --git a/src/process_data/test_data/script.py b/src/process_data/test_data/script.py
index 5ad5e8b1..4665fa93 100644
--- a/src/process_data/test_data/script.py
+++ b/src/process_data/test_data/script.py
@@ -21,11 +21,11 @@
'perturbation_data': 'resources/evaluation_datasets/op_perturbation.h5ad',
'perturbation_data_test': 'resources_test/evaluation_datasets/op_perturbation.h5ad',
- 'multiomics_counts': 'resources/datasets_raw/op_multiome_counts.h5ad',
- 'multiomics_counts_test': 'resources_test/datasets_raw/op_multiome_counts.h5ad',
+ 'multiomics_counts': 'resources/datasets_raw/op_multiome_sc_counts.h5ad',
+ 'multiomics_counts_test': 'resources_test/datasets_raw/op_multiome_sc_counts.h5ad',
- 'perturbation_counts': 'resources/datasets_raw/op_perturbation_counts.h5ad',
- 'perturbation_counts_test': 'resources_test/datasets_raw/op_perturbation_counts.h5ad',
+ 'perturbation_counts': 'resources/datasets_raw/op_perturbation_sc_counts.h5ad',
+ 'perturbation_counts_test': 'resources_test/datasets_raw/op_perturbation_sc_counts.h5ad',
}
## VIASH END