diff --git a/runs.ipynb b/runs.ipynb
index ace7c58df..df7e78d1d 100644
--- a/runs.ipynb
+++ b/runs.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 99,
"metadata": {},
"outputs": [],
"source": [
@@ -2154,117 +2154,100 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 92,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_247612/1517971497.py:5: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
- " adata_df.groupby(['cell_type','donor_id']).apply(lambda df: create_meta_cells(df) )\n",
- "/tmp/ipykernel_247612/1517971497.py:5: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
- " adata_df.groupby(['cell_type','donor_id']).apply(lambda df: create_meta_cells(df) )\n"
+ "/tmp/ipykernel_247612/3835466291.py:11: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+ " df = adata_df.groupby(['cell_type','donor_id']).apply(lambda df: create_meta_cells(df))\n",
+ "/root/anaconda3/envs/py10/lib/python3.10/site-packages/anndata/_core/aligned_df.py:67: ImplicitModificationWarning: Transforming to str index.\n",
+ " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n"
]
- },
+ }
+ ],
+ "source": [
+ "def create_meta_cells(df, n_cells=15):\n",
+ "    \"\"\"Sum each run of `n_cells` consecutive rows of `df` into one meta-cell row.\"\"\"\n",
+ "    # The final chunk may hold fewer than `n_cells` rows; it is still summed.\n",
+ "    chunk_starts = range(0, df.shape[0], n_cells)\n",
+ "    pooled = [df.iloc[s:s+n_cells, :].sum(axis=0).values for s in chunk_starts]\n",
+ "    return pd.DataFrame(pooled, columns=df.columns)\n",
+ " \n",
+ "adata_df = pd.DataFrame(multiomics_rna.X.todense(), columns=multiomics_rna.var_names)  # dense cells x genes frame\n",
+ "adata_df['cell_type'] = multiomics_rna.obs['cell_type'].values  # .values: assign by position, not by index\n",
+ "adata_df['donor_id'] = multiomics_rna.obs['donor_id'].values\n",
+ "df = adata_df.groupby(['cell_type','donor_id'], observed=False).apply(lambda grp: create_meta_cells(grp))  # observed=False: pin the current pandas default explicitly\n",
+ "X = df.values\n",
+ "var = pd.DataFrame(index=df.columns)\n",
+ "obs = df.reset_index()[['cell_type','donor_id']]  # group labels come from the groupby index levels\n",
+ "adata = ad.AnnData(X=X, obs=obs, var=var)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "metadata": {},
+ "outputs": [
{
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " location | \n",
- " AL627309.1 | \n",
- " AL627309.5 | \n",
- " AL627309.4 | \n",
- " LINC01409 | \n",
- " LINC01128 | \n",
- " LINC00115 | \n",
- " FAM41C | \n",
- " AL645608.6 | \n",
- " SAMD11 | \n",
- " NOC2L | \n",
- " ... | \n",
- " AC145212.1 | \n",
- " MAFIP | \n",
- " AC011043.1 | \n",
- " AL354822.1 | \n",
- " AL592183.1 | \n",
- " AC240274.1 | \n",
- " AC004556.3 | \n",
- " AC007325.4 | \n",
- " cell_type | \n",
- " donor_id | \n",
- "
\n",
- " \n",
- " cell_type | \n",
- " donor_id | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- "
\n",
- "
0 rows × 22789 columns
\n",
- "
"
- ],
"text/plain": [
- "Empty DataFrame\n",
- "Columns: [AL627309.1, AL627309.5, AL627309.4, LINC01409, LINC01128, LINC00115, FAM41C, AL645608.6, SAMD11, NOC2L, KLHL17, PLEKHN1, HES4, ISG15, AL645608.1, AGRN, C1orf159, AL390719.3, LINC01342, TTLL10, TNFRSF18, TNFRSF4, SDF4, B3GALT6, C1QTNF12, UBE2J2, LINC01786, SCNN1D, ACAP3, PUSL1, INTS11, CPTP, TAS1R3, DVL1, MXRA8, AURKAIP1, CCNL2, MRPL20-AS1, MRPL20, AL391244.2, ATAD3C, ATAD3B, ATAD3A, TMEM240, SSU72, AL645728.1, FNDC10, AL691432.4, AL691432.2, MIB2, MMP23B, CDK11B, FO704657.1, SLC35E2B, CDK11A, SLC35E2A, NADK, GNB1, AL109917.1, CFAP74, PRKCZ, AL590822.2, PRKCZ-AS1, FAAP20, AL590822.1, SKI, AL590822.3, MORN1, AL513477.2, RER1, PEX10, PLCH2, PANK4, AL139246.5, TNFRSF14-AS1, TNFRSF14, AL139246.3, PRXL2B, MMEL1, TTC34, PRDM16, MEGF6, AL513320.1, TPRG1L, WRAP73, TP73, CCDC27, SMIM1, LRRC47, CEP104, DFFB, C1orf174, LINC01134, AL805961.1, AJAP1, AL365255.1, NPHP4, KCNAB2, CHD5, RPL22, ...]\n",
- "Index: []\n",
- "\n",
- "[0 rows x 22789 columns]"
+ "0.9265477230460478"
]
},
- "execution_count": 37,
+ "execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "def create_meta_cells(df, n_cells=15):\n",
- " meta_x = []\n",
- " for i in range(0, df.shape[0], n_cells):\n",
- " meta_x.append(df.iloc[i:n_cells, :].sum(axis=0))\n",
- " \n",
- "adata_df = pd.DataFrame(multiomics_rna.X, columns=multiomics_rna.var_names)\n",
- "adata_df[['cell_type','donor_id']] = multiomics_rna.obs[['cell_type', 'donor_id']]\n",
- "adata_df.groupby(['cell_type','donor_id']).apply(lambda df: create_meta_cells(df))"
+ "# (adata.X==0).sum()/adata.X.size\n",
+ "(multiomics_rna.X.todense()==0).sum()/multiomics_rna.X.todense().size"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 1., 1., 1., ..., 6., 3., 18.], dtype=float32)"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# multiomics_rna = ad.read('resources/grn-benchmark/multiomics_rna.h5ad')\n",
+ "# multiomics_rna.X.data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAGeCAYAAAA0WWMxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAioUlEQVR4nO3df3BU1f3/8dcKsiSYBPHHbiIRol1QQfwBGgnW0EriIFAd1KpRB3/QgQlWUkaRNKMsHd1AamPUCB0oYPwRcapinVo1sdWozagBpVVQUAkQKmtGG5JoMonC+f7hl/24JFZusnuS3TwfM2eme+7Z3fflsPLq2bv3uIwxRgAAAJYc1dcFAACAgYXwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALBqcF8XcLiDBw/qs88+U1JSklwuV1+XAwAAjoAxRq2trUpLS9NRR/3I2oZx4JtvvjFFRUVm9OjRZujQoSYjI8MsW7bMHDhwIDTm4MGDZunSpSY1NdUMHTrUZGdnmw8++OCI36OhocFIotFoNBqNFoOtoaHhR/+td7TysWLFCv3xj39URUWFxo0bp02bNummm25SSkqKFi5cKEkqKSlRaWmpHnnkEY0ZM0b33HOPcnJytH37diUlJf3oexwa09DQoOTkZCflAQCAPtLS0qL09PQj+rfe5WRjuZkzZ8rj8Wjt2rWhviuuuEKJiYl67LHHZIxRWlqaCgoKdOedd0qSOjo65PF4tGLFCs2bN++Iik9JSVFzczPhAwCAGOHk329HF5xeeOGF+vvf/64dO3ZIkv71r3/pzTff1KWXXipJqq+vVzAYVG5ubug5brdb2dnZqq2t7fY1Ozo61NLSEtYAAED8cvS1y5133qnm5maddtppGjRokA4cOKB7771X1157rSQpGAxKkjweT9jzPB6Pdu/e3e1rFhcXa9myZT2pHQAAxCBHKx9PPfWUHn/8cVVWVurdd99VRUWF7rvvPlVUVISNO/xXKsaYH/zlSmFhoZqbm0OtoaHB4SkAAIBY4mjl44477tCSJUt0zTXXSJLOPPNM7d69W8XFxZozZ468Xq+k71ZAUlNTQ89rbGzsshpyiNvtltvt7mn9AAAgxjha+Whra+vy291Bgwbp4MGDkqSMjAx5vV5VV1eHjnd2dqqmpkZZWVkRKBcAAMQ6Rysfs2bN0r333quTTz5Z48aN03vvvafS0lLdfPPNkr77uqWgoECBQEA+n08+n0+BQECJiYnKy8uLygkAAIDY4ih8PPTQQ7rrrruUn5+vxsZGpaWlad68ebr77rtDYxYvXqz29nbl5+erqalJmZmZqqqqOqLf/QIAgPjn6D4fNnCfDwAAYk/U7vMBAADQW4QPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGCVo/t8AOhfRi95oUvfruUz+qASADhyrHwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAqsF9XQDwfaOXvBD2eNfyGX1USfw4/M9U4s8VQN9i5QMAAFhF+AAAAFYRPgAAgFWEDwAAYBUXnAJxprsLTAGgP3G08jF69Gi5XK4ubcGCBZIkY4z8fr/S0tKUkJCgqVOnauvWrVEpHAAAxCZH4aOurk779u0LterqaknSVVddJUkqKSlRaWmpysvLVVdXJ6/Xq5ycHLW2tka+cgAAEJMchY8TTjhBXq831P7617/q1FNPVXZ2towxKisrU1FRkWbPnq3x48eroqJCbW1tqqys/MHX7OjoUEtLS1gDAADxq8cXnHZ2durxxx/XzTffLJfLpfr6egWDQeXm5obGuN1uZWdnq7a29gdfp7i4WCkp
KaGWnp7e05IAAEAM6HH4eO6557R//37deOONkqRgMChJ8ng8YeM8Hk/oWHcKCwvV3Nwcag0NDT0tCQAAxIAe/9pl7dq1mj59utLS0sL6XS5X2GNjTJe+73O73XK73T0tAwAAxJgerXzs3r1br7zyiubOnRvq83q9ktRllaOxsbHLaggAABi4ehQ+1q9frxNPPFEzZvzf5lQZGRnyer2hX8BI310XUlNTo6ysrN5XCgAA4oLjr10OHjyo9evXa86cORo8+P+e7nK5VFBQoEAgIJ/PJ5/Pp0AgoMTEROXl5UW0aAAAELsch49XXnlFe/bs0c0339zl2OLFi9Xe3q78/Hw1NTUpMzNTVVVVSkpKikixAAAg9jkOH7m5uTLGdHvM5XLJ7/fL7/f3ti4AABCn2FgOAABYxcZy6Ne62yRt1/IZ3YwEAMQKVj4AAIBVhA8AAGAV4QMAAFhF+AAAAFZxwSkQQ7q7ABcAYg0rHwAAwCrCBwAAsIrwAQAArCJ8AAAAq7jgFHHp8Asz+9tdUblzK4CBjJUPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWOU4fPznP//R9ddfr+OOO06JiYk6++yztXnz5tBxY4z8fr/S0tKUkJCgqVOnauvWrREtGgAAxC5H4aOpqUlTpkzR0UcfrRdffFHbtm3TH/7wBw0fPjw0pqSkRKWlpSovL1ddXZ28Xq9ycnLU2toa6doBAEAMGuxk8IoVK5Senq7169eH+kaPHh3638YYlZWVqaioSLNnz5YkVVRUyOPxqLKyUvPmzYtM1QAAIGY5Wvl4/vnnNWnSJF111VU68cQTdc4552jNmjWh4/X19QoGg8rNzQ31ud1uZWdnq7a2ttvX7OjoUEtLS1gDAADxy1H42Llzp1atWiWfz6eXX35Z8+fP12233aZHH31UkhQMBiVJHo8n7Hkejyd07HDFxcVKSUkJtfT09J6cBwAAiBGOwsfBgwd17rnnKhAI6JxzztG8efP0q1/9SqtWrQob53K5wh4bY7r0HVJYWKjm5uZQa2hocHgKAAAgljgKH6mpqTrjjDPC+k4//XTt2bNHkuT1eiWpyypHY2Njl9WQQ9xut5KTk8MaAACIX47Cx5QpU7R9+/awvh07dmjUqFGSpIyMDHm9XlVXV4eOd3Z2qqamRllZWREoFwAAxDpHv3b5zW9+o6ysLAUCAf3yl7/UO++8o9WrV2v16tWSvvu6paCgQIFAQD6fTz6fT4FAQImJicrLy4vKCQAAgNjiKHycd9552rhxowoLC/W73/1OGRkZKisr03XXXRcas3jxYrW3tys/P19NTU3KzMxUVVWVkpKSIl48EE9GL3kh7PGu5TP6qBIAiC5H4UOSZs6cqZkzZ/7gcZfLJb/fL7/f35u6AABAnGJvFwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWOb7PB9DfHH5zrngRr+cFAKx8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKziJmOIOdx8CwBiGysfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMCqwX1dAAau0Ute6OsSAAB9wNHKh9/vl8vlCmterzd03Bgjv9+vtLQ0JSQkaOrUqdq6dWvEiwYAALHL8dcu48aN0759+0Lt/fffDx0rKSlRaWmpysvLVVdXJ6/Xq5ycHLW2tka0aAAAELsch4/BgwfL6/WG2gknnCDpu1WPsrIyFRUVafbs2Ro/
frwqKirU1tamysrKiBcOAABik+Pw8fHHHystLU0ZGRm65pprtHPnTklSfX29gsGgcnNzQ2Pdbreys7NVW1v7g6/X0dGhlpaWsAYAAOKXo/CRmZmpRx99VC+//LLWrFmjYDCorKwsffnllwoGg5Ikj8cT9hyPxxM61p3i4mKlpKSEWnp6eg9OAwAAxApH4WP69Om64oordOaZZ2ratGl64YXvfq1QUVERGuNyucKeY4zp0vd9hYWFam5uDrWGhgYnJQEAgBjTq/t8DBs2TGeeeaY+/vjj0K9eDl/laGxs7LIa8n1ut1vJyclhDQAAxK9ehY+Ojg59+OGHSk1NVUZGhrxer6qrq0PHOzs7VVNTo6ysrF4XCgAA4oOjm4zdfvvtmjVrlk4++WQ1NjbqnnvuUUtLi+bMmSOXy6WCggIFAgH5fD75fD4FAgElJiYqLy8vWvUDAIAY4yh87N27V9dee62++OILnXDCCbrgggv01ltvadSoUZKkxYsXq729Xfn5+WpqalJmZqaqqqqUlJQUleIBAEDscRQ+NmzY8D+Pu1wu+f1++f3+3tQEAADiGBvLAQAAqwgfAADAKna1BSKM3XoB4H9j5QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFTcZQ0QcfmOtXctn9FElAID+jpUPAABgFeEDAABYRfgAAABWET4AAIBVXHAKa+J1t9d4PS8AiBZWPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFaxsRyigs3WAAA/hJUPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGBVr8JHcXGxXC6XCgoKQn3GGPn9fqWlpSkhIUFTp07V1q1be1snAACIEz0OH3V1dVq9erUmTJgQ1l9SUqLS0lKVl5errq5OXq9XOTk5am1t7XWxAAAg9vUofHz11Ve67rrrtGbNGh177LGhfmOMysrKVFRUpNmzZ2v8+PGqqKhQW1ubKisru32tjo4OtbS0hDUAABC/ehQ+FixYoBkzZmjatGlh/fX19QoGg8rNzQ31ud1uZWdnq7a2ttvXKi4uVkpKSqilp6f3pCSg10YveaFLAwBEnuPwsWHDBr377rsqLi7uciwYDEqSPB5PWL/H4wkdO1xhYaGam5tDraGhwWlJAAAghji6vXpDQ4MWLlyoqqoqDR069AfHuVyusMfGmC59h7jdbrndbidlAACAGOZo5WPz5s1qbGzUxIkTNXjwYA0ePFg1NTV68MEHNXjw4NCKx+GrHI2NjV1WQwAAwMDkKHxcfPHFev/997Vly5ZQmzRpkq677jpt2bJFp5xyirxer6qrq0PP6ezsVE1NjbKysiJePAAAiD2OvnZJSkrS+PHjw/qGDRum4447LtRfUFCgQCAgn88nn8+nQCCgxMRE5eXlRa5qAL1y+MW0u5bP6KNKAAxEjsLHkVi8eLHa29uVn5+vpqYmZWZmqqqqSklJSZF+KwAAEIN6HT5ee+21sMcul0t+v19+v7+3Lw0AAOIQe7sAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwKuIbyyH+Hb4jKgAATrDyAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCKm4xhQOjuxmi7ls/og0oAAKx8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKziJmNxgptoAQBiBSsfAADAKsIHAACwivABAACsInwAAACruOAUA1Z3F+kCAKLP0crHqlWrNGHCBCUnJys5OVmTJ0/Wiy++GDpujJHf71daWpoSEhI0depUbd26NeJFAwCA2OUofIwcOVLLly/Xpk2btGnTJv385z/XZZdd
FgoYJSUlKi0tVXl5uerq6uT1epWTk6PW1taoFA8AAGKPo/Axa9YsXXrppRozZozGjBmje++9V8ccc4zeeustGWNUVlamoqIizZ49W+PHj1dFRYXa2tpUWVn5g6/Z0dGhlpaWsAYAAOJXj6/5OHDggP785z/r66+/1uTJk1VfX69gMKjc3NzQGLfbrezsbNXW1mrevHndvk5xcbGWLVvW0zIARAA3qQNgk+Nfu7z//vs65phj5Ha7NX/+fG3cuFFnnHGGgsGgJMnj8YSN93g8oWPdKSwsVHNzc6g1NDQ4LQkAAMQQxysfY8eO1ZYtW7R//34988wzmjNnjmpqakLHXS5X2HhjTJe+73O73XK73U7LAAAAMcrxyseQIUP0k5/8RJMmTVJxcbHOOussPfDAA/J6vZLUZZWjsbGxy2oIAAAYuHp9kzFjjDo6OpSRkSGv16vq6urQsc7OTtXU1CgrK6u3bwMAAOKEo69dfvvb32r69OlKT09Xa2urNmzYoNdee00vvfSSXC6XCgoKFAgE5PP55PP5FAgElJiYqLy8vGjVD1jFjckAoPcchY/PP/9cN9xwg/bt26eUlBRNmDBBL730knJyciRJixcvVnt7u/Lz89XU1KTMzExVVVUpKSkpKsUDAIDY4yh8rF279n8ed7lc8vv98vv9vakJAADEMTaWAwAAVrGx3AByJDeS4poGAEC0sfIBAACsInwAAACrCB8AAMAqwgcAALCKC077ocMv+mR3UQBAPGHlAwAAWEX4AAAAVhE+AACAVVzzAfwP3HQNACKPlQ8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFXc4jQHd3WUzUjvdcgdPAIBtrHwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrHIWP4uJinXfeeUpKStKJJ56oyy+/XNu3bw8bY4yR3+9XWlqaEhISNHXqVG3dujWiRQMAgNjlKHzU1NRowYIFeuutt1RdXa1vv/1Wubm5+vrrr0NjSkpKVFpaqvLyctXV1cnr9SonJ0etra0RLx4AAMSewU4Gv/TSS2GP169frxNPPFGbN2/WRRddJGOMysrKVFRUpNmzZ0uSKioq5PF4VFlZqXnz5kWucgAAEJN6dc1Hc3OzJGnEiBGSpPr6egWDQeXm5obGuN1uZWdnq7a2ttvX6OjoUEtLS1gDAADxy9HKx/cZY7Ro0SJdeOGFGj9+vCQpGAxKkjweT9hYj8ej3bt3d/s6xcXFWrZsWU/LABAlo5e8EPZ41/IZfVQJgHjT45WPW2+9Vf/+97/15JNPdjnmcrnCHhtjuvQdUlhYqObm5lBraGjoaUkAACAG9Gjl49e//rWef/55vf766xo5cmSo3+v1SvpuBSQ1NTXU39jY2GU15BC32y23292TMgAAQAxytPJhjNGtt96qZ599Vv/4xz+UkZERdjwjI0Ner1fV1dWhvs7OTtXU1CgrKysyFQMAgJjmaOVjwYIFqqys1F/+8hclJSWFrvFISUlRQkKCXC6XCgoKFAgE5PP55PP5FAgElJiYqLy8vKicQKyx+T364e8FAEB/4Ch8rFq1SpI0derUsP7169frxhtvlCQtXrxY7e3tys/PV1NTkzIzM1VVVaWkpKSIFAwAAGKbo/BhjPnRMS6XS36/X36/v6c1AQCAOMbeLgAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKxytLcL+g92rAUAxCpWPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYP7uoCB
bvSSF/q6BAAArGLlAwAAWEX4AAAAVhE+AACAVYQPAABgFRecRhAXjwIA8OMcr3y8/vrrmjVrltLS0uRyufTcc8+FHTfGyO/3Ky0tTQkJCZo6daq2bt0aqXoBAECMcxw+vv76a5111lkqLy/v9nhJSYlKS0tVXl6uuro6eb1e5eTkqLW1tdfFAgCA2Of4a5fp06dr+vTp3R4zxqisrExFRUWaPXu2JKmiokIej0eVlZWaN29e76oFAAAxL6IXnNbX1ysYDCo3NzfU53a7lZ2drdra2m6f09HRoZaWlrAGAADiV0TDRzAYlCR5PJ6wfo/HEzp2uOLiYqWkpIRaenp6JEsCAAD9TFR+autyucIeG2O69B1SWFio5ubmUGtoaIhGSQAAoJ+I6E9tvV6vpO9WQFJTU0P9jY2NXVZDDnG73XK73ZEsAwAA9GMRXfnIyMiQ1+tVdXV1qK+zs1M1NTXKysqK5FsBAIAY5Xjl46uvvtInn3wSelxfX68tW7ZoxIgROvnkk1VQUKBAICCfzyefz6dAIKDExETl5eVFtHAAABCbHIePTZs26Wc/+1no8aJFiyRJc+bM0SOPPKLFixervb1d+fn5ampqUmZmpqqqqpSUlBS5qgEAQMxyGWNMXxfxfS0tLUpJSVFzc7OSk5P7uhxHuL064tmu5TP6ugQA/ZiTf7/ZWA4AAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVjjeWw3fYxwUAgJ5h5QMAAFhF+AAAAFYRPgAAgFVc8wGgxw6/9mnX8hl9VAmAWMLKBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALBqwN3h9Eh2o+UujUBX7OQcHd39ufLfIMQ7Vj4AAIBVhA8AAGAV4QMAAFg14K75OBJ8tw0AQPSw8gEAAKwifAAAAKsIHwAAwCrCBwAAsIoLTgFY1dc31err94+WeD2v7hx+rt2d55GMiUU9Oa/++HcjaisfK1euVEZGhoYOHaqJEyfqjTfeiNZbAQCAGBKV8PHUU0+poKBARUVFeu+99/TTn/5U06dP1549e6LxdgAAIIZE5WuX0tJS3XLLLZo7d64kqaysTC+//LJWrVql4uLisLEdHR3q6OgIPW5ubpYktbS0RKM0Hexoi8rrAjiyz213n8Fofd774/sfLlL19LfziqbDz7W78zySMbGoJ+dl6+/Godc0xvz4YBNhHR0dZtCgQebZZ58N67/tttvMRRdd1GX80qVLjSQajUaj0Whx0BoaGn40K0R85eOLL77QgQMH5PF4wvo9Ho+CwWCX8YWFhVq0aFHo8cGDB/Xf//5Xxx13nFwuV0RqamlpUXp6uhoaGpScnByR10TvMCf9D3PSPzEv/Q9z0j1jjFpbW5WWlvajY6P2a5fDg4Mxptsw4Xa75Xa7w/qGDx8elZqSk5P5i9LPMCf9D3PSPzEv/Q9z0lVKSsoRjYv4BafHH3+8Bg0a1GWVo7GxsctqCAAAGHgiHj6GDBmiiRMnqrq6Oqy/urpaWVlZkX47AAAQY6LytcuiRYt0ww03aNKkSZo8ebJWr16tPXv2aP78+dF4ux/ldru1dOnSLl/voO8wJ/0Pc9I/MS/9D3PSey5jjuQ3Mc6tXLlSJSUl2rdvn8aPH6/7779fF110UTTeCgAAxJCohQ8AAIDusLEcAACwivABAACsInwAAACrCB8AAMCquAkfK1euVEZGhoYOHaqJEyfqjTfe+MGxr732mlwuV5f20UcfWaw4/jmZE+m7TQaLioo0atQoud1unXrqqVq3bp2lagcGJ3Ny4403dvs5GTdunMWKBwann5UnnnhCZ511lhITE5WamqqbbrpJX375paVqBwanc/Lwww/r9NNPV0JCgsaOHatHH33UUqUxKgJ7yfW5DRs2mKOPPtqsWbPGbNu2zSxcuNAMGzbM
7N69u9vxr776qpFktm/fbvbt2xdq3377reXK45fTOTHGmF/84hcmMzPTVFdXm/r6evP222+bf/7znxarjm9O52T//v1hn4+GhgYzYsQIs3TpUruFxzmn8/LGG2+Yo446yjzwwANm586d5o033jDjxo0zl19+ueXK45fTOVm5cqVJSkoyGzZsMJ9++ql58sknzTHHHGOef/55y5XHjrgIH+eff76ZP39+WN9pp51mlixZ0u34Q+GjqanJQnUDk9M5efHFF01KSor58ssvbZQ3IDmdk8Nt3LjRuFwus2vXrmiUN2A5nZff//735pRTTgnre/DBB83IkSOjVuNA43ROJk+ebG6//fawvoULF5opU6ZErcZYF/Nfu3R2dmrz5s3Kzc0N68/NzVVtbe3/fO4555yj1NRUXXzxxXr11VejWeaA0pM5ef755zVp0iSVlJTopJNO0pgxY3T77bervb3dRslxrzefk0PWrl2radOmadSoUdEocUDqybxkZWVp7969+tvf/iZjjD7//HM9/fTTmjFjho2S415P5qSjo0NDhw4N60tISNA777yjb775Jmq1xrKYDx9ffPGFDhw40GXTOo/H02Vzu0NSU1O1evVqPfPMM3r22Wc1duxYXXzxxXr99ddtlBz3ejInO3fu1JtvvqkPPvhAGzduVFlZmZ5++mktWLDARslxrydz8n379u3Tiy++qLlz50arxAGpJ/OSlZWlJ554QldffbWGDBkir9er4cOH66GHHrJRctzryZxccskl+tOf/qTNmzfLGKNNmzZp3bp1+uabb/TFF1/YKDvmRGVvl77gcrnCHhtjuvQdMnbsWI0dOzb0ePLkyWpoaNB9993HLeAjyMmcHDx4UC6XS0888URoS+bS0lJdeeWVevjhh5WQkBD1egcCJ3PyfY888oiGDx+uyy+/PEqVDWxO5mXbtm267bbbdPfdd+uSSy7Rvn37dMcdd2j+/Plau3atjXIHBCdzctdddykYDOqCCy6QMUYej0c33nijSkpKNGjQIBvlxpyYX/k4/vjjNWjQoC6JtLGxsUty/V8uuOACffzxx5Eub0DqyZykpqbqpJNOCgUPSTr99NNljNHevXujWu9A0JvPiTFG69at0w033KAhQ4ZEs8wBpyfzUlxcrClTpuiOO+7QhAkTdMkll2jlypVat26d9u3bZ6PsuNaTOUlISNC6devU1tamXbt2ac+ePRo9erSSkpJ0/PHH2yg75sR8+BgyZIgmTpyo6urqsP7q6mplZWUd8eu89957Sk1NjXR5A1JP5mTKlCn67LPP9NVXX4X6duzYoaOOOkojR46Mar0DQW8+JzU1Nfrkk090yy23RLPEAakn89LW1qajjgr/T/eh/3dt2Kqr13rzWTn66KM1cuRIDRo0SBs2bNDMmTO7zBX+vz660DWiDv0sau3atWbbtm2moKDADBs2LHRV/pIlS8wNN9wQGn///febjRs3mh07dpgPPvjALFmyxEgyzzzzTF+dQtxxOietra1m5MiR5sorrzRbt241NTU1xufzmblz5/bVKcQdp3NyyPXXX28yMzNtlztgOJ2X9evXm8GDB5uVK1eaTz/91Lz55ptm0qRJ5vzzz++rU4g7Tudk+/bt5rHHHjM7duwwb7/9trn66qvNiBEjTH19fR+dQf8XF+HDGGMefvhhM2rUKDNkyBBz7rnnmpqamtCxOXPmmOzs7NDjFStWmFNPPdUMHTrUHHvssebCCy80L7zwQh9UHd+czIkxxnz44Ydm2rRpJiEhwYwcOdIsWrTItLW1Wa46vjmdk/3795uEhASzevVqy5UOLE7n5cEHHzRnnHGGSUhIMKmpqea6664ze/futVx1fHMyJ9u2bTNnn322SUhIMMnJyeayyy4zH330UR9UHTtcxrBOBwAA7OHLKAAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAA
AFb9P5lDUwjY4DOeAAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "aa = plt.hist((df_==0).sum(axis=1)/df.shape[1], bins=100)"
]
},
{
@@ -2609,392 +2592,347 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 104,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "download: s3://openproblems-data/resources/grn/results/grn_evaluation_so_all_ridge/scores.yaml to resources/results/grn_evaluation_so_all_ridge/scores.yaml\n",
- "download: s3://openproblems-data/resources/grn/results/grn_evaluation_so_all_ridge/state.yaml to resources/results/grn_evaluation_so_all_ridge/state.yaml\n",
- "download: s3://openproblems-data/resources/grn/results/grn_evaluation_so_all_ridge/trace.txt to resources/results/grn_evaluation_so_all_ridge/trace.txt\n",
- "download: s3://openproblems-data/resources/grn/results/grn_evaluation_so_all_ridge/metric_configs.yaml to resources/results/grn_evaluation_so_all_ridge/metric_configs.yaml\n"
+ "download: s3://openproblems-data/resources/grn/results/grn_evaluation_all_ridge/scores.yaml to resources/results/grn_evaluation_all_ridge/scores.yaml\n",
+ "download: s3://openproblems-data/resources/grn/results/grn_evaluation_all_ridge/trace.txt to resources/results/grn_evaluation_all_ridge/trace.txt\n"
]
}
],
"source": [
- "!aws s3 sync s3://openproblems-data/resources/grn/results/grn_evaluation_so_all_ridge resources/results/grn_evaluation_so_all_ridge"
+ "!aws s3 sync s3://openproblems-data/resources/grn/results/grn_evaluation_all_ridge resources/results/grn_evaluation_all_ridge"
]
},
{
"cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "download: s3://openproblems-data/resources_test/grn/results/single_omics_try2/trace.txt to resources/results/single_omics_try2/trace.txt\n",
- "download: s3://openproblems-data/resources_test/grn/results/single_omics_try2/state.yaml to resources/results/single_omics_try2/state.yaml\n",
- "download: s3://openproblems-data/resources_test/grn/results/single_omics_try2/scores.yaml to resources/results/single_omics_try2/scores.yaml\n",
- "download: s3://openproblems-data/resources_test/grn/results/single_omics_try2/output/prediction.csv to resources/results/single_omics_try2/output/prediction.csv\n"
- ]
- }
- ],
- "source": [
- "!aws s3 sync s3://openproblems-data/resources_test/grn/results/single_omics_try2 resources/results/single_omics_try2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 106,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- "\n",
+ "\n",
" \n",
" \n",
" | \n",
- " ex(False)_tf(-1) | \n",
- " ex(True)_tf(-1) | \n",
- " static-theta-0.0 | \n",
- " static-theta-0.5 | \n",
- " Mean | \n",
+ " ex(False)_tf(-1) | \n",
+ " ex(True)_tf(-1) | \n",
+ " static-theta-0.0 | \n",
+ " static-theta-0.5 | \n",
"
\n",
" \n",
" \n",
" \n",
- " negative_control | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
+ " negative_control | \n",
+ " -0.035493 | \n",
+ " -0.034779 | \n",
+ " 0.379416 | \n",
+ " 0.504639 | \n",
+ "
\n",
+ " \n",
+ " baseline_pearson | \n",
+ " -0.100238 | \n",
+ " -0.211182 | \n",
+ " 0.489316 | \n",
+ " 0.514896 | \n",
+ "
\n",
+ " \n",
+ " baseline_dotproduct | \n",
+ " -0.100238 | \n",
+ " -0.211182 | \n",
+ " 0.489316 | \n",
+ " 0.514896 | \n",
+ "
\n",
+ " \n",
+ " baseline_pearson_causal | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " baseline_dotproduct_causal | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
"
\n",
" \n",
- " baseline_corr | \n",
- " 0.393125 | \n",
- " 0.408044 | \n",
- " 0.497401 | \n",
- " 0.749563 | \n",
- " 0.512033 | \n",
+ " baseline_dotproduct_causal_cell_type | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
"
\n",
" \n",
- " baseline_corr_causal | \n",
- " 0.726172 | \n",
- " 0.756289 | \n",
- " 0.636012 | \n",
- " 0.815551 | \n",
- " 0.733506 | \n",
+ " baseline_dotproduct_causal_metacell | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
"
\n",
" \n",
- " positive_control | \n",
- " 1.000000 | \n",
- " 1.000000 | \n",
- " 0.822805 | \n",
- " 1.000000 | \n",
- " 0.955701 | \n",
+ " positive_control | \n",
+ " 0.628303 | \n",
+ " 0.629964 | \n",
+ " 0.683244 | \n",
+ " 0.741396 | \n",
"
\n",
" \n",
- " collectri | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.589265 | \n",
- " 0.694495 | \n",
- " 0.320940 | \n",
+ " collectri | \n",
+ " -0.100238 | \n",
+ " -0.211182 | \n",
+ " 0.489316 | \n",
+ " 0.514896 | \n",
"
\n",
" \n",
- " granie | \n",
- " 0.171687 | \n",
- " 0.334464 | \n",
- " 0.429662 | \n",
- " 0.709483 | \n",
- " 0.411324 | \n",
+ " granie | \n",
+ " 0.108554 | \n",
+ " 0.209125 | \n",
+ " 0.356784 | \n",
+ " 0.526008 | \n",
"
\n",
" \n",
- " figr | \n",
- " 0.242269 | \n",
- " 0.343472 | \n",
- " 0.819839 | \n",
- " 0.763056 | \n",
- " 0.542159 | \n",
+ " figr | \n",
+ " 0.154044 | \n",
+ " 0.220225 | \n",
+ " 0.680781 | \n",
+ " 0.565727 | \n",
"
\n",
" \n",
- " celloracle | \n",
- " 0.327920 | \n",
- " 0.405112 | \n",
- " 0.520984 | \n",
- " 0.723681 | \n",
- " 0.494424 | \n",
+ " celloracle | \n",
+ " 0.208249 | \n",
+ " 0.258602 | \n",
+ " 0.432617 | \n",
+ " 0.536534 | \n",
"
\n",
" \n",
- " scglue | \n",
- " 0.372405 | \n",
- " 0.426923 | \n",
- " 0.975920 | \n",
- " 0.808295 | \n",
- " 0.645886 | \n",
+ " scglue | \n",
+ " 0.245670 | \n",
+ " 0.289934 | \n",
+ " 0.810389 | \n",
+ " 0.599267 | \n",
"
\n",
" \n",
- " scenicplus | \n",
- " 0.480744 | \n",
- " 0.623056 | \n",
- " 0.840685 | \n",
- " 0.805198 | \n",
- " 0.687421 | \n",
+ " scenicplus | \n",
+ " 0.301834 | \n",
+ " 0.392452 | \n",
+ " 0.698092 | \n",
+ " 0.596971 | \n",
"
\n",
" \n",
- " portia | \n",
- " 0.020074 | \n",
- " 0.049269 | \n",
- " 0.592261 | \n",
- " 0.725473 | \n",
- " 0.346769 | \n",
+ " portia | \n",
+ " 0.013737 | \n",
+ " 0.033267 | \n",
+ " 0.491804 | \n",
+ " 0.537863 | \n",
"
\n",
" \n",
- " ppcor | \n",
- " 0.047583 | \n",
- " 0.029518 | \n",
- " 0.270373 | \n",
- " 0.709920 | \n",
- " 0.264348 | \n",
+ " ppcor | \n",
+ " 0.027029 | \n",
+ " 0.018207 | \n",
+ " 0.224514 | \n",
+ " 0.526332 | \n",
"
\n",
" \n",
- " grnboost2 | \n",
- " 0.420662 | \n",
- " 0.679448 | \n",
- " 1.000000 | \n",
- " 0.788107 | \n",
- " 0.722054 | \n",
+ " grnboost2 | \n",
+ " 0.264538 | \n",
+ " 0.426411 | \n",
+ " 0.830384 | \n",
+ " 0.584299 | \n",
"
\n",
" \n",
- " genie3 | \n",
- " 0.331627 | \n",
- " 0.557145 | \n",
- " 0.996056 | \n",
- " 0.785269 | \n",
- " 0.667524 | \n",
+ " genie3 | \n",
+ " 0.200146 | \n",
+ " 0.335431 | \n",
+ " 0.827109 | \n",
+ " 0.582196 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 6,
+ "execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "models_all = ['negative_control', 'baseline_corr', 'baseline_corr_causal', 'positive_control', 'collectri','granie', 'figr', 'celloracle', 'scglue', 'scenicplus', 'portia','ppcor', 'grnboost2', 'genie3']\n",
+ "models_all = ['negative_control', 'baseline_pearson', 'baseline_dotproduct', 'baseline_pearson_causal', 'baseline_dotproduct_causal', 'baseline_dotproduct_causal_cell_type', 'baseline_dotproduct_causal_metacell', 'positive_control', 'collectri','granie', 'figr', 'celloracle', 'scglue', 'scenicplus', 'portia','ppcor', 'grnboost2', 'genie3']\n",
"def extract_data(data, reg='reg1', dataset_id='scgen_pearson'):\n",
" i = 0\n",
" for entry in data:\n",
@@ -3019,7 +2957,7 @@
" return df_reg\n",
"import yaml\n",
"import pandas as pd\n",
- "base_folder = 'resources/results/grn_evaluation_so_all_ridge/'\n",
+ "base_folder = 'resources/results/grn_evaluation_all_ridge/'\n",
"\n",
"result_file = f'{base_folder}/scores.yaml'\n",
"with open(result_file, 'r') as file:\n",
@@ -3027,9 +2965,9 @@
"df_reg1 = extract_data(data, reg='reg1').reindex(models_all).drop(columns=['Mean'])\n",
"df_reg2 = extract_data(data, reg='reg2').reindex(models_all).drop(columns=['Mean'])\n",
"df_all = pd.concat([df_reg1, df_reg2], axis=1).fillna(0)\n",
- "df_all[df_all<0]=0\n",
- "df_all = (df_all-df_all.min(axis=0))/(df_all.max(axis=0)-df_all.min(axis=0))\n",
- "df_all['Mean'] = df_all.mean(axis=1)\n",
+ "# df_all[df_all<0]=0\n",
+ "# df_all = (df_all-df_all.min(axis=0))/(df_all.max(axis=0)-df_all.min(axis=0))\n",
+ "# df_all['Mean'] = df_all.mean(axis=1)\n",
"df_all.style.background_gradient()"
]
},
diff --git a/scripts/run_grn_evaluation.sh b/scripts/run_grn_evaluation.sh
index 44e506f2b..24d71eb88 100644
--- a/scripts/run_grn_evaluation.sh
+++ b/scripts/run_grn_evaluation.sh
@@ -13,7 +13,7 @@ grn_models_folder="${resources_dir}/grn_models"
subsample=-2
max_workers=10
layer=scgen_pearson
-metric_ids="[regression_1, regression_2]"
+metric_ids="[regression_1]"
param_file="./params/${RUN_ID}.yaml"
@@ -67,19 +67,14 @@ append_entry_control() {
causal: ${2}
corr_method: ${3}
prediction: ${resources_dir}/grn_models/collectri.csv
+ cell_type_specific: ${4}
+ metacell: ${5}
+ impute: ${6}
HERE
- if [ -n "$4" ]; then
- echo " cell_type_specific: ${4}" >> $param_file
- fi
- if [ -n "$5" ]; then
- echo " metacell: ${5}" >> $param_file
- fi
- if [ -n "$6" ]; then
- echo " impute: ${6}" >> $param_file
- fi
+
}
-# #Loop through grn_names and layers
+#Loop through grn_names and layers
# for grn_name in "${grn_names[@]}"; do
# append_entry "$grn_name"
# done
@@ -88,12 +83,12 @@ HERE
# append_entry_control "negative_control" "False" ""
# append_entry_control "positive_control" "False" ""
# append_entry_control "baseline_pearson" "False" "pearson"
-# append_entry_control "baseline_dotproduct" "False" "dotproduct"
+append_entry_control "baseline_dotproduct" "False" "dotproduct" "false" "false" "false"
# append_entry_control "baseline_pearson_causal" "True" "pearson"
-append_entry_control "baseline_dotproduct_causal" "True" "dotproduct"
+append_entry_control "baseline_dotproduct_causal" "True" "dotproduct" "false" "false" "false"
# append_entry_control "baseline_dotproduct_causal_cell_type" "True" "dotproduct" "true"
# append_entry_control "baseline_dotproduct_causal_metacell" "True" "dotproduct" "false" "true"
-append_entry_control "baseline_dotproduct_causal_impute" "True" "dotproduct" "false" "false" "true"
+# append_entry_control "baseline_dotproduct_causal_impute" "True" "dotproduct" "false" "false" "true"
# append_entry_control "baseline_corr_causal_spearman" "True" "spearman"
diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml
deleted file mode 100644
index 4e409d910..000000000
--- a/src/api/comp_control_method.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-
-functionality:
- namespace: "control_methods"
- info:
- type: control_method
- type_info:
- label: Control Method
- summary: A control method.
- description: |
- A control method to serve as a quality control for the GRN inference benchmark.
- arguments:
- - name: --layer
- type: string
- direction: input
- default: scgen_pearson
- description: Which layer of pertubation data to use to find tf-gene relationships.
- required: false
- - name: --prediction
- __merge__: file_prediction.yaml
- required: false
- direction: output
- - name: --tf_all
- type: file
- required: true
- direction: input
- example: resources_test/prior/tf_all.csv
-
-
-
- test_resources:
- - type: python_script
- path: /src/common/component_tests/run_and_check_output.py
- - path: /resources_test/grn-benchmark
- dest: resources_test/grn-benchmark
- - path: /resources_test/prior
- dest: resources_test/prior
\ No newline at end of file
diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml
index 56bf37867..764ed3c23 100644
--- a/src/api/comp_method.yaml
+++ b/src/api/comp_method.yaml
@@ -10,37 +10,38 @@ functionality:
arguments:
- name: --multiomics_rna
__merge__: file_multiomics_rna_h5ad.yaml
- required: false
+ required: true
direction: input
- default: resources/grn-benchmark/multiomics_rna.h5ad
- - name: --multiomics_atac
- __merge__: file_multiomics_atac_h5ad.yaml
- required: false
- direction: input
- must_exist: false
- default: resources/grn-benchmark/multiomics_atac.h5ad
+ example: resources_test/grn-benchmark/multiomics_rna.h5ad
- name: --prediction
__merge__: file_prediction.yaml
- required: false
+ required: true
direction: output
- example: output/prediction.csv
- default: output/prediction.csv
- - name: --temp_dir
- type: string
+ example: resources_test/grn_models/collectri.csv
+ - name: --tf_all
+ type: file
+ required: true
direction: input
- default: output/temdir
+ example: resources_test/prior/tf_all.csv
+ - name: --max_n_links
+ type: integer
+ default: 50000
- name: --num_workers
type: integer
direction: input
default: 4
- - name: --tf_all
- type: file
- example: resources/prior/tf_all.csv
- default: resources/prior/tf_all.csv
- required: false
- - name: --max_n_links
+ - name: --temp_dir
+ type: string
+ direction: input
+ default: output/temdir
+ - name: --seed
type: integer
- default: 50000
+ direction: input
+ default: 32
+
+
+
+
test_resources:
- type: python_script
diff --git a/src/api/comp_method_mo.yaml b/src/api/comp_method_mo.yaml
new file mode 100644
index 000000000..bd8e1c843
--- /dev/null
+++ b/src/api/comp_method_mo.yaml
@@ -0,0 +1,17 @@
+__merge__: comp_method.yaml
+
+functionality:
+ info:
+ type: methods
+ type_info:
+ label: Method
+ summary: A GRN inference method that uses multiomics (RNA and ATAC) data
+ description: |
+ A method for inferring a GRN from ATAC and RNA data.
+ arguments:
+ - name: --multiomics_atac
+ __merge__: file_multiomics_atac_h5ad.yaml
+ required: true
+ direction: input
+ must_exist: false
+ example: resources_test/grn-benchmark/multiomics_atac.h5ad
diff --git a/src/api/comp_method_r.yaml b/src/api/comp_method_r.yaml
index 55ce2593f..10748d821 100644
--- a/src/api/comp_method_r.yaml
+++ b/src/api/comp_method_r.yaml
@@ -1,5 +1,5 @@
functionality:
- namespace: "methods_r"
+ namespace: "methods"
info:
type: methods_r
type_info:
diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml
index beb9b046c..99a468348 100644
--- a/src/api/comp_metric.yaml
+++ b/src/api/comp_metric.yaml
@@ -12,17 +12,19 @@ functionality:
__merge__: file_perturbation_h5ad.yaml
required: false
direction: input
- default: resources/grn-benchmark/perturbation_data.h5ad
- name: --prediction
__merge__: file_prediction.yaml
required: true
direction: input
-
- name: --score
__merge__: file_score.yaml
required: false
direction: output
- default: output/score.h5ad
+ - name: --tf_all
+ type: file
+ direction: input
+ required: true
+ example: resources_test/prior/tf_all.csv
- name: --reg_type
type: string
direction: input
@@ -43,11 +45,6 @@ functionality:
direction: input
required: false
example: collectri
- - name: --tf_all
- type: file
- direction: input
- example: resources_test/prior/tf_all.csv
- default: resources/prior/tf_all.csv
- name: --apply_tf
type: boolean
required: false
@@ -57,6 +54,11 @@ functionality:
required: false
default: true
description: clips the r2 scores for each gene to make them within [0, 1]
+ - name: --layer
+ type: string
+ direction: input
+ required: false
+ default: scgen_pearson
diff --git a/src/control_methods/baseline_corr/config.vsh.yaml b/src/control_methods/baseline_corr/config.vsh.yaml
index f3675be13..9ad3db556 100644
--- a/src/control_methods/baseline_corr/config.vsh.yaml
+++ b/src/control_methods/baseline_corr/config.vsh.yaml
@@ -1,24 +1,17 @@
-__merge__: ../../api/comp_control_method.yaml
+__merge__: ../../api/comp_method.yaml
functionality:
name: baseline_corr
+ namespace: control_methods
info:
label: baseline_corr
- summary: "Baseline based on Pearson corr"
+ summary: "Baseline based on correlation"
arguments:
- name: --causal
type: boolean
direction: input
default: false
- - name: --seed
- type: integer
- direction: input
- - name: --multiomics_rna
- type: file
- required: true
- direction: input
- example: resources_test/grn-benchmark/multiomics_rna.h5ad
- name: --corr_method
type: string
required: false
@@ -50,7 +43,8 @@ platforms:
image: ghcr.io/openproblems-bio/base_python:1.0.4
setup:
- type: python
- packages: [ magic-impute ]
+ # packages: [ magic-impute ]
+ packages: [ ]
- type: native
- type: nextflow
directives:
diff --git a/src/control_methods/baseline_corr/script.py b/src/control_methods/baseline_corr/script.py
index d08033528..760335b11 100644
--- a/src/control_methods/baseline_corr/script.py
+++ b/src/control_methods/baseline_corr/script.py
@@ -47,7 +47,7 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
return grn
print('Read data')
multiomics_rna = ad.read_h5ad(par["multiomics_rna"])
-# multiomics_rna = multiomics_rna[:,:2000] #TODO: togo
+# multiomics_rna = multiomics_rna[:,:2000]  # debug-only: limits the run to the first 2000 columns; keep disabled
if par['metacell']:
print('metacell')
diff --git a/src/control_methods/negative_control/config.vsh.yaml b/src/control_methods/negative_control/config.vsh.yaml
index 599844f50..8ff514e20 100644
--- a/src/control_methods/negative_control/config.vsh.yaml
+++ b/src/control_methods/negative_control/config.vsh.yaml
@@ -1,6 +1,7 @@
-__merge__: ../../api/comp_control_method.yaml
+__merge__: ../../api/comp_method.yaml
functionality:
name: negative_control
+ namespace: control_methods
info:
label: Negative control
summary: Source-target links based on random assignment
@@ -9,9 +10,9 @@ functionality:
arguments:
- name: --perturbation_data
type: file
- required: false
+ required: true
direction: input
- default: resources/grn-benchmark/perturbation_data.h5ad
+ example: resources_test/grn-benchmark/perturbation_data.h5ad
resources:
- type: python_script
diff --git a/src/control_methods/positive_control/config.vsh.yaml b/src/control_methods/positive_control/config.vsh.yaml
index 4add99379..5a3c237c7 100644
--- a/src/control_methods/positive_control/config.vsh.yaml
+++ b/src/control_methods/positive_control/config.vsh.yaml
@@ -1,6 +1,7 @@
-__merge__: ../../api/comp_control_method.yaml
+__merge__: ../../api/comp_method.yaml
functionality:
name: positive_control
+ namespace: control_methods
info:
label: Positive control
summary: Source-target links based on perturbation data
@@ -9,9 +10,9 @@ functionality:
arguments:
- name: --perturbation_data
type: file
- required: false
+ required: true
direction: input
- default: resources/grn-benchmark/perturbation_data.h5ad
+ example: resources_test/grn-benchmark/perturbation_data.h5ad
resources:
- type: python_script
diff --git a/src/methods/dummy/config.vsh.yaml b/src/methods/dummy/config.vsh.yaml
deleted file mode 100644
index 3b38332dd..000000000
--- a/src/methods/dummy/config.vsh.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-
-functionality:
- name: dummy
- namespace: "grn_methods"
- info:
- label: dummy
- summary: "FILL IN: A one sentence summary of this method."
- description: |
- A dummy method that contains the format of a real GRN inference method.
- documentation_url: https://url.to/the/documentation
- repository_url: https://github.com/organisation/repository
- arguments:
- - name: --multiomics_rna
- type: file
- required: True
- direction: input
- - name: --multiomics_atac
- type: file
- required: True
- direction: input
- - name: --prediction
- type: file
- required: true
- direction: output
- resources:
- - type: python_script
- path: script.py
-
-platforms:
- - type: docker
- image: ghcr.io/openproblems-bio/base_python:1.0.4
- setup:
- - type: python
- packages: [ ]
-
- - type: native
- - type: nextflow
- directives:
- label: [midtime,midmem,midcpu]
diff --git a/src/methods/dummy/run.sh b/src/methods/dummy/run.sh
deleted file mode 100644
index c482c6427..000000000
--- a/src/methods/dummy/run.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-viash run src/methods/dummy/config.vsh.yaml -- --multiomics_rna resources_test/grn-benchmark/multiomics_rna.h5ad \
- --multiomics_atac resources_test/grn-benchmark/multiomics_atac.h5ad \
- --prediction output/prediction.csv
diff --git a/src/methods/dummy/script.py b/src/methods/dummy/script.py
deleted file mode 100644
index 986b90152..000000000
--- a/src/methods/dummy/script.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import pandas as pd
-import anndata as ad
-
-## VIASH START
-par = {
- "multiomics_rna": "resources/grn-benchmark/multiomics_rna.h5ad",
- "multiomics_atac": "resources/grn-benchmark/multiomics_atac.h5ad",
- "annotation_file": "resources/grn-benchmark/annotation_file",
- "motif_file": "resources/grn-benchmark/motif_file",
- "prediction": "output/prediction.csv",
-}
-## VIASH END
-
-print('Reading input files', flush=True)
-multiomics_rna = ad.read_h5ad(par["multiomics_rna"])
-multiomics_atac = ad.read_h5ad(par["multiomics_atac"])
-
-
-
-print('Preprocess data', flush=True)
-# ... preprocessing ...
-
-print('Train model', flush=True)
-# ... train model ...
-
-print('Generate predictions', flush=True)
-# ... generate predictions ...
-
-print('Write output to file', flush=True)
-output = pd.DataFrame(
- data = {'source':['tf1'], 'target':['g1'], 'weight':[1]}
- # columns=['source', 'target', 'weight']
-)
-output.to_csv(par["prediction"])
-
-
diff --git a/src/methods/multi_omics/celloracle/config.vsh.yaml b/src/methods/multi_omics/celloracle/config.novsh.yaml
similarity index 94%
rename from src/methods/multi_omics/celloracle/config.vsh.yaml
rename to src/methods/multi_omics/celloracle/config.novsh.yaml
index bc976fe5f..f5fcc416c 100644
--- a/src/methods/multi_omics/celloracle/config.vsh.yaml
+++ b/src/methods/multi_omics/celloracle/config.novsh.yaml
@@ -1,4 +1,4 @@
-__merge__: ../../../api/comp_method.yaml
+__merge__: ../../../api/comp_method_mo.yaml
functionality:
name: celloracle
diff --git a/src/methods/multi_omics/celloracle_ns/config.vsh.yaml b/src/methods/multi_omics/celloracle_ns/config.novsh.yaml
similarity index 100%
rename from src/methods/multi_omics/celloracle_ns/config.vsh.yaml
rename to src/methods/multi_omics/celloracle_ns/config.novsh.yaml
diff --git a/src/methods/multi_omics/granie/config.vsh.yaml b/src/methods/multi_omics/granie/config.novsh.yaml
similarity index 100%
rename from src/methods/multi_omics/granie/config.vsh.yaml
rename to src/methods/multi_omics/granie/config.novsh.yaml
diff --git a/src/methods/multi_omics/granie_ns/config.vsh.yaml b/src/methods/multi_omics/granie_ns/config.novsh.yaml
similarity index 100%
rename from src/methods/multi_omics/granie_ns/config.vsh.yaml
rename to src/methods/multi_omics/granie_ns/config.novsh.yaml
diff --git a/src/methods/multi_omics/scenicplus/config.vsh.yaml b/src/methods/multi_omics/scenicplus/config.novsh.yaml
similarity index 96%
rename from src/methods/multi_omics/scenicplus/config.vsh.yaml
rename to src/methods/multi_omics/scenicplus/config.novsh.yaml
index 3f8d92aad..021f1309c 100644
--- a/src/methods/multi_omics/scenicplus/config.vsh.yaml
+++ b/src/methods/multi_omics/scenicplus/config.novsh.yaml
@@ -1,4 +1,4 @@
-__merge__: ../../../api/comp_method.yaml
+__merge__: ../../../api/comp_method_mo.yaml
functionality:
diff --git a/src/methods/multi_omics/scenicplus_ns/config.vsh.yaml b/src/methods/multi_omics/scenicplus_ns/config.novsh.yaml
similarity index 100%
rename from src/methods/multi_omics/scenicplus_ns/config.vsh.yaml
rename to src/methods/multi_omics/scenicplus_ns/config.novsh.yaml
diff --git a/src/methods/multi_omics/scglue/config.vsh.yaml b/src/methods/multi_omics/scglue/config.novsh.yaml
similarity index 96%
rename from src/methods/multi_omics/scglue/config.vsh.yaml
rename to src/methods/multi_omics/scglue/config.novsh.yaml
index 8b9d3f33e..8861ce342 100644
--- a/src/methods/multi_omics/scglue/config.vsh.yaml
+++ b/src/methods/multi_omics/scglue/config.novsh.yaml
@@ -1,4 +1,4 @@
-__merge__: ../../../api/comp_method.yaml
+__merge__: ../../../api/comp_method_mo.yaml
functionality:
diff --git a/src/methods/multi_omics/scglue_ns/config.vsh.yaml b/src/methods/multi_omics/scglue_ns/config.novsh.yaml
similarity index 100%
rename from src/methods/multi_omics/scglue_ns/config.vsh.yaml
rename to src/methods/multi_omics/scglue_ns/config.novsh.yaml
diff --git a/src/metrics/regression_1/config.vsh.yaml b/src/metrics/regression_1/config.vsh.yaml
index 1cd6c1be6..a6fefcba0 100644
--- a/src/metrics/regression_1/config.vsh.yaml
+++ b/src/metrics/regression_1/config.vsh.yaml
@@ -8,11 +8,6 @@ functionality:
description: |
Calculates R2 score using regression approach 1.
arguments:
- - name: --layer
- type: string
- direction: input
- required: false
- default: scgen_pearson
- name: --min_tf
type: integer
direction: input
diff --git a/src/metrics/regression_2/config.vsh.yaml b/src/metrics/regression_2/config.vsh.yaml
index 2298f12f1..c5ef8a094 100644
--- a/src/metrics/regression_2/config.vsh.yaml
+++ b/src/metrics/regression_2/config.vsh.yaml
@@ -14,17 +14,14 @@ functionality:
- name: --consensus
type: file
direction: input
- must_exist: true
- default: 'resources/prior/consensus-num-regulators.json'
+ must_exist: false
+ required: true
example: 'resources_test/prior/consensus-num-regulators.json'
- name: --static_only
+ direction: input
type: boolean
default: true
- - name: --layer
- type: string
- direction: input
- required: false
- default: scgen_pearson
+
platforms:
- type: docker
image: ghcr.io/openproblems-bio/base_python:1.0.4
diff --git a/src/process_data/explanatory_analysis/hvgs/config.novsh.yaml b/src/process_data/explanatory_analysis/hvgs/config.novsh.yaml
deleted file mode 100644
index 684dafa0c..000000000
--- a/src/process_data/explanatory_analysis/hvgs/config.novsh.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-
-functionality:
- name: hvgs
- info:
- label: hvgs
- summary: "Determining HVGs based on perturbation data"
-
- arguments:
- - name: --perturbation_data
- __merge__: ../../../api/file_perturbation_h5ad.yaml
- direction: input
- required: true
- - name: --multiomics_rna
- __merge__: ../../../api/file_multiomics_rna_h5ad.yaml
- direction: input
- required: true
- - name: --n_hvgs
- type: integer
- direction: input
- required: false
- default: 3000
- - name: --hvgs
- type: file
- direction: output
- required: true
- default: resources/grn-benchmark/supp/hvgs.csv
-
- resources:
- - type: r_script
- path: script.R
-
-
-platforms:
- - type: docker
- image: openproblems/base_r:1.0.0
- setup:
- - type: r
- bioc: [scry]
- packages: [zellkonverter]
-
-
-
- - type: native
- - type: nextflow
- directives:
- label: [midtime,midmem,midcpu]
diff --git a/src/process_data/explanatory_analysis/hvgs/run.sh b/src/process_data/explanatory_analysis/hvgs/run.sh
deleted file mode 100644
index 78d5fc6e3..000000000
--- a/src/process_data/explanatory_analysis/hvgs/run.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-viash run src/process_data/explanatory_analysis/hvgs/config.novsh.yaml -- --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
- --hvgs resources/grn-benchmark/supp/hvgs.csv
-
-
-
-
diff --git a/src/process_data/explanatory_analysis/hvgs/script.R b/src/process_data/explanatory_analysis/hvgs/script.R
deleted file mode 100644
index 49caffade..000000000
--- a/src/process_data/explanatory_analysis/hvgs/script.R
+++ /dev/null
@@ -1,47 +0,0 @@
-
-library(scry)
-library(zellkonverter)
-library(SingleCellExperiment)
-options(digits=5, max.print=100) # Adjust numbers as needed
-
-
-
-## VIASH START
-par <- list(
- perturbation_data = "resources/grn-benchmark/perturbation_data.h5ad",
- multiomics_rna = "resources/grn-benchmark/multiomics_rna.h5ad",
- hvgs = "resources/grn-benchmark/supp/hvgs.txt",
- n_hvgs = 3000
-)
-## VIASH END
-
-print(par)
-
-
-adata = readH5AD(par$perturbation_data) # raw counts
-multiomics_rna <- readH5AD(par$multiomics_rna)
-
-# Extract the gene names from multiomics_rna
-multiomics_genes <- rownames(multiomics_rna)
-
-# Subset adata to keep only the genes present in multiomics_rna
-adata <- adata[rownames(adata) %in% multiomics_genes, ]
-
-adata_sce = devianceFeatureSelection(adata, assay="X", batch=colData(adata)$plate_name)
-
-binomial_deviance <- rowData(adata_sce)$binomial_deviance
-
-# Sort the indices of binomial deviance in decreasing order and select the top `n_hvgs`
-indices <- order(binomial_deviance, decreasing = TRUE)[1:par$n_hvgs]
-
-# Create a mask
-mask <- rep(FALSE, length(binomial_deviance))
-mask[indices] <- TRUE
-
-# Select the highly variable genes
-hvgs_sce <- rownames(adata_sce)[mask]
-
-# Save the highly variable genes to a text file
-print(dim(hvgs_sce))
-
-write(hvgs_sce, file = par$hvgs)
\ No newline at end of file
diff --git a/src/process_data/perturbation/batch_correction_evaluation/config.vsh.yaml b/src/process_data/perturbation/batch_correction_evaluation/config.vsh.yaml
index 1f50f5ad9..41d7a7a92 100644
--- a/src/process_data/perturbation/batch_correction_evaluation/config.vsh.yaml
+++ b/src/process_data/perturbation/batch_correction_evaluation/config.vsh.yaml
@@ -8,14 +8,13 @@ functionality:
arguments:
- name: --perturbation_data
__merge__: ../../../api/file_perturbation_h5ad.yaml
- required: false
+ required: true
direction: input
- default: resources/grn-benchmark/perturbation_data.h5ad
- name: --output
type: file
- required: true
+ required: false
direction: output
- default: output/batch_correction_metrics.csv
+ example: resources_test/results/batch_correction_metrics.csv
resources:
- type: python_script
diff --git a/src/process_data/perturbation/batch_correction_scgen/config.vsh.yaml b/src/process_data/perturbation/batch_correction_scgen/config.vsh.yaml
index f61b32495..525787bbc 100644
--- a/src/process_data/perturbation/batch_correction_scgen/config.vsh.yaml
+++ b/src/process_data/perturbation/batch_correction_scgen/config.vsh.yaml
@@ -30,7 +30,6 @@ functionality:
required: true
required: true
direction: input
- default: resources/grn-benchmark/perturbation_data.h5ad
example: resources_test/grn-benchmark/perturbation_data.h5ad
- name: --perturbation_data_bc
type: file
@@ -60,9 +59,8 @@ functionality:
type: double
description: "Batch correction using scgen on pearson data"
required: true
- required: true
+ required: false
direction: output
- default: resources/grn-benchmark/perturbation_data.h5ad
example: resources_test/grn-benchmark/perturbation_data.h5ad
resources:
diff --git a/src/process_data/perturbation/batch_correction_seurat/config.vsh.yaml b/src/process_data/perturbation/batch_correction_seurat/config.vsh.yaml
index 07494e407..6bc6a7752 100644
--- a/src/process_data/perturbation/batch_correction_seurat/config.vsh.yaml
+++ b/src/process_data/perturbation/batch_correction_seurat/config.vsh.yaml
@@ -32,14 +32,9 @@ functionality:
direction: input
example: resources_test/grn-benchmark/perturbation_data.h5ad
- name: --perturbation_data_bc
- type: file
- info:
- label: perturbation
- summary: "Perturbation dataset for benchmarking."
- __merge__: ../../../api/file_perturbation_h5ad.yaml
+ __merge__: ../../../api/file_perturbation_h5ad.yaml
required: false
direction: output
- example: resources_test/grn-benchmark/perturbation_data.h5ad
resources:
diff --git a/src/process_data/perturbation/normalization/config.vsh.yaml b/src/process_data/perturbation/normalization/config.vsh.yaml
index 9d45aaf66..6e51e5f22 100644
--- a/src/process_data/perturbation/normalization/config.vsh.yaml
+++ b/src/process_data/perturbation/normalization/config.vsh.yaml
@@ -7,7 +7,6 @@ functionality:
label: normalization
summary: "Normalize pseudobulked data"
-
arguments:
- name: --pseudobulked_data_f
@@ -25,7 +24,6 @@ functionality:
required: true
direction: input
- default: resources_local/pseudobulked_data_f
example: resources_test/grn-benchmark/perturbation_data.h5ad
- name: --perturbation_data_n
@@ -48,9 +46,8 @@ functionality:
type: double
description: "Normalized values using shifted logarithm "
required: true
- required: true
+ required: false
direction: output
- default: resources/grn-benchmark/perturbation_data.h5ad
example: resources_test/grn-benchmark/perturbation_data.h5ad
diff --git a/src/process_data/perturbation/sc_counts/config.vsh.yaml b/src/process_data/perturbation/sc_counts/config.vsh.yaml
index 0198f38a2..9554d8b1e 100644
--- a/src/process_data/perturbation/sc_counts/config.vsh.yaml
+++ b/src/process_data/perturbation/sc_counts/config.vsh.yaml
@@ -16,7 +16,6 @@ functionality:
type: file
required: true
direction: input
- default: resources/datasets_raw/perturbation_counts.h5ad
example: resources_test/datasets_raw/perturbation_counts.h5ad
- name: --pseudobulked_data
@@ -25,9 +24,8 @@ functionality:
label: pseudobulked_data
summary: "Pseudobulked perturbation dataset for benchmarking."
file_type: h5ad
- required: true
+ required: false
direction: output
- default: resources_local/pseudobulked_data.h5ad
example: resources_test/grn-benchmark/perturbation_data.h5ad
- name: --pseudobulked_data_f
@@ -42,9 +40,8 @@ functionality:
type: double
description: "Pseudobulked values using mean approach"
required: true
- required: true
+ required: false
direction: output
- default: resources_local/pseudobulked_data_f.h5ad
example: resources_test/grn-benchmark/perturbation_data.h5ad
diff --git a/src/workflows/run_grn_evaluation/main.nf b/src/workflows/run_grn_evaluation/main.nf
index 26e74a3b5..f82b13994 100644
--- a/src/workflows/run_grn_evaluation/main.nf
+++ b/src/workflows/run_grn_evaluation/main.nf
@@ -41,99 +41,9 @@ workflow run_wf {
]
}
)
-
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_pearson_causal'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method"
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_dotproduct_causal'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method"
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_dotproduct_causal_cell_type'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method",
- cell_type_specific: "cell_type_specific"
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_dotproduct_causal_metacell'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method",
- metacell: "metacell"
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
| baseline_corr.run(
runIf: { id, state ->
- state.method_id == 'baseline_dotproduct_causal_impute'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method",
- metacell: "metacell",
- impute: "impute"
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_corr_causal_spearman'
+ ['baseline_pearson', 'baseline_dotproduct_causal'].contains(state.method_id)
},
fromState: [
multiomics_rna: "multiomics_rna",
@@ -148,24 +58,114 @@ workflow run_wf {
]
}
)
+ // | baseline_corr.run(
+ // runIf: { id, state ->
+ // state.method_id == 'baseline_dotproduct'
+ // },
+ // fromState: [
+ // multiomics_rna: "multiomics_rna",
+ // layer: "layer",
+ // tf_all: "tf_all",
+ // causal: "causal",
+ // corr_method: "corr_method"
+ // ],
+ // toState: {id, output, state ->
+ // state + [
+ // prediction: output.prediction
+ // ]
+ // }
+ // )
+
+ // | baseline_corr.run(
+ // runIf: { id, state ->
+ // state.method_id == 'baseline_pearson_causal'
+ // },
+ // fromState: [
+ // multiomics_rna: "multiomics_rna",
+ // layer: "layer",
+ // tf_all: "tf_all",
+ // causal: "causal",
+ // corr_method: "corr_method"
+ // ],
+ // toState: {id, output, state ->
+ // state + [
+ // prediction: output.prediction
+ // ]
+ // }
+ // )
+ // | baseline_corr.run(
+ // runIf: { id, state ->
+ // state.method_id == 'baseline_dotproduct_causal'
+ // },
+ // fromState: [
+ // multiomics_rna: "multiomics_rna",
+ // layer: "layer",
+ // tf_all: "tf_all",
+ // causal: "causal",
+ // corr_method: "corr_method"
+ // ],
+ // toState: {id, output, state ->
+ // state + [
+ // prediction: output.prediction
+ // ]
+ // }
+ // )
+ // | baseline_corr.run(
+ // runIf: { id, state ->
+ // state.method_id == 'baseline_dotproduct_causal_cell_type'
+ // },
+ // fromState: [
+ // multiomics_rna: "multiomics_rna",
+ // layer: "layer",
+ // tf_all: "tf_all",
+ // causal: "causal",
+ // corr_method: "corr_method",
+ // cell_type_specific: "cell_type_specific"
+ // ],
+ // toState: {id, output, state ->
+ // state + [
+ // prediction: output.prediction
+ // ]
+ // }
+ // )
+ // | baseline_corr.run(
+ // runIf: { id, state ->
+ // state.method_id == 'baseline_dotproduct_causal_metacell'
+ // },
+ // fromState: [
+ // multiomics_rna: "multiomics_rna",
+ // layer: "layer",
+ // tf_all: "tf_all",
+ // causal: "causal",
+ // corr_method: "corr_method",
+ // metacell: "metacell"
+ // ],
+ // toState: {id, output, state ->
+ // state + [
+ // prediction: output.prediction
+ // ]
+ // }
+ // )
+ // | baseline_corr.run(
+ // runIf: { id, state ->
+ // state.method_id == 'baseline_dotproduct_causal_impute'
+ // },
+ // fromState: [
+ // multiomics_rna: "multiomics_rna",
+ // layer: "layer",
+ // tf_all: "tf_all",
+ // causal: "causal",
+ // corr_method: "corr_method",
+ // metacell: "metacell",
+ // impute: "impute"
+ // ],
+ // toState: {id, output, state ->
+ // state + [
+ // prediction: output.prediction
+ // ]
+ // }
+ // )
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_corr'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- seed: "seed"
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
| negative_control.run(
runIf: { id, state ->
state.method_id == 'negative_control'