Skip to content

Commit

Permalink
Rerun with fixed entrez_gene_id bug
Browse files: browse the repository at this point in the history
  • Loading branch information
dhimmel committed Oct 5, 2016
1 parent 93aa3ab commit 1f27d5e
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 11 deletions.
12 changes: 6 additions & 6 deletions 6.differential-expression.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 5,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -233,7 +233,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 6,
"metadata": {
"collapsed": false
},
Expand All @@ -244,7 +244,7 @@
"22973"
]
},
"execution_count": 39,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -259,7 +259,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 7,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -364,7 +364,7 @@
"349009 -1.196275 0.557971 SLC25A5-AS1 "
]
},
"execution_count": 42,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -376,7 +376,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 8,
"metadata": {
"collapsed": false
},
Expand Down
18 changes: 13 additions & 5 deletions scripts/6.differential-expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_diffex(subtype_df):
ttest = ttest_1samp(diffex_df, popmean=0, axis=0)

df = pandas.DataFrame.from_items([
('entrez_gene_id', diffex_df.columns.astype(int)),
('entrez_gene_id', diffex_df.columns),
('patients', len(diffex_df)),
('tumor_mean', tumor_df.mean()),
('normal_mean', normal_df.mean()),
Expand All @@ -67,21 +67,29 @@ def get_diffex(subtype_df):
])
return df

diffex_df = type_df.groupby('acronym').apply(get_diffex).reset_index('acronym')
diffex_df = (type_df
.groupby('acronym')
.apply(get_diffex)
.reset_index('acronym')
.query("patients >= 5")
)

diffex_df.entrez_gene_id = diffex_df.entrez_gene_id.astype(int)


# In[6]:

# Add gene symbols
path = os.path.join('data', 'genes.tsv')
gene_df = pandas.read_table(path)
gene_df = pandas.read_table(path, low_memory=False)
gene_df = gene_df[['entrez_gene_id', 'symbol']]
diffex_df = gene_df.merge(diffex_df, how='right')
len(gene_df)


# In[7]:

diffex_df.head()
diffex_df = diffex_df.merge(gene_df, how='left')
diffex_df.tail()


# In[8]:
Expand Down

0 comments on commit 1f27d5e

Please sign in to comment.