Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cancerGeneListMatch and kbStatementRelated flags #110

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions graphkb/genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,16 +392,18 @@ def get_gene_information(

Function is originally from pori_ipr_python::annotate.py

Gene flags (categories) are: ['cancerGene', 'cancerRelated', 'knownFusionPartner', 'knownSmallMutation',
'oncogene', 'therapeuticAssociated', 'tumourSuppressor']
Gene flags (categories) are: [
'cancerGeneListMatch', 'kbStatementRelated', 'knownFusionPartner',
'knownSmallMutation', 'oncogene', 'therapeuticAssociated', 'tumourSuppressor'
]

Args:
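graphkb_conn: GraphKB connection passed to the gene-list query helpers (e.g. get_oncokb_oncogenes, get_cancer_genes)
gene_names: genes to report flags for (presumably gene name strings such as 'TERT'; confirm against callers)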
Returns:
List of gene_info dicts of form [{'name':<gene_str>, <flag>: True}]
Flags whose value would be False are omitted from each dict to reduce the amount of data in the ipr upload.
eg. [{'cancerRelated': True,
eg. [{'kbStatementRelated': True,
'knownFusionPartner': True,
'knownSmallMutation': True,
'name': 'TERT',
Expand All @@ -423,7 +425,7 @@ def get_gene_information(
statements = [s for s in statements if s.get("reviewStatus") != FAILED_REVIEW_STATUS]

gene_flags: Dict[str, Set[str]] = {
"cancerRelated": set(),
"kbStatementRelated": set(),
"knownFusionPartner": set(),
"knownSmallMutation": set(),
}
Expand All @@ -432,9 +434,9 @@ def get_gene_information(
for condition in statement["conditions"]:
if not condition.get("reference1"):
continue
gene_flags["cancerRelated"].add(condition["reference1"])
gene_flags["kbStatementRelated"].add(condition["reference1"])
if condition["reference2"]:
gene_flags["cancerRelated"].add(condition["reference2"])
gene_flags["kbStatementRelated"].add(condition["reference2"])
gene_flags["knownFusionPartner"].add(condition["reference1"])
gene_flags["knownFusionPartner"].add(condition["reference2"])
elif condition["@class"] == "PositionalVariant":
Expand All @@ -444,8 +446,8 @@ def get_gene_information(
gene_flags["oncogene"] = convert_to_rid_set(get_oncokb_oncogenes(graphkb_conn))
logger.info("fetching tumour supressors list")
gene_flags["tumourSuppressor"] = convert_to_rid_set(get_oncokb_tumour_supressors(graphkb_conn))
logger.info("fetching cancerGene list")
gene_flags["cancerGene"] = convert_to_rid_set(get_cancer_genes(graphkb_conn))
logger.info("fetching cancerGeneListMatch list")
gene_flags["cancerGeneListMatch"] = convert_to_rid_set(get_cancer_genes(graphkb_conn))

logger.info("fetching therapeutic associated genes lists")
gene_flags["therapeuticAssociated"] = convert_to_rid_set(
Expand Down
8 changes: 4 additions & 4 deletions tests/test_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,10 @@ def test_get_gene_information(conn):
+ CANNONICAL_THERAPY_GENES
):
assert gene in [
g["name"] for g in gene_info if g.get("cancerRelated")
], f"Missed cancerRelated {gene}"
g["name"] for g in gene_info if g.get("kbStatementRelated")
], f"Missed kbStatementRelated {gene}"

for gene in CANONICAL_CG:
assert gene in [
g["name"] for g in gene_info if g.get("cancerGene")
], f"Missed cancerGene {gene}"
g["name"] for g in gene_info if g.get("cancerGeneListMatch")
], f"Missed cancerGeneListMatch {gene}"