From e9c55b2f8969b45d158eedafc0bfee7d76f1c4ae Mon Sep 17 00:00:00 2001
From: Eric Weitz
Date: Fri, 20 Dec 2024 08:00:35 -0500
Subject: [PATCH] Add basic documentation for ClinVar cache pipeline

---
 scripts/python/cache/clinvar_cache.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/scripts/python/cache/clinvar_cache.py b/scripts/python/cache/clinvar_cache.py
index ba4d9b26..db878124 100644
--- a/scripts/python/cache/clinvar_cache.py
+++ b/scripts/python/cache/clinvar_cache.py
@@ -1,8 +1,13 @@
+"""Cache data on variants related to human health, from NCBI ClinVar
+
+Example:
+python clinvar_cache.py
+"""
+
 import csv
 import json
 import gzip
 
-
 clinical_concerns = ['Likely_pathogenic', 'Pathogenic/Likely_pathogenic', 'Pathogenic']
 robust_review_statuses = [
     'criteria_provided,_multiple_submitters,_no_conflicts',
@@ -96,6 +101,8 @@ def trim_info_fields(fields):
 
 output_rows = []
 
+# https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar_20241215.vcf.gz
+# Source: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/
 with open('clinvar_20241215.vcf') as file:
     reader = csv.reader(file, delimiter="\t")
     for row in reader: