Skip to content

Commit

Permalink
Add basic documentation for ClinVar cache pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
eweitz committed Dec 20, 2024
1 parent f68574a commit e9c55b2
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion scripts/python/cache/clinvar_cache.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
"""Cache data on variants related to human health, from NCBI ClinVar.

Example:
    python clinvar_cache.py
"""

import csv
import json
import gzip


clinical_concerns = ['Likely_pathogenic', 'Pathogenic/Likely_pathogenic', 'Pathogenic']
robust_review_statuses = [
'criteria_provided,_multiple_submitters,_no_conflicts',
Expand Down Expand Up @@ -96,6 +101,8 @@ def trim_info_fields(fields):

output_rows = []

# Input file: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar_20241215.vcf.gz
# (read below as the uncompressed clinvar_20241215.vcf)
# Source: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/
with open('clinvar_20241215.vcf') as file:
reader = csv.reader(file, delimiter="\t")
for row in reader:
Expand Down

0 comments on commit e9c55b2

Please sign in to comment.