From e9c55b2f8969b45d158eedafc0bfee7d76f1c4ae Mon Sep 17 00:00:00 2001
From: Eric Weitz <eric.m.weitz@gmail.com>
Date: Fri, 20 Dec 2024 08:00:35 -0500
Subject: [PATCH] Add basic documentation for ClinVar cache pipeline

---
 scripts/python/cache/clinvar_cache.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/scripts/python/cache/clinvar_cache.py b/scripts/python/cache/clinvar_cache.py
index ba4d9b26..db878124 100644
--- a/scripts/python/cache/clinvar_cache.py
+++ b/scripts/python/cache/clinvar_cache.py
@@ -1,8 +1,13 @@
+"""Cache data on variants related to human health, from NCBI ClinVar
+
+Example:
+python clinvar_cache.py
+"""
+
 import csv
 import json
 import gzip
 
-
 clinical_concerns = ['Likely_pathogenic', 'Pathogenic/Likely_pathogenic', 'Pathogenic']
 robust_review_statuses = [
     'criteria_provided,_multiple_submitters,_no_conflicts',
@@ -96,6 +101,8 @@ def trim_info_fields(fields):
 
 output_rows = []
 
+# Input (decompress to clinvar_20241215.vcf first): https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar_20241215.vcf.gz
+# Source directory for ClinVar GRCh38 VCF releases: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/
 with open('clinvar_20241215.vcf') as file:
     reader = csv.reader(file, delimiter="\t")
     for row in reader: