From 995062eb6d03f5bc32f0f218410b04ac3f53d3ce Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Thu, 3 Sep 2020 14:02:04 +0100
Subject: [PATCH 01/16] added outer postcode mapping

---
 civet/command.py                    | 38 +++++++------
 civet/scripts/assess_input_file.smk |  8 +--
 civet/scripts/civet_template.pmd    |  6 +--
 civet/scripts/make_report.py        | 13 ++---
 civet/scripts/mapping.py            | 83 ++++++++++++++++++++++++-----
 5 files changed, 106 insertions(+), 42 deletions(-)

diff --git a/civet/command.py b/civet/command.py
index 4a8f63fd..8235c502 100644
--- a/civet/command.py
+++ b/civet/command.py
@@ -68,9 +68,8 @@ def main(sysargs = sys.argv[1:]):
     parser.add_argument('--date-window',action="store",default=7, type=int, dest="date_window",help="Define the window +- either side of cluster sample collection date-range. Default is 7 days.")
     parser.add_argument("-v","--version", action='version', version=f"civet {__version__}")
 
-    parser.add_argument("--map-sequences", action="store_true", dest="map_sequences", help="Map the coordinate points of sequences, coloured by a triat.")
-    parser.add_argument("--x-col", required=False, dest="x_col", help="column containing x coordinate for mapping sequences")
-    parser.add_argument("--y-col", required=False, dest="y_col", help="column containing y coordinate for mapping sequences")
+    parser.add_argument("--map-sequences", action="store_true", dest="map_sequences", help="Map the coordinate points of sequences, coloured by a trait.")
+    parser.add_argument("--map-inputs", required=False, dest="map_inputs", help="columns containing EITHER x and y coordinates as a comma separated string OR outer postcodes for mapping sequences")
     parser.add_argument("--input-crs", required=False, dest="input_crs", help="Coordinate reference system of sequence coordinates")
     parser.add_argument("--mapping-trait", required=False, dest="mapping_trait", help="Column to colour mapped sequences by")
 
@@ -272,23 +271,31 @@ def main(sysargs = sys.argv[1:]):
         
     if args.map_sequences:
         config["map_sequences"] = True
-        if not args.x_col or not args.y_col:
-            sys.stderr.write('Error: coordinates not supplied for mapping sequences. Please provide --x-col and --y-col')
-            sys.exit(-1)
-        elif not args.input_crs:
-            sys.stderr.write('Error: input coordinate system not provided for mapping. Please provide --input-crs eg EPSG:3395')
+        if not args.map_inputs:
+            sys.stderr.write('Error: coordinates or outer postcode not supplied for mapping sequences. Please provide either x and y columns as a comma separated string, or column header containing outer postcode.')
             sys.exit(-1)
+        if len(args.map_inputs.split(",")) == 2:
+            if not args.input_crs:
+                sys.stderr.write('Error: input coordinate system not provided for mapping. Please provide --input-crs eg EPSG:3395')
+                sys.exit(-1)
+            else:
+                input_crs = args.input_crs
         else:
-            config["x_col"] = args.x_col
-            config["y_col"] = args.y_col
-            config["input_crs"] = args.input_crs
+            input_crs = "EPSG:4326"
+        
+        config["map_cols"] = args.map_inputs
+        config["input_crs"] = input_crs
 
+        relevant_cols = []
         with open(query, newline="") as f:
             reader = csv.DictReader(f)
             column_names = reader.fieldnames
-            relevant_cols = [args.x_col, args.y_col, args.mapping_trait]
+            map_cols = args.map_inputs.split(",")
+            for i in map_cols:
+                relevant_cols.append(i)
+            relevant_cols.append(args.mapping_trait)
+            
             for map_arg in relevant_cols:
-
                 if map_arg and map_arg not in reader.fieldnames:
                     sys.stderr.write(f"Error: {map_arg} field not found in metadata file")
                     sys.exit(-1)
@@ -300,8 +307,7 @@ def main(sysargs = sys.argv[1:]):
             
     else:
         config["map_sequences"] = False
-        config["x_col"] = False
-        config["y_col"] = False
+        config["map_cols"] = False
         config["input_crs"] = False
         config["mapping_trait"] = False
 
@@ -516,6 +522,7 @@ def main(sysargs = sys.argv[1:]):
     map_input_3 = pkg_resources.resource_filename('civet', 'data/mapping_files/NI_counties.geojson')  
     map_input_4 = pkg_resources.resource_filename('civet', 'data/mapping_files/Mainland_HBs_gapclosed_mapshaped_d3.json')
     map_input_5 = pkg_resources.resource_filename('civet', 'data/mapping_files/urban_areas_UK.geojson')
+    map_input_6 = pkg_resources.resource_filename('civet', 'data/mapping_files/UK_outPC_coords.csv')
     spatial_translations_1 = pkg_resources.resource_filename('civet', 'data/mapping_files/HB_Translation.pkl')
     spatial_translations_2 = pkg_resources.resource_filename('civet', 'data/mapping_files/adm2_regions_to_coords.csv')
 
@@ -555,6 +562,7 @@ def main(sysargs = sys.argv[1:]):
     config["ni_map"] = map_input_3
     config["uk_map_d3"] = map_input_4
     config["urban_centres"] = map_input_5
+    config["pc_file"] = map_input_6
     config["HB_translations"] = spatial_translations_1
     config["PC_translations"] = spatial_translations_2
 
diff --git a/civet/scripts/assess_input_file.smk b/civet/scripts/assess_input_file.smk
index 6bbd6a65..0bccde05 100644
--- a/civet/scripts/assess_input_file.smk
+++ b/civet/scripts/assess_input_file.smk
@@ -409,6 +409,7 @@ rule make_report:
         uk_map = config["uk_map"],
         channels_map = config["channels_map"],
         ni_map = config["ni_map"],
+        pc_file = config["pc_file"],
         urban_centres = config["urban_centres"],
         genome_graph = rules.find_snps.output.genome_graph,
         snp_report = rules.find_snps.output.report,
@@ -429,8 +430,7 @@ rule make_report:
         figdir = os.path.join(config["outdir"],"figures"),
         failure = config["qc_fail_report"],
         map_sequences = config["map_sequences"],
-        x_col = config["x_col"],
-        y_col = config["y_col"],
+        map_cols = config["map_cols"],
         input_crs = config["input_crs"],
         mapping_trait = config["mapping_trait"],
         add_boxplots = config["add_boxplots"],
@@ -484,12 +484,12 @@ rule make_report:
         "--uk-map {input.uk_map:q} "
         "--channels-map {input.channels_map:q} "
         "--ni-map {input.ni_map:q} "
+        "--pc-file {input.pc_file:q} "
         "--outfile {output.outfile:q} "
         "--outdir {params.outdir:q} "
         "--map-sequences {params.map_sequences} "
         "--snp-report {input.snp_report:q} "
-        "--x-col {params.x_col} "
-        "--y-col {params.y_col} "
+        "--map-cols {params.map_cols} "
         "--input-crs {params.input_crs} "
         "--mapping-trait {params.mapping_trait} "
         "--urban-centres {input.urban_centres} "
diff --git a/civet/scripts/civet_template.pmd b/civet/scripts/civet_template.pmd
index 4b54671c..e1c3d461 100644
--- a/civet/scripts/civet_template.pmd
+++ b/civet/scripts/civet_template.pmd
@@ -98,14 +98,14 @@ local_lin_maps = "" ##CHANGE
 local_lin_tables = "" ##CHANGE
 
 map_sequences = "" ##CHANGE #going to be true or false
-x_col = "" ##CHANGE
-y_col = "" ##CHANGE
+map_inputs = "" ##CHANGE
 mapping_trait = "" ##CHANGE
 input_crs = "" ##CHANGE
 uk_map = "" ##CHANGE
 channels_map = "" ##CHANGE
 ni_map = "" ##CHANGE
 urban_centres = "" ##CHANGE
+pc_file = "" ##CHANGE
 
 
 mapping_json_files = [uk_map, channels_map, ni_map]
@@ -257,7 +257,7 @@ print(f"![]({figdir}/genome_graph.png)")
 ```python, name="map_sequences", echo=False, results='raw', include=False
 if map_sequences != "False":
     print("## Plotting sequences")
-    adm2_in_map, adm2_percentages = mapping.map_traits(input_csv, input_crs, mapping_trait, x_col, y_col, mapping_json_files, urban_centres)
+    adm2_in_map, adm2_percentages = mapping.map_sequences_using_coordinates(input_csv, mapping_json_files, urban_centres, pc_file, mapping_trait, map_inputs, input_crs)
 
     print("There are sequences from " + str(len(adm2_in_map)) + " admin2 regions")
 
diff --git a/civet/scripts/make_report.py b/civet/scripts/make_report.py
index 6c1156f8..aaa089c8 100644
--- a/civet/scripts/make_report.py
+++ b/civet/scripts/make_report.py
@@ -3,11 +3,12 @@
 from pweave import weave
 import argparse
 import shutil
+import sys
 
 
 thisdir = os.path.abspath(os.path.dirname(__file__))
 
-def make_report(cog_metadata, input_csv, filtered_cog_metadata, outfile, outdir, treedir, figdir, snp_report, colour_fields, label_fields, node_summary, report_template, failed_seqs, seq_centre, clean_locs, uk_map, channels_map, ni_map, local_lineages, local_lin_maps, local_lin_tables,map_sequences,x_col,y_col, input_crs,mapping_trait,urban_centres,add_boxplots, graphic_dict):
+def make_report(cog_metadata, input_csv, filtered_cog_metadata, outfile, outdir, treedir, figdir, snp_report, colour_fields, label_fields, node_summary, report_template, failed_seqs, seq_centre, clean_locs, uk_map, channels_map, ni_map, pc_file, local_lineages, local_lin_maps, local_lin_tables,map_sequences,map_inputs, input_crs,mapping_trait,urban_centres,add_boxplots, graphic_dict):
 
     name_stem = ".".join(outfile.split(".")[:-1])
                         
@@ -55,12 +56,12 @@ def make_report(cog_metadata, input_csv, filtered_cog_metadata, outfile, outdir,
                             "uk_map": f'uk_map = "{uk_map}"\n',
                             "channels_map": f'channels_map = "{channels_map}"\n',
                             "ni_map": f'ni_map = "{ni_map}"\n',
+                            "pc_file": f'pc_file = "{pc_file}"\n',
                             "local_lineages":f'local_lineages = "{local_lineages}"\n',
                             "local_lin_maps" : f'local_lin_maps = "{local_lin_maps}"\n',
                             "local_lin_tables" : f'local_lin_tables = "{local_lin_tables}"\n',
                             "map_sequences":f'map_sequences = "{map_sequences}"\n', 
-                            "x_col":f'x_col = "{x_col}"\n',
-                            "y_col":f'y_col = "{y_col}"\n',
+                            "map_inputs":f'map_inputs = "{map_inputs}"\n',
                             "mapping_trait":f'mapping_trait = "{mapping_trait}"\n',
                             "input_crs":f'input_crs = "{input_crs}"\n',
                             "urban_centres":f'urban_centres = "{urban_centres}"\n',
@@ -108,12 +109,12 @@ def main():
     parser.add_argument("--uk-map", required=True, help="shape file for uk counties", dest="uk_map")
     parser.add_argument("--channels-map", required=True, help="shape file for channel islands", dest="channels_map")
     parser.add_argument("--ni-map", required=True, help="shape file for northern irish counties", dest="ni_map")
+    parser.add_argument("--pc-file", required=True, help="file containing outer postcode to centroid mapping", dest="pc_file")
     parser.add_argument("--snp-report", required=True, help="snp report", dest="snp_report")
     parser.add_argument("--add-boxplots", action="store_true",dest="add_boxplots",default=False)
 
     parser.add_argument("--map-sequences", required=True, help="Bool for whether mapping of sequences by trait is required", dest="map_sequences")
-    parser.add_argument("--x-col", default="", help="column name in input csv which contains x coords for mapping", dest="x_col")
-    parser.add_argument("--y-col", default="", help="column name in input csv which contains y coords for mapping", dest="y_col")
+    parser.add_argument("--map-cols", default="", help="either column names in input csv which contains x coords and y coords for mapping as a comma separated string OR column name containing outer postcode", dest="map_inputs")
     parser.add_argument("--input-crs", default="", help="coordinate reference system that x and y inputs are in", dest="input_crs")
     parser.add_argument("--mapping-trait", default="", help="trait to map sequences by", dest="mapping_trait")
     parser.add_argument("--urban-centres", default="", help="geojson for plotting urban centres", dest="urban_centres")
@@ -124,7 +125,7 @@ def main():
 
     args = parser.parse_args()
 
-    make_report(args.cog_metadata, args.input_csv, args.filtered_cog_metadata, args.outfile, args.outdir, args.treedir, args.figdir, args.snp_report, args.colour_fields, args.label_fields, args.node_summary, args.report_template, args.failed_seqs, args.sc, args.clean_locs, args.uk_map, args.channels_map, args.ni_map, args.local_lineages, args.local_lin_maps, args.local_lin_tables,args.map_sequences, args.x_col, args.y_col, args.input_crs, args.mapping_trait, args.urban_centres,args.add_boxplots, args.graphic_dict)
+    make_report(args.cog_metadata, args.input_csv, args.filtered_cog_metadata, args.outfile, args.outdir, args.treedir, args.figdir, args.snp_report, args.colour_fields, args.label_fields, args.node_summary, args.report_template, args.failed_seqs, args.sc, args.clean_locs, args.uk_map, args.channels_map, args.ni_map, args.pc_file, args.local_lineages, args.local_lin_maps, args.local_lin_tables,args.map_sequences, args.map_inputs, args.input_crs, args.mapping_trait, args.urban_centres,args.add_boxplots, args.graphic_dict)
 
 
 if __name__ == "__main__":
diff --git a/civet/scripts/mapping.py b/civet/scripts/mapping.py
index 71f2ab28..99a9d2e8 100644
--- a/civet/scripts/mapping.py
+++ b/civet/scripts/mapping.py
@@ -161,7 +161,7 @@ def make_map(centroid_geo, all_uk):
     ax.axis("off")
 
 
-def run_map_functions(tax_dict, clean_locs_file, mapping_json_files): #So this takes adm2s and plots them onto the whole UK
+def map_adm2(tax_dict, clean_locs_file, mapping_json_files): #So this takes adm2s and plots them onto the whole UK
 
     adm2s, metadata_multi_loc, straight_map = prep_data(tax_dict, clean_locs_file)
 
@@ -175,12 +175,7 @@ def run_map_functions(tax_dict, clean_locs_file, mapping_json_files): #So this t
 
     make_map(centroid_geo, all_uk)
 
-def map_traits(input_csv, input_crs, colour_map_trait, x_col, y_col, mapping_json_files, urban_centres):
-
-    all_uk = generate_all_uk_dataframe(mapping_json_files)
-    all_uk = all_uk.to_crs("EPSG:3395")
-
-    urban = geopandas.read_file(urban_centres)
+def get_coords_from_file(input_csv, input_crs, colour_map_trait, x_col, y_col):
 
     ##READ IN TRAITS##
 
@@ -200,15 +195,67 @@ def map_traits(input_csv, input_crs, colour_map_trait, x_col, y_col, mapping_jso
                 trait = seq[colour_map_trait]
             
             if x != "" and y != "":
-            
-                #name_to_coords[name] = (((float(x)/200)-2.2,(float(y)/200)+55)) #just for now, will change to just x and y in a bit
-                name_to_ccords[name] = (float(x),float(y))
+                #If we have the actual coordinates
+                name_to_coords[name] = (float(x),float(y))
 
                 if colour_map_trait != "False":
                     name_to_trait[name] = trait
 
+    return name_to_coords, name_to_trait
+
+def generate_coords_from_outer_postcode(pc_file, input_csv, postcode_col, colour_map_trait):
+
+    pc_to_coords = {}
+    name_to_coords = {}
+    name_to_trait = {}
+
+    with open(pc_file) as f:
+        reader = csv.DictReader(f)
+        data = [r for r in reader]
+        
+        for line in data:
+
+            pc = line["outcode"]
+            x = float(line["longitude"])
+            y = float(line["latitude"])
+
+            pc_to_coords[pc] = ((x,y))
+    
+    
+    with open(input_csv) as f:
+        reader = csv.DictReader(f)
+        data = [r for r in reader]
+        
+        for seq in data:
+            name = seq["name"]
+            outer_postcode = seq[postcode_col]
+            
+            if colour_map_trait != "False":
+                trait = seq[colour_map_trait]
+            
+            if outer_postcode != "":
+                if outer_postcode in pc_to_coords.keys():
+                    name_to_coords[name] = pc_to_coords[outer_postcode]
+
+                    if colour_map_trait != "False":
+                        name_to_trait[name] = trait
+
+                else:
+                    pass
+
+
+    return name_to_coords, name_to_trait
+
+
+def plot_coordinates(mapping_json_files, urban_centres, name_to_coords, name_to_trait, input_crs, colour_map_trait):
+
     ##MAKE DATAFRAME##
 
+    all_uk = generate_all_uk_dataframe(mapping_json_files)
+    all_uk = all_uk.to_crs("EPSG:3395")
+
+    urban = geopandas.read_file(urban_centres)
+
     df_dict = defaultdict(list)
 
     for name, point in name_to_coords.items():
@@ -217,7 +264,7 @@ def map_traits(input_csv, input_crs, colour_map_trait, x_col, y_col, mapping_jso
             df_dict[colour_map_trait].append(name_to_trait[name])
         
     crs = {'init':input_crs}
-        
+
     df = geopandas.GeoDataFrame(df_dict, crs=crs)    
     df_final = df.to_crs(all_uk.crs)
 
@@ -290,11 +337,19 @@ def map_traits(input_csv, input_crs, colour_map_trait, x_col, y_col, mapping_jso
     ax.axis("off") 
 
     return adm2_counter, adm2_percentages
-
-
         
 
+def map_sequences_using_coordinates(input_csv, mapping_json_files, urban_centres, pc_file,colour_map_trait, map_inputs, input_crs):
 
-
+    cols = map_inputs.split(",")
+    if len(cols) == 2:
+        x_col = cols[0]
+        y_col = cols[1]
+        name_to_coords, name_to_trait = get_coords_from_file(input_csv, input_crs, colour_map_trait, x_col, y_col)
+    elif len(cols) == 1:
+        postcode_col = cols[0]
+        name_to_coords, name_to_trait = generate_coords_from_outer_postcode(pc_file, input_csv, postcode_col, colour_map_trait)
     
+    adm2_counter, adm2_percentages = plot_coordinates(mapping_json_files, urban_centres, name_to_coords, name_to_trait, input_crs, colour_map_trait)
 
+    return adm2_counter, adm2_percentages
\ No newline at end of file

From 53b0ae1df9105796e7267848da191d42e8d42c57 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Fri, 4 Sep 2020 14:41:18 +0100
Subject: [PATCH 02/16] adding timeline plot

---
 README.md                           |  3 +++
 civet/command.py                    | 14 +++++++++++
 civet/scripts/COG_template.pmd      |  2 +-
 civet/scripts/assess_input_file.smk |  2 ++
 civet/scripts/civet_template.pmd    | 39 +++++++++++++++++++++++------
 civet/scripts/data_parsing.py       | 34 ++++++++++++++++++++-----
 civet/scripts/make_report.py        |  6 +++--
 civet/scripts/make_tree_figures.py  |  3 ---
 setup.py                            |  3 ++-
 9 files changed, 85 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 9c505f6b..43c6dd1f 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,9 @@ optional arguments:
   --label-fields LABEL_FIELDS
                         Comma separated string of fields to add to tree report
                         labels.
+  --date-fields DATE_FIELDS
+                        Comma separated string of date information to include.
+                        Default = "sample_date"
   --node-summary COLUMN Choose which metadata column to summarise collapsed nodes by.                      
   --search-field SEARCH_FIELD
                         Option to search COG database for a different id type.
diff --git a/civet/command.py b/civet/command.py
index 8235c502..876b9304 100644
--- a/civet/command.py
+++ b/civet/command.py
@@ -45,6 +45,7 @@ def main(sysargs = sys.argv[1:]):
     parser.add_argument('--fields', action="store",help="Comma separated string of fields to display in the trees in the report. Default: country")
     parser.add_argument('--display', action="store", help="Comma separated string of fields to display as coloured dots rather than text in report trees. Optionally add colour scheme eg adm1=viridis", dest="display")
     parser.add_argument('--label-fields', action="store", help="Comma separated string of fields to add to tree report labels.", dest="label_fields")
+    parser.add_argument("--date-fields", action="store", help="Comma separated string of metadata headers containing date information.", dest="date_fields")
     parser.add_argument("--node-summary", action="store", help="Column to summarise collapsed nodes by. Default = Global lineage", dest="node_summary")
     parser.add_argument('--search-field', action="store",help="Option to search COG database for a different id type. Default: COG-UK ID", dest="search_field",default="central_sample_id")
     parser.add_argument('--distance', action="store",help="Extraction from large tree radius. Default: 2", dest="distance",default=2)
@@ -177,6 +178,7 @@ def main(sysargs = sys.argv[1:]):
     labels = []
     queries = []
     graphics_list = []
+    date_lst = []
 
     with open(query, newline="") as f:
         reader = csv.DictReader(f)
@@ -209,6 +211,17 @@ def main(sysargs = sys.argv[1:]):
                     sys.stderr.write(f"Error: {label_f} field not found in metadata file")
                     sys.exit(-1)
 
+        if not args.date_fields:
+            date_lst.append("sample_date")
+        else:
+            date_fields = args.date_fields.split(",")
+            for date_f in date_fields:
+                if date_f in reader.fieldnames or date_f != "sample_date":
+                    date_lst.append(date_f)
+                else:
+                    sys.stderr.write(f"Error: {date_f} field not found in query metadata file")
+                    sys.exit(-1)
+
         if args.display:
             sections = args.display.split(",")
             for item in sections:
@@ -250,6 +263,7 @@ def main(sysargs = sys.argv[1:]):
         "query":query,
         "fields":",".join(fields),
         "label_fields":",".join(labels),
+        "date_fields":",".join(date_lst),
         "outdir":outdir,
         "tempdir":tempdir,
         "trim_start":265,   # where to pad to using datafunk
diff --git a/civet/scripts/COG_template.pmd b/civet/scripts/COG_template.pmd
index e75267b1..bd8237ef 100644
--- a/civet/scripts/COG_template.pmd
+++ b/civet/scripts/COG_template.pmd
@@ -193,7 +193,7 @@ else:
 Their location is shown on the following map, with dots sized by the number of sequences from each location.
 
 ```python, name="make_map", echo=False, include=False, results='tex'
-map = mapping.run_map_functions(query_dict, clean_locs_file, mapping_json_files)
+map = mapping.map_adm2(query_dict, clean_locs_file, mapping_json_files)
 ```
 
 ```python, name="show_map", echo=False, results='raw'
diff --git a/civet/scripts/assess_input_file.smk b/civet/scripts/assess_input_file.smk
index 0bccde05..c0499e5f 100644
--- a/civet/scripts/assess_input_file.smk
+++ b/civet/scripts/assess_input_file.smk
@@ -421,6 +421,7 @@ rule make_report:
         outdir = config["rel_outdir"],
         fields = config["fields"],
         label_fields = config["label_fields"],
+        date_fields = config["date_fields"],
         node_summary = config["node_summary"],
         sc_source = config["sequencing_centre"],
         sc = config["sequencing_centre_file"],
@@ -472,6 +473,7 @@ rule make_report:
         "-f {params.fields:q} "
         "--graphic_dict {params.graphic_dict:q} "
         "--label-fields {params.label_fields:q} "
+        "--date-fields {params.date_fields:q} "
         "--node-summary {params.node_summary} "
         "--figdir {params.rel_figdir:q} "
         "{params.sc_flag} "
diff --git a/civet/scripts/civet_template.pmd b/civet/scripts/civet_template.pmd
index e1c3d461..0de7e587 100644
--- a/civet/scripts/civet_template.pmd
+++ b/civet/scripts/civet_template.pmd
@@ -25,6 +25,7 @@ import os
 import data_parsing as dp
 import make_tree_figures as tree_viz
 import mapping as mapping
+import time_functions as time_func
 import matplotlib.font_manager as font_manager
 import matplotlib as mpl
 from collections import defaultdict
@@ -66,6 +67,7 @@ label_fields_input = "" ##CHANGE
 graphic_dict_input = "" ##CHANGE
 summary_dir = "" ##CHANGE
 node_summary_option = "" ##CHANGE
+date_fields_input = ##CHANGE 
 
 tree_name_stem = "" ##CHANGE
 
@@ -89,7 +91,10 @@ if label_fields_input != "NONE":
     for i in options: 
         label_fields.append(i)
 
-
+date_fields = []
+options = date_fields_input.split(",")
+for i in options: 
+    date_fields.append(i)
 
 snp_report = "" ##CHANGE
 
@@ -130,20 +135,22 @@ mpl.rcParams['axes.labelweight']=50
 
 ```python, name="parse metadata", echo=False, results='raw'
 
-present_in_tree, tip_to_tree = dp.parse_tree_tips(tree_dir)
+present_in_tree, tip_to_tree, tree_list = dp.parse_tree_tips(tree_dir)
 
 query_dict, query_id_dict, present_lins, tree_to_tip = dp.parse_filtered_metadata(filtered_cog_metadata, tip_to_tree) #Just the lines with their queries plus the closest match in COG
 
 if input_csv != '':
     adm2_to_adm1 = dp.prepping_adm2_adm1_data(full_metadata_file)
-    query_dict = dp.parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, adm2_to_adm1, False) #Any query information they have provided
+    query_dict = dp.parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, date_fields, adm2_to_adm1, False) #Any query information they have provided
+
+full_tax_dict = dp.parse_full_metadata(query_dict, full_metadata_file, present_lins, present_in_tree, node_summary_option, date_fields)
+
+dates_present, overall_dates, max_overall_date, min_overall_date,  max_string, min_string = time_func.summarise_dates(query_dict)
 
-full_tax_dict = dp.parse_full_metadata(query_dict, full_metadata_file, present_lins, present_in_tree, node_summary_option)
 ```
 
 ```python, name="QC fails", echo=False, results="raw"
 count_fails = 0
-
 if QC_fail_file != "": 
     with open(QC_fail_file) as f:
         next(f)
@@ -174,6 +181,13 @@ for tax in query_dict.values():
 print(str(number_seqs) + " queries (" + str(cog_number) + " matched to COG-UK database).")
 print(str(not_in_cog_number) + " additional sequences provided.")
 
+if dates_present:
+    print("Time fields provided: " + ",".join(date_fields))
+    print("Earliest date: " + min_string)
+    print("Latest date: " + max_string)
+else:
+    print("No date information found")
+
 ```
 
 
@@ -235,9 +249,6 @@ if too_tall_trees != []:
         print("Tree" + str(tree) + " is too large to be rendered here.")
 ```
 
-
-
-
 ```python, name="tree_background", echo=False, include=False,  results='raw'
 if add_boxplots != "":
     print("""### Tree background\n\nThe following plots describe the data in the collapsed nodes in more detail.\nIf more than one country was present, the bar chart describes the number of sequences present in each country. \nWhere there were 10 countries or more, the largest 10 have been taken. \nIf a UK sequence is present in the collapsed node, it is always shown in the plot.\n\n""")
@@ -277,6 +288,18 @@ if map_sequences != "False":
 
 
 
+```python, name="time_plot", echo=False, results='raw', include=False
+if dates_present:
+    print("## Time distribution of sequences\n")
+    print("Coloured by: Tree \n")
+    time_func.plot_time_series(query_dict, max_overall_date, min_overall_date, tree_list, label_fields)
+```
+```python, name="show_time", echo=False, results='raw'
+if dates_present:
+    print("![](" + figdir + "/" + name_stem + "_time_plot_1.png)")
+```
+
+
 
 ```python, name='Regional-scale', echo=False, results='raw'
 if local_lineages != '':
diff --git a/civet/scripts/data_parsing.py b/civet/scripts/data_parsing.py
index 7eaeefab..77596bc0 100644
--- a/civet/scripts/data_parsing.py
+++ b/civet/scripts/data_parsing.py
@@ -17,6 +17,8 @@ def __init__(self, name, global_lin, uk_lin, phylotype):
         self.name = name
 
         self.sample_date = "NA"
+
+        self.date_dict = {}
         
         if global_lin == "":
             self.global_lin = "NA"
@@ -152,7 +154,14 @@ def parse_filtered_metadata(metadata_file, tip_to_tree):
     return query_dict, query_id_dict, present_lins, tree_to_tip
     
 
-def parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, adm2_adm1_dict, cog_report):
+def convert_date(date_string):
+    bits = date_string.split("-")
+    date_dt = dt.date(int(bits[0]),int(bits[1]), int(bits[2]))
+    
+    return date_dt
+
+
+def parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, date_fields, adm2_adm1_dict, cog_report):
     new_query_dict = {}
     contract_dict = {"SCT":"Scotland", "WLS": "Wales", "ENG":"England", "NIR": "Northern_Ireland"}
     cleaning = {"SCOTLAND":"Scotland", "WALES":"Wales", "ENGLAND":"England", "NORTHERN_IRELAND": "Northern_Ireland", "NORTHERN IRELAND": "Northern_Ireland"}
@@ -172,6 +181,13 @@ def parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, adm2
             if name in query_id_dict.keys():
                 taxon = query_id_dict[name]
 
+                for field in date_fields:
+                    if field in reader.fieldnames:
+                        if sequence[field] != "":
+                            date_dt = convert_date(sequence[field])
+                            taxon.date_dict[field] = date_dt #so if it's in the query input, it overwrites the COG one
+
+                #keep this separate to above, because sample date is specifically needed
                 if "sample_date" in col_names: #if it's not in COG but date is provided (if it's in COG, it will already have been assigned a sample date.)
                     if sequence["sample_date"] != "":
                         taxon.sample_date = sequence["sample_date"]
@@ -214,9 +230,6 @@ def parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, adm2
 
 
                 new_query_dict[taxon.name] = taxon
-            
-            # else:
-            #     print(name + " is in the input file but not the processed file. This suggests that it is not in COG and a sequence has also not been provided.")
                 
     return new_query_dict 
 
@@ -225,11 +238,13 @@ def parse_tree_tips(tree_dir):
 
     tips = []
     tip_to_tree = {}
+    tree_list = []
 
     for fn in os.listdir(tree_dir):
         if fn.endswith("tree"):
             tree_name = fn.split(".")[0]
             tree = bt.loadNewick(tree_dir + "/" + fn, absoluteTime=False)
+            tree_list.append(tree_name)
             for k in tree.Objects:
                 if k.branchType == 'leaf' and "inserted" not in k.name:
                     tips.append(k.name)
@@ -242,9 +257,9 @@ def parse_tree_tips(tree_dir):
                     tip_list = tip_string.split(",")
                     tips.extend(tip_list)
 
-    return tips, tip_to_tree
+    return tips, tip_to_tree, tree_list
 
-def parse_full_metadata(query_dict, full_metadata, present_lins, present_in_tree, node_summary_option):
+def parse_full_metadata(query_dict, full_metadata, present_lins, present_in_tree, node_summary_option, date_fields):
 
     full_tax_dict = query_dict.copy()
 
@@ -286,8 +301,15 @@ def parse_full_metadata(query_dict, full_metadata, present_lins, present_in_tree
                 tax_object = query_dict[seq_name]
                 if tax_object.sample_date == "NA" and date != "":
                     tax_object.sample_date = date
+                    tax_object.all_dates.append(convert_date(date))
                 if "adm2" not in tax_object.attribute_dict.keys() and adm2 != "":
                     tax_object.attribute_dict["adm2"] = adm2
+
+                for field in date_fields:
+                    if field in reader.fieldnames:
+                        if sequence[field] != "":
+                            date_dt = convert_date(sequence[field])
+                            tax_object.date_dict[field] = date_dt 
                 
                 full_tax_dict[seq_name] = tax_object
                     
diff --git a/civet/scripts/make_report.py b/civet/scripts/make_report.py
index aaa089c8..49105cf1 100644
--- a/civet/scripts/make_report.py
+++ b/civet/scripts/make_report.py
@@ -8,7 +8,7 @@
 
 thisdir = os.path.abspath(os.path.dirname(__file__))
 
-def make_report(cog_metadata, input_csv, filtered_cog_metadata, outfile, outdir, treedir, figdir, snp_report, colour_fields, label_fields, node_summary, report_template, failed_seqs, seq_centre, clean_locs, uk_map, channels_map, ni_map, pc_file, local_lineages, local_lin_maps, local_lin_tables,map_sequences,map_inputs, input_crs,mapping_trait,urban_centres,add_boxplots, graphic_dict):
+def make_report(cog_metadata, input_csv, filtered_cog_metadata, outfile, outdir, treedir, figdir, snp_report, colour_fields, label_fields, node_summary, report_template, failed_seqs, seq_centre, clean_locs, uk_map, channels_map, ni_map, pc_file, local_lineages, local_lin_maps, local_lin_tables,map_sequences,map_inputs, input_crs,mapping_trait,urban_centres,add_boxplots, graphic_dict, date_fields):
 
     name_stem = ".".join(outfile.split(".")[:-1])
                         
@@ -44,6 +44,7 @@ def make_report(cog_metadata, input_csv, filtered_cog_metadata, outfile, outdir,
                             "input_directory": f'input_directory = "{treedir}"\n',
                             "desired_fields_input": f'desired_fields_input = "{colour_fields}"\n',
                             "label_fields_input": f'label_fields_input = "{label_fields}"\n',
+                            "date_fields_input":f'date_fields_input = "{date_fields}"\n',
                             "node_summary_option": f'node_summary_option = "{node_summary}"\n',
                             "figdir": f'figdir = "{figdir}"\n',
                             "tree_dir": f'tree_dir = "{treedir}"\n',
@@ -90,6 +91,7 @@ def main():
     parser.add_argument("-f", "--fields",default="", help="desired fields for report. Default=date and UK country",dest="colour_fields")
     parser.add_argument("-l", "--label-fields", default="", help="fields to add into labels in report trees. Default is adm2 and date", dest='label_fields')
     parser.add_argument("-gd", "--graphic_dict", default="", help="fields to colour by rather than display text. Add colour scheme optionally", dest="graphic_dict")
+    parser.add_argument("--date-fields", help="column headers containing date information as a a comma separated string.", dest="date_fields")
     parser.add_argument("--node-summary", action="store", help="field to summarise collapsed nodes by. Default=lineage", dest="node_summary")
 
     parser.add_argument("-sc", "--sequencing-centre",default="", help="Sequencing centre", dest="sc")
@@ -125,7 +127,7 @@ def main():
 
     args = parser.parse_args()
 
-    make_report(args.cog_metadata, args.input_csv, args.filtered_cog_metadata, args.outfile, args.outdir, args.treedir, args.figdir, args.snp_report, args.colour_fields, args.label_fields, args.node_summary, args.report_template, args.failed_seqs, args.sc, args.clean_locs, args.uk_map, args.channels_map, args.ni_map, args.pc_file, args.local_lineages, args.local_lin_maps, args.local_lin_tables,args.map_sequences, args.map_inputs, args.input_crs, args.mapping_trait, args.urban_centres,args.add_boxplots, args.graphic_dict)
+    make_report(args.cog_metadata, args.input_csv, args.filtered_cog_metadata, args.outfile, args.outdir, args.treedir, args.figdir, args.snp_report, args.colour_fields, args.label_fields, args.node_summary, args.report_template, args.failed_seqs, args.sc, args.clean_locs, args.uk_map, args.channels_map, args.ni_map, args.pc_file, args.local_lineages, args.local_lin_maps, args.local_lin_tables,args.map_sequences, args.map_inputs, args.input_crs, args.mapping_trait, args.urban_centres,args.add_boxplots, args.graphic_dict, args.date_fields)
 
 
 if __name__ == "__main__":
diff --git a/civet/scripts/make_tree_figures.py b/civet/scripts/make_tree_figures.py
index 7abc52bb..080014fb 100644
--- a/civet/scripts/make_tree_figures.py
+++ b/civet/scripts/make_tree_figures.py
@@ -738,9 +738,6 @@ def describe_tree_background(full_tax_dict, tree_name_stem, tree_dir):
                     plt.xticks(size=5, rotation=90)
                     plt.yticks(size=5)
                     
-
-
-
                     plt.title(pretty_focal + ": " + nde, size=5)
 
 
diff --git a/setup.py b/setup.py
index 68682320..4410d45e 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,8 @@ def run(self):
       "civet/scripts/find_snps.smk",
       "civet/scripts/find_snps.py",
       "civet/scripts/find_ambiguities.py",
-      "civet/scripts/make_genome_graph.py"],
+      "civet/scripts/make_genome_graph.py",
+      "civet/scripts/time_functions.py"],
       package_data={"civet":["data/*","data/headers/*","data/mapping_files/*","data/vega_templates/*"]},
       install_requires=[
             "biopython>=1.70",

From 06e3dec25a16a4c586bf54325927a2757e9d776b Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Fri, 4 Sep 2020 14:41:34 +0100
Subject: [PATCH 03/16] actually adding the time script

---
 civet/scripts/time_functions.py | 134 ++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 civet/scripts/time_functions.py

diff --git a/civet/scripts/time_functions.py b/civet/scripts/time_functions.py
new file mode 100644
index 00000000..1ef758c6
--- /dev/null
+++ b/civet/scripts/time_functions.py
@@ -0,0 +1,134 @@
+import datetime as dt 
+import matplotlib.pyplot as plt
+from matplotlib import cm
+import numpy as np
+import matplotlib.ticker as plticker
+import math
+
+
+def summarise_dates(query_dict):
+
+    overall_dates = []
+
+    
+
+    for tax in query_dict.values():
+        overall_dates.extend(list(tax.date_dict.values()))
+
+    if overall_dates != []:
+
+        min_overall_date = min(overall_dates)
+        max_overall_date = max(overall_dates)
+
+        min_string = min_overall_date.strftime("%Y-%m-%d")
+        max_string = max_overall_date.strftime("%Y-%m-%d")
+
+        dates_present = True
+
+    else:
+        dates_present = False
+
+
+    return dates_present, overall_dates, max_overall_date, min_overall_date,  max_string, min_string
+
+def display_name(tax, custom_tip_fields):
+    
+    name = tax.name
+            
+    date = tax.sample_date
+    display_name = f"{name}|{date}"
+    
+    if "adm2" in tax.attribute_dict.keys():
+        adm2 = tax.attribute_dict["adm2"]
+        display_name = f"{name}|{adm2}|{date}"
+
+    if len(custom_tip_fields) > 0: 
+        for label_element in custom_tip_fields:
+            display_name = display_name + "|" + tax.attribute_dict[label_element]
+    
+    return display_name
+
+def find_colour_dict(tree_list):
+    
+    colour_dict = {}
+    count = 0
+
+    cmap = cm.get_cmap("Paired")
+    
+    colors = cmap(np.linspace(0, 1, len(tree_list)))
+    
+    for option in sorted(tree_list):
+        colour_dict[option] = colors[count]
+        count += 1
+
+    return colour_dict
+
+def plot_time_series(query_dict, overall_max_date, overall_min_date, tree_list, custom_tip_fields):
+
+    time_len = (overall_max_date - overall_min_date).days
+
+    tick_loc_base = float(math.ceil(time_len/5))
+    
+    loc = plticker.MultipleLocator(base=tick_loc_base)
+
+    fig, ax1 = plt.subplots(1,1, figsize=(20,15))
+    ax2 = ax1.twinx()
+    
+    if time_len > 10:
+        offset = dt.timedelta(time_len/10)
+    else:
+        offset = dt.timedelta(time_len/3)
+
+    colour_dict = find_colour_dict(tree_list)
+
+    count = 0
+
+    for tax in query_dict.values():
+        if tax.date_dict != {}:
+        
+            first_date = min(list(tax.date_dict.values()))
+            last_date = max(list(tax.date_dict.values()))
+            
+            label = display_name(tax, custom_tip_fields)
+            
+            x = [first_date, last_date]
+            y = [count, count]
+            
+            ax1.scatter(first_date, count, color=colour_dict[tax.tree], s=200, zorder=2)
+            ax1.scatter(last_date, count, color=colour_dict[tax.tree], s=200, zorder=2)
+            
+            if first_date != last_date:
+                ax1.plot(x,y, color=colour_dict[tax.tree],zorder=1)
+
+            # ax2.plot([last_date, overall_max_date+offset],y,ls='dotted',lw=1,color="dimgrey")
+            if x != overall_max_date:
+                ax1.plot([last_date, overall_max_date+offset],y,ls='--',lw=1,color="dimgrey", zorder=1)
+
+            ax2.text(overall_max_date+offset, count, label,size=15)
+            
+            count += 1
+        
+    ylim = ax1.get_ylim()
+    ax2.set_ylim(ylim)
+        
+    ax1.spines['top'].set_visible(False) ## make axes invisible
+    ax1.spines['right'].set_visible(False)
+    ax1.spines['left'].set_visible(False)
+    ax1.spines['bottom'].set_visible(False)
+    ax1.set_yticks([])
+    ax2.spines['top'].set_visible(False) ## make axes invisible
+    ax2.spines['right'].set_visible(False)
+    ax2.spines['left'].set_visible(False)
+    ax2.set_yticks([])
+
+    ax1.tick_params(labelsize=20, rotation=90)
+    ax1.xaxis.set_major_locator(loc)
+
+
+    fig.tight_layout()
+
+
+
+
+
+

From 3f0208268ba1196dc52c1e2f3a025502034068a9 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Tue, 8 Sep 2020 18:57:08 +0100
Subject: [PATCH 04/16] timelines per tree, update test csv, fixing bugs

---
 civet/scripts/civet_template.pmd   | 28 ++++++--------
 civet/scripts/data_parsing.py      | 62 +++++++++++++++++++-----------
 civet/scripts/find_snps.smk        |  3 +-
 civet/scripts/make_tree_figures.py | 18 ++++++---
 civet/scripts/time_functions.py    | 21 ++++++----
 civet/tests/test.csv               | 24 ++++++------
 6 files changed, 89 insertions(+), 67 deletions(-)

diff --git a/civet/scripts/civet_template.pmd b/civet/scripts/civet_template.pmd
index 0de7e587..2a8311b7 100644
--- a/civet/scripts/civet_template.pmd
+++ b/civet/scripts/civet_template.pmd
@@ -143,7 +143,7 @@ if input_csv != '':
     adm2_to_adm1 = dp.prepping_adm2_adm1_data(full_metadata_file)
     query_dict = dp.parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, date_fields, adm2_to_adm1, False) #Any query information they have provided
 
-full_tax_dict = dp.parse_full_metadata(query_dict, full_metadata_file, present_lins, present_in_tree, node_summary_option, date_fields)
+full_tax_dict = dp.parse_full_metadata(query_dict, label_fields, full_metadata_file, present_lins, present_in_tree, node_summary_option, date_fields)
 
 dates_present, overall_dates, max_overall_date, min_overall_date,  max_string, min_string = time_func.summarise_dates(query_dict)
 
@@ -213,7 +213,7 @@ elif not_in_cog_number == 0 and cog_number != 0:
 <!-- ## Phylogenies -->
 
 ```python, name="make_trees", echo=False, include=False, figure=False
-too_tall_trees, overall_tree_number, colour_dict_dict, overall_df_dict = tree_viz.make_all_of_the_trees(tree_dir, tree_name_stem, full_tax_dict, query_dict, desired_fields, label_fields, graphic_dict)
+too_tall_trees, overall_tree_number, colour_dict_dict, overall_df_dict, tree_order = tree_viz.make_all_of_the_trees(tree_dir, tree_name_stem, full_tax_dict, query_dict, desired_fields, label_fields, graphic_dict)
 ```
 ```python, name="make_legend", echo=False, include=False, results='tex'
 for trait, colour_dict in colour_dict_dict.items():
@@ -222,6 +222,13 @@ for trait, colour_dict in colour_dict_dict.items():
     if number_of_options > 15:
         print("WARNING: There are more than 15 options to colour by for " + trait + ", which will make it difficult to see the differences between colours. Consider adding the trait to the taxon labels on the tree by using the flag _--label-fields_ when calling CIVET.")
 ```
+```python, name="time_plot", echo=False, results='raw', include=False
+if dates_present:
+    for tree in tree_order:
+        lookup = f"{tree_name_stem}_{tree}"
+        tips = tree_to_tip[lookup]
+        time_func.plot_time_series(tips, query_dict, max_overall_date, min_overall_date, tree_list, label_fields)
+```
 
 ```python, name="show_trees", echo=False, results='raw'
 for i in range(1,overall_tree_number+1):
@@ -240,6 +247,8 @@ for i in range(1,overall_tree_number+1):
     print(f'<img src="{figdir}/{name_stem}_make_legend_1.png" alt="drawing" style="width:100%;"/>')
     print("\n")
 
+    if dates_present:
+        print("![](" + figdir + "/" + name_stem + "_time_plot_" + str(i) + ".png)")
 
 ```
 
@@ -286,21 +295,6 @@ if map_sequences != "False":
     print("![](" + figdir + "/" + name_stem + "_map_sequences_1.png)")
 ```
 
-
-
-```python, name="time_plot", echo=False, results='raw', include=False
-if dates_present:
-    print("## Time distribution of sequences\n")
-    print("Coloured by: Tree \n")
-    time_func.plot_time_series(query_dict, max_overall_date, min_overall_date, tree_list, label_fields)
-```
-```python, name="show_time", echo=False, results='raw'
-if dates_present:
-    print("![](" + figdir + "/" + name_stem + "_time_plot_1.png)")
-```
-
-
-
 ```python, name='Regional-scale', echo=False, results='raw'
 if local_lineages != '':
     print("## Regional-scale background UK lineage mapping")
diff --git a/civet/scripts/data_parsing.py b/civet/scripts/data_parsing.py
index 77596bc0..d77a23df 100644
--- a/civet/scripts/data_parsing.py
+++ b/civet/scripts/data_parsing.py
@@ -185,7 +185,7 @@ def parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, date
                     if field in reader.fieldnames:
                         if sequence[field] != "":
                             date_dt = convert_date(sequence[field])
-                            taxon.date_dict[field] = date_dt #so if it's in the query input, it overwrites the COG one
+                            taxon.date_dict[field] = date_dt 
 
                 #keep this separate to above, because sample date is specifically needed
                 if "sample_date" in col_names: #if it's not in COG but date is provided (if it's in COG, it will already have been assigned a sample date.)
@@ -193,29 +193,35 @@ def parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, date
                         taxon.sample_date = sequence["sample_date"]
 
                 for col in col_names: #Add other metadata fields provided
-                    if col != "name" and (col in desired_fields or col in label_fields) and col != "adm1":
-                        if sequence[col] == "":
-                            taxon.attribute_dict[col] = "NA"
-                        else:
+                    if len(label_fields) > 0:
+                        if col in label_fields:
                             taxon.attribute_dict[col] = sequence[col]
-        
-                    if col == "adm1":
-                        if "UK" in sequence[col]:
-                            adm1_prep = sequence[col].split("-")[1]
-                            adm1 = contract_dict[adm1_prep]
-                        else:
-                            if sequence[col].upper() in cleaning.keys():
-                                adm1 = cleaning[sequence[col].upper()]
+                    else:
+                        if col != "name" and col in desired_fields and col != "adm1":
+                            if sequence[col] == "":
+                                taxon.attribute_dict[col] = "NA"
                             else:
-                                adm1 = sequence[col]
-
-                        taxon.attribute_dict["adm1"] = adm1
+                                taxon.attribute_dict[col] = sequence[col]
+            
+                        if col == "adm1":
+                            if "UK" in sequence[col]:
+                                adm1_prep = sequence[col].split("-")[1]
+                                adm1 = contract_dict[adm1_prep]
+                            else:
+                                if sequence[col].upper() in cleaning.keys():
+                                    adm1 = cleaning[sequence[col].upper()]
+                                else:
+                                    adm1 = sequence[col]
 
-                    if col == "adm2" and "adm1" not in col_names: #or sequence["adm1"] == ""):
-                        if sequence[col] in adm2_adm1_dict.keys():
-                            adm1 = adm2_adm1_dict[sequence[col]]
                             taxon.attribute_dict["adm1"] = adm1
 
+                        if col == "adm2" and "adm1" not in col_names: #or sequence["adm1"] == ""):
+                            if sequence[col] in adm2_adm1_dict.keys():
+                                adm1 = adm2_adm1_dict[sequence[col]]
+                                taxon.attribute_dict["adm1"] = adm1
+
+                    
+
                 if cog_report:
                     taxon.attribute_dict["adm2"]= sequence["adm2"] 
                     if "collection_date" in reader.fieldnames:
@@ -259,10 +265,15 @@ def parse_tree_tips(tree_dir):
 
     return tips, tip_to_tree, tree_list
 
-def parse_full_metadata(query_dict, full_metadata, present_lins, present_in_tree, node_summary_option, date_fields):
+def parse_full_metadata(query_dict, label_fields, full_metadata, present_lins, present_in_tree, node_summary_option, date_fields):
 
     full_tax_dict = query_dict.copy()
 
+    with open(full_metadata, 'r') as f:
+        reader = csv.DictReader(f)
+        col_name_prep = next(reader)
+        col_names = list(col_name_prep.keys())
+
     with open(full_metadata, 'r') as f:
         reader = csv.DictReader(f)
         in_data = [r for r in reader]
@@ -307,10 +318,15 @@ def parse_full_metadata(query_dict, full_metadata, present_lins, present_in_tree
 
                 for field in date_fields:
                     if field in reader.fieldnames:
-                        if sequence[field] != "":
+                        if sequence[field] != "" and field not in tax_object.date_dict.keys():
                             date_dt = convert_date(sequence[field])
                             tax_object.date_dict[field] = date_dt 
-                
+
+                if len(label_fields) > 0:
+                    for field in label_fields:
+                        if field in col_names and tax_object.attribute_dict[field] == "" and sequence[field] != "":
+                            tax_object.attribute_dict[field] = sequence[field]
+
                 full_tax_dict[seq_name] = tax_object
                     
     return full_tax_dict
@@ -368,7 +384,7 @@ def make_initial_table(query_dict, desired_fields, label_fields, cog_report):
         
         if label_fields != []:
             for i in label_fields: 
-                if i not in desired_fields:
+                if i not in desired_fields and i != "sample_date" and i != "name":
                     df_dict[i].append(query.attribute_dict[i])
 
         if cog_report:
diff --git a/civet/scripts/find_snps.smk b/civet/scripts/find_snps.smk
index 80e5ab0d..88c7ee50 100644
--- a/civet/scripts/find_snps.smk
+++ b/civet/scripts/find_snps.smk
@@ -59,6 +59,7 @@ rule assess_snps:
         find_snps.py --input {input.aln:q} --output {output.snp_report:q} --tree {params.tree}
         """
 
+#delete this
 rule gather_snp_reports:
     input:
         snps = expand(os.path.join(config["tempdir"],"snp_reports","{tree}.snps.txt"), tree=config["tree_stems"])
@@ -99,7 +100,7 @@ rule ambiguities_at_snp_sites:
 
 rule make_snp_figure:
     input:
-        rules.gather_snp_reports.output.report,
+        rules.gather_snp_reports.output.report, #change this one to snps input in gather_snp_reports
         rules.ambiguities_at_snp_sites.output.snp_report
     output:
         os.path.join(config["outdir"],"figures","genome_graph.png")
diff --git a/civet/scripts/make_tree_figures.py b/civet/scripts/make_tree_figures.py
index 080014fb..4ca2b317 100644
--- a/civet/scripts/make_tree_figures.py
+++ b/civet/scripts/make_tree_figures.py
@@ -84,10 +84,16 @@ def display_name(tree, tree_name, tree_dir, full_taxon_dict, query_dict, custom_
                         adm2 = taxon_obj.attribute_dict["adm2"]
                         k.traits["display"] = f"{name}|{adm2}|{date}"
 
-                    if name in query_dict.keys():
-                        if len(custom_tip_fields) > 0: 
+                    count = 0
+                    if len(custom_tip_fields) > 0: 
+                        if name in query_dict.keys(): 
                             for label_element in custom_tip_fields:
-                                k.traits["display"] = k.traits["display"] + "|" + taxon_obj.attribute_dict[label_element]
+                                if count == 0:
+                                    display_name = taxon_obj.attribute_dict[label_element] 
+                                else:   
+                                    display_name = display_name + "|" + taxon_obj.attribute_dict[label_element]
+                                count += 1
+                        k.traits["display"] = display_name
                 
                 
                 else:
@@ -348,14 +354,14 @@ def make_all_of_the_trees(input_dir, tree_name_stem, taxon_dict, query_dict, des
 
     overall_tree_count = 0
     
-    lst = sort_trees_index(input_dir)
+    tree_order = sort_trees_index(input_dir)
 
     for trait, colour_scheme in graphic_dict.items():
         colour_dict = find_colour_dict(query_dict, trait, colour_scheme)
         
         colour_dict_dict[trait] = colour_dict
 
-    for fn in lst:
+    for fn in tree_order:
         lineage = fn
         treename = f"{tree_name_stem}_{fn}"
         treefile = f"{tree_name_stem}_{fn}.tree"
@@ -403,7 +409,7 @@ def make_all_of_the_trees(input_dir, tree_name_stem, taxon_dict, query_dict, des
                 too_tall_trees.append(lineage)
                 continue
 
-    return too_tall_trees, overall_tree_count, colour_dict_dict, overall_df_dict
+    return too_tall_trees, overall_tree_count, colour_dict_dict, overall_df_dict, tree_order
 
 def summarise_collapsed_node_for_label(tree_dir, focal_node, focal_tree, full_tax_dict): 
     
diff --git a/civet/scripts/time_functions.py b/civet/scripts/time_functions.py
index 1ef758c6..da0765fa 100644
--- a/civet/scripts/time_functions.py
+++ b/civet/scripts/time_functions.py
@@ -10,8 +10,6 @@ def summarise_dates(query_dict):
 
     overall_dates = []
 
-    
-
     for tax in query_dict.values():
         overall_dates.extend(list(tax.date_dict.values()))
 
@@ -42,9 +40,14 @@ def display_name(tax, custom_tip_fields):
         adm2 = tax.attribute_dict["adm2"]
         display_name = f"{name}|{adm2}|{date}"
 
+    count = 0
     if len(custom_tip_fields) > 0: 
         for label_element in custom_tip_fields:
-            display_name = display_name + "|" + tax.attribute_dict[label_element]
+            if count == 0:
+                display_name = tax.attribute_dict[label_element] 
+            else:   
+                display_name = display_name + "|" + tax.attribute_dict[label_element]
+            count += 1
     
     return display_name
 
@@ -63,15 +66,17 @@ def find_colour_dict(tree_list):
 
     return colour_dict
 
-def plot_time_series(query_dict, overall_max_date, overall_min_date, tree_list, custom_tip_fields):
+def plot_time_series(tips, query_dict, overall_max_date, overall_min_date, tree_list, custom_tip_fields):
 
     time_len = (overall_max_date - overall_min_date).days
 
+    height = math.sqrt(len(tips))*2 + 1
+
     tick_loc_base = float(math.ceil(time_len/5))
     
     loc = plticker.MultipleLocator(base=tick_loc_base)
 
-    fig, ax1 = plt.subplots(1,1, figsize=(20,15))
+    fig, ax1 = plt.subplots(1,1, figsize=(20,height))
     ax2 = ax1.twinx()
     
     if time_len > 10:
@@ -81,10 +86,10 @@ def plot_time_series(query_dict, overall_max_date, overall_min_date, tree_list,
 
     colour_dict = find_colour_dict(tree_list)
 
-    count = 0
+    count = 1
 
-    for tax in query_dict.values():
-        if tax.date_dict != {}:
+    for tax in tips:
+        if tax.date_dict != {} and tax in query_dict.values():
         
             first_date = min(list(tax.date_dict.values()))
             last_date = max(list(tax.date_dict.values()))
diff --git a/civet/tests/test.csv b/civet/tests/test.csv
index 2e34d65f..eb36b09c 100644
--- a/civet/tests/test.csv
+++ b/civet/tests/test.csv
@@ -1,12 +1,12 @@
-name,HCW_status,care_home,sample_date,adm2,test,test2
-EDB129_closest,HCW,A,2020-06-05,Edinburgh,yes,yes
-EDB129_closestb,Patient,A,2020-02-30,Edinburgh,yes,yes
-20144000304_closest,Patient,B,2020-02-31,Edinburgh,no,yes
-This_seq_is_too_short,HCW,A,2020-02-32,Edinburgh,no,yes
-EDB3588,HCW,C,2020-02-33,Edinburgh,no,yes
-EDB2533,Patient,D,2020-02-34,Edinburgh,no,NA
-PHEC-1A65C,HCW,A,2020-02-35,Edinburgh,yes,no
-PHEC-1AD2A,NA,NA,2020-02-36,Edinburgh,no,yes
-PHEC-1A917,Patient,D,2020-02-37,Edinburgh,NA,NA
-This_seq_has_lots_of_Ns,HCW,A,2020-02-38,Edinburgh,NA,NA
-This_seq_is_literally_just_N,HCW,D,2020-02-39,Edinburgh,NA,NA
+name,HCW_status,care_home,sample_date,adm2,test,test2
+EDB129_closest,HCW,A,2020-06-05,Edinburgh,yes,yes
+EDB129_closestb,Patient,A,2020-03-30,Edinburgh,yes,yes
+20144000304_closest,Patient,B,2020-02-28,Edinburgh,no,yes
+This_seq_is_too_short,HCW,A,2020-02-27,Edinburgh,no,yes
+EDB3588,HCW,C,2020-04-20,Edinburgh,no,yes
+EDB2533,Patient,D,2020-04-29,Edinburgh,no,NA
+PHEC-1A65C,HCW,A,2020-05-04,Edinburgh,yes,no
+PHEC-1AD2A,NA,NA,2020-03-02,Edinburgh,no,yes
+PHEC-1A917,Patient,D,2020-02-27,Edinburgh,NA,NA
+This_seq_has_lots_of_Ns,HCW,A,2020-05-15,Edinburgh,NA,NA
+This_seq_is_literally_just_N,HCW,D,2020-04-30,Edinburgh,NA,NA
\ No newline at end of file

From 9b4de8552645bee48acd0087f5325c37ab2d6899 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Wed, 9 Sep 2020 13:01:10 +0100
Subject: [PATCH 05/16] added to time series figure, added different sized and
 shape dots for collapsed nodes

---
 civet/command.py                   | 15 ++++++----
 civet/scripts/civet_template.pmd   | 28 ++++++++++++------
 civet/scripts/data_parsing.py      | 22 ++++++++------
 civet/scripts/find_snps.smk        |  1 +
 civet/scripts/make_tree_figures.py | 18 ++++++++----
 civet/scripts/time_functions.py    | 46 ++++++++++++++++++++----------
 6 files changed, 86 insertions(+), 44 deletions(-)

diff --git a/civet/command.py b/civet/command.py
index 876b9304..5b98e559 100644
--- a/civet/command.py
+++ b/civet/command.py
@@ -91,6 +91,9 @@ def main(sysargs = sys.argv[1:]):
             'gnuplot', 'gnuplot2', 'CMRmap', 'cubehelix', 'brg',
             'gist_rainbow', 'rainbow', 'jet', 'nipy_spectral', 'gist_ncar']
 
+    full_metadata_headers = ["central_sample_id", "biosample_source_id","sequence_name","secondary_identifier","sample_date","epi_week","country","adm1","adm2","outer_postcode","is_surveillance","is_community","is_hcw","is_travel_history","travel_history","lineage","lineage_support","uk_lineage","acc_lineage","del_lineage","phylotype"]
+
+
     # Exit with help menu if no args supplied
     if len(sysargs)<1: 
         parser.print_help()
@@ -205,21 +208,21 @@ def main(sysargs = sys.argv[1:]):
         else:
             label_fields = args.label_fields.split(",")
             for label_f in label_fields:
-                if label_f in reader.fieldnames:
+                if label_f in reader.fieldnames or label_f in full_metadata_headers:
                     labels.append(label_f)
                 else:
-                    sys.stderr.write(f"Error: {label_f} field not found in metadata file")
+                    sys.stderr.write(f"Error: {label_f} field not found in metadata file or full metadata file")
                     sys.exit(-1)
 
         if not args.date_fields:
-            date_lst.append("sample_date")
+            date_lst.append("NONE")
         else:
             date_fields = args.date_fields.split(",")
             for date_f in date_fields:
-                if date_f in reader.fieldnames or date_f != "sample_date":
+                if date_f in reader.fieldnames or date_f in full_metadata_headers:
                     date_lst.append(date_f)
                 else:
-                    sys.stderr.write(f"Error: {date_f} field not found in query metadata file")
+                    sys.stderr.write(f"Error: {date_f} field not found in query metadata file or full metadata file")
                     sys.exit(-1)
 
         if args.display:
@@ -282,7 +285,7 @@ def main(sysargs = sys.argv[1:]):
         config["add_boxplots"]= True
     else:
         config["add_boxplots"]= False
-        
+
     if args.map_sequences:
         config["map_sequences"] = True
         if not args.map_inputs:
diff --git a/civet/scripts/civet_template.pmd b/civet/scripts/civet_template.pmd
index 2a8311b7..6aec30be 100644
--- a/civet/scripts/civet_template.pmd
+++ b/civet/scripts/civet_template.pmd
@@ -92,9 +92,10 @@ if label_fields_input != "NONE":
         label_fields.append(i)
 
 date_fields = []
-options = date_fields_input.split(",")
-for i in options: 
-    date_fields.append(i)
+if date_fields_input != "NONE":
+    options = date_fields_input.split(",")
+    for i in options: 
+        date_fields.append(i)
 
 snp_report = "" ##CHANGE
 
@@ -102,7 +103,7 @@ local_lineages = "" ##CHANGE
 local_lin_maps = "" ##CHANGE
 local_lin_tables = "" ##CHANGE
 
-map_sequences = "" ##CHANGE #going to be true or false
+map_sequences = "" ##CHANGE 
 map_inputs = "" ##CHANGE
 mapping_trait = "" ##CHANGE
 input_crs = "" ##CHANGE
@@ -145,8 +146,13 @@ if input_csv != '':
 
 full_tax_dict = dp.parse_full_metadata(query_dict, label_fields, full_metadata_file, present_lins, present_in_tree, node_summary_option, date_fields)
 
-dates_present, overall_dates, max_overall_date, min_overall_date,  max_string, min_string = time_func.summarise_dates(query_dict)
 
+time_outputs = time_func.summarise_dates(query_dict)
+if type(time_outputs) != bool:
+    overall_dates, max_overall_date, min_overall_date,  max_string, min_string = time_outputs
+    dates_present = True
+else:
+    dates_present = False
 ```
 
 ```python, name="QC fails", echo=False, results="raw"
@@ -210,7 +216,6 @@ elif not_in_cog_number == 0 and cog_number != 0:
     print(df_cog.to_markdown())
 ```
 
-<!-- ## Phylogenies -->
 
 ```python, name="make_trees", echo=False, include=False, figure=False
 too_tall_trees, overall_tree_number, colour_dict_dict, overall_df_dict, tree_order = tree_viz.make_all_of_the_trees(tree_dir, tree_name_stem, full_tax_dict, query_dict, desired_fields, label_fields, graphic_dict)
@@ -224,10 +229,15 @@ for trait, colour_dict in colour_dict_dict.items():
 ```
 ```python, name="time_plot", echo=False, results='raw', include=False
 if dates_present:
+    count = 0
+    tree_to_time_series = {}
     for tree in tree_order:
+        count += 1
         lookup = f"{tree_name_stem}_{tree}"
         tips = tree_to_tip[lookup]
-        time_func.plot_time_series(tips, query_dict, max_overall_date, min_overall_date, tree_list, label_fields)
+        if len(tips) > 1:
+            time_func.plot_time_series(tips, query_dict, max_overall_date, min_overall_date, date_fields, label_fields)
+            tree_to_time_series[lookup] = count
 ```
 
 ```python, name="show_trees", echo=False, results='raw'
@@ -247,8 +257,8 @@ for i in range(1,overall_tree_number+1):
     print(f'<img src="{figdir}/{name_stem}_make_legend_1.png" alt="drawing" style="width:100%;"/>')
     print("\n")
 
-    if dates_present:
-        print("![](" + figdir + "/" + name_stem + "_time_plot_" + str(i) + ".png)")
+    if dates_present and lookup in tree_to_time_series.keys():
+        print("![](" + figdir + "/" + name_stem + "_time_plot_" + str(tree_to_time_series[lookup]) + ".png)")
 
 ```
 
diff --git a/civet/scripts/data_parsing.py b/civet/scripts/data_parsing.py
index d77a23df..30ea312a 100644
--- a/civet/scripts/data_parsing.py
+++ b/civet/scripts/data_parsing.py
@@ -193,8 +193,10 @@ def parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, date
                         taxon.sample_date = sequence["sample_date"]
 
                 for col in col_names: #Add other metadata fields provided
-                    if len(label_fields) > 0:
-                        if col in label_fields:
+                    if col in label_fields:
+                        if sequence[col] == "":
+                                taxon.attribute_dict[col] = "NA"     
+                        else:
                             taxon.attribute_dict[col] = sequence[col]
                     else:
                         if col != "name" and col in desired_fields and col != "adm1":
@@ -220,8 +222,6 @@ def parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, date
                                 adm1 = adm2_adm1_dict[sequence[col]]
                                 taxon.attribute_dict["adm1"] = adm1
 
-                    
-
                 if cog_report:
                     taxon.attribute_dict["adm2"]= sequence["adm2"] 
                     if "collection_date" in reader.fieldnames:
@@ -321,10 +321,16 @@ def parse_full_metadata(query_dict, label_fields, full_metadata, present_lins, p
                         if sequence[field] != "" and field not in tax_object.date_dict.keys():
                             date_dt = convert_date(sequence[field])
                             tax_object.date_dict[field] = date_dt 
-
-                if len(label_fields) > 0:
-                    for field in label_fields:
-                        if field in col_names and tax_object.attribute_dict[field] == "" and sequence[field] != "":
+                    
+                for field in label_fields:
+                    if field in col_names:
+                        if field not in tax_object.attribute_dict.keys():
+                            if sequence[field] == "":
+                                tax_object.attribute_dict[field] = "NA"
+                            else:
+                                tax_object.attribute_dict[field] = sequence[field]
+                        
+                        elif tax_object.attribute_dict[field] == "NA" and sequence[field] != "":
                             tax_object.attribute_dict[field] = sequence[field]
 
                 full_tax_dict[seq_name] = tax_object
diff --git a/civet/scripts/find_snps.smk b/civet/scripts/find_snps.smk
index 88c7ee50..55121192 100644
--- a/civet/scripts/find_snps.smk
+++ b/civet/scripts/find_snps.smk
@@ -101,6 +101,7 @@ rule ambiguities_at_snp_sites:
 rule make_snp_figure:
     input:
         rules.gather_snp_reports.output.report, #change this one to snps input in gather_snp_reports
+        #snps = expand(os.path.join(config["tempdir"],"snp_reports","{tree}.snps.txt"), tree=config["tree_stems"]),
         rules.ambiguities_at_snp_sites.output.snp_report
     output:
         os.path.join(config["outdir"],"figures","genome_graph.png")
diff --git a/civet/scripts/make_tree_figures.py b/civet/scripts/make_tree_figures.py
index 4ca2b317..ca87edfe 100644
--- a/civet/scripts/make_tree_figures.py
+++ b/civet/scripts/make_tree_figures.py
@@ -71,8 +71,9 @@ def display_name(tree, tree_name, tree_dir, full_taxon_dict, query_dict, custom_
             name = k.name
             
             if "inserted" in name:
-                collapsed_node_info = summarise_collapsed_node_for_label(tree_dir, name, tree_name, full_taxon_dict)
+                collapsed_node_info, number_nodes = summarise_collapsed_node_for_label(tree_dir, name, tree_name, full_taxon_dict)
                 k.traits["display"] = collapsed_node_info
+                k.node_number = number_nodes
             else:
                 if name in full_taxon_dict:
                     taxon_obj = full_taxon_dict[name]
@@ -94,6 +95,8 @@ def display_name(tree, tree_name, tree_dir, full_taxon_dict, query_dict, custom_
                                     display_name = display_name + "|" + taxon_obj.attribute_dict[label_element]
                                 count += 1
                         k.traits["display"] = display_name
+
+                    k.node_number = 1
                 
                 
                 else:
@@ -101,8 +104,10 @@ def display_name(tree, tree_name, tree_dir, full_taxon_dict, query_dict, custom_
                         number = name.split("_")[-1]
                         display = f"Tree {number}"
                         k.traits["display"] = display
+                        k.node_number = 1
                     else:
                         k.traits["display"] = name + "|" + "not in dict"
+                        k.node_number = 1
 
 
 def find_colour_dict(query_dict, trait, colour_scheme):
@@ -171,11 +176,10 @@ def make_scaled_tree(My_Tree, tree_name, tree_dir, num_tips, colour_dict_dict, d
     space_offset = tallest_height/10
     absolute_x_axis_size = tallest_height+space_offset+space_offset + tallest_height #changed from /3 
     
-
     tipsize = 40
     c_func=lambda k: 'dimgrey' ## colour of branches
     l_func=lambda k: 'lightgrey' ## colour of dotted lines
-    s_func = lambda k: tipsize*5 if k.name in query_dict.keys() else tipsize
+    s_func = lambda k: tipsize*5 if k.name in query_dict.keys() else (0 if k.node_number > 1 else tipsize)
     z_func=lambda k: 100
     b_func=lambda k: 2.0 #branch width
     so_func=lambda k: tipsize*5 if k.name in query_dict.keys() else 0
@@ -195,7 +199,6 @@ def make_scaled_tree(My_Tree, tree_name, tree_dir, num_tips, colour_dict_dict, d
         trait = next(key_iterator) #so always have the first trait as the first colour dot
 
     first_trait = trait
-        
     colour_dict = colour_dict_dict[trait]
     cn_func = lambda k: colour_dict[query_dict[k.name].attribute_dict[trait]] if k.name in query_dict.keys() else 'dimgrey'
     co_func=lambda k: colour_dict[query_dict[k.name].attribute_dict[trait]] if k.name in query_dict.keys() else 'dimgrey' 
@@ -226,7 +229,6 @@ def make_scaled_tree(My_Tree, tree_name, tree_dir, num_tips, colour_dict_dict, d
     My_Tree.plotPoints(ax, x_attr=x_attr, colour_function=cn_func,y_attr=y_attr, size_function=s_func, outline_colour=outline_colour_func)
     My_Tree.plotPoints(ax, x_attr=x_attr, colour_function=co_func, y_attr=y_attr, size_function=so_func, outline_colour=outline_colour_func)
 
-
     blob_dict = {}
 
     for k in My_Tree.Objects:
@@ -236,6 +238,10 @@ def make_scaled_tree(My_Tree, tree_name, tree_dir, num_tips, colour_dict_dict, d
             x=x_attr(k)
             y=y_attr(k)
         
+            if k.node_number > 1:
+                new_dot_size = tipsize*(1+math.log(k.node_number)) 
+                ax.scatter(x, y, s=new_dot_size, marker="s", zorder=3, color="dimgrey")
+
             height = My_Tree.treeHeight+offset
             text_start = tallest_height+space_offset+space_offset
 
@@ -462,7 +468,7 @@ def summarise_collapsed_node_for_label(tree_dir, focal_node, focal_tree, full_ta
 
                 info = pretty_node_name + ": " + number_nodes + " in " + pretty_summary
 
-    return info
+    return info, len(member_list)
 
 def summarise_node_table(tree_dir, focal_tree, full_tax_dict):
 
diff --git a/civet/scripts/time_functions.py b/civet/scripts/time_functions.py
index da0765fa..102dfce4 100644
--- a/civet/scripts/time_functions.py
+++ b/civet/scripts/time_functions.py
@@ -21,13 +21,11 @@ def summarise_dates(query_dict):
         min_string = min_overall_date.strftime("%Y-%m-%d")
         max_string = max_overall_date.strftime("%Y-%m-%d")
 
-        dates_present = True
-
     else:
-        dates_present = False
+        return False
 
 
-    return dates_present, overall_dates, max_overall_date, min_overall_date,  max_string, min_string
+    return overall_dates, max_overall_date, min_overall_date,  max_string, min_string
 
 def display_name(tax, custom_tip_fields):
     
@@ -51,28 +49,33 @@ def display_name(tax, custom_tip_fields):
     
     return display_name
 
-def find_colour_dict(tree_list):
+def find_colour_dict(date_fields):
     
     colour_dict = {}
     count = 0
 
     cmap = cm.get_cmap("Paired")
     
-    colors = cmap(np.linspace(0, 1, len(tree_list)))
+    colors = cmap(np.linspace(0, 1, len(date_fields)))
     
-    for option in sorted(tree_list):
+    for option in sorted(date_fields):
         colour_dict[option] = colors[count]
         count += 1
 
     return colour_dict
 
-def plot_time_series(tips, query_dict, overall_max_date, overall_min_date, tree_list, custom_tip_fields):
+def plot_time_series(tips, query_dict, overall_max_date, overall_min_date, date_fields, custom_tip_fields):
+
+    colour_dict = find_colour_dict(date_fields)    
 
     time_len = (overall_max_date - overall_min_date).days
 
     height = math.sqrt(len(tips))*2 + 1
 
-    tick_loc_base = float(math.ceil(time_len/5))
+    if time_len > 20:
+        tick_loc_base = float(math.ceil(time_len/5))
+    else:
+        tick_loc_base = time_len
     
     loc = plticker.MultipleLocator(base=tick_loc_base)
 
@@ -84,26 +87,36 @@ def plot_time_series(tips, query_dict, overall_max_date, overall_min_date, tree_
     else:
         offset = dt.timedelta(time_len/3)
 
-    colour_dict = find_colour_dict(tree_list)
 
     count = 1
 
     for tax in tips:
         if tax.date_dict != {} and tax in query_dict.values():
         
-            first_date = min(list(tax.date_dict.values()))
-            last_date = max(list(tax.date_dict.values()))
+            first_date_type = min(tax.date_dict.keys(), key=lambda k: tax.date_dict[k])
+            last_date_type = max(tax.date_dict.keys(), key=lambda k: tax.date_dict[k])
+
+            first_date = tax.date_dict[first_date_type]
+            last_date = tax.date_dict[last_date_type]
+
+            other_dates = {}
+            for date_type, date in tax.date_dict.items():
+                if date != first_date and date != last_date:
+                    other_dates[date_type] = date
             
             label = display_name(tax, custom_tip_fields)
             
             x = [first_date, last_date]
             y = [count, count]
             
-            ax1.scatter(first_date, count, color=colour_dict[tax.tree], s=200, zorder=2)
-            ax1.scatter(last_date, count, color=colour_dict[tax.tree], s=200, zorder=2)
+            ax1.scatter(first_date, count, color=colour_dict[first_date_type], s=200, zorder=2, label=first_date_type)
+            ax1.scatter(last_date, count, color=colour_dict[last_date_type], s=200, zorder=2, label=last_date_type)
+
+            for date_option, date in other_dates.items():
+                ax1.scatter(date, count, color=colour_dict[date_option], s=200, zorder=2, label=date_option)
             
             if first_date != last_date:
-                ax1.plot(x,y, color=colour_dict[tax.tree],zorder=1)
+                ax1.plot(x,y, color="dimgrey",zorder=1)
 
             # ax2.plot([last_date, overall_max_date+offset],y,ls='dotted',lw=1,color="dimgrey")
             if x != overall_max_date:
@@ -129,6 +142,9 @@ def plot_time_series(tips, query_dict, overall_max_date, overall_min_date, tree_
     ax1.tick_params(labelsize=20, rotation=90)
     ax1.xaxis.set_major_locator(loc)
 
+    handles, labels = ax1.get_legend_handles_labels()
+    unique = [(h, l) for i, (h, l) in enumerate(zip(handles, labels)) if l not in labels[:i]]
+    ax1.legend(*zip(*unique))
 
     fig.tight_layout()
 

From 8b5d1f0022c5d5b50badf0fa3cf393b11c5affc9 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Wed, 9 Sep 2020 13:02:04 +0100
Subject: [PATCH 06/16] adding to test csv file

---
 civet/tests/test.csv | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/civet/tests/test.csv b/civet/tests/test.csv
index eb36b09c..3c883142 100644
--- a/civet/tests/test.csv
+++ b/civet/tests/test.csv
@@ -1,12 +1,12 @@
-name,HCW_status,care_home,sample_date,adm2,test,test2
-EDB129_closest,HCW,A,2020-06-05,Edinburgh,yes,yes
-EDB129_closestb,Patient,A,2020-03-30,Edinburgh,yes,yes
-20144000304_closest,Patient,B,2020-02-28,Edinburgh,no,yes
-This_seq_is_too_short,HCW,A,2020-02-27,Edinburgh,no,yes
-EDB3588,HCW,C,2020-04-20,Edinburgh,no,yes
-EDB2533,Patient,D,2020-04-29,Edinburgh,no,NA
-PHEC-1A65C,HCW,A,2020-05-04,Edinburgh,yes,no
-PHEC-1AD2A,NA,NA,2020-03-02,Edinburgh,no,yes
-PHEC-1A917,Patient,D,2020-02-27,Edinburgh,NA,NA
-This_seq_has_lots_of_Ns,HCW,A,2020-05-15,Edinburgh,NA,NA
-This_seq_is_literally_just_N,HCW,D,2020-04-30,Edinburgh,NA,NA
\ No newline at end of file
+name,HCW_status,care_home,sample_date,date_2,date_3,adm2,test,test2
+EDB129_closest,HCW,A,2020-06-05,2020-06-10,2020-06-17,Edinburgh,yes,yes
+EDB129_closestb,Patient,A,2020-03-30,2020-04-04,2020-04-11,Edinburgh,yes,yes
+20144000304_closest,Patient,B,2020-02-28,2020-03-04,2020-03-11,Edinburgh,no,yes
+This_seq_is_too_short,HCW,A,2020-02-27,2020-03-03,2020-03-10,Edinburgh,no,yes
+EDB3588,HCW,C,2020-04-20,2020-04-25,2020-05-02,Edinburgh,no,yes
+EDB2533,Patient,D,2020-04-29,2020-05-04,2020-05-11,Edinburgh,no,NA
+PHEC-1A65C,HCW,A,2020-05-04,2020-05-09,2020-05-16,Edinburgh,yes,no
+PHEC-1AD2A,NA,NA,2020-03-02,2020-03-07,2020-03-14,Edinburgh,no,yes
+PHEC-1A917,Patient,D,2020-02-27,2020-03-03,2020-03-10,Edinburgh,NA,NA
+This_seq_has_lots_of_Ns,HCW,A,2020-05-15,2020-05-20,2020-05-27,Edinburgh,NA,NA
+This_seq_is_literally_just_N,HCW,D,2020-04-30,2020-05-05,2020-05-12,Edinburgh,NA,NA
\ No newline at end of file

From 6d0b807019e4f8064610cfcfe0faf14e2406f33e Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Wed, 9 Sep 2020 18:08:49 +0100
Subject: [PATCH 07/16] make genome graph appear for every tree

---
 civet/scripts/assess_input_file.smk | 12 +++--
 civet/scripts/civet_template.pmd    | 10 ++--
 civet/scripts/find_ambiguities.py   | 14 +++---
 civet/scripts/find_snps.py          |  3 +-
 civet/scripts/find_snps.smk         | 75 +++++++++++++++--------------
 civet/scripts/make_genome_graph.py  |  2 -
 civet/scripts/time_functions.py     |  6 +--
 7 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/civet/scripts/assess_input_file.smk b/civet/scripts/assess_input_file.smk
index c0499e5f..896a7c15 100644
--- a/civet/scripts/assess_input_file.smk
+++ b/civet/scripts/assess_input_file.smk
@@ -2,6 +2,7 @@ import csv
 from Bio import SeqIO
 import os
 import collections
+from pytools.persistent_dict import PersistentDict
 
 rule check_cog_db:
     input:
@@ -284,16 +285,17 @@ rule find_snps:
         tempdir= config["tempdir"],
         path = workflow.current_basedir,
         threshold = config["threshold"],
-        
+                
         fasta = config["fasta"],
         tree_dir = os.path.join(config["outdir"],"local_trees"),
 
         cores = workflow.cores,
         force = config["force"],
         quiet_mode = config["quiet_mode"]
+
     output:
-        genome_graph = os.path.join(config["outdir"],"figures","genome_graph.png"),
-        report = os.path.join(config["outdir"],"snp_reports","snp_reports.txt")
+        genome_graphs = os.path.join(config["outdir"],"snp_reports","tree_subtree_1.snps.txt"), #this obviously isn't ideal because it's not flexible to name stem changes
+        reports = os.path.join(config["outdir"],"figures","genome_graph_tree_subtree_1.png")
     run:
         local_trees = []
         for r,d,f in os.walk(params.tree_dir):
@@ -411,8 +413,8 @@ rule make_report:
         ni_map = config["ni_map"],
         pc_file = config["pc_file"],
         urban_centres = config["urban_centres"],
-        genome_graph = rules.find_snps.output.genome_graph,
-        snp_report = rules.find_snps.output.report,
+        genome_graph = rules.find_snps.output.genome_graphs, #do these two arguments need to be here? 
+        snp_report = rules.find_snps.output.reports, 
         central = os.path.join(config["outdir"], 'figures', "central_map_ukLin.png"),
         neighboring = os.path.join(config["outdir"], 'figures', "neighboring_map_ukLin.png"),
         region = os.path.join(config["outdir"], 'figures', "region_map_ukLin.png")
diff --git a/civet/scripts/civet_template.pmd b/civet/scripts/civet_template.pmd
index 6aec30be..f3bfc3b1 100644
--- a/civet/scripts/civet_template.pmd
+++ b/civet/scripts/civet_template.pmd
@@ -192,7 +192,7 @@ if dates_present:
     print("Earliest date: " + min_string)
     print("Latest date: " + max_string)
 else:
-    print("No date information found")
+    print("No time information provided")
 
 ```
 
@@ -260,6 +260,8 @@ for i in range(1,overall_tree_number+1):
     if dates_present and lookup in tree_to_time_series.keys():
         print("![](" + figdir + "/" + name_stem + "_time_plot_" + str(tree_to_time_series[lookup]) + ".png)")
 
+    print(f"![]({figdir}/genome_graph_{lookup}.png)")
+
 ```
 
 ```python, name="Tree error checking", echo=False, results='raw'
@@ -278,12 +280,6 @@ if add_boxplots != "":
 
 ```
 
-### SNPs found in sequences of interest
-
-```python, name="genome graph", echo=False, results='raw'
-print(f"![]({figdir}/genome_graph.png)")
-```
-
 ```python, name="map_sequences", echo=False, results='raw', include=False
 if map_sequences != "False":
     print("## Plotting sequences")
diff --git a/civet/scripts/find_ambiguities.py b/civet/scripts/find_ambiguities.py
index 4d53679f..f54f5551 100644
--- a/civet/scripts/find_ambiguities.py
+++ b/civet/scripts/find_ambiguities.py
@@ -16,23 +16,21 @@ def parse_args():
 
     return parser.parse_args()
 
-def find_snps():
-
-    args = parse_args()
+def find_snps(input_file, output, report):
 
     input_seqs = collections.defaultdict(list)
     outgroup_seq = ""
 
-    for record in SeqIO.parse(args.input, "fasta"):
+    for record in SeqIO.parse(input_file, "fasta"):
         if record.id == "outgroup":
             outgroup_seq = record.seq.upper()
         else:
             input_seqs[str(record.seq).upper()].append(record.id)
 
-    with open(args.output, "w") as fw:
+    with open(output, "w") as fw:
         fw.write("name\tnum_snps\tambiguous_snps\n")
         snp_dict = collections.defaultdict(list)
-        with open(args.report, newline="") as f:
+        with open(report, newline="") as f:
             reader = csv.DictReader(f, delimiter="\t")
             for row in reader:
                 snps = row["snps"].split(";")
@@ -59,4 +57,6 @@ def find_snps():
 
 if __name__ == '__main__':
 
-    find_snps()
\ No newline at end of file
+    args = parse_args()
+
+    find_snps(args.input, args.output, args.report)
\ No newline at end of file
diff --git a/civet/scripts/find_snps.py b/civet/scripts/find_snps.py
index faef69e7..9a5a5c2c 100644
--- a/civet/scripts/find_snps.py
+++ b/civet/scripts/find_snps.py
@@ -30,8 +30,9 @@ def find_snps():
 
     non_amb = ["A","T","G","C"]
     snp_dict = collections.defaultdict(list)
+
     with open(args.output, "w") as fw:
-        
+        fw.write("name\ttree\tnum_snps\tsnps\n")
         for query_seq in input_seqs:
             snps =[]
 
diff --git a/civet/scripts/find_snps.smk b/civet/scripts/find_snps.smk
index 55121192..df06972b 100644
--- a/civet/scripts/find_snps.smk
+++ b/civet/scripts/find_snps.smk
@@ -2,14 +2,15 @@ from Bio import Phylo
 from Bio import SeqIO
 import csv
 import collections
+import make_genome_graph as make_genome_graph
+import find_ambiguities as find_ambiguities
 
 config["tree_stems"] = config["catchment_str"].split(",")
 
 rule all:
     input:
-        os.path.join(config["outdir"],"snp_reports","snp_reports.txt"),
-        os.path.join(config["outdir"],"figures","genome_graph.png")
-
+        expand(os.path.join(config["outdir"],"snp_reports","{tree}.snps.txt"),tree=config["tree_stems"]),
+        expand(os.path.join(config["outdir"],"figures","genome_graph_{tree}.png"), tree=config["tree_stems"])
 
 rule extract_taxa:
     input:
@@ -53,45 +54,46 @@ rule assess_snps:
     params:
         tree = "{tree}"
     output:
-        snp_report = os.path.join(config["tempdir"], "snp_reports","{tree}.snps.txt")
+        snp_report = os.path.join(config["outdir"], "snp_reports","{tree}.snps.txt")
     shell:
         """
         find_snps.py --input {input.aln:q} --output {output.snp_report:q} --tree {params.tree}
         """
 
-#delete this
-rule gather_snp_reports:
-    input:
-        snps = expand(os.path.join(config["tempdir"],"snp_reports","{tree}.snps.txt"), tree=config["tree_stems"])
-    output:
-        report = os.path.join(config["outdir"],"snp_reports","snp_reports.txt")
-    run:
-        with open(output.report, "w") as fw:
-            fw.write("name\ttree\tnum_snps\tsnps\n")
-            for report in input.snps:
-                fn = os.path.basename(report)
-                with open(report, "r") as f:
-                    for l in f:
-                        l = l.rstrip("\n")
-                        fw.write(l + '\n')
+# rule gather_snp_reports:
+#     input:
+#         snps = expand(os.path.join(config["tempdir"],"snp_reports","{tree}.snps.txt"), tree=config["tree_stems"])
+#     output:
+#         report = os.path.join(config["outdir"],"snp_reports","snp_reports.txt")
+#     run:
+#         with open(output.report, "w") as fw:
+#             #fw.write("name\ttree\tnum_snps\tsnps\n")
+#             for report in input.snps:
+#                 fn = os.path.basename(report)
+#                 with open(report, "r") as f:
+#                     for l in f:
+#                         l = l.rstrip("\n")
+#                         fw.write(l + '\n')
 
-rule gather_snp_seqs:
-    input:
-        expand(os.path.join(config["tempdir"], "seqs_for_snps","{tree}.fasta"), tree=config["tree_stems"])
-    output:
-        seqs = os.path.join(config["tempdir"], "seqs_for_snps","for_ambiguities.fasta")
-    run:
-        with open(output.seqs,"w") as fw:
-            for seq_file in input:
-                for record in SeqIO.parse(seq_file,"fasta"):
-                    fw.write(f">{record.description}\n{record.seq}\n")
+# rule gather_snp_seqs:
+#     input:
+#         expand(os.path.join(config["tempdir"], "seqs_for_snps","{tree}.fasta"), tree=config["tree_stems"])
+#     output:
+#         seqs = os.path.join(config["tempdir"], "seqs_for_snps","for_ambiguities.fasta")
+#     run:
+#         with open(output.seqs,"w") as fw:
+#             for seq_file in input:
+#                 for record in SeqIO.parse(seq_file,"fasta"):
+#                     fw.write(f">{record.description}\n{record.seq}\n")
         
 rule ambiguities_at_snp_sites:
     input:
-        seqs = rules.gather_snp_seqs.output.seqs,
-        report = rules.gather_snp_reports.output.report
+        #seqs = rules.gather_snp_seqs.output.seqs,
+        #report = rules.gather_snp_reports.output.report
+        seqs = os.path.join(config["tempdir"], "seqs_for_snps", "{tree}.fasta"),
+        report = os.path.join(config["tempdir"],"snp_reports", "{tree}.snps.txt")
     output:
-        snp_report = os.path.join(config["outdir"], "snp_reports","ambiguities.snps.txt")
+        snp_report = os.path.join(config["outdir"], "snp_reports", "ambiguities_{tree}.snps.txt")
     shell:
         """
         find_ambiguities.py --input {input.seqs:q} --output {output.snp_report:q} --report {input.report:q}
@@ -100,12 +102,13 @@ rule ambiguities_at_snp_sites:
 
 rule make_snp_figure:
     input:
-        rules.gather_snp_reports.output.report, #change this one to snps input in gather_snp_reports
+        #rules.gather_snp_reports.output.report, #change this one to snps input in gather_snp_reports
         #snps = expand(os.path.join(config["tempdir"],"snp_reports","{tree}.snps.txt"), tree=config["tree_stems"]),
-        rules.ambiguities_at_snp_sites.output.snp_report
+        ambs = os.path.join(config["tempdir"], "snp_reports", "ambiguities_{tree}.snps.txt"),
+        snps = os.path.join(config["tempdir"],"snp_reports","{tree}.snps.txt")
     output:
-        os.path.join(config["outdir"],"figures","genome_graph.png")
+        os.path.join(config["outdir"],"figures","genome_graph_{tree}.png")
     shell:
         """
-        make_genome_graph.py --input {input[0]} --ambiguities {input[1]} --output {output[0]} 
+        make_genome_graph.py --input {input.snps:q} --ambiguities {input.ambs:q} --output {output[0]} 
         """
\ No newline at end of file
diff --git a/civet/scripts/make_genome_graph.py b/civet/scripts/make_genome_graph.py
index 3c961e75..841a8043 100644
--- a/civet/scripts/make_genome_graph.py
+++ b/civet/scripts/make_genome_graph.py
@@ -95,8 +95,6 @@ def make_graph():
 
         # snp position labels
         
-        
-        
         for sequence in snp_dict[snp]:
             # sequence variant text
             name,ref,var,y_pos = sequence
diff --git a/civet/scripts/time_functions.py b/civet/scripts/time_functions.py
index 102dfce4..4915da6d 100644
--- a/civet/scripts/time_functions.py
+++ b/civet/scripts/time_functions.py
@@ -75,9 +75,9 @@ def plot_time_series(tips, query_dict, overall_max_date, overall_min_date, date_
     if time_len > 20:
         tick_loc_base = float(math.ceil(time_len/5))
     else:
-        tick_loc_base = time_len
-    
-    loc = plticker.MultipleLocator(base=tick_loc_base)
+        tick_loc_base = 1.0
+
+    loc = plticker.MultipleLocator(base=tick_loc_base) #Sets a tick on each integer multiple of a base within the view interval
 
     fig, ax1 = plt.subplots(1,1, figsize=(20,height))
     ax2 = ax1.twinx()

From ffb31ea5677441649e3e239d8b4a736339fdab27 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Wed, 9 Sep 2020 18:10:06 +0100
Subject: [PATCH 08/16] remove unncessary import

---
 civet/scripts/assess_input_file.smk | 1 -
 1 file changed, 1 deletion(-)

diff --git a/civet/scripts/assess_input_file.smk b/civet/scripts/assess_input_file.smk
index 896a7c15..f9713102 100644
--- a/civet/scripts/assess_input_file.smk
+++ b/civet/scripts/assess_input_file.smk
@@ -2,7 +2,6 @@ import csv
 from Bio import SeqIO
 import os
 import collections
-from pytools.persistent_dict import PersistentDict
 
 rule check_cog_db:
     input:

From 8780dde32509dc7d3cbb95dc719a267148f4ee4a Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Wed, 9 Sep 2020 18:11:45 +0100
Subject: [PATCH 09/16] remove unncessary imports

---
 civet/scripts/find_snps.smk | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/civet/scripts/find_snps.smk b/civet/scripts/find_snps.smk
index df06972b..27134b99 100644
--- a/civet/scripts/find_snps.smk
+++ b/civet/scripts/find_snps.smk
@@ -2,8 +2,7 @@ from Bio import Phylo
 from Bio import SeqIO
 import csv
 import collections
-import make_genome_graph as make_genome_graph
-import find_ambiguities as find_ambiguities
+
 
 config["tree_stems"] = config["catchment_str"].split(",")
 

From d2f983ac11f6033615c95662b8cc22f4d723a325 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Thu, 10 Sep 2020 09:42:12 +0100
Subject: [PATCH 10/16] fix tempdir bug

---
 civet/scripts/find_snps.smk | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/civet/scripts/find_snps.smk b/civet/scripts/find_snps.smk
index 27134b99..b1bf6d14 100644
--- a/civet/scripts/find_snps.smk
+++ b/civet/scripts/find_snps.smk
@@ -5,6 +5,7 @@ import collections
 
 
 config["tree_stems"] = config["catchment_str"].split(",")
+print(config["outdir"])
 
 rule all:
     input:
@@ -90,7 +91,7 @@ rule ambiguities_at_snp_sites:
         #seqs = rules.gather_snp_seqs.output.seqs,
         #report = rules.gather_snp_reports.output.report
         seqs = os.path.join(config["tempdir"], "seqs_for_snps", "{tree}.fasta"),
-        report = os.path.join(config["tempdir"],"snp_reports", "{tree}.snps.txt")
+        report = os.path.join(config["outdir"],"snp_reports", "{tree}.snps.txt")
     output:
         snp_report = os.path.join(config["outdir"], "snp_reports", "ambiguities_{tree}.snps.txt")
     shell:
@@ -103,8 +104,8 @@ rule make_snp_figure:
     input:
         #rules.gather_snp_reports.output.report, #change this one to snps input in gather_snp_reports
         #snps = expand(os.path.join(config["tempdir"],"snp_reports","{tree}.snps.txt"), tree=config["tree_stems"]),
-        ambs = os.path.join(config["tempdir"], "snp_reports", "ambiguities_{tree}.snps.txt"),
-        snps = os.path.join(config["tempdir"],"snp_reports","{tree}.snps.txt")
+        ambs = os.path.join(config["outdir"], "snp_reports", "ambiguities_{tree}.snps.txt"),
+        snps = os.path.join(config["outdir"],"snp_reports","{tree}.snps.txt")
     output:
         os.path.join(config["outdir"],"figures","genome_graph_{tree}.png")
     shell:

From e6796619cee6546f5954e5ce03d8237300de6e6d Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Thu, 10 Sep 2020 09:43:51 +0100
Subject: [PATCH 11/16] remove print statement

---
 civet/scripts/find_snps.smk | 1 -
 1 file changed, 1 deletion(-)

diff --git a/civet/scripts/find_snps.smk b/civet/scripts/find_snps.smk
index b1bf6d14..48a33fb6 100644
--- a/civet/scripts/find_snps.smk
+++ b/civet/scripts/find_snps.smk
@@ -5,7 +5,6 @@ import collections
 
 
 config["tree_stems"] = config["catchment_str"].split(",")
-print(config["outdir"])
 
 rule all:
     input:

From a57c4090e5a18439b8e42d439d80a79f42c28908 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Thu, 10 Sep 2020 11:01:49 +0100
Subject: [PATCH 12/16] fixing print inputs bug

---
 civet/scripts/civet_template.pmd |  5 +++--
 civet/scripts/data_parsing.py    | 15 +++++++++++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/civet/scripts/civet_template.pmd b/civet/scripts/civet_template.pmd
index c2e63767..3b5a5ee6 100644
--- a/civet/scripts/civet_template.pmd
+++ b/civet/scripts/civet_template.pmd
@@ -130,7 +130,7 @@ pyplot.rcParams.update({'figure.max_open_warning': 0})
 mpl.rcParams['font.weight']=50
 mpl.rcParams['axes.labelweight']=50
 
-inputs = [desired_fields, label_fields, graphic_dict, node_summary_option, map_sequences, x_col, y_col, mapping_trait]
+inputs = [desired_fields, label_fields, graphic_dict, node_summary_option, map_sequences, mapping_trait, map_inputs]
 
 ```
 
@@ -188,7 +188,8 @@ for tax in query_dict.values():
 try:
     dp.analyse_inputs(inputs)
 except:
-    print("inputs failed to print.")
+    print("Failed to print inputs.")
+
 
 print("\n")
 
diff --git a/civet/scripts/data_parsing.py b/civet/scripts/data_parsing.py
index 4e69cfa8..03373f55 100644
--- a/civet/scripts/data_parsing.py
+++ b/civet/scripts/data_parsing.py
@@ -64,7 +64,7 @@ def __init__(self, name, taxa):
 
 def analyse_inputs(inputs):
 
-    desired_fields, label_fields, graphic_dict, node_summary_option, map_sequences, x_col, y_col, mapping_trait = inputs 
+    desired_fields, label_fields, graphic_dict, node_summary_option, map_sequences, mapping_trait, map_inputs = inputs 
 
     print("Showing " + ",".join(desired_fields) + " on the tree.")
     print(",".join(list(graphic_dict.keys())) + " fields are displayed graphically using " + ",".join(list(graphic_dict.values())) + " colour schemes respectively.")
@@ -75,10 +75,17 @@ def analyse_inputs(inputs):
     print("Summarising nodes by " + node_summary_option)
 
     if map_sequences != "False":
-        if mapping_trait != "False":
-            print("Mapping sequences using columns " + x_col + " " + y_col + " for x values and y values respectively, and colouring by " + mapping_trait)
+        map_args = map_inputs.split(",")
+        if len(map_args) == 2:
+            if mapping_trait != "False":
+                print("Mapping sequences using columns " + map_args[0] + " " + map_args[1] + " for x values and y values respectively, and colouring by " + mapping_trait)
+            else:
+                print("Mapping sequences using columns " + map_args[0] + " " + map_args[1] + " for x values and y values respectively.")
         else:
-            print("Mapping sequences using columns " + x_col + " " + y_col + " for x values and y values respectively.")
+            if mapping_trait != "False":
+                print("Mapping sequences using columns " + map_args[0] + " for outer postcodes, and colouring by " + mapping_trait)
+            else:
+                print("Mapping sequences using columns " + map_args[0] + " for outer postocdes.")
 
     
     

From 1b15ba517bc72eb44eee7f5af199c55f1ec4bbc2 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Thu, 10 Sep 2020 18:47:45 +0100
Subject: [PATCH 13/16] bug fix for labels from full metadata, requires testing

---
 civet/scripts/data_parsing.py | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/civet/scripts/data_parsing.py b/civet/scripts/data_parsing.py
index 03373f55..a654f5e6 100644
--- a/civet/scripts/data_parsing.py
+++ b/civet/scripts/data_parsing.py
@@ -12,7 +12,7 @@
 
 class taxon():
 
-    def __init__(self, name, global_lin, uk_lin, phylotype):
+    def __init__(self, name, global_lin, uk_lin, phylotype, label_fields, tree_fields):
 
         self.name = name
 
@@ -38,6 +38,12 @@ def __init__(self, name, global_lin, uk_lin, phylotype):
         self.in_cog = False
         self.attribute_dict = {}
         self.attribute_dict["adm1"] = "NA"
+        
+        for i in label_fields:
+            self.attribute_dict[i] = "NA"
+        for i in tree_fields:
+            self.attribute_dict[i] = "NA"
+
         self.tree = "NA"
 
         self.closest_distance = "NA"
@@ -116,7 +122,7 @@ def prepping_adm2_adm1_data(full_metadata):
     return adm2_adm1
     
 
-def parse_filtered_metadata(metadata_file, tip_to_tree):
+def parse_filtered_metadata(metadata_file, tip_to_tree, label_fields, tree_fields):
     
     query_dict = {}
     query_id_dict = {}
@@ -146,7 +152,7 @@ def parse_filtered_metadata(metadata_file, tip_to_tree):
             sample_date = sequence["sample_date"]
 
             
-            new_taxon = taxon(query_name, glob_lin, uk_lineage, phylotype)
+            new_taxon = taxon(query_name, glob_lin, uk_lineage, phylotype, label_fields, tree_fields)
 
             new_taxon.query_id = query_id
 
@@ -225,9 +231,7 @@ def parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, date
                             taxon.attribute_dict[col] = sequence[col]
                     else:
                         if col != "name" and col in desired_fields and col != "adm1":
-                            if sequence[col] == "":
-                                taxon.attribute_dict[col] = "NA"
-                            else:
+                            if sequence[col] != "":
                                 taxon.attribute_dict[col] = sequence[col]
             
                         if col == "adm1":
@@ -290,7 +294,7 @@ def parse_tree_tips(tree_dir):
 
     return tips, tip_to_tree, tree_list
 
-def parse_full_metadata(query_dict, label_fields, full_metadata, present_lins, present_in_tree, node_summary_option, date_fields):
+def parse_full_metadata(query_dict, label_fields, tree_fields,full_metadata, present_lins, present_in_tree, node_summary_option, date_fields):
 
     full_tax_dict = query_dict.copy()
 
@@ -317,7 +321,7 @@ def parse_full_metadata(query_dict, label_fields, full_metadata, present_lins, p
 
 
             if (uk_lin in present_lins or seq_name in present_in_tree) and seq_name not in query_dict.keys():
-                new_taxon = taxon(seq_name, glob_lin, uk_lin, phylotype)
+                new_taxon = taxon(seq_name, glob_lin, uk_lin, phylotype, label_fields, tree_fields)
                 if date == "":
                     date = "NA"
                 
@@ -349,13 +353,7 @@ def parse_full_metadata(query_dict, label_fields, full_metadata, present_lins, p
                     
                 for field in label_fields:
                     if field in col_names:
-                        if field not in tax_object.attribute_dict.keys():
-                            if sequence[field] == "":
-                                tax_object.attribute_dict[field] = "NA"
-                            else:
-                                tax_object.attribute_dict[field] = sequence[field]
-                        
-                        elif tax_object.attribute_dict[field] == "NA" and sequence[field] != "":
+                        if tax_object.attribute_dict[field] == "NA" and sequence[field] != "":
                             tax_object.attribute_dict[field] = sequence[field]
 
                 full_tax_dict[seq_name] = tax_object

From e7606c91f90a44febb56825a01963a024703d837 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Fri, 11 Sep 2020 09:47:51 +0100
Subject: [PATCH 14/16] fixing lookup in full metadata bug

---
 civet/scripts/civet_template.pmd | 4 ++--
 civet/scripts/data_parsing.py    | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/civet/scripts/civet_template.pmd b/civet/scripts/civet_template.pmd
index 3b5a5ee6..496e8e43 100644
--- a/civet/scripts/civet_template.pmd
+++ b/civet/scripts/civet_template.pmd
@@ -140,13 +140,13 @@ inputs = [desired_fields, label_fields, graphic_dict, node_summary_option, map_s
 
 present_in_tree, tip_to_tree, tree_list = dp.parse_tree_tips(tree_dir)
 
-query_dict, query_id_dict, present_lins, tree_to_tip = dp.parse_filtered_metadata(filtered_cog_metadata, tip_to_tree) #Just the lines with their queries plus the closest match in COG
+query_dict, query_id_dict, present_lins, tree_to_tip = dp.parse_filtered_metadata(filtered_cog_metadata, tip_to_tree, label_fields, desired_fields) #Just the lines with their queries plus the closest match in COG
 
 if input_csv != '':
     adm2_to_adm1 = dp.prepping_adm2_adm1_data(full_metadata_file)
     query_dict = dp.parse_input_csv(input_csv, query_id_dict, desired_fields, label_fields, date_fields, adm2_to_adm1, False) #Any query information they have provided
 
-full_tax_dict = dp.parse_full_metadata(query_dict, label_fields, full_metadata_file, present_lins, present_in_tree, node_summary_option, date_fields)
+full_tax_dict = dp.parse_full_metadata(query_dict, label_fields, desired_fields, full_metadata_file, present_lins, present_in_tree, node_summary_option, date_fields)
 
 
 time_outputs = time_func.summarise_dates(query_dict)
diff --git a/civet/scripts/data_parsing.py b/civet/scripts/data_parsing.py
index a654f5e6..97b459d4 100644
--- a/civet/scripts/data_parsing.py
+++ b/civet/scripts/data_parsing.py
@@ -356,6 +356,11 @@ def parse_full_metadata(query_dict, label_fields, tree_fields,full_metadata, pre
                         if tax_object.attribute_dict[field] == "NA" and sequence[field] != "":
                             tax_object.attribute_dict[field] = sequence[field]
 
+                for field in tree_fields:
+                    if field in col_names:
+                        if tax_object.attribute_dict[field] == "NA" and sequence[field] != "":
+                            tax_object.attribute_dict[field] = sequence[field]
+
                 full_tax_dict[seq_name] = tax_object
                     
     return full_tax_dict

From b90f5f7d3b34513e188841d271781ee63da83a97 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Fri, 11 Sep 2020 09:51:37 +0100
Subject: [PATCH 15/16] change command.py to allow fields to be in full
 metadata

---
 civet/command.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/civet/command.py b/civet/command.py
index 5b98e559..7405ad7e 100644
--- a/civet/command.py
+++ b/civet/command.py
@@ -196,10 +196,10 @@ def main(sysargs = sys.argv[1:]):
         else:
             desired_fields = args.fields.split(",")
             for field in desired_fields:
-                if field in reader.fieldnames:
+                if field in reader.fieldnames or field in full_metadata_headers:
                     fields.append(field)
                 else:
-                    sys.stderr.write(f"Error: {field} field not found in metadata file")
+                    sys.stderr.write(f"Error: {field} field not found in metadata file or full metadata file")
                     sys.exit(-1)
 
         

From 63656cefdab35546d101255b47919af81ba76e90 Mon Sep 17 00:00:00 2001
From: ViralVerity <verity.hill@ed.ac.uk>
Date: Mon, 14 Sep 2020 10:36:39 +0100
Subject: [PATCH 16/16] fixing display name error

---
 civet/scripts/make_tree_figures.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/civet/scripts/make_tree_figures.py b/civet/scripts/make_tree_figures.py
index ca87edfe..7b39be31 100644
--- a/civet/scripts/make_tree_figures.py
+++ b/civet/scripts/make_tree_figures.py
@@ -69,7 +69,7 @@ def display_name(tree, tree_name, tree_dir, full_taxon_dict, query_dict, custom_
     for k in tree.Objects:
         if k.branchType == 'leaf':
             name = k.name
-            
+            display_name = ""
             if "inserted" in name:
                 collapsed_node_info, number_nodes = summarise_collapsed_node_for_label(tree_dir, name, tree_name, full_taxon_dict)
                 k.traits["display"] = collapsed_node_info
@@ -94,7 +94,8 @@ def display_name(tree, tree_name, tree_dir, full_taxon_dict, query_dict, custom_
                                 else:   
                                     display_name = display_name + "|" + taxon_obj.attribute_dict[label_element]
                                 count += 1
-                        k.traits["display"] = display_name
+                            
+                            k.traits["display"] = display_name
 
                     k.node_number = 1
                 
@@ -102,8 +103,8 @@ def display_name(tree, tree_name, tree_dir, full_taxon_dict, query_dict, custom_
                 else:
                     if name.startswith("subtree"):
                         number = name.split("_")[-1]
-                        display = f"Tree {number}"
-                        k.traits["display"] = display
+                        display_name = f"Tree {number}"
+                        k.traits["display"] = display_name
                         k.node_number = 1
                     else:
                         k.traits["display"] = name + "|" + "not in dict"