From 7c5bf5182eff78c7e45a6eab2b7c862138fc13dd Mon Sep 17 00:00:00 2001
From: Savita Karthikeyan
Date: Wed, 23 Aug 2023 14:52:34 +0100
Subject: [PATCH] fixed binning code.

---
Review notes (reviewer commentary only; git am ends the commit message at
the three-dash line above, so nothing in this section is committed).

Layout: the line structure of this patch was restored from a copy in which
all line breaks had been collapsed. Leading whitespace inside the hunks and
the position of whitespace-only lines were inferred from the code structure
and the hunk line counts; verify against the repository before applying.
The added/removed code itself is unchanged.

model.py, calc_anc_spans (new version):
* num_x: np.ceil() wraps only `right.max() - right.min()`; the division by
  win_size_x happens afterwards and int() truncates it, so the window count
  is rounded down. The removed line rounded up and measured from 0.
* x_start: the value assigned from edges_df.left.min() is overwritten by
  `x_start = i * win_size_x` on the first loop iteration.
* tmp_df filter: an edge is selected only when it contains x_start or
  x_end. An edge with left > x_start and right < x_end matches neither
  clause. TODO confirm whether such edges are meant to be excluded.
* n: n is 0 when no edge matches a cell, so that cell's value is computed
  by dividing by zero. TODO confirm the intended value for empty cells.
* The returned DataFrame keys change from "Genomic position" and
  "Mean ancestor span" to "Genomic_position" and "Mean_ancestor_span".
* The default window sizes change from 1 / 1 to 1_000_000 / 5_000.

pages/nodes.py:
* `import model` is removed. The hunks shown here do not reference `model`,
  but the rest of the file is not visible in this patch.
* The calc_anc_spans call changes win_size_x from 100_000 to 1_000_000.

 model.py       | 44 ++++++++++++++++++++++++++------------------
 pages/nodes.py | 14 ++++++++------
 2 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/model.py b/model.py
index a5d05af..a72380f 100644
--- a/model.py
+++ b/model.py
@@ -480,34 +480,42 @@ def calc_mutations_per_tree(self):
         mutations_per_tree[unique_values] = counts
         return mutations_per_tree
 
-    def calc_anc_spans(self, win_size_x=1, win_size_y=1):
+    def calc_anc_spans(self, win_size_x=1_000_000, win_size_y=5_000):
         edges_df = self.edges_df
-        num_x = int(np.ceil(edges_df.right.max() / win_size_x))
+        num_x = int(np.ceil(edges_df.right.max() - edges_df.right.min()) / win_size_x)
         num_y = int(np.ceil(edges_df.child_time.max() / win_size_y))
-        anc_spans=[]
-        x_start = 0 # redo start to ts.start, set span to min and max of win/span
-        x=[]
-        y=[]
+        anc_spans = np.zeros((num_x, num_y))
+        x_start = edges_df.left.min()
+        x = np.zeros((num_x, num_y))
+        y = np.zeros((num_x, num_y))
         for i in range(num_x):
             x_start = i * win_size_x
             x_end = x_start + win_size_x
-            
+
             for j in range(num_y):
                 y_start = j * win_size_y
                 y_end = y_start + win_size_y
-                x.append(x_end)
-                y.append(y_start)
-                anc_spans.append(edges_df[(((x_start >= edges_df.left) &
-                                   (x_start < edges_df.right)) |
-                                   ((x_end > edges_df.left) &
-                                   (x_end <= edges_df.right))) &
-                                   (edges_df.child_time >= y_start) &
-                                   (edges_df.child_time < y_end)].span.mean())
+                x[i][j] = x_end
+                y[i][j] = y_start
+                tmp_df = edges_df[
+                    (
+                        ((x_start >= edges_df.left) & (x_start < edges_df.right))
+                        | ((x_end > edges_df.left) & (x_end <= edges_df.right))
+                    )
+                    & (edges_df.child_time >= y_start)
+                    & (edges_df.child_time < y_end)
+                ]
+                n = np.unique(tmp_df.child).shape[0]
+                anc_spans[i][j] = np.sum(np.clip(tmp_df.span, None, win_size_x)) / n
+        anc_spans = anc_spans.flatten()
+        x = x.flatten()
+        y = y.flatten()
+
         df = pd.DataFrame(
             {
-                "Genomic position": x,
+                "Genomic_position": x,
                 "Time": y,
-                "Mean ancestor span": anc_spans,
+                "Mean_ancestor_span": anc_spans,
             }
         )
-        return df
\ No newline at end of file
+        return df
diff --git a/pages/nodes.py b/pages/nodes.py
index 83c4750..3c4de4a 100644
--- a/pages/nodes.py
+++ b/pages/nodes.py
@@ -10,7 +10,7 @@ from plot_helpers import filter_points
 from plot_helpers import hover_points
 from plot_helpers import make_hist_matplotlib
 
-import model
+
 def page(tsm):
     hv.extension("matplotlib")
 
@@ -61,10 +61,12 @@ def page(tsm):
         log_y_checkbox,
     )
 
-    anc_span_data = tsm.calc_anc_spans(win_size_x=100_000, win_size_y=5_000)
-    #nc_span_data = tsm.calc_anc_spans(win_size_x=1, win_size_y=1)
-    heatmap = hv.HeatMap(
-        anc_span_data
-    ).opts(width=config.PLOT_WIDTH, height=config.PLOT_HEIGHT, tools=['hover'], colorbar=True)
+    anc_span_data = tsm.calc_anc_spans(win_size_x=1_000_000, win_size_y=5_000)
+    heatmap = hv.HeatMap(anc_span_data).opts(
+        width=config.PLOT_WIDTH,
+        height=config.PLOT_HEIGHT,
+        tools=["hover"],
+        colorbar=True,
+    )
 
     return pn.Column(main, hist_panel, heatmap, plot_options)