Frequentist notebook (#13)
* Add requirements file

* Fix dependencies

* Add annotations to create_model

* Add reproducible version

* Add plotting utilities

* Add notebook for fitting the latest results

* Add notebook on how to prepare the deconvolution

* Fix numerical stability issues

* Update frequentist notebook

* Apply black formatting

* Minor config changes

* Minor config changes

---------

Co-authored-by: Paweł Czyż <[email protected]>
dr-david and pawel-czyz authored Nov 11, 2024
1 parent a14b995 commit 6faf6e7
Showing 5 changed files with 231 additions and 131 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -4,6 +4,9 @@ data/
# Snakemake-generated files
generated/

.DS_Store
src/.DS_Store

# Notebooks by default should not be uploaded (use Quarto Markdown as an alternative)
*.nb
*.ipynb
90 changes: 80 additions & 10 deletions examples/frequentist_fitting.py
@@ -7,22 +7,28 @@
#       format_version: '1.3'
#       jupytext_version: 1.16.1
#   kernelspec:
-#     display_name: Python 3 (ipykernel)
#     display_name: Python (covvfit)
#     language: python
-#     name: python3
#     name: covvfit
# ---

# %%
-import covvfit._frequentist as freq
-import covvfit._preprocess_abundances as prec
-import covvfit.plotting._timeseries as plot_ts
-import matplotlib.pyplot as plt
-import matplotlib.ticker as ticker
-import numpy as np
import pandas as pd
import pymc as pm

import numpy as np

import matplotlib.ticker as ticker
import matplotlib.pyplot as plt

from scipy.special import expit


import covvfit._frequentist as freq
import covvfit._preprocess_abundances as prec
import covvfit.plotting._timeseries as plot_ts


variants_full = [
"B.1.1.7",
"B.1.351",
Expand Down Expand Up @@ -88,6 +94,12 @@
}


# %%
data.head()

# %%
data2.head()

# %%
DATA_PATH = "../data/robust_deconv2_noisy14.csv"

@@ -141,7 +153,6 @@ def softmax(x, rates, midpoints):

# make Multinom/n likelihood
def log_likelihood(y, p, n):
-    # return n*pm.math.sum(y * pm.math.log(p), axis=0) + n*(1-pm.math.sum(y, axis=0))*pm.math.log(1-pm.math.sum(p, axis=0))
    return n * pm.math.sum(y * pm.math.log(p), axis=0)
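
# %% [markdown]
# A minimal illustrative sketch of how a custom term like `log_likelihood`
# can enter a PyMC model through `pm.Potential`, which adds an arbitrary
# expression to the joint log-probability. The names `ys_obs`, `p_model`,
# and `sketch_model` are hypothetical, not part of this notebook.

# %%
ys_obs = np.array([0.25, 0.35, 0.40])  # hypothetical relative abundances
with pm.Model() as sketch_model:
    p_model = pm.Dirichlet("p_model", a=np.ones(3))
    # Attach the quasi-multinomial term to the model's log-probability
    pm.Potential("loglik", log_likelihood(ys_obs, p_model, n=100))
    sketch_map = pm.find_MAP()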

[
@@ -170,6 +181,65 @@ def log_likelihood(y, p, n):
model_map_fixed = pm.find_MAP(maxeval=50000, seed=12313)


# %%
def boot_rates(ts_lst, ys_lst, n_boot=100):
    rates = []  # Parameter estimates for each bootstrap iteration

    for i in range(n_boot):
        # Initialize lists to store the resampled data
        ts_lst_resampled = []
        ys_lst_resampled = []

        # Resample data with consistent indices for both ts and ys
        for ts, ys in zip(ts_lst, ys_lst):
            # Draw indices with replacement for resampling
            boot_indices = np.random.choice(len(ts), size=len(ts), replace=True)
            ts_resampled = ts[boot_indices]  # Apply boot indices to ts
            ys_resampled = ys[:, boot_indices]  # Apply the same boot indices to ys

            ts_lst_resampled.append(ts_resampled)
            ys_lst_resampled.append(ys_resampled)

        # Fit the model on the resampled data
        with create_model_fixed2(
            ts_lst_resampled,
            ys_lst_resampled,
            coords={
                "cities": cities,
                "variants": variants,
            },
        ):
            model_map_fixed = pm.find_MAP(maxeval=50000, seed=12313)

        # Collect the "rate" parameter estimate from the fitted model
        rates.append(model_map_fixed["rate"])

    return np.array(rates)  # Rate estimates for all bootstrap iterations



# %%
booted_rates = boot_rates(ts_lst, ys_lst, n_boot=10)

# %%
plt.figure(figsize=(10, 6))
for i in range(7):
    plt.hist(booted_rates[:, i], bins=10, alpha=0.5, label=f"Parameter {i+1}")

plt.xlabel("Rate values")
plt.ylabel("Frequency")
plt.title("Histograms of Bootstrapped Rate Parameters")
plt.legend(loc="upper right")
plt.show()
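
# %% [markdown]
# An illustrative follow-up sketch: percentile bootstrap confidence intervals
# from `booted_rates`, assuming it has shape (n_boot, n_rates) as the
# histogram loop above suggests. The 95% level is an assumption of this
# sketch, not a choice made in the notebook.

# %%
ci_low, ci_high = np.percentile(booted_rates, [2.5, 97.5], axis=0)
for i, (lo, hi) in enumerate(zip(ci_low, ci_high)):
    print(f"rate[{i}]: 95% bootstrap CI ({lo:.3f}, {hi:.3f})")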

# %%
model_map_fixed["rate"].shape


# %%
def boot_rates(ts_lst, ys_lst, n_boot=100):
    # Unfinished stub that shadows the working boot_rates defined above;
    # the loop body is a no-op so that the cell at least runs.
    for i in range(n_boot):
        pass


# %%
## This model takes into account the complement of the monitored variants and sets its fitness to zero.
## It has some numerical instabilities that make it unsuitable for finding the MAP or MLE, but I use it for the Hessian.
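
# %% [markdown]
# An illustrative, self-contained sketch of the Hessian recipe described
# above: `pm.find_hessian` evaluates the Hessian of the negative
# log-posterior at a point (e.g. the MAP), and its inverse approximates the
# covariance of the estimates. The toy model (`hessian_demo`, `demo_map`)
# is hypothetical, not the covvfit model used in this notebook.

# %%
with pm.Model() as hessian_demo:
    mu = pm.Normal("mu", 0.0, 10.0)
    pm.Normal("obs", mu, 1.0, observed=np.array([0.9, 1.1, 1.0]))
    demo_map = pm.find_MAP()
    hessian = pm.find_hessian(demo_map, vars=[mu])

# Approximate covariance and standard errors from the observed information
covariance = np.linalg.inv(hessian)
standard_errors = np.sqrt(np.diag(covariance))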
@@ -484,7 +554,7 @@
)


-plt.savefig("growth_rates20231108.pdf", bbox_inches="tight")
# plt.savefig("growth_rates20231108.pdf", bbox_inches="tight")

plt.show()

