From b224412a9eecde41e2abd2fef484fdefe9c5c5bd Mon Sep 17 00:00:00 2001 From: mhostetter Date: Sat, 27 Jul 2024 12:58:01 -0400 Subject: [PATCH 1/7] Add `sdr.sum_distributions()` Fixes #423 --- src/sdr/_probability.py | 137 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/src/sdr/_probability.py b/src/sdr/_probability.py index 655d95715..79fb274ef 100644 --- a/src/sdr/_probability.py +++ b/src/sdr/_probability.py @@ -75,3 +75,140 @@ def Qinv(p: npt.ArrayLike) -> npt.NDArray[np.float64]: x = np.sqrt(2) * scipy.special.erfcinv(2 * p) return convert_output(x) + + +@export +def sum_distributions( + dist1: scipy.stats.rv_continuous | scipy.stats.rv_histogram, + dist2: scipy.stats.rv_continuous | scipy.stats.rv_histogram, + p: float = 1e-16, +) -> scipy.stats.rv_histogram: + r""" + Numerically calculates the distribution of the sum of two independent random variables. + + Arguments: + dist1: The distribution of the first random variable $X$. + dist2: The distribution of the second random variable $Y$. + p: The probability of exceeding the x axis, on either side, for each distribution. This is used to determine + the bounds on the x axis for the numerical convolution. Smaller values of $p$ will result in more accurate + analysis, but will require more computation. + + Returns: + The distribution of the sum $Z = X + Y$. + + Notes: + The PDF of the sum of two independent random variables is the convolution of the PDF of the two distributions. + + $$f_{X+Y}(t) = (f_X * f_Y)(t)$$ + + Examples: + Compute the distribution of the sum of two normal distributions. + + .. ipython:: python + + X = scipy.stats.norm(loc=-1, scale=0.5) + Y = scipy.stats.norm(loc=2, scale=1.5) + x = np.linspace(-5, 10, 1000) + + @savefig sdr_sum_distributions_1.png + plt.figure(); \ + plt.plot(x, X.pdf(x), label="X"); \ + plt.plot(x, Y.pdf(x), label="Y"); \ + plt.plot(x, sdr.sum_distributions(X, Y).pdf(x), label="X + Y"); \ + plt.hist(X.rvs(100_000) + Y.rvs(100_000), bins=101, density=True, histtype="step", label="X + Y empirical"); \ + plt.legend(); \ + plt.xlabel("Random variable"); \ + plt.ylabel("Probability density"); \ + plt.title("Sum of two Normal distributions"); + + Compute the distribution of the sum of two Rayleigh distributions. + + .. ipython:: python + + X = scipy.stats.rayleigh(scale=1) + Y = scipy.stats.rayleigh(loc=1, scale=2) + x = np.linspace(0, 12, 1000) + + @savefig sdr_sum_distributions_2.png + plt.figure(); \ + plt.plot(x, X.pdf(x), label="X"); \ + plt.plot(x, Y.pdf(x), label="Y"); \ + plt.plot(x, sdr.sum_distributions(X, Y).pdf(x), label="X + Y"); \ + plt.hist(X.rvs(100_000) + Y.rvs(100_000), bins=101, density=True, histtype="step", label="X + Y empirical"); \ + plt.legend(); \ + plt.xlabel("Random variable"); \ + plt.ylabel("Probability density"); \ + plt.title("Sum of two Rayleigh distributions"); + + Compute the distribution of the sum of two Rician distributions. + + .. 
ipython:: python + + X = scipy.stats.rice(2) + Y = scipy.stats.rice(3) + x = np.linspace(0, 12, 1000) + + @savefig sdr_sum_distributions_3.png + plt.figure(); \ + plt.plot(x, X.pdf(x), label="X"); \ + plt.plot(x, Y.pdf(x), label="Y"); \ + plt.plot(x, sdr.sum_distributions(X, Y).pdf(x), label="X + Y"); \ + plt.hist(X.rvs(100_000) + Y.rvs(100_000), bins=101, density=True, histtype="step", label="X + Y empirical"); \ + plt.legend(); \ + plt.xlabel("Random variable"); \ + plt.ylabel("Probability density"); \ + plt.title("Sum of two Rician distributions"); + + Group: + probability + """ + # Determine the x axis of each distribution such that the probability of exceeding the x axis, on either side, + # is p. + x1_min, x1_max = _x_range(dist1, p) + x2_min, x2_max = _x_range(dist2, p) + dx1 = (x1_max - x1_min) / 1_000 + dx2 = (x2_max - x2_min) / 1_000 + dx = np.min([dx1, dx2]) # Use the smaller delta x -- must use the same dx for both distributions + x1 = np.arange(x1_min, x1_max, dx) + x2 = np.arange(x2_min, x2_max, dx) + + # Compute the PDF of each distribution + y1 = dist1.pdf(x1) + y2 = dist2.pdf(x2) + + # The PDF of the sum of two independent random variables is the convolution of the PDF of the two distributions + y = np.convolve(y1, y2, mode="full") * dx + + # Determine the x axis for the output convolution + x = np.arange(y.size) * dx + x1[0] + x2[0] + + # Adjust the histograms bins to be on either side of each point. So there is one extra point added. + x = np.append(x, x[-1] + dx) + x -= dx / 2 + + return scipy.stats.rv_histogram((y, x)) + + +def _x_range(dist: scipy.stats.rv_continuous, p: float) -> tuple[float, float]: + r""" + Determines the range of x values for a given distribution such that the probability of exceeding the x axis, on + either side, is p. + """ + # Need to have these loops because for very small p, sometimes SciPy will return NaN instead of a valid value. + # The while loops will increase the p value until a valid value is returned. 
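+    # For example, if dist.ppf(1e-16) returns NaN, the loop below retries with 1e-15, then 1e-14, and so on,
+    # until dist.ppf() returns a usable (non-NaN) lower bound. The same retry is applied to dist.isf() for the
+    # upper bound.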
+ + pp = p + while True: + x_min = dist.ppf(pp) + if not np.isnan(x_min): + break + pp *= 10 + + pp = p + while True: + x_max = dist.isf(pp) + if not np.isnan(x_max): + break + pp *= 10 + + return x_min, x_max From f6eb31ea9f97fd786c8361acbb706b8439f02cba Mon Sep 17 00:00:00 2001 From: mhostetter Date: Sat, 27 Jul 2024 12:58:32 -0400 Subject: [PATCH 2/7] Rename file --- tests/detection/{test_h0_h1_theory.py => test_h0_h1.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/detection/{test_h0_h1_theory.py => test_h0_h1.py} (100%) diff --git a/tests/detection/test_h0_h1_theory.py b/tests/detection/test_h0_h1.py similarity index 100% rename from tests/detection/test_h0_h1_theory.py rename to tests/detection/test_h0_h1.py From bb75b1ef252e3a23606dfded898780b4c0810e1f Mon Sep 17 00:00:00 2001 From: mhostetter Date: Sat, 27 Jul 2024 13:22:11 -0400 Subject: [PATCH 3/7] Add unit tests for `sum_distributions()` --- tests/probability/test_sum_distributions.py | 60 +++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/probability/test_sum_distributions.py diff --git a/tests/probability/test_sum_distributions.py b/tests/probability/test_sum_distributions.py new file mode 100644 index 000000000..1cd536118 --- /dev/null +++ b/tests/probability/test_sum_distributions.py @@ -0,0 +1,60 @@ +import numpy as np +import scipy.stats + +import sdr + + +def test_normal_normal(): + X = scipy.stats.norm(loc=-1, scale=0.5) + Y = scipy.stats.norm(loc=2, scale=1.5) + _verify(X, Y) + + +def test_rayleigh_rayleigh(): + X = scipy.stats.rayleigh(scale=1) + Y = scipy.stats.rayleigh(loc=1, scale=2) + _verify(X, Y) + + +def test_rician_rician(): + X = scipy.stats.rice(2) + Y = scipy.stats.rice(3) + _verify(X, Y) + + +def test_normal_rayleigh(): + X = scipy.stats.norm(loc=-1, scale=0.5) + Y = scipy.stats.rayleigh(loc=2, scale=1.5) + _verify(X, Y) + + +def test_rayleigh_rician(): + X = scipy.stats.rayleigh(scale=1) + Y = scipy.stats.rice(3) + _verify(X, Y) + + +def _verify(X, Y): + # Numerically compute the distribution + Z = sdr.sum_distributions(X, Y) + + # Empirically compute the distribution + z = X.rvs(100_000) + Y.rvs(100_000) + hist, bins = np.histogram(z, bins=101, density=True) + x = bins[1:] - np.diff(bins) / 2 + + if False: + import matplotlib.pyplot as plt + + plt.figure() + plt.plot(x, X.pdf(x), label="X") + plt.plot(x, Y.pdf(x), label="Y") + plt.plot(x, Z.pdf(x), label="X + Y") + plt.hist(z, bins=101, cumulative=False, density=True, histtype="step", label="X + Y empirical") + plt.legend() + plt.xlabel("Random variable") + plt.ylabel("Probability density") + plt.title("Sum of two distributions") + plt.show() + + assert np.allclose(Z.pdf(x), hist, atol=np.max(hist) * 0.1) From adf27221622b929e14389df042f30a8640b434cf Mon Sep 17 00:00:00 2001 From: mhostetter Date: Sun, 28 Jul 2024 10:29:48 -0400 Subject: [PATCH 4/7] Change argument names --- src/sdr/_probability.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/sdr/_probability.py b/src/sdr/_probability.py index 79fb274ef..5512ee33f 100644 --- a/src/sdr/_probability.py +++ b/src/sdr/_probability.py @@ -79,16 +79,16 @@ def Qinv(p: npt.ArrayLike) -> npt.NDArray[np.float64]: @export def sum_distributions( - dist1: scipy.stats.rv_continuous | scipy.stats.rv_histogram, - dist2: scipy.stats.rv_continuous | scipy.stats.rv_histogram, + X: scipy.stats.rv_continuous | scipy.stats.rv_histogram, + Y: scipy.stats.rv_continuous | scipy.stats.rv_histogram, p: float = 1e-16, ) -> 
scipy.stats.rv_histogram: r""" Numerically calculates the distribution of the sum of two independent random variables. Arguments: - dist1: The distribution of the first random variable $X$. - dist2: The distribution of the second random variable $Y$. + X: The distribution of the first random variable $X$. + Y: The distribution of the second random variable $Y$. p: The probability of exceeding the x axis, on either side, for each distribution. This is used to determine the bounds on the x axis for the numerical convolution. Smaller values of $p$ will result in more accurate analysis, but will require more computation. @@ -164,8 +164,8 @@ def sum_distributions( """ # Determine the x axis of each distribution such that the probability of exceeding the x axis, on either side, # is p. - x1_min, x1_max = _x_range(dist1, p) - x2_min, x2_max = _x_range(dist2, p) + x1_min, x1_max = _x_range(X, p) + x2_min, x2_max = _x_range(Y, p) dx1 = (x1_max - x1_min) / 1_000 dx2 = (x2_max - x2_min) / 1_000 dx = np.min([dx1, dx2]) # Use the smaller delta x -- must use the same dx for both distributions @@ -173,23 +173,23 @@ def sum_distributions( x2 = np.arange(x2_min, x2_max, dx) # Compute the PDF of each distribution - y1 = dist1.pdf(x1) - y2 = dist2.pdf(x2) + f_X = X.pdf(x1) + f_Y = Y.pdf(x2) # The PDF of the sum of two independent random variables is the convolution of the PDF of the two distributions - y = np.convolve(y1, y2, mode="full") * dx + f_Z = np.convolve(f_X, f_Y, mode="full") * dx # Determine the x axis for the output convolution - x = np.arange(y.size) * dx + x1[0] + x2[0] + x = np.arange(f_Z.size) * dx + x1[0] + x2[0] # Adjust the histograms bins to be on either side of each point. So there is one extra point added. x = np.append(x, x[-1] + dx) x -= dx / 2 - return scipy.stats.rv_histogram((y, x)) + return scipy.stats.rv_histogram((f_Z, x)) -def _x_range(dist: scipy.stats.rv_continuous, p: float) -> tuple[float, float]: +def _x_range(X: scipy.stats.rv_continuous, p: float) -> tuple[float, float]: r""" Determines the range of x values for a given distribution such that the probability of exceeding the x axis, on either side, is p. @@ -199,14 +199,14 @@ def _x_range(dist: scipy.stats.rv_continuous, p: float) -> tuple[float, float]: pp = p while True: - x_min = dist.ppf(pp) + x_min = X.ppf(pp) if not np.isnan(x_min): break pp *= 10 pp = p while True: - x_max = dist.isf(pp) + x_max = X.isf(pp) if not np.isnan(x_max): break pp *= 10 From f857bc48d2b60eff275f21eed274aae26ac2ea5e Mon Sep 17 00:00:00 2001 From: mhostetter Date: Sun, 28 Jul 2024 10:56:54 -0400 Subject: [PATCH 5/7] Add `sdr.sum_distribution()` Fixes #423 --- src/sdr/_probability.py | 118 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/src/sdr/_probability.py b/src/sdr/_probability.py index 5512ee33f..1e942cf39 100644 --- a/src/sdr/_probability.py +++ b/src/sdr/_probability.py @@ -8,7 +8,7 @@ import numpy.typing as npt import scipy.special -from ._helper import convert_output, export, verify_arraylike +from ._helper import convert_output, export, verify_arraylike, verify_scalar @export @@ -77,6 +77,120 @@ def Qinv(p: npt.ArrayLike) -> npt.NDArray[np.float64]: return convert_output(x) +@export +def sum_distribution( + X: scipy.stats.rv_continuous | scipy.stats.rv_histogram, + n_terms: int, + p: float = 1e-16, +) -> scipy.stats.rv_histogram: + r""" + Numerically calculates the distribution of the sum of $n$ independent random variables $X$. 
+ + Arguments: + X: The distribution of the random variable $X$. + n_terms: The number $n$ of random variables to sum. + p: The probability of exceeding the x axis, on either side, for each distribution. This is used to determine + the bounds on the x axis for the numerical convolution. Smaller values of $p$ will result in more accurate + analysis, but will require more computation. + + Returns: + The distribution of the sum $Z = X_1 + X_2 + \ldots + X_n$. + + Notes: + The PDF of the sum of $n$ independent random variables is the convolution of the PDF of the base distribution. + + $$f_{X_1 + X_2 + \ldots + X_n}(t) = (f_X * f_X * \ldots * f_X)(t)$$ + + Examples: + Compute the distribution of the sum of two normal distributions. + + .. ipython:: python + + X = scipy.stats.norm(loc=-1, scale=0.5) + n_terms = 2 + x = np.linspace(-6, 2, 1000) + + @savefig sdr_sum_distribution_1.png + plt.figure(); \ + plt.plot(x, X.pdf(x), label="X"); \ + plt.plot(x, sdr.sum_distribution(X, n_terms).pdf(x), label="X + X"); \ + plt.hist(X.rvs((100_000, n_terms)).sum(axis=1), bins=101, density=True, histtype="step", label="X + X empirical"); \ + plt.legend(); \ + plt.xlabel("Random variable"); \ + plt.ylabel("Probability density"); \ + plt.title("Sum of two Normal distributions"); + + Compute the distribution of the sum of three Rayleigh distributions. + + .. ipython:: python + + X = scipy.stats.rayleigh(scale=1) + n_terms = 3 + x = np.linspace(0, 12, 1000) + + @savefig sdr_sum_distribution_2.png + plt.figure(); \ + plt.plot(x, X.pdf(x), label="X"); \ + plt.plot(x, sdr.sum_distribution(X, n_terms).pdf(x), label="X + X + X"); \ + plt.hist(X.rvs((100_000, n_terms)).sum(axis=1), bins=101, density=True, histtype="step", label="X + X + X empirical"); \ + plt.legend(); \ + plt.xlabel("Random variable"); \ + plt.ylabel("Probability density"); \ + plt.title("Sum of three Rayleigh distributions"); + + Compute the distribution of the sum of four Rician distributions. + + .. ipython:: python + + X = scipy.stats.rice(2) + n_terms = 4 + x = np.linspace(0, 12, 1000) + + @savefig sdr_sum_distribution_3.png + plt.figure(); \ + plt.plot(x, X.pdf(x), label="X"); \ + plt.plot(x, sdr.sum_distribution(X, n_terms).pdf(x), label="X + X + X + X"); \ + plt.hist(X.rvs((100_000, n_terms)).sum(axis=1), bins=101, density=True, histtype="step", label="X + X + X + X empirical"); \ + plt.legend(); \ + plt.xlabel("Random variable"); \ + plt.ylabel("Probability density"); \ + plt.title("Sum of four Rician distributions"); + + Group: + probability + """ + verify_scalar(n_terms, int=True, positive=True) + verify_scalar(p, float=True, exclusive_min=0, inclusive_max=0.1) + + if n_terms == 1: + return X + + # Determine the x axis of each distribution such that the probability of exceeding the x axis, on either side, + # is p. + x1_min, x1_max = _x_range(X, p) + x = np.linspace(x1_min, x1_max, 1_001) + dx = np.mean(np.diff(x)) + + # Compute the PDF of the base distribution + f_X = X.pdf(x) + + # The PDF of the sum of n_terms independent random variables is the convolution of the PDF of the base distribution. + # This is efficiently computed in the frequency domain by exponentiating the FFT. The FFT must be zero-padded + # enough that the circular convolutions do not wrap around. 
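+    # Why this padding length: the full linear convolution of n_terms copies of f_X, each of length f_X.size,
+    # has length n_terms * f_X.size - (n_terms - 1) samples. Zero-padding the FFT to at least that length (here
+    # f_X.size * n_terms, rounded up to a fast FFT size) makes the circular convolution implied by the FFT
+    # product equal to the desired linear convolution.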
+ n_fft = scipy.fft.next_fast_len(f_X.size * n_terms) + f_X_fft = np.fft.fft(f_X, n_fft) + f_X_fft = f_X_fft**n_terms + f_Y = np.fft.ifft(f_X_fft).real + f_Y /= f_Y.sum() * dx + x = np.arange(f_Y.size) * dx + x[0] * (n_terms) + + # Adjust the histograms bins to be on either side of each point. So there is one extra point added. + x = np.append(x, x[-1] + dx) + x -= dx / 2 + + return scipy.stats.rv_histogram((f_Y, x)) + + @export def sum_distributions( X: scipy.stats.rv_continuous | scipy.stats.rv_histogram, @@ -162,6 +276,8 @@ def sum_distributions( Group: probability """ + verify_scalar(p, float=True, exclusive_min=0, inclusive_max=0.1) + # Determine the x axis of each distribution such that the probability of exceeding the x axis, on either side, # is p. x1_min, x1_max = _x_range(X, p) From 8f92bdb8a298da40d6e20560e5780a44f1e38b2d Mon Sep 17 00:00:00 2001 From: mhostetter Date: Sun, 28 Jul 2024 10:57:05 -0400 Subject: [PATCH 6/7] Add unit tests for `sum_distribution()` --- tests/probability/test_sum_distribution.py | 50 ++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tests/probability/test_sum_distribution.py diff --git a/tests/probability/test_sum_distribution.py b/tests/probability/test_sum_distribution.py new file mode 100644 index 000000000..c44263829 --- /dev/null +++ b/tests/probability/test_sum_distribution.py @@ -0,0 +1,50 @@ +import numpy as np +import scipy.stats + +import sdr + + +def test_normal(): + X = scipy.stats.norm(loc=-1, scale=0.5) + _verify(X, 2) + _verify(X, 3) + _verify(X, 4) + + +def test_rayleigh(): + X = scipy.stats.rayleigh(scale=1) + _verify(X, 2) + _verify(X, 3) + _verify(X, 4) + + +def test_rician(): + X = scipy.stats.rice(2) + _verify(X, 2) + _verify(X, 3) + _verify(X, 4) + + +def _verify(X, n): + # Numerically compute the distribution + Y = sdr.sum_distribution(X, n) + + # Empirically compute the distribution + y = X.rvs((100_000, n)).sum(axis=1) + hist, bins = np.histogram(y, bins=101, density=True) + x = bins[1:] - np.diff(bins) / 2 + + if False: + import matplotlib.pyplot as plt + + plt.figure() + plt.plot(x, X.pdf(x), label="X") + plt.plot(x, Y.pdf(x), label="X + ... + X") + plt.hist(y, bins=101, cumulative=False, density=True, histtype="step", label="X + .. + X empirical") + plt.legend() + plt.xlabel("Random variable") + plt.ylabel("Probability density") + plt.title("Sum of distribution") + plt.show() + + assert np.allclose(Y.pdf(x), hist, atol=np.max(hist) * 0.1) From fd2d557e492560cf11a7149dfd8869848022cbde Mon Sep 17 00:00:00 2001 From: mhostetter Date: Sun, 28 Jul 2024 11:08:25 -0400 Subject: [PATCH 7/7] Update docstring --- src/sdr/_probability.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sdr/_probability.py b/src/sdr/_probability.py index 1e942cf39..a118257f0 100644 --- a/src/sdr/_probability.py +++ b/src/sdr/_probability.py @@ -84,10 +84,10 @@ def sum_distribution( p: float = 1e-16, ) -> scipy.stats.rv_histogram: r""" - Numerically calculates the distribution of the sum of $n$ independent random variables $X$. + Numerically calculates the distribution of the sum of $n$ i.i.d. random variables $X_i$. Arguments: - X: The distribution of the random variable $X$. + X: The distribution of the i.i.d. random variables $X_i$. n_terms: The number $n$ of random variables to sum. p: The probability of exceeding the x axis, on either side, for each distribution. This is used to determine the bounds on the x axis for the numerical convolution. 
Smaller values of $p$ will result in more accurate @@ -108,7 +108,7 @@ def sum_distribution( X = scipy.stats.norm(loc=-1, scale=0.5) n_terms = 2 - x = np.linspace(-6, 2, 1000) + x = np.linspace(-6, 2, 1_001) @savefig sdr_sum_distribution_1.png plt.figure(); \ @@ -126,7 +126,7 @@ def sum_distribution( X = scipy.stats.rayleigh(scale=1) n_terms = 3 - x = np.linspace(0, 12, 1000) + x = np.linspace(0, 10, 1_001) @savefig sdr_sum_distribution_2.png plt.figure(); \ @@ -144,7 +144,7 @@ def sum_distribution( X = scipy.stats.rice(2) n_terms = 4 - x = np.linspace(0, 12, 1000) + x = np.linspace(0, 18, 1_001) @savefig sdr_sum_distribution_3.png plt.figure(); \ @@ -198,7 +198,7 @@ def sum_distributions( p: float = 1e-16, ) -> scipy.stats.rv_histogram: r""" - Numerically calculates the distribution of the sum of two independent random variables. + Numerically calculates the distribution of the sum of two independent random variables $X$ and $Y$. Arguments: X: The distribution of the first random variable $X$. @@ -222,7 +222,7 @@ def sum_distributions( X = scipy.stats.norm(loc=-1, scale=0.5) Y = scipy.stats.norm(loc=2, scale=1.5) - x = np.linspace(-5, 10, 1000) + x = np.linspace(-5, 10, 1_001) @savefig sdr_sum_distributions_1.png plt.figure(); \ @@ -241,7 +241,7 @@ def sum_distributions( X = scipy.stats.rayleigh(scale=1) Y = scipy.stats.rayleigh(loc=1, scale=2) - x = np.linspace(0, 12, 1000) + x = np.linspace(0, 12, 1_001) @savefig sdr_sum_distributions_2.png plt.figure(); \ @@ -260,7 +260,7 @@ def sum_distributions( X = scipy.stats.rice(2) Y = scipy.stats.rice(3) - x = np.linspace(0, 12, 1000) + x = np.linspace(0, 12, 1_001) @savefig sdr_sum_distributions_3.png plt.figure(); \