Skip to content

Commit

Permalink
Add check for non-single sample sets and disable tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher authored and mergify[bot] committed Jan 16, 2024
1 parent d651881 commit 2dae133
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 5 deletions.
49 changes: 46 additions & 3 deletions python/tests/test_divmat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1347,12 +1347,19 @@ def test_bad_arg_types(self, arg):


class TestGeneticRelatednessMatrix:
def check(self, ts, mode, *, windows=None, span_normalise=True):
def check(self, ts, mode, *, sample_sets=None, windows=None, span_normalise=True):
G1 = stats_api_genetic_relatedness_matrix(
ts, mode=mode, windows=windows, span_normalise=span_normalise
ts,
mode=mode,
sample_sets=sample_sets,
windows=windows,
span_normalise=span_normalise,
)
G2 = ts.genetic_relatedness_matrix(
mode=mode, windows=windows, span_normalise=span_normalise
mode=mode,
sample_sets=sample_sets,
windows=windows,
span_normalise=span_normalise,
)
np.testing.assert_array_almost_equal(G1, G2)

Expand All @@ -1368,6 +1375,33 @@ def test_single_tree(self, mode):
ts = tsutil.insert_branch_sites(ts)
self.check(ts, mode)

@pytest.mark.parametrize("mode", DIVMAT_MODES)
def test_single_tree_sample_sets(self, mode):
    # Sample sets holding more than one sample must currently be rejected
    # with a ValueError whose message mentions issue 2888.
    #
    # 2.00┊    6    ┊
    #     ┊  ┏━┻━┓  ┊
    # 1.00┊  4   5  ┊
    #     ┊ ┏┻┓ ┏┻┓ ┊
    # 0.00┊ 0 1 2 3 ┊
    #     0         1
    balanced_tree = tskit.Tree.generate_balanced(4)
    ts_with_sites = tsutil.insert_branch_sites(balanced_tree.tree_sequence)
    paired_sets = [[0, 1], [2, 3]]
    with pytest.raises(ValueError, match="2888"):
        self.check(ts_with_sites, mode, sample_sets=paired_sets)

@pytest.mark.parametrize("mode", DIVMAT_MODES)
def test_single_tree_single_samples(self, mode):
    # Sample sets that each contain exactly one sample are compared
    # between the two implementations via self.check.
    #
    # 2.00┊    6    ┊
    #     ┊  ┏━┻━┓  ┊
    # 1.00┊  4   5  ┊
    #     ┊ ┏┻┓ ┏┻┓ ┊
    # 0.00┊ 0 1 2 3 ┊
    #     0         1
    balanced_ts = tsutil.insert_branch_sites(
        tskit.Tree.generate_balanced(4).tree_sequence
    )
    # Checked in this order: siblings, non-siblings, then three sets.
    singleton_cases = (
        [[0], [1]],
        [[0], [2]],
        [[0], [1], [2]],
    )
    for singleton_sets in singleton_cases:
        self.check(balanced_ts, mode, sample_sets=singleton_sets)

@pytest.mark.parametrize("mode", DIVMAT_MODES)
def test_single_tree_windows(self, mode):
# 2.00┊ 6 ┊
Expand All @@ -1390,3 +1424,12 @@ def test_suite_defaults(self, ts, mode):
@pytest.mark.parametrize("span_normalise", [True, False])
def test_suite_span_normalise(self, ts, mode, span_normalise):
self.check(ts, mode=mode, span_normalise=span_normalise)

@pytest.mark.skip("fix sample sets #2888")
@pytest.mark.parametrize("ts", get_example_tree_sequences())
@pytest.mark.parametrize("mode", DIVMAT_MODES)
# NOTE(review): presumably meant to become [2, 3, 4, 5] once the skip is
# lifted -- confirm against issue 2888.
@pytest.mark.parametrize("num_sets", [2])
def test_suite_sample_sets(self, ts, mode, num_sets):
    # Tree sequences with fewer samples than requested sets are not checked.
    if ts.num_samples < num_sets:
        return
    # Split the samples into num_sets contiguous groups.
    grouped_samples = np.array_split(ts.samples(), num_sets)
    self.check(ts, mode=mode, sample_sets=grouped_samples)
14 changes: 12 additions & 2 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2018-2023 Tskit Developers
# Copyright (c) 2018-2024 Tskit Developers
# Copyright (c) 2015-2018 University of Oxford
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -8139,14 +8139,24 @@ def genetic_relatedness_matrix(
span_normalise=span_normalise,
)

# FIXME remove this when sample sets bug has been fixed.
# https://github.com/tskit-dev/tskit/issues/2888
if sample_sets is not None:
if any(len(ss) > 1 for ss in sample_sets):
raise ValueError(
"Only single entry sample sets allowed for now."
" See https://github.com/tskit-dev/tskit/issues/2888"
)

def _normalise(B):
    # Centre the matrix B: add its overall mean, subtract the pairwise sum
    # of its column means, then divide by -2. Empty input is returned as is.
    if len(B) == 0:
        return B
    column_means = np.mean(B, axis=0)
    # pairwise_means[i, j] == column_means[i] + column_means[j]
    pairwise_means = column_means[:, np.newaxis] + column_means[np.newaxis, :]
    centred = B + np.mean(B)
    centred -= pairwise_means
    # FIXME the division by -2 is not understood in general: it gives the
    # right answer for single-sample sample sets, but not otherwise.
    # https://github.com/tskit-dev/tskit/issues/2888
    return centred / -2

if windows is None:
Expand Down

0 comments on commit 2dae133

Please sign in to comment.