From 97e8aef5ccad6a9aa27b6c48d59b848e69d7edf3 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 12 Sep 2024 23:24:51 -0400 Subject: [PATCH 01/17] add treenode-level tests --- xarray/tests/test_treenode.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/xarray/tests/test_treenode.py b/xarray/tests/test_treenode.py index 22a6a97c3f5..fad8decd39c 100644 --- a/xarray/tests/test_treenode.py +++ b/xarray/tests/test_treenode.py @@ -270,6 +270,17 @@ def test_del_child(self): del john["Mary"] +class TestNamesCannotContainSlashes: + def test_child_keys(self): + parent = TreeNode() + with pytest.raises(ValueError, match="cannot contain forward slashes"): + parent.children = {"a/b": TreeNode()} + + def test_node_names(self): + with pytest.raises(ValueError, match="cannot contain forward slashes"): + NamedNode(name="a/b") + + def create_test_tree() -> tuple[NamedNode, NamedNode]: # a # ├── b From 174c7ffac75175ca576947eadbc0a4b8c89530d8 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 12 Sep 2024 23:25:13 -0400 Subject: [PATCH 02/17] add check in child setter --- xarray/core/treenode.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py index d74c82178ea..8616bb32980 100644 --- a/xarray/core/treenode.py +++ b/xarray/core/treenode.py @@ -205,6 +205,9 @@ def _check_children(children: Mapping[str, Tree]) -> None: seen = set() for name, child in children.items(): + if "/" in name: + raise ValueError("child names cannot contain forward slashes") + if not isinstance(child, TreeNode): raise TypeError( f"Cannot add object {name}. It is of type {type(child)}, " From ec16a2d557090a7878e7da3fc61a716bb079ef18 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 12 Sep 2024 23:29:37 -0400 Subject: [PATCH 03/17] test for non-string types --- xarray/tests/test_treenode.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_treenode.py b/xarray/tests/test_treenode.py index fad8decd39c..eb85e7a5330 100644 --- a/xarray/tests/test_treenode.py +++ b/xarray/tests/test_treenode.py @@ -270,16 +270,23 @@ def test_del_child(self): del john["Mary"] -class TestNamesCannotContainSlashes: +class TestValidNames: def test_child_keys(self): parent = TreeNode() with pytest.raises(ValueError, match="cannot contain forward slashes"): parent.children = {"a/b": TreeNode()} + parent = TreeNode() + with pytest.raises(TypeError, match="must be a string or None"): + parent.children = {0: TreeNode()} + def test_node_names(self): with pytest.raises(ValueError, match="cannot contain forward slashes"): NamedNode(name="a/b") + with pytest.raises(TypeError, match="must be a string or None"): + NamedNode(name=0) + def create_test_tree() -> tuple[NamedNode, NamedNode]: # a From abb7ba36a44db25e06a0dcc9ce95dff4fbb18b5d Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 12 Sep 2024 23:29:55 -0400 Subject: [PATCH 04/17] check for non-string types --- xarray/core/treenode.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py index 8616bb32980..0fd3e3de317 100644 --- a/xarray/core/treenode.py +++ b/xarray/core/treenode.py @@ -205,6 +205,8 @@ def _check_children(children: Mapping[str, Tree]) -> None: seen = set() for name, child in children.items(): + if not isinstance(name, str): + raise TypeError("child name must be a string or None") if "/" in name: raise ValueError("child names cannot contain forward slashes") From 8eeae1dea2b50e238651a9bcfe043981234ff1dc Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 12 Sep 2024 23:48:58 -0400 Subject: [PATCH 05/17] rename check_for_slashes_in_names --- xarray/core/datatree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 2b3179cb79d..558c1d04d56 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -156,7 +156,7 @@ def check_alignment( check_alignment(child_path, child_ds, base_ds, child.children) -def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None: +def check_for_slashes_in_names(variables: Iterable[Hashable]) -> None: offending_variable_names = [ name for name in variables if isinstance(name, str) and "/" in name ] @@ -465,7 +465,7 @@ def __init__( super().__init__(name=name, children=children) def _set_node_data(self, dataset: Dataset): - _check_for_slashes_in_names(dataset.variables) + check_for_slashes_in_names(dataset.variables) data_vars, coord_vars = _collect_data_and_coord_variables(dataset) self._data_variables = data_vars self._node_coord_variables = coord_vars From 83f227ca6c02f286f28176623ea8ffe06ec63818 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 12 Sep 2024 23:49:26 -0400 Subject: [PATCH 06/17] regression test for #9485 --- xarray/tests/test_datatree.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 0435b199b9b..8bfa059e2a9 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -718,6 +718,12 @@ def test_inherited(self): # expected = child.assign_coords({"c": 11}) # assert_identical(expected, actual) + def test_forbid_paths_as_names(self): + # regression test for GH issue #9485 + dt = DataTree(Dataset(coords={"x": 0}), children={"child": DataTree()}) + with pytest.raises(ValueError, match="cannot have names containing"): + dt.coords["/child/y"] = 2 + def test_delitem(): ds = Dataset({"a": 0}, coords={"x": ("x", [1, 2]), "z": "a"}) From d0a11c46f07b70be1f2dc34b9880b5996fff8126 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 12 Sep 2024 23:50:06 -0400 Subject: [PATCH 07/17] raise instead of allow coords with slashes in names --- xarray/core/coordinates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index bb18bac0a1f..a0250da2cfa 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -846,10 +846,11 @@ def to_dataset(self) -> Dataset: def _update_coords( self, coords: dict[Hashable, Variable], indexes: Mapping[Any, Index] ) -> None: - from xarray.core.datatree import check_alignment + from xarray.core.datatree import check_alignment, check_for_slashes_in_names # create updated node (`.to_dataset` makes a copy so this doesn't modify in-place) node_ds = self._data.to_dataset(inherited=False) + check_for_slashes_in_names(list(coords.keys())) node_ds.coords._update_coords(coords, indexes) # check consistency *before* modifying anything in-place From 8eb1588403997c042e67e12981196af7e53d2735 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 13 Sep 2024 00:08:01 -0400 Subject: [PATCH 08/17] mypy --- xarray/tests/test_treenode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_treenode.py b/xarray/tests/test_treenode.py index eb85e7a5330..d5f9946ef72 100644 --- a/xarray/tests/test_treenode.py +++ b/xarray/tests/test_treenode.py @@ -272,13 +272,13 @@ def test_del_child(self): class TestValidNames: def test_child_keys(self): - parent = TreeNode() + parent: TreeNode = TreeNode() with pytest.raises(ValueError, match="cannot contain forward slashes"): parent.children = {"a/b": TreeNode()} - parent = TreeNode() + parent: TreeNode = TreeNode() with pytest.raises(TypeError, match="must be a string or None"): - parent.children = {0: TreeNode()} + parent.children = {0: TreeNode()} # type: ignore[dict-item] def test_node_names(self): with pytest.raises(ValueError, match="cannot contain forward slashes"): From a8bcdf920741ac613423c2f7b7f5f0c0b525caf5 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 13 Sep 2024 00:49:18 -0400 Subject: [PATCH 09/17] mypy2 --- xarray/tests/test_treenode.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_treenode.py b/xarray/tests/test_treenode.py index d5f9946ef72..390e5a7ceb7 100644 --- a/xarray/tests/test_treenode.py +++ b/xarray/tests/test_treenode.py @@ -276,7 +276,6 @@ def test_child_keys(self): with pytest.raises(ValueError, match="cannot contain forward slashes"): parent.children = {"a/b": TreeNode()} - parent: TreeNode = TreeNode() with pytest.raises(TypeError, match="must be a string or None"): parent.children = {0: TreeNode()} # type: ignore[dict-item] From e37fc64a6d690a0e3712b59a71d98d059b836b59 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 16 Sep 2024 09:38:11 -0400 Subject: [PATCH 10/17] use _validate_name function from #9494 --- xarray/core/treenode.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py index 45ada1d9007..9d0e2ffc0c4 100644 --- a/xarray/core/treenode.py +++ b/xarray/core/treenode.py @@ -205,10 +205,7 @@ def _check_children(children: Mapping[str, Tree]) -> None: seen = set() for name, child in children.items(): - if not isinstance(name, str): - raise TypeError("child name must be a string or None") - if "/" in name: - raise ValueError("child names cannot contain forward slashes") + _validate_name(name) if not isinstance(child, TreeNode): raise TypeError( From f1b2b3138aa5609a0a1c61a62535178cab243239 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 16 Sep 2024 09:48:47 -0400 Subject: [PATCH 11/17] remove redundant validation check --- xarray/core/treenode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py index 9d0e2ffc0c4..6b93ddddc37 100644 --- a/xarray/core/treenode.py +++ b/xarray/core/treenode.py @@ -694,7 +694,7 @@ def __str__(self) -> str: def _post_attach(self: AnyNamedNode, parent: AnyNamedNode, name: str) -> None: """Ensures child has name attribute corresponding to key under which it has been stored.""" - _validate_name(name) # is this check redundant? + # we have already validated `name`, since it has already been set as the name of another node (the parent) self._name = name def _copy_node( From 9e2bce796a89863b5aafdcd66f4695b3723ed7bc Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 16 Sep 2024 10:31:14 -0400 Subject: [PATCH 12/17] add tests form #9494 but at NamedNode level --- xarray/tests/test_treenode.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/xarray/tests/test_treenode.py b/xarray/tests/test_treenode.py index 390e5a7ceb7..b9f3e862c02 100644 --- a/xarray/tests/test_treenode.py +++ b/xarray/tests/test_treenode.py @@ -286,6 +286,28 @@ def test_node_names(self): with pytest.raises(TypeError, match="must be a string or None"): NamedNode(name=0) + def test_names(self): + nn = NamedNode() + assert nn.name is None + + nn = NamedNode(name="foo") + assert nn.name == "foo" + + nn.name = "bar" + assert nn.name == "bar" + + nn = NamedNode(children={"foo": NamedNode()}) + assert nn.children["foo"].name == "foo" + with pytest.raises( + ValueError, match="cannot set the name of a node which already has a parent" + ): + nn.children["foo"].name = "bar" + + detached = nn.children["foo"].copy() + assert detached.name == "foo" + detached.name = "bar" + assert detached.name == "bar" + def create_test_tree() -> tuple[NamedNode, NamedNode]: # a From a05bc45d24575f8da47780c9de5525a273bbc23f Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 16 Sep 2024 10:33:32 -0400 Subject: [PATCH 13/17] _validate_name -> validate_name so it can be imported from other files --- xarray/core/treenode.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py index 6b93ddddc37..f9841149c92 100644 --- a/xarray/core/treenode.py +++ b/xarray/core/treenode.py @@ -42,6 +42,14 @@ def __init__(self, *pathsegments): # TODO should we also forbid suffixes to avoid node names with dots in them? +def validate_name(name: str | None) -> None: + if name is not None: + if not isinstance(name, str): + raise TypeError("node name must be a string or None") + if "/" in name: + raise ValueError("node names cannot contain forward slashes") + + Tree = TypeVar("Tree", bound="TreeNode") @@ -205,7 +213,7 @@ def _check_children(children: Mapping[str, Tree]) -> None: seen = set() for name, child in children.items(): - _validate_name(name) + validate_name(name) if not isinstance(child, TreeNode): raise TypeError( @@ -642,14 +650,6 @@ def same_tree(self, other: Tree) -> bool: AnyNamedNode = TypeVar("AnyNamedNode", bound="NamedNode") -def _validate_name(name: str | None) -> None: - if name is not None: - if not isinstance(name, str): - raise TypeError("node name must be a string or None") - if "/" in name: - raise ValueError("node names cannot contain forward slashes") - - class NamedNode(TreeNode, Generic[Tree]): """ A TreeNode which knows its own name. @@ -663,7 +663,7 @@ class NamedNode(TreeNode, Generic[Tree]): def __init__(self, name=None, children=None): super().__init__(children=children) - _validate_name(name) + validate_name(name) self._name = name @property @@ -679,7 +679,7 @@ def name(self, name: str | None) -> None: "Consider creating a detached copy of this node via .copy() " "on the parent node." ) - _validate_name(name) + validate_name(name) self._name = name def __repr__(self, level=0): From 9bfd437f3333050285c9402e11c962205c60e287 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 16 Sep 2024 10:56:43 -0400 Subject: [PATCH 14/17] rename name validation function --- xarray/core/coordinates.py | 4 ++-- xarray/core/datatree.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index a0250da2cfa..92b86980aa9 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -846,11 +846,11 @@ def to_dataset(self) -> Dataset: def _update_coords( self, coords: dict[Hashable, Variable], indexes: Mapping[Any, Index] ) -> None: - from xarray.core.datatree import check_alignment, check_for_slashes_in_names + from xarray.core.datatree import check_alignment, validate_variable_names # create updated node (`.to_dataset` makes a copy so this doesn't modify in-place) node_ds = self._data.to_dataset(inherited=False) - check_for_slashes_in_names(list(coords.keys())) + validate_variable_names(list(coords.keys())) node_ds.coords._update_coords(coords, indexes) # check consistency *before* modifying anything in-place diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 558c1d04d56..cc46fa1d3ff 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -156,7 +156,7 @@ def check_alignment( check_alignment(child_path, child_ds, base_ds, child.children) -def check_for_slashes_in_names(variables: Iterable[Hashable]) -> None: +def validate_variable_names(variables: Iterable[Hashable]) -> None: offending_variable_names = [ name for name in variables if isinstance(name, str) and "/" in name ] @@ -465,7 +465,7 @@ def __init__( super().__init__(name=name, children=children) def _set_node_data(self, dataset: Dataset): - check_for_slashes_in_names(dataset.variables) + validate_variable_names(dataset.variables) data_vars, coord_vars = _collect_data_and_coord_variables(dataset) self._data_variables = data_vars self._node_coord_variables = coord_vars From dc9e3bdcc553a81516b0317087848351d8ee0f61 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 16 Sep 2024 10:57:51 -0400 Subject: [PATCH 15/17] remove outdated import check (from when datatree was in separate repo) --- xarray/core/datatree.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index cc46fa1d3ff..c6166b99832 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -46,13 +46,7 @@ either_dict_or_kwargs, maybe_wrap_array, ) -from xarray.core.variable import Variable - -try: - from xarray.core.variable import calculate_dimensions -except ImportError: - # for xarray versions 2022.03.0 and earlier - from xarray.core.dataset import calculate_dimensions +from xarray.core.variable import Variable, calculate_dimensions if TYPE_CHECKING: import pandas as pd From 6456276879f98253ce16627929b5ae4808facc0d Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 16 Sep 2024 11:01:36 -0400 Subject: [PATCH 16/17] type hints --- xarray/tests/test_treenode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_treenode.py b/xarray/tests/test_treenode.py index b9f3e862c02..ef355ad3348 100644 --- a/xarray/tests/test_treenode.py +++ b/xarray/tests/test_treenode.py @@ -287,16 +287,16 @@ def test_node_names(self): NamedNode(name=0) def test_names(self): - nn = NamedNode() + nn: NamedNode = NamedNode() assert nn.name is None - nn = NamedNode(name="foo") + nn: NamedNode = NamedNode(name="foo") assert nn.name == "foo" nn.name = "bar" assert nn.name == "bar" - nn = NamedNode(children={"foo": NamedNode()}) + nn: NamedNode = NamedNode(children={"foo": NamedNode()}) assert nn.children["foo"].name == "foo" with pytest.raises( ValueError, match="cannot set the name of a node which already has a parent" From 6b894f163f3f18fc19a673b1b6ea76d01ddb02e9 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 16 Sep 2024 11:23:58 -0400 Subject: [PATCH 17/17] mypy again --- xarray/tests/test_treenode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_treenode.py b/xarray/tests/test_treenode.py index ef355ad3348..49e63d3069b 100644 --- a/xarray/tests/test_treenode.py +++ b/xarray/tests/test_treenode.py @@ -290,13 +290,13 @@ def test_names(self): nn: NamedNode = NamedNode() assert nn.name is None - nn: NamedNode = NamedNode(name="foo") + nn = NamedNode(name="foo") assert nn.name == "foo" nn.name = "bar" assert nn.name == "bar" - nn: NamedNode = NamedNode(children={"foo": NamedNode()}) + nn = NamedNode(children={"foo": NamedNode()}) assert nn.children["foo"].name == "foo" with pytest.raises( ValueError, match="cannot set the name of a node which already has a parent"