From ca6571c83557f30eaeb88bf1c75a7d99bade8bc7 Mon Sep 17 00:00:00 2001
From: Eric Vergnaud
Date: Thu, 10 Oct 2024 17:55:41 +0200
Subject: [PATCH] Normalize databricks paths as part of resolving them (#157)

`Path("/a/b/../c").resolve()` returns `Path("/a/c")`
Databricks paths should behave the same, but currently don't.
This PR fixes the issue, which participates in https://github.com/databrickslabs/ucx/issues/2882

Progresses https://github.com/databrickslabs/ucx/issues/2882

---------

Co-authored-by: Eric Vergnaud
---
 src/databricks/labs/blueprint/paths.py | 18 +++++++++++++++++-
 tests/integration/test_paths.py        | 16 ++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/databricks/labs/blueprint/paths.py b/src/databricks/labs/blueprint/paths.py
index b00d64e..a794c3e 100644
--- a/src/databricks/labs/blueprint/paths.py
+++ b/src/databricks/labs/blueprint/paths.py
@@ -520,7 +520,23 @@ def resolve(self: P, strict: bool = False) -> P:
         if strict and not absolute.exists():
             msg = f"Path does not exist: {self}"
             raise FileNotFoundError(msg)
-        return absolute
+        # pylint: disable=protected-access
+        return absolute._normalize()
+
+    def _normalize(self: P) -> P:
+        if ".." not in self._path_parts:
+            return self
+        segments: list[str] = []
+        for part in self._path_parts:
+            if part == "..":
+                if segments:
+                    segments.pop()
+            elif part is None or part == '.':
+                continue
+            else:
+                segments.append(part)
+        # pylint: disable=protected-access
+        return self.with_segments(self.anchor, *segments)._normalize()
 
     def absolute(self: P) -> P:
         if self.is_absolute():
diff --git a/tests/integration/test_paths.py b/tests/integration/test_paths.py
index c682bab..0155690 100644
--- a/tests/integration/test_paths.py
+++ b/tests/integration/test_paths.py
@@ -191,6 +191,22 @@ def test_replace_file(ws, make_random, cls):
     tmp_dir.rmdir(recursive=True)
 
 
+@pytest.mark.parametrize("cls", DATABRICKS_PATHLIKE)
+def test_resolve_is_consistent(ws, cls):
+    path = cls(ws, "/a/b/c") / Path("../../d")
+    resolved = path.resolve()
+    assert resolved == cls(ws, "/a/d")
+    path = cls(ws, "/a/b/c") / "../../d"
+    resolved = path.resolve()
+    assert resolved == cls(ws, "/a/d")
+    resolved = cls(ws, "/a/b/c/../../d").resolve()
+    assert resolved == cls(ws, "/a/d")
+    resolved = cls(ws, "/../d").resolve()
+    assert resolved == cls(ws, "/d")
+    resolved = cls(ws, "/a/b/c/./../../d").resolve()
+    assert resolved == cls(ws, "/a/d")
+
+
 def test_workspace_as_fuse(ws):
     wsp = WorkspacePath(ws, "/Users/foo/bar/baz")
     assert Path("/Workspace/Users/foo/bar/baz") == wsp.as_fuse()