From 2eb6f396a5cfe56966b18b3adf79eba510a9da20 Mon Sep 17 00:00:00 2001
From: Krzysztof Godlewski <krzysztof.godlewski@neptune.ai>
Date: Mon, 28 Oct 2024 12:42:07 +0100
Subject: [PATCH] Remove the `family` parameter from `Run.__init__()` (#62)

---
 README.md                       |  5 ---
 src/neptune_scale/__init__.py   | 12 ++-----
 src/neptune_scale/exceptions.py |  2 +-
 src/neptune_scale/parameters.py |  1 -
 tests/unit/test_run.py          | 57 +++++++--------------------------
 5 files changed, 14 insertions(+), 63 deletions(-)

diff --git a/README.md b/README.md
index 44bdaa8c..118b2152 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,6 @@ from neptune_scale import Run
 
 run = Run(
     experiment_name="ExperimentName",
-    family="RunFamilyName",  # must be the same for related runs
     run_id="SomeUniqueRunIdentifier",
 )
 ```
@@ -130,7 +129,6 @@ __Parameters__
 
 | Name             | Type             | Default | Description                                                               |
 |------------------|------------------|---------|---------------------------------------------------------------------------|
-| `family`         | `str`            | -       | Identifies related runs. All runs of the same lineage must have the same `family` value. That is, forking is only possible within the same family. Max length: 128 bytes. |
 | `run_id`         | `str`            | -       | Identifier of the run. Must be unique within the project. Max length: 128 bytes. |
 | `project`        | `str`, optional  | `None`  | Name of a project in the form `workspace-name/project-name`. If `None`, the value of the `NEPTUNE_PROJECT` environment variable is used. |
 | `api_token`      | `str`, optional  | `None`  | Your Neptune API token or a service account's API token. If `None`, the value of the `NEPTUNE_API_TOKEN` environment variable is used. To keep your token secure, don't place it in source code. Instead, save it as an environment variable. |
@@ -156,7 +154,6 @@ from neptune_scale import Run
 with Run(
     project="team-alpha/project-x",
     api_token="h0dHBzOi8aHR0cHM6...Y2MifQ==",
-    family="aquarium",
     run_id="likable-barracuda",
 ) as run:
     ...
@@ -173,7 +170,6 @@ To restart an experiment, create a forked run:
 
 ```python
 with Run(
-    family="aquarium",
     run_id="adventurous-barracuda",
     experiment_name="swim-further",
     fork_run_id="likable-barracuda",
@@ -186,7 +182,6 @@ Continue a run:
 
 ```python
 with Run(
-    family="aquarium",
     run_id="likable-barracuda",  # a Neptune run with this ID already exists
     resume=True,
 ) as run:
diff --git a/src/neptune_scale/__init__.py b/src/neptune_scale/__init__.py
index 8230bee2..ea761b6f 100644
--- a/src/neptune_scale/__init__.py
+++ b/src/neptune_scale/__init__.py
@@ -67,7 +67,6 @@
 )
 from neptune_scale.parameters import (
     MAX_EXPERIMENT_NAME_LENGTH,
-    MAX_FAMILY_LENGTH,
     MAX_QUEUE_SIZE,
     MAX_RUN_ID_LENGTH,
     MINIMAL_WAIT_FOR_ACK_SLEEP_TIME,
@@ -84,7 +83,6 @@ class Run(WithResources, AbstractContextManager):
     def __init__(
         self,
         *,
-        family: str,
         run_id: str,
         project: Optional[str] = None,
         api_token: Optional[str] = None,
@@ -106,8 +104,6 @@ def __init__(
         Initializes a run that logs the model-building metadata to Neptune.
 
         Args:
-            family: Identifies related runs. For example, the same value must apply to all runs within a run hierarchy.
-                Max length: 128 bytes.
             run_id: Unique identifier of a run. Must be unique within the project. Max length: 128 bytes.
             project: Name of the project where the metadata is logged, in the form `workspace-name/project-name`.
                 If not provided, the value of the `NEPTUNE_PROJECT` environment variable is used.
@@ -131,7 +127,6 @@ def __init__(
             on_warning_callback: Callback function triggered when a warning occurs.
         """
 
-        verify_type("family", family, str)
         verify_type("run_id", run_id, str)
         verify_type("resume", resume, bool)
         verify_type("project", project, (str, type(None)))
@@ -182,7 +177,6 @@ def __init__(
         assert api_token is not None  # mypy
         input_api_token: str = api_token
 
-        verify_non_empty("family", family)
         verify_non_empty("run_id", run_id)
         if experiment_name is not None:
             verify_non_empty("experiment_name", experiment_name)
@@ -193,7 +187,6 @@ def __init__(
 
         verify_project_qualified_name("project", project)
 
-        verify_max_length("family", family, MAX_FAMILY_LENGTH)
         verify_max_length("run_id", run_id, MAX_RUN_ID_LENGTH)
 
         # This flag is used to signal that we're closed or being closed (and most likely waiting for sync), and no
@@ -201,7 +194,6 @@ def __init__(
         self._is_closing = False
 
         self._project: str = input_project
-        self._family: str = family
         self._run_id: str = run_id
 
         self._lock = threading.RLock()
@@ -229,7 +221,7 @@ def __init__(
 
         self._sync_process = SyncProcess(
             project=self._project,
-            family=self._family,
+            family=self._run_id,
             operations_queue=self._operations_queue.queue,
             errors_queue=self._errors_queue,
             api_token=input_api_token,
@@ -396,7 +388,7 @@ def _create_run(
             project=self._project,
             run_id=self._run_id,
             create=CreateRun(
-                family=self._family,
+                family=self._run_id,
                 fork_point=fork_point,
                 experiment_id=experiment_name,
                 creation_time=None if creation_time is None else datetime_to_proto(creation_time),
diff --git a/src/neptune_scale/exceptions.py b/src/neptune_scale/exceptions.py
index ae6bc1d2..1a3ed568 100644
--- a/src/neptune_scale/exceptions.py
+++ b/src/neptune_scale/exceptions.py
@@ -278,7 +278,7 @@ class NeptuneRunConflicting(NeptuneScaleError):
 {h1}
 ----NeptuneRunConflicting------------------------------------------------------
 {end}
-Run with specified `run_id` already exists, but has different creation parameters (`family` or `fork_run_id`).
+Run with specified `run_id` already exists, but has a different `fork_run_id` parameter.
 
 {correct}Need help?{end}-> Contact support@neptune.ai
 
diff --git a/src/neptune_scale/parameters.py b/src/neptune_scale/parameters.py
index b2e8bb62..3718721f 100644
--- a/src/neptune_scale/parameters.py
+++ b/src/neptune_scale/parameters.py
@@ -1,6 +1,5 @@
 # Input validation
 MAX_RUN_ID_LENGTH = 128
-MAX_FAMILY_LENGTH = 128
 MAX_EXPERIMENT_NAME_LENGTH = 730
 
 # Operations queue
diff --git a/tests/unit/test_run.py b/tests/unit/test_run.py
index 41d079da..3ef3d99f 100644
--- a/tests/unit/test_run.py
+++ b/tests/unit/test_run.py
@@ -17,7 +17,6 @@ def api_token():
 # Set short timeouts on blocking operations for quicker test execution
 @pytest.fixture(autouse=True, scope="session")
 def short_timeouts():
-    import neptune_scale
     import neptune_scale.core.components
 
     patch = pytest.MonkeyPatch()
@@ -41,10 +40,9 @@ def test_context_manager(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # when
-    with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled"):
+    with Run(project=project, api_token=api_token, run_id=run_id, mode="disabled"):
         ...
 
     # then
@@ -55,10 +53,9 @@ def test_close(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # and
-    run = Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled")
+    run = Run(project=project, api_token=api_token, run_id=run_id, mode="disabled")
 
     # when
     run.close()
@@ -67,34 +64,16 @@ def test_close(api_token):
     assert True
 
 
-def test_family_too_long(api_token):
-    # given
-    project = "workspace/project"
-    run_id = str(uuid.uuid4())
-
-    # and
-    family = "a" * 1000
-
-    # when
-    with pytest.raises(ValueError):
-        with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled"):
-            ...
-
-    # and
-    assert True
-
-
 def test_run_id_too_long(api_token):
     # given
     project = "workspace/project"
-    family = str(uuid.uuid4())
 
     # and
     run_id = "a" * 1000
 
     # then
     with pytest.raises(ValueError):
-        with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled"):
+        with Run(project=project, api_token=api_token, run_id=run_id, mode="disabled"):
             ...
 
     # and
@@ -104,14 +83,13 @@ def test_run_id_too_long(api_token):
 def test_invalid_project_name(api_token):
     # given
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # and
     project = "just-project"
 
     # then
     with pytest.raises(ValueError):
-        with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled"):
+        with Run(project=project, api_token=api_token, run_id=run_id, mode="disabled"):
             ...
 
     # and
@@ -122,10 +100,9 @@ def test_metadata(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # then
-    with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled") as run:
+    with Run(project=project, api_token=api_token, run_id=run_id, mode="disabled") as run:
         run.log(
             step=1,
             timestamp=datetime.now(),
@@ -156,10 +133,9 @@ def test_tags(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # then
-    with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled") as run:
+    with Run(project=project, api_token=api_token, run_id=run_id, mode="disabled") as run:
         run.add_tags(["tag1"])
         run.add_tags(["tag2"], group_tags=True)
         run.remove_tags(["tag3"])
@@ -173,10 +149,9 @@ def test_log_without_step(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # then
-    with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled") as run:
+    with Run(project=project, api_token=api_token, run_id=run_id, mode="disabled") as run:
         run.log(
             timestamp=datetime.now(),
             configs={
@@ -192,10 +167,9 @@ def test_log_configs(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # then
-    with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled") as run:
+    with Run(project=project, api_token=api_token, run_id=run_id, mode="disabled") as run:
         run.log_configs({"int": 1})
 
     # and
@@ -206,10 +180,9 @@ def test_log_step_float(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # then
-    with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled") as run:
+    with Run(project=project, api_token=api_token, run_id=run_id, mode="disabled") as run:
         run.log(
             step=3.14,
             timestamp=datetime.now(),
@@ -226,10 +199,9 @@ def test_log_no_timestamp(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # then
-    with Run(project=project, api_token=api_token, family=family, run_id=run_id, mode="disabled") as run:
+    with Run(project=project, api_token=api_token, run_id=run_id, mode="disabled") as run:
         run.log(
             step=3.14,
             configs={
@@ -245,10 +217,9 @@ def test_resume(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # when
-    with Run(project=project, api_token=api_token, family=family, run_id=run_id, resume=True, mode="disabled") as run:
+    with Run(project=project, api_token=api_token, run_id=run_id, resume=True, mode="disabled") as run:
         run.log(
             step=3.14,
             configs={
@@ -265,13 +236,11 @@ def test_creation_time(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # when
     with Run(
         project=project,
         api_token=api_token,
-        family=family,
         run_id=run_id,
         creation_time=datetime.now(),
         mode="disabled",
@@ -286,13 +255,11 @@ def test_assign_experiment(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # when
     with Run(
         project=project,
         api_token=api_token,
-        family=family,
         run_id=run_id,
         experiment_name="experiment_id",
         mode="disabled",
@@ -307,13 +274,11 @@ def test_forking(api_token):
     # given
     project = "workspace/project"
     run_id = str(uuid.uuid4())
-    family = run_id
 
     # when
     with Run(
         project=project,
         api_token=api_token,
-        family=family,
         run_id=run_id,
         fork_run_id="parent-run-id",
         fork_step=3.14,