Skip to content

Commit

Permalink
Add appdef metadata to torchx event (#947)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #947

Add appdef metadata to the torchx event class. Scuba logs are not modified yet.

Differential Revision: D61632621
  • Loading branch information
zackycao authored and facebook-github-bot committed Aug 21, 2024
1 parent 86f4344 commit 909e06b
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 5 deletions.
5 changes: 4 additions & 1 deletion torchx/runner/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,12 @@ def run_component(
parent_run_id=parent_run_id,
)
handle = self.schedule(dryrun_info)
app = none_throws(dryrun_info._app)
ctx._torchx_event.workspace = workspace
ctx._torchx_event.scheduler = none_throws(dryrun_info._scheduler)
ctx._torchx_event.app_image = none_throws(dryrun_info._app).roles[0].image
ctx._torchx_event.app_image = app.roles[0].image
ctx._torchx_event.app_id = parse_app_handle(handle)[2]
ctx._torchx_event.app_metadata = app.metadata
return handle

def dryrun_component(
Expand Down Expand Up @@ -263,6 +265,7 @@ def run(
ctx._torchx_event.scheduler = none_throws(dryrun_info._scheduler)
ctx._torchx_event.app_image = none_throws(dryrun_info._app).roles[0].image
ctx._torchx_event.app_id = parse_app_handle(handle)[2]
ctx._torchx_event.app_metadata = app.metadata
return handle

def schedule(self, dryrun_info: AppDryRunInfo) -> AppHandle:
Expand Down
6 changes: 5 additions & 1 deletion torchx/runner/events/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import time
import traceback
from types import TracebackType
from typing import Optional, Type
from typing import Dict, Optional, Type

from torchx.runner.events.handlers import get_logging_handler

Expand Down Expand Up @@ -84,6 +84,7 @@ def __init__(
scheduler: Optional[str] = None,
app_id: Optional[str] = None,
app_image: Optional[str] = None,
app_metadata: Optional[Dict[str, str]] = None,
runcfg: Optional[str] = None,
workspace: Optional[str] = None,
) -> None:
Expand All @@ -92,6 +93,7 @@ def __init__(
scheduler or "",
app_id,
app_image=app_image,
app_metadata=app_metadata,
runcfg=runcfg,
workspace=workspace,
)
Expand Down Expand Up @@ -128,6 +130,7 @@ def _generate_torchx_event(
scheduler: str,
app_id: Optional[str] = None,
app_image: Optional[str] = None,
app_metadata: Optional[Dict[str, str]] = None,
runcfg: Optional[str] = None,
source: SourceType = SourceType.UNKNOWN,
workspace: Optional[str] = None,
Expand All @@ -138,6 +141,7 @@ def _generate_torchx_event(
api=api,
app_id=app_id,
app_image=app_image,
app_metadata=app_metadata,
runcfg=runcfg,
source=source,
workspace=workspace,
Expand Down
6 changes: 5 additions & 1 deletion torchx/runner/events/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import json
from dataclasses import asdict, dataclass
from enum import Enum
from typing import Optional, Union
from typing import Dict, Optional, Union


class SourceType(str, Enum):
Expand All @@ -30,17 +30,21 @@ class TorchxEvent:
api: Api name
app_id: Unique id that is set by the underlying scheduler
app_image: Image/container bundle that is used to execute request.
app_metadata: Metadata of the app (treatment of metadata is scheduler dependent)
runcfg: Run config that was used to schedule app.
source: Type of source the event is generated.
cpu_time_usec: CPU time spent in usec
wall_time_usec: Wall time spent in usec
start_epoch_time_usec: Epoch time in usec when runner event starts
workspace: Track how different workspaces/no workspace affect build and scheduler
"""

session: str
scheduler: str
api: str
app_id: Optional[str] = None
app_image: Optional[str] = None
app_metadata: Optional[Dict[str, str]] = None
runcfg: Optional[str] = None
raw_exception: Optional[str] = None
source: SourceType = SourceType.UNKNOWN
Expand Down
17 changes: 15 additions & 2 deletions torchx/runner/events/test/lib_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def assert_event(
self.assertEqual(actual_event.app_image, expected_event.app_image)
self.assertEqual(actual_event.runcfg, expected_event.runcfg)
self.assertEqual(actual_event.source, expected_event.source)
self.assertEqual(actual_event.app_metadata, expected_event.app_metadata)

@patch("torchx.runner.events.get_logging_handler")
def test_get_or_create_logger(self, logging_handler_mock: MagicMock) -> None:
Expand All @@ -41,11 +42,13 @@ def test_get_or_create_logger(self, logging_handler_mock: MagicMock) -> None:
self.assertIsInstance(logger.handlers[0], logging.NullHandler)

def test_event_created(self) -> None:
test_metadata = {"test_key": "test_value"}
event = TorchxEvent(
session="test_session",
scheduler="test_scheduler",
api="test_api",
app_image="test_app_image",
app_metadata=test_metadata,
workspace="test_workspace",
)
self.assertEqual("test_session", event.session)
Expand All @@ -54,13 +57,16 @@ def test_event_created(self) -> None:
self.assertEqual("test_app_image", event.app_image)
self.assertEqual(SourceType.UNKNOWN, event.source)
self.assertEqual("test_workspace", event.workspace)
self.assertEqual(test_metadata, event.app_metadata)

def test_event_deser(self) -> None:
test_metadata = {"test_key": "test_value"}
event = TorchxEvent(
session="test_session",
scheduler="test_scheduler",
api="test_api",
app_image="test_app_image",
app_metadata=test_metadata,
workspace="test_workspace",
source=SourceType.EXTERNAL,
)
Expand All @@ -78,14 +84,17 @@ def assert_torchx_event(self, expected: TorchxEvent, actual: TorchxEvent) -> Non
self.assertEqual(expected.app_image, actual.app_image)
self.assertEqual(expected.source, actual.source)
self.assertEqual(expected.workspace, actual.workspace)
self.assertEqual(expected.app_metadata, actual.app_metadata)

def test_create_context(self, _) -> None:
cfg = json.dumps({"test_key": "test_value"})
test_dict = {"test_key": "test_value"}
cfg = json.dumps(test_dict)
context = log_event(
"test_call",
"local",
"test_app_id",
app_image="test_app_image_id",
app_metadata=test_dict,
runcfg=cfg,
workspace="test_workspace",
)
Expand All @@ -95,19 +104,22 @@ def test_create_context(self, _) -> None:
"test_call",
"test_app_id",
app_image="test_app_image_id",
app_metadata=test_dict,
runcfg=cfg,
workspace="test_workspace",
)

self.assert_torchx_event(expected_torchx_event, context._torchx_event)

def test_record_event(self, record_mock: MagicMock) -> None:
cfg = json.dumps({"test_key": "test_value"})
test_dict = {"test_key": "test_value"}
cfg = json.dumps(test_dict)
with log_event(
"test_call",
"local",
"test_app_id",
app_image="test_app_image_id",
app_metadata=test_dict,
runcfg=cfg,
workspace="test_workspace",
) as ctx:
Expand All @@ -119,6 +131,7 @@ def test_record_event(self, record_mock: MagicMock) -> None:
"test_call",
"test_app_id",
app_image="test_app_image_id",
app_metadata=test_dict,
runcfg=cfg,
workspace="test_workspace",
cpu_time_usec=ctx._torchx_event.cpu_time_usec,
Expand Down

0 comments on commit 909e06b

Please sign in to comment.