From 89eb4bacb37b57d8f2b0c882a9e5387af7bc992d Mon Sep 17 00:00:00 2001 From: Naved Ansari Date: Fri, 20 Sep 2024 16:02:12 -0400 Subject: [PATCH 1/3] Add MetricsProcessor class This commit takes the methods merge_metrics and condense_metrics out of utils.py and puts them into the MetricsProcessor class. The callers have been updated to instantiate this class and use the methods provided by it. The unit tests have been moved to their own module. The tests needed some minor updates to run but otherwise have remained unchanged. --- openshift_metrics/merge.py | 14 +- openshift_metrics/metrics_processor.py | 104 +++ .../tests/test_metrics_processor.py | 549 ++++++++++++++ openshift_metrics/tests/test_utils.py | 675 ------------------ openshift_metrics/utils.py | 117 --- 5 files changed, 661 insertions(+), 798 deletions(-) create mode 100644 openshift_metrics/metrics_processor.py create mode 100644 openshift_metrics/tests/test_metrics_processor.py diff --git a/openshift_metrics/merge.py b/openshift_metrics/merge.py index 2b91dac..0954015 100644 --- a/openshift_metrics/merge.py +++ b/openshift_metrics/merge.py @@ -8,6 +8,7 @@ import utils +from metrics_processor import MetricsProcessor def compare_dates(date_str1, date_str2): """Returns true is date1 is earlier than date2""" @@ -33,20 +34,21 @@ def main(): else: output_file = f"{datetime.today().strftime('%Y-%m-%d')}.csv" - merged_dictionary = {} report_start_date = None report_end_date = None + processor = MetricsProcessor() + for file in files: with open(file, "r") as jsonfile: metrics_from_file = json.load(jsonfile) cpu_request_metrics = metrics_from_file["cpu_metrics"] memory_request_metrics = metrics_from_file["memory_metrics"] gpu_request_metrics = metrics_from_file.get("gpu_metrics", None) - utils.merge_metrics("cpu_request", cpu_request_metrics, merged_dictionary) - utils.merge_metrics("memory_request", memory_request_metrics, merged_dictionary) + processor.merge_metrics("cpu_request", cpu_request_metrics) + 
processor.merge_metrics("memory_request", memory_request_metrics) if gpu_request_metrics is not None: - utils.merge_metrics("gpu_request", gpu_request_metrics, merged_dictionary) + processor.merge_metrics("gpu_request", gpu_request_metrics) if report_start_date is None: report_start_date = metrics_from_file["start_date"] @@ -69,8 +71,8 @@ def main(): print("Warning: The report spans multiple months") report_month += " to " + datetime.strftime(report_end_date, "%Y-%m") - condensed_metrics_dict = utils.condense_metrics( - merged_dictionary, ["cpu_request", "memory_request", "gpu_request", "gpu_type"] + condensed_metrics_dict = processor.condense_metrics( + ["cpu_request", "memory_request", "gpu_request", "gpu_type"] ) utils.write_metrics_by_namespace( condensed_metrics_dict, diff --git a/openshift_metrics/metrics_processor.py b/openshift_metrics/metrics_processor.py new file mode 100644 index 0000000..29fac9c --- /dev/null +++ b/openshift_metrics/metrics_processor.py @@ -0,0 +1,104 @@ +from typing import List, Dict + +GPU_UNKNOWN_TYPE = "GPU_UNKNOWN_TYPE" + + +class MetricsProcessor: + """Provides methods for merging metrics and processing it for billing purposes""" + + def __init__(self, interval_minutes: int = 15, merged_data: dict = None): + self.interval_minutes = interval_minutes + self.merged_data = merged_data if merged_data is not None else {} + + def merge_metrics(self, metric_name, metric_list): + """Merge metrics (cpu, memory, gpu) by pod""" + + for metric in metric_list: + pod = metric["metric"]["pod"] + namespace = metric["metric"]["namespace"] + node = metric["metric"].get("node") + + gpu_type = None + gpu_resource = None + node_model = None + + unique_name = namespace + "+" + pod + if unique_name not in self.merged_data: + self.merged_data[unique_name] = {"namespace": namespace, "metrics": {}} + + if metric_name == "gpu_request": + gpu_type = metric["metric"].get( + "label_nvidia_com_gpu_product", GPU_UNKNOWN_TYPE + ) + gpu_resource = 
metric["metric"].get("resource") + node_model = metric["metric"].get("label_nvidia_com_gpu_machine") + + for value in metric["values"]: + epoch_time = value[0] + if epoch_time not in self.merged_data[unique_name]["metrics"]: + self.merged_data[unique_name]["metrics"][epoch_time] = {} + self.merged_data[unique_name]["metrics"][epoch_time][ + metric_name + ] = value[1] + if gpu_type: + self.merged_data[unique_name]["metrics"][epoch_time][ + "gpu_type" + ] = gpu_type + if gpu_resource: + self.merged_data[unique_name]["metrics"][epoch_time][ + "gpu_resource" + ] = gpu_resource + if node_model: + self.merged_data[unique_name]["metrics"][epoch_time][ + "node_model" + ] = node_model + if node: + self.merged_data[unique_name]["metrics"][epoch_time]["node"] = node + + def condense_metrics(self, metrics_to_check: List[str]) -> Dict: + """ + Checks if the value of metrics is the same, and removes redundant + metrics while updating the duration. If there's a gap in the reported + metrics then don't count that as part of duration. + """ + interval = self.interval_minutes * 60 + condensed_dict = {} + + for pod, pod_dict in self.merged_data.items(): + metrics_dict = pod_dict["metrics"] + new_metrics_dict = {} + epoch_times_list = sorted(metrics_dict.keys()) + + start_epoch_time = epoch_times_list[0] + + start_metric_dict = metrics_dict[start_epoch_time].copy() + + for i in range(len(epoch_times_list)): + epoch_time = epoch_times_list[i] + same_metrics = True + continuous_metrics = True + for metric in metrics_to_check: + if metrics_dict[start_epoch_time].get(metric, 0) != metrics_dict[epoch_time].get(metric, 0): # fmt: skip + same_metrics = False + + if i != 0 and epoch_time - epoch_times_list[i - 1] > interval: + # i.e. 
if the difference between 2 consecutive timestamps + # is more than the expected frequency then the pod was stopped + continuous_metrics = False + + if not same_metrics or not continuous_metrics: + duration = epoch_times_list[i - 1] - start_epoch_time + interval + start_metric_dict["duration"] = duration + new_metrics_dict[start_epoch_time] = start_metric_dict + start_epoch_time = epoch_time + start_metric_dict = metrics_dict[start_epoch_time].copy() + + duration = epoch_time - start_epoch_time + interval + start_metric_dict["duration"] = duration + new_metrics_dict[start_epoch_time] = start_metric_dict + + new_pod_dict = pod_dict.copy() + new_pod_dict["metrics"] = new_metrics_dict + condensed_dict[pod] = new_pod_dict + + return condensed_dict diff --git a/openshift_metrics/tests/test_metrics_processor.py b/openshift_metrics/tests/test_metrics_processor.py new file mode 100644 index 0000000..bb0f2a4 --- /dev/null +++ b/openshift_metrics/tests/test_metrics_processor.py @@ -0,0 +1,549 @@ +from unittest import TestCase +from openshift_metrics import metrics_processor, utils + + +class TestMergeMetrics(TestCase): + def test_merge_metrics_empty(self): + test_metric_list = [ + { + "metric": { + "pod": "pod1", + "namespace": "namespace1", + "resource": "cpu", + }, + "values": [ + [0, 10], + [60, 15], + [120, 20], + ], + }, + { + "metric": { + "pod": "pod2", + "namespace": "namespace1", + "resource": "cpu", + }, + "values": [ + [0, 30], + [60, 35], + [120, 40], + ], + }, + ] + expected_output_dict = { + "namespace1+pod1": { + "namespace": "namespace1", + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 15}, + 120: {"cpu": 20}, + }, + }, + "namespace1+pod2": { + "namespace": "namespace1", + "metrics": { + 0: {"cpu": 30}, + 60: {"cpu": 35}, + 120: {"cpu": 40}, + }, + }, + } + processor = metrics_processor.MetricsProcessor() + processor.merge_metrics("cpu", test_metric_list) + self.assertEqual(processor.merged_data, expected_output_dict) + + def 
test_merge_metrics_not_empty(self): + test_metric_list = [ + { + "metric": { + "pod": "pod1", + "namespace": "namespace1", + "resource": "mem", + }, + "values": [ + [0, 100], + [60, 150], + [120, 200], + ], + }, + { + "metric": { + "pod": "pod2", + "namespace": "namespace1", + "resource": "cpu", + }, + "values": [ + [60, 300], + ], + }, + ] + output_dict = { + "namespace1+pod1": { + "namespace": "namespace1", + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 15}, + 120: {"cpu": 20}, + }, + }, + "namespace1+pod2": { + "namespace": "namespace1", + "metrics": { + 0: {"cpu": 30}, + 60: {"cpu": 35}, + 120: {"cpu": 40}, + }, + }, + } + expected_output_dict = { + "namespace1+pod1": { + "namespace": "namespace1", + "metrics": { + 0: {"cpu": 10, "mem": 100}, + 60: {"cpu": 15, "mem": 150}, + 120: {"cpu": 20, "mem": 200}, + }, + }, + "namespace1+pod2": { + "namespace": "namespace1", + "metrics": { + 0: {"cpu": 30}, + 60: {"cpu": 35, "mem": 300}, + 120: {"cpu": 40}, + }, + }, + } + processor = metrics_processor.MetricsProcessor(merged_data=output_dict) + processor.merge_metrics("mem", test_metric_list) + self.assertEqual(processor.merged_data, expected_output_dict) + + def test_merge_metrics_overlapping_range(self): + test_metric_list = [ + { + "metric": { + "pod": "pod1", + "namespace": "namespace1", + "resource": "cpu", + }, + "values": [ + [0, 10], + [60, 10], + [120, 10], + ], + }, + ] + test_metric_list_2 = [ + { + "metric": { + "pod": "pod1", + "namespace": "namespace1", + "resource": "cpu", + }, + "values": [ + [60, 8], + [120, 8], + [180, 10], + ], + }, + ] + expected_output_dict = { + "namespace1+pod1": { + "namespace": "namespace1", + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 8}, + 120: {"cpu": 8}, + 180: {"cpu": 10}, + }, + }, + } + processor = metrics_processor.MetricsProcessor() + processor.merge_metrics("cpu", test_metric_list) + processor.merge_metrics("cpu", test_metric_list_2) + self.assertEqual(processor.merged_data, expected_output_dict) + + # trying to 
merge the same metrics again should not change anything + processor.merge_metrics("cpu", test_metric_list_2) + self.assertEqual(processor.merged_data, expected_output_dict) + + def test_merge_metrics_same_pod_name(self): + test_metric_list = [ + { + "metric": { + "pod": "podA", + "namespace": "namespace1", + "resource": "cpu", + }, + "values": [ + [0, 10], + [60, 15], + [120, 20], + ], + }, + { + "metric": { + "pod": "podA", + "namespace": "namespace2", + "resource": "cpu", + }, + "values": [ + [0, 30], + [60, 35], + [120, 40], + ], + }, + ] + expected_output_dict = { + "namespace1+podA": { + "namespace": "namespace1", + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 15}, + 120: {"cpu": 20}, + }, + }, + "namespace2+podA": { + "namespace": "namespace2", + "metrics": { + 0: {"cpu": 30}, + 60: {"cpu": 35}, + 120: {"cpu": 40}, + }, + }, + } + processor = metrics_processor.MetricsProcessor() + processor.merge_metrics("cpu", test_metric_list) + self.assertEqual(processor.merged_data, expected_output_dict) + + def test_merge_metrics_not_empty_with_gpu(self): + test_metric_list = [ + { + "metric": { + "pod": "pod1", + "namespace": "namespace1", + "resource": "nvidia.com/gpu", + "label_nvidia_com_gpu_product": "Tesla-V100-PCIE-32GB", + }, + "values": [ + [0, 1], + [60, 1], + [120, 2], + ], + }, + ] + output_dict = { + "namespace1+pod1": { + "namespace": "namespace1", + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 15}, + 120: {"cpu": 20}, + }, + }, + } + expected_output_dict = { + "namespace1+pod1": { + "namespace": "namespace1", + "metrics": { + 0: { + "cpu": 10, + "gpu_request": 1, + "gpu_type": "Tesla-V100-PCIE-32GB", + "gpu_resource": "nvidia.com/gpu", + }, + 60: { + "cpu": 15, + "gpu_request": 1, + "gpu_type": "Tesla-V100-PCIE-32GB", + "gpu_resource": "nvidia.com/gpu", + }, + 120: { + "cpu": 20, + "gpu_request": 2, + "gpu_type": "Tesla-V100-PCIE-32GB", + "gpu_resource": "nvidia.com/gpu", + }, + }, + }, + } + processor = 
metrics_processor.MetricsProcessor(merged_data=output_dict) + processor.merge_metrics("gpu_request", test_metric_list) + self.assertEqual(processor.merged_data, expected_output_dict) + + +class TestCondenseMetrics(TestCase): + def test_condense_metrics(self): + test_input_dict = { + "pod1": { + "metrics": { + 0: { + "cpu": 10, + "mem": 15, + }, + 900: { + "cpu": 10, + "mem": 15, + }, + } + }, + "pod2": { + "metrics": { + 0: { + "cpu": 2, + "mem": 256, + }, + 900: { + "cpu": 2, + "mem": 256, + }, + } + }, + } + expected_condensed_dict = { + "pod1": {"metrics": {0: {"cpu": 10, "mem": 15, "duration": 1800}}}, + "pod2": {"metrics": {0: {"cpu": 2, "mem": 256, "duration": 1800}}}, + } + processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) + condensed_dict = processor.condense_metrics(["cpu", "mem"]) + self.assertEqual(condensed_dict, expected_condensed_dict) + + def test_condense_metrics_no_interval(self): + test_input_dict = { + "pod1": { + "metrics": { + 0: { + "cpu": 10, + "mem": 15, + } + } + }, + } + expected_condensed_dict = { + "pod1": {"metrics": {0: {"cpu": 10, "mem": 15, "duration": 900}}}, + } + processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) + condensed_dict = processor.condense_metrics(["cpu", "mem"]) + self.assertEqual(condensed_dict, expected_condensed_dict) + + def test_condense_metrics_with_change(self): + test_input_dict = { + "pod2": { + "metrics": { + 0: { + "cpu": 20, + "mem": 25, + }, + 900: { + "cpu": 20, + "mem": 25, + }, + 1800: { + "cpu": 25, + "mem": 25, + }, + 2700: { + "cpu": 20, + "mem": 25, + }, + } + }, + } + expected_condensed_dict = { + "pod2": { + "metrics": { + 0: {"cpu": 20, "mem": 25, "duration": 1800}, + 1800: {"cpu": 25, "mem": 25, "duration": 900}, + 2700: {"cpu": 20, "mem": 25, "duration": 900}, + } + }, + } + processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) + condensed_dict = processor.condense_metrics(["cpu", "mem"]) + self.assertEqual(condensed_dict, 
expected_condensed_dict) + + def test_condense_metrics_skip_metric(self): + test_input_dict = { + "pod3": { + "metrics": { + 0: { + "cpu": 30, + "mem": 35, + "gpu": 1, + }, + 900: { + "cpu": 30, + "mem": 35, + "gpu": 2, + }, + } + } + } + expected_condensed_dict = { + "pod3": { + "metrics": {0: {"cpu": 30, "mem": 35, "gpu": 1, "duration": 1800}} + }, + } + processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) + condensed_dict = processor.condense_metrics(["cpu", "mem"]) + self.assertEqual(condensed_dict, expected_condensed_dict) + + def test_condense_metrics_with_timeskips(self): + test_input_dict = { + "pod1": { + "metrics": { + 0: { + "cpu": 1, + "mem": 4, + }, + 900: { + "cpu": 1, + "mem": 4, + }, + 1800: { + "cpu": 1, + "mem": 4, + }, + 5400: { # time skipped + "cpu": 1, + "mem": 4, + }, + 6300: { + "cpu": 1, + "mem": 4, + }, + 8100: { # metric changed and time skipped + "cpu": 2, + "mem": 8, + }, + 9000: { + "cpu": 2, + "mem": 8, + }, + } + }, + "pod2": { + "metrics": { + 0: { + "cpu": 2, + "mem": 16, + }, + 900: { + "cpu": 2, + "mem": 16, + }, + } + }, + } + expected_condensed_dict = { + "pod1": { + "metrics": { + 0: {"cpu": 1, "mem": 4, "duration": 2700}, + 5400: {"cpu": 1, "mem": 4, "duration": 1800}, + 8100: {"cpu": 2, "mem": 8, "duration": 1800}, + } + }, + "pod2": {"metrics": {0: {"cpu": 2, "mem": 16, "duration": 1800}}}, + } + processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) + condensed_dict = processor.condense_metrics(["cpu", "mem"]) + self.assertEqual(condensed_dict, expected_condensed_dict) + + def test_condense_metrics_with_changing_gpu(self): + test_input_dict = { + "pod1": { + "metrics": { + 0: { + "cpu": 1, + "mem": 4, + }, + 900: { + "cpu": 1, + "mem": 4, + }, + 1800: { # pod acquires a GPU + "cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_V100, + }, + 2700: { + "cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_V100, + }, + 3600: { # type of GPU is changed + 
"cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_A100_SXM4, + }, + 4500: { + "cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_A100_SXM4, + }, + 5400: { + "cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_A100_SXM4, + }, + 6300: { # count of GPU is changed + "cpu": 1, + "mem": 4, + "gpu_request": 3, + "gpu_type": utils.GPU_A100_SXM4, + }, + 7200: { + "cpu": 1, + "mem": 4, + "gpu_request": 3, + "gpu_type": utils.GPU_A100_SXM4, + }, + 8100: { # no longer using GPUs + "cpu": 1, + "mem": 4, + }, + } + }, + } + expected_condensed_dict = { + "pod1": { + "metrics": { + 0: {"cpu": 1, "mem": 4, "duration": 1800}, + 1800: { + "cpu": 1, + "mem": 4, + "duration": 1800, + "gpu_request": 1, + "gpu_type": utils.GPU_V100, + }, + 3600: { + "cpu": 1, + "mem": 4, + "duration": 2700, + "gpu_request": 1, + "gpu_type": utils.GPU_A100_SXM4, + }, + 6300: { + "cpu": 1, + "mem": 4, + "duration": 1800, + "gpu_request": 3, + "gpu_type": utils.GPU_A100_SXM4, + }, + 8100: { + "cpu": 1, + "mem": 4, + "duration": 900, + }, + } + }, + } + processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) + condensed_dict = processor.condense_metrics( + ["cpu", "mem", "gpu_request", "gpu_type"] + ) + self.assertEqual(condensed_dict, expected_condensed_dict) diff --git a/openshift_metrics/tests/test_utils.py b/openshift_metrics/tests/test_utils.py index e9da675..9ff3045 100644 --- a/openshift_metrics/tests/test_utils.py +++ b/openshift_metrics/tests/test_utils.py @@ -69,681 +69,6 @@ def test_get_namespace_attributes(self, mock_session, mock_post): self.assertEqual(namespaces_dict, expected_namespaces_dict) -class TestMergeMetrics(TestCase): - - def test_merge_metrics_empty(self): - test_metric_list = [ - { - "metric": { - "pod": "pod1", - "namespace": "namespace1", - "resource": "cpu", - }, - "values": [ - [0, 10], - [60, 15], - [120, 20], - ] - }, - { - "metric": { - "pod": "pod2", - "namespace": "namespace1", - "resource": "cpu", - }, - 
"values": [ - [0, 30], - [60, 35], - [120, 40], - ] - } - ] - expected_output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 10 - }, - 60: { - "cpu": 15 - }, - 120: { - "cpu": 20 - }, - } - }, - "namespace1+pod2": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 30 - }, - 60: { - "cpu": 35 - }, - 120: { - "cpu": 40 - }, - } - } - } - output_dict = {} - utils.merge_metrics('cpu', test_metric_list, output_dict) - self.assertEqual(output_dict, expected_output_dict) - - def test_merge_metrics_not_empty(self): - test_metric_list = [ - { - "metric": { - "pod": "pod1", - "namespace": "namespace1", - "resource": "cpu", - }, - "values": [ - [0, 100], - [60, 150], - [120, 200], - ] - }, - { - "metric": { - "pod": "pod2", - "namespace": "namespace1", - "resource": "cpu", - }, - "values": [ - [60, 300], - ] - } - ] - output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 10 - }, - 60: { - "cpu": 15 - }, - 120: { - "cpu": 20 - }, - } - }, - "namespace1+pod2": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 30 - }, - 60: { - "cpu": 35 - }, - 120: { - "cpu": 40 - }, - } - } - } - expected_output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 10, - "mem": 100 - }, - 60: { - "cpu": 15, - "mem": 150 - }, - 120: { - "cpu": 20, - "mem": 200 - }, - } - }, - "namespace1+pod2": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 30 - }, - 60: { - "cpu": 35, - "mem": 300 - }, - 120: { - "cpu": 40 - }, - } - } - } - utils.merge_metrics('mem', test_metric_list, output_dict) - self.assertEqual(output_dict, expected_output_dict) - - def test_merge_metrics_overlapping_range(self): - test_metric_list = [ - { - "metric": { - "pod": "pod1", - "namespace": "namespace1", - "resource": "cpu", - }, - "values": [ - [0, 10], - [60, 10], - [120, 10], - ] - }, - - ] - test_metric_list_2 = [ - { - "metric": { - "pod": "pod1", - 
"namespace": "namespace1", - "resource": "cpu", - }, - "values": [ - [60, 8], - [120, 8], - [180, 10], - ] - }, - - ] - expected_output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 10 - }, - 60: { - "cpu": 8 - }, - 120: { - "cpu": 8 - }, - 180: { - "cpu": 10 - }, - } - }, - } - output_dict = {} - utils.merge_metrics('cpu', test_metric_list, output_dict) - utils.merge_metrics('cpu', test_metric_list_2, output_dict) - self.assertEqual(output_dict, expected_output_dict) - - # trying to merge the same metrics again should not change anything - utils.merge_metrics('cpu', test_metric_list_2, output_dict) - self.assertEqual(output_dict, expected_output_dict) - - def test_merge_metrics_same_pod_name(self): - test_metric_list = [ - { - "metric": { - "pod": "podA", - "namespace": "namespace1", - "resource": "cpu", - }, - "values": [ - [0, 10], - [60, 15], - [120, 20], - ] - }, - { - "metric": { - "pod": "podA", - "namespace": "namespace2", - "resource": "cpu", - }, - "values": [ - [0, 30], - [60, 35], - [120, 40], - ] - } - ] - expected_output_dict = { - "namespace1+podA": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 10 - }, - 60: { - "cpu": 15 - }, - 120: { - "cpu": 20 - }, - } - }, - "namespace2+podA": { - "namespace": "namespace2", - "metrics": { - 0: { - "cpu": 30 - }, - 60: { - "cpu": 35 - }, - 120: { - "cpu": 40 - }, - } - } - } - output_dict = {} - utils.merge_metrics('cpu', test_metric_list, output_dict) - self.assertEqual(output_dict, expected_output_dict) - - def test_merge_metrics_not_empty_with_gpu(self): - test_metric_list = [ - { - "metric": { - "pod": "pod1", - "namespace": "namespace1", - "resource": "nvidia.com/gpu", - "label_nvidia_com_gpu_product": "Tesla-V100-PCIE-32GB" - }, - "values": [ - [0, 1], - [60, 1], - [120, 2], - ] - }, - ] - output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 10 - }, - 60: { - "cpu": 15 - }, - 120: { - "cpu": 20 - }, - 
} - }, - } - expected_output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 10, - "gpu_request": 1, - "gpu_type": "Tesla-V100-PCIE-32GB", - "gpu_resource": "nvidia.com/gpu", - }, - 60: { - "cpu": 15, - "gpu_request": 1, - "gpu_type": "Tesla-V100-PCIE-32GB", - "gpu_resource": "nvidia.com/gpu", - }, - 120: { - "cpu": 20, - "gpu_request": 2, - "gpu_type": "Tesla-V100-PCIE-32GB", - "gpu_resource": "nvidia.com/gpu", - }, - } - }, - } - utils.merge_metrics('gpu_request', test_metric_list, output_dict) - self.assertEqual(output_dict, expected_output_dict) - - -class TestCondenseMetrics(TestCase): - - def test_condense_metrics(self): - test_input_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 10, - "mem": 15, - }, - 900: { - "cpu": 10, - "mem": 15, - } - } - }, - "pod2": { - "metrics": { - 0: { - "cpu": 2, - "mem": 256, - }, - 900: { - "cpu": 2, - "mem": 256, - } - } - }, - } - expected_condensed_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 10, - "mem": 15, - "duration": 1800 - } - } - }, - "pod2": { - "metrics": { - 0: { - "cpu": 2, - "mem": 256, - "duration": 1800 - } - } - }, - } - condensed_dict = utils.condense_metrics(test_input_dict,['cpu','mem']) - self.assertEqual(condensed_dict, expected_condensed_dict) - - - def test_condense_metrics_no_interval(self): - test_input_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 10, - "mem": 15, - } - } - }, - } - expected_condensed_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 10, - "mem": 15, - "duration": 900 - } - } - }, - } - condensed_dict = utils.condense_metrics(test_input_dict,['cpu','mem']) - self.assertEqual(condensed_dict, expected_condensed_dict) - - def test_condense_metrics_with_change(self): - test_input_dict = { - "pod2": { - "metrics": { - 0: { - "cpu": 20, - "mem": 25, - }, - 900: { - "cpu": 20, - "mem": 25, - }, - 1800: { - "cpu": 25, - "mem": 25, - }, - 2700: { - "cpu": 20, - "mem": 25, - } - } - }, - } - expected_condensed_dict = { - "pod2": 
{ - "metrics": { - 0: { - "cpu": 20, - "mem": 25, - "duration": 1800 - }, - 1800: { - "cpu": 25, - "mem": 25, - "duration": 900 - }, - 2700: { - "cpu": 20, - "mem": 25, - "duration": 900 - } - } - }, - } - condensed_dict = utils.condense_metrics(test_input_dict,['cpu','mem']) - self.assertEqual(condensed_dict, expected_condensed_dict) - - def test_condense_metrics_skip_metric(self): - test_input_dict = { - "pod3": { - "metrics": { - 0: { - "cpu": 30, - "mem": 35, - "gpu": 1, - }, - 900: { - "cpu": 30, - "mem": 35, - "gpu": 2, - }, - } - } - } - expected_condensed_dict = { - "pod3": { - "metrics": { - 0: { - "cpu": 30, - "mem": 35, - "gpu": 1, - "duration": 1800 - } - } - }, - } - condensed_dict = utils.condense_metrics(test_input_dict,['cpu','mem']) - self.assertEqual(condensed_dict, expected_condensed_dict) - - def test_condense_metrics_with_timeskips(self): - test_input_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 1, - "mem": 4, - }, - 900: { - "cpu": 1, - "mem": 4, - }, - 1800: { - "cpu": 1, - "mem": 4, - }, - 5400: { # time skipped - "cpu": 1, - "mem": 4, - }, - 6300: { - "cpu": 1, - "mem": 4, - }, - 8100: { # metric changed and time skipped - "cpu": 2, - "mem": 8, - }, - 9000: { - "cpu": 2, - "mem": 8, - }, - } - }, - "pod2": { - "metrics": { - 0: { - "cpu": 2, - "mem": 16, - }, - 900: { - "cpu": 2, - "mem": 16, - } - } - }, - } - expected_condensed_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 1, - "mem": 4, - "duration": 2700 - }, - 5400: { - "cpu": 1, - "mem": 4, - "duration": 1800 - }, - 8100: { - "cpu": 2, - "mem": 8, - "duration": 1800 - }, - } - }, - "pod2": { - "metrics": { - 0: { - "cpu": 2, - "mem": 16, - "duration": 1800 - } - } - }, - } - condensed_dict = utils.condense_metrics(test_input_dict,['cpu','mem']) - self.assertEqual(condensed_dict, expected_condensed_dict) - - def test_condense_metrics_with_changing_gpu(self): - test_input_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 1, - "mem": 4, - }, - 900: { - "cpu": 1, - "mem": 
4, - }, - 1800: { # pod acquires a GPU - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_V100, - }, - 2700: { - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_V100, - }, - 3600: { # type of GPU is changed - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_A100_SXM4, - }, - 4500: { - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_A100_SXM4, - }, - 5400: { - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_A100_SXM4, - }, - 6300: { # count of GPU is changed - "cpu": 1, - "mem": 4, - "gpu_request": 3, - "gpu_type": utils.GPU_A100_SXM4, - }, - 7200: { - "cpu": 1, - "mem": 4, - "gpu_request": 3, - "gpu_type": utils.GPU_A100_SXM4, - }, - 8100: { # no longer using GPUs - "cpu": 1, - "mem": 4, - }, - } - }, - } - expected_condensed_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 1, - "mem": 4, - "duration": 1800 - }, - 1800: { - "cpu": 1, - "mem": 4, - "duration": 1800, - "gpu_request": 1, - "gpu_type": utils.GPU_V100, - }, - 3600: { - "cpu": 1, - "mem": 4, - "duration": 2700, - "gpu_request": 1, - "gpu_type": utils.GPU_A100_SXM4, - }, - 6300: { - "cpu": 1, - "mem": 4, - "duration": 1800, - "gpu_request": 3, - "gpu_type": utils.GPU_A100_SXM4, - }, - 8100: { - "cpu": 1, - "mem": 4, - "duration": 900, - }, - } - }, - } - condensed_dict = utils.condense_metrics(test_input_dict,['cpu','mem', 'gpu_request', 'gpu_type']) - self.assertEqual(condensed_dict, expected_condensed_dict) - - class TestWriteMetricsByPod(TestCase): @mock.patch('openshift_metrics.utils.get_namespace_attributes') diff --git a/openshift_metrics/utils.py b/openshift_metrics/utils.py index a5b816e..aae6d1f 100755 --- a/openshift_metrics/utils.py +++ b/openshift_metrics/utils.py @@ -207,123 +207,6 @@ def get_service_unit(cpu_count, memory_count, gpu_count, gpu_type, gpu_resource) return su_type, su_count, determining_resource -def merge_metrics(metric_name, metric_list, output_dict): - """ - Merge metrics by pod but 
since pod names aren't guaranteed to be unique across - namespaces, we combine the namespace and podname together when generating the - output dictionary so it contains all pods. - """ - - for metric in metric_list: - pod = metric["metric"]["pod"] - namespace = metric["metric"]["namespace"] - node = metric["metric"].get("node") - - gpu_type = None - gpu_resource = None - node_model = None - - unique_name = namespace + "+" + pod - if unique_name not in output_dict: - output_dict[unique_name] = {"namespace": namespace, "metrics": {}} - - if metric_name == "gpu_request": - gpu_type = metric["metric"].get("label_nvidia_com_gpu_product", GPU_UNKNOWN_TYPE) - gpu_resource = metric["metric"].get("resource") - node_model = metric["metric"].get("label_nvidia_com_gpu_machine") - - for value in metric["values"]: - epoch_time = value[0] - if epoch_time not in output_dict[unique_name]["metrics"]: - output_dict[unique_name]["metrics"][epoch_time] = {} - output_dict[unique_name]["metrics"][epoch_time][metric_name] = value[1] - if gpu_type: - output_dict[unique_name]["metrics"][epoch_time]['gpu_type'] = gpu_type - if gpu_resource: - output_dict[unique_name]["metrics"][epoch_time]['gpu_resource'] = gpu_resource - if node_model: - output_dict[unique_name]["metrics"][epoch_time]['node_model'] = node_model - if node: - output_dict[unique_name]["metrics"][epoch_time]['node'] = node - - return output_dict - - -def condense_metrics(input_metrics_dict, metrics_to_check): - """ - Checks if the value of metrics is the same, and removes redundant - metrics while updating the duration. If there's a gap in the reported - metrics then don't count that as part of duration. - - Here's a sample input dictionary in which I have separated missing metrics - or different metrics by empty lines. 
- - {'naved-test+test-pod': {'gpu_type': 'No GPU', - 'metrics': {1711741500: {'cpu_request': '1', - 'memory_request': '3221225472'}, - 1711742400: {'cpu_request': '1', - 'memory_request': '3221225472'}, - 1711743300: {'cpu_request': '1', - 'memory_request': '3221225472'}, - 1711744200: {'cpu_request': '1', - 'memory_request': '3221225472'}, - - 1711746000: {'cpu_request': '1', - 'memory_request': '3221225472'}, - - 1711746900: {'cpu_request': '1', - 'memory_request': '4294967296'}, - 1711747800: {'cpu_request': '1', - 'memory_request': '4294967296'}, - 1711748700: {'cpu_request': '1', - 'memory_request': '4294967296'}, - - 1711765800: {'cpu_request': '1', - 'memory_request': '4294967296'}}, - 'namespace': 'naved-test'}} - """ - interval = STEP_MIN * 60 - condensed_dict = {} - for pod, pod_dict in input_metrics_dict.items(): - metrics_dict = pod_dict["metrics"] - new_metrics_dict = {} - epoch_times_list = sorted(metrics_dict.keys()) - - start_epoch_time = epoch_times_list[0] - - start_metric_dict = metrics_dict[start_epoch_time].copy() - - for i in range(len(epoch_times_list)): - epoch_time = epoch_times_list[i] - same_metrics = True - continuous_metrics = True - for metric in metrics_to_check: - if metrics_dict[start_epoch_time].get(metric, 0) != metrics_dict[epoch_time].get(metric, 0): # fmt: skip - same_metrics = False - - if i !=0 and epoch_time - epoch_times_list[i-1]> interval: - # i.e. 
if the difference between 2 consecutive timestamps - # is more than the expected frequency then the pod was stopped - continuous_metrics = False - - if not same_metrics or not continuous_metrics: - duration = epoch_times_list[i-1] - start_epoch_time + interval - start_metric_dict["duration"] = duration - new_metrics_dict[start_epoch_time] = start_metric_dict - start_epoch_time = epoch_time - start_metric_dict = metrics_dict[start_epoch_time].copy() - - duration = epoch_time - start_epoch_time + interval - start_metric_dict["duration"] = duration - new_metrics_dict[start_epoch_time] = start_metric_dict - - new_pod_dict = pod_dict.copy() - new_pod_dict["metrics"] = new_metrics_dict - condensed_dict[pod] = new_pod_dict - - return condensed_dict - - def csv_writer(rows, file_name): """Writes rows as csv to file_name""" print(f"Writing csv to {file_name}") From 98f4e33bd33ae0cd86b2a8cfc69c2f05119b9513 Mon Sep 17 00:00:00 2001 From: Naved Ansari Date: Mon, 23 Sep 2024 13:49:35 -0400 Subject: [PATCH 2/3] Restructure `merged_data` from concatenated keys to being mapped by namespace Earlier the keys were concatenated `namespace` + `pod_name`. So, this commit changes the dictionary from `merged_data[namespace+pod_name]` to `merged_data[namespace][pod_name]` which I think is a better way to organize this. The tests have been updated to work with the new structure. 
--- openshift_metrics/metrics_processor.py | 104 +-- .../tests/test_metrics_processor.py | 645 +++++++++--------- openshift_metrics/tests/test_utils.py | 254 +++---- openshift_metrics/utils.py | 144 ++-- 4 files changed, 596 insertions(+), 551 deletions(-) diff --git a/openshift_metrics/metrics_processor.py b/openshift_metrics/metrics_processor.py index 29fac9c..433b0b5 100644 --- a/openshift_metrics/metrics_processor.py +++ b/openshift_metrics/metrics_processor.py @@ -22,9 +22,10 @@ def merge_metrics(self, metric_name, metric_list): gpu_resource = None node_model = None - unique_name = namespace + "+" + pod - if unique_name not in self.merged_data: - self.merged_data[unique_name] = {"namespace": namespace, "metrics": {}} + if namespace not in self.merged_data: + self.merged_data[namespace] = {} + if pod not in self.merged_data[namespace]: + self.merged_data[namespace][pod] = {"metrics": {}} if metric_name == "gpu_request": gpu_type = metric["metric"].get( @@ -35,25 +36,29 @@ def merge_metrics(self, metric_name, metric_list): for value in metric["values"]: epoch_time = value[0] - if epoch_time not in self.merged_data[unique_name]["metrics"]: - self.merged_data[unique_name]["metrics"][epoch_time] = {} - self.merged_data[unique_name]["metrics"][epoch_time][ + + if epoch_time not in self.merged_data[namespace][pod]["metrics"]: + self.merged_data[namespace][pod]["metrics"][epoch_time] = {} + + self.merged_data[namespace][pod]["metrics"][epoch_time][ metric_name ] = value[1] if gpu_type: - self.merged_data[unique_name]["metrics"][epoch_time][ + self.merged_data[namespace][pod]["metrics"][epoch_time][ "gpu_type" ] = gpu_type if gpu_resource: - self.merged_data[unique_name]["metrics"][epoch_time][ + self.merged_data[namespace][pod]["metrics"][epoch_time][ "gpu_resource" ] = gpu_resource if node_model: - self.merged_data[unique_name]["metrics"][epoch_time][ + self.merged_data[namespace][pod]["metrics"][epoch_time][ "node_model" ] = node_model if node: - 
self.merged_data[unique_name]["metrics"][epoch_time]["node"] = node + self.merged_data[namespace][pod]["metrics"][epoch_time][ + "node" + ] = node def condense_metrics(self, metrics_to_check: List[str]) -> Dict: """ @@ -64,41 +69,48 @@ def condense_metrics(self, metrics_to_check: List[str]) -> Dict: interval = self.interval_minutes * 60 condensed_dict = {} - for pod, pod_dict in self.merged_data.items(): - metrics_dict = pod_dict["metrics"] - new_metrics_dict = {} - epoch_times_list = sorted(metrics_dict.keys()) - - start_epoch_time = epoch_times_list[0] - - start_metric_dict = metrics_dict[start_epoch_time].copy() - - for i in range(len(epoch_times_list)): - epoch_time = epoch_times_list[i] - same_metrics = True - continuous_metrics = True - for metric in metrics_to_check: - if metrics_dict[start_epoch_time].get(metric, 0) != metrics_dict[epoch_time].get(metric, 0): # fmt: skip - same_metrics = False - - if i != 0 and epoch_time - epoch_times_list[i - 1] > interval: - # i.e. if the difference between 2 consecutive timestamps - # is more than the expected frequency then the pod was stopped - continuous_metrics = False - - if not same_metrics or not continuous_metrics: - duration = epoch_times_list[i - 1] - start_epoch_time + interval - start_metric_dict["duration"] = duration - new_metrics_dict[start_epoch_time] = start_metric_dict - start_epoch_time = epoch_time - start_metric_dict = metrics_dict[start_epoch_time].copy() - - duration = epoch_time - start_epoch_time + interval - start_metric_dict["duration"] = duration - new_metrics_dict[start_epoch_time] = start_metric_dict - - new_pod_dict = pod_dict.copy() - new_pod_dict["metrics"] = new_metrics_dict - condensed_dict[pod] = new_pod_dict + for namespace, pods in self.merged_data.items(): + + if namespace not in condensed_dict: + condensed_dict[namespace] = {} + + for pod, pod_dict in pods.items(): + + metrics_dict = pod_dict["metrics"] + new_metrics_dict = {} + epoch_times_list = sorted(metrics_dict.keys()) + + 
start_epoch_time = epoch_times_list[0] + + start_metric_dict = metrics_dict[start_epoch_time].copy() + + for i in range(len(epoch_times_list)): + epoch_time = epoch_times_list[i] + same_metrics = True + continuous_metrics = True + for metric in metrics_to_check: + # If either cpu, memory or gpu request is diferent. + if metrics_dict[start_epoch_time].get(metric, 0) != metrics_dict[epoch_time].get(metric, 0): # fmt: skip + same_metrics = False + + if i != 0 and epoch_time - epoch_times_list[i - 1] > interval: + # i.e. if the difference between 2 consecutive timestamps + # is more than the expected frequency then the pod was stopped + continuous_metrics = False + + if not same_metrics or not continuous_metrics: + duration = epoch_times_list[i - 1] - start_epoch_time + interval + start_metric_dict["duration"] = duration + new_metrics_dict[start_epoch_time] = start_metric_dict + start_epoch_time = epoch_time + start_metric_dict = metrics_dict[start_epoch_time].copy() + + duration = epoch_time - start_epoch_time + interval + start_metric_dict["duration"] = duration + new_metrics_dict[start_epoch_time] = start_metric_dict + + new_pod_dict = pod_dict.copy() + new_pod_dict["metrics"] = new_metrics_dict + condensed_dict[namespace][pod] = new_pod_dict return condensed_dict diff --git a/openshift_metrics/tests/test_metrics_processor.py b/openshift_metrics/tests/test_metrics_processor.py index bb0f2a4..72141af 100644 --- a/openshift_metrics/tests/test_metrics_processor.py +++ b/openshift_metrics/tests/test_metrics_processor.py @@ -31,22 +31,22 @@ def test_merge_metrics_empty(self): }, ] expected_output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: {"cpu": 10}, - 60: {"cpu": 15}, - 120: {"cpu": 20}, + "namespace1": { + "pod1": { + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 15}, + 120: {"cpu": 20}, + }, }, - }, - "namespace1+pod2": { - "namespace": "namespace1", - "metrics": { - 0: {"cpu": 30}, - 60: {"cpu": 35}, - 120: {"cpu": 40}, + "pod2": 
{ + "metrics": { + 0: {"cpu": 30}, + 60: {"cpu": 35}, + 120: {"cpu": 40}, + }, }, - }, + } } processor = metrics_processor.MetricsProcessor() processor.merge_metrics("cpu", test_metric_list) @@ -78,40 +78,40 @@ def test_merge_metrics_not_empty(self): }, ] output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: {"cpu": 10}, - 60: {"cpu": 15}, - 120: {"cpu": 20}, + "namespace1": { + "pod1": { + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 15}, + 120: {"cpu": 20}, + }, }, - }, - "namespace1+pod2": { - "namespace": "namespace1", - "metrics": { - 0: {"cpu": 30}, - 60: {"cpu": 35}, - 120: {"cpu": 40}, + "pod2": { + "metrics": { + 0: {"cpu": 30}, + 60: {"cpu": 35}, + 120: {"cpu": 40}, + }, }, - }, + } } expected_output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: {"cpu": 10, "mem": 100}, - 60: {"cpu": 15, "mem": 150}, - 120: {"cpu": 20, "mem": 200}, + "namespace1": { + "pod1": { + "metrics": { + 0: {"cpu": 10, "mem": 100}, + 60: {"cpu": 15, "mem": 150}, + 120: {"cpu": 20, "mem": 200}, + }, }, - }, - "namespace1+pod2": { - "namespace": "namespace1", - "metrics": { - 0: {"cpu": 30}, - 60: {"cpu": 35, "mem": 300}, - 120: {"cpu": 40}, + "pod2": { + "metrics": { + 0: {"cpu": 30}, + 60: {"cpu": 35, "mem": 300}, + 120: {"cpu": 40}, + }, }, - }, + } } processor = metrics_processor.MetricsProcessor(merged_data=output_dict) processor.merge_metrics("mem", test_metric_list) @@ -147,15 +147,16 @@ def test_merge_metrics_overlapping_range(self): }, ] expected_output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: {"cpu": 10}, - 60: {"cpu": 8}, - 120: {"cpu": 8}, - 180: {"cpu": 10}, + "namespace1": { + "pod1": { + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 8}, + 120: {"cpu": 8}, + 180: {"cpu": 10}, + }, }, - }, + } } processor = metrics_processor.MetricsProcessor() processor.merge_metrics("cpu", test_metric_list) @@ -194,20 +195,22 @@ def test_merge_metrics_same_pod_name(self): }, ] 
expected_output_dict = { - "namespace1+podA": { - "namespace": "namespace1", - "metrics": { - 0: {"cpu": 10}, - 60: {"cpu": 15}, - 120: {"cpu": 20}, - }, + "namespace1": { + "podA": { + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 15}, + 120: {"cpu": 20}, + }, + } }, - "namespace2+podA": { - "namespace": "namespace2", - "metrics": { - 0: {"cpu": 30}, - 60: {"cpu": 35}, - 120: {"cpu": 40}, + "namespace2": { + "podA": { + "metrics": { + 0: {"cpu": 30}, + 60: {"cpu": 35}, + 120: {"cpu": 40}, + }, }, }, } @@ -232,39 +235,41 @@ def test_merge_metrics_not_empty_with_gpu(self): }, ] output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: {"cpu": 10}, - 60: {"cpu": 15}, - 120: {"cpu": 20}, + "namespace1": { + "pod1": { + "metrics": { + 0: {"cpu": 10}, + 60: {"cpu": 15}, + 120: {"cpu": 20}, + }, }, - }, + } } expected_output_dict = { - "namespace1+pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu": 10, - "gpu_request": 1, - "gpu_type": "Tesla-V100-PCIE-32GB", - "gpu_resource": "nvidia.com/gpu", - }, - 60: { - "cpu": 15, - "gpu_request": 1, - "gpu_type": "Tesla-V100-PCIE-32GB", - "gpu_resource": "nvidia.com/gpu", - }, - 120: { - "cpu": 20, - "gpu_request": 2, - "gpu_type": "Tesla-V100-PCIE-32GB", - "gpu_resource": "nvidia.com/gpu", + "namespace1": { + "pod1": { + "metrics": { + 0: { + "cpu": 10, + "gpu_request": 1, + "gpu_type": "Tesla-V100-PCIE-32GB", + "gpu_resource": "nvidia.com/gpu", + }, + 60: { + "cpu": 15, + "gpu_request": 1, + "gpu_type": "Tesla-V100-PCIE-32GB", + "gpu_resource": "nvidia.com/gpu", + }, + 120: { + "cpu": 20, + "gpu_request": 2, + "gpu_type": "Tesla-V100-PCIE-32GB", + "gpu_resource": "nvidia.com/gpu", + }, }, }, - }, + } } processor = metrics_processor.MetricsProcessor(merged_data=output_dict) processor.merge_metrics("gpu_request", test_metric_list) @@ -274,34 +279,38 @@ def test_merge_metrics_not_empty_with_gpu(self): class TestCondenseMetrics(TestCase): def test_condense_metrics(self): test_input_dict 
= { - "pod1": { - "metrics": { - 0: { - "cpu": 10, - "mem": 15, - }, - 900: { - "cpu": 10, - "mem": 15, - }, - } - }, - "pod2": { - "metrics": { - 0: { - "cpu": 2, - "mem": 256, - }, - 900: { - "cpu": 2, - "mem": 256, - }, - } - }, + "namespace1": { + "pod1": { + "metrics": { + 0: { + "cpu": 10, + "mem": 15, + }, + 900: { + "cpu": 10, + "mem": 15, + }, + } + }, + "pod2": { + "metrics": { + 0: { + "cpu": 2, + "mem": 256, + }, + 900: { + "cpu": 2, + "mem": 256, + }, + } + }, + } } expected_condensed_dict = { - "pod1": {"metrics": {0: {"cpu": 10, "mem": 15, "duration": 1800}}}, - "pod2": {"metrics": {0: {"cpu": 2, "mem": 256, "duration": 1800}}}, + "namespace1": { + "pod1": {"metrics": {0: {"cpu": 10, "mem": 15, "duration": 1800}}}, + "pod2": {"metrics": {0: {"cpu": 2, "mem": 256, "duration": 1800}}}, + } } processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) condensed_dict = processor.condense_metrics(["cpu", "mem"]) @@ -309,17 +318,21 @@ def test_condense_metrics(self): def test_condense_metrics_no_interval(self): test_input_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 10, - "mem": 15, + "namespace1": { + "pod1": { + "metrics": { + 0: { + "cpu": 10, + "mem": 15, + } } - } - }, + }, + } } expected_condensed_dict = { - "pod1": {"metrics": {0: {"cpu": 10, "mem": 15, "duration": 900}}}, + "namespace1": { + "pod1": {"metrics": {0: {"cpu": 10, "mem": 15, "duration": 900}}}, + } } processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) condensed_dict = processor.condense_metrics(["cpu", "mem"]) @@ -327,35 +340,39 @@ def test_condense_metrics_no_interval(self): def test_condense_metrics_with_change(self): test_input_dict = { - "pod2": { - "metrics": { - 0: { - "cpu": 20, - "mem": 25, - }, - 900: { - "cpu": 20, - "mem": 25, - }, - 1800: { - "cpu": 25, - "mem": 25, - }, - 2700: { - "cpu": 20, - "mem": 25, - }, - } - }, + "namespace1": { + "pod2": { + "metrics": { + 0: { + "cpu": 20, + "mem": 25, + }, + 900: { + "cpu": 20, + 
"mem": 25, + }, + 1800: { + "cpu": 25, + "mem": 25, + }, + 2700: { + "cpu": 20, + "mem": 25, + }, + } + }, + } } expected_condensed_dict = { - "pod2": { - "metrics": { - 0: {"cpu": 20, "mem": 25, "duration": 1800}, - 1800: {"cpu": 25, "mem": 25, "duration": 900}, - 2700: {"cpu": 20, "mem": 25, "duration": 900}, - } - }, + "namespace1": { + "pod2": { + "metrics": { + 0: {"cpu": 20, "mem": 25, "duration": 1800}, + 1800: {"cpu": 25, "mem": 25, "duration": 900}, + 2700: {"cpu": 20, "mem": 25, "duration": 900}, + } + }, + } } processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) condensed_dict = processor.condense_metrics(["cpu", "mem"]) @@ -363,25 +380,29 @@ def test_condense_metrics_with_change(self): def test_condense_metrics_skip_metric(self): test_input_dict = { - "pod3": { - "metrics": { - 0: { - "cpu": 30, - "mem": 35, - "gpu": 1, - }, - 900: { - "cpu": 30, - "mem": 35, - "gpu": 2, - }, + "namespace1": { + "pod3": { + "metrics": { + 0: { + "cpu": 30, + "mem": 35, + "gpu": 1, + }, + 900: { + "cpu": 30, + "mem": 35, + "gpu": 2, + }, + } } } } expected_condensed_dict = { - "pod3": { - "metrics": {0: {"cpu": 30, "mem": 35, "gpu": 1, "duration": 1800}} - }, + "namespace1": { + "pod3": { + "metrics": {0: {"cpu": 30, "mem": 35, "gpu": 1, "duration": 1800}} + }, + } } processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) condensed_dict = processor.condense_metrics(["cpu", "mem"]) @@ -389,60 +410,64 @@ def test_condense_metrics_skip_metric(self): def test_condense_metrics_with_timeskips(self): test_input_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 1, - "mem": 4, - }, - 900: { - "cpu": 1, - "mem": 4, - }, - 1800: { - "cpu": 1, - "mem": 4, - }, - 5400: { # time skipped - "cpu": 1, - "mem": 4, - }, - 6300: { - "cpu": 1, - "mem": 4, - }, - 8100: { # metric changed and time skipped - "cpu": 2, - "mem": 8, - }, - 9000: { - "cpu": 2, - "mem": 8, - }, - } - }, - "pod2": { - "metrics": { - 0: { - "cpu": 2, - "mem": 16, - }, - 
900: { - "cpu": 2, - "mem": 16, - }, - } - }, + "namespace1": { + "pod1": { + "metrics": { + 0: { + "cpu": 1, + "mem": 4, + }, + 900: { + "cpu": 1, + "mem": 4, + }, + 1800: { + "cpu": 1, + "mem": 4, + }, + 5400: { # time skipped + "cpu": 1, + "mem": 4, + }, + 6300: { + "cpu": 1, + "mem": 4, + }, + 8100: { # metric changed and time skipped + "cpu": 2, + "mem": 8, + }, + 9000: { + "cpu": 2, + "mem": 8, + }, + } + }, + "pod2": { + "metrics": { + 0: { + "cpu": 2, + "mem": 16, + }, + 900: { + "cpu": 2, + "mem": 16, + }, + } + }, + } } expected_condensed_dict = { - "pod1": { - "metrics": { - 0: {"cpu": 1, "mem": 4, "duration": 2700}, - 5400: {"cpu": 1, "mem": 4, "duration": 1800}, - 8100: {"cpu": 2, "mem": 8, "duration": 1800}, - } - }, - "pod2": {"metrics": {0: {"cpu": 2, "mem": 16, "duration": 1800}}}, + "namespace1": { + "pod1": { + "metrics": { + 0: {"cpu": 1, "mem": 4, "duration": 2700}, + 5400: {"cpu": 1, "mem": 4, "duration": 1800}, + 8100: {"cpu": 2, "mem": 8, "duration": 1800}, + } + }, + "pod2": {"metrics": {0: {"cpu": 2, "mem": 16, "duration": 1800}}}, + } } processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) condensed_dict = processor.condense_metrics(["cpu", "mem"]) @@ -450,97 +475,101 @@ def test_condense_metrics_with_timeskips(self): def test_condense_metrics_with_changing_gpu(self): test_input_dict = { - "pod1": { - "metrics": { - 0: { - "cpu": 1, - "mem": 4, - }, - 900: { - "cpu": 1, - "mem": 4, - }, - 1800: { # pod acquires a GPU - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_V100, - }, - 2700: { - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_V100, - }, - 3600: { # type of GPU is changed - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_A100_SXM4, - }, - 4500: { - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_A100_SXM4, - }, - 5400: { - "cpu": 1, - "mem": 4, - "gpu_request": 1, - "gpu_type": utils.GPU_A100_SXM4, - }, - 6300: { # count of GPU is 
changed - "cpu": 1, - "mem": 4, - "gpu_request": 3, - "gpu_type": utils.GPU_A100_SXM4, - }, - 7200: { - "cpu": 1, - "mem": 4, - "gpu_request": 3, - "gpu_type": utils.GPU_A100_SXM4, - }, - 8100: { # no longer using GPUs - "cpu": 1, - "mem": 4, - }, - } - }, + "namespace1": { + "pod1": { + "metrics": { + 0: { + "cpu": 1, + "mem": 4, + }, + 900: { + "cpu": 1, + "mem": 4, + }, + 1800: { # pod acquires a GPU + "cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_V100, + }, + 2700: { + "cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_V100, + }, + 3600: { # type of GPU is changed + "cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_A100_SXM4, + }, + 4500: { + "cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_A100_SXM4, + }, + 5400: { + "cpu": 1, + "mem": 4, + "gpu_request": 1, + "gpu_type": utils.GPU_A100_SXM4, + }, + 6300: { # count of GPU is changed + "cpu": 1, + "mem": 4, + "gpu_request": 3, + "gpu_type": utils.GPU_A100_SXM4, + }, + 7200: { + "cpu": 1, + "mem": 4, + "gpu_request": 3, + "gpu_type": utils.GPU_A100_SXM4, + }, + 8100: { # no longer using GPUs + "cpu": 1, + "mem": 4, + }, + } + }, + } } expected_condensed_dict = { - "pod1": { - "metrics": { - 0: {"cpu": 1, "mem": 4, "duration": 1800}, - 1800: { - "cpu": 1, - "mem": 4, - "duration": 1800, - "gpu_request": 1, - "gpu_type": utils.GPU_V100, - }, - 3600: { - "cpu": 1, - "mem": 4, - "duration": 2700, - "gpu_request": 1, - "gpu_type": utils.GPU_A100_SXM4, - }, - 6300: { - "cpu": 1, - "mem": 4, - "duration": 1800, - "gpu_request": 3, - "gpu_type": utils.GPU_A100_SXM4, - }, - 8100: { - "cpu": 1, - "mem": 4, - "duration": 900, - }, - } - }, + "namespace1": { + "pod1": { + "metrics": { + 0: {"cpu": 1, "mem": 4, "duration": 1800}, + 1800: { + "cpu": 1, + "mem": 4, + "duration": 1800, + "gpu_request": 1, + "gpu_type": utils.GPU_V100, + }, + 3600: { + "cpu": 1, + "mem": 4, + "duration": 2700, + "gpu_request": 1, + "gpu_type": utils.GPU_A100_SXM4, + }, + 6300: { 
+ "cpu": 1, + "mem": 4, + "duration": 1800, + "gpu_request": 3, + "gpu_type": utils.GPU_A100_SXM4, + }, + 8100: { + "cpu": 1, + "mem": 4, + "duration": 900, + }, + } + }, + } } processor = metrics_processor.MetricsProcessor(merged_data=test_input_dict) condensed_dict = processor.condense_metrics( diff --git a/openshift_metrics/tests/test_utils.py b/openshift_metrics/tests/test_utils.py index 9ff3045..e8da8ad 100644 --- a/openshift_metrics/tests/test_utils.py +++ b/openshift_metrics/tests/test_utils.py @@ -84,65 +84,65 @@ def test_write_metrics_log(self, mock_gna): } } test_metrics_dict = { - "pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu_request": 10, - "memory_request": 1048576, - "duration": 120, - "node": "wrk-1", - "node_model": "Dell", - }, - 120: { - "cpu_request": 20, - "memory_request": 1048576, - "duration": 60, - "node": "wrk-2", - "node_model": "Lenovo" + "namespace1": { + "pod1": { + "metrics": { + 0: { + "cpu_request": 10, + "memory_request": 1048576, + "duration": 120, + "node": "wrk-1", + "node_model": "Dell", + }, + 120: { + "cpu_request": 20, + "memory_request": 1048576, + "duration": 60, + "node": "wrk-2", + "node_model": "Lenovo" + } } - } - }, - "pod2": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu_request": 20, - "memory_request": 10485760, - "duration": 60 - }, - 60: { - "cpu_request": 25, - "memory_request": 10485760, - "duration": 60 - }, - 120: { - "cpu_request": 20, - "memory_request": 10485760, - "duration": 60 + }, + "pod2": { + "metrics": { + 0: { + "cpu_request": 20, + "memory_request": 10485760, + "duration": 60 + }, + 60: { + "cpu_request": 25, + "memory_request": 10485760, + "duration": 60 + }, + 120: { + "cpu_request": 20, + "memory_request": 10485760, + "duration": 60 + } } } }, - "pod3": { - "namespace": "namespace2", - "metrics": { - 0: { - "cpu_request": 45, - "memory_request": 104857600, - "duration": 180 - }, - } - }, - "pod4": { # this results in 0.5 SU - "namespace": "namespace2", - 
"metrics": { - 0: { - "cpu_request": 0.5, - "memory_request": 2147483648, - "duration": 3600 - }, - } - }, + "namespace2": { + "pod3": { + "metrics": { + 0: { + "cpu_request": 45, + "memory_request": 104857600, + "duration": 180 + }, + } + }, + "pod4": { # this results in 0.5 SU + "metrics": { + 0: { + "cpu_request": 0.5, + "memory_request": 2147483648, + "duration": 3600 + }, + } + }, + } } expected_output = ("Namespace,Coldfront_PI Name,Coldfront Project ID ,Pod Start Time,Pod End Time,Duration (Hours),Pod Name,CPU Request,GPU Request,GPU Type,GPU Resource,Node,Node Model,Memory Request (GiB),Determining Resource,SU Type,SU Count\n" @@ -175,73 +175,72 @@ def test_write_metrics_log(self, mock_gna): } } test_metrics_dict = { - "pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu_request": 2, - "memory_request": 4 * 2**30, - "duration": 43200 - }, - 43200: { - "cpu_request": 4, - "memory_request": 4 * 2**30, - "duration": 43200 + "namespace1": { + "pod1": { + "metrics": { + 0: { + "cpu_request": 2, + "memory_request": 4 * 2**30, + "duration": 43200 + }, + 43200: { + "cpu_request": 4, + "memory_request": 4 * 2**30, + "duration": 43200 + } } - } - }, - "pod2": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu_request": 4, - "memory_request": 1 * 2**30, - "duration": 86400 - }, - 86400: { - "cpu_request": 20, - "memory_request": 1 * 2**30, - "duration": 172800 + }, + "pod2": { + "metrics": { + 0: { + "cpu_request": 4, + "memory_request": 1 * 2**30, + "duration": 86400 + }, + 86400: { + "cpu_request": 20, + "memory_request": 1 * 2**30, + "duration": 172800 + } } } }, - "pod3": { - "namespace": "namespace2", - "metrics": { - 0: { - "cpu_request": 1, - "memory_request": 8 * 2**30, - "duration": 172800 - }, - } - }, - "pod4": { - "namespace": "namespace2", - "metrics": { - 0: { - "cpu_request": 1, - "memory_request": 8 * 2**30, - "gpu_request": 1, - "gpu_type": utils.GPU_A100, - "gpu_resource": utils.WHOLE_GPU, - "duration": 172700 # little under 
48 hours, expect to be rounded up in the output - }, - } - }, - "pod5": { - "namespace": "namespace2", - "gpu_type": utils.GPU_A100_SXM4, - "metrics": { - 0: { - "cpu_request": 24, - "memory_request": 8 * 2**30, - "gpu_request": 1, - "gpu_type": utils.GPU_A100_SXM4, - "gpu_resource": utils.WHOLE_GPU, - "duration": 172800 - }, - } + "namespace2": { + "pod3": { + "metrics": { + 0: { + "cpu_request": 1, + "memory_request": 8 * 2**30, + "duration": 172800 + }, + } + }, + "pod4": { + "metrics": { + 0: { + "cpu_request": 1, + "memory_request": 8 * 2**30, + "gpu_request": 1, + "gpu_type": utils.GPU_A100, + "gpu_resource": utils.WHOLE_GPU, + "duration": 172700 # little under 48 hours, expect to be rounded up in the output + }, + } + }, + "pod5": { + "gpu_type": utils.GPU_A100_SXM4, + "metrics": { + 0: { + "cpu_request": 24, + "memory_request": 8 * 2**30, + "gpu_request": 1, + "gpu_type": utils.GPU_A100_SXM4, + "gpu_resource": utils.WHOLE_GPU, + "duration": 172800 + }, + } }, + } } expected_output = ("Invoice Month,Project - Allocation,Project - Allocation ID,Manager (PI),Invoice Email,Invoice Address,Institution,Institution - Specific Code,SU Hours (GBhr or SUhr),SU Type,Rate,Cost\n" @@ -275,16 +274,19 @@ def test_write_metrics_by_namespace_decimal(self, mock_gna): rate = 0.013 test_metrics_dict = { - "pod1": { - "namespace": "namespace1", - "metrics": { - 0: { - "cpu_request": 1, - "memory_request": 4 * 2**30, - "duration": 35*3600 - }, + "namespace1": { + "pod1": { + "namespace": "namespace1", + "metrics": { + 0: { + "cpu_request": 1, + "memory_request": 4 * 2**30, + "duration": 35*3600 + }, + } } - }} + } + } cost = round(duration*rate,2) self.assertEqual(cost, 0.45) diff --git a/openshift_metrics/utils.py b/openshift_metrics/utils.py index aae6d1f..95117b5 100755 --- a/openshift_metrics/utils.py +++ b/openshift_metrics/utils.py @@ -259,11 +259,9 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month): rows.append(headers) - for pod, pod_dict 
in condensed_metrics_dict.items(): - namespace = pod_dict["namespace"] - pod_metrics_dict = pod_dict["metrics"] - namespace_annotation_dict = namespace_annotations.get(namespace, {}) + for namespace, pods in condensed_metrics_dict.items(): + namespace_annotation_dict = namespace_annotations.get(namespace, {}) cf_pi = namespace_annotation_dict.get("cf_pi") cf_institution_code = namespace_annotation_dict.get("institution_code") @@ -281,26 +279,30 @@ def write_metrics_by_namespace(condensed_metrics_dict, file_name, report_month): "total_cost": 0, } - for epoch_time, pod_metric_dict in pod_metrics_dict.items(): - duration_in_hours = Decimal(pod_metric_dict["duration"]) / 3600 - cpu_request = Decimal(pod_metric_dict.get("cpu_request", 0)) - gpu_request = Decimal(pod_metric_dict.get("gpu_request", 0)) - gpu_type = pod_metric_dict.get("gpu_type") - gpu_resource = pod_metric_dict.get("gpu_resource") - memory_request = Decimal(pod_metric_dict.get("memory_request", 0)) / 2**30 - - _, su_count, _ = get_service_unit(cpu_request, memory_request, gpu_request, gpu_type, gpu_resource) - - if gpu_type == GPU_A100: - metrics_by_namespace[namespace]["SU_A100_GPU_HOURS"] += su_count * duration_in_hours - elif gpu_type == GPU_A100_SXM4: - metrics_by_namespace[namespace]["SU_A100_SXM4_GPU_HOURS"] += su_count * duration_in_hours - elif gpu_type == GPU_V100: - metrics_by_namespace[namespace]["SU_V100_GPU_HOURS"] += su_count * duration_in_hours - elif gpu_type == GPU_UNKNOWN_TYPE: - metrics_by_namespace[namespace]["SU_UNKNOWN_GPU_HOURS"] += su_count * duration_in_hours - else: - metrics_by_namespace[namespace]["SU_CPU_HOURS"] += su_count * duration_in_hours + for pod, pod_dict in pods.items(): + + pod_metrics_dict = pod_dict["metrics"] + + for epoch_time, pod_metric_dict in pod_metrics_dict.items(): + duration_in_hours = Decimal(pod_metric_dict["duration"]) / 3600 + cpu_request = Decimal(pod_metric_dict.get("cpu_request", 0)) + gpu_request = Decimal(pod_metric_dict.get("gpu_request", 0)) + 
gpu_type = pod_metric_dict.get("gpu_type") + gpu_resource = pod_metric_dict.get("gpu_resource") + memory_request = Decimal(pod_metric_dict.get("memory_request", 0)) / 2**30 + + _, su_count, _ = get_service_unit(cpu_request, memory_request, gpu_request, gpu_type, gpu_resource) + + if gpu_type == GPU_A100: + metrics_by_namespace[namespace]["SU_A100_GPU_HOURS"] += su_count * duration_in_hours + elif gpu_type == GPU_A100_SXM4: + metrics_by_namespace[namespace]["SU_A100_SXM4_GPU_HOURS"] += su_count * duration_in_hours + elif gpu_type == GPU_V100: + metrics_by_namespace[namespace]["SU_V100_GPU_HOURS"] += su_count * duration_in_hours + elif gpu_type == GPU_UNKNOWN_TYPE: + metrics_by_namespace[namespace]["SU_UNKNOWN_GPU_HOURS"] += su_count * duration_in_hours + else: + metrics_by_namespace[namespace]["SU_CPU_HOURS"] += su_count * duration_in_hours for namespace, metrics in metrics_by_namespace.items(): @@ -360,52 +362,52 @@ def write_metrics_by_pod(metrics_dict, file_name): ] rows.append(headers) - for pod, pod_dict in metrics_dict.items(): - namespace = pod_dict["namespace"] - pod_metrics_dict = pod_dict["metrics"] - namespace_annotation_dict = namespace_annotations.get(namespace, {}) - cf_pi = namespace_annotation_dict.get("cf_pi") - cf_project_id = namespace_annotation_dict.get("cf_project_id") - - for epoch_time, pod_metric_dict in pod_metrics_dict.items(): - start_time = datetime.datetime.utcfromtimestamp(float(epoch_time)).strftime( - "%Y-%m-%dT%H:%M:%S" - ) - end_time = datetime.datetime.utcfromtimestamp( - float(epoch_time + pod_metric_dict["duration"]) - ).strftime("%Y-%m-%dT%H:%M:%S") - duration = (Decimal(pod_metric_dict["duration"]) / 3600).quantize(Decimal(".0001"), rounding=decimal.ROUND_HALF_UP) - cpu_request = Decimal(pod_metric_dict.get("cpu_request", 0)) - gpu_request = Decimal(pod_metric_dict.get("gpu_request", 0)) - gpu_type = pod_metric_dict.get("gpu_type") - gpu_resource = pod_metric_dict.get("gpu_resource") - node = pod_metric_dict.get("node", 
"Unknown Node") - node_model = pod_metric_dict.get("node_model", "Unknown Model") - memory_request = (Decimal(pod_metric_dict.get("memory_request", 0)) / 2**30).quantize(Decimal(".0001"), rounding=decimal.ROUND_HALF_UP) - su_type, su_count, determining_resource = get_service_unit( - cpu_request, memory_request, gpu_request, gpu_type, gpu_resource - ) - - info_list = [ - namespace, - cf_pi, - cf_project_id, - start_time, - end_time, - duration, - pod, - cpu_request, - gpu_request, - gpu_type, - gpu_resource, - node, - node_model, - memory_request, - determining_resource, - su_type, - su_count, - ] - - rows.append(info_list) + for namespace, pods in metrics_dict.items(): + for pod, pod_dict in pods.items(): + pod_metrics_dict = pod_dict["metrics"] + namespace_annotation_dict = namespace_annotations.get(namespace, {}) + cf_pi = namespace_annotation_dict.get("cf_pi") + cf_project_id = namespace_annotation_dict.get("cf_project_id") + + for epoch_time, pod_metric_dict in pod_metrics_dict.items(): + start_time = datetime.datetime.utcfromtimestamp(float(epoch_time)).strftime( + "%Y-%m-%dT%H:%M:%S" + ) + end_time = datetime.datetime.utcfromtimestamp( + float(epoch_time + pod_metric_dict["duration"]) + ).strftime("%Y-%m-%dT%H:%M:%S") + duration = (Decimal(pod_metric_dict["duration"]) / 3600).quantize(Decimal(".0001"), rounding=decimal.ROUND_HALF_UP) + cpu_request = Decimal(pod_metric_dict.get("cpu_request", 0)) + gpu_request = Decimal(pod_metric_dict.get("gpu_request", 0)) + gpu_type = pod_metric_dict.get("gpu_type") + gpu_resource = pod_metric_dict.get("gpu_resource") + node = pod_metric_dict.get("node", "Unknown Node") + node_model = pod_metric_dict.get("node_model", "Unknown Model") + memory_request = (Decimal(pod_metric_dict.get("memory_request", 0)) / 2**30).quantize(Decimal(".0001"), rounding=decimal.ROUND_HALF_UP) + su_type, su_count, determining_resource = get_service_unit( + cpu_request, memory_request, gpu_request, gpu_type, gpu_resource + ) + + info_list = [ + 
namespace, + cf_pi, + cf_project_id, + start_time, + end_time, + duration, + pod, + cpu_request, + gpu_request, + gpu_type, + gpu_resource, + node, + node_model, + memory_request, + determining_resource, + su_type, + su_count, + ] + + rows.append(info_list) csv_writer(rows, file_name) From e6837e9eccf200a5ad8134f787e2f562f4c4c8b7 Mon Sep 17 00:00:00 2001 From: Naved Ansari Date: Tue, 24 Sep 2024 10:42:23 -0400 Subject: [PATCH 3/3] Use setdefault to simplify things --- openshift_metrics/metrics_processor.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/openshift_metrics/metrics_processor.py b/openshift_metrics/metrics_processor.py index 433b0b5..3132018 100644 --- a/openshift_metrics/metrics_processor.py +++ b/openshift_metrics/metrics_processor.py @@ -22,10 +22,8 @@ def merge_metrics(self, metric_name, metric_list): gpu_resource = None node_model = None - if namespace not in self.merged_data: - self.merged_data[namespace] = {} - if pod not in self.merged_data[namespace]: - self.merged_data[namespace][pod] = {"metrics": {}} + self.merged_data.setdefault(namespace, {}) + self.merged_data[namespace].setdefault(pod, {"metrics": {}}) if metric_name == "gpu_request": gpu_type = metric["metric"].get( @@ -37,8 +35,7 @@ def merge_metrics(self, metric_name, metric_list): for value in metric["values"]: epoch_time = value[0] - if epoch_time not in self.merged_data[namespace][pod]["metrics"]: - self.merged_data[namespace][pod]["metrics"][epoch_time] = {} + self.merged_data[namespace][pod]["metrics"].setdefault(epoch_time, {}) self.merged_data[namespace][pod]["metrics"][epoch_time][ metric_name @@ -71,8 +68,7 @@ def condense_metrics(self, metrics_to_check: List[str]) -> Dict: for namespace, pods in self.merged_data.items(): - if namespace not in condensed_dict: - condensed_dict[namespace] = {} + condensed_dict.setdefault(namespace, {}) for pod, pod_dict in pods.items():