From dfe90dbdcf677c55bd1b1b180b7490c8436595a5 Mon Sep 17 00:00:00 2001 From: Peter Nied Date: Fri, 13 Dec 2024 09:49:38 -0600 Subject: [PATCH 1/2] Capture memory leak failures during tests (#1188) Tests now automatically save heap dumps (.hprof files) when out-of-memory errors occur in Java test cases. CI was also updated to detect these dumps and upload them as artifacts for troubleshooting as needed. Signed-off-by: Peter Nied --- .github/workflows/CI.yml | 18 ++++++++++++++++++ build.gradle | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 8a2ae206d..163cdeeb3 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -121,6 +121,24 @@ jobs: env: OS_MIGRATIONS_GRADLE_SCAN_TOS_AGREE_AND_ENABLED: '' + - name: Detect Memory Dumps + if: failure() + run: | + if find . -type f -name "*.hprof" | grep -q '.'; then + echo "::group::Memory Dumps Detected" + echo "::warning::Memory dumps were found and uploaded as artifacts. Review these files to diagnose OOM issues." + echo "To download and inspect these files, navigate to 'Actions' -> 'Artifacts'." 
+ echo "::endgroup::" + fi + + - name: Upload memory dump + if: failure() + uses: actions/upload-artifact@v4 + with: + if-no-files-found: ignore + name: memory-dumps + path: ./**/*.hprof + - uses: actions/upload-artifact@v4 if: always() with: diff --git a/build.gradle b/build.gradle index 232d8ba32..f03862c97 100644 --- a/build.gradle +++ b/build.gradle @@ -127,7 +127,7 @@ subprojects { systemProperty 'junit.jupiter.execution.parallel.enabled', 'false' systemProperty 'log4j2.contextSelector', 'org.apache.logging.log4j.core.selector.BasicContextSelector' // Verify assertions in tests - jvmArgs '-ea' + jvmArgs = ['-ea', '-XX:+HeapDumpOnOutOfMemoryError'] jacoco.enabled = true } From d45f4f34fd86c965d4d739bac2127bb7ec08a062 Mon Sep 17 00:00:00 2001 From: Andre Kurait Date: Fri, 13 Dec 2024 12:42:04 -0600 Subject: [PATCH 2/2] Fix OSB on specific working workload commit (#1202) * Fix OSB on specific working workload commit --------- Signed-off-by: Andre Kurait --- .../elasticsearchTestConsole/runTestBenchmarks.sh | 12 ++++++++---- .../lib/console_link/console_link/models/cluster.py | 6 ++++-- .../lib/console_link/tests/test_cluster.py | 12 ++++++++---- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/TrafficCapture/dockerSolution/src/main/docker/elasticsearchTestConsole/runTestBenchmarks.sh b/TrafficCapture/dockerSolution/src/main/docker/elasticsearchTestConsole/runTestBenchmarks.sh index d2709a1af..cb75ad911 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/elasticsearchTestConsole/runTestBenchmarks.sh +++ b/TrafficCapture/dockerSolution/src/main/docker/elasticsearchTestConsole/runTestBenchmarks.sh @@ -83,12 +83,16 @@ client_options=$(IFS=,; echo "${options[*]}") set -o xtrace +# Newer OSB Workload revisions fail with single node cluster that is persistently in yellow state +# See https://github.com/opensearch-project/opensearch-migrations/pull/1202 +workload_revision="440ce4b1fc8832b6b7673bdcec948cce3ee87e7e" + echo "Running 
opensearch-benchmark workloads against ${endpoint}" echo "Running opensearch-benchmark w/ 'geonames' workload..." && -pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host=$endpoint --workload=geonames --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options && +pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --workload-revision=$workload_revision --target-host=$endpoint --workload=geonames --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options && echo "Running opensearch-benchmark w/ 'http_logs' workload..." && -pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host=$endpoint --workload=http_logs --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options && +pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --workload-revision=$workload_revision --target-host=$endpoint --workload=http_logs --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options && echo "Running opensearch-benchmark w/ 'nested' workload..." 
&& -pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host=$endpoint --workload=nested --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options && +pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --workload-revision=$workload_revision --target-host=$endpoint --workload=nested --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options && echo "Running opensearch-benchmark w/ 'nyc_taxis' workload..." && -pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host=$endpoint --workload=nyc_taxis --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options \ No newline at end of file +pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --workload-revision=$workload_revision --target-host=$endpoint --workload=nyc_taxis --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options \ No newline at end of file diff --git a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/console_link/models/cluster.py b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/console_link/models/cluster.py index 01dec1824..0a91ecaaa 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/console_link/models/cluster.py +++ b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/console_link/models/cluster.py @@ -190,9 +190,11 @@ def 
execute_benchmark_workload(self, workload: str, raise NotImplementedError(f"Auth type {self.auth_type} is not currently support for executing " f"benchmark workloads") # Note -- we should censor the password when logging this command - logger.info(f"Running opensearch-benchmark with '{workload}' workload") + workload_revision = "440ce4b1fc8832b6b7673bdcec948cce3ee87e7e" + logger.info(f"Running opensearch-benchmark with '{workload}' workload and revision '{workload_revision}'") command = (f"opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host={self.endpoint} " - f"--workload={workload} --pipeline=benchmark-only --test-mode --kill-running-processes " + f"--workload={workload} --workload-revision={workload_revision} --pipeline=benchmark-only " + "--test-mode --kill-running-processes " f"--workload-params={workload_params} --client-options={client_options}") # While a little wordier, this apprach prevents us from censoring the password if it appears in other contexts, # e.g. username:admin,password:admin. 
diff --git a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/test_cluster.py b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/test_cluster.py index b87fcb9d8..1ea9b8913 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/test_cluster.py +++ b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/test_cluster.py @@ -389,8 +389,10 @@ def test_run_benchmark_executes_correctly_no_auth(mocker): mock = mocker.patch("subprocess.run", autospec=True) workload = "nyctaxis" cluster.execute_benchmark_workload(workload=workload) - mock.assert_called_once_with("opensearch-benchmark execute-test --distribution-version=1.0.0 " - f"--target-host={cluster.endpoint} --workload={workload} --pipeline=benchmark-only" + mock.assert_called_once_with("opensearch-benchmark execute-test --distribution-version=1.0.0" + f" --target-host={cluster.endpoint} --workload={workload}" + f" --workload-revision=440ce4b1fc8832b6b7673bdcec948cce3ee87e7e" + " --pipeline=benchmark-only" " --test-mode --kill-running-processes --workload-params=target_throughput:0.5," "bulk_size:10,bulk_indexing_clients:1,search_clients:1 " "--client-options=verify_certs:false", shell=True) @@ -411,8 +413,10 @@ def test_run_benchmark_executes_correctly_basic_auth_and_https(mocker): mock = mocker.patch("subprocess.run", autospec=True) workload = "nyctaxis" cluster.execute_benchmark_workload(workload=workload) - mock.assert_called_once_with("opensearch-benchmark execute-test --distribution-version=1.0.0 " - f"--target-host={cluster.endpoint} --workload={workload} --pipeline=benchmark-only" + mock.assert_called_once_with("opensearch-benchmark execute-test --distribution-version=1.0.0" + f" --target-host={cluster.endpoint} --workload={workload}" + f" --workload-revision=440ce4b1fc8832b6b7673bdcec948cce3ee87e7e" + " --pipeline=benchmark-only" " --test-mode 
--kill-running-processes --workload-params=target_throughput:0.5," "bulk_size:10,bulk_indexing_clients:1,search_clients:1 " "--client-options=verify_certs:false,use_ssl:true,"