From dfe90dbdcf677c55bd1b1b180b7490c8436595a5 Mon Sep 17 00:00:00 2001
From: Peter Nied <petern@amazon.com>
Date: Fri, 13 Dec 2024 09:49:38 -0600
Subject: [PATCH 1/2] Capture memory leak failures during tests (#1188)

Tests now automatically save heap dumps when out-of-memory errors
occur in Java test cases. CI has also been updated to upload these
dumps as artifacts for troubleshooting as needed.

Signed-off-by: Peter Nied <peternied@hotmail.com>
---
 .github/workflows/CI.yml | 18 ++++++++++++++++++
 build.gradle             |  2 +-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 8a2ae206d..163cdeeb3 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -121,6 +121,24 @@ jobs:
         env:
           OS_MIGRATIONS_GRADLE_SCAN_TOS_AGREE_AND_ENABLED: ''
 
+      - name: Detect Memory Dumps
+        if: failure()
+        run: |
+          if find . -type f -name "*.hprof" | grep -q '.'; then
+            echo "::group::Memory Dumps Detected"
+            echo "::warning::Memory dumps were found and uploaded as artifacts. Review these files to diagnose OOM issues."
+            echo "To download and inspect these files, navigate to 'Actions' -> 'Artifacts'."
+            echo "::endgroup::"
+          fi
+
+      - name: Upload memory dump
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          if-no-files-found: ignore
+          name: memory-dumps
+          path: ./**/*.hprof
+
       - uses: actions/upload-artifact@v4
         if: always()
         with:
diff --git a/build.gradle b/build.gradle
index 232d8ba32..f03862c97 100644
--- a/build.gradle
+++ b/build.gradle
@@ -127,7 +127,7 @@ subprojects {
         systemProperty 'junit.jupiter.execution.parallel.enabled', 'false'
         systemProperty 'log4j2.contextSelector', 'org.apache.logging.log4j.core.selector.BasicContextSelector'
         // Verify assertions in tests
-        jvmArgs '-ea'
+        jvmArgs = ['-ea', '-XX:+HeapDumpOnOutOfMemoryError']
         jacoco.enabled = true
     }
 

From d45f4f34fd86c965d4d739bac2127bb7ec08a062 Mon Sep 17 00:00:00 2001
From: Andre Kurait <akurait@amazon.com>
Date: Fri, 13 Dec 2024 12:42:04 -0600
Subject: [PATCH 2/2] Fix OSB on specific working workload commit (#1202)

* Fix OSB (opensearch-benchmark) runs by pinning them to a specific known-working workload commit

---------

Signed-off-by: Andre Kurait <akurait@amazon.com>
---
 .../elasticsearchTestConsole/runTestBenchmarks.sh    | 12 ++++++++----
 .../lib/console_link/console_link/models/cluster.py  |  6 ++++--
 .../lib/console_link/tests/test_cluster.py           | 12 ++++++++----
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/TrafficCapture/dockerSolution/src/main/docker/elasticsearchTestConsole/runTestBenchmarks.sh b/TrafficCapture/dockerSolution/src/main/docker/elasticsearchTestConsole/runTestBenchmarks.sh
index d2709a1af..cb75ad911 100644
--- a/TrafficCapture/dockerSolution/src/main/docker/elasticsearchTestConsole/runTestBenchmarks.sh
+++ b/TrafficCapture/dockerSolution/src/main/docker/elasticsearchTestConsole/runTestBenchmarks.sh
@@ -83,12 +83,16 @@ client_options=$(IFS=,; echo "${options[*]}")
 
 set -o xtrace
 
+# Newer OSB workload revisions fail against a single-node cluster that is persistently in a yellow state
+# See https://github.com/opensearch-project/opensearch-migrations/pull/1202
+workload_revision="440ce4b1fc8832b6b7673bdcec948cce3ee87e7e"
+
 echo "Running opensearch-benchmark workloads against ${endpoint}"
 echo "Running opensearch-benchmark w/ 'geonames' workload..." &&
-pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host=$endpoint --workload=geonames --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1"  --client-options=$client_options &&
+pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --workload-revision=$workload_revision --target-host=$endpoint --workload=geonames --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1"  --client-options=$client_options &&
 echo "Running opensearch-benchmark w/ 'http_logs' workload..." &&
-pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host=$endpoint --workload=http_logs --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options &&
+pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --workload-revision=$workload_revision --target-host=$endpoint --workload=http_logs --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1" --client-options=$client_options &&
 echo "Running opensearch-benchmark w/ 'nested' workload..." &&
-pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host=$endpoint --workload=nested --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1"  --client-options=$client_options &&
+pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --workload-revision=$workload_revision --target-host=$endpoint --workload=nested --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1"  --client-options=$client_options &&
 echo "Running opensearch-benchmark w/ 'nyc_taxis' workload..." &&
-pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host=$endpoint --workload=nyc_taxis --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1"  --client-options=$client_options
\ No newline at end of file
+pipenv run opensearch-benchmark execute-test --distribution-version=1.0.0 --workload-revision=$workload_revision --target-host=$endpoint --workload=nyc_taxis --pipeline=benchmark-only --test-mode --kill-running-processes --workload-params "target_throughput:0.5,bulk_size:10,bulk_indexing_clients:1,search_clients:1"  --client-options=$client_options
\ No newline at end of file
diff --git a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/console_link/models/cluster.py b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/console_link/models/cluster.py
index 01dec1824..0a91ecaaa 100644
--- a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/console_link/models/cluster.py
+++ b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/console_link/models/cluster.py
@@ -190,9 +190,11 @@ def execute_benchmark_workload(self, workload: str,
             raise NotImplementedError(f"Auth type {self.auth_type} is not currently support for executing "
                                       f"benchmark workloads")
         # Note -- we should censor the password when logging this command
-        logger.info(f"Running opensearch-benchmark with '{workload}' workload")
+        workload_revision = "440ce4b1fc8832b6b7673bdcec948cce3ee87e7e"
+        logger.info(f"Running opensearch-benchmark with '{workload}' workload and revision '{workload_revision}'")
         command = (f"opensearch-benchmark execute-test --distribution-version=1.0.0 --target-host={self.endpoint} "
-                   f"--workload={workload} --pipeline=benchmark-only --test-mode --kill-running-processes "
+                   f"--workload={workload} --workload-revision={workload_revision} --pipeline=benchmark-only "
+                   "--test-mode --kill-running-processes "
                    f"--workload-params={workload_params} --client-options={client_options}")
         # While a little wordier, this apprach prevents us from censoring the password if it appears in other contexts,
         # e.g. username:admin,password:admin.
diff --git a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/test_cluster.py b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/test_cluster.py
index b87fcb9d8..1ea9b8913 100644
--- a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/test_cluster.py
+++ b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/lib/console_link/tests/test_cluster.py
@@ -389,8 +389,10 @@ def test_run_benchmark_executes_correctly_no_auth(mocker):
     mock = mocker.patch("subprocess.run", autospec=True)
     workload = "nyctaxis"
     cluster.execute_benchmark_workload(workload=workload)
-    mock.assert_called_once_with("opensearch-benchmark execute-test --distribution-version=1.0.0 "
-                                 f"--target-host={cluster.endpoint} --workload={workload} --pipeline=benchmark-only"
+    mock.assert_called_once_with("opensearch-benchmark execute-test --distribution-version=1.0.0"
+                                 f" --target-host={cluster.endpoint} --workload={workload}"
+                                 f" --workload-revision=440ce4b1fc8832b6b7673bdcec948cce3ee87e7e"
+                                 " --pipeline=benchmark-only"
                                  " --test-mode --kill-running-processes --workload-params=target_throughput:0.5,"
                                  "bulk_size:10,bulk_indexing_clients:1,search_clients:1 "
                                  "--client-options=verify_certs:false", shell=True)
@@ -411,8 +413,10 @@ def test_run_benchmark_executes_correctly_basic_auth_and_https(mocker):
     mock = mocker.patch("subprocess.run", autospec=True)
     workload = "nyctaxis"
     cluster.execute_benchmark_workload(workload=workload)
-    mock.assert_called_once_with("opensearch-benchmark execute-test --distribution-version=1.0.0 "
-                                 f"--target-host={cluster.endpoint} --workload={workload} --pipeline=benchmark-only"
+    mock.assert_called_once_with("opensearch-benchmark execute-test --distribution-version=1.0.0"
+                                 f" --target-host={cluster.endpoint} --workload={workload}"
+                                 f" --workload-revision=440ce4b1fc8832b6b7673bdcec948cce3ee87e7e"
+                                 " --pipeline=benchmark-only"
                                  " --test-mode --kill-running-processes --workload-params=target_throughput:0.5,"
                                  "bulk_size:10,bulk_indexing_clients:1,search_clients:1 "
                                  "--client-options=verify_certs:false,use_ssl:true,"