From cba5b9c0ee98806f91a97ecdf7add1c24f8df0ed Mon Sep 17 00:00:00 2001 From: Alex Klibisz Date: Sat, 27 May 2023 22:41:19 -0400 Subject: [PATCH] Various CI improvements (#519) --- .github/scripts/delete-snapshot-releases.sh | 26 ++--- .github/workflows/{ci.yml => ci.yaml} | 105 +++++++++--------- .github/workflows/cleanup-snapshots.yaml | 18 +++ .github/workflows/release.yml | 7 -- .../com/klibisz/elastiknn/RecallSuite.scala | 29 ++--- 5 files changed, 89 insertions(+), 96 deletions(-) rename .github/workflows/{ci.yml => ci.yaml} (71%) create mode 100644 .github/workflows/cleanup-snapshots.yaml diff --git a/.github/scripts/delete-snapshot-releases.sh b/.github/scripts/delete-snapshot-releases.sh index 1106bf5f0..47eacc87c 100755 --- a/.github/scripts/delete-snapshot-releases.sh +++ b/.github/scripts/delete-snapshot-releases.sh @@ -1,24 +1,18 @@ #!/bin/bash set -e -# Delete snapshots from closed PRs. -PRNUMS=$(hub pr list -s closed --format='%I ' --limit=10 --sort=updated) +limit=100 +closed_prs=$(gh pr list --state closed --json number --jq '.[].number' --limit $limit) +pr_release_tags=$(gh release list --limit $limit | grep -E "PR[0-9]+-SNAPSHOT" | awk '{print $3}') -for N in $PRNUMS; +# Loop over the closed PRs and delete any releases that correspond to a closed PR. +for pr in $closed_prs do - TAGS=$(hub release | grep "PR$N-" || true) - for T in $TAGS; + echo "Checking PR $pr" + for tag in $(echo "$pr_release_tags" | grep "PR$pr-SNAPSHOT") do - echo "Deleting $T" - gh release delete "$T" --yes - git push --delete origin "$T" + echo "Deleting release $tag" + gh release delete "$tag" --yes + git push --delete origin "$tag" done done - -# Delete snapshots from master. -TAGS=$(hub release | grep "MAIN[0-9]*-SNAPSHOT" || true) -for T in $TAGS; -do - gh release delete "$T" --yes - git push --delete origin "$T" -done diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yaml similarity index 71% rename from .github/workflows/ci.yml rename to .github/workflows/ci.yaml index 818abcf38..582b9b594 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yaml @@ -4,8 +4,6 @@ on: pull_request: branches: - main - schedule: - - cron: "0 0 * * 1" concurrency: group: ${{ github.ref }} @@ -26,81 +24,86 @@ jobs: name: "Test JVM Code" runs-on: - ubuntu-22.04 - timeout-minutes: 20 steps: - uses: actions/checkout@v3 - - uses: actions/cache@v3 - with: - key: ${{ github.workflow }}.${{ github.job }}.r${{ github.run_number }} - restore-keys: | - ${{ github.workflow }}.${{ github.job }} - path: | - **/target/**/* + timeout-minutes: 1 - uses: actions/setup-java@v3 + timeout-minutes: 1 with: distribution: 'adopt' java-version: 19 cache: 'sbt' - uses: arduino/setup-task@v1 + timeout-minutes: 1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Increase MMAP Limits + timeout-minutes: 1 run: sudo sysctl -w vm.max_map_count=262144 - name: Compile + timeout-minutes: 5 run: task jvmCompile - name: Assemble + timeout-minutes: 1 run: task jvmAssemble - name: Run Unit Tests - run: task jvmUnitTestQuick + timeout-minutes: 1 + run: task jvmUnitTest - name: Run Cluster + timeout-minutes: 5 run: task dockerRunTestingCluster - name: Run Integration Tests - run: task jvmIntegrationTestQuick + timeout-minutes: 20 + run: task jvmIntegrationTest - name: Cluster Logs + timeout-minutes: 1 if: always() run: task dockerLogTestingCluster - name: Stop Cluster + timeout-minutes: 1 if: always() run: task dockerStopTestingCluster test-python: - name: Test Python Code + name: Test Python Client Code runs-on: - ubuntu-22.04 - timeout-minutes: 10 steps: - uses: actions/checkout@v3 - - uses: actions/cache@v3 - with: - key: ${{ github.workflow }}.${{ github.job }}.r${{ github.run_number }} - restore-keys: | - ${{ github.workflow }}.${{ github.job }} - path: | - **/target/**/* + timeout-minutes: 1 - uses: actions/setup-java@v3 + timeout-minutes: 1 with: distribution: 'adopt' java-version: 19 cache: 'sbt' - uses: actions/setup-python@v4 + timeout-minutes: 1 with: python-version: '3.7.15' cache: 'pip' - uses: arduino/setup-task@v1 + timeout-minutes: 1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Increase MMAP Limits + timeout-minutes: 1 run: sudo sysctl -w vm.max_map_count=262144 - name: Docs + timeout-minutes: 1 run: task pyDocs - name: Run Cluster + timeout-minutes: 5 run: task dockerRunTestingCluster - name: Test + timeout-minutes: 1 run: task pyTest - name: Cluster Logs + timeout-minutes: 1 if: always() run: task dockerLogTestingCluster - name: Stop Cluster + timeout-minutes: 1 if: always() run: task dockerStopTestingCluster @@ -112,50 +115,56 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v3 - - uses: actions/cache@v3 - with: - key: ${{ github.workflow }}.${{ github.job }}.r${{ github.run_number }} - restore-keys: | - ${{ github.workflow }}.${{ github.job }} - path: | - **/target/**/* + timeout-minutes: 1 - uses: actions/setup-java@v3 + timeout-minutes: 1 with: distribution: 'adopt' java-version: 19 cache: 'sbt' - uses: arduino/setup-task@v1 + timeout-minutes: 1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - uses: actions/setup-python@v4 + timeout-minutes: 1 with: python-version: '3.6' cache: 'pip' - name: Increase MMAP Limits + timeout-minutes: 1 run: sudo sysctl -w vm.max_map_count=262144 - name: Initialize Submodule + timeout-minutes: 1 run: task annbCreateSubmodule - name: Install Dependencies + timeout-minutes: 1 run: task annbInstallRequirements - name: Run Cluster + timeout-minutes: 5 run: task dockerRunTestingCluster - name: Test + timeout-minutes: 5 run: task annbTest - name: Cluster Logs + timeout-minutes: 1 if: always() run: task dockerLogTestingCluster - name: Stop Cluster + timeout-minutes: 1 if: always() run: task dockerStopTestingCluster - build-jekyll-site: + test-jekyll-site: name: Test Jekyll Site runs-on: - ubuntu-22.04 timeout-minutes: 10 steps: - uses: actions/checkout@v3 + timeout-minutes: 1 - uses: arduino/setup-task@v1 + timeout-minutes: 1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Compile Jekyll Site @@ -169,49 +178,43 @@ jobs: name: Publish Snapshots runs-on: - ubuntu-22.04 - timeout-minutes: 10 - needs: [show-github-context, test-jvm, test-python, test-benchmarks] + needs: + - test-jvm + - test-python + - test-benchmarks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v3 + timeout-minutes: 1 with: fetch-depth: 0 # Needed for git-based changelog. - - uses: actions/cache@v3 - with: - key: ${{ github.workflow }}.${{ github.job }}.r${{ github.run_number }} - restore-keys: | - ${{ github.workflow }}.${{ github.job }} - path: | - **/target/**/* - name: Setup Release Credentials + timeout-minutes: 1 env: PYPIRC_B64: ${{ secrets.PYPIRC_B64 }} run: ./.github/scripts/setup-env.sh - uses: actions/setup-java@v3 + timeout-minutes: 1 with: distribution: 'adopt' java-version: 19 cache: 'sbt' - uses: actions/setup-python@v4 + timeout-minutes: 1 with: python-version: '3.7.15' cache: 'pip' - uses: arduino/setup-task@v1 + timeout-minutes: 1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Setup Setuptools + - name: Install Setuptools + timeout-minutes: 1 run: python3 -m pip install setuptools - name: Publish to PyPi + timeout-minutes: 1 run: task pyPublishSnapshot VERSION=$(cat version)-dev${{ github.run_number }} - if: github.event_name == 'pull_request' - - name: Publish Plugin from PR - if: github.event_name == 'pull_request' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Publish Plugin to Github + timeout-minutes: 5 run: task jvmPublishSnapshot VERSION=$(cat version)-PR${{ github.event.pull_request.number }}-SNAPSHOT - - name: Publish Plugin from Main - if: github.event_name == 'push' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - ./.github/scripts/delete-snapshot-releases.sh - task jvmPublishSnapshot VERSION=$(cat version)-MAIN${{ github.run_number }}-SNAPSHOT diff --git a/.github/workflows/cleanup-snapshots.yaml b/.github/workflows/cleanup-snapshots.yaml new file mode 100644 index 000000000..20550c745 --- /dev/null +++ b/.github/workflows/cleanup-snapshots.yaml @@ -0,0 +1,18 @@ +name: "Cleanup Snapshots" + +on: + push: + branches: + - main + workflow_dispatch: + +jobs: + cleanup-snapshots: + name: Cleanup Snapshots + runs-on: + - ubuntu-22.04 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + - run: ./.github/scripts/delete-snapshot-releases.sh diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d61a69e4b..c676da3a7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -54,13 +54,6 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: 0 # Needed for git-based changelog. - - uses: actions/cache@v3 - with: - key: ${{ github.workflow }}.${{ github.job }}.r${{ github.run_number }} - restore-keys: | - ${{ github.workflow }}.${{ github.job }} - path: | - **/target/**/* - uses: actions/setup-python@v4 with: python-version: '3.7.15' diff --git a/elastiknn-plugin/src/it/scala/com/klibisz/elastiknn/RecallSuite.scala b/elastiknn-plugin/src/it/scala/com/klibisz/elastiknn/RecallSuite.scala index 38a880fb6..789cabac0 100644 --- a/elastiknn-plugin/src/it/scala/com/klibisz/elastiknn/RecallSuite.scala +++ b/elastiknn-plugin/src/it/scala/com/klibisz/elastiknn/RecallSuite.scala @@ -63,15 +63,13 @@ class RecallSuite extends AsyncFunSuite with Matchers with ElasticAsyncClient wi Seq( NearestNeighborsQuery.Exact(vecField, Similarity.Jaccard) -> 1d, NearestNeighborsQuery.Exact(vecField, Similarity.Hamming) -> 1d, - NearestNeighborsQuery.JaccardLsh(vecField, 400) -> 0.69, - NearestNeighborsQuery.JaccardLsh(vecField, 800) -> 0.87 + NearestNeighborsQuery.JaccardLsh(vecField, 400) -> 0.69 ) ), Test( Mapping.JaccardLsh(dims, 300, 2), Seq( - NearestNeighborsQuery.JaccardLsh(vecField, 400) -> 0.62, - NearestNeighborsQuery.JaccardLsh(vecField, 800) -> 0.81 + NearestNeighborsQuery.JaccardLsh(vecField, 400) -> 0.62 ) ), // Hamming LSH @@ -80,8 +78,7 @@ class RecallSuite extends AsyncFunSuite with Matchers with ElasticAsyncClient wi Seq( NearestNeighborsQuery.Exact(vecField, Similarity.Jaccard) -> 1d, NearestNeighborsQuery.Exact(vecField, Similarity.Hamming) -> 1d, - NearestNeighborsQuery.HammingLsh(vecField, 200) -> 0.72, - NearestNeighborsQuery.HammingLsh(vecField, 400) -> 0.92 + NearestNeighborsQuery.HammingLsh(vecField, 200) -> 0.72 ) ), Test( @@ -101,16 +98,13 @@ class RecallSuite extends AsyncFunSuite with Matchers with ElasticAsyncClient wi NearestNeighborsQuery.Exact(vecField, Similarity.L1) -> 1d, NearestNeighborsQuery.Exact(vecField, Similarity.L2) -> 1d, NearestNeighborsQuery.Exact(vecField, Similarity.Cosine) -> 1d, - NearestNeighborsQuery.CosineLsh(vecField, 400) -> 0.46, - NearestNeighborsQuery.CosineLsh(vecField, 800) -> 0.67 + NearestNeighborsQuery.CosineLsh(vecField, 400) -> 0.46 ) ), Test( Mapping.CosineLsh(dims, 400, 2), Seq( - NearestNeighborsQuery.CosineLsh(vecField, 200) -> 0.34, - NearestNeighborsQuery.CosineLsh(vecField, 400) -> 0.50, - NearestNeighborsQuery.CosineLsh(vecField, 800) -> 0.72 + NearestNeighborsQuery.CosineLsh(vecField, 200) -> 0.34 ) ), // L2 Lsh @@ -121,12 +115,8 @@ class RecallSuite extends AsyncFunSuite with Matchers with ElasticAsyncClient wi NearestNeighborsQuery.Exact(vecField, Similarity.L2) -> 1d, NearestNeighborsQuery.Exact(vecField, Similarity.Cosine) -> 1d, NearestNeighborsQuery.L2Lsh(vecField, 200) -> 0.12, - NearestNeighborsQuery.L2Lsh(vecField, 400) -> 0.22, - NearestNeighborsQuery.L2Lsh(vecField, 800) -> 0.40, // Adding probes should improve recall, but since k = 1, probing > 2 times should have no effect. - NearestNeighborsQuery.L2Lsh(vecField, 800, 1) -> 0.43, - NearestNeighborsQuery.L2Lsh(vecField, 800, 2) -> 0.49, - NearestNeighborsQuery.L2Lsh(vecField, 800, 10) -> 0.49 + NearestNeighborsQuery.L2Lsh(vecField, 800, 2) -> 0.49 ) ), // Permutation Lsh @@ -137,9 +127,7 @@ class RecallSuite extends AsyncFunSuite with Matchers with ElasticAsyncClient wi NearestNeighborsQuery.Exact(vecField, Similarity.L2) -> 1d, NearestNeighborsQuery.Exact(vecField, Similarity.Cosine) -> 1d, NearestNeighborsQuery.PermutationLsh(vecField, Similarity.Cosine, 200) -> 0.14, - NearestNeighborsQuery.PermutationLsh(vecField, Similarity.Cosine, 400) -> 0.21, - NearestNeighborsQuery.PermutationLsh(vecField, Similarity.L2, 200) -> 0.12, - NearestNeighborsQuery.PermutationLsh(vecField, Similarity.L2, 400) -> 0.20 + NearestNeighborsQuery.PermutationLsh(vecField, Similarity.L2, 200) -> 0.12 ), // TODO: This one seems to be more sensitive for some unknown reason. recallTolerance = 5e-2 @@ -151,9 +139,7 @@ class RecallSuite extends AsyncFunSuite with Matchers with ElasticAsyncClient wi NearestNeighborsQuery.Exact(vecField, Similarity.L2) -> 1d, NearestNeighborsQuery.Exact(vecField, Similarity.Cosine) -> 1d, NearestNeighborsQuery.PermutationLsh(vecField, Similarity.Cosine, 200) -> 0.31, - NearestNeighborsQuery.PermutationLsh(vecField, Similarity.Cosine, 400) -> 0.51, NearestNeighborsQuery.PermutationLsh(vecField, Similarity.L2, 200) -> 0.3, - NearestNeighborsQuery.PermutationLsh(vecField, Similarity.L2, 400) -> 0.43 ), // TODO: This one seems to be more sensitive for some unknown reason. recallTolerance = 5e-2 @@ -273,5 +259,4 @@ class RecallSuite extends AsyncFunSuite with Matchers with ElasticAsyncClient wi } } } - }