-
Notifications
You must be signed in to change notification settings - Fork 49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add image building and tagged testing to CI #350
base: main
Are you sure you want to change the base?
Changes from all commits
96bbc24
42683d3
36facda
79cb6d6
f9d266f
6e5122d
49536db
bb69c94
db82883
fe3ad44
1783048
6122bde
2ed3ffa
82fb8e6
1666e46
fdd90b9
bbbb1c1
f1247c7
67f958f
cab5ebb
bc6d094
4b34844
2cb85e1
6091289
fed8b44
3e0c027
a9eba5c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,269 @@ | ||
#!/bin/bash | ||
|
||
# Map a machine name (as printed by `uname -m`) to the architecture label
# used throughout this script.
# Arguments: $1 - machine name, e.g. x86_64, arm64, aarch64
# Outputs:   "arm64" for arm64/aarch64, "amd64" for anything else (stdout)
archname() {
    local unamearchname="$1"
    case "${unamearchname}" in
        arm64|aarch64) echo "arm64" ;;
        *)             echo "amd64" ;;
    esac
}
|
||
# Print an error message on stderr, prefixed with "ERROR: ".
# Arguments: all arguments are joined with spaces to form the message
# Outputs:   the message on stderr; nothing on stdout
error() {
    # "$*" (not "$@") so the whole message is a single printf argument.
    printf 'ERROR: %s\n' "$*" >&2
}
|
||
# Report an error via error() and terminate the current shell with status 1.
# Arguments: the message, passed through to error()
# Note: when invoked inside a command substitution or other subshell, only
# that subshell exits.
die() {
    error "$@"
    exit 1
}
|
||
# Timestamp identifying this run; names the run directory and the S3 prefix.
NOW="$(date +"%Y-%m-%d_%H_%M_%S")"
# Architecture label (amd64/arm64) of the machine executing this script.
ARCH="$(archname "$(uname -m)")"

# Per-run output directory and the log files collected inside it.
RUNDIR="apps/integration/runs/${NOW}"
GIT_LOGFILE="${RUNDIR}/git.log"
DOCKER_LOGFILE="${RUNDIR}/docker.log"
POETRY_LOGFILE="${RUNDIR}/poetry.log"
PYTEST_LOGFILE="${RUNDIR}/pytest.log"
QUERY_LOGFILE="${RUNDIR}/test_queries.log"
|
||
# Parse args
#
# Step selection is stored in SKIP_* variables: 0 (the default) means the step
# is skipped, and --build/--tests/--push set the variable to 1 to enable it.
# main() runs each step as `{ [[ $SKIP_X -eq 0 ]] || step; }`, which lets the
# steps be chained with && while a skipped step still counts as success.
SKIP_BUILD=0
SKIP_TESTS=0
SKIP_PUSH=0
DO_CLEAN=0
TAG="integration_tests"
declare SSH_TARGET

# Consume the command-line arguments and set the option globals above.
# Globals written: SKIP_BUILD, SKIP_TESTS, SKIP_PUSH, DO_CLEAN, TAG,
#                  SSH_TARGET, SSH_BUILDX_PORT
# Exits 0 after printing help; dies on an unknown argument or a bad value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --help|-h)
                echo "Utility script for building containers, running integration tests, and pushing images"
                echo "Make sure to run this from the sycamore root directory."
                echo "-------------------------------------------------------"
                echo "Arguments:"
                echo " --help Display this message"
                echo " --build Build images"
                echo " --tests Run integration tests"
                echo " --push Push images"
                echo " --clean Remove logs from previous runs before doing anything."
                echo " --tag [TAG] When building, running, and/or pushing, use this docker tag."
                echo " Default is 'integration_tests'"
                echo " --ssh [TARGET] When building and running tests, also build and run on this host."
                echo " Useful for multi-arch builds and tests, e.g. --ssh my-arm-box"
                exit 0
                ;;
            --build)
                SKIP_BUILD=1
                echo "Will build images"
                shift
                ;;
            # --test is accepted as an alias because runtests passes that
            # spelling when it re-invokes this script over ssh.
            --tests|--test)
                SKIP_TESTS=1
                echo "Will run integration tests"
                shift
                ;;
            --push)
                SKIP_PUSH=1
                echo "Will push images"
                shift
                ;;
            --tag)
                [[ -z ${2:-} ]] && die "A tag must be specified when using the --tag arg; e.g. --tag my-tag"
                # Anchored so the check actually rejects values such as a
                # following "--flag" that was mistaken for the tag.
                [[ $2 =~ ^[a-z] ]] || die "Detected tag was $2. Tags should begin with lowercase letters"
                TAG="$2"
                echo "Using tag ${TAG}"
                shift
                shift
                ;;
            --clean)
                DO_CLEAN=1
                echo "Will clean ${RUNDIR} before running anything"
                shift
                ;;
            --ssh)
                [[ -z ${2:-} ]] && die "A configured ssh target must be specified when using the --ssh arg; e.g. --ssh my-host"
                [[ $2 =~ ^[a-z] ]] || die "Detected ssh target was $2. ssh targets should begin with lowercase letters"
                SSH_TARGET="$2"
                SSH_BUILDX_PORT=18460 # Selected by googling 5d10 and rerolling until small enough
                echo "Using ssh target ${SSH_TARGET}"
                shift
                shift
                ;;
            *)
                # Without this arm an unrecognised argument is never shifted
                # and the loop spins forever.
                die "Unknown argument '$1'. Use --help to list the supported arguments."
                ;;
        esac
    done
}

parse_args "$@"
|
||
# Entry point: optionally clean, fetch the newest commits and, if anything
# changed, run the enabled steps (build, tests, push) and upload the logs.
# Globals read: DO_CLEAN, RUNDIR, SSH_TARGET, SKIP_BUILD, SKIP_TESTS,
#               SKIP_PUSH, TAG, POETRY_LOGFILE, DOCKER_LOGFILE, PYTEST_LOGFILE
# Must be run from the repository root (a .git directory must exist).
main() {
    [[ -d ".git" ]] || die "Please run this script from sycamore root!"
    # --clean: delete RUNDIR before it is recreated so the run starts empty.
    # Compare numerically; a bare [[ $DO_CLEAN ]] is true even for "0".
    if [[ ${DO_CLEAN:-0} -ne 0 ]]; then
        rm -rf -- "${RUNDIR:?}"
    fi
    if [[ -n ${SSH_TARGET:-} && ${SKIP_BUILD:-0} -ne 0 ]]; then
        create-dual-builder
        # 0 = EXIT; 1 2 3 6 = HUP INT QUIT ABRT
        trap cleanup-dual-builder 0 1 2 3 6
    fi
    mkdir -p "${RUNDIR}"

    echo "Building/testing tag ${TAG}" >&2
    echo "Get the newest git commits" >&2
    if checkout_main_if_new; then
        echo "Changes detected. Running Tests" >&2
        # A SKIP_* value of 0 short-circuits the step but keeps the chain alive;
        # the "passed" marker is only written if every enabled step succeeded.
        poetry install --no-root > "${POETRY_LOGFILE}" 2>&1 \
            && { [[ $SKIP_BUILD -eq 0 ]] || build_images > "${DOCKER_LOGFILE}" 2>&1; } \
            && { [[ $SKIP_TESTS -eq 0 ]] || runtests > "${PYTEST_LOGFILE}" 2>&1; } \
            && touch "${RUNDIR}/passed"
        if [[ $SKIP_PUSH -ne 0 ]]; then
            # Never publish images when an earlier step failed.
            if [[ -f "${RUNDIR}/passed" ]]; then
                push_images >> "${DOCKER_LOGFILE}" 2>&1
            else
                echo "Not pushing images: an earlier step failed" >&2
            fi
        fi
        handle_outputs
    else
        echo "No changes detected. Skipping integration tests" >&2
    fi
}
|
||
|
||
# Fetch origin/main and, if it differs from the current HEAD, rebase onto it.
# Globals read: GIT_LOGFILE (git output is written here)
# Returns: 0 if new commits were pulled, 1 if HEAD already matched FETCH_HEAD.
# Dies if the fetch or pull fails or if the working tree has pending changes.
checkout_main_if_new() {
    local old_sha new_sha
    old_sha="$(git rev-parse HEAD)"
    # A failed fetch would leave FETCH_HEAD stale, so stop instead of comparing.
    git fetch origin main > "${GIT_LOGFILE}" 2>&1 \
        || die "git fetch failed; see ${GIT_LOGFILE}"
    new_sha="$(git rev-parse FETCH_HEAD)"
    [[ "${old_sha}" != "${new_sha}" ]] || return 1

    [[ -z $(git status --porcelain) ]] || die "Working tree not clean"
    git pull --rebase origin main >> "${GIT_LOGFILE}" 2>&1 \
        || die "git pull --rebase failed; see ${GIT_LOGFILE}"
    {
        echo "=================="
        echo "Using git rev ${new_sha}"
    } >> "${GIT_LOGFILE}"
    return 0
}
|
||
# Build every image via docker-build-hub, stopping at the first failure.
# Globals read: TAG (forwarded to the jupyter build as a build arg)
# Returns: 0 if all builds succeeded, 1 otherwise.
build_images() {
    echo "Building all images" >&2
    docker-build-hub apps/crawler/crawler/http/Dockerfile || return 1
    docker-build-hub apps/crawler/crawler/s3/Dockerfile || return 1
    docker-build-hub apps/importer/Dockerfile.buildx || return 1
    docker-build-hub apps/opensearch/Dockerfile || return 1
    docker-build-hub apps/jupyter/Dockerfile.buildx --build-arg=TAG="${TAG}" || return 1
    docker-build-hub apps/demo-ui/Dockerfile.buildx || return 1
    docker-build-hub apps/remote-processor-service/Dockerfile.buildx || return 1
    return 0
}
|
||
# Collect the run's artifacts and upload them to S3.
# Globals read: QUERY_LOGFILE, RUNDIR, NOW, ARCH
# Moves test-output.log (if present in the current directory) into the run
# directory, writes a "failed" marker when no "passed" marker exists, then
# copies the run directory to s3://sycamore-ci/<NOW>/<ARCH>.
# Returns: the exit status of the aws upload.
handle_outputs() {
    echo "Handling test outputs" >&2
    if [[ -f test-output.log ]]; then
        mv test-output.log "${QUERY_LOGFILE}"
    fi
    [[ -f "${RUNDIR}/passed" ]] || touch "${RUNDIR}/failed"
    aws s3 cp --recursive "${RUNDIR}/" "s3://sycamore-ci/${NOW}/${ARCH}"
}
|
||
# Push every image via docker-push-hub, stopping at the first failure.
# Returns: 0 if all pushes succeeded, 1 otherwise.
push_images() {
    echo "Pushing tested images to dockerhub" >&2
    docker-push-hub apps/crawler/crawler/http/Dockerfile || return 1
    docker-push-hub apps/crawler/crawler/s3/Dockerfile || return 1
    docker-push-hub apps/importer/Dockerfile.buildx || return 1
    docker-push-hub apps/opensearch/Dockerfile || return 1
    docker-push-hub apps/jupyter/Dockerfile.buildx || return 1
    docker-push-hub apps/demo-ui/Dockerfile.buildx || return 1
    docker-push-hub apps/remote-processor-service/Dockerfile.buildx || return 1
    return 0
}
|
||
# Run the integration tests locally and, when an ssh target is configured,
# concurrently on that host as well.
# Globals read: SSH_TARGET, TAG
# Returns: pytest's exit status; if pytest passed but the remote run failed,
#          returns 1.
runtests() {
    local remote_pid=""
    local status=0
    if [[ -n ${SSH_TARGET:-} ]]; then
        # The script's option is spelled --tests; the tag is shell-quoted
        # because the remote side re-parses this string.
        local remote_cmd
        remote_cmd="cd sycamore && ./apps/integration/integration/automation/integrate --tests --clean --tag $(printf '%q' "${TAG}")"
        ssh "${SSH_TARGET}" "${remote_cmd}" &
        remote_pid=$!
    fi
    docker volume rm sycamore_crawl_data sycamore_jupyter_data sycamore_opensearch_data
    docker network prune -f
    docker compose up reset
    # this is a complicated command, so:
    # -p integration.conftest - load conftest with plugins, to capture the custom command line arg (--docker-tag)
    # --noconftest - don't load conftest at pytest runtime; it's already loaded
    # --docker-tag - specify tag of containers to test
    poetry run pytest apps/integration/ -p integration.conftest --noconftest --docker-tag "${TAG}" \
        || status=$?
    # Wait for the remote run so its result is not silently dropped.
    if [[ -n ${remote_pid} ]] && ! wait "${remote_pid}"; then
        echo "Integration tests on ssh target ${SSH_TARGET} failed" >&2
        [[ ${status} -ne 0 ]] || status=1
    fi
    return "${status}"
}
|
||
# Build one image with buildx and push it (plus its build cache) to the
# registry.
# Arguments: $1   - path to the Dockerfile (its "# Repo name:" header names
#                   the target repository)
#            $2.. - extra arguments forwarded to `docker buildx build`
# Globals read: TAG
# Returns: 0 on success, 1 on a missing argument, unknown repo or build failure.
docker-build-hub() {
    local docker_file="$1"
    [[ -n "${docker_file}" ]] || { error "missing docker file argument"; return 1; }
    local repo_name
    repo_name="$(_docker-repo-name "${docker_file}")"
    [[ -n "${repo_name}" ]] || { error "empty repo name"; return 1; }
    shift

    # _docker-build-args prints space-separated flags; split them into an
    # array so each --build-arg reaches docker as its own argument.
    local -a build_args
    read -r -a build_args <<< "$(_docker-build-args)"

    echo
    echo "Building in sycamore and pushing to docker hub with repo name '${repo_name}'"
    docker buildx build "${build_args[@]}" -t "${repo_name}:${TAG}" -f "${docker_file}" \
        --cache-to type=registry,ref="${repo_name}:build-cache",mode=max \
        --cache-from type=registry,ref="${repo_name}:build-cache" \
        --platform="$(_docker-platforms)" "$@" --push . \
        || { error "buildx failed"; return 1; }
    echo "Successfully built using docker file $docker_file"
}
|
||
# Push the already-built image for one Dockerfile to the registry.
# Arguments: $1 - path to the Dockerfile (its "# Repo name:" header names the
#                 target repository)
# Globals read: TAG
# Returns: 0 on success, 1 on a missing argument, unknown repo or push failure.
# NOTE(review): this pushes only the locally tagged image. When both an arm64
# and an amd64 host push the same tag, confirm what ends up on Docker Hub.
docker-push-hub() {
    local docker_file="$1"
    [[ -n "${docker_file}" ]] || { error "missing docker file argument"; return 1; }
    local repo_name
    repo_name="$(_docker-repo-name "${docker_file}")"
    [[ -n "${repo_name}" ]] || { error "empty repo name"; return 1; }

    echo
    echo "Pushing image to docker hub for repo '${repo_name}'"
    docker push "${repo_name}:${TAG}" || { error "docker push failed"; return 1; }
    echo "Successfully pushed image previously built from dockerfile ${docker_file}"
}
|
||
# Print the repository name declared in a Dockerfile's "# Repo name: <name>"
# header line.
# Arguments: $1 - path to the Dockerfile
# Outputs:   the repo name on stdout; progress and errors on stderr
# Exits (via die / exit 1) when the name contains "private" or when the file
# does not declare exactly one name. Callers run this inside $(...), so the
# exit ends only that subshell and they see an empty result.
_docker-repo-name() {
    local docker_file="$1"
    echo "Finding repo name in: ${docker_file}" >&2
    local repo_name
    # Fourth whitespace-separated field of each matching header line.
    repo_name="$(awk '/^# Repo name: /{print $4}' "${docker_file}")"
    [[ "${repo_name}" = *private* ]] && die "Private repo ${repo_name} disallowed"
    if (( $(wc -w <<< "${repo_name}") != 1 )); then
        echo "Unable to find repo name in ${docker_file}" 1>&2
        exit 1
    fi

    echo "${repo_name}"
}
|
||
# Print the --build-arg flags that stamp an image with git provenance.
# Outputs: one line, space-separated:
#   --build-arg=GIT_BRANCH=<branch> --build-arg=GIT_COMMIT=<short-sha>--<date>
#   --build-arg=GIT_DIFF=<clean | pending_changes_<checksum of git diff HEAD>>
# Spaces in the commit date are replaced with underscores so the line can be
# split on whitespace by the caller.
_docker-build-args() {
    local branch rev commit_date diff
    branch="$(git branch --show-current)"
    rev="$(git rev-parse --short HEAD)"
    commit_date="$(git show -s --format=%ci HEAD)"
    commit_date="${commit_date// /_}"
    if [[ -z $(git status --porcelain) ]]; then
        diff=clean
    else
        diff="pending_changes_$(git diff HEAD | shasum | awk '{print $1}')"
    fi
    echo "--build-arg=GIT_BRANCH=${branch} --build-arg=GIT_COMMIT=${rev}--${commit_date} --build-arg=GIT_DIFF=${diff}"
}
|
||
# Print the value for `docker buildx build --platform`.
# Globals read: ARCH, SSH_TARGET
# Outputs: "linux/<ARCH>", or "linux/<ARCH>,linux/<remote arch>" when an ssh
#          target is configured and reports a different architecture.
_docker-platforms() {
    local remotearch="${ARCH}"
    if [[ -n ${SSH_TARGET:-} ]]; then
        remotearch="$(archname "$(ssh "${SSH_TARGET}" uname -m)")"
    fi
    # Quote the right-hand side so it is compared literally, not as a glob.
    if [[ "${ARCH}" != "${remotearch}" ]]; then
        echo "linux/${ARCH},linux/${remotearch}"
    else
        echo "linux/${ARCH}"
    fi
}
|
||
# Create and select a buildx builder ("dual-builder") backed by two buildkit
# daemons: one on this machine and one on the ssh target.
# Globals read:    SSH_TARGET, SSH_BUILDX_PORT, ARCH
# Globals written: REMOTE_ARCH
create-dual-builder() {
    # Over ssh, start a buildkit container on the target, and use port forwarding
    # to talk to it. Also start a local buildkit container, and then create a buildx
    # remote driver that talks to both of them.
    # The local daemon listens one port below the forwarded remote port.
    local local_port="$((SSH_BUILDX_PORT - 1))"

    ssh -N -L "${SSH_BUILDX_PORT}":localhost:"${SSH_BUILDX_PORT}" "${SSH_TARGET}" &
    REMOTE_ARCH="$(archname "$(ssh "${SSH_TARGET}" uname -m)")"
    ssh "${SSH_TARGET}" docker run -d --name=remote-buildkitd --privileged -p "${SSH_BUILDX_PORT}":"${SSH_BUILDX_PORT}" \
        moby/buildkit:latest --addr "tcp://0.0.0.0:${SSH_BUILDX_PORT}"
    docker run -d --name=remote-buildkitd --privileged -p "${local_port}":"${local_port}" \
        moby/buildkit:latest --addr "tcp://0.0.0.0:${local_port}"
    docker buildx create --name dual-builder --platform "linux/${ARCH}" --driver=remote "tcp://localhost:${local_port}"
    docker buildx create --append --name dual-builder --platform "linux/${REMOTE_ARCH}" --driver=remote "tcp://localhost:${SSH_BUILDX_PORT}"
    docker buildx use dual-builder
}
|
||
# Tear down what create-dual-builder set up: the buildx builder, both buildkit
# containers and the ssh port-forwarding process.
# Globals read: SSH_TARGET, SSH_BUILDX_PORT
cleanup-dual-builder() {
    docker buildx rm dual-builder
    ssh "${SSH_TARGET}" "docker stop remote-buildkitd && docker rm remote-buildkitd"
    docker stop remote-buildkitd && docker rm remote-buildkitd
    # pkill matches the tunnel by its -L forwarding spec and, unlike
    # `pgrep | xargs kill`, does not run kill with no arguments when nothing
    # matches.
    pkill -f -- "${SSH_BUILDX_PORT}:localhost:${SSH_BUILDX_PORT}"
}
|
||
# Run the script; command-line arguments were already consumed by the
# top-level parsing loop above.
main
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why name this integration/integration from the start?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do you mean?
The apps/integration/integration comes from the python project name being equal to the directory name - that pattern is all over this repo.