# fix(openai): logprobs when echo is enabled (#761) #1013
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers, shared environment and run-level
# concurrency. Job definitions live under the top-level `jobs:` key.
name: Build and push OpenLLM base container

on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Pushes to `main` or any tag, limited to changes under the package sources.
  push:
    branches:
      - 'main'
    tags:
      - '*'
    paths:
      - 'openllm-python/src/openllm/**'
      - 'openllm-python/src/openllm_cli/**'
      - 'openllm-core/src/openllm_core/**'
      - 'openllm-client/src/openllm_client/**'
  # Pull requests against `main` touching the same source trees. `labeled` is
  # included so that adding a label re-triggers the workflow.
  pull_request:
    branches:
      - 'main'
    paths:
      - 'openllm-python/src/openllm/**'
      - 'openllm-python/src/openllm_cli/**'
      - 'openllm-core/src/openllm_core/**'
      - 'openllm-client/src/openllm_client/**'
    types: [labeled, opened, synchronize, reopened]
  # Reusable-workflow entry point; the caller must supply `tags`.
  workflow_call:
    inputs:
      tags:
        required: true
        type: string

# Environment shared by every job in this workflow.
env:
  LINES: 120
  COLUMNS: 120
  OPENLLM_DO_NOT_TRACK: True
  PYTHONUNBUFFERED: '1'
  AWS_REGION: us-west-2

# One active run per pull request (or per commit SHA when there is no PR);
# a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true
# Jobs: read the commit message, start an EC2 self-hosted runner, build/push
# the image on it, then stop the runner.
#
# NOTE(review): the trailing `# ratchet:` comments below read `[email protected]`
# in this copy of the file. That looks like an e-mail-obfuscation artifact that
# replaced the original `<action>@<version>` text; the pinned SHAs themselves
# are intact. Restore the version refs from repository history before running
# ratchet against this file.
jobs:
  # Publishes the head commit's one-line log entry as the `message` output so
  # downstream jobs can test it for the `[ec2 build]` marker.
  get_commit_message:
    name: Get commit message
    runs-on: ubuntu-latest
    if: "github.repository == 'bentoml/OpenLLM'"  # Don't run on fork repository
    outputs:
      message: ${{ steps.commit_message.outputs.message }}
    steps:
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # ratchet:actions/[email protected]
        # Gets the correct commit message for pull request
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Get commit message
        id: commit_message
        run: |
          set -xe
          COMMIT_MSG=$(git log --no-merges -1 --oneline)
          echo "message=$COMMIT_MSG" >> "$GITHUB_OUTPUT"
          # Read the ref from the runner-provided variable instead of
          # interpolating an expression into the script text.
          echo github.ref "${GITHUB_REF}"

  # Launches the EC2 instance that hosts the build. Runs when the commit
  # message contains `[ec2 build]`, on manual dispatch, on PRs labelled
  # `00 - EC2 Build`, or on pushes to `main` / `v*` tags.
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    needs: get_commit_message
    # The folded scalar joins these lines with single spaces into one expression.
    if: >-
      contains(needs.get_commit_message.outputs.message, '[ec2 build]') ||
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '00 - EC2 Build')) ||
      (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main')))
    env:
      EC2_INSTANCE_TYPE: t3.2xlarge
      EC2_AMI_ID: ami-089dafe9af191a0fd
      EC2_SUBNET_ID: subnet-0ca63188fe98788c1,subnet-05997205433b249d0,subnet-07ef5d3e974275fed,subnet-0161ef0151089bb0b
      EC2_SECURITY_GROUP: sg-051366641bf2b8049
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a  # ratchet:aws-actions/[email protected]
        with:
          aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Start EC2 Runner
        id: start-ec2-runner
        # NOTE(review): tracks `main` (explicitly excluded from ratchet) while
        # the stop step in `stop-runner` is pinned to a SHA — confirm the two
        # are meant to differ.
        uses: aarnphm/ec2-github-runner@main  # ratchet:exclude
        with:
          mode: start
          github-token: ${{ secrets.OPENLLM_PAT }}
          ec2-region: ${{ env.AWS_REGION }}
          ec2-image-id: ${{ env.EC2_AMI_ID }}
          ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
          subnet-id: ${{ env.EC2_SUBNET_ID }}
          security-group-id: ${{ env.EC2_SECURITY_GROUP }}

  # Builds the image on the freshly started runner. Outside pull requests it
  # also pushes to ghcr.io, signs the image with cosign and runs Trivy scans.
  build-and-push-image:
    name: Build and push OpenLLM base image
    concurrency:
      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    needs: start-runner
    runs-on: ${{ needs.start-runner.outputs.label }}  # run the job on the newly created runner
    permissions:
      contents: write
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write
      security-events: write
    steps:
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # ratchet:actions/[email protected]
        with:
          fetch-depth: 0
          # Only populated for `workflow_call`; empty for other triggers.
          ref: '${{ inputs.tags }}'
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@102b1a064a9b145e56556e22b18b19c624538d94  # ratchet:rlespinasse/[email protected]
      - name: Set up QEMU
        uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3  # ratchet:docker/[email protected]
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226  # ratchet:docker/[email protected]
        with:
          install: true
          driver-opts: |
            image=moby/buildkit:master
            network=host
      - name: Install cosign
        if: github.event_name != 'pull_request'
        uses: sigstore/cosign-installer@1fc5bd396d372bee37d608f955b336615edf79c8  # ratchet:sigstore/[email protected]
        with:
          cosign-release: 'v2.1.1'
      - name: Login to GitHub Container Registry
        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d  # ratchet:docker/[email protected]
        if: github.event_name != 'pull_request'
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata tags and labels for main, release or tag
        if: github.event_name != 'pull_request'
        id: meta
        uses: docker/metadata-action@e6428a5c4e294a61438ed7f43155db912025b6b3  # ratchet:docker/[email protected]
        with:
          flavor: |
            latest=auto
          images: |
            ghcr.io/bentoml/openllm
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
          labels: |
            maintainer=aarnphm
            org.opencontainers.image.source="https://github.com/bentoml/OpenLLM"
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56  # ratchet:docker/[email protected]
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }}
        with:
          context: .
          file: Dockerfile
          # Registry login and the `meta` step are both skipped on pull
          # requests, so there are no credentials and no tags to push with;
          # build only in that case.
          push: ${{ github.event_name != 'pull_request' }}
          platforms: 'linux/amd64'
          # `github.sha` is used for GIT_SHA because the `env` context only
          # holds variables set by the workflow (or via GITHUB_ENV), not the
          # runner's default GITHUB_SHA.
          build-args: |
            GIT_SHA=${{ github.sha }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
          # Empty on pull requests (the `meta` step does not run there).
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # TODO: Once https://github.com/aws/containers-roadmap/issues/876 is supported with OCI 1.1
          # then move back to saving cache within the public repo. For now we will save the cache manifest within our internal S3 buckets.
          # NOTE: the region of the S3 on prod is us-east-1, where the EC2 machine is at us-west-2
          cache-from: type=s3,region=us-east-1,bucket=openllm-cache,name=y5w8i4y6
          # @aarnphm: max is fine here, since we didn't do any custom code yet, so it is ok to cache every layer for optimal build time
          # We also ignore-error for now, just upload anything to the blob storage
          cache-to: type=s3,region=us-east-1,bucket=openllm-cache,name=y5w8i4y6,mode=max,compression=zstd,ignore-error=true
      - name: Sign the released image
        if: ${{ github.event_name != 'pull_request' }}
        env:
          COSIGN_EXPERIMENTAL: 'true'
          # Step outputs are handed to the shell through the environment so
          # their contents are never parsed as script text.
          TAGS: ${{ steps.meta.outputs.tags }}
          DIGEST: ${{ steps.build-and-push.outputs.digest }}
        # Signs every published tag against the digest of the pushed image.
        run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
      - name: Run Trivy in GitHub SBOM mode and submit results to Dependency Graph
        uses: aquasecurity/trivy-action@2b6a709cf9c4025c5438138008beaddbb02086f0  # ratchet:aquasecurity/trivy-action@master
        if: ${{ github.event_name != 'pull_request' }}
        with:
          image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}'
          format: 'github'
          output: 'dependency-results.sbom.json'
          github-pat: ${{ secrets.GITHUB_TOKEN }}
          scanners: 'vuln'
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@2b6a709cf9c4025c5438138008beaddbb02086f0  # ratchet:aquasecurity/trivy-action@master
        if: ${{ github.event_name != 'pull_request' }}
        with:
          image-ref: 'ghcr.io/bentoml/openllm:sha-${{ env.GITHUB_SHA_SHORT }}'
          format: 'sarif'
          output: 'trivy-results.sarif'
          severity: 'CRITICAL'
          scanners: 'vuln'
      - name: Upload Trivy scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@407ffafae6a767df3e0230c3df91b6443ae8df75  # ratchet:github/codeql-action/[email protected]
        if: ${{ github.event_name != 'pull_request' }}
        with:
          sarif_file: 'trivy-results.sarif'
      # TODO: Add snapshot tests here.

  # Tears the EC2 runner down. The trigger condition mirrors `start-runner`;
  # `always()` keeps this job eligible even when an upstream job failed or
  # was cancelled, so the instance is not left running.
  stop-runner:
    name: Stop self-hosted EC2 runner
    needs:
      - start-runner
      - build-and-push-image
      - get_commit_message
    runs-on: ubuntu-latest
    if: >-
      (contains(needs.get_commit_message.outputs.message, '[ec2 build]') ||
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, '00 - EC2 Build')) ||
      (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || startsWith(github.ref, 'refs/heads/main')))) &&
      always()
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a  # ratchet:aws-actions/[email protected]
        with:
          aws-access-key-id: ${{ secrets.AWS_PROD_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_PROD_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Stop EC2 runner
        uses: aarnphm/ec2-github-runner@af796d217e24ecbbc5a2c49e780cd90616e2b962  # ratchet:aarnphm/ec2-github-runner@main
        with:
          mode: stop
          github-token: ${{ secrets.OPENLLM_PAT }}
          ec2-region: ${{ env.AWS_REGION }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}