.github/workflows/action_gpu_basic_tests.yml

name: unit-tests-gpu-template

on:
  workflow_call:
    inputs:
      os:
        required: true
        type: string
      python-version:
        required: true
        type: string

jobs:
  basic-tests-on-gpu:
    strategy:
      fail-fast: false # Don't cancel all on first failure
      matrix:
        model:
          - "transformers_gpt2_gpu"
          - "transformers_phi2_gpu"
          # - "transformers_phi3_small_8k_instruct_gpu" Does not run on tesla t4
          # - "llamacpp_llama2_7b_gpu" Keeps causing intermittent segfaults
    runs-on: ${{ inputs.os }}

    steps:
      - uses: actions/checkout@v4
      - name: Install Rust
        shell: bash
        run: |
           curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain 1.75.0
           echo "$HOME/.cargo/bin" >> $GITHUB_PATH
      - name: Set up Python ${{ inputs.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python-version }}
      - name: Show GPUs
        run: |
          nvidia-smi
      - name: Update Ubuntu
        run: |
          sudo apt-get update
          sudo apt-get -y upgrade
      - name: Ensure NVIDIA SDK available
        run: |
          sudo apt-get -y install cuda-toolkit
          echo "/usr/local/cuda-12.6/bin" >> $GITHUB_PATH
      - name: Install dependencies
        shell: bash
        run: |
          python -m pip install --upgrade pip
          pip install pytest
          pip install -e .[schemas,test,bench]
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Other dependencies
        run: |
          pip install sentencepiece
          echo "=============================="
          pip uninstall -y transformers
          pip install "transformers!=4.43.0,!=4.43.1,!=4.43.2,!=4.43.3" # Issue 965
      - name: GPU pip installs
        run: |
          pip install accelerate
          echo "=============================="
          pip uninstall -y llama-cpp-python
          CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84,!=0.3.6"
      - name: Check GPU available
        run: |
          python -c "import torch; assert torch.cuda.is_available()"
      - name: Run tests (except server) for ${{ matrix.model }}
        run: |
          pytest -vv --cov=guidance --cov-report=xml --cov-report=term-missing \
            --selected_model ${{ matrix.model }} \
            ./tests/unit ./tests/model_integration ./tests/model_specific
      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}