From a4e5bc746c091b18e24c3b615040fa1c21d10855 Mon Sep 17 00:00:00 2001
From: Ariya Hidayat <ariya.hidayat@gmail.com>
Date: Sun, 28 Apr 2024 06:36:28 -0700
Subject: [PATCH] CI: Use llama.cpp

---
 .github/actions/prepare-llm/action.yml | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/.github/actions/prepare-llm/action.yml b/.github/actions/prepare-llm/action.yml
index c3affc6..591fd1d 100644
--- a/.github/actions/prepare-llm/action.yml
+++ b/.github/actions/prepare-llm/action.yml
@@ -7,23 +7,28 @@ runs:
       shell: bash
       run: curl -OL https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
 
-    - name: Download and unpack Nitro
+    - name: Download and unpack llama.cpp
       shell: bash
       run: |
-        curl -OL https://github.com/janhq/nitro/releases/download/v0.3.22/nitro-0.3.22-linux-amd64-avx.tar.gz
-        tar zxvf nitro-0.3.22-linux-amd64-avx.tar.gz
+        curl -OL https://github.com/ggerganov/llama.cpp/releases/download/b2751/llama-b2751-bin-ubuntu-x64.zip
+        unzip llama-b2751-bin-ubuntu-x64.zip
 
-    - name: Launch Nitro
+    # Runs in the background (&) so the step returns at once; the model is
+    # passed on the command line, so no separate load request is needed.
+    - name: Launch llama.cpp
       shell: bash
-      run: ./nitro/nitro 2 127.0.0.1 8080 &
+      run: ./build/bin/server -m ./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf &
 
-    - name: Wait until Nitro is ready
+    - name: Wait until it is ready
       shell: bash
-      run: while ! curl -s 'http://localhost:8080/healthz' | grep 'alive'; do sleep 1; done
-
-    - name: Load TinyLlama into Nitro
-      shell: bash
-      run: |
-        curl http://localhost:8080/inferences/llamacpp/loadmodel \
-          -H 'Content-Type: application/json' \
-          -d '{"llama_model_path": "./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"}'
+      run: |
+        # Poll /health once per second, but give up after 120 attempts so a
+        # server that never starts fails the job instead of hanging it.
+        for attempt in $(seq 1 120); do
+          if curl -s 'http://localhost:8080/health' | grep 'ok'; then
+            exit 0
+          fi
+          sleep 1
+        done
+        echo 'llama.cpp server did not become ready in time' >&2
+        exit 1