From 315102f89109f1b67c8f89f12d98ab646685e333 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Mon, 5 Feb 2024 16:02:01 -0500
Subject: [PATCH] kompute : disable GPU offload for Mixtral

We haven't implemented the necessary GPU kernels yet.

Fixes this crash:

ggml_vk_graph_compute: error: unsupported op 'ARGSORT'
GGML_ASSERT: /home/jared/src/forks/gpt4all/gpt4all-backend/llama.cpp-mainline/ggml-kompute.cpp:1508: !"unsupported op"

Signed-off-by: Jared Van Bortel
---
 llama.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llama.cpp b/llama.cpp
index 21fa02b8bc91e..5260f07d4bf22 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4138,6 +4138,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
 #ifdef GGML_USE_KOMPUTE
     if (params.n_gpu_layers > 0 && (
         !(model.arch == LLM_ARCH_LLAMA || model.arch == LLM_ARCH_FALCON)
+        || model.hparams.n_expert > 0
         || !(
             model.ftype == LLAMA_FTYPE_ALL_F32 ||
             model.ftype == LLAMA_FTYPE_MOSTLY_F16 ||