This library was created following the Ollama API documentation.
[dependencies]
ollama-rs = "0.2.4"
use ollama_rs::Ollama;
// By default, it will connect to localhost:11434
let ollama = Ollama::default();
// For custom values:
let ollama = Ollama::new("http://localhost".to_string(), 11434);
Feel free to check the Chatbot example that shows how to use the library to create a simple chatbot in less than 50 lines of code. You can also check some other examples.
These examples use minimal error handling for simplicity; handle errors properly in your own code.
use ollama_rs::generation::completion::GenerationRequest;
let model = "llama2:latest".to_string();
let prompt = "Why is the sky blue?".to_string();
let res = ollama.generate(GenerationRequest::new(model, prompt)).await;
if let Ok(res) = res {
println!("{}", res.response);
}
OUTPUTS: The sky appears blue because of a phenomenon called Rayleigh scattering...
Requires the `stream` feature.
use ollama_rs::generation::completion::GenerationRequest;
use tokio::io::{self, AsyncWriteExt};
use tokio_stream::StreamExt;
let model = "llama2:latest".to_string();
let prompt = "Why is the sky blue?".to_string();
let mut stream = ollama.generate_stream(GenerationRequest::new(model, prompt)).await.unwrap();
let mut stdout = io::stdout();
while let Some(res) = stream.next().await {
let responses = res.unwrap();
for resp in responses {
stdout.write_all(resp.response.as_bytes()).await.unwrap();
stdout.flush().await.unwrap();
}
}
Same output as above but streamed.
use ollama_rs::generation::completion::GenerationRequest;
use ollama_rs::generation::options::GenerationOptions;
let model = "llama2:latest".to_string();
let prompt = "Why is the sky blue?".to_string();
let options = GenerationOptions::default()
.temperature(0.2)
.repeat_penalty(1.5)
.top_k(25)
.top_p(0.25);
let res = ollama.generate(GenerationRequest::new(model, prompt).options(options)).await;
if let Ok(res) = res {
println!("{}", res.response);
}
OUTPUTS: 1. Sun emits white sunlight: The sun consists primarily ...
Every message sent and received will be stored in the history you pass to the library.
Example with history:
use ollama_rs::generation::chat::{ChatMessage, ChatMessageRequest};
use ollama_rs::history::ChatHistory;
let model = "llama2:latest".to_string();
let prompt = "Why is the sky blue?".to_string();
// `Vec<ChatMessage>` implements `ChatHistory`,
// but you could also implement it yourself on a custom type
let mut history = vec![];
let res = ollama
.send_chat_messages_with_history(
&mut history, // <- messages will be saved here
ChatMessageRequest::new(
model,
vec![ChatMessage::user(prompt)], // <- You should provide only one message
),
)
.await;
if let Ok(res) = res {
println!("{}", res.message.content);
}
See the chat-with-history examples for both the default and streaming variants.
let res = ollama.list_local_models().await.unwrap();
Returns a vector of `LocalModel` structs.
let res = ollama.show_model_info("llama2:latest".to_string()).await.unwrap();
Returns a `ModelInfo` struct.
use ollama_rs::models::create::CreateModelRequest;
let res = ollama.create_model(CreateModelRequest::path("model".into(), "/tmp/Modelfile.example".into())).await.unwrap();
Returns a `CreateModelStatus` struct representing the final status of the model creation.
Requires the `stream` feature.
use ollama_rs::models::create::CreateModelRequest;
use tokio_stream::StreamExt;
let mut res = ollama.create_model_stream(CreateModelRequest::path("model".into(), "/tmp/Modelfile.example".into())).await.unwrap();
while let Some(res) = res.next().await {
let res = res.unwrap();
// Handle the status
}
Returns a `CreateModelStatusStream` that streams every status update of the model creation.
let _ = ollama.copy_model("mario".into(), "mario_copy".into()).await.unwrap();
let _ = ollama.delete_model("mario_copy".into()).await.unwrap();
use ollama_rs::generation::embeddings::request::GenerateEmbeddingsRequest;
let request = GenerateEmbeddingsRequest::new("llama2:latest".to_string(), "Why is the sky blue?".into());
let res = ollama.generate_embeddings(request).await.unwrap();
use ollama_rs::generation::embeddings::request::GenerateEmbeddingsRequest;
let request = GenerateEmbeddingsRequest::new("llama2:latest".to_string(), vec!["Why is the sky blue?", "Why is the sky red?"].into());
let res = ollama.generate_embeddings(request).await.unwrap();
Returns a `GenerateEmbeddingsResponse` struct containing the embeddings (a vector of floats).
use ollama_rs::coordinator::Coordinator;
use ollama_rs::generation::chat::{ChatMessage, ChatMessageRequest};
use ollama_rs::generation::tools::implementations::{DDGSearcher, Scraper, Calculator};
use ollama_rs::generation::options::GenerationOptions;
use ollama_rs::tool_group;
let tools = tool_group![DDGSearcher::new(), Scraper {}, Calculator {}];
let mut history = vec![];
let mut coordinator = Coordinator::new_with_tools(ollama, "qwen2.5:32b".to_string(), history, tools)
.options(GenerationOptions::default().num_ctx(16384));
let resp = coordinator
.chat(vec![ChatMessage::user("What is the current oil price?")])
.await.unwrap();
println!("{}", resp.message.content);
Uses the given tools (such as searching the web) to find an answer, feeds that answer back into the LLM, and returns a `ChatMessageResponse` with the answer to the question.