Merge branch 'libreChat_v0.7.5' into target_v0.7.5
Update to the latest version of LibreChat
qimageonzon committed Nov 15, 2024
2 parents 69587a6 + 600d217 commit 2ccc1ef
Showing 415 changed files with 13,733 additions and 35,993 deletions.
2 changes: 0 additions & 2 deletions .devcontainer/docker-compose.yml
@@ -1,5 +1,3 @@
version: "3.8"

services:
app:
build:
5 changes: 4 additions & 1 deletion .env.example
@@ -82,7 +82,7 @@ PROXY=
#============#

ANTHROPIC_API_KEY=user_provided
# ANTHROPIC_MODELS=claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
# ANTHROPIC_MODELS=claude-3-5-sonnet-20241022,claude-3-5-sonnet-latest,claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
# ANTHROPIC_REVERSE_PROXY=

#============#
@@ -146,6 +146,8 @@ GOOGLE_KEY=user_provided

# GOOGLE_TITLE_MODEL=gemini-pro

# GOOGLE_LOC=us-central1

# Google Safety Settings
# NOTE: These settings apply to both Vertex AI and Gemini API (AI Studio)
#
@@ -412,6 +414,7 @@ LDAP_CA_CERT_PATH=
# LDAP_LOGIN_USES_USERNAME=true
# LDAP_ID=
# LDAP_USERNAME=
# LDAP_EMAIL=
# LDAP_FULL_NAME=

#========================#
47 changes: 0 additions & 47 deletions .github/dependabot.yml

This file was deleted.

6 changes: 2 additions & 4 deletions .github/workflows/helmcharts.yml
@@ -25,11 +25,9 @@ jobs:
- name: Install Helm
uses: azure/setup-helm@v4
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"

- name: Run chart-releaser
uses: helm/[email protected]
with:
charts_dir: helmchart
env:
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
# v0.7.5-rc2
# v0.7.5

# Base node image
FROM node:20-alpine AS node
2 changes: 1 addition & 1 deletion Dockerfile.multi
@@ -1,5 +1,5 @@
# Dockerfile.multi
# v0.7.5-rc2
# v0.7.5

# Base for all builds
FROM node:20-alpine AS base
41 changes: 25 additions & 16 deletions api/app/clients/AnthropicClient.js
@@ -17,8 +17,8 @@ const {
parseParamFromPrompt,
createContextHandlers,
} = require('./prompts');
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const { getModelMaxTokens, matchModelName } = require('~/utils');
const { sleep } = require('~/server/utils');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
@@ -64,6 +64,12 @@ class AnthropicClient extends BaseClient {
/** Whether or not the model supports Prompt Caching
* @type {boolean} */
this.supportsCacheControl;
/** The key for the usage object's input tokens
* @type {string} */
this.inputTokensKey = 'input_tokens';
/** The key for the usage object's output tokens
* @type {string} */
this.outputTokensKey = 'output_tokens';
}

setOptions(options) {
@@ -114,7 +120,14 @@
this.options.maxContextTokens ??
getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ??
100000;
this.maxResponseTokens = this.modelOptions.maxOutputTokens || 1500;
this.maxResponseTokens =
this.modelOptions.maxOutputTokens ??
getModelMaxOutputTokens(
this.modelOptions.model,
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ??
1500;
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;

@@ -138,17 +151,6 @@
this.endToken = '';
this.gptEncoder = this.constructor.getTokenizer('cl100k_base');

if (!this.modelOptions.stop) {
const stopTokens = [this.startToken];
if (this.endToken && this.endToken !== this.startToken) {
stopTokens.push(this.endToken);
}
stopTokens.push(`${this.userLabel}`);
stopTokens.push('<|diff_marker|>');

this.modelOptions.stop = stopTokens;
}

return this;
}

@@ -200,15 +202,15 @@
}

/**
* Calculates the correct token count for the current message based on the token count map and API usage.
* Calculates the correct token count for the current user message based on the token count map and API usage.
* Edge case: If the calculation results in a negative value, it returns the original estimate.
* If revisiting a conversation with a chat history entirely composed of token estimates,
* the cumulative token count going forward should become more accurate as the conversation progresses.
* @param {Object} params - The parameters for the calculation.
* @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
* @param {string} params.currentMessageId - The ID of the current message to calculate.
* @param {AnthropicStreamUsage} params.usage - The usage object returned by the API.
* @returns {number} The correct token count for the current message.
* @returns {number} The correct token count for the current user message.
*/
calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
const originalEstimate = tokenCountMap[currentMessageId] || 0;
@@ -680,7 +682,14 @@
*/
checkPromptCacheSupport(modelName) {
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
if (modelMatch === 'claude-3-5-sonnet' || modelMatch === 'claude-3-haiku') {
if (modelMatch.includes('claude-3-5-sonnet-latest')) {
return false;
}
if (
modelMatch === 'claude-3-5-sonnet' ||
modelMatch === 'claude-3-haiku' ||
modelMatch === 'claude-3-opus'
) {
return true;
}
return false;
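
A note on the maxResponseTokens change above: the hardcoded 1500-token default now applies only when neither the user's maxOutputTokens setting nor a per-model lookup produces a value. Below is a minimal sketch of that fallback chain; the lookup table and its limits are illustrative assumptions, not the actual ~/utils implementation.

```js
// Illustrative stand-in for getModelMaxOutputTokens (assumed values, not the repo's table).
const MAX_OUTPUT_TOKENS = {
  'claude-3-5-sonnet-20241022': 8192,
  'claude-3-opus-20240229': 4096,
};

function getModelMaxOutputTokens(model) {
  return MAX_OUTPUT_TOKENS[model];
}

// Mirrors the nullish-coalescing chain in setOptions: explicit setting > model lookup > 1500.
function resolveMaxResponseTokens({ maxOutputTokens, model }) {
  return maxOutputTokens ?? getModelMaxOutputTokens(model) ?? 1500;
}

console.log(resolveMaxResponseTokens({ model: 'claude-3-5-sonnet-20241022' })); // 8192
console.log(resolveMaxResponseTokens({ model: 'claude-2.1' })); // 1500 (no table entry)
console.log(resolveMaxResponseTokens({ maxOutputTokens: 1024, model: 'claude-3-opus-20240229' })); // 1024
```

Because `??` only falls through on null or undefined, an explicitly configured limit always wins, even when it is lower than the model's maximum.
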
25 changes: 20 additions & 5 deletions api/app/clients/BaseClient.js
@@ -3,7 +3,7 @@ const fetch = require('node-fetch');
const {
supportsBalanceCheck,
isAgentsEndpoint,
paramEndpoints,
isParamEndpoint,
ErrorTypes,
Constants,
CacheKeys,
@@ -42,6 +42,14 @@ class BaseClient {
this.conversationId;
/** @type {string} */
this.responseMessageId;
/** @type {TAttachment[]} */
this.attachments;
/** The key for the usage object's input tokens
* @type {string} */
this.inputTokensKey = 'prompt_tokens';
/** The key for the usage object's output tokens
* @type {string} */
this.outputTokensKey = 'completion_tokens';
}

setOptions() {
@@ -582,7 +590,10 @@

if (typeof completion === 'string') {
responseMessage.text = addSpaceIfNeeded(generation) + completion;
} else if (Array.isArray(completion) && paramEndpoints.has(this.options.endpoint)) {
} else if (
Array.isArray(completion) &&
isParamEndpoint(this.options.endpoint, this.options.endpointType)
) {
responseMessage.text = '';
responseMessage.content = completion;
} else if (Array.isArray(completion)) {
@@ -604,8 +615,8 @@
* @type {StreamUsage | null} */
const usage = this.getStreamUsage != null ? this.getStreamUsage() : null;

if (usage != null && Number(usage.output_tokens) > 0) {
responseMessage.tokenCount = usage.output_tokens;
if (usage != null && Number(usage[this.outputTokensKey]) > 0) {
responseMessage.tokenCount = usage[this.outputTokensKey];
completionTokens = responseMessage.tokenCount;
await this.updateUserMessageTokenCount({ usage, tokenCountMap, userMessage, opts });
} else {
@@ -620,6 +631,10 @@
await this.userMessagePromise;
}

if (this.artifactPromises) {
responseMessage.attachments = (await Promise.all(this.artifactPromises)).filter((a) => a);
}

this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
const messageCache = getLogStores(CacheKeys.MESSAGES);
messageCache.set(
@@ -655,7 +670,7 @@
/** @type {boolean} */
const shouldUpdateCount =
this.calculateCurrentTokenCount != null &&
Number(usage.input_tokens) > 0 &&
Number(usage[this.inputTokensKey]) > 0 &&
(this.options.resendFiles ||
(!this.options.resendFiles && !this.options.attachments?.length)) &&
!this.options.promptPrefix;
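
The inputTokensKey/outputTokensKey fields introduced here, together with the matching 'input_tokens'/'output_tokens' overrides in AnthropicClient above, let the shared token bookkeeping read a stream usage object without hardcoding any one provider's field names. A simplified sketch of the pattern, assuming only the shape shown in this diff (readUsage is a stand-in, not the repo's method):

```js
// Base class defaults to OpenAI-style usage fields.
class BaseClient {
  constructor() {
    this.inputTokensKey = 'prompt_tokens';
    this.outputTokensKey = 'completion_tokens';
  }

  // Reads usage generically via the configured keys.
  readUsage(usage) {
    return {
      promptTokens: Number(usage?.[this.inputTokensKey]) || 0,
      completionTokens: Number(usage?.[this.outputTokensKey]) || 0,
    };
  }
}

// Anthropic overrides the keys to match its usage object.
class AnthropicClient extends BaseClient {
  constructor() {
    super();
    this.inputTokensKey = 'input_tokens';
    this.outputTokensKey = 'output_tokens';
  }
}

// The same base-class code path now works for both providers.
console.log(new AnthropicClient().readUsage({ input_tokens: 120, output_tokens: 45 }));
// -> { promptTokens: 120, completionTokens: 45 }
```
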
75 changes: 59 additions & 16 deletions api/app/clients/ChatGPTClient.js
@@ -1,19 +1,21 @@
const Keyv = require('keyv');
const crypto = require('crypto');
const { CohereClient } = require('cohere-ai');
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const {
ImageDetail,
EModelEndpoint,
resolveHeaders,
CohereConstants,
mapModelToAzureConfig,
} = require('librechat-data-provider');
const { CohereClient } = require('cohere-ai');
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
const { extractBaseURL, constructAzureURL, genAzureChatCompletion } = require('~/utils');
const { createContextHandlers } = require('./prompts');
const { createCoherePayload } = require('./llm');
const { Agent, ProxyAgent } = require('undici');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
const { extractBaseURL, constructAzureURL, genAzureChatCompletion } = require('~/utils');

const CHATGPT_MODEL = 'gpt-3.5-turbo';
const tokenizersCache = {};
@@ -612,21 +614,66 @@ ${botMessage.message}

async buildPrompt(messages, { isChatGptModel = false, promptPrefix = null }) {
promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim();

// Handle attachments and create augmentedPrompt
if (this.options.attachments) {
const attachments = await this.options.attachments;
const lastMessage = messages[messages.length - 1];

if (this.message_file_map) {
this.message_file_map[lastMessage.messageId] = attachments;
} else {
this.message_file_map = {
[lastMessage.messageId]: attachments,
};
}

const files = await this.addImageURLs(lastMessage, attachments);
this.options.attachments = files;

this.contextHandlers = createContextHandlers(this.options.req, lastMessage.text);
}

if (this.message_file_map) {
this.contextHandlers = createContextHandlers(
this.options.req,
messages[messages.length - 1].text,
);
}

// Calculate image token cost and process embedded files
messages.forEach((message, i) => {
if (this.message_file_map && this.message_file_map[message.messageId]) {
const attachments = this.message_file_map[message.messageId];
for (const file of attachments) {
if (file.embedded) {
this.contextHandlers?.processFile(file);
continue;
}

messages[i].tokenCount =
(messages[i].tokenCount || 0) +
this.calculateImageTokenCost({
width: file.width,
height: file.height,
detail: this.options.imageDetail ?? ImageDetail.auto,
});
}
}
});

if (this.contextHandlers) {
this.augmentedPrompt = await this.contextHandlers.createContext();
promptPrefix = this.augmentedPrompt + promptPrefix;
}

if (promptPrefix) {
// If the prompt prefix doesn't end with the end token, add it.
if (!promptPrefix.endsWith(`${this.endToken}`)) {
promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`;
}
promptPrefix = `${this.startToken}Instructions:\n${promptPrefix}`;
} else {
const currentDateString = new Date().toLocaleDateString('en-us', {
year: 'numeric',
month: 'long',
day: 'numeric',
});
promptPrefix = `${this.startToken}Instructions:\nYou are ChatGPT, a large language model trained by OpenAI. Respond conversationally.\nCurrent date: ${currentDateString}${this.endToken}\n\n`;
}

const promptSuffix = `${this.startToken}${this.chatGptLabel}:\n`; // Prompt ChatGPT to respond.

const instructionsPayload = {
@@ -714,10 +761,6 @@ ${botMessage.message}
this.maxResponseTokens,
);

if (this.options.debug) {
console.debug(`Prompt : ${prompt}`);
}

if (isChatGptModel) {
return { prompt: [instructionsPayload, messagePayload], context };
}
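
The buildPrompt changes above add per-message image token accounting through calculateImageTokenCost, whose body is not part of this hunk. The sketch below is an assumption based on OpenAI's published vision token rules (a flat cost at 'low' detail; a base cost plus a per-512px-tile cost at 'high' detail after resizing), not the repo's actual implementation.

```js
// Assumed image token accounting (OpenAI's documented rules; illustrative only).
function calculateImageTokenCost({ width, height, detail = 'auto' }) {
  const BASE_TOKENS = 85;
  const TOKENS_PER_TILE = 170;

  if (detail === 'low') {
    return BASE_TOKENS;
  }

  // Scale so the longest side is at most 2048px, then the shortest side to at most 768px.
  let w = width;
  let h = height;
  const longScale = 2048 / Math.max(w, h);
  if (longScale < 1) {
    w *= longScale;
    h *= longScale;
  }
  const shortScale = 768 / Math.min(w, h);
  if (shortScale < 1) {
    w *= shortScale;
    h *= shortScale;
  }

  // Count 512x512 tiles of the scaled image.
  const tiles = Math.ceil(w / 512) * Math.ceil(h / 512);
  return BASE_TOKENS + TOKENS_PER_TILE * tiles;
}

console.log(calculateImageTokenCost({ width: 1024, height: 1024, detail: 'high' })); // 765
```
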
4 changes: 3 additions & 1 deletion api/app/clients/GoogleClient.js
@@ -28,7 +28,7 @@ const {
} = require('./prompts');
const BaseClient = require('./BaseClient');

const loc = 'us-central1';
const loc = process.env.GOOGLE_LOC || 'us-central1';
const publisher = 'google';
const endpointPrefix = `https://${loc}-aiplatform.googleapis.com`;
// const apiEndpoint = loc + '-aiplatform.googleapis.com';
@@ -593,6 +593,8 @@ class GoogleClient extends BaseClient {

createLLM(clientOptions) {
const model = clientOptions.modelName ?? clientOptions.model;
clientOptions.location = loc;
clientOptions.endpoint = `${loc}-aiplatform.googleapis.com`;
if (this.project_id && this.isTextModel) {
logger.debug('Creating Google VertexAI client');
return new GoogleVertexAI(clientOptions);
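
With GOOGLE_LOC now read from the environment (defaulting to the previously hardcoded us-central1), the Vertex AI region and the API host derived from it follow deployment configuration. A minimal sketch of the wiring shown above, with an example region:

```js
// Region comes from the environment, falling back to the old hardcoded default.
const loc = process.env.GOOGLE_LOC || 'us-central1';

// The regional Vertex AI host is derived from the chosen location.
const endpoint = `${loc}-aiplatform.googleapis.com`;

// e.g. with GOOGLE_LOC=europe-west1 this prints https://europe-west1-aiplatform.googleapis.com
console.log(`https://${endpoint}`);
```
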
