-
-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(vector-store): add MongoDB support for vector store service
* refactor(long-memory): replace jieba with jieba-wasm for improved text segmentation
  - Updated the dependency from `@node-rs/jieba` to `jieba-wasm` in package.json.
  - Refactored the text segmentation logic in `similarity.ts` to utilize the new `cut` function from `jieba-wasm`, enhancing compatibility and performance.
* refactor(long-memory): enhance BM25 similarity calculation in similarity.ts
  - Improved the BM25 similarity calculation by introducing term frequency maps for both documents.
  - Added a smoothing factor and adjusted the scoring formula to normalize against the theoretical maximum score.
  - Enhanced code readability and maintainability by restructuring the logic for term frequency and IDF calculations.
* style(long-memory): prettier
* feat(vector-store): add MongoDB configuration options to the vector store service
  - Introduced new MongoDB configuration parameters: mongodbUrl, mongodbDbName, and mongodbCollectionName.
  - Updated the configuration schema to include MongoDB as a supported vector store option.
  - Added documentation link for MongoDB configuration in the usage section.
* feat(vector-store): add MongoDB database settings to localization files

---------

Co-authored-by: dingyi <[email protected]>
- Loading branch information
1 parent
0fa8d90
commit 3fc51ca
Showing
4 changed files
with
142 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
119 changes: 119 additions & 0 deletions
119
packages/vector-store-service/src/vectorstore/mongodb.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
import { MongoDBAtlasVectorSearch } from '@langchain/mongodb' | ||
import { Context, Logger } from 'koishi' | ||
import { ChatLunaPlugin } from 'koishi-plugin-chatluna/services/chat' | ||
import { createLogger } from 'koishi-plugin-chatluna/utils/logger' | ||
import { Config } from '..' | ||
import { ChatLunaSaveableVectorStore } from 'koishi-plugin-chatluna/llm-core/model/base' | ||
import { MongoClient, ObjectId } from 'mongodb' | ||
|
||
// Module-level logger shared by this file; assigned inside apply() before importMongoDB() or any store callback uses it.
let logger: Logger | ||
|
||
/**
 * Registers the `mongodb` vector store factory on the ChatLuna plugin.
 *
 * Does nothing (beyond initialising the module logger) unless
 * `config.vectorStore` includes `'mongodb'`. Otherwise it checks that the
 * `mongodb` package can be loaded and registers a factory that, for every
 * requested store, connects a new `MongoClient` to `config.mongodbUrl` and
 * wraps a `MongoDBAtlasVectorSearch` over
 * `config.mongodbDbName` / `config.mongodbCollectionName`.
 *
 * @param ctx - Koishi context; used for the logger and for closing the
 *   client on `dispose`.
 * @param config - Plugin configuration holding the MongoDB settings.
 * @param plugin - ChatLuna plugin on which the vector store is registered.
 * @throws Error if the `mongodb` package cannot be imported.
 */
export async function apply(
    ctx: Context,
    config: Config,
    plugin: ChatLunaPlugin
) {
    logger = createLogger(ctx, 'chatluna-vector-store-service')

    if (!config.vectorStore.includes('mongodb')) {
        return
    }

    await importMongoDB()

    plugin.registerVectorStore('mongodb', async (params) => {
        const embeddings = params.embeddings

        const client = new MongoClient(config.mongodbUrl)
        await client.connect()

        // Make sure the connection does not outlive the plugin.
        ctx.on('dispose', async () => {
            await client.close()
            logger.info('MongoDB connection closed')
        })

        const collection = client
            .db(config.mongodbDbName)
            .collection(config.mongodbCollectionName)

        const vectorStore = new MongoDBAtlasVectorSearch(embeddings, {
            collection,
            indexName: params.key ?? 'vector_index',
            textKey: 'text',
            embeddingKey: 'embedding'
        })

        const wrapperStore =
            new ChatLunaSaveableVectorStore<MongoDBAtlasVectorSearch>(
                vectorStore,
                {
                    /**
                     * Deletes either the whole collection (`deleteAll`) or
                     * the documents identified by `options.ids` and by the
                     * `raw_id` metadata of `options.documents`.
                     */
                    async deletableFunction(_store, options) {
                        if (options.deleteAll) {
                            await collection.deleteMany({})
                            return
                        }

                        const ids: string[] = [...(options.ids ?? [])]

                        for (const document of options.documents ?? []) {
                            const rawId = document.metadata?.raw_id as
                                | string
                                | undefined
                            if (rawId != null) {
                                ids.push(rawId)
                            }
                        }

                        if (ids.length === 0) {
                            return
                        }

                        // The ids handed out by addDocumentsFunction default
                        // to UUIDs, which `new ObjectId()` rejects, so only
                        // convert the ids that really are ObjectId strings.
                        const objectIds = ids
                            .filter((id) => ObjectId.isValid(id))
                            .map((id) => new ObjectId(id))

                        // Primary match is the `raw_id` written at insert
                        // time. NOTE(review): this assumes
                        // MongoDBAtlasVectorSearch stores document metadata
                        // at the root of the MongoDB document - confirm
                        // against the installed @langchain/mongodb version.
                        // The `_id` clause keeps the previous behaviour for
                        // callers that pass real ObjectId strings.
                        await collection.deleteMany({
                            $or: [
                                { raw_id: { $in: ids } },
                                { _id: { $in: objectIds } }
                            ]
                        })
                    },
                    /**
                     * Tags every document with a `raw_id` (the caller
                     * supplied id at the same index, or a fresh UUID) and
                     * inserts the documents through the wrapped store.
                     */
                    async addDocumentsFunction(
                        store,
                        documents,
                        options: { ids?: string[] }
                    ) {
                        const providedIds = options?.ids ?? []

                        documents.forEach((document, i) => {
                            document.metadata = {
                                ...document.metadata,
                                raw_id: providedIds[i] ?? crypto.randomUUID()
                            }
                        })

                        await store.addDocuments(documents)
                    },
                    /**
                     * Data is written to MongoDB as it is added, so "saving"
                     * only closes the connection.
                     */
                    async saveableFunction(_store) {
                        await client.close()
                        logger.info('MongoDB connection closed during save')
                    }
                }
            )

        return wrapperStore
    })
}
|
||
/**
 * Loads the `mongodb` package on demand and hands back its `MongoClient`.
 *
 * A failed import is logged through the module logger and then re-thrown as
 * an error that tells the user how to install the dependency.
 *
 * @returns An object exposing the `MongoClient` constructor.
 * @throws Error when the `mongodb` package cannot be imported.
 */
async function importMongoDB() {
    let driver: typeof import('mongodb')

    try {
        driver = await import('mongodb')
    } catch (err) {
        logger.error(err)
        throw new Error(
            'Please install mongodb as a dependency with, e.g. `npm install -S mongodb`'
        )
    }

    return { MongoClient: driver.MongoClient }
}