diff --git a/README.md b/README.md index 5ade73d..6ea4d8b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -# vec2pg - +# `vec2pg`

@@ -8,6 +7,8 @@ Pre-commit Status + Python version + PostgreSQL version

License @@ -17,23 +18,25 @@ Download count

-

- Python version - PostgreSQL version -

--- +**Documentation**: https://supabase-community.github.io/vec2pg + **Source Code**: https://github.com/supabase-community/vec2pg --- -A CLI for migrating data from vector databases to [Supabase](https://supabase.com). +`vec2pg` is a CLI tool for migrating data from third-party vector databases to [Supabase](https://supabase.com) with Pgvector. + Supported data sources include: + - [Pinecone](https://docs.pinecone.io/home) -- (more soon) +- [Qdrant](https://qdrant.tech/) +- [Vote for others](https://github.com/supabase-community/vec2pg/issues/6) +## Usage ``` vec2pg --help @@ -42,175 +45,13 @@ vec2pg --help ``` Usage: vec2pg [OPTIONS] COMMAND [ARGS]... -╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ --install-completion Install completion for the current shell. │ -│ --show-completion Show completion for the current shell, to copy it or customize the installation. │ -│ --help Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ pinecone │ -╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -``` - -## Installation - -Requirements: - -- Python >= 3.8 - -```sh -pip install vec2pg -``` - - -## Migration Guide - -### Pinecone - -``` -vec2pg pinecone migrate --help -``` - -``` - Usage: vec2pg pinecone migrate [OPTIONS] PINECONE_INDEX PINECONE_API_KEY - POSTGRES_CONNECTION_STRING - -╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ * pinecone_index TEXT [default: None] [required] │ -│ * pinecone_api_key TEXT [env var: PINECONE_API_KEY] [default: None] [required] │ -│ * postgres_connection_string TEXT [env var: POSTGRES_CONNECTION_STRING] [default: None] [required] │ 
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ --help Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────╮ +│ --install-completion Install completion for the current shell.│ +│ --show-completion Show completion for the current shell │ +│ --help Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ─────────────────────────────────────────────────────────────╮ +│ pinecone Move data from Pinecone to Supabase │ +│ qdrant Move data from Qdrant to Supabase │ +╰────────────────────────────────────────────────────────────────────────╯ ``` - - - -To migrate from [Pinecone serverless](https://www.pinecone.io/blog/serverless/) index to Postgres you'll need: - -- A Pinecone API Key - -![pinecone api key](/assets/pinecone_api_key.png) - -- The Pinecone serverless index name - -![pinecone serverless index name](/assets/pinecone_index_name.png) - -- A Supabase instance - -From the Supabase instance, we need the connection parameters. Retrieve them on the [database settings page](https://supabase.com/dashboard/project/_/settings/database)(https://supabase.com/dashboard/project/_/settings/database) - -![supabase connection parameters](/assets/supabase_connection_params.png) - -And substitute those values into a valid Postgres connection string -``` -postgresql://:@:/postgres -``` -e.g. -``` -postgresql://postgres.ahqsutirwnsocaaorimo:@aws-0-us-east-1.pooler.supabase.com:6543/postgres -``` - -Then we can call `vec2pg pinecone migrate` passing our values. 
You can supply all parameters directly to the CLI, but its a good idea to pass the Pinecone API Key (PINECONE_API_KEY) and Supabase connection string (POSTGRES_CONNECTION_STRING) as environment variables to avoid logging credentials to your shell's history. - -![sample output](/assets/pinecone_to_supabase_output.png) - -The CLI provies a progress bar to monitor the migration. - -On completion, you can view a copy of the Pinecone index data in Supabase Postgres at `vec2pg.` - -![view results](/assets/pinecone_view_results.png) - -From there you can transform and manipulate the data in Postgres using SQL. - -### Qdrant - -``` -vec2pg qdrant migrate --help -``` - -``` - Usage: vec2pg qdrant migrate [OPTIONS] QDRANT_COLLECTION_NAME QDRANT_URL - QDRANT_API_KEY POSTGRES_CONNECTION_STRING - -╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ * qdrant_collection_name TEXT [default: None] [required] │ -│ * qdrant_url TEXT [default: None] [required] │ -│ * qdrant_api_key TEXT [env var: QDRANT_API_KEY] [default: None] [required] │ -│ * postgres_connection_string TEXT [env var: POSTGRES_CONNECTION_STRING] [default: None] [required] │ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ --help Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -``` - -To migrate from Qdrant collection to Postgres you'll need to log in at https://cloud.qdrant.io/ and collect your: - -- Qdrant API Key - -![Qdrant api key](/assets/qdrant_api_key.png) - -- Qdrant URL and collection name - -![Qdrant cluster url](/assets/qdrant_nav_dashboard.png) - -The URL is the "Cluster URL". To find the collection name, select "Open Dashboard". 
- -![Qdrant collection name](/assets/qdrant_collection_name.png) - -- A Supabase instance - -From the Supabase instance, we need the connection parameters. Retrieve them on the [database settings page](https://supabase.com/dashboard/project/_/settings/database)(https://supabase.com/dashboard/project/_/settings/database) - - -![supabase connection parameters](/assets/supabase_connection_params.png) - -And substitute those values into a valid Postgres connection string -``` -postgresql://:@:/postgres -``` -e.g. -``` -postgresql://postgres.ahqsutirwnsocaaorimo:@aws-0-us-east-1.pooler.supabase.com:6543/postgres -``` - -Then we can call `vec2pg qdrant migrate` passing our values. You can supply all parameters directly to the CLI, but its a good idea to pass the Qdrant API Key (QDRANT_API_KEY) and Supabase connection string (POSTGRES_CONNECTION_STRING) as environment variables to avoid logging credentials to your shell's history. - -![sample output](/assets/qdrant_to_supabase_output.png) - -The CLI provides a progress bar to monitor the migration. - -On completion, you can view a copy of the Pinecone index data in Supabase Postgres at `vec2pg.` - -![view results](/assets/qdrant_view_results.png) - -From there you can transform and manipulate the data in Postgres using SQL. 
- - - -# Requisites -- Python >= 3.8 - -# Contributing - -To run the tests you will need -- Python >= 3.8 -- docker -- [Pinecone API key](https://docs.pinecone.io/guides/get-started/authentication#find-your-pinecone-api-key) - -The Pinecone API key should be stored as an environment variable `PINECONE_API_KEY` - -Run the tests -``` -poetry run pytest -``` - -Run the pre-commit hooks -``` -poetry run pre-commit run --all -``` - -# Star History - -![](https://starchart.cc/supabase-community/vec2pg.svg) diff --git a/docs/assets/favicon.ico b/docs/assets/favicon.ico new file mode 100644 index 0000000..d3fca2a Binary files /dev/null and b/docs/assets/favicon.ico differ diff --git a/assets/pinecone_api_key.png b/docs/assets/pinecone_api_key.png similarity index 100% rename from assets/pinecone_api_key.png rename to docs/assets/pinecone_api_key.png diff --git a/assets/pinecone_index_name.png b/docs/assets/pinecone_index_name.png similarity index 100% rename from assets/pinecone_index_name.png rename to docs/assets/pinecone_index_name.png diff --git a/assets/pinecone_to_supabase_output.png b/docs/assets/pinecone_to_supabase_output.png similarity index 100% rename from assets/pinecone_to_supabase_output.png rename to docs/assets/pinecone_to_supabase_output.png diff --git a/assets/pinecone_view_results.png b/docs/assets/pinecone_view_results.png similarity index 100% rename from assets/pinecone_view_results.png rename to docs/assets/pinecone_view_results.png diff --git a/assets/qdrant_api_key.png b/docs/assets/qdrant_api_key.png similarity index 100% rename from assets/qdrant_api_key.png rename to docs/assets/qdrant_api_key.png diff --git a/assets/qdrant_collection_name.png b/docs/assets/qdrant_collection_name.png similarity index 100% rename from assets/qdrant_collection_name.png rename to docs/assets/qdrant_collection_name.png diff --git a/assets/qdrant_nav_dashboard.png b/docs/assets/qdrant_nav_dashboard.png similarity index 100% rename from 
assets/qdrant_nav_dashboard.png rename to docs/assets/qdrant_nav_dashboard.png diff --git a/assets/qdrant_to_supabase_output.png b/docs/assets/qdrant_to_supabase_output.png similarity index 100% rename from assets/qdrant_to_supabase_output.png rename to docs/assets/qdrant_to_supabase_output.png diff --git a/assets/qdrant_view_results.png b/docs/assets/qdrant_view_results.png similarity index 100% rename from assets/qdrant_view_results.png rename to docs/assets/qdrant_view_results.png diff --git a/assets/supabase_connection_params.png b/docs/assets/supabase_connection_params.png similarity index 100% rename from assets/supabase_connection_params.png rename to docs/assets/supabase_connection_params.png diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..ddd0e57 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,23 @@ +# Contributing + +`vec2pg` is open source software. External contributions are welcome. Note that we have a high bar for testing. + +Before opening a PR, please [create an issue](https://github.com/supabase-community/vec2pg/issues/new/choose) in GitHub to discuss and approve the change you're interested in making. + +To run the tests you will need: + +- Python >= 3.8 +- Docker +- [Pinecone API key](https://docs.pinecone.io/guides/get-started/authentication#find-your-pinecone-api-key) - pinecone does not support a local mode, so we have to hit their service during testing + +The Pinecone API key should be stored as an environment variable `PINECONE_API_KEY` + +Run the tests +``` +poetry run pytest +``` + +Run the pre-commit hooks +``` +poetry run pre-commit run --all +``` diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..bcce177 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,62 @@ +# `vec2pg` + +

+ + Test Status + + + Pre-commit Status + + Python version + PostgreSQL version +

+

+ License + PyPI version + + Codestyle Black + + Download count +

+ +--- + +**Documentation**: https://supabase-community.github.io/vec2pg + +**Source Code**: https://github.com/supabase-community/vec2pg + +--- + +`vec2pg` is a CLI tool for migrating data from third-party vector databases to [Supabase](https://supabase.com). + + +Supported data sources include: + +- [Pinecone](pinecone.md) +- [Qdrant](qdrant.md) +- [[Vote for others]](https://github.com/supabase-community/vec2pg/issues/6) + +The general flow involves passing an API key for your vector database, a Postgres connection string, and a reference to the collection you want to copy. `vec2pg` then presents a progress bar in the terminal that you can use to monitor progress. Once complete, the vectors and any associated metadata are available in your Postgres instance at `vec2pg.<collection_name>`. + + +### Usage + +``` +vec2pg --help +``` + +``` + Usage: vec2pg [OPTIONS] COMMAND [ARGS]... + +╭─ Options ──────────────────────────────────────────────────────────────╮ +│ --install-completion Install completion for the current shell.│ +│ --show-completion Show completion for the current shell │ +│ --help Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ─────────────────────────────────────────────────────────────╮ +│ pinecone Move data from Pinecone to Supabase │ +│ qdrant Move data from Qdrant to Supabase │ +╰────────────────────────────────────────────────────────────────────────╯ +``` + + diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000..00b8405 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,21 @@ +# Installation + +`vec2pg` is a simple package [available on PyPI](https://pypi.org/project/vec2pg/) + +Requirements: + +- Python >= 3.8 + +### From PyPI + +Use your preferred package manager to add the package to your local environment. 
+ +Using pip +```sh +pip install vec2pg +``` + +Using poetry +```sh +poetry add vec2pg +``` diff --git a/docs/pinecone.md b/docs/pinecone.md new file mode 100644 index 0000000..907fd40 --- /dev/null +++ b/docs/pinecone.md @@ -0,0 +1,60 @@ +Pinecone is a closed source vector database with "classic" and "serverless" collection types. Classic collections do not support iterating over all records in a collection. For that reason, `vec2pg` exclusively supports migrating from their more recent "serverless" collections. + +``` +vec2pg pinecone migrate --help +``` + +``` + Usage: vec2pg pinecone migrate [OPTIONS] PINECONE_INDEX PINECONE_API_KEY + POSTGRES_CONNECTION_STRING + +╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * pinecone_index TEXT [default: None] [required] │ +│ * pinecone_api_key TEXT [env var: PINECONE_API_KEY] [default: None] [required] │ +│ * postgres_connection_string TEXT [env var: POSTGRES_CONNECTION_STRING] [default: None] [required] │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +``` + + + +To migrate from [Pinecone serverless](https://www.pinecone.io/blog/serverless/) index to Postgres you'll need: + +- A Pinecone API Key + +![pinecone api key](./assets/pinecone_api_key.png) + +- The Pinecone serverless index name + +![pinecone serverless index name](./assets/pinecone_index_name.png) + +- A Supabase instance + +From the Supabase instance, we need the connection parameters. 
Retrieve them on the [database settings page](https://supabase.com/dashboard/project/_/settings/database). + +![supabase connection parameters](./assets/supabase_connection_params.png) + +And substitute those values into a valid Postgres connection string +``` +postgresql://<user>:<password>@<host>:<port>/postgres +``` +e.g. +``` +postgresql://postgres.ahqsutirwnsocaaorimo:<password>@aws-0-us-east-1.pooler.supabase.com:6543/postgres +``` + +Then we can call `vec2pg pinecone migrate` passing our values. You can supply all parameters directly to the CLI, but it's a good idea to pass the Pinecone API Key `PINECONE_API_KEY` and Supabase connection string `POSTGRES_CONNECTION_STRING` as environment variables to avoid logging credentials to your shell's history. + +![sample output](./assets/pinecone_to_supabase_output.png) + +The CLI provides a progress bar to monitor the migration. + +On completion, you can view a copy of the Pinecone index data in Supabase Postgres at `vec2pg.<collection_name>` + +![view results](./assets/pinecone_view_results.png) + +From there you can transform and manipulate the data in Postgres using SQL. + + diff --git a/docs/qdrant.md b/docs/qdrant.md new file mode 100644 index 0000000..38c5b18 --- /dev/null +++ b/docs/qdrant.md @@ -0,0 +1,64 @@ +Qdrant is an open source vector database written in Rust. It can be run in memory, locally via Docker, or as a hosted offering. `vec2pg` supports migrating Qdrant collections from Docker or the hosted platform. 
+ +``` +vec2pg qdrant migrate --help +``` + +``` + Usage: vec2pg qdrant migrate [OPTIONS] QDRANT_COLLECTION_NAME QDRANT_URL + QDRANT_API_KEY POSTGRES_CONNECTION_STRING + +╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * qdrant_collection_name TEXT [default: None] [required] │ +│ * qdrant_url TEXT [default: None] [required] │ +│ * qdrant_api_key TEXT [env var: QDRANT_API_KEY] [default: None] [required] │ +│ * postgres_connection_string TEXT [env var: POSTGRES_CONNECTION_STRING] [default: None] [required] │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +``` + +To migrate from a Qdrant collection to Postgres you'll need to log in at https://cloud.qdrant.io/ and collect your: + +- Qdrant API Key + +![Qdrant api key](./assets/qdrant_api_key.png) + +- Qdrant URL and collection name + +![Qdrant cluster url](./assets/qdrant_nav_dashboard.png) + +The URL is the "Cluster URL". To find the collection name, select "Open Dashboard". + +![Qdrant collection name](./assets/qdrant_collection_name.png) + +- A Supabase instance + +From the Supabase instance, we need the connection parameters. Retrieve them on the [database settings page](https://supabase.com/dashboard/project/_/settings/database). + + +![supabase connection parameters](./assets/supabase_connection_params.png) + +And substitute those values into a valid Postgres connection string +``` +postgresql://<user>:<password>@<host>:<port>/postgres +``` +e.g. +``` +postgresql://postgres.ahqsutirwnsocaaorimo:<password>@aws-0-us-east-1.pooler.supabase.com:6543/postgres +``` + +Then we can call `vec2pg qdrant migrate` passing our values. 
You can supply all parameters directly to the CLI, but it's a good idea to pass the Qdrant API Key `QDRANT_API_KEY` and Supabase connection string `POSTGRES_CONNECTION_STRING` as environment variables to avoid logging credentials to your shell's history. + +![sample output](./assets/qdrant_to_supabase_output.png) + +The CLI provides a progress bar to monitor the migration. + +On completion, you can view a copy of the Qdrant collection data in Supabase Postgres at `vec2pg.<collection_name>` + +![view results](./assets/qdrant_view_results.png) + +From there you can transform and manipulate the data in Postgres using SQL. + + diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000..37ca797 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,19 @@ +@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;1,100;1,200;1,300;1,400;1,500;1,600;1,700&display=swap'); + +:root { + --md-text-font: "IBM Plex Sans", "Roboto", sans-serif; + /* color: red; */ +} + +[data-md-color-scheme="slate"] { + --md-default-bg-color:#121212; + --md-default-fg-color--light: white; + --md-code-bg-color: #2a2929; + --md-code-hl-keyword-color: #569cd6; +} + +.md-header, .md-tabs { + background-color: var(--md-default-bg-color); + color: var(--md-default-fg-color--light); + font-family: var(--md-text-font); +} diff --git a/mkdocs.yaml b/mkdocs.yaml new file mode 100644 index 0000000..5ce8975 --- /dev/null +++ b/mkdocs.yaml @@ -0,0 +1,42 @@ +site_name: vec2pg +site_url: https://supabase-community.github.io/vec2pg +site_description: A CLI for migrating vector data to Supabase + +repo_name: supabase-community/vec2pg +repo_url: https://github.com/supabase-community/vec2pg + +nav: + - Welcome: 'index.md' + - Installation: 'installation.md' + - Vendors: + - Pinecone: 'pinecone.md' + - Qdrant: 'qdrant.md' + - Contributing: 'contributing.md' + +theme: + name: 'material' + favicon: 'assets/favicon.ico' + logo: 
'assets/favicon.ico' + homepage: https://supabase-community.github.io/vec2pg + features: + - navigation.expand + palette: + scheme: slate + primary: green + accent: green + +extra_css: + - stylesheets/extra.css + +markdown_extensions: + - pymdownx.highlight: + linenums: true + guess_lang: false + use_pygments: true + pygments_style: default + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + - pymdownx.snippets + - pymdownx.tasklist + - admonition diff --git a/pyproject.toml b/pyproject.toml index da1f4f1..171a460 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ parse = "^1.20.2" pgvector = "^0.2.5" numpy = "^2.0.0" qdrant-client = "^1.10.1" +typer = "^0.12.3" [tool.poetry.dev-dependencies] pre-commit = "^3.5.0" @@ -29,6 +30,10 @@ python-dotenv = "^1.0.1" [tool.poetry.scripts] vec2pg = "vec2pg.cli:app" +[tool.poetry.group.dev.dependencies] +mkdocs = "^1.6.0" +mkdocs-material = "^9.5.31" + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 92688fa..0000000 --- a/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -typer-slim -pinecone-client -pre-commit -black -flake8 -isort -commitizen -python-semantic-release -psycopg