diff --git a/quote-semantic-search/Dockerfile-quote-search-preview b/.internal/Dockerfile-quote-search-preview similarity index 75% rename from quote-semantic-search/Dockerfile-quote-search-preview rename to .internal/Dockerfile-quote-search-preview index af296b4..4dd3d06 100644 --- a/quote-semantic-search/Dockerfile-quote-search-preview +++ b/.internal/Dockerfile-quote-search-preview @@ -6,8 +6,8 @@ RUN apt-get update \ && pip3 install setuptools sql RUN mkdir /quote-search -COPY ./quote-search /quote-search -COPY --parents ./container-volumes/quote-search/data/quotes.csv.tgz . +COPY ./quote-semantic-search/quote-search /quote-search +COPY --parents ./quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz . WORKDIR /quote-search RUN python3 -m pip install -r requirements.txt --extra-index-url https://aerospike.jfrog.io/artifactory/api/pypi/aerospike-pypi-dev/simple diff --git a/prism-image-search/docker-compose-dev.yml b/.internal/docker-compose-prism-dev.yml similarity index 67% rename from prism-image-search/docker-compose-dev.yml rename to .internal/docker-compose-prism-dev.yml index 046fb8c..6db5dbe 100644 --- a/prism-image-search/docker-compose-dev.yml +++ b/.internal/docker-compose-prism-dev.yml @@ -6,18 +6,18 @@ services: networks: - avs-demo volumes: - - ./container-volumes/aerospike/etc/aerospike:/opt/aerospike/etc/aerospike + - ./prism-image-search/container-volumes/aerospike/etc/aerospike:/opt/aerospike/etc/aerospike command: - "--config-file" - "/opt/aerospike/etc/aerospike/aerospike.conf" avs: - image: aerospike/aerospike-proximus:0.4.0 + image: aerospike/aerospike-vector-search:0.9.0 # ports: # - "5000:5000" networks: - avs-demo volumes: - - ./container-volumes/avs/etc/aerospike-vector-search:/etc/aerospike-proximus + - ./prism-image-search/container-volumes/avs/etc/aerospike-vector-search:/etc/aerospike-proximus app: build: context: . 
@@ -37,7 +37,7 @@ services: APP_NUM_QUOTES: "5000" GRPC_DNS_RESOLVER: native volumes: - - ./container-volumes/prism/images:/prism/static/images/data + - ./prism-image-search/container-volumes/prism/images:/prism/static/images/data networks: avs-demo: {} diff --git a/quote-semantic-search/docker-compose-dev.yml b/.internal/docker-compose-quote-search-dev.yml similarity index 67% rename from quote-semantic-search/docker-compose-dev.yml rename to .internal/docker-compose-quote-search-dev.yml index 5cc6f6f..c77d741 100644 --- a/quote-semantic-search/docker-compose-dev.yml +++ b/.internal/docker-compose-quote-search-dev.yml @@ -6,18 +6,18 @@ services: networks: - avs-demo volumes: - - ./container-volumes/aerospike/etc/aerospike:/opt/aerospike/etc/aerospike + - ./quote-semantic-search/container-volumes/aerospike/etc/aerospike:/opt/aerospike/etc/aerospike command: - "--config-file" - "/opt/aerospike/etc/aerospike/aerospike.conf" avs: - image: aerospike/aerospike-proximus:0.4.0 + image: aerospike/aerospike-vector-search:0.9.0 # ports: # - "5002:5002" networks: - avs-demo volumes: - - ./container-volumes/avs/etc/aerospike-vector-search:/etc/aerospike-proximus + - ./quote-semantic-search/container-volumes/avs/etc/aerospike-vector-search:/etc/aerospike-proximus app: build: context: . @@ -32,8 +32,7 @@ services: networks: - avs-demo volumes: - - ./container-volumes/quote-search/data:/container-volumes/quote-search/data - # ./usr/local/lib/python3.10/dist-packages/sentence_transformers + - ./quote-semantic-search/container-volumes/quote-search/data:/container-volumes/quote-search/data environment: AVS_HOST: avs AVS_PORT: "5000" diff --git a/README.md b/README.md index aee575e..bf6ed7e 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ This is a companion repo for scripts and examples that are helpful to AVS users. ## Installation Examples This repo contains scripts and configuration details for installing AVS. 
-For more information about AVS, see our [install documentation](https://aerospike.com/docs/vector/operate/install). This repo contains the following: +For more information about AVS, see our [install documentation](https://aerospike.com/docs/vector/install). This repo contains the following: -* [Kubernetes install script](./kubernetes) - A bash script and configuration details for [Installing on Kubernetes](https://aerospike.com/docs/vector/operate/install/kubernetes). +* [Kubernetes install script](./kubernetes) - A bash script and configuration details for [Installing on Kubernetes](https://aerospike.com/docs/vector/install/kubernetes). * [Docker-compose files](./docker) - The `./docker` folder contains a docker-compose file for deploying Aerospike and AVS as containers. Additionally, each example app has a docker-compose file that deploys Aerospike, AVS, and the application itself. diff --git a/basic-search/requirements.txt b/basic-search/requirements.txt index 531469f..cd904e3 100644 --- a/basic-search/requirements.txt +++ b/basic-search/requirements.txt @@ -1 +1 @@ -aerospike-vector-search==0.6.1 \ No newline at end of file +aerospike-vector-search==1.0.0 \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index 7e23ac5..1b4d1c9 100644 --- a/docker/README.md +++ b/docker/README.md @@ -5,7 +5,7 @@ Locate valid `features.conf` in the `./config` directory: > [!IMPORTANT] > If you are running MacOS you will need to replace all occurrences of port 5000 with -> port 5002 in your docker compose file and aerospike-proximus.yml file. +> port 5002 in your docker compose file and aerospike-vector-search.yml file. 
## Installation Aerospike and AVS Clusters (docker-compose) ```shell @@ -32,8 +32,8 @@ docker run -d \ --network svc \ -p 5000:5000 \ -p 5040:5040 \ --v ./config:/etc/aerospike-proximus \ -aerospike/aerospike-proximus:0.4.0 +-v ./config:/etc/aerospike-vector-search \ +aerospike/aerospike-vector-search:0.9.0 ``` diff --git a/docker/config/aerospike-proximus.yml b/docker/config/aerospike-vector-search.yml similarity index 89% rename from docker/config/aerospike-proximus.yml rename to docker/config/aerospike-vector-search.yml index bfb15e2..388ad87 100644 --- a/docker/config/aerospike-proximus.yml +++ b/docker/config/aerospike-vector-search.yml @@ -4,9 +4,9 @@ cluster: # node-id: a1 # Unique identifier for this cluster. - cluster-name: aerospike-proximus + cluster-name: aerospike-vector-search -# The Proximus service listening ports, TLS and network interface. +# The AVS service listening ports, TLS and network interface. service: ports: 5000: {} @@ -63,7 +63,7 @@ aerospike: # The logging properties. 
logging: #format: json - #file: /var/log/aerospike-proximus/aerospike-proximus.log + #file: /var/log/aerospike-vector-search/aerospike-vector-search.log enable-console-logging: true levels: metrics-ticker: off diff --git a/docker/config/aerospike.conf b/docker/config/aerospike.conf index e25482d..661282e 100644 --- a/docker/config/aerospike.conf +++ b/docker/config/aerospike.conf @@ -55,12 +55,12 @@ namespace test { } } -namespace proximus-meta { +namespace avs-meta { replication-factor 1 nsup-period 100 storage-engine device { - file /opt/aerospike/data/proximus-meta.dat + file /opt/aerospike/data/avs-meta.dat filesize 5G } } diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 7671522..267b919 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -19,9 +19,9 @@ services: retries: 3 networks: - svc - aerospike-proximus: - image: aerospike/aerospike-proximus:0.4.0 - container_name: "aerospike-proximus" + aerospike-vector-search: + image: aerospike/aerospike-vector-search:0.9.0 + container_name: "aerospike-vector-search" depends_on: aerospike-cluster: condition: service_healthy @@ -31,9 +31,9 @@ services: networks: - svc volumes: - - ${PWD}/config:/etc/aerospike-proximus + - ${PWD}/config:/etc/aerospike-vector-search healthcheck: - test: ["CMD", "curl", "-f", "http://aerospike-proximus:5040/manage/rest/v1"] + test: ["CMD", "curl", "-f", "http://aerospike-vector-search:5040/manage/rest/v1"] interval: 30s timeout: 20s retries: 3 diff --git a/kubernetes/README.md b/kubernetes/README.md index 2489583..829bd32 100644 --- a/kubernetes/README.md +++ b/kubernetes/README.md @@ -1,6 +1,6 @@ -# Aerospike, Proximus, and Monitoring Deployment on GKE (Google Kubernetes Engine) +# Aerospike Vector Search, and Monitoring Deployment on GKE (Google Kubernetes Engine) -Use the scripts and manifests in this directory deploy Aerospike, Proximus, and monitoring tools on Google Kubernetes Engine (GKE). 
+Use the scripts and manifests in this directory to deploy Aerospike Vector Search and monitoring tools on Google Kubernetes Engine (GKE). ## Prerequisites @@ -12,7 +12,7 @@ Before you start, make sure you have installed the necessary tools: ## Scripts -- `full-create-and-install.sh`: Creates a GKE cluster, installs Aerospike, Proximus, and sets up monitoring services. +- `full-create-and-install.sh`: Creates a GKE cluster, installs Aerospike Vector Search, and sets up monitoring services. ### Grafana Dashboards diff --git a/kubernetes/full-create-and-install.sh b/kubernetes/full-create-and-install.sh index 27f11cc..5f57a56 100755 --- a/kubernetes/full-create-and-install.sh +++ b/kubernetes/full-create-and-install.sh @@ -155,7 +155,7 @@ kubectl apply -f manifests/istio/avs-virtual-service.yaml helm repo add aerospike-helm https://artifact.aerospike.io/artifactory/api/helm/aerospike-helm helm repo update -helm install avs-gke --values "manifests/avs-gke-values.yaml" --namespace avs aerospike-helm/aerospike-vector-search --wait +helm install avs-gke --values "manifests/avs-gke-values.yaml" --namespace avs aerospike-helm/aerospike-vector-search --version 0.4.0 --wait ############################################## # Monitoring namespace diff --git a/kubernetes/helm/quote-semantic-search/Chart.yaml b/kubernetes/helm/quote-semantic-search/Chart.yaml index 6cc2ff8..838aa7e 100644 --- a/kubernetes/helm/quote-semantic-search/Chart.yaml +++ b/kubernetes/helm/quote-semantic-search/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: quote-semantic-search -description: A Helm chart for Aerospike Proximus +description: A Helm chart for Aerospike Quote Search App type: application icon: https://avatars0.githubusercontent.com/u/2214313?s=200&v=4 keywords: diff --git a/kubernetes/manifests/avs-gke-values.yaml b/kubernetes/manifests/avs-gke-values.yaml index f0c5a0d..419f03e 100644 --- a/kubernetes/manifests/avs-gke-values.yaml +++ b/kubernetes/manifests/avs-gke-values.yaml @@ -1,11 +1,17 
@@ replicaCount: 3 - +image: + repository: "aerospike/aerospike-vector-search" + pullPolicy: "IfNotPresent" + # Overrides the image tag whose default is the chart appVersion. + tag: "0.9.0" + aerospikeVectorSearchConfig: cluster: cluster-name: "avs-db-1" - feature-key-file: "/etc/aerospike-proximus/features.conf" + feature-key-file: "/etc/aerospike-vector-search/features.conf" service: + metadata-namespace: "avs-meta" ports: 5000: addresses: @@ -27,13 +33,12 @@ aerospikeVectorSearchConfig: 5001: addresses: 0.0.0.0 - aerospike: - metadata-namespace: "avs-meta" + storage: seeds: - aerocluster-0-0.aerocluster.aerospike.svc.cluster.local: port: 3000 logging: - # file: /var/log/aerospike-proximus/aerospike-proximus.log + # file: /var/log/aerospike-vector-search/aerospike-vector-search.log enable-console-logging: false format: simple max-history: 30 @@ -63,7 +68,7 @@ service: # port: 5000 # targetPort: 5000 -# schedule proximus nodes +# schedule avs nodes affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: diff --git a/prism-image-search/README.md b/prism-image-search/README.md index faaa93b..a212147 100644 --- a/prism-image-search/README.md +++ b/prism-image-search/README.md @@ -74,6 +74,7 @@ Add your features.conf file to container-volumes/avs/etc/avs. ``` docker compose up ``` + ## Developing This demo is built using [Python Flask](https://flask.palletsprojects.com/en/2.3.x/) and [Vue.js](https://vuejs.org/). 
To start developing, follow the steps to diff --git a/prism-image-search/container-volumes/aerospike/etc/aerospike/aerospike.conf b/prism-image-search/container-volumes/aerospike/etc/aerospike/aerospike.conf index 2cbb6f0..0f01b0a 100644 --- a/prism-image-search/container-volumes/aerospike/etc/aerospike/aerospike.conf +++ b/prism-image-search/container-volumes/aerospike/etc/aerospike/aerospike.conf @@ -54,7 +54,7 @@ namespace test { } } -namespace proximus-meta { +namespace avs-meta { replication-factor 1 nsup-period 100 diff --git a/prism-image-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-proximus.yml b/prism-image-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-proximus.yml deleted file mode 100644 index bcc5d22..0000000 --- a/prism-image-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-proximus.yml +++ /dev/null @@ -1,69 +0,0 @@ -# Change the configuration for your use case. -cluster: - # Custom node-id. It will be auto-generated if not specified. - # node-id: a1 - - # Unique identifier for this cluster. - cluster-name: prism-image-search - -# The Proximus service listening ports, TLS and network interface. -service: - ports: - 5000: {} - -# Management API listening ports, TLS and network interface. -manage: - ports: - 5040: {} - -# Intra cluster interconnect listening ports, TLS and network interface. -interconnect: - ports: - 5001: {} - -#heartbeat: -# seeds: -# - address: localhost -# port: 6001 - -# Target Aerospike cluster -aerospike: - seeds: - - aerospike: - port: 3000 - -# File based credentials store only if security should be enabled. -#security: -# credentials-store: -# type: file -# credentials-file: samples/credentials.yml -# auth-token: -# private-key: samples/auth/private_key.pem -# public-key: samples/auth/public_key.pem - -# Vault based credentials store only if security should be enabled. 
-#security: -# credentials-store: -# type: vault -# url: https://vault:8200 -# secrets-path: /secret/aerospike/aerodb1 -# tls: -# key-store: -# store-type: PEM -# store-file: key.pem -# store-password-file: keypass.txt # Password protecting key.pem. -# certificate-chain-files: certchain.pem -# trust-store: -# store-type: PEM -# certificate-files: cacert.pem -# auth-token: -# private-key: samples/auth/private_key.pem -# public-key: samples/auth/public_key.pem - -# The logging properties. -logging: - #format: json - #file: /var/log/aerospike-proximus/aerospike-proximus.log - enable-console-logging: true - levels: - metrics-ticker: off diff --git a/prism-image-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-vector-search.yml b/prism-image-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-vector-search.yml new file mode 100644 index 0000000..a5af856 --- /dev/null +++ b/prism-image-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-vector-search.yml @@ -0,0 +1,39 @@ +# Change the configuration for your use case. +cluster: + # Custom node-id. It will be auto-generated if not specified. + # node-id: a1 + + # Unique identifier for this cluster. + cluster-name: prism-image-search + +# The AVS service listening ports, TLS and network interface. +service: + ports: + 5000: {} + +# Management API listening ports, TLS and network interface. +manage: + ports: + 5040: {} + +# Intra cluster interconnect listening ports, TLS and network interface. +interconnect: + ports: + 5001: {} + +#heartbeat: +# seeds: +# - address: localhost +# port: 6001 + +# Target Aerospike cluster +storage: + seeds: + - aerospike: + port: 3000 + +# The logging properties. 
+logging: + enable-console-logging: true + levels: + metrics-ticker: off diff --git a/prism-image-search/docker-compose.yml b/prism-image-search/docker-compose.yml index a455072..c283d62 100644 --- a/prism-image-search/docker-compose.yml +++ b/prism-image-search/docker-compose.yml @@ -11,15 +11,13 @@ services: - "--config-file" - "/opt/aerospike/etc/aerospike/aerospike.conf" avs: - image: aerospike/aerospike-proximus:0.4.0 - # ports: - # - "5000:5000" + image: aerospike/aerospike-vector-search:0.9.0 networks: - avs-demo volumes: - - ./container-volumes/avs/etc/aerospike-vector-search:/etc/aerospike-proximus + - ./container-volumes/avs/etc/aerospike-vector-search:/etc/aerospike-vector-search app: - image: aerospike/prism-search-example:latest + image: prism:latest ports: - "8080:8080" networks: diff --git a/prism-image-search/prism/avs_client.py b/prism-image-search/prism/avs_client.py index fbf184f..459d35d 100644 --- a/prism-image-search/prism/avs_client.py +++ b/prism-image-search/prism/avs_client.py @@ -5,7 +5,6 @@ seeds = types.HostPort( host=Config.AVS_HOST, port=Config.AVS_PORT, - is_tls=Config.AVS_VERIFY_TLS, ) avs_client = Client( diff --git a/prism-image-search/prism/indexer.py b/prism-image-search/prism/indexer.py index f076ecf..4fe9552 100644 --- a/prism-image-search/prism/indexer.py +++ b/prism-image-search/prism/indexer.py @@ -1,5 +1,6 @@ import glob import os +import sys import threading from multiprocessing import get_context from threading import Thread @@ -23,22 +24,26 @@ def create_index(): - for index in avs_admin_client.index_list(): - if ( - index["id"]["namespace"] == Config.AVS_NAMESPACE - and index["id"]["name"] == Config.AVS_INDEX_NAME - ): - logger.info("Index already exists") - return - - avs_admin_client.index_create( - namespace=Config.AVS_NAMESPACE, - name=Config.AVS_INDEX_NAME, - sets=Config.AVS_SET, - vector_field="image_embedding", - dimensions=MODEL_DIM, - vector_distance_metric=types.VectorDistanceMetric.COSINE, - ) + try: + for 
index in avs_admin_client.index_list(): + if ( + index["id"]["namespace"] == Config.AVS_NAMESPACE + and index["id"]["name"] == Config.AVS_INDEX_NAME + ): + logger.info("Index already exists") + return + + avs_admin_client.index_create( + namespace=Config.AVS_NAMESPACE, + name=Config.AVS_INDEX_NAME, + sets=Config.AVS_SET, + vector_field="image_embedding", + dimensions=MODEL_DIM, + vector_distance_metric=types.VectorDistanceMetric.COSINE, + ) + except Exception as e: + logger.critical("Failed to connect to avs client %s", str(e)) + sys.exit(1) def either(c): diff --git a/prism-image-search/prism/requirements.txt b/prism-image-search/prism/requirements.txt index e98d295..cd22f34 100644 --- a/prism-image-search/prism/requirements.txt +++ b/prism-image-search/prism/requirements.txt @@ -1,6 +1,6 @@ # TODO: Include exact versions # Aerospike dependencies -aerospike-vector-search==0.6.1 +aerospike-vector-search==1.0.0 # Flask framework flask~=2.3.2 diff --git a/quote-semantic-search/.gitignore b/quote-semantic-search/.gitignore index 115f878..f813c94 100644 --- a/quote-semantic-search/.gitignore +++ b/quote-semantic-search/.gitignore @@ -2,4 +2,5 @@ venv/ .venv/ pip.conf container-volumes/quote-search/data/quotes.csv -container-volumes/avs/etc/aerospike-vector-search/features.conf \ No newline at end of file +container-volumes/avs/etc/aerospike-vector-search/features.conf +data/ \ No newline at end of file diff --git a/quote-semantic-search/Dockerfile-quote-search b/quote-semantic-search/Dockerfile-quote-search index 3789331..118d586 100644 --- a/quote-semantic-search/Dockerfile-quote-search +++ b/quote-semantic-search/Dockerfile-quote-search @@ -4,7 +4,6 @@ FROM ubuntu:22.04 RUN apt-get update \ && apt-get install -y python3 python3-pip \ && pip3 install setuptools sql - RUN mkdir /quote-search COPY ./quote-search /quote-search WORKDIR /quote-search diff --git a/quote-semantic-search/README.md b/quote-semantic-search/README.md index 59b22bc..79dc580 100644 --- 
a/quote-semantic-search/README.md +++ b/quote-semantic-search/README.md @@ -70,9 +70,6 @@ This demo is build using [Python Flask](https://flask.palletsprojects.com/en/2.3 and [Vue.js](https://vuejs.org/). In order to developer follow the steps to setup your Python environment. -### Setup pip -Setup pip to use Aerospike PyPI repository following instructions [here](https://github.com/citrusleaf/aerospike-proximus-client-python/tree/main#using-the-client-from-your-application-using-pip). - ### Setup Python Virtual Environment ```shell diff --git a/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike.conf b/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike.conf index 41a8c62..8a01f56 100644 --- a/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike.conf +++ b/quote-semantic-search/container-volumes/aerospike/etc/aerospike/aerospike.conf @@ -55,7 +55,7 @@ namespace test { } } -namespace proximus-meta { +namespace avs-meta { replication-factor 1 nsup-period 100 diff --git a/quote-semantic-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-proximus.yml b/quote-semantic-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-proximus.yml deleted file mode 100644 index ae1cb1c..0000000 --- a/quote-semantic-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-proximus.yml +++ /dev/null @@ -1,73 +0,0 @@ -# Change the configuration for your use case. -cluster: - # Custom node-id. It will be auto-generated if not specified. - # node-id: a1 - - # Unique identifier for this cluster. - cluster-name: quote-semantic-search - -# The Proximus service listening ports, TLS and network interface. -service: - ports: - 5000: {} - # advertised-listeners: - # default: - # address: 127.0.0.1 - # port: 5002 - -# Management API listening ports, TLS and network interface. -manage: - ports: - 5040: {} - -# Intra cluster interconnect listening ports, TLS and network interface. 
-interconnect: - ports: - 5001: {} - -#heartbeat: -# seeds: -# - address: localhost -# port: 6001 - -# Target Aerospike cluster -aerospike: - seeds: - - aerospike: - port: 3000 - -# File based credentials store only if security should be enabled. -#security: -# credentials-store: -# type: file -# credentials-file: samples/credentials.yml -# auth-token: -# private-key: samples/auth/private_key.pem -# public-key: samples/auth/public_key.pem - -# Vault based credentials store only if security should be enabled. -#security: -# credentials-store: -# type: vault -# url: https://vault:8200 -# secrets-path: /secret/aerospike/aerodb1 -# tls: -# key-store: -# store-type: PEM -# store-file: key.pem -# store-password-file: keypass.txt # Password protecting key.pem. -# certificate-chain-files: certchain.pem -# trust-store: -# store-type: PEM -# certificate-files: cacert.pem -# auth-token: -# private-key: samples/auth/private_key.pem -# public-key: samples/auth/public_key.pem - -# The logging properties. -logging: - #format: json - #file: /var/log/aerospike-proximus/aerospike-proximus.log - enable-console-logging: true - levels: - metrics-ticker: off diff --git a/quote-semantic-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-vector-search.yml b/quote-semantic-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-vector-search.yml new file mode 100644 index 0000000..e1b723d --- /dev/null +++ b/quote-semantic-search/container-volumes/avs/etc/aerospike-vector-search/aerospike-vector-search.yml @@ -0,0 +1,44 @@ +# Change the configuration for your use case. +cluster: + # Custom node-id. It will be auto-generated if not specified. + # node-id: a1 + + # Unique identifier for this cluster. + cluster-name: quote-semantic-search + +# The AVS service listening ports, TLS and network interface. 
+service: + ports: + 5000: {} + # 5002: + # advertised-listeners: + # default: + # address: 127.0.0.1 + # port: 5002 + +# Management API listening ports, TLS and network interface. +manage: + ports: + 5040: {} + +# Intra cluster interconnect listening ports, TLS and network interface. +interconnect: + ports: + 5001: {} + +#heartbeat: +# seeds: +# - address: localhost +# port: 6001 + +# Target Aerospike cluster +storage: + seeds: + - aerospike: + port: 3000 + +# The logging properties. +logging: + enable-console-logging: true + levels: + metrics-ticker: info diff --git a/quote-semantic-search/docker-compose.yml b/quote-semantic-search/docker-compose.yml index 021a5fa..e297184 100644 --- a/quote-semantic-search/docker-compose.yml +++ b/quote-semantic-search/docker-compose.yml @@ -11,22 +11,21 @@ services: - "--config-file" - "/opt/aerospike/etc/aerospike/aerospike.conf" avs: - image: aerospike/aerospike-proximus:0.4.0 + image: aerospike/aerospike-vector-search:0.9.0 # ports: # - "5002:5002" networks: - avs-demo volumes: - - ./container-volumes/avs/etc/aerospike-vector-search:/etc/aerospike-proximus + - ./container-volumes/avs/etc/aerospike-vector-search:/etc/aerospike-vector-search app: - image: aerospike/quote-search-example:latest + image: quote-search ports: - "8080:8080" networks: - avs-demo volumes: - ./container-volumes/quote-search/data:/container-volumes/quote-search/data - # ./usr/local/lib/python3.10/dist-packages/sentence_transformers environment: AVS_HOST: avs AVS_PORT: "5000" @@ -34,4 +33,4 @@ services: GRPC_DNS_RESOLVER: native networks: - avs-demo: {} + avs-demo: {} \ No newline at end of file diff --git a/quote-semantic-search/quote-search/avs_client.py b/quote-semantic-search/quote-search/avs_client.py index 50bc5dd..6323a84 100644 --- a/quote-semantic-search/quote-search/avs_client.py +++ b/quote-semantic-search/quote-search/avs_client.py @@ -6,7 +6,6 @@ seeds=types.HostPort( host=Config.AVS_HOST, port=Config.AVS_PORT, - 
is_tls=Config.AVS_VERIFY_TLS, ), listener_name=Config.AVS_ADVERTISED_LISTENER, is_loadbalancer=Config.AVS_IS_LOADBALANCER, @@ -17,7 +16,6 @@ seeds=types.HostPort( host=Config.AVS_HOST, port=Config.AVS_PORT, - is_tls=Config.AVS_VERIFY_TLS, ), listener_name=Config.AVS_ADVERTISED_LISTENER, is_loadbalancer=Config.AVS_IS_LOADBALANCER, diff --git a/quote-semantic-search/quote-search/dataset_stats.py b/quote-semantic-search/quote-search/dataset_stats.py index 9741414..8dbabe0 100644 --- a/quote-semantic-search/quote-search/dataset_stats.py +++ b/quote-semantic-search/quote-search/dataset_stats.py @@ -4,6 +4,7 @@ from avs_client import avs_client from config import Config +from indexer import index_created logger = logging.getLogger(__name__) @@ -18,36 +19,37 @@ def either(c): def collect_stats(): lock.acquire() - try: - logger.info("Collecting statistics") - temp_counts = {} - not_indexed = 0 - - for quote_id in range(Config.NUM_QUOTES): - # Check if record exists - if avs_client.is_indexed( - namespace=Config.AVS_NAMESPACE, - set_name=Config.AVS_SET, - key=quote_id, - index_name=Config.AVS_INDEX_NAME, - ): - # Record exists - quote_id = str(quote_id) - - if quote_id not in temp_counts: - temp_counts[quote_id] = 0 - - temp_counts[quote_id] = temp_counts[quote_id] + 1 - else: - not_indexed += 1 - - dataset_counts.update(temp_counts) - - logger.info( - f"{len(dataset_counts)} quotes indexed and {not_indexed} not indexed." 
- ) - except Exception as e: - logger.warn("Error collecting statistics:" + str(e)) + if index_created: + try: + logger.info("Collecting statistics") + temp_counts = {} + not_indexed = 0 + + for quote_id in range(Config.NUM_QUOTES): + # Check if record exists + if avs_client.is_indexed( + namespace=Config.AVS_NAMESPACE, + set_name=Config.AVS_SET, + key=quote_id, + index_name=Config.AVS_INDEX_NAME, + ): + # Record exists + quote_id = str(quote_id) + + if quote_id not in temp_counts: + temp_counts[quote_id] = 0 + + temp_counts[quote_id] = temp_counts[quote_id] + 1 + else: + not_indexed += 1 + + dataset_counts.update(temp_counts) + + logger.info( + f"{len(dataset_counts)} quotes indexed and {not_indexed} not indexed." + ) + except Exception as e: + logger.warn("Error collecting statistics:" + str(e)) lock.release() threading.Timer(30, collect_stats).start() diff --git a/quote-semantic-search/quote-search/indexer.py b/quote-semantic-search/quote-search/indexer.py index 10c4ae5..3e9b849 100644 --- a/quote-semantic-search/quote-search/indexer.py +++ b/quote-semantic-search/quote-search/indexer.py @@ -2,6 +2,7 @@ import itertools from multiprocessing import get_context import os +import sys from threading import Thread import logging from tqdm import tqdm @@ -31,24 +32,33 @@ def read_csv(filename): DATASET_FILE = Config.DATASET_FILE_PATH dataset = itertools.islice(read_csv(DATASET_FILE), Config.NUM_QUOTES) +index_created = False def create_index(): - for index in avs_admin_client.index_list(): - if ( - index["id"]["namespace"] == Config.AVS_NAMESPACE - and index["id"]["name"] == Config.AVS_INDEX_NAME - ): - return - - avs_admin_client.index_create( - namespace=Config.AVS_NAMESPACE, - name=Config.AVS_INDEX_NAME, - sets=Config.AVS_SET, - vector_field="quote_embedding", - dimensions=MODEL_DIM, - vector_distance_metric=types.VectorDistanceMetric.COSINE, - ) + global index_created + + try: + for index in avs_admin_client.index_list(): + if ( + index["id"]["namespace"] == 
Config.AVS_NAMESPACE + and index["id"]["name"] == Config.AVS_INDEX_NAME + ): + return + + avs_admin_client.index_create( + namespace=Config.AVS_NAMESPACE, + name=Config.AVS_INDEX_NAME, + sets=Config.AVS_SET, + vector_field="quote_embedding", + dimensions=MODEL_DIM, + vector_distance_metric=types.VectorDistanceMetric.COSINE, + ) + + index_created = True + except Exception as e: + logger.critical("Failed to connect to avs client %s", str(e)) + sys.exit(1) def either(c): @@ -59,6 +69,7 @@ def index_data(): try: logger.info("Creating index") create_index() + logger.info("Successfully created the index") if Config.INDEXER_PARALLELISM <= 1: for quote in tqdm( @@ -103,7 +114,7 @@ def index_quote(id_quote): key=doc["quote_id"], record_data=doc, ) - except Exception as e: + except types.AVSServerError as e: logger.warning( f"Error inserting vector embedding into avs {id}: {str(e)} quote: {quote}" ) diff --git a/quote-semantic-search/quote-search/quote_search.py b/quote-semantic-search/quote-search/quote_search.py index a313986..697c724 100644 --- a/quote-semantic-search/quote-search/quote_search.py +++ b/quote-semantic-search/quote-search/quote_search.py @@ -3,7 +3,7 @@ from config import Config import logging -logging.basicConfig(level=logging.CRITICAL) +logging.basicConfig(level=logging.WARNING) logger = logging.getLogger(__name__) # The flask application. 
diff --git a/quote-semantic-search/quote-search/requirements.txt b/quote-semantic-search/quote-search/requirements.txt index 9c8a368..9f21f22 100644 --- a/quote-semantic-search/quote-search/requirements.txt +++ b/quote-semantic-search/quote-search/requirements.txt @@ -1,6 +1,6 @@ # TODO: Include exact versions # Aerospike dependencies -aerospike-vector-search==0.6.1 +aerospike-vector-search==1.0.0 # Flask framework diff --git a/quote-semantic-search/quote-search/routes.py b/quote-semantic-search/quote-search/routes.py index dda5ec7..caa27a2 100644 --- a/quote-semantic-search/quote-search/routes.py +++ b/quote-semantic-search/quote-search/routes.py @@ -1,4 +1,5 @@ import time +import logging from flask import jsonify, request, send_file from config import Config @@ -8,6 +9,9 @@ from avs_client import avs_client from aerospike_vector_search import types +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + @app.route("/") def index_static(): @@ -81,6 +85,7 @@ def vector_search(embedding, count=Config.AVS_MAX_RESULTS): def format_results(results: list[types.Neighbor], time_taken): + logger.info(f"Search took {time_taken} seconds and returned {len(results)} results") return jsonify( {"timeTaken": time_taken, "results": [result.fields for result in results]} )