diff --git a/kubernetes/helm/quote-semantic-search/templates/deployment.yaml b/kubernetes/helm/quote-semantic-search/templates/deployment.yaml index e5ed839..f2a0b54 100644 --- a/kubernetes/helm/quote-semantic-search/templates/deployment.yaml +++ b/kubernetes/helm/quote-semantic-search/templates/deployment.yaml @@ -69,6 +69,10 @@ spec: value: {{ .Values.quoteSearchConfig.avsNamespace | quote }} - name: AVS_SET value: {{ .Values.quoteSearchConfig.avsSet | quote }} + - name: AVS_INDEX_NAMESPACE + value: {{ .Values.quoteSearchConfig.avsIndexNamespace | quote }} + - name: AVS_INDEX_SET + value: {{ .Values.quoteSearchConfig.avsIndexSet | quote }} - name: AVS_VERIFY_TLS value: {{ .Values.quoteSearchConfig.avsVerifyTls | quote }} - name: AVS_MAX_RESULTS diff --git a/prism-image-search/README.md b/prism-image-search/README.md index 050f25f..d72643a 100644 --- a/prism-image-search/README.md +++ b/prism-image-search/README.md @@ -110,10 +110,10 @@ If not set defaults are used. | AVS_HOST | localhost | AVS server seed host | | AVS_PORT | 5000 | AVS server seed host port | | AVS_ADVERTISED_LISTENER| | An optional advertised listener to use if configured on the AVS server | -| AVS_DATA_NAMESPACE | test | The Aerospike namespace for storing the image records | -| AVS_DATA_SET | image-data | The Aerospike set for storing the image records | -| AVS_DATA_NAMESPACE | test | The Aerospike namespace for storing the HNSW index | -| AVS_DATA_SET | image-index | The Aerospike set for storing the HNSW index | +| AVS_NAMESPACE | test | The Aerospike namespace for storing the image records | +| AVS_SET | image-data | The Aerospike set for storing the image records | +| AVS_INDEX_NAMESPACE | test | The Aerospike namespace for storing the HNSW index | +| AVS_INDEX_SET | image-index | The Aerospike set for storing the HNSW index | | AVS_INDEX_NAME | prism-image-search | The name of the index | | AVS_MAX_RESULTS | 20 | Maximum number of vector search results to return | | AVS_IS_LOADBALANCER | False | If 
true, the first seed address will be treated as a load balancer node.``` diff --git a/prism-image-search/docker-compose.yml b/prism-image-search/docker-compose.yml index 589791e..09b5078 100644 --- a/prism-image-search/docker-compose.yml +++ b/prism-image-search/docker-compose.yml @@ -47,8 +47,8 @@ services: APP_NUM_QUOTES: "5000" GRPC_DNS_RESOLVER: native # comment out the following lines to use the default namespace (test) to store all index and vector data - AVS_DATA_NAMESPACE: avs-data - AVS_DATA_SET: quote-data + AVS_NAMESPACE: avs-data + AVS_SET: quote-data AVS_INDEX_NAMESPACE: avs-index AVS_INDEX_SET: quote-index volumes: diff --git a/prism-image-search/prism/config.py b/prism-image-search/prism/config.py index 3a9f720..8bb3407 100644 --- a/prism-image-search/prism/config.py +++ b/prism-image-search/prism/config.py @@ -23,8 +23,8 @@ class Config(object): AVS_PORT = int(os.environ.get("AVS_PORT") or 5000) AVS_ADVERTISED_LISTENER = os.environ.get("AVS_ADVERTISED_LISTENER") or None AVS_INDEX_NAME = os.environ.get("AVS_INDEX_NAME") or "prism-image-search" - AVS_DATA_NAMESPACE = os.environ.get("AVS_DATA_NAMESPACE") or "test" - AVS_DATA_SET = os.environ.get("AVS_DATA_SET") or "image-data" + AVS_NAMESPACE = os.environ.get("AVS_NAMESPACE") or "test" + AVS_SET = os.environ.get("AVS_SET") or "image-data" AVS_INDEX_NAMESPACE = os.environ.get("AVS_INDEX_NAMESPACE") or "test" AVS_INDEX_SET = os.environ.get("AVS_INDEX_SET") or "image-index" AVS_VERIFY_TLS = get_bool_env("VERIFY_TLS", True) diff --git a/prism-image-search/prism/dataset_stats.py b/prism-image-search/prism/dataset_stats.py index 5f31889..f7c952c 100644 --- a/prism-image-search/prism/dataset_stats.py +++ b/prism-image-search/prism/dataset_stats.py @@ -27,8 +27,8 @@ def collect_stats(): for filename in filenames: # Check if record exists if avs_client.is_indexed( - namespace=Config.AVS_DATA_NAMESPACE, - set_name=Config.AVS_DATA_SET, + namespace=Config.AVS_NAMESPACE, + set_name=Config.AVS_SET, key=filename, 
index_name=Config.AVS_INDEX_NAME, ): diff --git a/prism-image-search/prism/indexer.py b/prism-image-search/prism/indexer.py index efe0767..498356f 100644 --- a/prism-image-search/prism/indexer.py +++ b/prism-image-search/prism/indexer.py @@ -27,16 +27,16 @@ def create_index(): try: for index in avs_admin_client.index_list(): if ( - index["id"]["namespace"] == Config.AVS_DATA_NAMESPACE + index["id"]["namespace"] == Config.AVS_NAMESPACE and index["id"]["name"] == Config.AVS_INDEX_NAME ): logger.info("Index already exists") return avs_admin_client.index_create( - namespace=Config.AVS_DATA_NAMESPACE, + namespace=Config.AVS_NAMESPACE, name=Config.AVS_INDEX_NAME, - sets=Config.AVS_DATA_SET, + sets=Config.AVS_SET, vector_field="image_embedding", dimensions=MODEL_DIM, vector_distance_metric=types.VectorDistanceMetric.COSINE, @@ -63,8 +63,8 @@ def index_data(): # Check if record exists try: if avs_client.is_indexed( - namespace=Config.AVS_DATA_NAMESPACE, - set_name=Config.AVS_DATA_SET, + namespace=Config.AVS_NAMESPACE, + set_name=Config.AVS_SET, key=filename, index_name=Config.AVS_INDEX_NAME, ): @@ -130,8 +130,8 @@ def index_image(filename): try: logger.debug(f"Inserting vector embedding into avs {filename}") avs_client.upsert( - namespace=Config.AVS_DATA_NAMESPACE, - set_name=Config.AVS_DATA_SET, + namespace=Config.AVS_NAMESPACE, + set_name=Config.AVS_SET, key=doc["image_id"], record_data=doc, ) diff --git a/prism-image-search/prism/routes.py b/prism-image-search/prism/routes.py index 38b376f..7fa2d5c 100644 --- a/prism-image-search/prism/routes.py +++ b/prism-image-search/prism/routes.py @@ -51,8 +51,8 @@ def search_internal(): return "image_id is required", 400 record = avs_client.get( - namespace=Config.AVS_DATA_NAMESPACE, - set_name=Config.AVS_DATA_SET, + namespace=Config.AVS_NAMESPACE, + set_name=Config.AVS_SET, key=image_id, field_names=["image_embedding"], ) @@ -72,7 +72,7 @@ def vector_search(embedding, count=Config.AVS_MAX_RESULTS): # Execute kNN search over the 
image dataset field_names = ["image_id", "image_name", "relative_path"] return avs_client.vector_search( - namespace=Config.AVS_DATA_NAMESPACE, + namespace=Config.AVS_NAMESPACE, index_name=Config.AVS_INDEX_NAME, query=embedding, limit=count, diff --git a/quote-semantic-search/README.md b/quote-semantic-search/README.md index 44cd121..564e377 100644 --- a/quote-semantic-search/README.md +++ b/quote-semantic-search/README.md @@ -101,10 +101,10 @@ If not set defaults are used. | AVS_HOST | localhost | AVS server seed host | | AVS_PORT | 5000 | AVS server seed host port | | AVS_ADVERTISED_LISTENER| | An optional advertised listener to use if configured on the AVS server | -| AVS_DATA_NAMESPACE | test | The Aerospike namespace for storing the quote records | -| AVS_DATA_SET | quote-data | The Aerospike set for storing the quote records | -| AVS_DATA_NAMESPACE | test | The Aerospike namespace for storing the HNSW index | -| AVS_DATA_SET | quote-index | The Aerospike set for storing the HNSW index | +| AVS_NAMESPACE | test | The Aerospike namespace for storing the quote records | +| AVS_SET | quote-data | The Aerospike set for storing the quote records | +| AVS_INDEX_NAMESPACE | test | The Aerospike namespace for storing the HNSW index | +| AVS_INDEX_SET | quote-index | The Aerospike set for storing the HNSW index | | AVS_INDEX_NAME | quote-search | The name of the index | | AVS_MAX_RESULTS | 20 | Maximum number of vector search results to return | | AVS_IS_LOADBALANCER | False | If true, the first seed address will be treated as a load balancer node.``` diff --git a/quote-semantic-search/docker-compose.yml b/quote-semantic-search/docker-compose.yml index 31f3dbc..6931747 100644 --- a/quote-semantic-search/docker-compose.yml +++ b/quote-semantic-search/docker-compose.yml @@ -51,8 +51,8 @@ services: APP_NUM_QUOTES: "5000" GRPC_DNS_RESOLVER: native # comment out the following lines to use the default namespace (test) to store all index and vector data - AVS_DATA_NAMESPACE: avs-data - 
AVS_DATA_SET: quote-data + AVS_NAMESPACE: avs-data + AVS_SET: quote-data AVS_INDEX_NAMESPACE: avs-index AVS_INDEX_SET: quote-index diff --git a/quote-semantic-search/quote-search/config.py b/quote-semantic-search/quote-search/config.py index bbeeeb0..a50fb25 100644 --- a/quote-semantic-search/quote-search/config.py +++ b/quote-semantic-search/quote-search/config.py @@ -21,8 +21,8 @@ class Config(object): AVS_PORT = int(os.environ.get("AVS_PORT") or 5000) AVS_ADVERTISED_LISTENER = os.environ.get("AVS_ADVERTISED_LISTENER") or None AVS_INDEX_NAME = os.environ.get("AVS_INDEX_NAME") or "quote-semantic-search" - AVS_DATA_NAMESPACE = os.environ.get("AVS_DATA_NAMESPACE") or "test" - AVS_DATA_SET = os.environ.get("AVS_DATA_SET") or "quote-data" + AVS_NAMESPACE = os.environ.get("AVS_NAMESPACE") or "test" + AVS_SET = os.environ.get("AVS_SET") or "quote-data" AVS_INDEX_NAMESPACE = os.environ.get("AVS_INDEX_NAMESPACE") or "test" AVS_INDEX_SET = os.environ.get("AVS_INDEX_SET") or "quote-index" AVS_VERIFY_TLS = get_bool_env("VERIFY_TLS", True) diff --git a/quote-semantic-search/quote-search/dataset_stats.py b/quote-semantic-search/quote-search/dataset_stats.py index 65497ea..8dbabe0 100644 --- a/quote-semantic-search/quote-search/dataset_stats.py +++ b/quote-semantic-search/quote-search/dataset_stats.py @@ -28,8 +28,8 @@ def collect_stats(): for quote_id in range(Config.NUM_QUOTES): # Check if record exists if avs_client.is_indexed( - namespace=Config.AVS_DATA_NAMESPACE, - set_name=Config.AVS_DATA_SET, + namespace=Config.AVS_NAMESPACE, + set_name=Config.AVS_SET, key=quote_id, index_name=Config.AVS_INDEX_NAME, ): diff --git a/quote-semantic-search/quote-search/indexer.py b/quote-semantic-search/quote-search/indexer.py index e7303c1..471845a 100644 --- a/quote-semantic-search/quote-search/indexer.py +++ b/quote-semantic-search/quote-search/indexer.py @@ -41,15 +41,15 @@ def create_index(): try: for index in avs_admin_client.index_list(): if ( - index["id"]["namespace"] == 
Config.AVS_DATA_NAMESPACE + index["id"]["namespace"] == Config.AVS_NAMESPACE and index["id"]["name"] == Config.AVS_INDEX_NAME ): return avs_admin_client.index_create( - namespace=Config.AVS_DATA_NAMESPACE, + namespace=Config.AVS_NAMESPACE, name=Config.AVS_INDEX_NAME, - sets=Config.AVS_DATA_SET, + sets=Config.AVS_SET, vector_field="quote_embedding", dimensions=MODEL_DIM, vector_distance_metric=types.VectorDistanceMetric.COSINE, @@ -110,8 +110,8 @@ def index_quote(id_quote): # Insert record try: avs_client.upsert( - namespace=Config.AVS_DATA_NAMESPACE, - set_name=Config.AVS_DATA_SET, + namespace=Config.AVS_NAMESPACE, + set_name=Config.AVS_SET, key=doc["quote_id"], record_data=doc, ) diff --git a/quote-semantic-search/quote-search/routes.py b/quote-semantic-search/quote-search/routes.py index e383338..caa27a2 100644 --- a/quote-semantic-search/quote-search/routes.py +++ b/quote-semantic-search/quote-search/routes.py @@ -55,8 +55,8 @@ def search_internal(): return "quote_id is required", 400 record = avs_client.get( - namespace=Config.AVS_DATA_NAMESPACE, - set_name=Config.AVS_DATA_SET, + namespace=Config.AVS_NAMESPACE, + set_name=Config.AVS_SET, key=int(quote_id), field_names=["quote_embedding"], ) @@ -75,7 +75,7 @@ def vector_search(embedding, count=Config.AVS_MAX_RESULTS): # Execute kNN search over the dataset field_names = ["quote_id", "quote", "author"] r = avs_client.vector_search( - namespace=Config.AVS_DATA_NAMESPACE, + namespace=Config.AVS_NAMESPACE, index_name=Config.AVS_INDEX_NAME, query=embedding, limit=count,