update eval pipeline

ml6team · Feb 6, 2024 · 649eef0 · 649eef0
1 parent 70dbe30
commit 649eef0
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 27 deletions.
diff --git a/src/evaluation.ipynb b/src/evaluation.ipynb
@@ -433,7 +433,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "TODO: describe retrieve chunk component and ragas eval component"
+    "Afterwards, we are going to retrieve chunks from the vector database and evaluate the retrieved chunks using RAGAS. Finally, we are going to aggregate the metrics to allow an overall performance estimation.\n",
+    "\n",
+    "Take a look at the `components` folder to learn more about the custom component implementation."
    ]
   },
   {
@@ -530,16 +532,6 @@
     "Press the ◼️ in the notebook toolbar to **stop the explorer**."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "os.environ[\"DOCKER_DEFAULT_PLATFORM\"] = \"linux/amd64\""
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,

diff --git a/src/pipeline_eval.py b/src/pipeline_eval.py
@@ -2,6 +2,9 @@
 
 import pyarrow as pa
 from fondant.pipeline import Pipeline, Resources
+from components.retrieve_from_weaviate import RetrieveFromWeaviateComponent
+from components.evaluate_ragas import RagasEvaluator
+from components.aggregrate_eval_results import AggregateResults
 
 
 def create_pipeline(
@@ -10,7 +13,6 @@ def create_pipeline(
     weaviate_url="http://host.docker.internal:8080",
     weaviate_class: str = "Pipeline1",
     evaluation_set_path="./evaluation_datasets",
-    evaluation_set_filename="wikitext_1000_q.csv",
     evaluation_set_separator: str = ";",
     embed_model_provider: str = "huggingface",
     embed_model: str = "all-MiniLM-L6-v2",
@@ -19,7 +21,6 @@ def create_pipeline(
     llm_module_name: str = "langchain.chat_models",
     llm_class_name: str = "ChatOpenAI",
     llm_kwargs: dict = {"model_name": "gpt-3.5-turbo"},
-    evaluation_metrics: list = ["context_precision", "context_relevancy"],
     number_of_accelerators=None,
     accelerator_name=None,
 ):
@@ -33,7 +34,7 @@ def create_pipeline(
     load_from_csv = evaluation_pipeline.read(
         "load_from_csv",
         arguments={
-            "dataset_uri": "/evaldata/" + evaluation_set_filename,
+            "dataset_uri": evaluation_set_path,
             # mounted dir from within docker as extra_volumes
             "column_separator": evaluation_set_separator,
         },
@@ -60,28 +61,29 @@ def create_pipeline(
     )
 
     retrieve_chunks = embed_text_op.apply(
-        "retrieve_from_weaviate",
-        arguments={
-            "weaviate_url": weaviate_url,
-            "class_name": weaviate_class,
-            "top_k": retrieval_top_k,
-        },
-        cache=False,
+    RetrieveFromWeaviateComponent,
+    arguments={
+        "weaviate_url": weaviate_url,
+        "class_name": weaviate_class,
+        "top_k": retrieval_top_k
+    },
     )
 
     retriever_eval = retrieve_chunks.apply(
-        "evaluate_ragas",
+        RagasEvaluator,
         arguments={
             "llm_module_name": llm_module_name,
             "llm_class_name": llm_class_name,
-            "llm_kwargs": llm_kwargs,
-        },
-        produces={metric: pa.float32() for metric in evaluation_metrics},
+            "llm_kwargs": llm_kwargs
+        }
     )
 
     retriever_eval.apply(
-        "components/aggregate_eval_results",
-        consumes={metric: pa.float32() for metric in evaluation_metrics},
+        AggregateResults, 
+        consumes={
+            "context_precision": "context_precision",
+            "context_relevancy": "context_relevancy"
+        }
     )
 
     return evaluation_pipeline