From f6c4e5d2e3b5c4c4f816a4d56cd2503687d4cb7d Mon Sep 17 00:00:00 2001
From: Philippe Moussalli
Date: Fri, 9 Feb 2024 14:14:45 +0100
Subject: [PATCH] update indexing notebook (#73)

Updated indexing notebook. For now we're using the version from this
[PR](https://github.com/ml6team/fondant/pulls) to allow using reusable
components in notebooks.

* Converted the custom cleaning component and chunking component to lightweight components.
* Added TODOs to change after having a stable release

Future Todos:

* There is an error when using the custom cleaning component related to the way the component text is parsed (related to the presence of `\n`). Requires further investigation. (This PR)
* Change write index components to lightweight components (This PR)
* Switch over the two remaining notebooks to lightweight components as well (future PRs)

---------

Co-authored-by: Matthias Richter
---
 README.md                                     |   21 +-
 requirements.txt                              |    4 +-
 .../aggregate_eval_results/Dockerfile         |   18 -
 .../fondant_component.yaml                    |   16 -
 .../aggregate_eval_results/requirements.txt   |    1 -
 .../aggregate_eval_results/src/main.py        |   16 -
 src/components/text_cleaning/Dockerfile       |   13 -
 .../text_cleaning/fondant_component.yaml      |   11 -
 src/components/text_cleaning/requirements.txt |    1 -
 src/components/text_cleaning/src/main.py      |   18 -
 src/evaluation.ipynb                          | 2529 +++++++++++++++--
 src/evaluation_datasets/wikitext_1000_q.csv   |   39 -
 src/indexing.ipynb                            | 1632 +++++++++++
 src/parameter_search.ipynb                    |  625 ----
 src/pipeline.ipynb                            |  608 ----
 src/pipeline_eval.py                          |   87 -
 src/pipeline_index.py                         |   70 -
 src/utils.py                                  |  318 ---
 18 files changed, 3970 insertions(+), 2057 deletions(-)
 delete mode 100644 src/components/aggregate_eval_results/Dockerfile
 delete mode 100644 src/components/aggregate_eval_results/fondant_component.yaml
 delete mode 100644 src/components/aggregate_eval_results/requirements.txt
 delete mode 100644 src/components/aggregate_eval_results/src/main.py
 delete mode 100644 src/components/text_cleaning/Dockerfile
 delete mode 100644 src/components/text_cleaning/fondant_component.yaml
 delete mode 100644 src/components/text_cleaning/requirements.txt
 delete mode 100644 src/components/text_cleaning/src/main.py
 create mode 100644 src/indexing.ipynb
 delete mode 100644 src/parameter_search.ipynb
 delete mode 100644 src/pipeline.ipynb
 delete mode 100644 src/pipeline_eval.py
 delete mode 100644 src/pipeline_index.py

diff --git a/README.md b/README.md
index 1b00e9b..b29bf7f 100644
--- a/README.md
+++ b/README.md
@@ -22,28 +22,13 @@ Check out the Fondant [website](https://fondant.ai/) if you want to learn more a
 
 ### A simple RAG indexing pipeline
 
-A [**notebook**](./src/pipeline.ipynb) with a simple Fondant pipeline to index your data into a
+A [**notebook**](./src/indexing.ipynb) with a simple Fondant pipeline to index your data into a
 RAG system.
 
 ### Iterative tuning of a RAG indexing pipeline
 
 A [**notebook**](./src/evaluation.ipynb) which iteratively runs a Fondant
-[indexing pipeline](./src/pipeline_index.py) and [evaluation pipeline](./src/pipeline_eval.py) with
-different parameters for comparison. You can inspect the data between every step to make
-informed choices on which parameters to try.
-
-### Auto-tuning of a RAG indexing pipeline
-

-A notebook which allows you to automatically search for the
-optimal parameter settings using different methods
-
+pipeline to evaluate a RAG system using [RAGAS](https://github.com/explodinggradients/ragas/tree/main/src/ragas).
 
 ## Getting started
 
@@ -84,4 +69,4 @@ fondant --help
 There are two options to run the pipeline:
 
 - [**Via python files and the Fondant CLI:**](https://fondant.ai/en/latest/pipeline/#running-a-pipeline) how you should run Fondant in production
-- [**Via a Jupyter notebook**](./src/pipeline.ipynb): ideal to learn about Fondant
+- [**Via a Jupyter notebook**](./src/indexing.ipynb): ideal to learn about Fondant

diff --git a/requirements.txt b/requirements.txt
index 609e239..e3ce0f5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-fondant==0.9.0
+fondant[component,aws,azure,gcp,docker]==0.10.1
 notebook==7.0.6
-weaviate-client==3.25.3
+weaviate-client==3.25.3
\ No newline at end of file

diff --git a/src/components/aggregate_eval_results/Dockerfile b/src/components/aggregate_eval_results/Dockerfile
deleted file mode 100644
index 43bc363..0000000
--- a/src/components/aggregate_eval_results/Dockerfile
+++ /dev/null
@@ -1,18 +0,0 @@
-FROM --platform=linux/amd64 python:3.8-slim as base
-
-# System dependencies
-RUN apt-get update && \
-    apt-get upgrade -y && \
-    apt-get install git -y
-
-# Install requirements
-COPY requirements.txt /
-RUN pip3 install --no-cache-dir -r requirements.txt
-
-# Set the working directory to the component folder
-WORKDIR /component/src
-
-# Copy over src-files
-COPY src/ .
-
-ENTRYPOINT ["fondant", "execute", "main"]
\ No newline at end of file

diff --git a/src/components/aggregate_eval_results/fondant_component.yaml b/src/components/aggregate_eval_results/fondant_component.yaml
deleted file mode 100644
index 473bdb9..0000000
--- a/src/components/aggregate_eval_results/fondant_component.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-#metadata: to be matched w/ docker image
-name: aggregate_eval_results
-description: Component that aggregates results of the evaluation of the retriever
-image: ghcr.io/ml6team/aggregate_eval_results:dev
-
-consumes: #TODO: add/retrieve metrics to consider
-  additionalProperties: true
-
-
-produces:
-  metric:
-    type: string
-  score:
-    type: float32
-
-previous_index: ""

diff --git a/src/components/aggregate_eval_results/requirements.txt b/src/components/aggregate_eval_results/requirements.txt
deleted file mode 100644
index 53e5d83..0000000
--- a/src/components/aggregate_eval_results/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-fondant[component]==0.9.0
\ No newline at end of file

diff --git a/src/components/aggregate_eval_results/src/main.py b/src/components/aggregate_eval_results/src/main.py
deleted file mode 100644
index 190ddc0..0000000
--- a/src/components/aggregate_eval_results/src/main.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import dask.dataframe as dd
-from fondant.component import DaskTransformComponent
-
-
-class AggregateResults(DaskTransformComponent):
-    def __init__(self, consumes: dict, **kwargs):
-        self.consumes = consumes
-
-    def transform(self, dataframe: dd.DataFrame) -> dd.DataFrame:
-        metrics = list(self.consumes.keys())
-        agg = dataframe[metrics].mean()
-        agg_df = agg.to_frame(name="score")
-        agg_df["metric"] = agg.index
-        agg_df.index = agg_df.index.astype(str)
-
-        return agg_df

diff --git a/src/components/text_cleaning/Dockerfile b/src/components/text_cleaning/Dockerfile
deleted file mode 100644
index d32a608..0000000
--- a/src/components/text_cleaning/Dockerfile
+++ /dev/null
@@ -1,13 +0,0 @@
-FROM --platform=linux/amd64 python:3.8-slim
-
-# Install requirements
-COPY requirements.txt /
-RUN pip3 
install --no-cache-dir -r requirements.txt
-
-# Set the working directory to the component folder
-WORKDIR /component/src
-
-# Copy over src-files
-COPY src/ .
-
-ENTRYPOINT ["fondant", "execute", "main"]

diff --git a/src/components/text_cleaning/fondant_component.yaml b/src/components/text_cleaning/fondant_component.yaml
deleted file mode 100644
index ee2d561..0000000
--- a/src/components/text_cleaning/fondant_component.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-name: Text cleaning component
-description: Clean text passages
-image: ghcr.io/ml6team/text_cleaning:dev
-
-consumes:
-  text:
-    type: string
-
-produces:
-  text:
-    type: string

diff --git a/src/components/text_cleaning/requirements.txt b/src/components/text_cleaning/requirements.txt
deleted file mode 100644
index f2c5454..0000000
--- a/src/components/text_cleaning/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-fondant[component]==0.9.0

diff --git a/src/components/text_cleaning/src/main.py b/src/components/text_cleaning/src/main.py
deleted file mode 100644
index 687abdb..0000000
--- a/src/components/text_cleaning/src/main.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import pandas as pd
-from fondant.component import PandasTransformComponent
-
-
-class TextCleaningComponent(PandasTransformComponent):
-    def __init__(self, **kwargs):
-        """Initialize your component."""
-
-    def remove_empty_lines(self, text):
-        lines = text.split("\n")
-        non_empty_lines = [line.strip() for line in lines if line.strip()]
-        return "\n".join(non_empty_lines)
-
-    def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
-        dataframe["text"] = dataframe["text"].apply(
-            self.remove_empty_lines,
-        )
-        return dataframe

diff --git a/src/evaluation.ipynb b/src/evaluation.ipynb
index 4bfd424..6b8ce51 100644
--- a/src/evaluation.ipynb
+++ b/src/evaluation.ipynb
@@ -15,13 +15,6 @@
     "> Please note that it is not compatible with Google Colab as the latter does not support Docker."
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "> 💡 This notebook allows you to iteratively configure and run a RAG pipeline. Check out our [**advanced notebook**](./parameter_search.ipynb) if you want to perform **parameter search** and **launch multiple runs at once**."
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -42,27 +35,6 @@
     ""
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We will use [**Fondant**](https://fondant.ai), **a hub and framework for easy and shareable data processing**, which has the following advantages for RAG evaluation:\n",
-    "\n",
-    "- **Speed**\n",
-    "  - Reusable RAG components from the [Fondant Hub](https://fondant.ai/en/latest/components/hub/) to quickly build RAG pipelines\n",
-    "  - [Pipeline caching](https://fondant.ai/en/latest/caching/) to speed up subsequent runs\n",
-    "  - Parallel processing out of the box to speed up processing of large datasets\n",
-    "- **Ease-of-use**\n",
-    "  - Change parameters and swap [components](https://fondant.ai/en/latest/components/hub/) by changing only a few lines of code\n",
-    "  - Create your own [custom components](https://fondant.ai/en/latest/components/custom_component/) (e.g. 
with different chunking strategies) and plug them into your pipeline\n", - " - Reuse your processing components in different pipelines and share them with the [community](https://discord.gg/HnTdWhydGp)\n", - "- **Production-readiness**\n", - " - Full data lineage and a [data explorer](https://fondant.ai/en/latest/data_explorer/) to check the evolution of data after each step\n", - " - Ready to deploy to (managed) platforms such as _Vertex, SageMaker and Kubeflow_\n", - " \n", - "Share your experiences or let us know how we can improve through our [**Discord**](https://discord.gg/HnTdWhydGp) or on [**GitHub**](https://github.com/ml6team/fondant). And of course feel free to give us a [**star ⭐**](https://github.com/ml6team/fondant-usecase-RAG) if you like what we are doing!" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -86,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -104,9 +76,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Docker Compose version v2.19.1\n" + ] + } + ], "source": [ "!docker compose version" ] @@ -115,64 +95,28 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Install **Fondant** framework" + "Install Fondant framework" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "datasets 2.16.1 requires fsspec[http]<=2023.10.0,>=2023.1.0, but you have fsspec 2023.12.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccess\n" + ] + } + ], "source": [ "!pip install -q -r ../requirements.txt --disable-pip-version-check && echo \"Success\"" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make sure that **logs** are displayed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "logger = logging.getLogger()\n", - "logger.setLevel(logging.INFO)\n", - "logging.info(\"test\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Check if GPU is available**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "import subprocess\n", - "\n", - "try:\n", - " subprocess.check_output('nvidia-smi')\n", - " logging.info(\"Found GPU, using it!\")\n", - " number_of_accelerators = 1\n", - " accelerator_name = \"GPU\"\n", - "except Exception:\n", - " logging.warning(\"We recommend to run this pipeline on a GPU, but none could be found, using CPU instead\")\n", - " number_of_accelerators = None\n", - " accelerator_name = None" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -190,16 +134,6 @@ "> - Make sure that Docker uses linux/amd64 platform and not arm64 (cell below should take care of that)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"DOCKER_DEFAULT_PLATFORM\"]=\"linux/amd64\"" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -209,9 +143,67 @@ }, { "cell_type": "code", - "execution_count": 
null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 0/0\n", + " ⠋ Network weaviate_service_default Creating \u001b[34m0.0s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 1/1\u001b[0m\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠋ Container weaviate_service-contextionary-1 Creating \u001b[34m0.0s \u001b[0m\n", + " ⠋ Container weaviate_service-weaviate-1 Creating \u001b[34m0.0s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠙ Container weaviate_service-contextionary-1 Creating \u001b[34m0.1s \u001b[0m\n", + " ⠙ Container weaviate_service-weaviate-1 Creating \u001b[34m0.1s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container weaviate_service-contextionary-1 Starting \u001b[34m0.2s \u001b[0m\n", + " ⠿ Container weaviate_service-weaviate-1 Starting \u001b[34m0.2s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container weaviate_service-contextionary-1 Starting \u001b[34m0.3s \u001b[0m\n", + " ⠿ Container weaviate_service-weaviate-1 Starting \u001b[34m0.3s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container weaviate_service-contextionary-1 Starting \u001b[34m0.4s \u001b[0m\n", + " ⠿ Container weaviate_service-weaviate-1 Starting \u001b[34m0.4s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container weaviate_service-contextionary-1 Starting \u001b[34m0.5s \u001b[0m\n", + " ⠿ Container weaviate_service-weaviate-1 Starting \u001b[34m0.5s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container weaviate_service-contextionary-1 Starting \u001b[34m0.6s \u001b[0m\n", + " ⠿ Container weaviate_service-weaviate-1 Starting \u001b[34m0.6s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container weaviate_service-contextionary-1 Starting \u001b[34m0.7s \u001b[0m\n", + " ⠿ Container weaviate_service-weaviate-1 Starting \u001b[34m0.7s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container weaviate_service-contextionary-1 Starting \u001b[34m0.8s 
\u001b[0m\n", + " ⠿ Container weaviate_service-weaviate-1 Starting \u001b[34m0.8s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container weaviate_service-contextionary-1 Starting \u001b[34m0.9s \u001b[0m\n", + " ⠿ Container weaviate_service-weaviate-1 Starting \u001b[34m0.9s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " ⠿ Container weaviate_service-contextionary-1 Starting \u001b[34m1.0s \u001b[0m\n", + " ⠿ Container weaviate_service-weaviate-1 Starting \u001b[34m1.0s \u001b[0m\n", + "\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 3/3\u001b[0m\n", + " \u001b[32m✔\u001b[0m Network weaviate_service_default \u001b[32mCreated\u001b[0m \u001b[34m0.1s \u001b[0m\n", + " \u001b[32m✔\u001b[0m Container weaviate_service-contextionary-1 \u001b[32mStarted\u001b[0m \u001b[34m1.1s \u001b[0m\n", + " \u001b[32m✔\u001b[0m Container weaviate_service-weaviate-1 \u001b[32mStarted\u001b[0m \u001b[34m1.1s \u001b[0m\n", + "\u001b[?25h" + ] + } + ], "source": [ "!docker compose -f weaviate_service/docker-compose.yaml up --detach" ] @@ -232,9 +224,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/matthias/projects/fondant/fondant-usecase-RAG/.venv/lib/python3.10/site-packages/weaviate/warnings.py:121: DeprecationWarning: Dep005: You are using weaviate-client version 3.25.3. The latest version is 4.4.2.\n", + " Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.\n", + " warnings.warn(\n" + ] + } + ], "source": [ "import logging\n", "import weaviate\n", @@ -257,214 +259,2351 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`pipeline_index.py` processes text data and loads it into the vector database\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "- [**Load data**](https://github.com/ml6team/fondant/tree/main/components/load_from_parquet): loads data from the Hugging Face Hub\n", - "- [**Chunk data**](https://github.com/ml6team/fondant/tree/main/components/chunk_text): divides the text into sections of a certain size and with a certain overlap\n", - "- [**Embed chunks**](https://github.com/ml6team/fondant/tree/main/components/embed_text): embeds each chunk as a vector, e.g. using [Cohere](https://cohere.com/embeddings)\n", - "- [**Index vector store**](https://github.com/ml6team/fondant/tree/main/components/index_weaviate): writes data and embeddings to the vector store" + "Before we can evaluate data in a vector database we have to index documents. We have created a pipeline in the indexing notebook. Before you continue here, have a look at the notebook and initialise the database and the documents. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "> 💡 This notebook defaults to the first 1000 rows of the [wikitext](https://huggingface.co/datasets/wikitext) dataset for demonstration purposes, but you can load your own dataset using one the other load components available on the [**Fondant Hub**](https://fondant.ai/en/latest/components/hub/#component-hub) or by creating your own [**custom load component**](https://fondant.ai/en/latest/guides/implement_custom_components/). Keep in mind that changing the dataset implies that you also need to change the evaluation dataset used in the evaluation pipeline. " + "## Evaluation Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import utils\n", + "base_path = \"./data\"\n", + "utils.create_directory_if_not_exists(base_path)\n", + "weaviate_url = f\"http://{utils.get_host_ip()}:8081\"\n", + "weaviate_class = \"Index\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`pipeline_eval.py` evaluates retrieval performance using the questions provided in your test dataset\n", + "\n", + "
\n", + "\n", + "
\n", + "\n", + "- [**Load eval data**](https://github.com/ml6team/fondant/tree/main/components/load_from_csv): loads the evaluation dataset (questions) from a csv file\n", + "- [**Embed questons**](https://github.com/ml6team/fondant/tree/main/components/embed_text): embeds each question as a vector, e.g. using [Cohere](https://cohere.com/embeddings)\n", + "- [**Query vector store**](https://github.com/ml6team/fondant/tree/main/components/retrieve_from_weaviate): retrieves the most relevant chunks for each question from the vector store\n", + "- [**Evaluate**](https://github.com/ml6team/fondant/tree/0.8.0/components/evaluate_ragas): evaluates the retrieved chunks for each question, e.g. using [RAGAS](https://docs.ragas.io/en/latest/index.html)\n", + "- [**Aggregate**](https://github.com/ml6team/fondant-usecase-RAG/tree/main/src/components/aggregate_eval_results): calculates aggregated results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Create the indexing pipeline" + "### Create the evaluation pipeline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Change the arguments below if you want to run the pipeline with different parameters. For more information on the possible values, check out the above links to the component documentation." + "⚠️ If you want to use an **OpenAI** model for evaluation you will need an [API key](https://platform.openai.com/docs/quickstart) (see TODO below)\n", + "\n", + "Change the arguments below if you want to run the pipeline with different parameters." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "import pipeline_index\n", - "import utils\n", - "\n", - "# Path where data and artifacts will be stored\n", - "BASE_PATH = \"./data\"\n", - "utils.create_directory_if_not_exists(BASE_PATH)\n", - "\n", - "# Parameters shared between indexing and evaluation pipeline\n", - "shared_args = {\n", - " \"base_path\": BASE_PATH,\n", - " \"embed_model_provider\": \"huggingface\",\n", - " \"embed_model\": \"all-MiniLM-L6-v2\",\n", - " \"embed_api_key\": {},\n", - " \"weaviate_url\": f\"http://{utils.get_host_ip()}:8081\",\n", - " \"weaviate_class\": \"Pipeline1\", # Capitalized, avoid special characters (_, =, -, etc.)\n", - "}\n", - "\n", - "# Parameters for the indexing pipeline\n", - "indexing_args = {\n", - " \"n_rows_to_load\": 1000,\n", - " \"chunk_args\": {\"chunk_size\": 512, \"chunk_overlap\": 32}\n", - "}\n", - "\n", - "# Parameters for the GPU resources\n", - "resources_args = {\n", - " \"number_of_accelerators\": number_of_accelerators,\n", - " \"accelerator_name\": accelerator_name,\n", - "}\n", - "\n", - "indexing_pipeline = pipeline_index.create_pipeline(**shared_args, **indexing_args, **resources_args)" + "import os\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Run the indexing pipeline" + "We begin by initializing our pipeline." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import pyarrow as pa\n", + "from fondant.pipeline import Pipeline\n", + "evaluation_pipeline = Pipeline(\n", + " name=\"evaluation-pipeline\",\n", + " description=\"Pipeline to evaluate a RAG system\",\n", + " base_path=base_path,\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "> 💡 The first time you run a pipeline, you need to **download a docker image for each component** which may take a minute.\n", "\n", - "> 💡 Use a **GPU** or an external API to speed up the embedding step\n", - "\n", - "> 💡 Steps that have been processed before are **cached** and will be skipped in subsequent runs which speeds up processing." + "We have created a set of evaluation questions which we will use to evaluate the retrieval performance of the RAG system. Therefore, we need to load the CSV file containing the questions. We are going to use a reusable component for this task, `load_from_csv`." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "from fondant.pipeline.runner import DockerRunner\n", + "evaluation_set_filename = \"wikitext_1000_q.csv\"\n", "\n", - "runner = DockerRunner()\n", - "runner.run(indexing_pipeline)" + "load_from_csv = evaluation_pipeline.read(\n", + " \"load_from_csv\",\n", + " arguments={\n", + " \"dataset_uri\": \"/evaldata/\" + evaluation_set_filename,\n", + " # mounted dir from within docker as extra_volumes\n", + " \"column_separator\": \";\",\n", + " },\n", + " produces={\n", + " \"question\": pa.string(),\n", + " },\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Evaluation Pipeline" + "Afterward, we are going to embed our questions and retrieve answers from the database. Here we will once again use the reusable `embed_text` component." ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 12, "metadata": {}, + "outputs": [], "source": [ - "`pipeline_eval.py` evaluates retrieval performance using the questions provided in your test dataset\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "- [**Load eval data**](https://github.com/ml6team/fondant/tree/main/components/load_from_csv): loads the evaluation dataset (questions) from a csv file\n", - "- [**Embed questons**](https://github.com/ml6team/fondant/tree/main/components/embed_text): embeds each question as a vector, e.g. using [Cohere](https://cohere.com/embeddings)\n", - "- [**Query vector store**](https://github.com/ml6team/fondant/tree/main/components/retrieve_from_weaviate): retrieves the most relevant chunks for each question from the vector store\n", - "- [**Evaluate**](https://github.com/ml6team/fondant/tree/0.8.0/components/evaluate_ragas): evaluates the retrieved chunks for each question, e.g. using [RAGAS](https://docs.ragas.io/en/latest/index.html)\n", - "- [**Aggregate**](https://github.com/ml6team/fondant-usecase-RAG/tree/main/src/components/aggregate_eval_results): calculates aggregated results" + "embed_text_op = load_from_csv.apply(\n", + " \"embed_text\",\n", + " arguments={\n", + " \"model_provider\": \"huggingface\",\n", + " \"model\": \"all-MiniLM-L6-v2\"\n", + " },\n", + " consumes={\n", + " \"text\": \"question\",\n", + " }\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Create the evaluation pipeline" + "Before we can evaluate answers, we need to retrieve these for our questions. Hence, we are building a custom lightweight component to add to our pipeline later." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:fondant.pipeline.lightweight_component:No consumes defined. Consumes will be inferred from the dataset. All field will be consumed which may lead to additional computation, Consider defining consumes in the component.\n", + " Consumes: {'question': {'type': 'string'}, 'embedding': {'type': 'array', 'items': {'type': 'float32'}}}\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import pyarrow as pa\n", + "from fondant.component import PandasTransformComponent\n", + "from fondant.pipeline import lightweight_component\n", + "\n", + "\n", + "@lightweight_component(\n", + " produces={\"retrieved_chunks\": pa.list_(pa.string())},\n", + " extra_requires=[\"weaviate-client==3.24.1\"],\n", + ")\n", + "class RetrieveFromWeaviateComponent(PandasTransformComponent):\n", + " def __init__(self, *, weaviate_url: str, class_name: str, top_k: int) -> None:\n", + " import weaviate\n", + "\n", + " self.client = weaviate.Client(\n", + " url=weaviate_url,\n", + " additional_config=None,\n", + " additional_headers=None,\n", + " )\n", + " self.class_name = class_name\n", + " self.k = top_k\n", + "\n", + " def teardown(self) -> None:\n", + " # Ensure the weaviate client is closed at the end of the component lifetime\n", + " del self.client\n", + "\n", + " def retrieve_chunks_from_embeddings(self, vector_query: list):\n", + " \"\"\"Get results from weaviate database.\"\"\"\n", + " query = (\n", + " self.client.query.get(self.class_name, [\"passage\"])\n", + " .with_near_vector({\"vector\": vector_query})\n", + " .with_limit(self.k)\n", + " .with_additional([\"distance\"])\n", + " )\n", + "\n", + " result = query.do()\n", + " result_dict = result[\"data\"][\"Get\"][self.class_name]\n", + " return [retrieved_chunk[\"passage\"] for retrieved_chunk in result_dict]\n", + "\n", + " def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:\n", + " dataframe[\"retrieved_chunks\"] = 
dataframe[\"embedding\"].apply(self.retrieve_chunks_from_embeddings)\n", + " return dataframe\n", + "\n", + "# Add component to pipeline\n", + "retrieve_chunks = embed_text_op.apply(\n", + " RetrieveFromWeaviateComponent,\n", + " arguments={\n", + " \"weaviate_url\": weaviate_url,\n", + " \"class_name\": weaviate_class,\n", + " \"top_k\": 2\n", + " },\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "⚠️ If you want to use an **OpenAI** model for evaluation you will need an [API key](https://platform.openai.com/docs/quickstart) (see TODO below)\n", + "`RetrieveFromWeaviateComponent` will produce `retrieved_chunks`. We aim to evaluate these chunks using RAGAS. RAGAS is an open-source library designed to assess RAG systems by leveraging LLMs. In this example, we'll use gpt-3.5-turbo. Essentially, we pass the retrieved chunks along with the answer to a LLM and ask it to judge the quality of the provided answers.\n", "\n", - "Change the arguments below if you want to run the pipeline with different parameters." + "Feel free to explore the RAGAS documentation and modify the component to suit your needs. RAGAS provides support for altering the prompt and adapting it to your specific domain or language." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ - "import os\n", - "import pipeline_eval\n", - "\n", + "@lightweight_component(\n", + " consumes={\n", + " \"question\": pa.string(),\n", + " \"retrieved_chunks\": pa.list_(pa.string()),\n", + " },\n", + " produces={\n", "\n", - "evaluation_args = {\n", - " \"retrieval_top_k\": 2,\n", - " \"llm_module_name\": \"langchain.chat_models\",\n", - " \"llm_class_name\": \"ChatOpenAI\",\n", - " \"llm_kwargs\": {\n", - " \"openai_api_key\":\"\" , # TODO: Update with your key or use a different model\n", - " \"model_name\" : \"gpt-3.5-turbo\"\n", + " \"context_relevancy\": pa.float32(),\n", " },\n", - " \"evaluation_metrics\": [\"context_precision\", \"context_relevancy\"]\n", - "}\n", + " extra_requires=[\"ragas==0.1.0\"],\n", + ")\n", + "class RagasEvaluator(PandasTransformComponent):\n", + " def __init__(self, *, open_ai_key: str) -> None:\n", + " import os\n", + " os.environ[\"OPENAI_API_KEY\"] = open_ai_key\n", + "\n", + " def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:\n", + " from datasets import Dataset\n", + " from ragas import evaluate\n", + " from ragas.metrics import context_relevancy\n", + " from langchain_openai.chat_models import ChatOpenAI\n", + "\n", + " gpt_evaluator = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n", + "\n", + " dataframe = dataframe.rename(\n", + " columns={\"retrieved_chunks\": \"contexts\"},\n", + " )\n", + " \n", + " dataset = Dataset.from_pandas(dataframe)\n", + "\n", + " result = evaluate(\n", + " dataset, \n", + " metrics=[context_relevancy],\n", + " llm=gpt_evaluator,\n", + " )\n", + "\n", + " results_df = result.to_pandas()\n", + " results_df = results_df.set_index(dataframe.index)\n", "\n", - "evaluation_pipeline = pipeline_eval.create_pipeline(**shared_args, **evaluation_args)" + " return results_df\n", + " \n", + "# Add component to pipeline\n", + "retriever_eval = retrieve_chunks.apply(\n", + " RagasEvaluator,\n", + " arguments={\n", + " \"open_ai_key\": os.getenv(\"OPENAI_API_KEY\")\n", + " }\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Run the evaluation pipeline" + "The `RAGASEvaluator` component will append two additional columns to our dataset: `context_precision` and 
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To evaluate the overall performance of our RAG setup, we need to aggregate these results. For demonstration purposes, we'll write the results to a file. Of course, you can export the aggregated results to any dashboard tool of your choice."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       ""
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from fondant.component import DaskWriteComponent\n",
    "import dask.dataframe as dd\n",
    "\n",
    "\n",
    "@lightweight_component(\n",
    "    consumes={\n",
    "        \"context_relevancy\": pa.float32(),\n",
    "    }\n",
    ")\n",
    "class AggregateResults(DaskWriteComponent):\n",
    "    def write(self, dataframe: dd.DataFrame) -> None:\n",
    "        import pandas as pd\n",
    "\n",
    "        # mean() on a Dask series is lazy; compute() materializes the scalar\n",
    "        mean_context_relevancy = dataframe[\"context_relevancy\"].mean().compute()\n",
    "        # wrap the scalar in a list so pandas can build a one-row frame\n",
    "        df = pd.DataFrame({\n",
    "            \"context_relevancy\": [mean_context_relevancy]\n",
    "        })\n",
    "\n",
    "        df.to_csv(\"./evaldata/aggregated_results.csv\")\n",
    "\n",
    "# Add component to pipeline\n",
    "retriever_eval.apply(\n",
    "    AggregateResults,\n",
    "    consumes={\n",
    "        \"context_relevancy\": \"context_relevancy\"\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Run the evaluation pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:root:Found reference to un-compiled pipeline... 
compiling\n", + "INFO:fondant.pipeline.compiler:Compiling evaluation-pipeline to .fondant/compose.yaml\n", + "INFO:fondant.pipeline.compiler:Base path found on local system, setting up ./data as mount volume\n", + "INFO:fondant.pipeline.pipeline:Sorting pipeline component graph topologically.\n", + "INFO:fondant.pipeline.pipeline:All pipeline component specifications match.\n", + "INFO:fondant.pipeline.compiler:Compiling service for load_from_csv\n", + "INFO:fondant.pipeline.compiler:Compiling service for embed_text\n", + "INFO:fondant.pipeline.compiler:Compiling service for retrievefromweaviatecomponent\n", + "INFO:fondant.pipeline.compiler:Compiling service for ragasevaluator\n", + "INFO:fondant.pipeline.compiler:Compiling service for aggregateresults\n", + "INFO:fondant.pipeline.compiler:Successfully compiled to .fondant/compose.yaml\n", + " load_from_csv Pulling \n", + " ragasevaluator Pulling \n", + " aggregateresults Pulling \n", + " embed_text Pulling \n", + " retrievefromweaviatecomponent Pulling \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting pipeline run...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " retrievefromweaviatecomponent Pulled \n", + " ragasevaluator Pulled \n", + " load_from_csv Pulled \n", + " embed_text Pulled \n", + " aggregateresults Pulled \n", + " Container evaluation-pipeline-load_from_csv-1 Recreate\n", + " Container evaluation-pipeline-load_from_csv-1 Recreated\n", + " Container evaluation-pipeline-embed_text-1 Recreate\n", + " Container evaluation-pipeline-embed_text-1 Recreated\n", + " Container evaluation-pipeline-retrievefromweaviatecomponent-1 Recreate\n", + " Container evaluation-pipeline-retrievefromweaviatecomponent-1 Recreated\n", + " Container evaluation-pipeline-ragasevaluator-1 Recreate\n", + " Container evaluation-pipeline-ragasevaluator-1 Recreated\n", + " Container evaluation-pipeline-aggregateresults-1 Recreate\n", + " Container evaluation-pipeline-aggregateresults-1 Recreated\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attaching to evaluation-pipeline-aggregateresults-1, evaluation-pipeline-embed_text-1, evaluation-pipeline-load_from_csv-1, evaluation-pipeline-ragasevaluator-1, evaluation-pipeline-retrievefromweaviatecomponent-1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "evaluation-pipeline-load_from_csv-1 | [2024-02-08 13:30:29,549 | fondant.cli | INFO] Component `CSVReader` found in module main\n", + "evaluation-pipeline-load_from_csv-1 | [2024-02-08 13:30:29,554 | fondant.component.executor | INFO] Dask default local mode will be used for further executions.Our current supported options are limited to 'local' and 'default'.\n", + "evaluation-pipeline-load_from_csv-1 | [2024-02-08 13:30:29,558 | fondant.component.executor | INFO] Skipping component execution\n", + "evaluation-pipeline-load_from_csv-1 | [2024-02-08 13:30:29,561 | fondant.component.executor | INFO] Matching execution detected for component. 
The last execution of the component originated from `evaluation-pipeline-20240206105318`.\n", + "evaluation-pipeline-load_from_csv-1 | [2024-02-08 13:30:29,566 | fondant.component.executor | INFO] Saving output manifest to /data/evaluation-pipeline/evaluation-pipeline-20240208143024/load_from_csv/manifest.json\n", + "evaluation-pipeline-load_from_csv-1 | [2024-02-08 13:30:29,567 | fondant.component.executor | INFO] Writing cache key with manifest reference to /data/evaluation-pipeline/cache/f12e1430da7973c18de35ecf782d313d.txt\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluation-pipeline-load_from_csv-1 exited with code 0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "evaluation-pipeline-embed_text-1 | [2024-02-08 13:30:33,559 | fondant.cli | INFO] Component `EmbedTextComponent` found in module main\n", + "evaluation-pipeline-embed_text-1 | [2024-02-08 13:30:33,564 | fondant.component.executor | INFO] Dask default local mode will be used for further executions.Our current supported options are limited to 'local' and 'default'.\n", + "evaluation-pipeline-embed_text-1 | [2024-02-08 13:30:33,569 | fondant.component.executor | INFO] Previous component `load_from_csv` run was cached. Cached pipeline id: evaluation-pipeline-20240206105318\n", + "evaluation-pipeline-embed_text-1 | [2024-02-08 13:30:33,571 | fondant.component.executor | INFO] Skipping component execution\n", + "evaluation-pipeline-embed_text-1 | [2024-02-08 13:30:33,574 | fondant.component.executor | INFO] Matching execution detected for component. The last execution of the component originated from `evaluation-pipeline-20240206105318`.\n", + "evaluation-pipeline-embed_text-1 | [2024-02-08 13:30:33,578 | fondant.component.executor | INFO] Saving output manifest to /data/evaluation-pipeline/evaluation-pipeline-20240208143024/embed_text/manifest.json\n", + "evaluation-pipeline-embed_text-1 | [2024-02-08 13:30:33,578 | fondant.component.executor | INFO] Writing cache key with manifest reference to /data/evaluation-pipeline/cache/325492fed9dd081a79c7a2afef9cee0c.txt\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluation-pipeline-embed_text-1 exited with code 0\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Collecting weaviate-client==3.24.1 (from -r requirements.txt (line 1))\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Obtaining dependency information for weaviate-client==3.24.1 from https://files.pythonhosted.org/packages/59/8f/44d164ed990f7c6faf28125925160af9004595020aeaaf01e94462e3bf8e/weaviate_client-3.24.1-py3-none-any.whl.metadata\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Downloading weaviate_client-3.24.1-py3-none-any.whl.metadata (3.3 kB)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Requirement already satisfied: requests<3.0.0,>=2.30.0 in /usr/local/lib/python3.11/site-packages (from weaviate-client==3.24.1->-r requirements.txt (line 1)) (2.31.0)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Collecting validators<1.0.0,>=0.21.2 (from weaviate-client==3.24.1->-r requirements.txt (line 1))\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Obtaining dependency information for validators<1.0.0,>=0.21.2 from https://files.pythonhosted.org/packages/3a/0c/785d317eea99c3739821718f118c70537639aa43f96bfa1d83a71f68eaf6/validators-0.22.0-py3-none-any.whl.metadata\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Downloading 
validators-0.22.0-py3-none-any.whl.metadata (4.7 kB)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Collecting authlib<2.0.0,>=1.2.1 (from weaviate-client==3.24.1->-r requirements.txt (line 1))\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Obtaining dependency information for authlib<2.0.0,>=1.2.1 from https://files.pythonhosted.org/packages/25/65/b78eb948b71ab232d08b30c38a2e3b69e6e50c6e166863a0068c877155b9/Authlib-1.3.0-py2.py3-none-any.whl.metadata\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Downloading Authlib-1.3.0-py2.py3-none-any.whl.metadata (3.8 kB)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Requirement already satisfied: cryptography in /usr/local/lib/python3.11/site-packages (from authlib<2.0.0,>=1.2.1->weaviate-client==3.24.1->-r requirements.txt (line 1)) (42.0.2)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client==3.24.1->-r requirements.txt (line 1)) (3.3.2)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client==3.24.1->-r requirements.txt (line 1)) (3.6)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client==3.24.1->-r requirements.txt (line 1)) (2.0.7)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client==3.24.1->-r requirements.txt (line 1)) (2024.2.2)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.11/site-packages (from cryptography->authlib<2.0.0,>=1.2.1->weaviate-client==3.24.1->-r requirements.txt (line 1)) (1.16.0)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Requirement already satisfied: pycparser in /usr/local/lib/python3.11/site-packages (from cffi>=1.12->cryptography->authlib<2.0.0,>=1.2.1->weaviate-client==3.24.1->-r requirements.txt (line 1)) (2.21)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Downloading weaviate_client-3.24.1-py3-none-any.whl (107 kB)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 108.0/108.0 kB 3.1 MB/s eta 0:00:00\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | \n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | \n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Downloading Authlib-1.3.0-py2.py3-none-any.whl (223 kB)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 223.7/223.7 kB 10.2 MB/s eta 0:00:00\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | \n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | \n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Downloading validators-0.22.0-py3-none-any.whl (26 kB)\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Installing collected packages: validators, authlib, weaviate-client\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | Successfully installed authlib-1.3.0 validators-0.22.0 weaviate-client-3.24.1\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "evaluation-pipeline-retrievefromweaviatecomponent-1 | WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | \n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | [notice] A new release of pip is available: 23.2.1 -> 24.0\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | [notice] To update, run: pip install --upgrade pip\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | \n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | [2024-02-08 13:30:38,734 | fondant.cli | INFO] Component `RetrieveFromWeaviateComponent` found in module main\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | [2024-02-08 13:30:38,739 | fondant.component.executor | INFO] Dask default local mode will be used for further executions.Our current supported options are limited to 'local' and 'default'.\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | [2024-02-08 13:30:38,742 | fondant.component.executor | INFO] Previous component `embed_text` run was cached. Cached pipeline id: evaluation-pipeline-20240206105318\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | [2024-02-08 13:30:38,744 | fondant.component.executor | INFO] Skipping component execution\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | [2024-02-08 13:30:38,746 | fondant.component.executor | INFO] Matching execution detected for component. The last execution of the component originated from `evaluation-pipeline-20240208135836`.\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | [2024-02-08 13:30:38,750 | fondant.component.executor | INFO] Saving output manifest to /data/evaluation-pipeline/evaluation-pipeline-20240208143024/retrievefromweaviatecomponent/manifest.json\n", + "evaluation-pipeline-retrievefromweaviatecomponent-1 | [2024-02-08 13:30:38,751 | fondant.component.executor | INFO] Writing cache key with manifest reference to /data/evaluation-pipeline/cache/4efd7dca69fe8820c65650cac7529007.txt\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluation-pipeline-retrievefromweaviatecomponent-1 exited with code 0\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting ragas==0.1.0 (from -r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for ragas==0.1.0 from https://files.pythonhosted.org/packages/5e/94/97777b227098625c48fcde0ac292caff3bf2b2a8c6b1cd49e417498722c2/ragas-0.1.0-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading ragas-0.1.0-py3-none-any.whl.metadata (4.7 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Requirement already satisfied: numpy in /usr/local/lib/python3.11/site-packages (from ragas==0.1.0->-r requirements.txt (line 1)) (1.26.3)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting datasets (from ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for datasets from https://files.pythonhosted.org/packages/ec/93/454ada0d1b289a0f4a86ac88dbdeab54921becabac45da3da787d136628f/datasets-2.16.1-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading datasets-2.16.1-py3-none-any.whl.metadata (20 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting tiktoken (from 
ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for tiktoken from https://files.pythonhosted.org/packages/fb/a9/237dc2db35e6ec0fb7dd63e3d10ebe0377559203bd2a87e12a4adbfc8585/tiktoken-0.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading tiktoken-0.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting langchain (from ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/c1/c3/0e59a0c24e0c61b52271445df55302ab2f3dd8489a365721c7ef7ecaba24/langchain-0.1.5-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading langchain-0.1.5-py3-none-any.whl.metadata (13 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting langchain-core (from ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for langchain-core from https://files.pythonhosted.org/packages/ef/8c/e7fc5fa8b57e08ae03aecf184bde3e8dd69e96b168cab46fade4d62b3fec/langchain_core-0.1.21-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading langchain_core-0.1.21-py3-none-any.whl.metadata (6.0 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting langchain-community (from ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for langchain-community from https://files.pythonhosted.org/packages/bf/b4/1b1b22ab0c57320c5476b735cfe1500e49ddc4425df9e4c2e569e4c4472e/langchain_community-0.0.19-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading langchain_community-0.0.19-py3-none-any.whl.metadata (7.9 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting langchain-openai (from ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for langchain-openai from https://files.pythonhosted.org/packages/a2/86/05012e5276dde3004cfb0cb383f56983d3adcb5bc8ce13ebf21ce3ea3d26/langchain_openai-0.0.5-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading langchain_openai-0.0.5-py3-none-any.whl.metadata (2.5 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting openai>1 (from ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for openai>1 from https://files.pythonhosted.org/packages/37/34/f3c3d6bdc3eebf1b6a7c696dd6f934630af6cf5250cec099edf117cd3b53/openai-1.11.1-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading openai-1.11.1-py3-none-any.whl.metadata (18 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting pysbd>=0.3.4 (from ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading pysbd-0.3.4-py3-none-any.whl (71 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 71.1/71.1 kB 2.4 MB/s eta 0:00:00\n", + "evaluation-pipeline-ragasevaluator-1 | \n", + "evaluation-pipeline-ragasevaluator-1 | \n", + "evaluation-pipeline-ragasevaluator-1 | Collecting nest-asyncio (from ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency 
information for nest-asyncio from https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading nest_asyncio-1.6.0-py3-none-any.whl.metadata (2.8 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting appdirs (from ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting anyio<5,>=3.5.0 (from openai>1->ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for anyio<5,>=3.5.0 from https://files.pythonhosted.org/packages/bf/cd/d6d9bb1dadf73e7af02d18225cbd2c93f8552e13130484f1c8dcfece292b/anyio-4.2.0-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading anyio-4.2.0-py3-none-any.whl.metadata (4.6 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting distro<2,>=1.7.0 (from openai>1->ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for distro<2,>=1.7.0 from https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting httpx<1,>=0.23.0 (from openai>1->ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for httpx<1,>=0.23.0 from https://files.pythonhosted.org/packages/39/9b/4937d841aee9c2c8102d9a4eeb800c7dad25386caabb4a1bf5010df81a57/httpx-0.26.0-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading httpx-0.26.0-py3-none-any.whl.metadata (7.6 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting pydantic<3,>=1.9.0 (from openai>1->ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for pydantic<3,>=1.9.0 from https://files.pythonhosted.org/packages/db/dc/afecbd9650f486889181c6d1a0d675b580c06253ea7e304588e4c7485bdb/pydantic-2.6.1-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading pydantic-2.6.1-py3-none-any.whl.metadata (83 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 83.5/83.5 kB 17.1 MB/s eta 0:00:00\n", + "evaluation-pipeline-ragasevaluator-1 | \n", + "evaluation-pipeline-ragasevaluator-1 | \n", + "evaluation-pipeline-ragasevaluator-1 | Collecting sniffio (from openai>1->ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading sniffio-1.3.0-py3-none-any.whl (10 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | Collecting tqdm>4 (from openai>1->ragas==0.1.0->-r requirements.txt (line 1))\n", + "evaluation-pipeline-ragasevaluator-1 | Obtaining dependency information for tqdm>4 from https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl.metadata\n", + "evaluation-pipeline-ragasevaluator-1 | Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\n", + "evaluation-pipeline-ragasevaluator-1 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 57.6/57.6 kB 12.9 MB/s eta 0:00:00\n", + "evaluation-pipeline-ragasevaluator-1 
| \n",
+     "evaluation-pipeline-ragasevaluator-1 | \n",
+     "evaluation-pipeline-ragasevaluator-1 | Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.11/site-packages (from openai>1->ragas==0.1.0->-r requirements.txt (line 1)) (4.9.0)\n",
+     "evaluation-pipeline-ragasevaluator-1 | ... (pip dependency resolution and wheel download logs for ragas==0.1.0 and its dependencies truncated) ...\n",
+     "evaluation-pipeline-ragasevaluator-1 | Installing collected packages: appdirs, xxhash, tqdm, tenacity, sniffio, regex, pysbd, pydantic-core, pyarrow-hotfix, nest-asyncio, mypy-extensions, marshmallow, jsonpointer, h11, greenlet, fsspec, filelock, distro, dill, annotated-types, typing-inspect, tiktoken, SQLAlchemy, pydantic, multiprocess, jsonpatch, huggingface-hub, httpcore, anyio, langsmith, httpx, dataclasses-json, openai, langchain-core, datasets, langchain-openai, langchain-community, langchain, ragas\n",
+     "evaluation-pipeline-ragasevaluator-1 | Attempting uninstall: fsspec\n",
+     "evaluation-pipeline-ragasevaluator-1 |     Found existing installation: fsspec 2023.12.2\n",
+     "evaluation-pipeline-ragasevaluator-1 |     Uninstalling fsspec-2023.12.2:\n",
+     "evaluation-pipeline-ragasevaluator-1 |       Successfully uninstalled fsspec-2023.12.2\n"
+    ]
+   },
+   {
+    "name": "stderr",
+    "output_type": "stream",
+    "text": [
+     "evaluation-pipeline-ragasevaluator-1 | ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+     "evaluation-pipeline-ragasevaluator-1 | gcsfs 2023.12.2.post1 requires fsspec==2023.12.2, but you have fsspec 2023.10.0 which is incompatible.\n",
+     "evaluation-pipeline-ragasevaluator-1 | adlfs 2024.1.0 requires fsspec>=2023.12.0, but you have fsspec 2023.10.0 which is incompatible.\n",
+     "evaluation-pipeline-ragasevaluator-1 | s3fs 2023.12.2 requires fsspec==2023.12.2, but you have fsspec 2023.10.0 which is incompatible.\n",
+     "evaluation-pipeline-ragasevaluator-1 | WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", + "evaluation-pipeline-ragasevaluator-1 | \n", + "evaluation-pipeline-ragasevaluator-1 | [notice] A new release of pip is available: 23.2.1 -> 24.0\n", + "evaluation-pipeline-ragasevaluator-1 | [notice] To update, run: pip install --upgrade pip\n", + "evaluation-pipeline-ragasevaluator-1 | \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluation-pipeline-ragasevaluator-1 | Successfully installed SQLAlchemy-2.0.25 annotated-types-0.6.0 anyio-4.2.0 appdirs-1.4.4 dataclasses-json-0.6.4 datasets-2.16.1 dill-0.3.7 distro-1.9.0 filelock-3.13.1 fsspec-2023.10.0 greenlet-3.0.3 h11-0.14.0 httpcore-1.0.2 httpx-0.26.0 huggingface-hub-0.20.3 jsonpatch-1.33 jsonpointer-2.4 langchain-0.1.5 langchain-community-0.0.19 langchain-core-0.1.21 langchain-openai-0.0.5 langsmith-0.0.87 marshmallow-3.20.2 multiprocess-0.70.15 mypy-extensions-1.0.0 nest-asyncio-1.6.0 openai-1.11.1 pyarrow-hotfix-0.6 pydantic-2.6.1 pydantic-core-2.16.2 pysbd-0.3.4 ragas-0.1.0 regex-2023.12.25 sniffio-1.3.0 tenacity-8.2.3 tiktoken-0.5.2 tqdm-4.66.1 typing-inspect-0.9.0 xxhash-3.4.1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "evaluation-pipeline-ragasevaluator-1 | [2024-02-08 13:30:58,364 | fondant.cli | INFO] Component `RagasEvaluator` found in module main\n", + "evaluation-pipeline-ragasevaluator-1 | [2024-02-08 13:30:58,370 | fondant.component.executor | INFO] Dask default local mode will be used for further executions.Our current supported options are limited to 'local' and 'default'.\n", + "evaluation-pipeline-ragasevaluator-1 | [2024-02-08 13:30:58,385 | fondant.component.executor | INFO] Previous component `retrievefromweaviatecomponent` run was cached. Cached pipeline id: evaluation-pipeline-20240208135836\n", + "evaluation-pipeline-ragasevaluator-1 | [2024-02-08 13:30:58,386 | fondant.component.executor | INFO] No matching execution for component detected\n", + "evaluation-pipeline-ragasevaluator-1 | [2024-02-08 13:30:58,386 | root | INFO] Executing component\n", + "evaluation-pipeline-ragasevaluator-1 | [2024-02-08 13:30:58,556 | root | INFO] Columns of dataframe: ['retrieved_chunks', 'question']\n", + "evaluation-pipeline-ragasevaluator-1 | [2024-02-08 13:30:58,585 | root | INFO] Creating write task for: /data/evaluation-pipeline/evaluation-pipeline-20240208143024/ragasevaluator\n", + "evaluation-pipeline-ragasevaluator-1 | [2024-02-08 13:30:58,585 | root | INFO] Writing data...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ ] | 0% Completed | 561.25 us\n", + "[ ] | 0% Completed | 103.53 ms\n", + "[ ] | 0% Completed | 207.98 ms\n", + "[ ] | 0% Completed | 315.33 ms\n", + "[ ] | 0% Completed | 428.78 ms\n", + "[ ] | 0% Completed | 529.75 ms\n", + "[ ] | 0% Completed | 632.47 ms\n", + "[ ] | 0% Completed | 738.72 ms\n", + "[ ] | 0% Completed | 843.85 ms\n", + "[ ] | 0% Completed | 982.68 ms\n", + "[ ] | 0% Completed | 1.09 s\n", + "[ ] | 0% Completed | 1.20 s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "evaluation-pipeline-ragasevaluator-1 | /usr/local/lib/python3.11/site-packages/langchain/__init__.py:29: UserWarning: Importing verbose from langchain root module is no longer supported. 
Please use langchain.globals.set_verbose() / langchain.globals.get_verbose() instead.\n", + "evaluation-pipeline-ragasevaluator-1 | warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ ] | 0% Completed | 1.30 s\n", + "[ ] | 0% Completed | 1.40 s\n", + "[ ] | 0% Completed | 1.50 s\n", + "[ ] | 0% Completed | 1.60 s\n", + "[ ] | 0% Completed | 1.70 s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 0%| | 0/10 [00:00 2\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./evaluation_dataset/aggregated_results.csv\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m df\n", + "File \u001b[0;32m~/projects/fondant/fondant-usecase-RAG/.venv/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1024\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1011\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 1012\u001b[0m dialect,\n\u001b[1;32m 1013\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1020\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 1021\u001b[0m )\n\u001b[1;32m 1022\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m-> 1024\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/projects/fondant/fondant-usecase-RAG/.venv/lib/python3.10/site-packages/pandas/io/parsers/readers.py:618\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 615\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 617\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 618\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 620\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", + "File \u001b[0;32m~/projects/fondant/fondant-usecase-RAG/.venv/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1618\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1615\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1618\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/projects/fondant/fondant-usecase-RAG/.venv/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1878\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1876\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m 1877\u001b[0m mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1878\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1879\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1880\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1881\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1882\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1883\u001b[0m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1884\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1885\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1886\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1887\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1888\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1889\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n", + "File \u001b[0;32m~/projects/fondant/fondant-usecase-RAG/.venv/lib/python3.10/site-packages/pandas/io/common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 869\u001b[0m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m 872\u001b[0m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 873\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 878\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 881\u001b[0m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m 882\u001b[0m handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './evaluation_dataset/aggregated_results.csv'" + ] + } + ], "source": [ - "utils.get_metrics_latest_run(base_path=BASE_PATH)" + "import pandas as pd\n", + "df = pd.read_csv(\"./evaluation_dataset/aggregated_results.csv\")\n", + "df" ] }, { @@ -492,8 +2631,7 @@ "outputs": [], "source": [ "from fondant.explore import run_explorer_app\n", - "\n", - "run_explorer_app(base_path=BASE_PATH)" + "run_explorer_app(base_path=base_path)" ] }, { @@ -510,7 +2648,6 @@ "outputs": [], "source": [ "from fondant.explore import stop_explorer_app\n", - "\n", "stop_explorer_app()" ] }, @@ -572,7 +2709,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/src/evaluation_datasets/wikitext_1000_q.csv b/src/evaluation_datasets/wikitext_1000_q.csv index bf09356..da043aa 100644 --- a/src/evaluation_datasets/wikitext_1000_q.csv +++ b/src/evaluation_datasets/wikitext_1000_q.csv @@ -1,41 +1,2 @@ id;question 0;What is the name of the superfamily mentioned in the document? -1;"How many digital downloads did ""Kiss You"" have in the US by January 18, 2013?" -2;"Who directed the film ""Mercenaries"" in which Boulter starred in 2011?" -3;What was the range of barometer readings in West Palm Beach during the storm? -4;What happened to Rifenburg's show after a change in ownership for the station? -5;What was the nickname given to the 1947 Michigan football team? -6;Where did the hurricane briefly emerge into the Gulf of Mexico? -7;"Was ""Kiss You"" officially promoted to US radio stations by January 18, 2013?" -8;"Where did Eliot write the poem ""Little Gidding""?" -9;"What is the duration of the song ""Kiss You""?" -10;What precious item was found with Matsuo Bashō when he died in Osaka? -11;What is the significance of the folded teeth in labyrinthodonts? -12;What were Temnospondyls initially thought to be? -13;When was Du Fu born and where? -14;When did the An Lushan Rebellion begin and how long did it last? -15;"What is the overall opinion of the song ""Kiss You"" on the album Take Me Home?" -16;Which team did Michigan's teens defeat in their opener, and what was the score? -17;What was the ranking of Notre Dame and the University of Michigan in the Associated Press poll at the end of the 1947 season? -18;How did mines and obstructions affect the advantages of monitors in battle? -19;"Why was the term ""Temnospondyli"" rarely used in the decades following Watson's proposal?" -20;What was the CSS Virginia originally made of? -21;Who is Simeon and what is his role in the search party? -22;What were the targets of the American Third Fleet's attacks in Indochina and southern China? -23;Which town does NY 31B enter after leaving Weedsport? -24;When did lissamphibians first appear? -25;How have the central pressures assigned to tropical cyclones been modified? 
-26;How many homeless residents in Stuart were offered shelters by the National Guard after the storm?
-27;Where is Rifenburg from?
-28;How many ironclads were completed or under construction by Britain and France in 1862?
-29;Who devised the wind-pressure relationship for the northwest Pacific in 1975?
-30;How much rainfall did the hurricane drop in Clermont along its path?
-31;Name two films that Boulter starred in 2008.
-32;How long did it take to launch each aircraft using the catapults?
-33;What is the purpose of analyzing the central dense overcast and embedded eye pattern in tropical cyclones?
-34;What is the title of the album released by Hed PE in 1997?
-35;What was the position of Task Force Manchu along the Naktong River?
-36;What was the title of the article in Time magazine that reported on Rifenburg's performance?
-37;In which sports was Rifenburg named All State?
-38;What happened to A Company during the withdraw?
-39;"What do the lyrics of ""Kiss You"" describe?"
\ No newline at end of file
diff --git a/src/indexing.ipynb b/src/indexing.ipynb
new file mode 100644
index 0000000..34260b4
--- /dev/null
+++ b/src/indexing.ipynb
@@ -0,0 +1,1632 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 🍫 Building a RAG indexing pipeline with Fondant\n",
+    "\n",
+    "> ⚠️ Please note that this notebook **is not** compatible with **Google Colab**. To complete the tutorial, you must \n",
+    "> initiate Docker containers. Starting Docker containers within Google Colab is not supported.\n",
+    "\n",
+    "This repository demonstrates a Fondant data pipeline that ingests text\n",
+    "data into a vector database.\n",
+    "\n",
+    "We provide a Docker Compose setup for Weaviate, enabling local testing and development.\n",
+    "\n",
+    "### Pipeline overview\n",
+    "\n",
+    "The primary goal of this sample is to showcase how you can use a Fondant pipeline and reusable\n",
+    "components to load, chunk, and embed text, as well as ingest the text embeddings into a vector\n",
+    "database.\n",
+    "\n",
+    "Pipeline Steps:\n",
+    "\n",
+    "- [Data Loading](https://github.com/ml6team/fondant/tree/main/components/load_from_parquet): The\n",
+    "  pipeline begins by loading text data from a Parquet file, which serves as the\n",
+    "  source for subsequent processing. For this minimal example, we use a dataset from Hugging Face.\n",
+    "- [Text Chunking](https://github.com/ml6team/fondant/tree/main/components/chunk_text): Text data is\n",
+    "  chunked into manageable sections to prepare it for embedding. This step is crucial for\n",
+    "  performant RAG systems.\n",
+    "- [Text Embedding](https://github.com/ml6team/fondant/tree/main/components/embed_text): We use\n",
+    "  a small Hugging Face model to generate the text embeddings.\n",
+    "  The `embed_text` component also makes it easy to use other models.\n",
+    "- [Write to Weaviate](https://github.com/ml6team/fondant/tree/main/components/index_weaviate): The\n",
+    "  final step of the pipeline involves writing the embedded text data to\n",
+    "  a Weaviate database.\n",
+    "\n",
+    "## Environment\n",
+    "\n",
+    "This section checks the prerequisites of your environment. Read any errors or warnings carefully.
\n",
+    "\n",
+    "**Ensure a Python version between 3.8 and 3.10 is available**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "if sys.version_info < (3, 8, 0) or sys.version_info >= (3, 11, 0):\n",
+    "    raise Exception(f\"A Python version between 3.8 and 3.10 is required. You are running {sys.version}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Check if docker compose is installed and the docker daemon is running**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!docker compose version\n",
+    "!docker ps && echo \"Docker running\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Check if a GPU is available**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:We recommend running this pipeline on a GPU, but none could be found; using CPU instead\n"
+     ]
+    }
+   ],
+   "source": [
+    "import logging\n",
+    "import subprocess\n",
+    "\n",
+    "try:\n",
+    "    subprocess.check_output('nvidia-smi')\n",
+    "    logging.info(\"Found GPU, using it!\")\n",
+    "    number_of_accelerators = 1\n",
+    "    accelerator_name = \"GPU\"\n",
+    "except Exception:\n",
+    "    logging.warning(\"We recommend running this pipeline on a GPU, but none could be found; using CPU instead\")\n",
+    "    number_of_accelerators = None\n",
+    "    accelerator_name = None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Install Fondant**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -q -r ../requirements.txt --disable-pip-version-check && echo \"Success\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Implement the pipeline\n",
+    "\n",
+    "First of all, we need to initialize the pipeline, which includes specifying a name for your pipeline, providing a description, and setting a `base_path`. The `base_path` is used to store the pipeline artifacts and the data generated by the components."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "from fondant.pipeline import Pipeline, Resources\n",
+    "\n",
+    "BASE_PATH = \"./data\"\n",
+    "Path(BASE_PATH).mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "pipeline = Pipeline(\n",
+    "    name=\"ingestion-pipeline\",  # Add a unique pipeline name to easily track your progress and data\n",
+    "    description=\"Pipeline to prepare and process data for building a RAG solution\",\n",
+    "    base_path=BASE_PATH,  # The demo pipeline uses a local directory to store the data.\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For demonstration purposes, we will utilize a dataset available on Hugging Face. As such, we will use the reusable Fondant component `load_from_hf_hub`. Note that the `load_from_hf_hub` component does not define a fixed schema for the data it produces, which means we need to provide this schema ourselves with the `produces` argument. It takes a mapping from field names to `pyarrow` types."
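,
+    "\n",
+    "As an illustration only (hypothetical field names, not the schema used for this dataset), such a mapping can also express nested types:\n",
+    "\n",
+    "```python\n",
+    "import pyarrow as pa\n",
+    "\n",
+    "# Illustrative sketch of a `produces` mapping: field name -> pyarrow type\n",
+    "produces = {\n",
+    "    \"text\": pa.string(),                  # a plain text column\n",
+    "    \"embedding\": pa.list_(pa.float32()),  # a nested, list-valued column\n",
+    "}\n",
+    "```"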
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pyarrow as pa\n",
+    "\n",
+    "text = pipeline.read(\n",
+    "    \"load_from_hf_hub\",\n",
+    "    arguments={\n",
+    "        # Add arguments\n",
+    "        \"dataset_name\": \"wikitext@~parquet\",\n",
+    "        \"n_rows_to_load\": 100,\n",
+    "    },\n",
+    "    produces={\n",
+    "        \"text\": pa.string()\n",
+    "    }\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Implement a custom component\n",
+    "\n",
+    "You can build Fondant pipelines using reusable components from the component hub, but you can also implement your own custom components. The easiest way to do so is to build a `lightweight_component`: you can implement and test the component code in a notebook and then use the same code as part of your pipeline.\n",
+    "\n",
+    "Here, we will implement a custom chunking component using Langchain. Text data is chunked into manageable sections to prepare it for embedding, a step that is crucial for efficient RAG systems. Langchain provides an interface to chunk text snippets efficiently, so we wrap it in a custom `lightweight_component`. Check out [our documentation](https://fondant.ai/en/latest/components/lightweight_components/) for more information. A quick standalone sketch of the splitter follows below."
+   ]
+  },
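+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To build some intuition first, the Langchain splitter can be tried on its own. The snippet below is an illustrative sketch (the sample text and parameter values are made up and are not part of the pipeline):\n",
+    "\n",
+    "```python\n",
+    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "\n",
+    "splitter = RecursiveCharacterTextSplitter(chunk_size=48, chunk_overlap=8)\n",
+    "docs = splitter.create_documents([\"Fondant is a framework for building data pipelines. It ships reusable components.\"])\n",
+    "print([doc.page_content for doc in docs])  # a list of small, overlapping chunks\n",
+    "```\n",
+    "\n",
+    "The component below wraps exactly this interface in a Fondant `PandasTransformComponent`."
+   ]
+  },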
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import typing as t\n",
+    "from fondant.component import PandasTransformComponent\n",
+    "from fondant.pipeline import lightweight_component\n",
+    "\n",
+    "@lightweight_component(\n",
+    "    consumes={\"text\": pa.string()},\n",
+    "    produces={\"text\": pa.string(), \"original_document_id\": pa.string()},\n",
+    "    extra_requires=[\"langchain==0.0.329\"]\n",
+    ")\n",
+    "class ChunkTextComponent(PandasTransformComponent):\n",
+    "    \"\"\"Component that chunks text into smaller segments.\n",
+    "    More information about the different chunking strategies can be found here:\n",
+    "    - https://python.langchain.com/docs/modules/data_connection/document_transformers/\n",
+    "    - https://www.pinecone.io/learn/chunking-strategies/.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(\n",
+    "        self,\n",
+    "        *,\n",
+    "        chunk_size: int,\n",
+    "        chunk_overlap: int,\n",
+    "    ):\n",
+    "        \"\"\"\n",
+    "        Args:\n",
+    "            chunk_size: the chunk size\n",
+    "            chunk_overlap: the overlap between chunks\n",
+    "        \"\"\"\n",
+    "        from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "        self.chunker = RecursiveCharacterTextSplitter(\n",
+    "            chunk_size=chunk_size,\n",
+    "            chunk_overlap=chunk_overlap\n",
+    "        )\n",
+    "\n",
+    "    def chunk_text(self, row) -> t.List[t.Tuple]:\n",
+    "        # Multi-index df has id under the name attribute\n",
+    "        doc_id = row.name\n",
+    "        text_data = row[\"text\"]\n",
+    "        docs = self.chunker.create_documents([text_data])\n",
+    "\n",
+    "        return [\n",
+    "            (doc_id, f\"{doc_id}_{chunk_id}\", chunk.page_content)\n",
+    "            for chunk_id, chunk in enumerate(docs)\n",
+    "        ]\n",
+    "\n",
+    "    def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:\n",
+    "        import itertools\n",
+    "        results = dataframe.apply(\n",
+    "            self.chunk_text,\n",
+    "            axis=1,\n",
+    "        ).to_list()\n",
+    "\n",
+    "        # Flatten results\n",
+    "        results = list(itertools.chain.from_iterable(results))\n",
+    "\n",
+    "        # Turn into dataframes\n",
+    "        results_df = pd.DataFrame(\n",
+    "            results,\n",
+    "            columns=[\"original_document_id\", \"id\", \"text\"],\n",
+    "        )\n",
+    "        results_df = results_df.set_index(\"id\")\n",
+    "\n",
+    "        return results_df\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can now add the implemented chunking component to the pipeline using `Dataset.apply()`. This method doesn't execute the component yet; it adds the component to the execution graph of the pipeline and returns a lazy `Dataset` instance.\n",
+    "Besides our custom component, we also add the reusable components `embed_text` and `index_weaviate` from the [Fondant Hub](https://fondant.ai/en/latest/components/hub/)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import utils\n",
+    "\n",
+    "chunks = text.apply(\n",
+    "    ChunkTextComponent,\n",
+    "    arguments={\n",
+    "        \"chunk_size\": 512, \"chunk_overlap\": 32\n",
+    "    }\n",
+    ")\n",
+    "\n",
+    "embeddings = chunks.apply(\n",
+    "    \"embed_text\",\n",
+    "    arguments={\n",
+    "        \"model_provider\": \"huggingface\",\n",
+    "        \"model\": \"all-MiniLM-L6-v2\"\n",
+    "    },\n",
+    "    resources=Resources(\n",
+    "        accelerator_number=number_of_accelerators,\n",
+    "        accelerator_name=accelerator_name,\n",
+    "    ),\n",
+    "    cluster_type=\"local\" if number_of_accelerators is not None else \"default\",\n",
+    "    cache=False\n",
+    ")\n",
+    "\n",
+    "embeddings.write(\n",
+    "    \"index_weaviate\",\n",
+    "    arguments={\n",
+    "        \"weaviate_url\": f\"http://{utils.get_host_ip()}:8081\",\n",
+    "        \"class_name\": \"index\",\n",
+    "    },\n",
+    "    consumes={\n",
+    "        \"text\": pa.string(),\n",
+    "        \"embedding\": pa.list_(pa.float32()),\n",
+    "    }\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Our pipeline now looks as follows:\n",
+    "\n",
+    "`load_from_hf_hub` -> `chunk_text` -> `embed_text` -> `index_weaviate`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Running the pipeline\n",
+    "\n",
+    "The pipeline will load and process text data, then ingest the processed data into a vector database. Before executing the pipeline, we need to start the Weaviate database; otherwise, the pipeline execution will fail.\n",
+    "\n",
+    "To do this, we can utilize the Docker setup provided in the `weaviate_service` folder."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!docker compose -f weaviate_service/docker-compose.yaml up --detach --quiet-pull"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, we can execute our pipeline. Fondant provides multiple runners to run our pipeline:\n",
+    "\n",
+    "- A Docker runner for local execution\n",
+    "- A Vertex AI runner for managed execution on Google Cloud\n",
+    "- A Sagemaker runner for managed execution on AWS\n",
+    "- A Kubeflow Pipelines runner for execution anywhere\n",
+    "\n",
+    "Here we will use the Docker runner for local execution, which utilizes docker-compose under the hood.\n",
+    "\n",
+    "The runner will download the reusable components from the component hub. Afterwards, you will see the components execute one by one."
+   ]
+  },
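+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next cell triggers the run. For reference, a minimal Docker-runner invocation looks roughly like this (a sketch; the exact import path and signature may differ between Fondant versions):\n",
+    "\n",
+    "```python\n",
+    "from fondant.pipeline.runner import DockerRunner\n",
+    "\n",
+    "# Sketch: run the `pipeline` object defined above with the local Docker runner\n",
+    "DockerRunner().run(pipeline)\n",
+    "```"
+   ]
+  },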
compiling\n", + "INFO:fondant.pipeline.compiler:Compiling ingestion-pipeline to .fondant/compose.yaml\n", + "INFO:fondant.pipeline.compiler:Base path found on local system, setting up ./data as mount volume\n", + "INFO:fondant.pipeline.pipeline:Sorting pipeline component graph topologically.\n", + "INFO:fondant.pipeline.pipeline:All pipeline component specifications match.\n", + "INFO:fondant.pipeline.compiler:Compiling service for load_from_hugging_face_hub\n", + "INFO:fondant.pipeline.compiler:Compiling service for chunktextcomponent\n", + "INFO:fondant.pipeline.compiler:Compiling service for embed_text\n", + "INFO:fondant.pipeline.compiler:Compiling service for index_weaviate\n", + "INFO:fondant.pipeline.compiler:Successfully compiled to .fondant/compose.yaml\n", + " load_from_hugging_face_hub Pulling \n", + " embed_text Pulling \n", + " index_weaviate Pulling \n", + " chunktextcomponent Pulling \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting pipeline run...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " embed_text Pulled \n", + " chunktextcomponent Pulled \n", + " c57ee5000d61 Already exists \n", + " be0f2e005f57 Already exists \n", + " 1cf9e04c14ca Already exists \n", + " d971e6b3ab55 Already exists \n", + " eda4bb0752cd Already exists \n", + " 4234b2d8f067 Already exists \n", + " 9bed5a4782d3 Pulling fs layer \n", + " 10d156fc77f8 Pulling fs layer \n", + " 50d0da9c2da1 Pulling fs layer \n", + " 81d6c226fa1c Pulling fs layer \n", + " 5e0225960697 Pulling fs layer \n", + " 4f4fb700ef54 Pulling fs layer \n", + " 5e0225960697 Waiting \n", + " 81d6c226fa1c Waiting \n", + " 4f4fb700ef54 Waiting \n", + " c57ee5000d61 Already exists \n", + " be0f2e005f57 Already exists \n", + " 1cf9e04c14ca Already exists \n", + " d971e6b3ab55 Already exists \n", + " eda4bb0752cd Already exists \n", + " 4234b2d8f067 Already exists \n", + " ce88c3d8161b Pulling fs layer \n", + " 6959468190ca Pulling fs layer \n", + " d76db7b75f8a Pulling fs layer \n", + " c1a9c0eb704d Pulling fs layer \n", + " 6959468190ca Waiting \n", + " ce88c3d8161b Waiting \n", + " 53cb618491a3 Pulling fs layer \n", + " c1a9c0eb704d Waiting \n", + " 53cb618491a3 Waiting \n", + " d76db7b75f8a Waiting \n", + " 50d0da9c2da1 Downloading [> ] 537.1kB/229.5MB\n", + " 50d0da9c2da1 Downloading [> ] 2.679MB/229.5MB\n", + " 10d156fc77f8 Downloading [> ] 98.97kB/9.664MB\n", + " 50d0da9c2da1 Downloading [> ] 4.301MB/229.5MB\n", + " 10d156fc77f8 Downloading [=> ] 307.7kB/9.664MB\n", + " 50d0da9c2da1 Downloading [=> ] 5.907MB/229.5MB\n", + " 10d156fc77f8 Downloading [====> ] 803.4kB/9.664MB\n", + " 50d0da9c2da1 Downloading [=> ] 6.984MB/229.5MB\n", + " 9bed5a4782d3 Downloading [==================================================>] 140B/140B\n", + " 9bed5a4782d3 Download complete \n", + " 9bed5a4782d3 Extracting [==================================================>] 140B/140B\n", + " 9bed5a4782d3 Extracting [==================================================>] 140B/140B\n", + " 9bed5a4782d3 Pull complete \n", + " 10d156fc77f8 Downloading [======> ] 1.209MB/9.664MB\n", + " 50d0da9c2da1 Downloading [=> ] 8.606MB/229.5MB\n", + " 10d156fc77f8 Downloading [=========> ] 1.811MB/9.664MB\n", + " 50d0da9c2da1 Downloading [==> ] 9.675MB/229.5MB\n", + " 10d156fc77f8 Downloading [============> ] 2.413MB/9.664MB\n", + " 50d0da9c2da1 Downloading [==> ] 11.29MB/229.5MB\n", + " 10d156fc77f8 Downloading [==============> ] 2.806MB/9.664MB\n", + " 10d156fc77f8 Downloading [===============> ] 
2.905MB/9.664MB\n", + " 50d0da9c2da1 Downloading [==> ] 11.83MB/229.5MB\n", + " 10d156fc77f8 Downloading [=================> ] 3.396MB/9.664MB\n", + " 50d0da9c2da1 Downloading [==> ] 13.45MB/229.5MB\n", + " 10d156fc77f8 Downloading [====================> ] 3.99MB/9.664MB\n", + " 50d0da9c2da1 Downloading [===> ] 15.07MB/229.5MB\n", + " 10d156fc77f8 Downloading [=======================> ] 4.58MB/9.664MB\n", + " 50d0da9c2da1 Downloading [===> ] 16.7MB/229.5MB\n", + " 10d156fc77f8 Downloading [==========================> ] 5.071MB/9.664MB\n", + " 50d0da9c2da1 Downloading [===> ] 17.24MB/229.5MB\n", + " 10d156fc77f8 Downloading [============================> ] 5.563MB/9.664MB\n", + " 81d6c226fa1c Downloading [==================================================>] 99B/99B\n", + " 50d0da9c2da1 Downloading [====> ] 18.86MB/229.5MB\n", + " 81d6c226fa1c Verifying Checksum \n", + " 81d6c226fa1c Download complete \n", + " 10d156fc77f8 Downloading [===============================> ] 6.153MB/9.664MB\n", + " 50d0da9c2da1 Downloading [====> ] 20.47MB/229.5MB\n", + " 10d156fc77f8 Downloading [==================================> ] 6.755MB/9.664MB\n", + " 10d156fc77f8 Downloading [=====================================> ] 7.25MB/9.664MB\n", + " 50d0da9c2da1 Downloading [====> ] 22.07MB/229.5MB\n", + " 50d0da9c2da1 Downloading [====> ] 22.6MB/229.5MB\n", + " 10d156fc77f8 Downloading [======================================> ] 7.349MB/9.664MB\n", + " 10d156fc77f8 Downloading [=========================================> ] 8.037MB/9.664MB\n", + " 50d0da9c2da1 Downloading [=====> ] 23.67MB/229.5MB\n", + " 10d156fc77f8 Downloading [=============================================> ] 8.725MB/9.664MB\n", + " 50d0da9c2da1 Downloading [=====> ] 24.75MB/229.5MB\n", + " 10d156fc77f8 Downloading [================================================> ] 9.319MB/9.664MB\n", + " 50d0da9c2da1 Downloading [=====> ] 25.81MB/229.5MB\n", + " 10d156fc77f8 Verifying Checksum \n", + " 10d156fc77f8 Download complete \n", + " 10d156fc77f8 Extracting [> ] 98.3kB/9.664MB\n", + " 50d0da9c2da1 Downloading [=====> ] 26.87MB/229.5MB\n", + " 10d156fc77f8 Extracting [=========> ] 1.868MB/9.664MB\n", + " 5e0225960697 Downloading [===============================> ] 733B/1.172kB\n", + " 5e0225960697 Downloading [==================================================>] 1.172kB/1.172kB\n", + " 5e0225960697 Verifying Checksum \n", + " 5e0225960697 Download complete \n", + " 50d0da9c2da1 Downloading [======> ] 28.49MB/229.5MB\n", + " 10d156fc77f8 Extracting [=============> ] 2.654MB/9.664MB\n", + " 10d156fc77f8 Extracting [======================================> ] 7.373MB/9.664MB\n", + " 50d0da9c2da1 Downloading [======> ] 30.09MB/229.5MB\n", + " 10d156fc77f8 Extracting [=============================================> ] 8.847MB/9.664MB\n", + " 50d0da9c2da1 Downloading [======> ] 31.72MB/229.5MB\n", + " 10d156fc77f8 Extracting [=================================================> ] 9.634MB/9.664MB\n", + " 10d156fc77f8 Extracting [==================================================>] 9.664MB/9.664MB\n", + " 10d156fc77f8 Pull complete \n", + " 50d0da9c2da1 Downloading [=======> ] 32.8MB/229.5MB\n", + " 4f4fb700ef54 Downloading [==================================================>] 32B/32B\n", + " 4f4fb700ef54 Verifying Checksum \n", + " 4f4fb700ef54 Download complete \n", + " 50d0da9c2da1 Downloading [=======> ] 34.95MB/229.5MB\n", + " 50d0da9c2da1 Downloading [========> ] 37.64MB/229.5MB\n", + " 50d0da9c2da1 Downloading [========> ] 39.25MB/229.5MB\n", + " 50d0da9c2da1 
Downloading [========> ] 39.79MB/229.5MB\n", + " ce88c3d8161b Downloading [==================================================>] 145B/145B\n", + " ce88c3d8161b Download complete \n", + " ce88c3d8161b Extracting [==================================================>] 145B/145B\n", + " ce88c3d8161b Extracting [==================================================>] 145B/145B\n", + " ce88c3d8161b Pull complete \n", + " 50d0da9c2da1 Downloading [=========> ] 41.41MB/229.5MB\n", + " 50d0da9c2da1 Downloading [=========> ] 43.01MB/229.5MB\n", + " 50d0da9c2da1 Downloading [=========> ] 44.62MB/229.5MB\n", + " 50d0da9c2da1 Downloading [==========> ] 46.24MB/229.5MB\n", + " 6959468190ca Downloading [> ] 94.74kB/9.197MB\n", + " 50d0da9c2da1 Downloading [==========> ] 48.39MB/229.5MB\n", + " 6959468190ca Downloading [=> ] 291.3kB/9.197MB\n", + " 50d0da9c2da1 Downloading [==========> ] 50MB/229.5MB\n", + " 6959468190ca Downloading [===> ] 684.6kB/9.197MB\n", + " 50d0da9c2da1 Downloading [===========> ] 51.61MB/229.5MB\n", + " 6959468190ca Downloading [=====> ] 979.5kB/9.197MB\n", + " 50d0da9c2da1 Downloading [===========> ] 52.15MB/229.5MB\n", + " 6959468190ca Downloading [=======> ] 1.373MB/9.197MB\n", + " 50d0da9c2da1 Downloading [===========> ] 53.76MB/229.5MB\n", + " 6959468190ca Downloading [=========> ] 1.766MB/9.197MB\n", + " 50d0da9c2da1 Downloading [===========> ] 54.84MB/229.5MB\n", + " 6959468190ca Downloading [===========> ] 2.167MB/9.197MB\n", + " d76db7b75f8a Downloading [> ] 539.9kB/229.1MB\n", + " 50d0da9c2da1 Downloading [============> ] 56.46MB/229.5MB\n", + " 6959468190ca Downloading [============> ] 2.364MB/9.197MB\n", + " 50d0da9c2da1 Downloading [============> ] 57MB/229.5MB\n", + " 6959468190ca Downloading [==============> ] 2.757MB/9.197MB\n", + " d76db7b75f8a Downloading [> ] 1.065MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============> ] 58.08MB/229.5MB\n", + " 6959468190ca Downloading [=================> ] 3.15MB/9.197MB\n", + " d76db7b75f8a Downloading [> ] 1.606MB/229.1MB\n", + " 6959468190ca Downloading [===================> ] 3.544MB/9.197MB\n", + " 50d0da9c2da1 Downloading [============> ] 59.16MB/229.5MB\n", + " d76db7b75f8a Downloading [> ] 2.147MB/229.1MB\n", + " 6959468190ca Downloading [====================> ] 3.838MB/9.197MB\n", + " d76db7b75f8a Downloading [> ] 2.688MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=============> ] 60.24MB/229.5MB\n", + " 6959468190ca Downloading [=======================> ] 4.334MB/9.197MB\n", + " d76db7b75f8a Downloading [> ] 3.769MB/229.1MB\n", + " 6959468190ca Downloading [=========================> ] 4.727MB/9.197MB\n", + " 50d0da9c2da1 Downloading [=============> ] 61.31MB/229.5MB\n", + " d76db7b75f8a Downloading [> ] 4.31MB/229.1MB\n", + " 6959468190ca Downloading [============================> ] 5.219MB/9.197MB\n", + " 50d0da9c2da1 Downloading [=============> ] 62.39MB/229.5MB\n", + " 6959468190ca Downloading [=============================> ] 5.514MB/9.197MB\n", + " d76db7b75f8a Downloading [=> ] 4.85MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=============> ] 62.93MB/229.5MB\n", + " d76db7b75f8a Downloading [=> ] 5.919MB/229.1MB\n", + " 6959468190ca Downloading [================================> ] 6.005MB/9.197MB\n", + " 50d0da9c2da1 Downloading [=============> ] 63.46MB/229.5MB\n", + " d76db7b75f8a Downloading [=> ] 6.988MB/229.1MB\n", + " 6959468190ca Downloading [===================================> ] 6.497MB/9.197MB\n", + " 50d0da9c2da1 Downloading [=============> ] 64MB/229.5MB\n", + " 6959468190ca Downloading 
[=====================================> ] 6.988MB/9.197MB\n", + " d76db7b75f8a Downloading [=> ] 8.066MB/229.1MB\n", + " 6959468190ca Downloading [=======================================> ] 7.185MB/9.197MB\n", + " d76db7b75f8a Downloading [=> ] 8.602MB/229.1MB\n", + " 6959468190ca Downloading [=========================================> ] 7.676MB/9.197MB\n", + " d76db7b75f8a Downloading [==> ] 9.659MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============> ] 65.09MB/229.5MB\n", + " 6959468190ca Downloading [===========================================> ] 8.07MB/9.197MB\n", + " 50d0da9c2da1 Downloading [==============> ] 65.63MB/229.5MB\n", + " d76db7b75f8a Downloading [==> ] 10.74MB/229.1MB\n", + " 6959468190ca Downloading [==============================================> ] 8.557MB/9.197MB\n", + " 50d0da9c2da1 Downloading [==============> ] 66.17MB/229.5MB\n", + " 6959468190ca Downloading [================================================> ] 8.852MB/9.197MB\n", + " d76db7b75f8a Downloading [==> ] 11.82MB/229.1MB\n", + " 6959468190ca Verifying Checksum \n", + " 6959468190ca Download complete \n", + " 6959468190ca Extracting [> ] 98.3kB/9.197MB\n", + " 50d0da9c2da1 Downloading [==============> ] 66.7MB/229.5MB\n", + " d76db7b75f8a Downloading [==> ] 13.44MB/229.1MB\n", + " 6959468190ca Extracting [========> ] 1.573MB/9.197MB\n", + " 6959468190ca Extracting [=============================> ] 5.407MB/9.197MB\n", + " d76db7b75f8a Downloading [===> ] 15.07MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============> ] 67.78MB/229.5MB\n", + " 6959468190ca Extracting [=====================================> ] 6.98MB/9.197MB\n", + " 6959468190ca Extracting [============================================> ] 8.258MB/9.197MB\n", + " 6959468190ca Extracting [==================================================>] 9.197MB/9.197MB\n", + " 6959468190ca Pull complete \n", + " 50d0da9c2da1 Downloading [==============> ] 68.32MB/229.5MB\n", + " d76db7b75f8a Downloading [===> ] 16.15MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============> ] 68.86MB/229.5MB\n", + " d76db7b75f8a Downloading [===> ] 17.77MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============> ] 69.4MB/229.5MB\n", + " d76db7b75f8a Downloading [====> ] 18.85MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============> ] 70.48MB/229.5MB\n", + " d76db7b75f8a Downloading [====> ] 20.45MB/229.1MB\n", + " d76db7b75f8a Downloading [====> ] 20.99MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============> ] 71.02MB/229.5MB\n", + " c1a9c0eb704d Downloading [==================================================>] 118B/118B\n", + " c1a9c0eb704d Verifying Checksum \n", + " c1a9c0eb704d Download complete \n", + " d76db7b75f8a Downloading [====> ] 22.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============> ] 72.09MB/229.5MB\n", + " d76db7b75f8a Downloading [=====> ] 23.68MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============> ] 72.63MB/229.5MB\n", + " d76db7b75f8a Downloading [=====> ] 25.29MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============> ] 73.16MB/229.5MB\n", + " d76db7b75f8a Downloading [=====> ] 26.89MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================> ] 73.7MB/229.5MB\n", + " d76db7b75f8a Downloading [======> ] 27.97MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================> ] 74.24MB/229.5MB\n", + " d76db7b75f8a Downloading [======> ] 29.05MB/229.1MB\n", + " d76db7b75f8a Downloading [======> ] 29.59MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================> ] 75.32MB/229.5MB\n", + " d76db7b75f8a Downloading [======> 
] 31.21MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================> ] 75.86MB/229.5MB\n", + " d76db7b75f8a Downloading [=======> ] 32.28MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================> ] 76.4MB/229.5MB\n", + " 53cb618491a3 Downloading [====================> ] 733B/1.792kB\n", + " 53cb618491a3 Downloading [==================================================>] 1.792kB/1.792kB\n", + " 53cb618491a3 Verifying Checksum \n", + " 53cb618491a3 Download complete \n", + " d76db7b75f8a Downloading [=======> ] 33.36MB/229.1MB\n", + " d76db7b75f8a Downloading [=======> ] 33.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================> ] 76.94MB/229.5MB\n", + " d76db7b75f8a Downloading [=======> ] 34.97MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================> ] 77.48MB/229.5MB\n", + " d76db7b75f8a Downloading [=======> ] 36.05MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================> ] 78.01MB/229.5MB\n", + " d76db7b75f8a Downloading [=======> ] 36.58MB/229.1MB\n", + " d76db7b75f8a Downloading [========> ] 38.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================> ] 78.55MB/229.5MB\n", + " d76db7b75f8a Downloading [========> ] 39.27MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================> ] 79.09MB/229.5MB\n", + " 50d0da9c2da1 Downloading [=================> ] 79.63MB/229.5MB\n", + " d76db7b75f8a Downloading [========> ] 40.87MB/229.1MB\n", + " d76db7b75f8a Downloading [=========> ] 42.47MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================> ] 80.71MB/229.5MB\n", + " d76db7b75f8a Downloading [=========> ] 43MB/229.1MB\n", + " d76db7b75f8a Downloading [=========> ] 44.08MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================> ] 81.25MB/229.5MB\n", + " 50d0da9c2da1 Downloading [=================> ] 81.79MB/229.5MB\n", + " d76db7b75f8a Downloading [=========> ] 45.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================> ] 82.33MB/229.5MB\n", + " d76db7b75f8a Downloading [==========> ] 46.78MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================> ] 82.87MB/229.5MB\n", + " d76db7b75f8a Downloading [==========> ] 47.84MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================> ] 83.41MB/229.5MB\n", + " d76db7b75f8a Downloading [==========> ] 49.45MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================> ] 83.95MB/229.5MB\n", + " d76db7b75f8a Downloading [===========> ] 51.05MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================> ] 84.49MB/229.5MB\n", + " d76db7b75f8a Downloading [===========> ] 52.66MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================> ] 85.03MB/229.5MB\n", + " d76db7b75f8a Downloading [===========> ] 54.26MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================> ] 85.57MB/229.5MB\n", + " 50d0da9c2da1 Downloading [==================> ] 86.11MB/229.5MB\n", + " d76db7b75f8a Downloading [============> ] 55.87MB/229.1MB\n", + " d76db7b75f8a Downloading [============> ] 57.48MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================> ] 87.19MB/229.5MB\n", + " d76db7b75f8a Downloading [============> ] 59.09MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================> ] 87.73MB/229.5MB\n", + " d76db7b75f8a Downloading [=============> ] 60.71MB/229.1MB\n", + " d76db7b75f8a Downloading [=============> ] 62.31MB/229.1MB\n", + " d76db7b75f8a Downloading [=============> ] 62.85MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================> ] 88.27MB/229.5MB\n", + " d76db7b75f8a Downloading [==============> ] 64.46MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================> ] 
88.81MB/229.5MB\n", + " d76db7b75f8a Downloading [==============> ] 66.06MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================> ] 89.35MB/229.5MB\n", + " d76db7b75f8a Downloading [==============> ] 67.67MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================> ] 89.9MB/229.5MB\n", + " d76db7b75f8a Downloading [==============> ] 68.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================> ] 90.44MB/229.5MB\n", + " d76db7b75f8a Downloading [===============> ] 69.28MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================> ] 90.97MB/229.5MB\n", + " d76db7b75f8a Downloading [===============> ] 70.89MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================> ] 91.51MB/229.5MB\n", + " d76db7b75f8a Downloading [===============> ] 71.96MB/229.1MB\n", + " d76db7b75f8a Downloading [===============> ] 72.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================> ] 92.05MB/229.5MB\n", + " d76db7b75f8a Downloading [================> ] 74.1MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================> ] 92.59MB/229.5MB\n", + " d76db7b75f8a Downloading [================> ] 75.18MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================> ] 93.13MB/229.5MB\n", + " d76db7b75f8a Downloading [================> ] 76.24MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================> ] 93.67MB/229.5MB\n", + " d76db7b75f8a Downloading [================> ] 77.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================> ] 94.21MB/229.5MB\n", + " d76db7b75f8a Downloading [=================> ] 78.38MB/229.1MB\n", + " d76db7b75f8a Downloading [=================> ] 79.44MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================> ] 94.75MB/229.5MB\n", + " d76db7b75f8a Downloading [=================> ] 80.52MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================> ] 95.29MB/229.5MB\n", + " d76db7b75f8a Downloading [=================> ] 81.58MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================> ] 95.83MB/229.5MB\n", + " d76db7b75f8a Downloading [==================> ] 83.18MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================> ] 96.37MB/229.5MB\n", + " d76db7b75f8a Downloading [==================> ] 84.26MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================> ] 96.9MB/229.5MB\n", + " d76db7b75f8a Downloading [==================> ] 85.34MB/229.1MB\n", + " d76db7b75f8a Downloading [==================> ] 85.88MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================> ] 97.44MB/229.5MB\n", + " d76db7b75f8a Downloading [==================> ] 86.95MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================> ] 97.98MB/229.5MB\n", + " d76db7b75f8a Downloading [===================> ] 88.03MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================> ] 98.51MB/229.5MB\n", + " d76db7b75f8a Downloading [===================> ] 89.65MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================> ] 99.05MB/229.5MB\n", + " d76db7b75f8a Downloading [===================> ] 90.74MB/229.1MB\n", + " d76db7b75f8a Downloading [====================> ] 91.81MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================> ] 99.59MB/229.5MB\n", + " d76db7b75f8a Downloading [====================> ] 93.43MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================> ] 100.1MB/229.5MB\n", + " 50d0da9c2da1 Downloading [=====================> ] 100.7MB/229.5MB\n", + " d76db7b75f8a Downloading [====================> ] 95.05MB/229.1MB\n", + " d76db7b75f8a Downloading [====================> ] 
95.59MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================> ] 101.2MB/229.5MB\n", + " d76db7b75f8a Downloading [=====================> ] 97.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================> ] 101.7MB/229.5MB\n", + " d76db7b75f8a Downloading [=====================> ] 98.28MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================> ] 102.3MB/229.5MB\n", + " d76db7b75f8a Downloading [=====================> ] 99.34MB/229.1MB\n", + " d76db7b75f8a Downloading [=====================> ] 100.4MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================> ] 102.8MB/229.5MB\n", + " d76db7b75f8a Downloading [======================> ] 102MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================> ] 103.4MB/229.5MB\n", + " d76db7b75f8a Downloading [======================> ] 103.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================> ] 103.9MB/229.5MB\n", + " d76db7b75f8a Downloading [======================> ] 104.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================> ] 104.4MB/229.5MB\n", + " d76db7b75f8a Downloading [=======================> ] 106.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================> ] 105MB/229.5MB\n", + " 50d0da9c2da1 Downloading [======================> ] 105.5MB/229.5MB\n", + " d76db7b75f8a Downloading [=======================> ] 107.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=======================> ] 106.1MB/229.5MB\n", + " d76db7b75f8a Downloading [=======================> ] 109.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=======================> ] 106.6MB/229.5MB\n", + " d76db7b75f8a Downloading [========================> ] 110.6MB/229.1MB\n", + " d76db7b75f8a Downloading [========================> ] 112.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=======================> ] 107.7MB/229.5MB\n", + " d76db7b75f8a Downloading [========================> ] 113.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=======================> ] 108.2MB/229.5MB\n", + " d76db7b75f8a Downloading [========================> ] 113.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=======================> ] 108.8MB/229.5MB\n", + " d76db7b75f8a Downloading [=========================> ] 115.4MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=======================> ] 109.3MB/229.5MB\n", + " d76db7b75f8a Downloading [=========================> ] 116.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=======================> ] 109.8MB/229.5MB\n", + " d76db7b75f8a Downloading [=========================> ] 117.6MB/229.1MB\n", + " d76db7b75f8a Downloading [=========================> ] 118.1MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================> ] 110.4MB/229.5MB\n", + " d76db7b75f8a Downloading [=========================> ] 118.6MB/229.1MB\n", + " d76db7b75f8a Downloading [==========================> ] 119.2MB/229.1MB\n", + " d76db7b75f8a Downloading [==========================> ] 119.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================> ] 110.9MB/229.5MB\n", + " d76db7b75f8a Downloading [==========================> ] 120.3MB/229.1MB\n", + " d76db7b75f8a Downloading [==========================> ] 120.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================> ] 111.5MB/229.5MB\n", + " d76db7b75f8a Downloading [==========================> ] 121.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================> ] 112MB/229.5MB\n", + " d76db7b75f8a Downloading [==========================> ] 122.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================> ] 
112.5MB/229.5MB\n", + " d76db7b75f8a Downloading [===========================> ] 124MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================> ] 113.1MB/229.5MB\n", + " d76db7b75f8a Downloading [===========================> ] 125.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================> ] 113.6MB/229.5MB\n", + " 50d0da9c2da1 Downloading [========================> ] 114.2MB/229.5MB\n", + " d76db7b75f8a Downloading [===========================> ] 127.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================> ] 114.7MB/229.5MB\n", + " d76db7b75f8a Downloading [============================> ] 128.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=========================> ] 115.2MB/229.5MB\n", + " d76db7b75f8a Downloading [============================> ] 129.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=========================> ] 115.8MB/229.5MB\n", + " d76db7b75f8a Downloading [============================> ] 131.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=========================> ] 116.8MB/229.5MB\n", + " d76db7b75f8a Downloading [============================> ] 132.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=========================> ] 117.4MB/229.5MB\n", + " d76db7b75f8a Downloading [=============================> ] 133.7MB/229.1MB\n", + " d76db7b75f8a Downloading [=============================> ] 134.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=========================> ] 117.9MB/229.5MB\n", + " d76db7b75f8a Downloading [=============================> ] 135.3MB/229.1MB\n", + " d76db7b75f8a Downloading [=============================> ] 136.4MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=========================> ] 119MB/229.5MB\n", + " d76db7b75f8a Downloading [=============================> ] 137.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==========================> ] 119.5MB/229.5MB\n", + " d76db7b75f8a Downloading [==============================> ] 138.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==========================> ] 120.1MB/229.5MB\n", + " d76db7b75f8a Downloading [==============================> ] 140.1MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==========================> ] 121.2MB/229.5MB\n", + " d76db7b75f8a Downloading [==============================> ] 141.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==========================> ] 121.7MB/229.5MB\n", + " d76db7b75f8a Downloading [===============================> ] 142.3MB/229.1MB\n", + " d76db7b75f8a Downloading [===============================> ] 142.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==========================> ] 122.2MB/229.5MB\n", + " d76db7b75f8a Downloading [===============================> ] 143.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==========================> ] 123.3MB/229.5MB\n", + " d76db7b75f8a Downloading [===============================> ] 145MB/229.1MB\n", + " d76db7b75f8a Downloading [===============================> ] 146MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===========================> ] 124.4MB/229.5MB\n", + " d76db7b75f8a Downloading [================================> ] 147.1MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===========================> ] 125.4MB/229.5MB\n", + " d76db7b75f8a Downloading [================================> ] 148.2MB/229.1MB\n", + " d76db7b75f8a Downloading [================================> ] 149.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===========================> ] 126.5MB/229.5MB\n", + " d76db7b75f8a Downloading [================================> ] 150.3MB/229.1MB\n", + " 50d0da9c2da1 
Downloading [===========================> ] 127.1MB/229.5MB\n", + " d76db7b75f8a Downloading [================================> ] 150.9MB/229.1MB\n", + " d76db7b75f8a Downloading [=================================> ] 151.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===========================> ] 127.6MB/229.5MB\n", + " 50d0da9c2da1 Downloading [===========================> ] 128.1MB/229.5MB\n", + " d76db7b75f8a Downloading [=================================> ] 153MB/229.1MB\n", + " d76db7b75f8a Downloading [=================================> ] 153.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================> ] 128.7MB/229.5MB\n", + " d76db7b75f8a Downloading [=================================> ] 154.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================> ] 129.2MB/229.5MB\n", + " d76db7b75f8a Downloading [=================================> ] 155.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================> ] 129.8MB/229.5MB\n", + " d76db7b75f8a Downloading [==================================> ] 156.2MB/229.1MB\n", + " d76db7b75f8a Downloading [==================================> ] 156.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================> ] 130.3MB/229.5MB\n", + " d76db7b75f8a Downloading [==================================> ] 158.4MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================> ] 131.4MB/229.5MB\n", + " d76db7b75f8a Downloading [==================================> ] 159.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================> ] 131.9MB/229.5MB\n", + " d76db7b75f8a Downloading [===================================> ] 160.6MB/229.1MB\n", + " d76db7b75f8a Downloading [===================================> ] 161.1MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================> ] 132.5MB/229.5MB\n", + " 50d0da9c2da1 Downloading [============================> ] 133MB/229.5MB\n", + " d76db7b75f8a Downloading [===================================> ] 162.7MB/229.1MB\n", + " d76db7b75f8a Downloading [===================================> ] 164.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=============================> ] 134.1MB/229.5MB\n", + " d76db7b75f8a Downloading [====================================> ] 165.4MB/229.1MB\n", + " d76db7b75f8a Downloading [====================================> ] 165.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=============================> ] 135.2MB/229.5MB\n", + " d76db7b75f8a Downloading [====================================> ] 166.5MB/229.1MB\n", + " d76db7b75f8a Downloading [====================================> ] 167MB/229.1MB\n", + " d76db7b75f8a Downloading [====================================> ] 167.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=============================> ] 135.7MB/229.5MB\n", + " d76db7b75f8a Downloading [====================================> ] 168.1MB/229.1MB\n", + " d76db7b75f8a Downloading [====================================> ] 169.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=============================> ] 136.2MB/229.5MB\n", + " d76db7b75f8a Downloading [=====================================> ] 170.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=============================> ] 136.8MB/229.5MB\n", + " d76db7b75f8a Downloading [=====================================> ] 171.3MB/229.1MB\n", + " d76db7b75f8a Downloading [=====================================> ] 171.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=============================> ] 137.3MB/229.5MB\n", + " 50d0da9c2da1 Downloading 
[==============================> ] 137.8MB/229.5MB\n", + " d76db7b75f8a Downloading [=====================================> ] 173.5MB/229.1MB\n", + " d76db7b75f8a Downloading [======================================> ] 174.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================> ] 138.4MB/229.5MB\n", + " d76db7b75f8a Downloading [======================================> ] 176.2MB/229.1MB\n", + " d76db7b75f8a Downloading [======================================> ] 176.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================> ] 139.4MB/229.5MB\n", + " d76db7b75f8a Downloading [======================================> ] 177.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================> ] 140MB/229.5MB\n", + " d76db7b75f8a Downloading [=======================================> ] 178.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================> ] 140.5MB/229.5MB\n", + " d76db7b75f8a Downloading [=======================================> ] 179.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================> ] 141.1MB/229.5MB\n", + " d76db7b75f8a Downloading [=======================================> ] 181MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================> ] 141.6MB/229.5MB\n", + " d76db7b75f8a Downloading [=======================================> ] 182.1MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================> ] 142.1MB/229.5MB\n", + " d76db7b75f8a Downloading [=======================================> ] 183.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============================> ] 142.7MB/229.5MB\n", + " d76db7b75f8a Downloading [========================================> ] 184.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============================> ] 143.2MB/229.5MB\n", + " d76db7b75f8a Downloading [========================================> ] 184.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============================> ] 143.7MB/229.5MB\n", + " d76db7b75f8a Downloading [========================================> ] 186.4MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============================> ] 144.8MB/229.5MB\n", + " d76db7b75f8a Downloading [========================================> ] 187.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============================> ] 145.9MB/229.5MB\n", + " d76db7b75f8a Downloading [=========================================> ] 188.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============================> ] 146.4MB/229.5MB\n", + " d76db7b75f8a Downloading [=========================================> ] 189.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================> ] 147MB/229.5MB\n", + " d76db7b75f8a Downloading [=========================================> ] 190.7MB/229.1MB\n", + " d76db7b75f8a Downloading [=========================================> ] 191.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================> ] 147.5MB/229.5MB\n", + " d76db7b75f8a Downloading [=========================================> ] 192.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================> ] 148.1MB/229.5MB\n", + " d76db7b75f8a Downloading [==========================================> ] 193.4MB/229.1MB\n", + " d76db7b75f8a Downloading [==========================================> ] 194.4MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================> ] 149.1MB/229.5MB\n", + " d76db7b75f8a Downloading [==========================================> ] 195.5MB/229.1MB\n", 
+ " 50d0da9c2da1 Downloading [================================> ] 149.7MB/229.5MB\n", + " d76db7b75f8a Downloading [==========================================> ] 196.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================> ] 150.2MB/229.5MB\n", + " d76db7b75f8a Downloading [===========================================> ] 197.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================> ] 150.7MB/229.5MB\n", + " d76db7b75f8a Downloading [===========================================> ] 198.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================> ] 151.3MB/229.5MB\n", + " d76db7b75f8a Downloading [===========================================> ] 199.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================================> ] 151.8MB/229.5MB\n", + " d76db7b75f8a Downloading [===========================================> ] 200.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================================> ] 152.9MB/229.5MB\n", + " 50d0da9c2da1 Downloading [=================================> ] 154MB/229.5MB\n", + " d76db7b75f8a Downloading [===========================================> ] 200.9MB/229.1MB\n", + " d76db7b75f8a Downloading [===========================================> ] 201.4MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================================> ] 155.1MB/229.5MB\n", + " d76db7b75f8a Downloading [============================================> ] 202MB/229.1MB\n", + " d76db7b75f8a Downloading [============================================> ] 203MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================================> ] 155.6MB/229.5MB\n", + " d76db7b75f8a Downloading [============================================> ] 204.1MB/229.1MB\n", + " d76db7b75f8a Downloading [============================================> ] 205.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================================> ] 156.1MB/229.5MB\n", + " d76db7b75f8a Downloading [=============================================> ] 206.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================================> ] 156.7MB/229.5MB\n", + " d76db7b75f8a Downloading [=============================================> ] 207.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================================> ] 157.2MB/229.5MB\n", + " 50d0da9c2da1 Downloading [==================================> ] 157.7MB/229.5MB\n", + " d76db7b75f8a Downloading [=============================================> ] 208.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================================> ] 158.3MB/229.5MB\n", + " d76db7b75f8a Downloading [=============================================> ] 210MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================================> ] 158.8MB/229.5MB\n", + " d76db7b75f8a Downloading [==============================================> ] 211.1MB/229.1MB\n", + " d76db7b75f8a Downloading [==============================================> ] 211.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================================> ] 159.4MB/229.5MB\n", + " d76db7b75f8a Downloading [==============================================> ] 212.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================================> ] 159.9MB/229.5MB\n", + " d76db7b75f8a Downloading [==============================================> ] 213.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==================================> ] 160.4MB/229.5MB\n", + " d76db7b75f8a Downloading [===============================================> ] 215.4MB/229.1MB\n", + " 50d0da9c2da1 
Downloading [===================================> ] 161MB/229.5MB\n", + " d76db7b75f8a Downloading [===============================================> ] 216.5MB/229.1MB\n", + " d76db7b75f8a Downloading [===============================================> ] 217.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================================> ] 161.5MB/229.5MB\n", + " 50d0da9c2da1 Downloading [===================================> ] 162.1MB/229.5MB\n", + " d76db7b75f8a Downloading [===============================================> ] 219.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================================> ] 162.6MB/229.5MB\n", + " d76db7b75f8a Downloading [================================================> ] 220.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================================> ] 163.1MB/229.5MB\n", + " d76db7b75f8a Downloading [================================================> ] 222.4MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================================> ] 163.7MB/229.5MB\n", + " d76db7b75f8a Downloading [================================================> ] 223.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================================> ] 164.2MB/229.5MB\n", + " d76db7b75f8a Downloading [=================================================> ] 225.1MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===================================> ] 164.7MB/229.5MB\n", + " d76db7b75f8a Downloading [=================================================> ] 225.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================================> ] 165.3MB/229.5MB\n", + " d76db7b75f8a Downloading [=================================================> ] 227.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================================> ] 165.8MB/229.5MB\n", + " d76db7b75f8a Downloading [=================================================> ] 228.3MB/229.1MB\n", + " d76db7b75f8a Verifying Checksum \n", + " d76db7b75f8a Download complete \n", + " 50d0da9c2da1 Downloading [====================================> ] 166.4MB/229.5MB\n", + " d76db7b75f8a Extracting [> ] 557.1kB/229.1MB\n", + " d76db7b75f8a Extracting [==> ] 11.14MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================================> ] 167.4MB/229.5MB\n", + " d76db7b75f8a Extracting [====> ] 22.28MB/229.1MB\n", + " d76db7b75f8a Extracting [======> ] 29.52MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================================> ] 168.5MB/229.5MB\n", + " d76db7b75f8a Extracting [========> ] 39.55MB/229.1MB\n", + " 50d0da9c2da1 Downloading [====================================> ] 169.6MB/229.5MB\n", + " d76db7b75f8a Extracting [===========> ] 51.25MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================================> ] 170.7MB/229.5MB\n", + " d76db7b75f8a Extracting [============> ] 57.93MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================================> ] 171.2MB/229.5MB\n", + " d76db7b75f8a Extracting [==============> ] 68.52MB/229.1MB\n", + " d76db7b75f8a Extracting [=================> ] 77.99MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=====================================> ] 172.8MB/229.5MB\n", + " d76db7b75f8a Extracting [==================> ] 83MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================================> ] 174.4MB/229.5MB\n", + " d76db7b75f8a Extracting [===================> ] 88.01MB/229.1MB\n", + " 50d0da9c2da1 Downloading [======================================> ] 176MB/229.5MB\n", + " d76db7b75f8a Extracting [====================> ] 94.7MB/229.1MB\n", + 
" 50d0da9c2da1 Downloading [======================================> ] 177.7MB/229.5MB\n", + " d76db7b75f8a Extracting [======================> ] 104.7MB/229.1MB\n", + " d76db7b75f8a Extracting [=======================> ] 109.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=======================================> ] 179.3MB/229.5MB\n", + " 50d0da9c2da1 Downloading [=======================================> ] 180.9MB/229.5MB\n", + " d76db7b75f8a Extracting [========================> ] 110.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=======================================> ] 183.1MB/229.5MB\n", + " d76db7b75f8a Extracting [========================> ] 111.4MB/229.1MB\n", + " d76db7b75f8a Extracting [========================> ] 112MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================================> ] 184.1MB/229.5MB\n", + " 50d0da9c2da1 Downloading [========================================> ] 185.8MB/229.5MB\n", + " d76db7b75f8a Extracting [========================> ] 113.1MB/229.1MB\n", + " 50d0da9c2da1 Downloading [========================================> ] 187.4MB/229.5MB\n", + " d76db7b75f8a Extracting [========================> ] 114.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=========================================> ] 189MB/229.5MB\n", + " d76db7b75f8a Extracting [=========================> ] 117MB/229.1MB\n", + " d76db7b75f8a Extracting [=========================> ] 118.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=========================================> ] 190.6MB/229.5MB\n", + " 50d0da9c2da1 Downloading [=========================================> ] 192.2MB/229.5MB\n", + " 50d0da9c2da1 Downloading [==========================================> ] 194.4MB/229.5MB\n", + " d76db7b75f8a Extracting [==========================> ] 120.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==========================================> ] 195.4MB/229.5MB\n", + " d76db7b75f8a Extracting [==========================> ] 120.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==========================================> ] 197.1MB/229.5MB\n", + " d76db7b75f8a Extracting [==========================> ] 122.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===========================================> ] 198.7MB/229.5MB\n", + " 50d0da9c2da1 Downloading [===========================================> ] 200.3MB/229.5MB\n", + " d76db7b75f8a Extracting [===========================> ] 124.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================================> ] 202.4MB/229.5MB\n", + " d76db7b75f8a Extracting [===========================> ] 127.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================================> ] 203MB/229.5MB\n", + " d76db7b75f8a Extracting [============================> ] 129.2MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================================> ] 204.1MB/229.5MB\n", + " d76db7b75f8a Extracting [============================> ] 130.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [============================================> ] 205.7MB/229.5MB\n", + " 50d0da9c2da1 Downloading [============================================> ] 206.2MB/229.5MB\n", + " d76db7b75f8a Extracting [============================> ] 131.5MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=============================================> ] 209.9MB/229.5MB\n", + " d76db7b75f8a Extracting [============================> ] 132.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================================> ] 211.5MB/229.5MB\n", + " d76db7b75f8a Extracting 
[=============================> ] 133.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================================> ] 213.2MB/229.5MB\n", + " d76db7b75f8a Extracting [==============================> ] 137.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [==============================================> ] 213.7MB/229.5MB\n", + " 50d0da9c2da1 Downloading [==============================================> ] 214.2MB/229.5MB\n", + " 50d0da9c2da1 Downloading [==============================================> ] 215.3MB/229.5MB\n", + " d76db7b75f8a Extracting [==============================> ] 138.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [===============================================> ] 219.1MB/229.5MB\n", + " d76db7b75f8a Extracting [==============================> ] 139.3MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================================> ] 220.7MB/229.5MB\n", + " d76db7b75f8a Extracting [==============================> ] 140.9MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================================> ] 222.3MB/229.5MB\n", + " d76db7b75f8a Extracting [===============================> ] 142.6MB/229.1MB\n", + " 50d0da9c2da1 Downloading [================================================> ] 223.9MB/229.5MB\n", + " d76db7b75f8a Extracting [===============================> ] 143.7MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================================================> ] 225.5MB/229.5MB\n", + " d76db7b75f8a Extracting [===============================> ] 144.8MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================================================> ] 227.2MB/229.5MB\n", + " d76db7b75f8a Extracting [===============================> ] 145.4MB/229.1MB\n", + " 50d0da9c2da1 Downloading [=================================================> ] 228.8MB/229.5MB\n", + " 50d0da9c2da1 Download complete \n", + " 50d0da9c2da1 Extracting [> ] 557.1kB/229.5MB\n", + " d76db7b75f8a Extracting [===============================> ] 145.9MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=> ] 7.799MB/229.5MB\n", + " d76db7b75f8a Extracting [===============================> ] 146.5MB/229.1MB\n", + " 50d0da9c2da1 Extracting [====> ] 18.94MB/229.5MB\n", + " d76db7b75f8a Extracting [================================> ] 147.6MB/229.1MB\n", + " 50d0da9c2da1 Extracting [======> ] 31.2MB/229.5MB\n", + " d76db7b75f8a Extracting [================================> ] 150.4MB/229.1MB\n", + " 50d0da9c2da1 Extracting [========> ] 41.22MB/229.5MB\n", + " d76db7b75f8a Extracting [=================================> ] 152.1MB/229.1MB\n", + " 50d0da9c2da1 Extracting [===========> ] 51.81MB/229.5MB\n", + " d76db7b75f8a Extracting [=================================> ] 152.6MB/229.1MB\n", + " 50d0da9c2da1 Extracting [============> ] 57.38MB/229.5MB\n", + " d76db7b75f8a Extracting [=================================> ] 154.3MB/229.1MB\n", + " 50d0da9c2da1 Extracting [==============> ] 66.85MB/229.5MB\n", + " 50d0da9c2da1 Extracting [================> ] 75.2MB/229.5MB\n", + " d76db7b75f8a Extracting [==================================> ] 156.5MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=================> ] 82.44MB/229.5MB\n", + " d76db7b75f8a Extracting [==================================> ] 158.2MB/229.1MB\n", + " 50d0da9c2da1 Extracting [==================> ] 85.23MB/229.5MB\n", + " d76db7b75f8a Extracting [===================================> ] 160.4MB/229.1MB\n", + " 50d0da9c2da1 Extracting [====================> ] 93.59MB/229.5MB\n", + " d76db7b75f8a Extracting 
[===================================> ] 162.7MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=====================> ] 100.8MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=======================> ] 106.4MB/229.5MB\n", + " d76db7b75f8a Extracting [====================================> ] 166.6MB/229.1MB\n", + " d76db7b75f8a Extracting [====================================> ] 168.2MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=======================> ] 109.7MB/229.5MB\n", + " d76db7b75f8a Extracting [=====================================> ] 170.5MB/229.1MB\n", + " 50d0da9c2da1 Extracting [========================> ] 111.4MB/229.5MB\n", + " d76db7b75f8a Extracting [=====================================> ] 171.6MB/229.1MB\n", + " 50d0da9c2da1 Extracting [========================> ] 112.5MB/229.5MB\n", + " d76db7b75f8a Extracting [======================================> ] 174.4MB/229.1MB\n", + " 50d0da9c2da1 Extracting [========================> ] 113.1MB/229.5MB\n", + " d76db7b75f8a Extracting [======================================> ] 176MB/229.1MB\n", + " 50d0da9c2da1 Extracting [========================> ] 113.6MB/229.5MB\n", + " d76db7b75f8a Extracting [======================================> ] 177.1MB/229.1MB\n", + " 50d0da9c2da1 Extracting [========================> ] 114.2MB/229.5MB\n", + " d76db7b75f8a Extracting [======================================> ] 177.7MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=========================> ] 114.8MB/229.5MB\n", + " d76db7b75f8a Extracting [=======================================> ] 178.8MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=========================> ] 115.3MB/229.5MB\n", + " d76db7b75f8a Extracting [=======================================> ] 179.4MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=========================> ] 117.5MB/229.5MB\n", + " d76db7b75f8a Extracting [=======================================> ] 179.9MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=========================> ] 119.2MB/229.5MB\n", + " d76db7b75f8a Extracting [=======================================> ] 181MB/229.1MB\n", + " 50d0da9c2da1 Extracting [==========================> ] 121.4MB/229.5MB\n", + " d76db7b75f8a Extracting [=======================================> ] 181.6MB/229.1MB\n", + " 50d0da9c2da1 Extracting [==========================> ] 122MB/229.5MB\n", + " d76db7b75f8a Extracting [=======================================> ] 182.7MB/229.1MB\n", + " d76db7b75f8a Extracting [=======================================> ] 183.3MB/229.1MB\n", + " 50d0da9c2da1 Extracting [==========================> ] 122.6MB/229.5MB\n", + " 50d0da9c2da1 Extracting [===========================> ] 124.2MB/229.5MB\n", + " d76db7b75f8a Extracting [========================================> ] 184.4MB/229.1MB\n", + " 50d0da9c2da1 Extracting [===========================> ] 125.3MB/229.5MB\n", + " d76db7b75f8a Extracting [========================================> ] 184.9MB/229.1MB\n", + " 50d0da9c2da1 Extracting [===========================> ] 127.6MB/229.5MB\n", + " d76db7b75f8a Extracting [========================================> ] 186.6MB/229.1MB\n", + " 50d0da9c2da1 Extracting [============================> ] 128.7MB/229.5MB\n", + " d76db7b75f8a Extracting [=========================================> ] 188.3MB/229.1MB\n", + " 50d0da9c2da1 Extracting [============================> ] 130.4MB/229.5MB\n", + " d76db7b75f8a Extracting [=========================================> ] 188.8MB/229.1MB\n", + " 50d0da9c2da1 Extracting [============================> ] 131.5MB/229.5MB\n", + " d76db7b75f8a 
Extracting [=========================================> ] 190.5MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=============================> ] 133.1MB/229.5MB\n", + " d76db7b75f8a Extracting [=========================================> ] 191.6MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=============================> ] 133.7MB/229.5MB\n", + " d76db7b75f8a Extracting [==========================================> ] 195MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=============================> ] 134.8MB/229.5MB\n", + " d76db7b75f8a Extracting [===========================================> ] 199.4MB/229.1MB\n", + " d76db7b75f8a Extracting [============================================> ] 203.9MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=============================> ] 136.5MB/229.5MB\n", + " d76db7b75f8a Extracting [=============================================> ] 206.7MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=============================> ] 137.6MB/229.5MB\n", + " d76db7b75f8a Extracting [=============================================> ] 208.3MB/229.1MB\n", + " 50d0da9c2da1 Extracting [==============================> ] 138.7MB/229.5MB\n", + " d76db7b75f8a Extracting [=============================================> ] 210MB/229.1MB\n", + " d76db7b75f8a Extracting [==============================================> ] 213.9MB/229.1MB\n", + " 50d0da9c2da1 Extracting [==============================> ] 139.8MB/229.5MB\n", + " d76db7b75f8a Extracting [===============================================> ] 217.8MB/229.1MB\n", + " 50d0da9c2da1 Extracting [==============================> ] 140.9MB/229.5MB\n", + " d76db7b75f8a Extracting [================================================> ] 220.6MB/229.1MB\n", + " d76db7b75f8a Extracting [================================================> ] 223.9MB/229.1MB\n", + " 50d0da9c2da1 Extracting [==============================> ] 142MB/229.5MB\n", + " d76db7b75f8a Extracting [=================================================> ] 225.1MB/229.1MB\n", + " 50d0da9c2da1 Extracting [===============================> ] 143.2MB/229.5MB\n", + " 50d0da9c2da1 Extracting [===============================> ] 144.8MB/229.5MB\n", + " d76db7b75f8a Extracting [=================================================> ] 225.6MB/229.1MB\n", + " 50d0da9c2da1 Extracting [================================> ] 147.1MB/229.5MB\n", + " d76db7b75f8a Extracting [=================================================> ] 226.2MB/229.1MB\n", + " 50d0da9c2da1 Extracting [================================> ] 148.2MB/229.5MB\n", + " d76db7b75f8a Extracting [=================================================> ] 227.3MB/229.1MB\n", + " 50d0da9c2da1 Extracting [================================> ] 149.3MB/229.5MB\n", + " 50d0da9c2da1 Extracting [================================> ] 149.8MB/229.5MB\n", + " d76db7b75f8a Extracting [=================================================> ] 228.4MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=================================> ] 152.1MB/229.5MB\n", + " d76db7b75f8a Extracting [==================================================>] 229.1MB/229.1MB\n", + " 50d0da9c2da1 Extracting [=================================> ] 153.2MB/229.5MB\n", + " 50d0da9c2da1 Extracting [==================================> ] 156.5MB/229.5MB\n", + " 50d0da9c2da1 Extracting [===================================> ] 163.8MB/229.5MB\n", + " d76db7b75f8a Pull complete \n", + " c1a9c0eb704d Extracting [==================================================>] 118B/118B\n", + " c1a9c0eb704d Extracting 
[==================================================>] 118B/118B\n", + " 50d0da9c2da1 Extracting [====================================> ] 166MB/229.5MB\n", + " c1a9c0eb704d Pull complete \n", + " 53cb618491a3 Extracting [==================================================>] 1.792kB/1.792kB\n", + " 53cb618491a3 Extracting [==================================================>] 1.792kB/1.792kB\n", + " 53cb618491a3 Pull complete \n", + " load_from_hugging_face_hub Pulled \n", + " 50d0da9c2da1 Extracting [=====================================> ] 169.9MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=====================================> ] 172.7MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=====================================> ] 174.4MB/229.5MB\n", + " 50d0da9c2da1 Extracting [======================================> ] 175.5MB/229.5MB\n", + " 50d0da9c2da1 Extracting [======================================> ] 176MB/229.5MB\n", + " 50d0da9c2da1 Extracting [======================================> ] 177.1MB/229.5MB\n", + " 50d0da9c2da1 Extracting [======================================> ] 178.3MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=======================================> ] 179.9MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=======================================> ] 181MB/229.5MB\n", + " 50d0da9c2da1 Extracting [========================================> ] 184.9MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=========================================> ] 188.3MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=========================================> ] 191.1MB/229.5MB\n", + " 50d0da9c2da1 Extracting [===========================================> ] 198.3MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=============================================> ] 206.7MB/229.5MB\n", + " 50d0da9c2da1 Extracting [==============================================> ] 213.4MB/229.5MB\n", + " 50d0da9c2da1 Extracting [===============================================> ] 218.9MB/229.5MB\n", + " 50d0da9c2da1 Extracting [================================================> ] 221.7MB/229.5MB\n", + " 50d0da9c2da1 Extracting [================================================> ] 224.5MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=================================================> ] 225.1MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=================================================> ] 225.6MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=================================================> ] 227.3MB/229.5MB\n", + " 50d0da9c2da1 Extracting [=================================================> ] 228.4MB/229.5MB\n", + " 50d0da9c2da1 Extracting [==================================================>] 229.5MB/229.5MB\n", + " 50d0da9c2da1 Extracting [==================================================>] 229.5MB/229.5MB\n", + " 50d0da9c2da1 Pull complete \n", + " 81d6c226fa1c Extracting [==================================================>] 99B/99B\n", + " 81d6c226fa1c Extracting [==================================================>] 99B/99B\n", + " 81d6c226fa1c Pull complete \n", + " 5e0225960697 Extracting [==================================================>] 1.172kB/1.172kB\n", + " 5e0225960697 Extracting [==================================================>] 1.172kB/1.172kB\n", + " 5e0225960697 Pull complete \n", + " 4f4fb700ef54 Extracting [==================================================>] 32B/32B\n", + " 4f4fb700ef54 Extracting [==================================================>] 32B/32B\n", + " 4f4fb700ef54 Pull complete \n", + " index_weaviate Pulled \n", + " Network 
ingestion-pipeline_default Creating\n", + " Network ingestion-pipeline_default Created\n", + " Container ingestion-pipeline-load_from_hugging_face_hub-1 Creating\n", + " Container ingestion-pipeline-load_from_hugging_face_hub-1 Created\n", + " Container ingestion-pipeline-chunktextcomponent-1 Creating\n", + " Container ingestion-pipeline-chunktextcomponent-1 Created\n", + " Container ingestion-pipeline-embed_text-1 Creating\n", + " Container ingestion-pipeline-embed_text-1 Created\n", + " Container ingestion-pipeline-index_weaviate-1 Creating\n", + " Container ingestion-pipeline-index_weaviate-1 Created\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attaching to ingestion-pipeline-chunktextcomponent-1, ingestion-pipeline-embed_text-1, ingestion-pipeline-index_weaviate-1, ingestion-pipeline-load_from_hugging_face_hub-1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ingestion-pipeline-load_from_hugging_face_hub-1 | [2024-02-08 12:54:08,818 | fondant.cli | INFO] Component `LoadFromHubComponent` found in module main\n", + "ingestion-pipeline-load_from_hugging_face_hub-1 | [2024-02-08 12:54:08,823 | fondant.component.executor | INFO] Dask default local mode will be used for further executions.Our current supported options are limited to 'local' and 'default'.\n", + "ingestion-pipeline-load_from_hugging_face_hub-1 | [2024-02-08 12:54:08,829 | fondant.component.executor | INFO] Skipping component execution\n", + "ingestion-pipeline-load_from_hugging_face_hub-1 | [2024-02-08 12:54:08,832 | fondant.component.executor | INFO] Matching execution detected for component. The last execution of the component originated from `ingestion-pipeline-20240205151753`.\n", + "ingestion-pipeline-load_from_hugging_face_hub-1 | [2024-02-08 12:54:08,838 | fondant.component.executor | INFO] Saving output manifest to /data/ingestion-pipeline/ingestion-pipeline-20240208135309/load_from_hugging_face_hub/manifest.json\n", + "ingestion-pipeline-load_from_hugging_face_hub-1 | [2024-02-08 12:54:08,838 | fondant.component.executor | INFO] Writing cache key with manifest reference to /data/ingestion-pipeline/cache/697fb89e2f81203d68c394906febb8b1.txt\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ingestion-pipeline-load_from_hugging_face_hub-1 exited with code 0\n", + "ingestion-pipeline-chunktextcomponent-1 | Collecting langchain==0.0.329 (from -r requirements.txt (line 1))\n", + "ingestion-pipeline-chunktextcomponent-1 | Obtaining dependency information for langchain==0.0.329 from https://files.pythonhosted.org/packages/42/4e/86204994aeb2e4ac367a7fade896b13532eae2430299052eb2c80ca35d2c/langchain-0.0.329-py3-none-any.whl.metadata\n", + "ingestion-pipeline-chunktextcomponent-1 | Downloading langchain-0.0.329-py3-none-any.whl.metadata (16 kB)\n", + "ingestion-pipeline-chunktextcomponent-1 | Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/site-packages (from langchain==0.0.329->-r requirements.txt (line 1)) (6.0.1)\n", + "ingestion-pipeline-chunktextcomponent-1 | Collecting SQLAlchemy<3,>=1.4 (from langchain==0.0.329->-r requirements.txt (line 1))\n", + "ingestion-pipeline-chunktextcomponent-1 | Obtaining dependency information for SQLAlchemy<3,>=1.4 from https://files.pythonhosted.org/packages/7a/de/0ca53bf49d213bea164b0bd0187d3c94d6fea650b7679a8e41c91e3182d7/SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", + "ingestion-pipeline-chunktextcomponent-1 | Downloading 
SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n",
+    "ingestion-pipeline-chunktextcomponent-1 | Installing collected packages: tenacity, sniffio, pydantic-core, mypy-extensions, marshmallow, jsonpointer, greenlet, annotated-types, typing-inspect, SQLAlchemy, 
pydantic, jsonpatch, anyio, langsmith, dataclasses-json, langchain\n", + "ingestion-pipeline-chunktextcomponent-1 | Successfully installed SQLAlchemy-2.0.25 annotated-types-0.6.0 anyio-3.7.1 dataclasses-json-0.6.4 greenlet-3.0.3 jsonpatch-1.33 jsonpointer-2.4 langchain-0.0.329 langsmith-0.0.87 marshmallow-3.20.2 mypy-extensions-1.0.0 pydantic-2.6.1 pydantic-core-2.16.2 sniffio-1.3.0 tenacity-8.2.3 typing-inspect-0.9.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ingestion-pipeline-chunktextcomponent-1 | WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\n", + "ingestion-pipeline-chunktextcomponent-1 | \n", + "ingestion-pipeline-chunktextcomponent-1 | [notice] A new release of pip is available: 23.2.1 -> 24.0\n", + "ingestion-pipeline-chunktextcomponent-1 | [notice] To update, run: pip install --upgrade pip\n", + "ingestion-pipeline-chunktextcomponent-1 | \n", + "ingestion-pipeline-chunktextcomponent-1 | [2024-02-08 12:54:21,912 | fondant.cli | INFO] Component `ChunkTextComponent` found in module main\n", + "ingestion-pipeline-chunktextcomponent-1 | [2024-02-08 12:54:21,917 | fondant.component.executor | INFO] Dask default local mode will be used for further executions.Our current supported options are limited to 'local' and 'default'.\n", + "ingestion-pipeline-chunktextcomponent-1 | [2024-02-08 12:54:21,921 | fondant.component.executor | INFO] Previous component `load_from_hugging_face_hub` run was cached. Cached pipeline id: ingestion-pipeline-20240205151753\n", + "ingestion-pipeline-chunktextcomponent-1 | [2024-02-08 12:54:21,924 | fondant.component.executor | INFO] Skipping component execution\n", + "ingestion-pipeline-chunktextcomponent-1 | [2024-02-08 12:54:21,927 | fondant.component.executor | INFO] Matching execution detected for component. 
The last execution of the component originated from `ingestion-pipeline-20240205151753`.\n",
+    "ingestion-pipeline-chunktextcomponent-1 | [2024-02-08 12:54:21,931 | fondant.component.executor | INFO] Saving output manifest to /data/ingestion-pipeline/ingestion-pipeline-20240208135309/chunktextcomponent/manifest.json\n",
+    "ingestion-pipeline-chunktextcomponent-1 | [2024-02-08 12:54:21,931 | fondant.component.executor | INFO] Writing cache key with manifest reference to /data/ingestion-pipeline/cache/8b8d6199d78c130f70b78d3ecf8ef420.txt\n"
+   ]
+  },
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "ingestion-pipeline-chunktextcomponent-1 exited with code 0\n"
+   ]
+  },
+  {
+   "name": "stderr",
+   "output_type": "stream",
+   "text": [
+    "ingestion-pipeline-embed_text-1 | [2024-02-08 12:54:26,451 | fondant.cli | INFO] Component `EmbedTextComponent` found in module main\n",
+    "ingestion-pipeline-embed_text-1 | [2024-02-08 12:54:26,457 | fondant.component.executor | INFO] Dask default local mode will be used for further executions.Our current supported options are limited to 'local' and 'default'.\n",
+    "ingestion-pipeline-embed_text-1 | [2024-02-08 12:54:26,462 | fondant.component.executor | INFO] Caching disabled for the component\n",
+    "ingestion-pipeline-embed_text-1 | [2024-02-08 12:54:26,462 | root | INFO] Executing component\n",
+    "ingestion-pipeline-embed_text-1 | [2024-02-08 12:54:32,097 | sentence_transformers.SentenceTransformer | INFO] Load pretrained SentenceTransformer: all-MiniLM-L6-v2\n",
+    ".gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]\n",
diff --git a/src/parameter_search.ipynb b/src/parameter_search.ipynb
deleted file mode 100644
-    "> ⚠️ This notebook can be run on your local machine or on a virtual machine and requires [Docker Compose](https://docs.docker.com/desktop/).\n",
-    "> Please note that it is not compatible with Google Colab as the latter does not support Docker."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In this notebook we demonstrate how to **perform parameter search** and **automatically tune a Retrieval-Augmented Generation (RAG)** system using [Fondant](https://fondant.ai).\n",
-    "\n",
-    "We will:\n",
-    "\n",
-    "1. Set up an environment and a [Weaviate](https://weaviate.io/platform) Vector Store\n",
-    "2. Define the sets of parameters that should be tried\n",
-    "3. Run the parameter search which automatically:\n",
-    "    * Runs an indexing pipeline for each combination of parameters to be tested\n",
-    "    * Runs an evaluation pipeline for each index\n",
-    "    * Collects results\n",
-    "4. Explore results\n",
-    "\n",
-    "
\n", - "\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will use [**Fondant**](https://fondant.ai), **a hub and framework for easy and shareable data processing**, which has the following advantages for RAG evaluation:\n", - "\n", - "- **Speed**\n", - " - Reusable RAG components from the [Fondant Hub](https://fondant.ai/en/latest/components/hub/) to quickly build RAG pipelines\n", - " - [Pipeline caching](https://fondant.ai/en/latest/caching/) to speed up subsequent runs\n", - " - Parallel processing out of the box to speed up processing of large datasets\n", - "- **Ease-of-use**\n", - " - Change parameters and swap [components](https://fondant.ai/en/latest/components/hub/) by changing only a few lines of code\n", - " - Create your own [custom components](https://fondant.ai/en/latest/components/custom_component/) (e.g. with different chunking strategies) and plug them into your pipeline\n", - " - Reuse your processing components in different pipelines and share them with the [community](https://discord.gg/HnTdWhydGp)\n", - "- **Production-readiness**\n", - " - Full data lineage and a [data explorer](https://fondant.ai/en/latest/data_explorer/) to check the evolution of data after each step\n", - " - Ready to deploy to (managed) platforms such as _Vertex, SageMaker and Kubeflow_\n", - " \n", - "Share your experiences or let us know how we can improve through our [**Discord**](https://discord.gg/HnTdWhydGp) or on [**GitHub**](https://github.com/ml6team/fondant). And of course feel free to give us a [**star ⭐**](https://github.com/ml6team/fondant-usecase-RAG) if you like what we are doing!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set up environment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> ⚠️ This section checks the prerequisites of your environment. Read any errors or warnings carefully." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Ensure a **Python between version 3.8 and 3.10** is available" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "if sys.version_info < (3, 8, 0) or sys.version_info >= (3, 11, 0):\n", - " raise Exception(f\"A Python version between 3.8 and 3.10 is required. 
You are running {sys.version}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Check if **docker compose** is installed and the **docker daemon** is running" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!docker compose version\n", - "!docker ps && echo \"Docker running\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make sure that **logs** are displayed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "logger = logging.getLogger()\n", - "logger.setLevel(logging.INFO)\n", - "logging.info(\"test\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Check if **GPU** is available" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "import subprocess\n", - "\n", - "try:\n", - " subprocess.check_output('nvidia-smi')\n", - " logging.info(\"Found GPU, using it!\")\n", - " number_of_accelerators = 1\n", - " accelerator_name = \"GPU\"\n", - "except Exception:\n", - " logging.warning(\"We recommend to run this pipeline on a GPU, but none could be found, using CPU instead\")\n", - " number_of_accelerators = None\n", - " accelerator_name = None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Install **Fondant** framework" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install -q -r ../requirements.txt --disable-pip-version-check && echo \"Success\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Spin up the Weaviate vector store" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> ⚠️ For **Apple M1/M2** chip users:\n", - "> \n", - "> - In Docker Desktop Dashboard `Settings -> Features in development`, make sure to **un**check `Use containerd` for pulling and storing images. 
More info [here](https://docs.docker.com/desktop/settings/mac/#beta-features)\n",
-    "> - Make sure that Docker uses the linux/amd64 platform and not arm64 (the cell below takes care of that)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "os.environ[\"DOCKER_DEFAULT_PLATFORM\"]=\"linux/amd64\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Run **Weaviate** with Docker compose"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Make sure you have **Weaviate client v3**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!docker compose -f weaviate_service/docker-compose.yaml up --detach"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Make sure the vectorDB is running and accessible"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import logging\n",
-    "import weaviate\n",
-    "\n",
-    "try:\n",
-    "    weaviate_client = weaviate.Client(\"http://localhost:8081\")\n",
-    "    logging.info(\"Connected to Weaviate instance\")\n",
-    "except weaviate.WeaviateStartUpError:\n",
-    "    logging.error(\"Cannot connect to weaviate instance, is it running?\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Parameter search"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Parameter search allows you to **try out different configurations of pipelines and compare their performance**\n",
-    "\n",
-    "`pipeline_index.py` processes text data and loads it into the vector database\n",
-    "\n",
-    "
\n", - "\n", - "
\n", - "\n", - "- [**Load data**](https://github.com/ml6team/fondant/tree/main/components/load_from_parquet): loads data from the Hugging Face Hub\n", - "- [**Chunk data**](https://github.com/ml6team/fondant/tree/main/components/chunk_text): divides the text into sections of a certain size and with a certain overlap\n", - "- [**Embed chunks**](https://github.com/ml6team/fondant/tree/main/components/embed_text): embeds each chunk as a vector, e.g. using [Cohere](https://cohere.com/embeddings)\n", - "- [**Index vector store**](https://github.com/ml6team/fondant/tree/main/components/index_weaviate): writes data and embeddings to the vector store\n", - "\n", - "`pipeline_eval.py` evaluates retrieval performance using the questions provided in your test dataset\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", - "- [**Load eval data**](https://github.com/ml6team/fondant/tree/main/components/load_from_csv): loads the evaluation dataset (questions) from a csv file\n", - "- [**Embed questons**](https://github.com/ml6team/fondant/tree/main/components/embed_text): embeds each question as a vector, e.g. using [Cohere](https://cohere.com/embeddings)\n", - "- [**Query vector store**](https://github.com/ml6team/fondant/tree/main/components/retrieve_from_weaviate): retrieves the most relevant chunks for each question from the vector store\n", - "- [**Evaluate**](https://github.com/ml6team/fondant/tree/0.8.0/components/evaluate_ragas): evaluates the retrieved chunks for each question, e.g. using [RAGAS](https://docs.ragas.io/en/latest/index.html)\n", - "- [**Aggregate**](https://github.com/ml6team/fondant-usecase-RAG/tree/main/src/components/aggregate_eval_results): calculates aggregated results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> 💡 This notebook defaults to the first 1000 rows of the [wikitext](https://huggingface.co/datasets/wikitext) dataset for demonstration purposes, but you can load your own dataset using one the other load components available on the [**Fondant Hub**](https://fondant.ai/en/latest/components/hub/#component-hub) or by creating your own [**custom load component**](https://fondant.ai/en/latest/guides/implement_custom_components/). Keep in mind that changing the dataset implies that you also need to change the [evaluation dataset](evaluation_datasets/wikitext_1000_q.csv) used in the evaluation pipeline. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set up parameter search" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Choose **parameters** over which to search\n", - "\n", - "- `chunk_size`: size of each text chunk, in number of characters ([chunk text](https://github.com/ml6team/fondant/tree/main/components/chunk_text) component)\n", - "- `chunk_overlap`: overlap between chunks ([chunk text](https://github.com/ml6team/fondant/tree/main/components/chunk_text) component)\n", - "- `embed_model`: model used to embed ([embed text](https://github.com/ml6team/fondant/tree/main/components/embed_text) component)\n", - "- `retrieval_top_k`: number of retrieved chunks taken into account for evaluation ([retrieve](https://github.com/ml6team/fondant/tree/main/components/retrieve_from_weaviate) component)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Choose a **search method**\n", - "- [`grid search`](https://en.wikipedia.org/wiki/Hyperparameter_optimization): tries all possible combinations\n", - "- `progressive search` (recommended): identifies best option per step, much more efficient as complexity increases linearly with number of search options vs. 
exponentially for grid search (with similar performance)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "searchable_index_params = {\n", - " 'chunk_size' : [128, 256, 384],\n", - " 'chunk_overlap' : [16, 64, 128],\n", - "}\n", - "searchable_shared_params = {\n", - " 'embed_model' : [(\"huggingface\",\"all-MiniLM-L6-v2\")] # add more as tuples: ,(\"huggingface\", \"BAAI/bge-base-en-v1.5\")\n", - "}\n", - "searchable_eval_params = {\n", - " 'retrieval_top_k' : [2, 4, 8]\n", - "}\n", - "\n", - "evaluation_set_path = \"./evaluation_datasets\"\n", - "search_method = 'progressive_search' # 'grid_search', 'progressive_search'\n", - "target_metric = 'context_precision' # relevant for 'smart' methods that use previous results to determine params, e.g. progressive search" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "⚠️ If you want to use an **OpenAI** model for evaluation you will need an [API key](https://platform.openai.com/docs/quickstart) (see TODO below)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import get_host_ip\n", - "\n", - "\n", - "# configurable parameters\n", - "shared_args = {\n", - " \"base_path\" : \"./data\", # where data goes\n", - " \"embed_api_key\" : {},\n", - " \"weaviate_url\" : f\"http://{get_host_ip()}:8081\"\n", - "}\n", - "index_args = {\n", - " \"n_rows_to_load\" : 1000,\n", - "}\n", - "eval_args = {\n", - " \"evaluation_set_filename\" : \"wikitext_1000_q.csv\",\n", - " \"evaluation_set_separator\" : \";\",\n", - " \"llm_module_name\": \"langchain.chat_models\",\n", - " \"llm_class_name\": \"ChatOpenAI\",\n", - " \"llm_kwargs\": {\n", - " \"openai_api_key\": \"\" , # TODO: update with your key or use a different model\n", - " \"model_name\" : \"gpt-3.5-turbo\" # choose model, e.g. 
\"gpt-4\", \"gpt-3.5-turbo\"\n", - " },\n", - " \"evaluation_metrics\" : [\"context_precision\", \"context_relevancy\"]\n", - "}\n", - "\n", - "# Parameters for the GPU resources\n", - "resource_args = {\n", - " \"number_of_accelerators\": number_of_accelerators,\n", - " \"accelerator_name\": accelerator_name,\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run parameter search" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> 💡 The first time you run a pipeline, you need to **download a docker image for each component** which may take a minute.\n", - "\n", - "> 💡 Use a **GPU** or an external API to speed up the embedding step\n", - "\n", - "> 💡 Steps that have been processed before are **cached** and will be skipped in subsequent runs which speeds up processing.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "from utils import ParameterSearch\n", - "from utils import check_weaviate_class_exists\n", - "\n", - "mysearch = ParameterSearch(\n", - " searchable_index_params = searchable_index_params,\n", - " searchable_shared_params = searchable_shared_params,\n", - " searchable_eval_params = searchable_eval_params,\n", - " shared_args = shared_args,\n", - " index_args = index_args,\n", - " eval_args = eval_args,\n", - " resource_args = resource_args,\n", - " search_method = search_method,\n", - " target_metric = target_metric,\n", - " evaluation_set_path=evaluation_set_path,\n", - " debug=False,\n", - ")\n", - "\n", - "results = mysearch.run(weaviate_client)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Display Results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Compare the performance** of your runs below. The default evaluation component uses [Ragas](https://github.com/explodinggradients/ragas) and provides the following two performance measures [context precision](https://docs.ragas.io/en/latest/concepts/metrics/context_precision.html) and [context relevancy](https://docs.ragas.io/en/latest/concepts/metrics/context_relevancy.html)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Visualize Results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make sure **Plotly** is installed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install -q \"plotly\" --disable-pip-version-check && echo \"Plotly installed successfully\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Show legend of **embedding models** used" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from utils import add_embed_model_numerical_column, show_legend_embed_models\n", - "\n", - "results = add_embed_model_numerical_column(results)\n", - "show_legend_embed_models(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Plot results**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import plotly.express as px\n", - "\n", - "dimensions = ['chunk_size', 'chunk_overlap', 'embed_model_numerical', 'retrieval_top_k', 'context_precision']\n", - "fig = px.parallel_coordinates(results, color=\"context_precision\",\n", - " dimensions=dimensions,\n", - " color_continuous_scale=px.colors.sequential.Bluered)\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Explore data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also check your data and results at each step in the pipelines using the **Fondant data explorer**. The first time you run the data explorer, you need to download the docker image which may take a minute. Then you can access the data explorer at: **http://localhost:8501/**\n", - "\n", - "Enjoy the exploration! 🍫 \n", - "\n", - "Press the ◼️ in the notebook toolbar to **stop the explorer**." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from fondant.explore import run_explorer_app\n", - "\n", - "run_explorer_app(base_path=shared_args[\"base_path\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To stop the Explore, run the cell below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from fondant.explore import stop_explorer_app\n", - "\n", - "stop_explorer_app()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Clean up your environment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After your pipeline ran successfully, you can **clean up** your environment and stop the weaviate database." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!docker compose -f weaviate/docker-compose.yaml down" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Feedback" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Please share your experience or **let us know how we can improve** through our \n", - "* [**Discord**](https://discord.gg/HnTdWhydGp) \n", - "* [**GitHub**](https://github.com/ml6team/fondant)\n", - "\n", - "And of course feel free to give us a [**star** ⭐](https://github.com/ml6team/fondant) if you like what we are doing!" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/src/pipeline.ipynb b/src/pipeline.ipynb deleted file mode 100644 index 5735e42..0000000 --- a/src/pipeline.ipynb +++ /dev/null @@ -1,608 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 🍫 Building a RAG indexing pipeline with Fondant\n", - "\n", - "> ⚠️ Please note that this notebook **is not** compatible with **Google Colab**. To complete the tutorial, you must \n", - "> initiate Docker containers. Starting Docker containers within Google Colab is not supported.\n", - "\n", - "This repository demonstrates a Fondant data pipeline that ingests text\n", - "data into a vector database. The pipeline uses four reusable Fondant components. \n", - "Additionally, we provide a Docker Compose setup for Weaviate, enabling local testing and\n", - "development.\n", - "\n", - "### Pipeline overview\n", - "\n", - "The primary goal of this sample is to showcase how you can use a Fondant pipeline and reusable\n", - "components to load, chunk and embed text, as well as ingest the text embeddings to a vector\n", - "database.\n", - "Pipeline Steps:\n", - "\n", - "- [Data Loading](https://github.com/ml6team/fondant/tree/main/components/load_from_parquet): The\n", - " pipeline begins by loading text data from a Parquet file, which serves as the\n", - " source for subsequent processing. For the minimal example we are using a dataset from Huggingface.\n", - "- [Text Chunking](https://github.com/ml6team/fondant/tree/main/components/chunk_text): Text data is\n", - " chunked into manageable sections to prepare it for embedding. This\n", - " step\n", - " is crucial for performant RAG systems.\n", - "- [Text Embedding](https://github.com/ml6team/fondant/tree/main/components/embed_text): We are using\n", - " a small HuggingFace model for the generation of text embeddings.\n", - " The `embed_text` component easily allows the usage of different models as well.\n", - "- [Write to Weaviate](https://github.com/ml6team/fondant/tree/main/components/index_weaviate): The\n", - " final step of the pipeline involves writing the embedded text data to\n", - " a Weaviate database.\n", - "\n", - "## Environment\n", - "### This section checks the prerequisites of your environment. Read any errors or warnings carefully. 
\n", - "\n", - "**Ensure a Python between version 3.8 and 3.10 is available**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "if sys.version_info < (3, 8, 0) or sys.version_info >= (3, 11, 0):\n", - " raise Exception(f\"A Python version between 3.8 and 3.10 is required. You are running {sys.version}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Check if docker compose is installed and the docker daemon is running**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!docker compose version\n", - "!docker ps && echo \"Docker running\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Check if GPU is available**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "import subprocess\n", - "\n", - "try:\n", - " subprocess.check_output('nvidia-smi')\n", - " logging.info(\"Found GPU, using it!\")\n", - " number_of_accelerators = 1\n", - " accelerator_name = \"GPU\"\n", - "except Exception:\n", - " logging.warning(\"We recommend to run this pipeline on a GPU, but none could be found, using CPU instead\")\n", - " number_of_accelerators = None\n", - " accelerator_name = None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Install Fondant**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -r ../requirements.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Implement the pipeline\n", - "\n", - "First of all, we need to initialize the pipeline, which includes specifying a name for your pipeline, providing a description, and setting a base_path. The base_path is used to store the pipeline artifacts and data generated by the components" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from fondant.pipeline import Pipeline, Resources\n", - "\n", - "BASE_PATH = \"./data\"\n", - "Path(BASE_PATH).mkdir(parents=True, exist_ok=True)\n", - "\n", - "pipeline = Pipeline(\n", - " name=\"ingestion-pipeline\", # Add a unique pipeline name to easily track your progress and data\n", - " description=\"Pipeline to prepare and process data for building a RAG solution\",\n", - " base_path=BASE_PATH, # The demo pipelines uses a local directory to store the data.\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For demonstration purposes, we will utilize a dataset available on Hugging Face. As such, we will use a reusable Fondant component `load_from_hf_hub`. Note that the `load_from_hf_hub` component does not define a fixed schema for the data it produces, which means we need to provide hits ourselves with the `produces` argument. It takes a mapping from field names to `pyarrow` types." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pyarrow as pa\n",
-    "\n",
-    "text = pipeline.read(\n",
-    "    \"load_from_hf_hub\",\n",
-    "    arguments={\n",
-    "        # Add arguments\n",
-    "        \"dataset_name\": \"wikitext@~parquet\",\n",
-    "        \"n_rows_to_load\": 1000,\n",
-    "    },\n",
-    "    produces={\n",
-    "        \"text\": pa.string()\n",
-    "    }\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This method doesn't execute the component yet, but adds it to the execution graph of the pipeline, and returns a lazy `Dataset` instance. We can now chain additional components from the [Fondant Hub](https://fondant.ai/en/latest/components/hub/) using `Dataset.apply()`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import utils\n",
-    "\n",
-    "\n",
-    "chunks = text.apply(\n",
-    "    \"chunk_text\",\n",
-    "    arguments={\n",
-    "        \"chunk_args\": {\"chunk_size\": 512, \"chunk_overlap\": 32}\n",
-    "    }\n",
-    ")\n",
-    "\n",
-    "embeddings = chunks.apply(\n",
-    "    \"embed_text\",\n",
-    "    arguments={\n",
-    "        \"model_provider\": \"huggingface\",\n",
-    "        \"model\": \"all-MiniLM-L6-v2\"\n",
-    "    },\n",
-    "    resources=Resources(\n",
-    "        accelerator_number=number_of_accelerators,\n",
-    "        accelerator_name=accelerator_name,\n",
-    "    ),\n",
-    "    cluster_type=\"local\" if number_of_accelerators is not None else \"default\",\n",
-    ")\n",
-    "\n",
-    "embeddings.write(\n",
-    "    \"index_weaviate\",\n",
-    "    arguments={\n",
-    "        \"weaviate_url\": f\"http://{utils.get_host_ip()}:8081\",\n",
-    "        \"class_name\": \"index\",\n",
-    "    },\n",
-    "    cache=False\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Our pipeline now looks as follows:\n",
-    "\n",
-    "`load_from_hf_hub` -> `chunk_text` -> `embed_text` -> `index_weaviate`"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Running the pipeline\n",
-    "\n",
-    "The pipeline will load and process text data, then ingest the processed data into a vector database. Before executing the pipeline, we need to start the Weaviate database. Otherwise the pipeline execution will fail.\n",
-    "\n",
-    "To do this, we can utilize the Docker setup provided in the `weaviate_service` folder."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# If you are using a MacBook with an M1 processor, make sure to set the docker default platform to linux/amd64\n",
-    "import os\n",
-    "os.environ[\"DOCKER_DEFAULT_PLATFORM\"]=\"linux/amd64\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!docker compose -f weaviate_service/docker-compose.yaml up --detach --quiet-pull"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Finally, we can execute our pipeline. \n",
-    "Fondant provides multiple runners to run our pipeline:\n",
-    "\n",
-    "- A Docker runner for local execution\n",
-    "- A Vertex AI runner for managed execution on Google Cloud\n",
-    "- A Sagemaker runner for managed execution on AWS\n",
-    "- A Kubeflow Pipelines runner for execution anywhere\n",
-    "\n",
-    "Here we will use the DockerRunner for local execution, which utilizes docker-compose under the hood; switching to a managed runner later is a small change, as the sketch below illustrates.\n",
-    "\n",
-    "The runner will download the reusable components from the component hub. Afterwards, you will see the components execute one by one."
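All runners are driven the same way from Python. A hedged sketch (the `DockerRunner` call is the one used in this notebook; the Vertex AI variant is hypothetical, so check the Fondant docs for the exact class name and constructor arguments of your Fondant version):

```python
from fondant.pipeline.runner import DockerRunner

# Local execution, as used in this notebook:
DockerRunner().run(pipeline)

# Managed execution on Google Cloud (hypothetical arguments, verify against the docs):
# from fondant.pipeline.runner import VertexRunner
# VertexRunner(project_id="my-gcp-project", region="europe-west1").run(pipeline)
```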
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from fondant.pipeline.runner import DockerRunner\n",
-    "\n",
-    "DockerRunner().run(pipeline)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Exploring the dataset\n",
-    "\n",
-    "You can also explore the dataset using the Fondant explorer, which enables you to visualize your output dataset at each component step. It might take a while to start the first time, as it needs to download the explorer docker image. You can browse at \n",
-    "http://localhost:8501/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from fondant.explore import run_explorer_app\n",
-    "\n",
-    "run_explorer_app(base_path=BASE_PATH)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "To stop the Explorer, run the cell below."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from fondant.explore import stop_explorer_app\n",
-    "\n",
-    "stop_explorer_app()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Create your own component\n",
-    "\n",
-    "Certainly, you can create your own custom components and use them in the pipeline. Let's consider building a component that cleans our text articles. For demo purposes, we will implement a component that removes all empty lines.\n",
-    "\n",
-    "To implement a custom component, a couple of files need to be defined:\n",
-    "\n",
-    "- Fondant component specification\n",
-    "- main.py script in a src folder\n",
-    "- Dockerfile\n",
-    "- requirements.txt\n",
-    "\n",
-    "If you want to learn more about creating custom components, check out [our documentation](https://fondant.ai/en/latest/components/custom_component/).\n",
-    "\n",
-    "\n",
-    "### Component specification\n",
-    "\n",
-    "The component specification is represented by a single `fondant_component.yaml` file. There you can define which fields your component consumes and produces. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%writefile components/text_cleaning/fondant_component.yaml\n",
-    "name: Text cleaning component\n",
-    "description: Clean text passages\n",
-    "image: ghcr.io/ml6team/text_cleaning:dev\n",
-    "\n",
-    "consumes:\n",
-    "  text:\n",
-    "    type: string\n",
-    "\n",
-    "produces:\n",
-    "  text:\n",
-    "    type: string"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Main.py script\n",
-    "\n",
-    "The core logic of the component should be implemented in a `main.py` script in a folder called `src`. We can implement the text cleaning logic as a class. We will inherit from the base class `PandasTransformComponent`. The `PandasTransformComponent` operates on pandas dataframes. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile components/text_cleaning/src/main.py\n", - "import pandas as pd\n", - "from fondant.component import PandasTransformComponent\n", - "\n", - "\n", - "class TextCleaningComponent(PandasTransformComponent):\n", - " def __init__(self, **kwargs):\n", - " \"\"\"Initialize your component\"\"\"\n", - "\n", - " def remove_empty_lines(self, text):\n", - " lines = text.split(\"\\n\")\n", - " non_empty_lines = [line.strip() for line in lines if line.strip()]\n", - " return \"\\n\".join(non_empty_lines)\n", - "\n", - " def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:\n", - " dataframe[\"text\"] = dataframe[\"text\"].apply(\n", - " self.remove_empty_lines\n", - " )\n", - " return dataframe" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dockerfile \n", - "The Dockerfile defines how to build the component into a Docker image. You can use the following:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile components/text_cleaning/Dockerfile\n", - "FROM --platform=linux/amd64 python:3.8-slim\n", - "\n", - "# Install requirements\n", - "COPY requirements.txt /\n", - "RUN pip3 install --no-cache-dir -r requirements.txt\n", - "\n", - "# Set the working directory to the component folder\n", - "WORKDIR /component/src\n", - "\n", - "# Copy over src-files\n", - "COPY src/ .\n", - "\n", - "ENTRYPOINT [\"fondant\", \"execute\", \"main\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Requirements.txt\n", - "\n", - "In the requirements.txt we define all dependencies of the component." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile components/text_cleaning/requirements.txt\n", - "fondant[component]==0.9.0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Add the new component to the pipeline\n", - "\n", - "Now we can add the new component to the pipeline with the `Dataset.apply` function. We just specify the path to the directory containing the custom component instead of the name of the reusable component." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pyarrow as pa\n", - "from fondant.pipeline import Pipeline\n", - "\n", - "\n", - "pipeline = Pipeline(\n", - " name=\"ingestion-pipeline\",\n", - " description=\"Pipeline to prepare and process data for building a RAG solution\",\n", - " base_path=BASE_PATH, # The demo pipelines uses a local directory to store the data.\n", - ")\n", - "\n", - "text = pipeline.read(\n", - " \"load_from_hf_hub\",\n", - " arguments={\n", - " \"dataset_name\": \"wikitext@~parquet\",\n", - " \"n_rows_to_load\": 1000,\n", - " },\n", - " produces={\n", - " \"text\": pa.string()\n", - " }\n", - ")\n", - "\n", - "cleaned_text = text.apply(\n", - " \"components/text_cleaning\", # Path to custom component\n", - ")\n", - "\n", - "chunks = cleaned_text.apply(\n", - " \"chunk_text\",\n", - " arguments={\n", - " \"chunk_size\": 512,\n", - " \"chunk_overlap\": 32,\n", - " },\n", - ")\n", - "\n", - "embeddings = chunks.apply(\n", - " \"embed_text\",\n", - " arguments={\n", - " \"model_provider\": \"huggingface\",\n", - " \"model\": \"all-MiniLM-L6-v2\",\n", - " },\n", - ")\n", - "\n", - "embeddings.write(\n", - " \"index_weaviate\",\n", - " arguments={\n", - " \"weaviate_url\": f\"http://{utils.get_host_ip()}:8081\",\n", - " \"class_name\": \"index\",\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you now run your pipeline, the new changes will be picked up and Fondant will automatically re-build the component with the changes included." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "DockerRunner().run(pipeline)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you check the logs, you will see that components executed with the same parameters which enables faster pipeline iteration.\n", - "\n", - "If you restart the Explorer, you'll see that you can now select a second pipeline and inspect your new dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "run_explorer_app(base_path=BASE_PATH)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Clean up your environment\n", - "\n", - "After your pipeline run successfully, you should clean up your environment and stop the weaviate database." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!docker compose -f weaviate/docker-compose.yaml down" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "stop_explorer_app()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Scaling up\n", - "If you're happy with your dataset, it's time to scale up. 
Check [our documentation](https://fondant.ai/en/latest/pipeline/#compiling-and-running-a-pipeline) for more information about the available runners.\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/src/pipeline_eval.py b/src/pipeline_eval.py deleted file mode 100644 index 05db011..0000000 --- a/src/pipeline_eval.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Fondant pipeline to evaluate a RAG pipeline.""" - -import pyarrow as pa -from fondant.pipeline import Pipeline, Resources - - -def create_pipeline( - *, - base_path: str = "./data", - weaviate_url="http://host.docker.internal:8080", - weaviate_class: str = "Pipeline1", - evaluation_set_path="./evaluation_datasets", - evaluation_set_filename="wikitext_1000_q.csv", - evaluation_set_separator: str = ";", - embed_model_provider: str = "huggingface", - embed_model: str = "all-MiniLM-L6-v2", - embed_api_key: dict = {}, - retrieval_top_k: int = 3, - llm_module_name: str = "langchain.chat_models", - llm_class_name: str = "ChatOpenAI", - llm_kwargs: dict = {"model_name": "gpt-3.5-turbo"}, - evaluation_metrics: list = ["context_precision", "context_relevancy"], - number_of_accelerators=None, - accelerator_name=None, -): - """Create a Fondant pipeline based on the provided arguments.""" - evaluation_pipeline = Pipeline( - name="evaluation-pipeline", - description="Pipeline to evaluate a RAG solution", - base_path=base_path, - ) - - load_from_csv = evaluation_pipeline.read( - "load_from_csv", - arguments={ - "dataset_uri": "/evaldata/" + evaluation_set_filename, - # mounted dir from within docker as extra_volumes - "column_separator": evaluation_set_separator, - }, - produces={ - "question": pa.string(), - }, - ) - - embed_text_op = load_from_csv.apply( - "embed_text", - arguments={ - "model_provider": embed_model_provider, - "model": embed_model, - "api_keys": embed_api_key, - }, - consumes={ - "text": "question", - }, - resources=Resources( - accelerator_number=number_of_accelerators, - accelerator_name=accelerator_name, - ), - cluster_type="local" if number_of_accelerators is not None else "default", - ) - - retrieve_chunks = embed_text_op.apply( - "retrieve_from_weaviate", - arguments={ - "weaviate_url": weaviate_url, - "class_name": weaviate_class, - "top_k": retrieval_top_k, - }, - cache=False, - ) - - retriever_eval = retrieve_chunks.apply( - "evaluate_ragas", - arguments={ - "llm_module_name": llm_module_name, - "llm_class_name": llm_class_name, - "llm_kwargs": llm_kwargs, - }, - produces={metric: pa.float32() for metric in evaluation_metrics}, - ) - - retriever_eval.apply( - "components/aggregate_eval_results", - consumes={metric: pa.float32() for metric in evaluation_metrics}, - ) - - return evaluation_pipeline diff --git a/src/pipeline_index.py b/src/pipeline_index.py deleted file mode 100644 index 999fddb..0000000 --- a/src/pipeline_index.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Fondant pipeline to index a RAG system.""" -import pyarrow as pa -from fondant.pipeline import Pipeline, Resources - - -def create_pipeline( - *, - weaviate_url: str, - base_path: str = "./data", - n_rows_to_load: int = 1000, - weaviate_class: str = "Pipeline1", - 
weaviate_overwrite: bool = True, - embed_model_provider: str = "huggingface", - embed_model: str = "all-MiniLM-L6-v2", - embed_api_key: dict = {}, - chunk_args: dict = {"chunk_size": 512, "chunk_overlap": 32}, - number_of_accelerators=None, - accelerator_name=None, -): - """Create a Fondant pipeline based on the provided arguments.""" - indexing_pipeline = Pipeline( - name="indexing-pipeline", - description="Pipeline to prepare and process data for building a RAG solution", - base_path=base_path, - ) - - text = indexing_pipeline.read( - "load_from_hf_hub", - arguments={ - # Add arguments - "dataset_name": "wikitext@~parquet", - "n_rows_to_load": n_rows_to_load, - }, - produces={ - "text": pa.string(), - }, - ) - - chunks = text.apply( - "chunk_text", - arguments={ - "chunk_args": chunk_args, - }, - ) - - embeddings = chunks.apply( - "embed_text", - arguments={ - "model_provider": embed_model_provider, - "model": embed_model, - "api_keys": embed_api_key, - }, - resources=Resources( - accelerator_number=number_of_accelerators, - accelerator_name=accelerator_name, - ), - cluster_type="local" if number_of_accelerators is not None else "default", - ) - - embeddings.write( - "index_weaviate", - arguments={ - "weaviate_url": weaviate_url, - "class_name": weaviate_class, - "overwrite": weaviate_overwrite, - }, - cache=False, - ) - - return indexing_pipeline diff --git a/src/utils.py b/src/utils.py index 6e34f09..d704df0 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,16 +1,8 @@ import logging -import os import socket -import typing as t -from datetime import datetime -from itertools import product from pathlib import Path -import pandas as pd -import pipeline_eval -import pipeline_index import weaviate -from fondant.pipeline.runner import DockerRunner logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -51,313 +43,3 @@ def create_directory_if_not_exists(path): p_base_path = Path(path).resolve() p_base_path.mkdir(parents=True, exist_ok=True) return str(p_base_path) - - -def cartesian_product(input_dict): - return ( - dict(zip(input_dict.keys(), values)) for values in product(*input_dict.values()) - ) - - -def extract_timestamp(folder_name): - timestamp_str = folder_name.split("-")[-1] - return datetime.strptime(timestamp_str, "%Y%m%d%H%M%S") - - -def has_parquet_file(data_directory, entry, component_name): - component_folder = os.path.join(data_directory, entry, component_name) - # Check if the component exists - if not os.path.exists(component_folder) or not os.path.isdir(component_folder): - return False - parquet_files = [ - file for file in os.listdir(component_folder) if file.endswith(".parquet") - ] - return bool(parquet_files) - - -def get_metrics_latest_run( - base_path, - pipeline_name="evaluation-pipeline", - component_name="aggregate_eval_results", -): - data_directory = f"{base_path}/{pipeline_name}" - - # keep data folders that belong to pipeline and contain parquet file - valid_entries = [ - d - for d in os.listdir(data_directory) - if os.path.isdir(os.path.join(data_directory, d)) - and d.startswith(pipeline_name) - and has_parquet_file(data_directory, d, component_name) - ] - - # keep the latest folder - latest_run = sorted(valid_entries, key=extract_timestamp, reverse=True)[0] - - # read all Parquet files and concatenate them into a single DataFrame - component_folder = os.path.join(data_directory, latest_run, component_name) - parquet_files = [f for f in os.listdir(component_folder) if f.endswith(".parquet")] - dfs = [ - pd.read_parquet(os.path.join(component_folder, 
file)) for file in parquet_files - ] - - # Concatenate DataFrames and set index - concatenated_df = pd.concat(dfs, ignore_index=True).set_index("metric") - - return concatenated_df["score"].apply(lambda x: round(x, 2)).to_dict() - - -def add_embed_model_numerical_column(df): - df["embed_model_numerical"] = pd.factorize(df["embed_model"])[0] + 1 - return df - - -def show_legend_embed_models(df): - columns_to_show = ["embed_model", "embed_model_numerical"] - df = df[columns_to_show].drop_duplicates().set_index("embed_model_numerical") - df.index.name = "" - return df - - -class ParameterSearch: - """RAG parameter search.""" - - def __init__( - self, - *, - searchable_index_params: t.Dict[str, t.Any], - searchable_shared_params: t.Dict[str, t.Any], - searchable_eval_params: t.Dict[str, t.Any], - index_args: t.Dict[str, t.Any], - shared_args: t.Dict[str, t.Any], - eval_args: t.Dict[str, t.Any], - resource_args: t.Dict[str, t.Any], - evaluation_set_path: str = "./evaluation_datasets", - search_method: str = "progressive_search", - target_metric: str = "context_precision", - debug=False, - ): - self.searchable_index_params = searchable_index_params - self.searchable_shared_params = searchable_shared_params - self.searchable_eval_params = searchable_eval_params - self.searchable_params = { - **searchable_index_params, - **searchable_shared_params, - **searchable_eval_params, - } - self.index_args = index_args - self.resource_args = resource_args - self.shared_args = shared_args - self.eval_args = eval_args - self.search_method = search_method - self.target_metric = target_metric - self.debug = debug - - # create directory for pipeline output data - self.base_path = create_directory_if_not_exists(shared_args["base_path"]) - - # mount directory of pipeline output data from docker - self.extra_volumes = [ - str(os.path.join(os.path.abspath(evaluation_set_path))) + ":/evaldata", - ] - - # define pipeline runner - self.runner = DockerRunner() - - # list of dicts to store all params & results - self.results = [] - - def run(self, weaviate_client: weaviate.Client): - run_count = 0 - - while True: - configs = self.create_configs(run_count) - - # stop running when out of configs - if configs is None: - break - - # create configs - indexing_config, evaluation_config, n_runs = configs - - # create pipeline objects - indexing_pipeline, evaluation_pipeline = self.create_pipelines( - indexing_config, - evaluation_config, - ) - - # run indexing pipeline - logger.info( - f"Starting indexing pipeline of RUN {run_count}/{n_runs} with {indexing_config}", - ) - self.runner.run(indexing_pipeline) - - check_weaviate_class_exists( - weaviate_client, - indexing_config["weaviate_class"], - ) - - # run evaluation pipeline - logger.info( - f"Starting evaluation pipeline of run #{run_count} / \ - {n_runs} with {evaluation_config}", - ) - self.runner.run( - input=evaluation_pipeline, - extra_volumes=self.extra_volumes, - ) - - # read metrics from pipeline output - metrics = {} - metrics = get_metrics_latest_run(self.base_path) - - metadata = { - "run_number": run_count, - "date_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - } - - # collect results - self.results.append( - {**metadata, **indexing_config, **evaluation_config, **metrics}, - ) - - run_count += 1 - - return pd.DataFrame(self.results) - - def create_configs(self, run_count: int): - if self.search_method == "grid_search": - # all possible combinations of parameters - all_combinations = list(cartesian_product(self.searchable_params)) - n_runs = 
len(all_combinations)
-
-            # when all combinations have been tried, stop searching
-            if run_count > len(all_combinations) - 1:
-                return None
-
-            # create base config for indexing pipeline
-            pipeline_config = all_combinations[run_count]
-
-        elif self.search_method == "progressive_search":
-            # initialize pipeline config with middle values for each parameter
-            pipeline_config = {}
-            for key, value in self.searchable_params.items():
-                middle_index = int((len(value) - 1) / 2)
-                pipeline_config.update({key: value[middle_index]})
-
-            # make a list of variations to try
-            keys_to_try = []
-            values_to_try = []
-            step = 0
-            for key, values in self.searchable_params.items():
-                if len(values) > 1:  # only variations to try when more than one option
-                    for option in values:
-                        # for the first step we need to try all options, for subsequent
-                        # steps we need not repeat the default starting options (pipeline_config)
-                        if step == 0 or not (
-                            key in pipeline_config and option == pipeline_config[key]
-                        ):
-                            keys_to_try.append(key)
-                            values_to_try.append(option)
-                step += 1
-            variations_to_try = [
-                {keys_to_try[i]: values_to_try[i]} for i in range(len(keys_to_try))
-            ]
-            n_runs = len(variations_to_try) + 1
-
-            # if there are no variations to try, just schedule one run
-            if len(variations_to_try) == 0:
-                variations_to_try = [
-                    {
-                        list(pipeline_config.keys())[0]: list(pipeline_config.values())[
-                            0
-                        ],
-                    },
-                ]
-
-            # when all variations have been tried, stop searching
-            if run_count > len(variations_to_try) - 1:
-                return None
-
-            # update with best performing params
-            if len(self.results):
-                pipeline_config = (
-                    pd.DataFrame(self.results)
-                    .sort_values(self.target_metric, ascending=False)
-                    .iloc[0]
-                    .to_dict()
-                )
-                pipeline_config.update(
-                    {
-                        "embed_model": (
-                            pipeline_config["embed_model_provider"],
-                            pipeline_config["embed_model"],
-                        ),
-                    },
-                )  # TODO: clean this up
-
-            logger.info(f"Trying: {variations_to_try[run_count]}")
-            pipeline_config.update(variations_to_try[run_count])
-
-        else:
-            msg = "Please provide a valid search method"
-            raise ValueError(msg)
-
-        # select the indexing & evaluation parameters
-        indexing_config = {
-            key: pipeline_config[key]
-            for key in {**self.searchable_index_params, **self.searchable_shared_params}
-        }
-        evaluation_config = {
-            key: pipeline_config[key]
-            for key in {**self.searchable_eval_params, **self.searchable_shared_params}
-        }
-
-        # More shared parameters
-        indexing_config["weaviate_class"] = evaluation_config[
-            "weaviate_class"
-        ] = f"Run{run_count}"
-        indexing_config["embed_model_provider"] = evaluation_config[
-            "embed_model_provider"
-        ] = indexing_config["embed_model"][0]
-        indexing_config["embed_model"] = evaluation_config[
-            "embed_model"
-        ] = indexing_config["embed_model"][1]
-
-        return indexing_config, evaluation_config, n_runs
-
-    def create_pipelines(self, indexing_config, evaluation_config):
-        # create indexing pipeline
-
-        indexing_config_copy = indexing_config.copy()
-
-        indexing_config_copy["chunk_args"] = {
-            "chunk_size": indexing_config_copy.pop("chunk_size"),
-            "chunk_overlap": indexing_config_copy.pop("chunk_overlap"),
-        }
-
-        indexing_pipeline = pipeline_index.create_pipeline(
-            **self.shared_args,
-            **self.index_args,
-            **indexing_config_copy,
-            **self.resource_args,
-        )
-
-        # create evaluation pipeline
-        evaluation_pipeline = pipeline_eval.create_pipeline(
-            **self.shared_args,
-            **self.eval_args,
-            **evaluation_config,
-            **self.resource_args,
-        )
-
-        if self.debug:
-            logger.info("\nIntermediary results:")
-            logger.info(pd.DataFrame(self.results))
-            logger.info(f'RUN {indexing_config["weaviate_class"]}')
-            logger.info("\nIndexing pipeline parameters:")
-            logger.info({**self.shared_args, **self.index_args, **indexing_config})
-            logger.info("\nEvaluation pipeline parameters:")
-            logger.info({**self.shared_args, **self.eval_args, **evaluation_config})
-
-        return indexing_pipeline, evaluation_pipeline
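
For reference while migrating: the deleted pipeline_index.py only defined a create_pipeline factory, and the DockerRunner invocation lived separately in the removed ParameterSearch utility. Below is a minimal, self-contained sketch that stitches the two together, using only the Fondant APIs already exercised by the deleted modules (Pipeline, read/apply/write, DockerRunner). The argument values mirror the removed defaults and are illustrative rather than a recommended configuration; in particular the Weaviate URL assumes a local instance reachable from the pipeline containers.

    # Sketch only: inline equivalent of the removed pipeline_index.py defaults.
    import pyarrow as pa
    from fondant.pipeline import Pipeline
    from fondant.pipeline.runner import DockerRunner

    indexing_pipeline = Pipeline(
        name="indexing-pipeline",
        description="Pipeline to prepare and process data for building a RAG solution",
        base_path="./data",  # local artifact directory, as in the removed default
    )

    # Read a slice of wikitext from the Hugging Face hub.
    text = indexing_pipeline.read(
        "load_from_hf_hub",
        arguments={"dataset_name": "wikitext@~parquet", "n_rows_to_load": 1000},
        produces={"text": pa.string()},
    )

    # Chunk and embed the text, then write the embeddings to Weaviate.
    chunks = text.apply(
        "chunk_text",
        arguments={"chunk_args": {"chunk_size": 512, "chunk_overlap": 32}},
    )
    embeddings = chunks.apply(
        "embed_text",
        arguments={
            "model_provider": "huggingface",
            "model": "all-MiniLM-L6-v2",
            "api_keys": {},
        },
    )
    embeddings.write(
        "index_weaviate",
        arguments={
            # Assumes a Weaviate instance on the host, reachable from Docker.
            "weaviate_url": "http://host.docker.internal:8080",
            "class_name": "Pipeline1",
            "overwrite": True,
        },
        cache=False,  # always re-index, matching the removed pipeline
    )

    DockerRunner().run(indexing_pipeline)

Note that host.docker.internal resolves out of the box on Docker Desktop only; on Linux the pipeline containers typically need an extra --add-host=host.docker.internal:host-gateway mapping to reach a Weaviate instance published on the host.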