Skip to content

Commit

Permalink
Update eval_iter_loop.ipynb (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
Hakimovich99 authored Dec 13, 2023
1 parent 3f846a5 commit 68b36a5
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 69 deletions.
2 changes: 1 addition & 1 deletion src/components/aggregate_eval_results/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
fondant[component]==0.8.dev4
fondant[component]==0.8.dev6
219 changes: 152 additions & 67 deletions src/eval_iter_loop.ipynb

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions src/pipeline_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def create_pipeline( # noqa: PLR0913
pipeline_dir: str = "./data-dir",
embed_model_provider: str = "huggingface",
embed_model: str = "all-MiniLM-L6-v2",
embed_api_key: dict = {},
weaviate_url="http://host.docker.internal:8080",
weaviate_class_name: str = "Pipeline1",
# evaluation args
Expand Down Expand Up @@ -43,6 +44,7 @@ def create_pipeline( # noqa: PLR0913
arguments={
"model_provider": embed_model_provider,
"model": embed_model,
"api_keys": embed_api_key,
},
)

Expand Down
2 changes: 2 additions & 0 deletions src/pipeline_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def create_pipeline( # noqa: PLR0913
pipeline_dir: str = "./data-dir",
embed_model_provider: str = "huggingface",
embed_model: str = "all-MiniLM-L6-v2",
embed_api_key: dict = {},
weaviate_url: str = "http://host.docker.internal:8080",
weaviate_class_name: str = "Pipeline1",
overwrite: bool = True,
Expand Down Expand Up @@ -49,6 +50,7 @@ def create_pipeline( # noqa: PLR0913
arguments={
"model_provider": embed_model_provider,
"model": embed_model,
"api_keys": embed_api_key,
},
)

Expand Down
54 changes: 53 additions & 1 deletion src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,36 @@ def create_directory_if_not_exists(path):
return str(p_base_path)


# Read latest chosen component
# Store pipeline results
def store_results(  # noqa: PLR0913
    rag_results,
    shared_args,
    indexing_args,
    evaluation_args,
    index_pipeline_datetime,
    eval_pipeline_datetime,
):
    """Append a record of one index+eval pipeline run to ``rag_results``.

    The record bundles the arguments and timestamps of both pipelines with
    the latest aggregated metrics read from the evaluation pipeline's
    ``aggregate_eval_results`` component output.

    Returns the (mutated) ``rag_results`` list for convenience.
    """
    run_record = {
        "shared_args": shared_args,
        "indexing_datetime": index_pipeline_datetime,
        "indexing_args": indexing_args,
        "evaluation_args": evaluation_args,
        "evaluation_datetime": eval_pipeline_datetime,
        # Latest aggregated metrics written by the evaluation pipeline.
        "agg_metrics": read_latest_data(
            base_path=shared_args["pipeline_dir"],
            pipeline_name="evaluation-pipeline",
            component_name="aggregate_eval_results",
        ),
    }

    rag_results.append(run_record)
    return rag_results


def read_latest_data(base_path: str, pipeline_name: str, component_name: str):
# Specify the path to the 'data' directory
data_directory = f"{base_path}/{pipeline_name}"
Expand Down Expand Up @@ -90,3 +119,26 @@ def extract_timestamp(folder_name):
timestamp_str = folder_name.split("-")[-1]
# Convert the timestamp string to a datetime object
return datetime.strptime(timestamp_str, "%Y%m%d%H%M%S")


# Output pipelines evaluations results dataframe
def output_results(results):
    """Flatten a list of run records into a single pandas DataFrame.

    Each record is flattened one level: dict-valued fields are replaced by
    their items, and DataFrame-valued fields (expected to carry ``metric``
    and ``score`` columns) are replaced by one ``metric -> score`` entry
    per row. All other fields are kept as-is.
    """
    flattened_rows = []

    for record in results:
        row = dict(record)

        for field, payload in record.items():
            if isinstance(payload, dict):
                # Promote nested dict entries to top-level columns.
                row.update(payload)
                del row[field]
            elif isinstance(payload, pd.DataFrame):
                # Turn each (metric, score) pair into its own column.
                for metric_name, metric_score in zip(
                    payload["metric"], payload["score"]
                ):
                    row[metric_name] = metric_score
                del row[field]

        flattened_rows.append(row)

    return pd.DataFrame(flattened_rows)

0 comments on commit 68b36a5

Please sign in to comment.