diff --git a/jupyterlite/files/examples/Fleiss Kappa.ipynb b/jupyterlite/files/examples/Fleiss Kappa.ipynb index 2e1f9fe..255de25 100644 --- a/jupyterlite/files/examples/Fleiss Kappa.ipynb +++ b/jupyterlite/files/examples/Fleiss Kappa.ipynb @@ -1,151 +1,217 @@ { - "metadata": { - "kernelspec": { - "name": "python", - "display_name": "Python (Pyodide)", - "language": "python" - }, - "language_info": { - "codemirror_mode": { - "name": "python", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8" - } - }, - "nbformat_minor": 5, - "nbformat": 4, - "cells": [ - { - "cell_type": "markdown", - "source": "# Fleiss' Kappa \nTo understand how much your judges agree with each other. It is meant to be used with more than two judges.\n\nRead https://www.datanovia.com/en/blog/kappa-coefficient-interpretation/ to learn more.\n\nPlease copy this example and customize it for your own purposes!", - "metadata": {}, - "id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f" - }, - { - "cell_type": "markdown", - "source": "## Imports", - "metadata": {}, - "id": "e3412382" - }, - { - "cell_type": "code", - "source": "import pandas as pd\nfrom js import fetch\nimport json\n\nfrom collections import defaultdict\nfrom statsmodels.stats.inter_rater import aggregate_raters\nfrom statsmodels.stats.inter_rater import fleiss_kappa\nfrom IPython.display import display, Markdown", - "metadata": { - "trusted": true - }, - "execution_count": 1, - "outputs": [], - "id": "4972936a" - }, - { - "cell_type": "markdown", - "source": "## Step 0: Configuration", - "metadata": {}, - "id": "6da26c5e" - }, - { - "cell_type": "code", - "source": "QUEPID_BOOK_NUM = 25", - "metadata": { - "trusted": true - }, - "execution_count": 2, - "outputs": [], - "id": "71803a49-4065-4adf-a69e-cb0fe2d00f22" - }, - { - "cell_type": "markdown", - "source": "## Step 1: Download the Quepid Book", - "metadata": 
{}, - "id": "420416df-9e6a-41b4-987b-7a03c9dd38b3" - }, - { - "cell_type": "code", - "source": "# Generic GET call to a JSON endpoint \nasync def get_json(url):\n resp = await fetch(url)\n resp_text = await resp.text()\n return json.loads(resp_text)\n\n", - "metadata": { - "trusted": true - }, - "execution_count": 3, - "outputs": [], - "id": "31193536-98eb-4b46-ab98-af04ee07c6d3" - }, - { - "cell_type": "code", - "source": "data = await get_json(f'/api/export/books/{QUEPID_BOOK_NUM}')", - "metadata": { - "trusted": true - }, - "execution_count": null, - "outputs": [], - "id": "8fef6231-daa8-467f-ac57-13a144e8a356" - }, - { - "cell_type": "markdown", - "source": "## Step 2: Extract and Prepare Data", - "metadata": {}, - "id": "79d985ad-cd11-44a9-a7e1-0851bc99aef3" - }, - { - "cell_type": "code", - "source": "# Initialize a list to hold the tuples of (doc_id, rating, count)\nratings_data = []\n\n# Iterate through each query-doc pair\nfor pair in data['query_doc_pairs']:\n # Initialize a dictionary to count the ratings for this pair\n ratings_count = defaultdict(int)\n \n # Extract judgements and count the ratings\n for judgement in pair['judgements']:\n rating = judgement['rating']\n ratings_count[rating] += 1\n\n # Append the counts to the ratings_data list\n for rating, count in ratings_count.items():\n ratings_data.append((pair['doc_id'], rating, count))\n", - "metadata": { - "trusted": true - }, - "execution_count": null, - "outputs": [], - "id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91" - }, - { - "cell_type": "markdown", - "source": "## Step 3: Aggregate Raters' Data", - "metadata": {}, - "id": "caf5632b-132a-4e1b-80fe-c8c5ab7f2f3a" - }, - { - "cell_type": "code", - "source": "# Convert ratings_data to a DataFrame\ndf = pd.DataFrame(ratings_data, columns=['doc_id', 'rating', 'count'])\n\n# Use crosstab to create a contingency table\ndata_crosstab = pd.crosstab(index=df['doc_id'], columns=df['rating'], values=df['count'], aggfunc='sum')\n\n# Drop any rows missing 
judgements\ndata_crosstab = data_crosstab.dropna(how='any')\n\n# Convert the DataFrame to the format expected by aggregate_raters\ndata_for_aggregation = data_crosstab.values\n\n# Aggregate the raters' data\ntable, _ = aggregate_raters(data_for_aggregation)", - "metadata": { - "trusted": true - }, - "execution_count": null, - "outputs": [], - "id": "a7598308-129b-4628-ad3a-fc3d703f8205" - }, - { - "cell_type": "markdown", - "source": "## Step 4: Compute Fleiss' Kappa", - "metadata": {}, - "id": "25c79fbc" - }, - { - "cell_type": "code", - "source": "kappa = fleiss_kappa(table, method='fleiss')\ndisplay(Markdown(f\"## Fleiss' Kappa: {kappa:.4f}\"))", - "metadata": { - "trusted": true - }, - "execution_count": null, - "outputs": [], - "id": "25a613f9" - }, - { - "cell_type": "markdown", - "source": "_This notebook was last updated 19-FEB-2024_", - "metadata": {}, - "id": "5704579e-2321-4629-8de0-6608b428e2b6" - }, - { - "cell_type": "code", - "source": "", - "metadata": {}, - "execution_count": null, - "outputs": [], - "id": "7203f6cc-c068-4f75-a59a-1f49c5555319" - } - ] -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f", + "metadata": {}, + "source": [ + "# Fleiss' Kappa \n", + "To understand how much your judges agree with each other. It is meant to be used with more than two judges.\n", + "\n", + "Read https://www.datanovia.com/en/blog/kappa-coefficient-interpretation/ to learn more.\n", + "\n", + "Please copy this example and customize it for your own purposes!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "e3412382", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4972936a", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from js import fetch\n", + "import json\n", + "\n", + "from collections import defaultdict\n", + "from statsmodels.stats.inter_rater import aggregate_raters\n", + "from statsmodels.stats.inter_rater import fleiss_kappa\n", + "from IPython.display import display, Markdown" + ] + }, + { + "cell_type": "markdown", + "id": "6da26c5e", + "metadata": {}, + "source": [ + "## Step 0: Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "71803a49-4065-4adf-a69e-cb0fe2d00f22", + "metadata": {}, + "outputs": [], + "source": [ + "QUEPID_BOOK_NUM = 25" + ] + }, + { + "cell_type": "markdown", + "id": "420416df-9e6a-41b4-987b-7a03c9dd38b3", + "metadata": {}, + "source": [ + "## Step 1: Download the Quepid Book" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "31193536-98eb-4b46-ab98-af04ee07c6d3", + "metadata": {}, + "outputs": [], + "source": [ + "# Generic GET call to a JSON endpoint \n", + "async def get_json(url):\n", + " resp = await fetch(url)\n", + " resp_text = await resp.text()\n", + " return json.loads(resp_text)\n", + "\n", + "async def get_text(url):\n", + " resp = await fetch(url)\n", + " resp_text = await resp.text()\n", + " return resp_text\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fef6231-daa8-467f-ac57-13a144e8a356", + "metadata": {}, + "outputs": [], + "source": [ + "data = await get_text(f'/api/books/{QUEPID_BOOK_NUM}.csv')" + ] + }, + { + "cell_type": "markdown", + "id": "79d985ad-cd11-44a9-a7e1-0851bc99aef3", + "metadata": {}, + "source": [ + "## Step 2: Extract and Prepare Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91", + "metadata": {}, + 
"outputs": [], + "source": [ + "# Initialize a list to hold the tuples of (doc_id, rating, count)\n", + "ratings_data = []\n", + "\n", + "# Iterate through each query-doc pair\n", + "for pair in data['query_doc_pairs']:\n", + " # Initialize a dictionary to count the ratings for this pair\n", + " ratings_count = defaultdict(int)\n", + " \n", + " # Extract judgements and count the ratings\n", + " for judgement in pair['judgements']:\n", + " rating = judgement['rating']\n", + " ratings_count[rating] += 1\n", + "\n", + " # Append the counts to the ratings_data list\n", + " for rating, count in ratings_count.items():\n", + " ratings_data.append((pair['doc_id'], rating, count))\n" + ] + }, + { + "cell_type": "markdown", + "id": "caf5632b-132a-4e1b-80fe-c8c5ab7f2f3a", + "metadata": {}, + "source": [ + "## Step 3: Aggregate Raters' Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7598308-129b-4628-ad3a-fc3d703f8205", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert ratings_data to a DataFrame\n", + "df = pd.DataFrame(ratings_data, columns=['doc_id', 'rating', 'count'])\n", + "\n", + "# Use crosstab to create a contingency table\n", + "data_crosstab = pd.crosstab(index=df['doc_id'], columns=df['rating'], values=df['count'], aggfunc='sum')\n", + "\n", + "# Drop any rows missing judgements\n", + "data_crosstab = data_crosstab.dropna(how='any')\n", + "\n", + "# Convert the DataFrame to the format expected by aggregate_raters\n", + "data_for_aggregation = data_crosstab.values\n", + "\n", + "# Aggregate the raters' data\n", + "table, _ = aggregate_raters(data_for_aggregation)" + ] + }, + { + "cell_type": "markdown", + "id": "25c79fbc", + "metadata": {}, + "source": [ + "## Step 4: Compute Fleiss' Kappa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25a613f9", + "metadata": {}, + "outputs": [], + "source": [ + "kappa = fleiss_kappa(table, method='fleiss')\n", + "display(Markdown(f\"## Fleiss' Kappa: 
{kappa:.4f}\"))" + ] + }, + { + "cell_type": "markdown", + "id": "5704579e-2321-4629-8de0-6608b428e2b6", + "metadata": {}, + "source": [ + "_This notebook was last updated 16-JAN-2025_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7203f6cc-c068-4f75-a59a-1f49c5555319", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyterlite/files/examples/Jaccard and RBO Comparison.ipynb b/jupyterlite/files/examples/Jaccard and RBO Comparison.ipynb index e86fc58..b806d64 100644 --- a/jupyterlite/files/examples/Jaccard and RBO Comparison.ipynb +++ b/jupyterlite/files/examples/Jaccard and RBO Comparison.ipynb @@ -1,173 +1,690 @@ { - "metadata": { - "kernelspec": { - "name": "python", - "display_name": "Python (Pyodide)", - "language": "python" - }, - "language_info": { - "codemirror_mode": { - "name": "python", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Jaccard and RBO Comparison \n", + "To understand the magnitude of changes to your query result sets, you can compare multiple snapshots to each other.\n", + "\n", + "This notebook provides both Jaccard and Rank Biased Overlap (RBO) metrics.\n", + "\n", + "Please copy this example and customize it for your own purposes!" 
+ ] }, - "nbformat_minor": 4, - "nbformat": 4, - "cells": [ - { - "cell_type": "markdown", - "source": "# Jaccard and RBO Comparison \nTo understand the magnatude of changes to your query result sets, you can compare multiple snapshots to each other.\n\nThis notebook provides both Jaccard and Rank Biased Overlap (RBO) metrics.\n\nPlease copy this example and customize it for your own purposes!", - "metadata": {} - }, - { - "cell_type": "code", - "source": "from js import fetch\nfrom typing import List, Optional, Union\n\nimport json\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\n\nimport piplite\nawait piplite.install('seaborn')\nawait piplite.install('rbo')\n\nimport rbo\nimport seaborn as sns\n\nimport os", - "metadata": { - "trusted": true - }, - "execution_count": 1, - "outputs": [] - }, - { - "cell_type": "code", - "source": "# Generic GET call to a JSON endpoint \nasync def get_json(url):\n resp = await fetch(url)\n resp_text = await resp.text()\n return json.loads(resp_text)", - "metadata": { - "trusted": true - }, - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "code", - "source": "# Basic Quepid API client methods\n\nasync def get_snapshots(case):\n response = await get_json(f'/api/cases/{case}/snapshots?shallow=true')\n return [{'id': snapshot['id'], 'name': snapshot['name']} for snapshot in response['snapshots']]\n\nasync def get_cases_with_snapshots():\n cases = await get_cases()\n cases_with_snapshots = [{\n 'id': case['id'],\n 'name': case['name'],\n 'snapshots': [ {\n 'id': snapshot['id'],\n 'name': snapshot['name']\n } for snapshot in (await get_snapshots(case['id'])) ]\n } for case in cases]\n return cases_with_snapshots\n", - "metadata": { - "trusted": true - }, - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "code", - "source": "# Load snapshot, return dict of queries and their (number of results, avg. 
score, [doc ids])\nasync def load_snapshot(case_id, snapshot_id):\n snapshot = await get_json(f'/api/cases/{case_id}/snapshots/{snapshot_id}')\n docs = snapshot['docs']\n queries = snapshot['queries']\n \n # scores are a list of dicts, group them by query\n scores_list = snapshot['scores']\n scores = {}\n for scores_dict in scores_list:\n scores[scores_dict['query_id']] = scores_dict\n \n return pd.DataFrame({\n \"num_results\": [scores[query['query_id']]['number_of_results'] for query in queries],\n \"score\": [scores[query['query_id']]['score'] for query in queries],\n \"docs\": [[doc['id'] for doc in docs[str(query['query_id'])] if doc['rated_only'] == False] for query in queries]\n },\n index=pd.Series(name='query', data=[query['query_text'] for query in queries])\n )\n\nawait load_snapshot(case_id=6789, snapshot_id=2471)", - "metadata": { - "trusted": true - }, - "execution_count": 4, - "outputs": [ - { - "execution_count": 4, - "output_type": "execute_result", - "data": { - "text/plain": " num_results score \\\nquery \nprojector screen 1 1.0 \nnotebook 1 1.0 \niphone 8 1 1.0 \nprinter 1 1.0 \ncomputer 1 1.0 \n... ... ... \nwindows 10 1 1.0 \nmicrowave 1 1.0 \nbluetooth speakers 1 1.0 \ncoffee 1 1.0 \nvans 1 1.0 \n\n docs \nquery \nprojector screen [1069226, 47471, 490523, 1229109, 1229118, 325... \nnotebook [3851056, 3959000, 1550833, 1684763, 1675257, ... \niphone 8 [2048598, 1648546, 79524888, 1857711, 3613408,... \nprinter [3849563, 2225354, 1569761, 798960, 377837, 13... \ncomputer [560468, 532095, 560475, 523407, 693956, 56047... \n... ... \nwindows 10 [4481689, 3902727, 1560529, 1797902, 3155116, ... \nmicrowave [79513345, 4020048, 1768856, 2936032] \nbluetooth speakers [1993197, 3537784, 279672, 2663204, 558184, 33... \ncoffee [1996660, 2102472, 79583150, 1357989, 656359, ... \nvans [78503576, 79118095, 77388459, 78322005, 79013... \n\n[135 rows x 3 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
num_resultsscoredocs
query
projector screen11.0[1069226, 47471, 490523, 1229109, 1229118, 325...
notebook11.0[3851056, 3959000, 1550833, 1684763, 1675257, ...
iphone 811.0[2048598, 1648546, 79524888, 1857711, 3613408,...
printer11.0[3849563, 2225354, 1569761, 798960, 377837, 13...
computer11.0[560468, 532095, 560475, 523407, 693956, 56047...
............
windows 1011.0[4481689, 3902727, 1560529, 1797902, 3155116, ...
microwave11.0[79513345, 4020048, 1768856, 2936032]
bluetooth speakers11.0[1993197, 3537784, 279672, 2663204, 558184, 33...
coffee11.0[1996660, 2102472, 79583150, 1357989, 656359, ...
vans11.0[78503576, 79118095, 77388459, 78322005, 79013...
\n

135 rows × 3 columns

\n
" - }, - "metadata": {} - } - ] - }, - { - "cell_type": "code", - "source": "os.environ[\"TQDM_DISABLE\"] = \"1\"", - "metadata": { - "trusted": true - }, - "execution_count": 5, - "outputs": [] - }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from js import fetch\n", + "from typing import List, Optional, Union\n", + "\n", + "import json\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import piplite\n", + "await piplite.install('seaborn')\n", + "await piplite.install('rbo')\n", + "\n", + "import rbo\n", + "import seaborn as sns\n", + "\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Generic GET call to a JSON endpoint \n", + "async def get_json(url):\n", + " resp = await fetch(url)\n", + " resp_text = await resp.text()\n", + " return json.loads(resp_text)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Basic Quepid API client methods\n", + "\n", + "async def get_snapshots(case):\n", + " response = await get_json(f'/api/cases/{case}/snapshots?shallow=true')\n", + " return [{'id': snapshot['id'], 'name': snapshot['name']} for snapshot in response['snapshots']]\n", + "\n", + "async def get_cases_with_snapshots():\n", + " cases = await get_cases()\n", + " cases_with_snapshots = [{\n", + " 'id': case['id'],\n", + " 'name': case['name'],\n", + " 'snapshots': [ {\n", + " 'id': snapshot['id'],\n", + " 'name': snapshot['name']\n", + " } for snapshot in (await get_snapshots(case['id'])) ]\n", + " } for case in cases]\n", + " return cases_with_snapshots\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "def jaccard(l1, l2, max_n):\n if len(l1) == 0 and len(l2) == 0:\n return 1\n max_len = min(len(l1), len(l2), max_n)\n 
set1 = set(l1[:max_len])\n set2 = set(l2[:max_len])\n intersection = len(set1.intersection(set2))\n union = len(set1) + len(set2) - intersection\n return float(intersection) / union\n\nasync def load_snapshots(case_id1, snapshot_id1, case_id2, snapshot_id2):\n df_a = await load_snapshot(case_id1, snapshot_id1)\n df_b = await load_snapshot(case_id2, snapshot_id2)\n return df_a.merge(df_b, on='query')\n\nasync def compare(case_id1, snapshot_id1, case_id2, snapshot_id2):\n df = await load_snapshots(case_id1, snapshot_id1, case_id2, snapshot_id2)\n \n df['jaccard'] = df.apply(lambda row: jaccard(row['docs_x'], row['docs_y'], 10), axis=1)\n df['rbo'] = df.apply(lambda row: rbo.RankingSimilarity(row['docs_x'], row['docs_y']).rbo(), axis=1)\n df['score_delta'] = df['score_y'] - df['score_x']\n df.name = f\"Case {case_id1} snapshot {snapshot_id1} vs. case {case_id1} snapshot {snapshot_id2}\"\n return df\n\n\n\nawait compare(case_id1=6789, snapshot_id1=2471, case_id2=6789, snapshot_id2=2472)", - "metadata": { - "trusted": true - }, - "execution_count": 6, - "outputs": [ - { - "name": "stderr", - "text": "/lib/python3.11/site-packages/rbo/rbo.py:129: TqdmMonitorWarning: tqdm:disabling monitor support (monitor_interval = 0) due to:\ncan't start new thread\n for d in tqdm(range(1, k), disable=~self.verbose):\n", - "output_type": "stream" - }, - { - "execution_count": 6, - "output_type": "execute_result", - "data": { - "text/plain": " num_results_x score_x \\\nquery \nprojector screen 1 1.0 \nnotebook 1 1.0 \niphone 8 1 1.0 \nprinter 1 1.0 \ncomputer 1 1.0 \n... ... ... \nwindows 10 1 1.0 \nmicrowave 1 1.0 \nbluetooth speakers 1 1.0 \ncoffee 1 1.0 \nvans 1 1.0 \n\n docs_x \\\nquery \nprojector screen [1069226, 47471, 490523, 1229109, 1229118, 325... \nnotebook [3851056, 3959000, 1550833, 1684763, 1675257, ... \niphone 8 [2048598, 1648546, 79524888, 1857711, 3613408,... \nprinter [3849563, 2225354, 1569761, 798960, 377837, 13... 
\ncomputer [560468, 532095, 560475, 523407, 693956, 56047... \n... ... \nwindows 10 [4481689, 3902727, 1560529, 1797902, 3155116, ... \nmicrowave [79513345, 4020048, 1768856, 2936032] \nbluetooth speakers [1993197, 3537784, 279672, 2663204, 558184, 33... \ncoffee [1996660, 2102472, 79583150, 1357989, 656359, ... \nvans [78503576, 79118095, 77388459, 78322005, 79013... \n\n num_results_y score_y \\\nquery \nprojector screen 1 1.0 \nnotebook 1 1.0 \niphone 8 1 1.0 \nprinter 1 1.0 \ncomputer 1 1.0 \n... ... ... \nwindows 10 1 1.0 \nmicrowave 1 1.0 \nbluetooth speakers 1 1.0 \ncoffee 1 1.0 \nvans 1 1.0 \n\n docs_y \\\nquery \nprojector screen [1069226, 47471, 490523, 1229109, 1229118, 325... \nnotebook [3851056, 3959000, 1550833, 1684763, 1675257, ... \niphone 8 [2048598, 1648546, 79524888, 1857711, 3613408,... \nprinter [3849563, 2225354, 1569761, 798960, 377837, 13... \ncomputer [560468, 532095, 560475, 523407, 693956, 56047... \n... ... \nwindows 10 [4481689, 3902727, 1560529, 1797902, 3155116, ... \nmicrowave [79513345, 4020048, 1768856, 2936032] \nbluetooth speakers [1993197, 3537784, 279672, 2663204, 558184, 33... \ncoffee [1996660, 2102472, 79583150, 1357989, 656359, ... \nvans [78503576, 79118095, 77388459, 78322005, 79013... \n\n jaccard rbo score_delta \nquery \nprojector screen 1.0 1.0 0.0 \nnotebook 1.0 1.0 0.0 \niphone 8 1.0 1.0 0.0 \nprinter 1.0 1.0 0.0 \ncomputer 1.0 1.0 0.0 \n... ... ... ... \nwindows 10 1.0 1.0 0.0 \nmicrowave 1.0 1.0 0.0 \nbluetooth speakers 1.0 1.0 0.0 \ncoffee 1.0 1.0 0.0 \nvans 1.0 1.0 0.0 \n\n[135 rows x 9 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
num_results_xscore_xdocs_xnum_results_yscore_ydocs_yjaccardrboscore_delta
query
projector screen11.0[1069226, 47471, 490523, 1229109, 1229118, 325...11.0[1069226, 47471, 490523, 1229109, 1229118, 325...1.01.00.0
notebook11.0[3851056, 3959000, 1550833, 1684763, 1675257, ...11.0[3851056, 3959000, 1550833, 1684763, 1675257, ...1.01.00.0
iphone 811.0[2048598, 1648546, 79524888, 1857711, 3613408,...11.0[2048598, 1648546, 79524888, 1857711, 3613408,...1.01.00.0
printer11.0[3849563, 2225354, 1569761, 798960, 377837, 13...11.0[3849563, 2225354, 1569761, 798960, 377837, 13...1.01.00.0
computer11.0[560468, 532095, 560475, 523407, 693956, 56047...11.0[560468, 532095, 560475, 523407, 693956, 56047...1.01.00.0
..............................
windows 1011.0[4481689, 3902727, 1560529, 1797902, 3155116, ...11.0[4481689, 3902727, 1560529, 1797902, 3155116, ...1.01.00.0
microwave11.0[79513345, 4020048, 1768856, 2936032]11.0[79513345, 4020048, 1768856, 2936032]1.01.00.0
bluetooth speakers11.0[1993197, 3537784, 279672, 2663204, 558184, 33...11.0[1993197, 3537784, 279672, 2663204, 558184, 33...1.01.00.0
coffee11.0[1996660, 2102472, 79583150, 1357989, 656359, ...11.0[1996660, 2102472, 79583150, 1357989, 656359, ...1.01.00.0
vans11.0[78503576, 79118095, 77388459, 78322005, 79013...11.0[78503576, 79118095, 77388459, 78322005, 79013...1.01.00.0
\n

135 rows × 9 columns

\n
" - }, - "metadata": {} - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
num_resultsscoredocs
query
projector screen11.0[1069226, 47471, 490523, 1229109, 1229118, 325...
notebook11.0[3851056, 3959000, 1550833, 1684763, 1675257, ...
iphone 811.0[2048598, 1648546, 79524888, 1857711, 3613408,...
printer11.0[3849563, 2225354, 1569761, 798960, 377837, 13...
computer11.0[560468, 532095, 560475, 523407, 693956, 56047...
............
windows 1011.0[4481689, 3902727, 1560529, 1797902, 3155116, ...
microwave11.0[79513345, 4020048, 1768856, 2936032]
bluetooth speakers11.0[1993197, 3537784, 279672, 2663204, 558184, 33...
coffee11.0[1996660, 2102472, 79583150, 1357989, 656359, ...
vans11.0[78503576, 79118095, 77388459, 78322005, 79013...
\n", + "

135 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " num_results score \\\n", + "query \n", + "projector screen 1 1.0 \n", + "notebook 1 1.0 \n", + "iphone 8 1 1.0 \n", + "printer 1 1.0 \n", + "computer 1 1.0 \n", + "... ... ... \n", + "windows 10 1 1.0 \n", + "microwave 1 1.0 \n", + "bluetooth speakers 1 1.0 \n", + "coffee 1 1.0 \n", + "vans 1 1.0 \n", + "\n", + " docs \n", + "query \n", + "projector screen [1069226, 47471, 490523, 1229109, 1229118, 325... \n", + "notebook [3851056, 3959000, 1550833, 1684763, 1675257, ... \n", + "iphone 8 [2048598, 1648546, 79524888, 1857711, 3613408,... \n", + "printer [3849563, 2225354, 1569761, 798960, 377837, 13... \n", + "computer [560468, 532095, 560475, 523407, 693956, 56047... \n", + "... ... \n", + "windows 10 [4481689, 3902727, 1560529, 1797902, 3155116, ... \n", + "microwave [79513345, 4020048, 1768856, 2936032] \n", + "bluetooth speakers [1993197, 3537784, 279672, 2663204, 558184, 33... \n", + "coffee [1996660, 2102472, 79583150, 1357989, 656359, ... \n", + "vans [78503576, 79118095, 77388459, 78322005, 79013... 
\n", + "\n", + "[135 rows x 3 columns]" ] - }, - { - "cell_type": "code", - "source": "import matplotlib\nmatplotlib.rc_file_defaults()\n\ndef plot_compare(df):\n figure, axes = plt.subplots(1, 3, figsize=(10, 4))\n figure.suptitle(df.name)\n\n sns.barplot(ax=axes[0], x=df['score_delta'], y=df.index, width=0.3, color='darkgrey')\n axes[0].set(xlim=(-1, 1))\n axes[0].set_xlabel('Change in Score')\n axes[0].set_ylabel('')\n axes[0].set_facecolor((0.90, 0.90, 0.90))\n axes[0].grid(True)\n axes[0].spines['top'].set_visible(False)\n axes[0].spines['right'].set_visible(False)\n axes[0].spines['bottom'].set_visible(False)\n axes[0].spines['left'].set_visible(False)\n axes[0].set_axisbelow(True)\n axes[0].xaxis.grid(color='w', linestyle='solid')\n axes[0].yaxis.grid(color='w', linestyle='solid')\n \n sns.heatmap(df[['jaccard']], ax=axes[1], cmap='crest', annot=True, xticklabels=False, yticklabels=False)\n axes[1].set_xlabel('Jaccard Similiarity')\n axes[1].set_ylabel('')\n \n sns.heatmap(df[['rbo']], ax=axes[2], cmap='crest', annot=True, xticklabels=False, yticklabels=False)\n axes[2].set_xlabel('Rank Biased Overlap')\n axes[2].set_ylabel('')\n \n plt.show()\n \ndf = await compare(case_id1=6789, snapshot_id1=2471, case_id2=6789, snapshot_id2=2473)\n", - "metadata": { - "trusted": true - }, - "execution_count": 7, - "outputs": [] - }, + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load snapshot, return dict of queries and their (number of results, avg. 
score, [doc ids])\n", + "async def load_snapshot(case_id, snapshot_id):\n", + " snapshot = await get_json(f'/api/cases/{case_id}/snapshots/{snapshot_id}')\n", + " docs = snapshot['docs']\n", + " queries = snapshot['queries']\n", + " \n", + " # scores are a list of dicts, group them by query\n", + " scores_list = snapshot['scores']\n", + " scores = {}\n", + " for scores_dict in scores_list:\n", + " scores[scores_dict['query_id']] = scores_dict\n", + " \n", + " return pd.DataFrame({\n", + " \"num_results\": [scores[query['query_id']]['number_of_results'] for query in queries],\n", + " \"score\": [scores[query['query_id']]['score'] for query in queries],\n", + " \"docs\": [[doc['id'] for doc in docs[str(query['query_id'])] if doc['rated_only'] == False] for query in queries]\n", + " },\n", + " index=pd.Series(name='query', data=[query['query_text'] for query in queries])\n", + " )\n", + "\n", + "await load_snapshot(case_id=6789, snapshot_id=2471)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"TQDM_DISABLE\"] = \"1\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "source": "## Overall Jaccard and RBO Scores", - "metadata": {} + "name": "stderr", + "output_type": "stream", + "text": [ + "/lib/python3.11/site-packages/rbo/rbo.py:129: TqdmMonitorWarning: tqdm:disabling monitor support (monitor_interval = 0) due to:\n", + "can't start new thread\n", + " for d in tqdm(range(1, k), disable=~self.verbose):\n" + ] }, { - "cell_type": "code", - "source": "print(f\"Overall Jaccard Score: {df['jaccard'].mean()}\\nOverall RBO Score: {df['rbo'].mean()}\")", - "metadata": { - "trusted": true - }, - "execution_count": 8, - "outputs": [ - { - "name": "stdout", - "text": "Overall Jaccard Score: 1.0\nOverall RBO Score: 1.0\n", - "output_type": "stream" - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
num_results_xscore_xdocs_xnum_results_yscore_ydocs_yjaccardrboscore_delta
query
projector screen11.0[1069226, 47471, 490523, 1229109, 1229118, 325...11.0[1069226, 47471, 490523, 1229109, 1229118, 325...1.01.00.0
notebook11.0[3851056, 3959000, 1550833, 1684763, 1675257, ...11.0[3851056, 3959000, 1550833, 1684763, 1675257, ...1.01.00.0
iphone 811.0[2048598, 1648546, 79524888, 1857711, 3613408,...11.0[2048598, 1648546, 79524888, 1857711, 3613408,...1.01.00.0
printer11.0[3849563, 2225354, 1569761, 798960, 377837, 13...11.0[3849563, 2225354, 1569761, 798960, 377837, 13...1.01.00.0
computer11.0[560468, 532095, 560475, 523407, 693956, 56047...11.0[560468, 532095, 560475, 523407, 693956, 56047...1.01.00.0
..............................
windows 1011.0[4481689, 3902727, 1560529, 1797902, 3155116, ...11.0[4481689, 3902727, 1560529, 1797902, 3155116, ...1.01.00.0
microwave11.0[79513345, 4020048, 1768856, 2936032]11.0[79513345, 4020048, 1768856, 2936032]1.01.00.0
bluetooth speakers11.0[1993197, 3537784, 279672, 2663204, 558184, 33...11.0[1993197, 3537784, 279672, 2663204, 558184, 33...1.01.00.0
coffee11.0[1996660, 2102472, 79583150, 1357989, 656359, ...11.0[1996660, 2102472, 79583150, 1357989, 656359, ...1.01.00.0
vans11.0[78503576, 79118095, 77388459, 78322005, 79013...11.0[78503576, 79118095, 77388459, 78322005, 79013...1.01.00.0
\n", + "

135 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " num_results_x score_x \\\n", + "query \n", + "projector screen 1 1.0 \n", + "notebook 1 1.0 \n", + "iphone 8 1 1.0 \n", + "printer 1 1.0 \n", + "computer 1 1.0 \n", + "... ... ... \n", + "windows 10 1 1.0 \n", + "microwave 1 1.0 \n", + "bluetooth speakers 1 1.0 \n", + "coffee 1 1.0 \n", + "vans 1 1.0 \n", + "\n", + " docs_x \\\n", + "query \n", + "projector screen [1069226, 47471, 490523, 1229109, 1229118, 325... \n", + "notebook [3851056, 3959000, 1550833, 1684763, 1675257, ... \n", + "iphone 8 [2048598, 1648546, 79524888, 1857711, 3613408,... \n", + "printer [3849563, 2225354, 1569761, 798960, 377837, 13... \n", + "computer [560468, 532095, 560475, 523407, 693956, 56047... \n", + "... ... \n", + "windows 10 [4481689, 3902727, 1560529, 1797902, 3155116, ... \n", + "microwave [79513345, 4020048, 1768856, 2936032] \n", + "bluetooth speakers [1993197, 3537784, 279672, 2663204, 558184, 33... \n", + "coffee [1996660, 2102472, 79583150, 1357989, 656359, ... \n", + "vans [78503576, 79118095, 77388459, 78322005, 79013... \n", + "\n", + " num_results_y score_y \\\n", + "query \n", + "projector screen 1 1.0 \n", + "notebook 1 1.0 \n", + "iphone 8 1 1.0 \n", + "printer 1 1.0 \n", + "computer 1 1.0 \n", + "... ... ... \n", + "windows 10 1 1.0 \n", + "microwave 1 1.0 \n", + "bluetooth speakers 1 1.0 \n", + "coffee 1 1.0 \n", + "vans 1 1.0 \n", + "\n", + " docs_y \\\n", + "query \n", + "projector screen [1069226, 47471, 490523, 1229109, 1229118, 325... \n", + "notebook [3851056, 3959000, 1550833, 1684763, 1675257, ... \n", + "iphone 8 [2048598, 1648546, 79524888, 1857711, 3613408,... \n", + "printer [3849563, 2225354, 1569761, 798960, 377837, 13... \n", + "computer [560468, 532095, 560475, 523407, 693956, 56047... \n", + "... ... \n", + "windows 10 [4481689, 3902727, 1560529, 1797902, 3155116, ... \n", + "microwave [79513345, 4020048, 1768856, 2936032] \n", + "bluetooth speakers [1993197, 3537784, 279672, 2663204, 558184, 33... 
\n", + "coffee [1996660, 2102472, 79583150, 1357989, 656359, ... \n", + "vans [78503576, 79118095, 77388459, 78322005, 79013... \n", + "\n", + " jaccard rbo score_delta \n", + "query \n", + "projector screen 1.0 1.0 0.0 \n", + "notebook 1.0 1.0 0.0 \n", + "iphone 8 1.0 1.0 0.0 \n", + "printer 1.0 1.0 0.0 \n", + "computer 1.0 1.0 0.0 \n", + "... ... ... ... \n", + "windows 10 1.0 1.0 0.0 \n", + "microwave 1.0 1.0 0.0 \n", + "bluetooth speakers 1.0 1.0 0.0 \n", + "coffee 1.0 1.0 0.0 \n", + "vans 1.0 1.0 0.0 \n", + "\n", + "[135 rows x 9 columns]" ] - }, + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def jaccard(l1, l2, max_n):\n", + " if len(l1) == 0 and len(l2) == 0:\n", + " return 1\n", + " max_len = min(len(l1), len(l2), max_n)\n", + " set1 = set(l1[:max_len])\n", + " set2 = set(l2[:max_len])\n", + " intersection = len(set1.intersection(set2))\n", + " union = len(set1) + len(set2) - intersection\n", + " return float(intersection) / union\n", + "\n", + "async def load_snapshots(case_id1, snapshot_id1, case_id2, snapshot_id2):\n", + " df_a = await load_snapshot(case_id1, snapshot_id1)\n", + " df_b = await load_snapshot(case_id2, snapshot_id2)\n", + " return df_a.merge(df_b, on='query')\n", + "\n", + "async def compare(case_id1, snapshot_id1, case_id2, snapshot_id2):\n", + " df = await load_snapshots(case_id1, snapshot_id1, case_id2, snapshot_id2)\n", + " \n", + " df['jaccard'] = df.apply(lambda row: jaccard(row['docs_x'], row['docs_y'], 10), axis=1)\n", + " df['rbo'] = df.apply(lambda row: rbo.RankingSimilarity(row['docs_x'], row['docs_y']).rbo(), axis=1)\n", + " df['score_delta'] = df['score_y'] - df['score_x']\n", + " df.name = f\"Case {case_id1} snapshot {snapshot_id1} vs. 
case {case_id2} snapshot {snapshot_id2}\"\n", + " return df\n", + "\n", + "\n", + "\n", + "await compare(case_id1=6789, snapshot_id1=2471, case_id2=6789, snapshot_id2=2472)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib\n", + "matplotlib.rc_file_defaults()\n", + "\n", + "def plot_compare(df):\n", + " figure, axes = plt.subplots(1, 3, figsize=(10, 4))\n", + " figure.suptitle(df.name)\n", + "\n", + " sns.barplot(ax=axes[0], x=df['score_delta'], y=df.index, width=0.3, color='darkgrey')\n", + " axes[0].set(xlim=(-1, 1))\n", + " axes[0].set_xlabel('Change in Score')\n", + " axes[0].set_ylabel('')\n", + " axes[0].set_facecolor((0.90, 0.90, 0.90))\n", + " axes[0].grid(True)\n", + " axes[0].spines['top'].set_visible(False)\n", + " axes[0].spines['right'].set_visible(False)\n", + " axes[0].spines['bottom'].set_visible(False)\n", + " axes[0].spines['left'].set_visible(False)\n", + " axes[0].set_axisbelow(True)\n", + " axes[0].xaxis.grid(color='w', linestyle='solid')\n", + " axes[0].yaxis.grid(color='w', linestyle='solid')\n", + " \n", + " sns.heatmap(df[['jaccard']], ax=axes[1], cmap='crest', annot=True, xticklabels=False, yticklabels=False)\n", + " axes[1].set_xlabel('Jaccard Similarity')\n", + " axes[1].set_ylabel('')\n", + " \n", + " sns.heatmap(df[['rbo']], ax=axes[2], cmap='crest', annot=True, xticklabels=False, yticklabels=False)\n", + " axes[2].set_xlabel('Rank Biased Overlap')\n", + " axes[2].set_ylabel('')\n", + " \n", + " plt.show()\n", + " \n", + "df = await compare(case_id1=6789, snapshot_id1=2471, case_id2=6789, snapshot_id2=2473)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overall Jaccard and RBO Scores" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "source": "## Query Level Jaccard and RBO Scores", - "metadata": {} - }, + "name": "stdout", + "output_type":
"stream", + "text": [ + "Overall Jaccard Score: 1.0\n", + "Overall RBO Score: 1.0\n" + ] + } + ], + "source": [ + "print(f\"Overall Jaccard Score: {df['jaccard'].mean()}\\nOverall RBO Score: {df['rbo'].mean()}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query Level Jaccard and RBO Scores" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "plot_compare(df)", - "metadata": { - "trusted": true - }, - "execution_count": 9, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/png": "\n" - }, - "metadata": {} - } + "data": { + "image/png": "\n", + "text/plain": [ + "
" ] - }, - { - "cell_type": "markdown", - "source": "_This notebook was last updated 19-FEB-2024_", - "metadata": {} - }, - { - "cell_type": "code", - "source": "", - "metadata": {}, - "execution_count": null, - "outputs": [] + }, + "metadata": {}, + "output_type": "display_data" } - ] -} \ No newline at end of file + ], + "source": [ + "plot_compare(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook was last updated 19-FEB-2024_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/jupyterlite/files/examples/Multiple Raters Analysis.ipynb b/jupyterlite/files/examples/Multiple Raters Analysis.ipynb index 53c3283..990138e 100644 --- a/jupyterlite/files/examples/Multiple Raters Analysis.ipynb +++ b/jupyterlite/files/examples/Multiple Raters Analysis.ipynb @@ -1,581 +1,2532 @@ { - "metadata": { - "kernelspec": { - "name": "python", - "display_name": "Python (Pyodide)", - "language": "python" - }, - "language_info": { - "codemirror_mode": { - "name": "python", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8" - } + "cells": [ + { + "cell_type": "markdown", + "id": "5aa3a7af", + "metadata": {}, + "source": [ + "# Analysis of Judgements with Multiple Raters\n", + "\n", + "This notebook is an example of how we can analyze query / document pairs judgements in the case of multiple judges.\n", + "\n", 
+ "**Why?**\n", + "When we have one single rater for our judgements, we have no other choice than trusting the rating of that judge. However, when we have multiple judges rating the same pairs, this gives us much more information about the actual rating of the pair. For example if all judges disagree, it might be a good indicator that the pair is hard to judge, for example because the intent is not 100% clear. Conversely, if all judges agree, we can have much higher confidence in this rating.\n", + "\n", + "**Goal**\n", + "Analyze agreement between raters to measure confidence about each rating, in particular we would like to:\n", + "\n", + " - analyze the distribution of grades\n", + " - measure raters' consistency\n", + " - identify suspicious ratings in order to clean the dataset and make sure we only keep pairs for which we have enough confidence in the rating.\n", + "\n", + "\n", + "**Data**\n", + "For this we need a dataset with the following information:\n", + " \n", + " - pairs \n", + " - Each pair is rated by 3 different judges. In this example we consider a rating grade between 0 and 3:\n", + " - `0`: document relevance is **poor**\n", + " - `1`: document relevance is **fair**\n", + " - `2`: document relevance is **good**\n", + " - `3`: document relevance is **perfect**\n", + " \n", + "The analysis would also work with other grade scales with minor changes in the code.\n", + "\n", + "To get this data, we will directly get the judgement book from Quepid, via the API. Any external data could also be used.\n", + "\n", + " \n" + ] }, - "nbformat_minor": 5, - "nbformat": 4, - "cells": [ - { - "cell_type": "markdown", - "source": "# Analysis of Judgements with Multiple Raters\n\nThis notebook is an example of how we can analyze query / document pairs judgements in the case of multiple judges.\n\n**Why?**\nWhen we have one single rater for our judgements, we have no other choice than trusting the rating of that judge.
However when we have multiple judges rating the same pairs, this gives us much more information about the actual rating of the pair. For example if all judges disagree, it might be a good indicator that the pair is hard to judge, for example because the intent is not 100% clear. Conversely, if all judges aree, we can have much higher confidence in this rating.\n\n**Goal**\nAnalyze agreement between raters to measure confidence about each rating, in particular we would like to:\n\n - analyze the distribution of grades\n - measure raters's consistency\n - identify suspicious ratings in other to clean the dataset and make sure we only keep for which we have enough confidence in the rating.\n\n\n**Data**\nFor this we need a dataset with following information:\n \n - pairs \n - Each pair is rated by 3 different judges. In this example we consider a rating grade between 0 and 3:\n - `0`: document relevance is **poor**\n - `1`: document relevance is **fair**\n - `2`: document relevance is **good**\n - `3`: document relevance is **perfect**\n \nThe analysis would also work with other grade scale with minor changes in the code.\n\nTo get this data, we will directly get judgement book from Quepid, via the API. 
Any external data could also be used.\n\n \n", - "metadata": {}, - "id": "5aa3a7af" - }, - { - "cell_type": "markdown", - "source": "## Setup\nBasic libraries needed in this analysis", - "metadata": {}, - "id": "77cc28f8" - }, - { - "cell_type": "code", - "source": "import pandas as pd\nimport numpy as np\nimport matplotlib.pylab as plt\nimport random\nimport string\nfrom js import fetch\nfrom datetime import datetime\nimport random\nfrom matplotlib import pyplot\nfrom io import StringIO\n%matplotlib inline", - "metadata": { - "trusted": true - }, - "execution_count": 1, - "outputs": [], - "id": "ce97f3d0" - }, - { - "cell_type": "code", - "source": "ratings = ['0-Bad', '1-Fair', '2-Good', '3-Perfect']", - "metadata": { - "trusted": true - }, - "execution_count": 2, - "outputs": [], - "id": "d985176c-95c1-4d29-bc03-06f03cf77ed4" - }, - { - "cell_type": "markdown", - "source": "## Get Ratings From Books API", - "metadata": {}, - "id": "b57a896c-416b-4992-980d-2870de7cc120" - }, - { - "cell_type": "code", - "source": "# You need to get your book_id from Quepid UI. You should be able to see it's content if you open /api/books/1.json\nBOOK_ID = 25", - "metadata": { - "trusted": true - }, - "execution_count": 3, - "outputs": [], - "id": "ed1f99f3-ff72-4e9d-81cd-3afef204cbef" - }, - { - "cell_type": "code", - "source": "# Get content of the book in CSV format (could also use JSON)\nres = await fetch(f'/api/books/{BOOK_ID}.csv')\nres_str = await res.text()\ndf = pd.read_csv(StringIO(res_str))\ndf", - "metadata": { - "trusted": true - }, - "execution_count": 66, - "outputs": [ - { - "execution_count": 66, - "output_type": "execute_result", - "data": { - "text/plain": " query docid charlie@flax.co.uk \\\n0 projector screen 325961 NaN \n1 projector screen 47471 NaN \n2 projector screen 126679 NaN \n3 projector screen 254441 NaN \n4 projector screen 325958 NaN \n... ... ... ... 
\n2415 power supply 1667352 NaN \n2416 power supply 1667804 NaN \n2417 power supply 1667752 NaN \n2418 power supply 1667821 NaN \n2419 power supply 1667357 NaN \n\n epugh@opensourceconnections.com eschramma@cas.org dtaivpp@gmail.com \\\n0 3.0 NaN 3.0 \n1 3.0 NaN 3.0 \n2 3.0 NaN 3.0 \n3 3.0 NaN NaN \n4 3.0 NaN NaN \n... ... ... ... \n2415 0.0 NaN NaN \n2416 0.0 NaN NaN \n2417 0.0 NaN NaN \n2418 0.0 NaN NaN \n2419 0.0 NaN NaN \n\n aarora@opensourceconnections.com cmcollier@gmail.com \\\n0 NaN NaN \n1 NaN NaN \n2 NaN NaN \n3 NaN NaN \n4 NaN NaN \n... ... ... \n2415 NaN NaN \n2416 NaN NaN \n2417 NaN NaN \n2418 NaN NaN \n2419 NaN NaN \n\n ben.w.trent@gmail.com jeff@vin.com cmarino@enterprise-knowledge.com \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n ryan.finley@ferguson.com \n0 NaN \n1 NaN \n2 NaN \n3 NaN \n4 NaN \n... ... \n2415 NaN \n2416 NaN \n2417 NaN \n2418 NaN \n2419 NaN \n\n[2420 rows x 15 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidcharlie@flax.co.ukepugh@opensourceconnections.comeschramma@cas.orgdtaivpp@gmail.comaarora@opensourceconnections.comcmcollier@gmail.comben.w.trent@gmail.comjeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.com
0projector screen325961NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
1projector screen47471NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
2projector screen126679NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
3projector screen254441NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4projector screen325958NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
................................................
2415power supply1667352NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2416power supply1667804NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2417power supply1667752NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2418power supply1667821NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2419power supply1667357NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n

2420 rows × 15 columns

\n
" - }, - "metadata": {} - } - ], - "id": "30ce3dc2-c049-436c-8049-58c0d8f4e46f" - }, - { - "cell_type": "code", - "source": "if df.shape[0] == 0:\n print('It Looks like your book is empty or does not exists')", - "metadata": { - "trusted": true - }, - "execution_count": 28, - "outputs": [], - "id": "98ad3844-d67f-44a2-8bae-034223de6c68" - }, - { - "cell_type": "code", - "source": "#df.dropna(inplace=True)\ndf.shape", - "metadata": { - "trusted": true - }, - "execution_count": 6, - "outputs": [ - { - "execution_count": 6, - "output_type": "execute_result", - "data": { - "text/plain": "(0, 15)" - }, - "metadata": {} - } - ], - "id": "79ce92af-e034-4e7d-bc8d-3a96c0fcd14f" - }, - { - "cell_type": "code", - "source": "df.loc[df['docid'] == '325961']", - "metadata": { - "trusted": true - }, - "execution_count": 29, - "outputs": [ - { - "execution_count": 29, - "output_type": "execute_result", - "data": { - "text/plain": "Empty DataFrame\nColumns: [query, docid, charlie@flax.co.uk, epugh@opensourceconnections.com, eschramma@cas.org, dtaivpp@gmail.com, aarora@opensourceconnections.com, cmcollier@gmail.com, ben.w.trent@gmail.com, jeff@vin.com, cmarino@enterprise-knowledge.com, msfroh@gmail.com, peter@searchintuition.com, maximilian.werk@jina.ai, ryan.finley@ferguson.com]\nIndex: []", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidcharlie@flax.co.ukepugh@opensourceconnections.comeschramma@cas.orgdtaivpp@gmail.comaarora@opensourceconnections.comcmcollier@gmail.comben.w.trent@gmail.comjeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.com
\n
" - }, - "metadata": {} - } - ], - "id": "25006e0e-2013-4384-a34f-8fa920579614" - }, - { - "cell_type": "code", - "source": "raters = list(df.columns[2:])\nraters", - "metadata": { - "trusted": true - }, - "execution_count": 30, - "outputs": [ - { - "execution_count": 30, - "output_type": "execute_result", - "data": { - "text/plain": "['charlie@flax.co.uk',\n 'epugh@opensourceconnections.com',\n 'eschramma@cas.org',\n 'dtaivpp@gmail.com',\n 'aarora@opensourceconnections.com',\n 'cmcollier@gmail.com',\n 'ben.w.trent@gmail.com',\n 'jeff@vin.com',\n 'cmarino@enterprise-knowledge.com',\n 'msfroh@gmail.com',\n 'peter@searchintuition.com',\n 'maximilian.werk@jina.ai',\n 'ryan.finley@ferguson.com']" - }, - "metadata": {} - } - ], - "id": "72d4481e-ae12-4fff-bbbd-1888a894f69a" - }, - { - "cell_type": "code", - "source": "# We need to filter to raters that we THINK might have some overlap\nraters = [\n 'epugh@opensourceconnections.com',\n 'aarora@opensourceconnections.com',\n 'ben.w.trent@gmail.com'\n]", - "metadata": { - "trusted": true - }, - "execution_count": 67, - "outputs": [], - "id": "dcd60629-44fc-4122-95dd-98fe2558489d" - }, - { - "cell_type": "code", - "source": "nb_raters = len(raters)", - "metadata": { - "trusted": true - }, - "execution_count": 68, - "outputs": [], - "id": "4c1fc91f-cda6-4e76-8372-3062e6975adb" - }, - { - "cell_type": "markdown", - "source": "We just transform a bit the data so that it's easier to process:", - "metadata": {}, - "id": "d704b517-aea3-4177-bb49-88f8d57ce647" - }, + { + "cell_type": "markdown", + "id": "77cc28f8", + "metadata": {}, + "source": [ + "## Setup\n", + "Basic libraries needed in this analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ce97f3d0", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pylab as plt\n", + "import random\n", + "import string\n", + "from js import fetch\n", + "from datetime import datetime\n", + 
"import random\n", + "from matplotlib import pyplot\n", + "from io import StringIO\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d985176c-95c1-4d29-bc03-06f03cf77ed4", + "metadata": {}, + "outputs": [], + "source": [ + "ratings = ['0-Bad', '1-Fair', '2-Good', '3-Perfect']" + ] + }, + { + "cell_type": "markdown", + "id": "b57a896c-416b-4992-980d-2870de7cc120", + "metadata": {}, + "source": [ + "## Get Ratings From Books API" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ed1f99f3-ff72-4e9d-81cd-3afef204cbef", + "metadata": {}, + "outputs": [], + "source": [ + "# You need to get your book_id from Quepid UI. You should be able to see it's content if you open /api/books/1.json\n", + "BOOK_ID = 25" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "30ce3dc2-c049-436c-8049-58c0d8f4e46f", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "df.rename(columns={rn:f'rating_{i}' for i,rn in enumerate(raters)}, inplace=True)\nfor i, rn in enumerate(raters):\n df[f'rater_{i}'] = rn\ndf", - "metadata": { - "trusted": true - }, - "execution_count": 69, - "outputs": [ - { - "execution_count": 69, - "output_type": "execute_result", - "data": { - "text/plain": " query docid charlie@flax.co.uk rating_0 \\\n0 projector screen 325961 NaN 3.0 \n1 projector screen 47471 NaN 3.0 \n2 projector screen 126679 NaN 3.0 \n3 projector screen 254441 NaN 3.0 \n4 projector screen 325958 NaN 3.0 \n... ... ... ... ... \n2415 power supply 1667352 NaN 0.0 \n2416 power supply 1667804 NaN 0.0 \n2417 power supply 1667752 NaN 0.0 \n2418 power supply 1667821 NaN 0.0 \n2419 power supply 1667357 NaN 0.0 \n\n eschramma@cas.org dtaivpp@gmail.com rating_1 cmcollier@gmail.com \\\n0 NaN 3.0 NaN NaN \n1 NaN 3.0 NaN NaN \n2 NaN 3.0 NaN NaN \n3 NaN NaN NaN NaN \n4 NaN NaN NaN NaN \n... ... ... ... ... 
\n2415 NaN NaN NaN NaN \n2416 NaN NaN NaN NaN \n2417 NaN NaN NaN NaN \n2418 NaN NaN NaN NaN \n2419 NaN NaN NaN NaN \n\n rating_2 jeff@vin.com cmarino@enterprise-knowledge.com \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n ryan.finley@ferguson.com rater_0 \\\n0 NaN epugh@opensourceconnections.com \n1 NaN epugh@opensourceconnections.com \n2 NaN epugh@opensourceconnections.com \n3 NaN epugh@opensourceconnections.com \n4 NaN epugh@opensourceconnections.com \n... ... ... \n2415 NaN epugh@opensourceconnections.com \n2416 NaN epugh@opensourceconnections.com \n2417 NaN epugh@opensourceconnections.com \n2418 NaN epugh@opensourceconnections.com \n2419 NaN epugh@opensourceconnections.com \n\n rater_1 rater_2 \n0 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2 aarora@opensourceconnections.com ben.w.trent@gmail.com \n3 aarora@opensourceconnections.com ben.w.trent@gmail.com \n4 aarora@opensourceconnections.com ben.w.trent@gmail.com \n... ... ... \n2415 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2416 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2417 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2418 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2419 aarora@opensourceconnections.com ben.w.trent@gmail.com \n\n[2420 rows x 18 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidcharlie@flax.co.ukrating_0eschramma@cas.orgdtaivpp@gmail.comrating_1cmcollier@gmail.comrating_2jeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.comrater_0rater_1rater_2
0projector screen325961NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
1projector screen47471NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2projector screen126679NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
3projector screen254441NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
4projector screen325958NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
.........................................................
2415power supply1667352NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2416power supply1667804NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2417power supply1667752NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2418power supply1667821NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2419power supply1667357NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
\n

2420 rows × 18 columns

\n
" - }, - "metadata": {} - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
querydocidcharlie@flax.co.ukepugh@opensourceconnections.comeschramma@cas.orgdtaivpp@gmail.comaarora@opensourceconnections.comcmcollier@gmail.comben.w.trent@gmail.comjeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.com
0projector screen325961NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
1projector screen47471NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
2projector screen126679NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
3projector screen254441NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4projector screen325958NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
................................................
2415power supply1667352NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2416power supply1667804NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2417power supply1667752NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2418power supply1667821NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2419power supply1667357NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

2420 rows × 15 columns

\n", + "
" ], - "id": "e81fbcd6-f2aa-476b-9b81-2ae7a950fe99" - }, + "text/plain": [ + " query docid charlie@flax.co.uk \\\n", + "0 projector screen 325961 NaN \n", + "1 projector screen 47471 NaN \n", + "2 projector screen 126679 NaN \n", + "3 projector screen 254441 NaN \n", + "4 projector screen 325958 NaN \n", + "... ... ... ... \n", + "2415 power supply 1667352 NaN \n", + "2416 power supply 1667804 NaN \n", + "2417 power supply 1667752 NaN \n", + "2418 power supply 1667821 NaN \n", + "2419 power supply 1667357 NaN \n", + "\n", + " epugh@opensourceconnections.com eschramma@cas.org dtaivpp@gmail.com \\\n", + "0 3.0 NaN 3.0 \n", + "1 3.0 NaN 3.0 \n", + "2 3.0 NaN 3.0 \n", + "3 3.0 NaN NaN \n", + "4 3.0 NaN NaN \n", + "... ... ... ... \n", + "2415 0.0 NaN NaN \n", + "2416 0.0 NaN NaN \n", + "2417 0.0 NaN NaN \n", + "2418 0.0 NaN NaN \n", + "2419 0.0 NaN NaN \n", + "\n", + " aarora@opensourceconnections.com cmcollier@gmail.com \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... \n", + "2415 NaN NaN \n", + "2416 NaN NaN \n", + "2417 NaN NaN \n", + "2418 NaN NaN \n", + "2419 NaN NaN \n", + "\n", + " ben.w.trent@gmail.com jeff@vin.com cmarino@enterprise-knowledge.com \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "... ... ... ... \n", + "2415 NaN NaN NaN \n", + "2416 NaN NaN NaN \n", + "2417 NaN NaN NaN \n", + "2418 NaN NaN NaN \n", + "2419 NaN NaN NaN \n", + "\n", + " msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "... ... ... ... \n", + "2415 NaN NaN NaN \n", + "2416 NaN NaN NaN \n", + "2417 NaN NaN NaN \n", + "2418 NaN NaN NaN \n", + "2419 NaN NaN NaN \n", + "\n", + " ryan.finley@ferguson.com \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... 
\n", + "2415 NaN \n", + "2416 NaN \n", + "2417 NaN \n", + "2418 NaN \n", + "2419 NaN \n", + "\n", + "[2420 rows x 15 columns]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get content of the book in CSV format (could also use JSON)\n", + "res = await fetch(f'/api/books/{BOOK_ID}.csv')\n", + "res_str = await res.text()\n", + "df = pd.read_csv(StringIO(res_str))\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "98ad3844-d67f-44a2-8bae-034223de6c68", + "metadata": {}, + "outputs": [], + "source": [ + "if df.shape[0] == 0:\n", + " print('It Looks like your book is empty or does not exists')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "79ce92af-e034-4e7d-bc8d-3a96c0fcd14f", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "source": "We flatten the data to have 1 rating per row:", - "metadata": {}, - "id": "c0396b4d-eb54-4786-939e-b8ad69d335ab" - }, + "data": { + "text/plain": [ + "(0, 15)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#df.dropna(inplace=True)\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "25006e0e-2013-4384-a34f-8fa920579614", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "df_overall = pd.concat([\n df[['query','docid',f'rating_{i}', f'rater_{i}']].rename(\n columns={f'rating_{i}':'rating', f'rater_{i}':'rater'}) for i in range(nb_raters)]).reset_index(drop=True)\ndf_overall.sort_values(by=['docid'], inplace=True)\ndf_overall", - "metadata": { - "trusted": true - }, - "execution_count": 70, - "outputs": [ - { - "execution_count": 70, - "output_type": "execute_result", - "data": { - "text/plain": " query docid rating rater\n4063 iphone 11 1423 NaN aarora@opensourceconnections.com\n6483 iphone 11 1423 NaN ben.w.trent@gmail.com\n1643 iphone 11 1423 NaN epugh@opensourceconnections.com\n4065 
iphone 11 1424 NaN aarora@opensourceconnections.com\n6485 iphone 11 1424 NaN ben.w.trent@gmail.com\n... ... ... ... ...\n2383 windows 10 79583170 NaN epugh@opensourceconnections.com\n4803 windows 10 79583170 NaN aarora@opensourceconnections.com\n5879 samsung 79659021 NaN ben.w.trent@gmail.com\n3459 samsung 79659021 3.0 aarora@opensourceconnections.com\n1039 samsung 79659021 3.0 epugh@opensourceconnections.com\n\n[7260 rows x 4 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidratingrater
4063iphone 111423NaNaarora@opensourceconnections.com
6483iphone 111423NaNben.w.trent@gmail.com
1643iphone 111423NaNepugh@opensourceconnections.com
4065iphone 111424NaNaarora@opensourceconnections.com
6485iphone 111424NaNben.w.trent@gmail.com
...............
2383windows 1079583170NaNepugh@opensourceconnections.com
4803windows 1079583170NaNaarora@opensourceconnections.com
5879samsung79659021NaNben.w.trent@gmail.com
3459samsung796590213.0aarora@opensourceconnections.com
1039samsung796590213.0epugh@opensourceconnections.com
\n

7260 rows × 4 columns

\n
" - }, - "metadata": {} - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
querydocidcharlie@flax.co.ukepugh@opensourceconnections.comeschramma@cas.orgdtaivpp@gmail.comaarora@opensourceconnections.comcmcollier@gmail.comben.w.trent@gmail.comjeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.com
\n", + "
" ], - "id": "97b0bc0c-a20e-49b2-a65a-ef09eb7e6a58" - }, - { - "cell_type": "code", - "source": "df_overall.dropna(inplace=True)", - "metadata": { - "trusted": true - }, - "execution_count": 71, - "outputs": [], - "id": "d58d14a6-6bfb-4c77-8145-514c0030bc53" - }, - { - "cell_type": "markdown", - "source": "### Rating distribution per query\nHe we just want to plot the distribution of ratings for each query:\n", - "metadata": {}, - "id": "ff2dcb29-e376-4621-9ecf-f3374e71f464" - }, + "text/plain": [ + "Empty DataFrame\n", + "Columns: [query, docid, charlie@flax.co.uk, epugh@opensourceconnections.com, eschramma@cas.org, dtaivpp@gmail.com, aarora@opensourceconnections.com, cmcollier@gmail.com, ben.w.trent@gmail.com, jeff@vin.com, cmarino@enterprise-knowledge.com, msfroh@gmail.com, peter@searchintuition.com, maximilian.werk@jina.ai, ryan.finley@ferguson.com]\n", + "Index: []" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[df['docid'] == '325961']" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "72d4481e-ae12-4fff-bbbd-1888a894f69a", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "df_overall[['query', 'rating']].groupby('query').agg(['count', 'mean', 'std'])", - "metadata": { - "trusted": true - }, - "execution_count": 72, - "outputs": [ - { - "execution_count": 72, - "output_type": "execute_result", - "data": { - "text/plain": " rating \n count mean std\nquery \n120v power supply 2 0.000000 0.000000\naa 10 1.800000 1.549193\naa battery 10 1.800000 1.135292\naaa 7 1.285714 1.603567\nadapter 10 1.300000 0.674949\n... ... ... ...\nwireless headphones 5 0.800000 1.303840\nwireless mouse 14 2.000000 1.109400\nxbox 15 0.000000 0.000000\nxbox one 7 0.428571 0.786796\nyoutube 12 0.000000 0.000000\n\n[137 rows x 3 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
rating
countmeanstd
query
120v power supply20.0000000.000000
aa101.8000001.549193
aa battery101.8000001.135292
aaa71.2857141.603567
adapter101.3000000.674949
............
wireless headphones50.8000001.303840
wireless mouse142.0000001.109400
xbox150.0000000.000000
xbox one70.4285710.786796
youtube120.0000000.000000
\n

137 rows × 3 columns

\n
" - }, - "metadata": {} - } - ], - "id": "50d46a70" - }, + "data": { + "text/plain": [ + "['charlie@flax.co.uk',\n", + " 'epugh@opensourceconnections.com',\n", + " 'eschramma@cas.org',\n", + " 'dtaivpp@gmail.com',\n", + " 'aarora@opensourceconnections.com',\n", + " 'cmcollier@gmail.com',\n", + " 'ben.w.trent@gmail.com',\n", + " 'jeff@vin.com',\n", + " 'cmarino@enterprise-knowledge.com',\n", + " 'msfroh@gmail.com',\n", + " 'peter@searchintuition.com',\n", + " 'maximilian.werk@jina.ai',\n", + " 'ryan.finley@ferguson.com']" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raters = list(df.columns[2:])\n", + "raters" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "dcd60629-44fc-4122-95dd-98fe2558489d", + "metadata": {}, + "outputs": [], + "source": [ + "# We need to filter to raters that we THINK might have some overlap\n", + "raters = [\n", + " 'Eric Pugh',\n", + " 'Atita Arora',\n", + " 'Benjamin Trent'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "4c1fc91f-cda6-4e76-8372-3062e6975adb", + "metadata": {}, + "outputs": [], + "source": [ + "nb_raters = len(raters)" + ] + }, + { + "cell_type": "markdown", + "id": "d704b517-aea3-4177-bb49-88f8d57ce647", + "metadata": {}, + "source": [ + "We just transform a bit the data so that it's easier to process:" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "e81fbcd6-f2aa-476b-9b81-2ae7a950fe99", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "fig, axes = plt.subplots()\nqueries = df_overall['query'].unique()\ndataset = [df_overall[df_overall['query'] == q][\"rating\"] for q in queries]\n\nnb_queries = len(queries)\n\naxes.violinplot(dataset = dataset, showmeans=True, bw_method=0.05)\naxes.set_xlabel('query')\naxes.set_ylabel('ratings')\naxes.yaxis.grid(True)\naxes.set_xticks(range(1,nb_queries+1))\naxes.set_xticklabels(queries,rotation=45)\nplt.title('Rating 
distribution per query')\nplt.show()", - "metadata": { - "trusted": true - }, - "execution_count": 73, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/png": "\n" - }, - "metadata": {} - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
querydocidcharlie@flax.co.ukrating_0eschramma@cas.orgdtaivpp@gmail.comrating_1cmcollier@gmail.comrating_2jeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.comrater_0rater_1rater_2
0projector screen325961NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
1projector screen47471NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2projector screen126679NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
3projector screen254441NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
4projector screen325958NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
.........................................................
2415power supply1667352NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2416power supply1667804NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2417power supply1667752NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2418power supply1667821NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
2419power supply1667357NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com
\n", + "

2420 rows × 18 columns

\n", + "
" ], - "id": "70b93e5d-425d-4925-97fd-1b062f7c373f" - }, - { - "cell_type": "markdown", - "source": "### Rating distribution per rater\n\nAssuming raters have seen the same pairs and that they had the same guidelines, their ratings should be overall calibrated. This is what we want to check here. \nWe could easily detect outlier raters that rate very different to others, for example because they haven't really understood the guidelines.", - "metadata": {}, - "id": "fa14bfdd-ea38-4fcc-9532-4019306410b1" - }, + "text/plain": [ + " query docid charlie@flax.co.uk rating_0 \\\n", + "0 projector screen 325961 NaN 3.0 \n", + "1 projector screen 47471 NaN 3.0 \n", + "2 projector screen 126679 NaN 3.0 \n", + "3 projector screen 254441 NaN 3.0 \n", + "4 projector screen 325958 NaN 3.0 \n", + "... ... ... ... ... \n", + "2415 power supply 1667352 NaN 0.0 \n", + "2416 power supply 1667804 NaN 0.0 \n", + "2417 power supply 1667752 NaN 0.0 \n", + "2418 power supply 1667821 NaN 0.0 \n", + "2419 power supply 1667357 NaN 0.0 \n", + "\n", + " eschramma@cas.org dtaivpp@gmail.com rating_1 cmcollier@gmail.com \\\n", + "0 NaN 3.0 NaN NaN \n", + "1 NaN 3.0 NaN NaN \n", + "2 NaN 3.0 NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "... ... ... ... ... \n", + "2415 NaN NaN NaN NaN \n", + "2416 NaN NaN NaN NaN \n", + "2417 NaN NaN NaN NaN \n", + "2418 NaN NaN NaN NaN \n", + "2419 NaN NaN NaN NaN \n", + "\n", + " rating_2 jeff@vin.com cmarino@enterprise-knowledge.com \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "... ... ... ... \n", + "2415 NaN NaN NaN \n", + "2416 NaN NaN NaN \n", + "2417 NaN NaN NaN \n", + "2418 NaN NaN NaN \n", + "2419 NaN NaN NaN \n", + "\n", + " msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "... ... ... ... 
\n", + "2415 NaN NaN NaN \n", + "2416 NaN NaN NaN \n", + "2417 NaN NaN NaN \n", + "2418 NaN NaN NaN \n", + "2419 NaN NaN NaN \n", + "\n", + " ryan.finley@ferguson.com rater_0 \\\n", + "0 NaN epugh@opensourceconnections.com \n", + "1 NaN epugh@opensourceconnections.com \n", + "2 NaN epugh@opensourceconnections.com \n", + "3 NaN epugh@opensourceconnections.com \n", + "4 NaN epugh@opensourceconnections.com \n", + "... ... ... \n", + "2415 NaN epugh@opensourceconnections.com \n", + "2416 NaN epugh@opensourceconnections.com \n", + "2417 NaN epugh@opensourceconnections.com \n", + "2418 NaN epugh@opensourceconnections.com \n", + "2419 NaN epugh@opensourceconnections.com \n", + "\n", + " rater_1 rater_2 \n", + "0 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "1 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "3 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "4 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "... ... ... 
\n", + "2415 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2416 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2417 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2418 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2419 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "\n", + "[2420 rows x 18 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename(columns={rn:f'rating_{i}' for i,rn in enumerate(raters)}, inplace=True)\n", + "for i, rn in enumerate(raters):\n", + " df[f'rater_{i}'] = rn\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "c0396b4d-eb54-4786-939e-b8ad69d335ab", + "metadata": {}, + "source": [ + "We flatten the data to have 1 rating per row:" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "97b0bc0c-a20e-49b2-a65a-ef09eb7e6a58", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "fig, axes = plt.subplots()\nraters = df_overall['rater'].unique()\ndataset = [df_overall[df_overall['rater'] == r][\"rating\"] for r in raters]\n\naxes.violinplot(dataset = dataset, showmeans=True, bw_method=0.05)\naxes.set_xlabel('rater')\naxes.set_ylabel('ratings')\naxes.yaxis.grid(True)\naxes.set_xticks(range(1,nb_raters+1))\naxes.set_xticklabels(raters)\nplt.title('Rating distribution per rater')\nplt.show()", - "metadata": { - "trusted": true - }, - "execution_count": 74, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/png": "\n" - }, - "metadata": {} - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
querydocidratingrater
4063iphone 111423NaNaarora@opensourceconnections.com
6483iphone 111423NaNben.w.trent@gmail.com
1643iphone 111423NaNepugh@opensourceconnections.com
4065iphone 111424NaNaarora@opensourceconnections.com
6485iphone 111424NaNben.w.trent@gmail.com
...............
2383windows 1079583170NaNepugh@opensourceconnections.com
4803windows 1079583170NaNaarora@opensourceconnections.com
5879samsung79659021NaNben.w.trent@gmail.com
3459samsung796590213.0aarora@opensourceconnections.com
1039samsung796590213.0epugh@opensourceconnections.com
\n", + "

7260 rows × 4 columns

\n", + "
" ], - "id": "f954846d-54a9-4cf1-a9da-ab0faaa46df9" - }, - { - "cell_type": "markdown", - "source": "## Ratings Analysis\n\nWe now focus on the ratings themselves. We first plot the overall rating distributions", - "metadata": {}, - "id": "6168e768" - }, - { - "cell_type": "markdown", - "source": "### Overall rating distribution", - "metadata": {}, - "id": "92f38adf" - }, + "text/plain": [ + " query docid rating rater\n", + "4063 iphone 11 1423 NaN aarora@opensourceconnections.com\n", + "6483 iphone 11 1423 NaN ben.w.trent@gmail.com\n", + "1643 iphone 11 1423 NaN epugh@opensourceconnections.com\n", + "4065 iphone 11 1424 NaN aarora@opensourceconnections.com\n", + "6485 iphone 11 1424 NaN ben.w.trent@gmail.com\n", + "... ... ... ... ...\n", + "2383 windows 10 79583170 NaN epugh@opensourceconnections.com\n", + "4803 windows 10 79583170 NaN aarora@opensourceconnections.com\n", + "5879 samsung 79659021 NaN ben.w.trent@gmail.com\n", + "3459 samsung 79659021 3.0 aarora@opensourceconnections.com\n", + "1039 samsung 79659021 3.0 epugh@opensourceconnections.com\n", + "\n", + "[7260 rows x 4 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_overall = pd.concat([\n", + " df[['query','docid',f'rating_{i}', f'rater_{i}']].rename(\n", + " columns={f'rating_{i}':'rating', f'rater_{i}':'rater'}) for i in range(nb_raters)]).reset_index(drop=True)\n", + "df_overall.sort_values(by=['docid'], inplace=True)\n", + "df_overall" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "d58d14a6-6bfb-4c77-8145-514c0030bc53", + "metadata": {}, + "outputs": [], + "source": [ + "df_overall.dropna(inplace=True)" + ] + }, + { + "cell_type": "markdown", + "id": "ff2dcb29-e376-4621-9ecf-f3374e71f464", + "metadata": {}, + "source": [ + "### Rating distribution per query\n", + "He we just want to plot the distribution of ratings for each query:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + 
"id": "50d46a70", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "from matplotlib.ticker import MaxNLocator\nplt.hist(df_overall['rating'])\nplt.title('Overall ratings')\nplt.xticks(range(len(ratings)), ratings,\n rotation=60) \nNone\nplt.ylabel('nb_ratings')\nplt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))\n\nplt.grid()", - "metadata": { - "trusted": true - }, - "execution_count": 75, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/png": "\n" - }, - "metadata": {} - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rating
countmeanstd
query
120v power supply20.0000000.000000
aa101.8000001.549193
aa battery101.8000001.135292
aaa71.2857141.603567
adapter101.3000000.674949
............
wireless headphones50.8000001.303840
wireless mouse142.0000001.109400
xbox150.0000000.000000
xbox one70.4285710.786796
youtube120.0000000.000000
\n", + "

137 rows × 3 columns

\n", + "
" ], - "id": "078955ba" - }, - { - "cell_type": "markdown", - "source": "### Agreements between raters\nIn this section we want to check how much raters agree or disagree.\n\n - If all raters agreeing on a rating will give us confidence about the rating\n - If all raters disagree, for example, 1 rating `Poor` another one rating `Fair` and the third one rating `Perfect`, then something draws our attention. Either the intent was not clear or raters did not rate as expected. ", - "metadata": {}, - "id": "cf354a46-d693-47e5-8148-fbef8e8d8b25" - }, + "text/plain": [ + " rating \n", + " count mean std\n", + "query \n", + "120v power supply 2 0.000000 0.000000\n", + "aa 10 1.800000 1.549193\n", + "aa battery 10 1.800000 1.135292\n", + "aaa 7 1.285714 1.603567\n", + "adapter 10 1.300000 0.674949\n", + "... ... ... ...\n", + "wireless headphones 5 0.800000 1.303840\n", + "wireless mouse 14 2.000000 1.109400\n", + "xbox 15 0.000000 0.000000\n", + "xbox one 7 0.428571 0.786796\n", + "youtube 12 0.000000 0.000000\n", + "\n", + "[137 rows x 3 columns]" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_overall[['query', 'rating']].groupby('query').agg(['count', 'mean', 'std'])" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "70b93e5d-425d-4925-97fd-1b062f7c373f", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "vals = []\nlabs = []\ncols = []\nfor agent,col in (('rating_0','blue'),\n ('rating_1','red'),\n ('rating_2', 'green')\n ):\n vals.append(df[agent])\n labs.append(agent)\n cols.append(col)\nplt.hist(vals, color=cols, label=labs)\n\nplt.legend()\nplt.title('Overall ratings')\nplt.xticks(range(len(ratings)), ratings, rotation=60) \nNone", - "metadata": { - "trusted": true - }, - "execution_count": 76, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/png": "\n" - }, - "metadata": {} - } - ], - "id": "cf6869d0-3498-4d44-9bf6-d55e580916d1" - }, + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axes = plt.subplots()\n", + "queries = df_overall['query'].unique()\n", + "dataset = [df_overall[df_overall['query'] == q][\"rating\"] for q in queries]\n", + "\n", + "nb_queries = len(queries)\n", + "\n", + "axes.violinplot(dataset = dataset, showmeans=True, bw_method=0.05)\n", + "axes.set_xlabel('query')\n", + "axes.set_ylabel('ratings')\n", + "axes.yaxis.grid(True)\n", + "axes.set_xticks(range(1,nb_queries+1))\n", + "axes.set_xticklabels(queries,rotation=45)\n", + "plt.title('Rating distribution per query')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "fa14bfdd-ea38-4fcc-9532-4019306410b1", + "metadata": {}, + "source": [ + "### Rating distribution per rater\n", + "\n", + "Assuming raters have seen the same pairs and that they had the same guidelines, their ratings should be overall calibrated. This is what we want to check here. \n", + "We could easily detect outlier raters that rate very different to others, for example because they haven't really understood the guidelines." + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "f954846d-54a9-4cf1-a9da-ab0faaa46df9", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "def nb_distinct_ratings(r):\n return len({r['rating_0'], r['rating_1'], r['rating_2']})\ndf['nb_distinct_ratings'] = df.apply(nb_distinct_ratings, axis=1)\ndf['nb_distinct_ratings'].hist()\nplt.xticks([1, 2, 3], ['3 Agree', '2 Agree', 'All disagree'],\n rotation=45) \nplt.ylabel('nb cases')\nplt.title('Agents agreements')", - "metadata": { - "trusted": true - }, - "execution_count": 77, - "outputs": [ - { - "execution_count": 77, - "output_type": "execute_result", - "data": { - "text/plain": "Text(0.5, 1.0, 'Agents agreements')" - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/png": "\n" - }, - "metadata": {} - } - ], - "id": "fbbb3ed9-dcab-4c20-8a08-fa689b62db5d" - }, + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axes = plt.subplots()\n", + "raters = df_overall['rater'].unique()\n", + "dataset = [df_overall[df_overall['rater'] == r][\"rating\"] for r in raters]\n", + "\n", + "axes.violinplot(dataset = dataset, showmeans=True, bw_method=0.05)\n", + "axes.set_xlabel('rater')\n", + "axes.set_ylabel('ratings')\n", + "axes.yaxis.grid(True)\n", + "axes.set_xticks(range(1,nb_raters+1))\n", + "axes.set_xticklabels(raters)\n", + "plt.title('Rating distribution per rater')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "6168e768", + "metadata": {}, + "source": [ + "## Ratings Analysis\n", + "\n", + "We now focus on the ratings themselves. We first plot the overall rating distributions" + ] + }, + { + "cell_type": "markdown", + "id": "92f38adf", + "metadata": {}, + "source": [ + "### Overall rating distribution" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "078955ba", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "source": "Some suspicious cases we will further investigate later on:", - "metadata": {}, - "id": "7fb17be1-df26-44fc-82d3-88c5255ca5ee" - }, + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib.ticker import MaxNLocator\n", + "plt.hist(df_overall['rating'])\n", + "plt.title('Overall ratings')\n", + "plt.xticks(range(len(ratings)), ratings,\n", + " rotation=60) \n", + "None\n", + "plt.ylabel('nb_ratings')\n", + "plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))\n", + "\n", + "plt.grid()" + ] + }, + { + "cell_type": "markdown", + "id": "cf354a46-d693-47e5-8148-fbef8e8d8b25", + "metadata": {}, + "source": [ + "### Agreements between raters\n", + "In this section we want to check how much raters agree or disagree.\n", + "\n", + " - If all raters agreeing on a rating will give us confidence about the rating\n", + " - If all raters disagree, for example, 1 rating `Poor` another one rating `Fair` and the third one rating `Perfect`, then something draws our attention. Either the intent was not clear or raters did not rate as expected. " + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "cf6869d0-3498-4d44-9bf6-d55e580916d1", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "print('All disagree:')\ndf[df['nb_distinct_ratings']==3]", - "metadata": { - "trusted": true - }, - "execution_count": 78, - "outputs": [ - { - "name": "stdout", - "text": "All disagree:\n", - "output_type": "stream" - }, - { - "execution_count": 78, - "output_type": "execute_result", - "data": { - "text/plain": " query docid charlie@flax.co.uk rating_0 \\\n0 projector screen 325961 NaN 3.0 \n1 projector screen 47471 NaN 3.0 \n2 projector screen 126679 NaN 3.0 \n3 projector screen 254441 NaN 3.0 \n4 projector screen 325958 NaN 3.0 \n... ... ... ... ... 
\n2415 power supply 1667352 NaN 0.0 \n2416 power supply 1667804 NaN 0.0 \n2417 power supply 1667752 NaN 0.0 \n2418 power supply 1667821 NaN 0.0 \n2419 power supply 1667357 NaN 0.0 \n\n eschramma@cas.org dtaivpp@gmail.com rating_1 cmcollier@gmail.com \\\n0 NaN 3.0 NaN NaN \n1 NaN 3.0 NaN NaN \n2 NaN 3.0 NaN NaN \n3 NaN NaN NaN NaN \n4 NaN NaN NaN NaN \n... ... ... ... ... \n2415 NaN NaN NaN NaN \n2416 NaN NaN NaN NaN \n2417 NaN NaN NaN NaN \n2418 NaN NaN NaN NaN \n2419 NaN NaN NaN NaN \n\n rating_2 jeff@vin.com cmarino@enterprise-knowledge.com \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n ryan.finley@ferguson.com rater_0 \\\n0 NaN epugh@opensourceconnections.com \n1 NaN epugh@opensourceconnections.com \n2 NaN epugh@opensourceconnections.com \n3 NaN epugh@opensourceconnections.com \n4 NaN epugh@opensourceconnections.com \n... ... ... \n2415 NaN epugh@opensourceconnections.com \n2416 NaN epugh@opensourceconnections.com \n2417 NaN epugh@opensourceconnections.com \n2418 NaN epugh@opensourceconnections.com \n2419 NaN epugh@opensourceconnections.com \n\n rater_1 rater_2 \\\n0 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2 aarora@opensourceconnections.com ben.w.trent@gmail.com \n3 aarora@opensourceconnections.com ben.w.trent@gmail.com \n4 aarora@opensourceconnections.com ben.w.trent@gmail.com \n... ... ... 
\n2415 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2416 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2417 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2418 aarora@opensourceconnections.com ben.w.trent@gmail.com \n2419 aarora@opensourceconnections.com ben.w.trent@gmail.com \n\n nb_distinct_ratings \n0 3 \n1 3 \n2 3 \n3 3 \n4 3 \n... ... \n2415 3 \n2416 3 \n2417 3 \n2418 3 \n2419 3 \n\n[2148 rows x 19 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidcharlie@flax.co.ukrating_0eschramma@cas.orgdtaivpp@gmail.comrating_1cmcollier@gmail.comrating_2jeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.comrater_0rater_1rater_2nb_distinct_ratings
0projector screen325961NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
1projector screen47471NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2projector screen126679NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
3projector screen254441NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
4projector screen325958NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
............................................................
2415power supply1667352NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2416power supply1667804NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2417power supply1667752NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2418power supply1667821NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2419power supply1667357NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
\n

2148 rows × 19 columns

\n
" - }, - "metadata": {} - } - ], - "id": "7d9b3876-17b5-42a0-97de-d9007b777aad" - }, + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "vals = []\n", + "labs = []\n", + "cols = []\n", + "for agent,col in (('rating_0','blue'),\n", + " ('rating_1','red'),\n", + " ('rating_2', 'green')\n", + " ):\n", + " vals.append(df[agent])\n", + " labs.append(agent)\n", + " cols.append(col)\n", + "plt.hist(vals, color=cols, label=labs)\n", + "\n", + "plt.legend()\n", + "plt.title('Overall ratings')\n", + "plt.xticks(range(len(ratings)), ratings, rotation=60) \n", + "None" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "fbbb3ed9-dcab-4c20-8a08-fa689b62db5d", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "source": "Some cases where all raters agree, we can have good confidence in the rating:", - "metadata": {}, - "id": "b4a60ac4-4c2b-4477-b7c9-285fb29f843c" + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Agents agreements')" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" }, { - "cell_type": "code", - "source": "# We have none that everyone agrees on\nprint('All agree:')\ndf[df['nb_distinct_ratings']==1].sample(5)", - "metadata": { - "trusted": true - }, - "execution_count": 79, - "outputs": [ - { - "name": "stdout", - "text": "All agree:\n", - "output_type": "stream" - }, - { - "ename": "", - "evalue": "a must be greater than 0 unless no samples are taken", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[79], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# We have none that everyone agrees on\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAll agree:\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m 
\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnb_distinct_ratings\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/generic.py:5773\u001b[0m, in \u001b[0;36mNDFrame.sample\u001b[0;34m(self, n, frac, replace, weights, random_state, axis, ignore_index)\u001b[0m\n\u001b[1;32m 5770\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m weights \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 5771\u001b[0m weights \u001b[38;5;241m=\u001b[39m sample\u001b[38;5;241m.\u001b[39mpreprocess_weights(\u001b[38;5;28mself\u001b[39m, weights, axis)\n\u001b[0;32m-> 5773\u001b[0m sampled_indices \u001b[38;5;241m=\u001b[39m \u001b[43msample\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5774\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(sampled_indices, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 5776\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ignore_index:\n", - "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/sample.py:150\u001b[0m, in \u001b[0;36msample\u001b[0;34m(obj_len, size, replace, weights, random_state)\u001b[0m\n\u001b[1;32m 147\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid weights: weights sum to zero\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchoice\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mastype(\n\u001b[1;32m 151\u001b[0m np\u001b[38;5;241m.\u001b[39mintp, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 152\u001b[0m )\n", - "File \u001b[0;32mmtrand.pyx:928\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: a must be greater than 0 unless no samples are taken" - ], - "output_type": "error" - } - ], - "id": "8b065a8e-c76c-4e8b-ab5e-e1ec25db6a60" - }, + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def nb_distinct_ratings(r):\n", + " return len({r['rating_0'], r['rating_1'], r['rating_2']})\n", + "df['nb_distinct_ratings'] = df.apply(nb_distinct_ratings, axis=1)\n", + "df['nb_distinct_ratings'].hist()\n", + "plt.xticks([1, 2, 3], ['3 Agree', '2 Agree', 'All disagree'],\n", + " rotation=45) \n", + "plt.ylabel('nb cases')\n", + "plt.title('Agents agreements')" + ] + }, + { + "cell_type": "markdown", + "id": "7fb17be1-df26-44fc-82d3-88c5255ca5ee", + "metadata": {}, + "source": [ + "Some suspicious cases we will further investigate later on:" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "7d9b3876-17b5-42a0-97de-d9007b777aad", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "source": "Not perfect but 2 / 3 raters agreed on the rating value:", - "metadata": {}, - "id": "294c4445-1c81-4c73-96ac-6001f3f83ff2" + "name": "stdout", + "output_type": "stream", + "text": [ + "All disagree:\n" + ] }, { - "cell_type": "code", - "source": "# We have none\nprint('Majority agree:')\ndf[df['nb_distinct_ratings']==2]", - "metadata": { - "trusted": true - }, - "execution_count": 80, - "outputs": [ - { - "name": "stdout", - "text": "Majority agree:\n", - "output_type": "stream" - }, - { - "execution_count": 80, - "output_type": "execute_result", - "data": { - "text/plain": " query docid charlie@flax.co.uk rating_0 \\\n6 projector screen 549808 NaN 3.0 \n19 laptop 77031393 NaN 3.0 \n20 iphone 8 79283963 NaN 0.0 \n21 iphone 8 79284190 NaN 0.0 \n24 iphone 8 77911774 NaN 0.0 \n... ... ... ... ... \n1330 coffee 656359 NaN 3.0 \n1331 coffee 77265396 NaN 2.0 \n1334 coffee 2102472 NaN 2.0 \n1340 vans 77129498 NaN 0.0 \n1342 vans 77388459 NaN 0.0 \n\n eschramma@cas.org dtaivpp@gmail.com rating_1 cmcollier@gmail.com \\\n6 NaN NaN 3.0 NaN \n19 NaN NaN NaN NaN \n20 NaN NaN NaN NaN \n21 NaN NaN NaN NaN \n24 NaN NaN 0.0 NaN \n... ... ... ... ... 
\n1330 NaN NaN 3.0 NaN \n1331 NaN NaN 2.0 NaN \n1334 NaN NaN NaN NaN \n1340 NaN NaN 0.0 NaN \n1342 NaN NaN NaN NaN \n\n rating_2 jeff@vin.com cmarino@enterprise-knowledge.com \\\n6 NaN NaN NaN \n19 3.0 NaN NaN \n20 0.0 NaN NaN \n21 0.0 NaN NaN \n24 NaN NaN NaN \n... ... ... ... \n1330 NaN NaN NaN \n1331 NaN NaN NaN \n1334 2.0 NaN NaN \n1340 NaN NaN NaN \n1342 0.0 NaN NaN \n\n msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n6 NaN NaN NaN \n19 NaN NaN NaN \n20 NaN NaN NaN \n21 NaN NaN NaN \n24 NaN NaN NaN \n... ... ... ... \n1330 NaN NaN NaN \n1331 NaN NaN NaN \n1334 NaN NaN NaN \n1340 NaN NaN NaN \n1342 NaN NaN NaN \n\n ryan.finley@ferguson.com rater_0 \\\n6 NaN epugh@opensourceconnections.com \n19 NaN epugh@opensourceconnections.com \n20 NaN epugh@opensourceconnections.com \n21 NaN epugh@opensourceconnections.com \n24 NaN epugh@opensourceconnections.com \n... ... ... \n1330 NaN epugh@opensourceconnections.com \n1331 NaN epugh@opensourceconnections.com \n1334 NaN epugh@opensourceconnections.com \n1340 NaN epugh@opensourceconnections.com \n1342 NaN epugh@opensourceconnections.com \n\n rater_1 rater_2 \\\n6 aarora@opensourceconnections.com ben.w.trent@gmail.com \n19 aarora@opensourceconnections.com ben.w.trent@gmail.com \n20 aarora@opensourceconnections.com ben.w.trent@gmail.com \n21 aarora@opensourceconnections.com ben.w.trent@gmail.com \n24 aarora@opensourceconnections.com ben.w.trent@gmail.com \n... ... ... \n1330 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1331 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1334 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1340 aarora@opensourceconnections.com ben.w.trent@gmail.com \n1342 aarora@opensourceconnections.com ben.w.trent@gmail.com \n\n nb_distinct_ratings \n6 2 \n19 2 \n20 2 \n21 2 \n24 2 \n... ... \n1330 2 \n1331 2 \n1334 2 \n1340 2 \n1342 2 \n\n[272 rows x 19 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
querydocidcharlie@flax.co.ukrating_0eschramma@cas.orgdtaivpp@gmail.comrating_1cmcollier@gmail.comrating_2jeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.comrater_0rater_1rater_2nb_distinct_ratings
6projector screen549808NaN3.0NaNNaN3.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
19laptop77031393NaN3.0NaNNaNNaNNaN3.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
20iphone 879283963NaN0.0NaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
21iphone 879284190NaN0.0NaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
24iphone 877911774NaN0.0NaNNaN0.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
............................................................
1330coffee656359NaN3.0NaNNaN3.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1331coffee77265396NaN2.0NaNNaN2.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1334coffee2102472NaN2.0NaNNaNNaNNaN2.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1340vans77129498NaN0.0NaNNaN0.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1342vans77388459NaN0.0NaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
\n

272 rows × 19 columns

\n
" - }, - "metadata": {} - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
querydocidcharlie@flax.co.ukrating_0eschramma@cas.orgdtaivpp@gmail.comrating_1cmcollier@gmail.comrating_2jeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.comrater_0rater_1rater_2nb_distinct_ratings
0projector screen325961NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
1projector screen47471NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2projector screen126679NaN3.0NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
3projector screen254441NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
4projector screen325958NaN3.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
............................................................
2415power supply1667352NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2416power supply1667804NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2417power supply1667752NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2418power supply1667821NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
2419power supply1667357NaN0.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com3
\n", + "

2148 rows × 19 columns

\n", + "
" ], - "id": "2b300fa0-4876-46c6-9526-91afe73b1d7a" - }, - { - "cell_type": "markdown", - "source": "### Big discrepancies\n\nThos cases are the most suspicious ones. Some raters rated as `3-Perfect` some other rated as '1-Fair' or '0-Bad' (or at least a difference of 2 grades).\nThere can be several reasons for that:\n - there is an issue with the pair: query not clear, document not clear\n - guidelines not well specified: a very common case is when the guidelines are not 100% clear to the raters. For example, what happens if the image of the results is relevant but the text is not? Or if some document fields are missing?\n - no clear intent. Was the intent of the query clear enough? This can cause confusion to the raters. It's important to give an opportunity to the rater to say `I don't know how to rate this pair!`\n", - "metadata": {}, - "id": "a2b7880d-4707-4cf0-ad2e-20f5ee1f912e" - }, + "text/plain": [ + " query docid charlie@flax.co.uk rating_0 \\\n", + "0 projector screen 325961 NaN 3.0 \n", + "1 projector screen 47471 NaN 3.0 \n", + "2 projector screen 126679 NaN 3.0 \n", + "3 projector screen 254441 NaN 3.0 \n", + "4 projector screen 325958 NaN 3.0 \n", + "... ... ... ... ... \n", + "2415 power supply 1667352 NaN 0.0 \n", + "2416 power supply 1667804 NaN 0.0 \n", + "2417 power supply 1667752 NaN 0.0 \n", + "2418 power supply 1667821 NaN 0.0 \n", + "2419 power supply 1667357 NaN 0.0 \n", + "\n", + " eschramma@cas.org dtaivpp@gmail.com rating_1 cmcollier@gmail.com \\\n", + "0 NaN 3.0 NaN NaN \n", + "1 NaN 3.0 NaN NaN \n", + "2 NaN 3.0 NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "... ... ... ... ... \n", + "2415 NaN NaN NaN NaN \n", + "2416 NaN NaN NaN NaN \n", + "2417 NaN NaN NaN NaN \n", + "2418 NaN NaN NaN NaN \n", + "2419 NaN NaN NaN NaN \n", + "\n", + " rating_2 jeff@vin.com cmarino@enterprise-knowledge.com \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "... 
... ... ... \n", + "2415 NaN NaN NaN \n", + "2416 NaN NaN NaN \n", + "2417 NaN NaN NaN \n", + "2418 NaN NaN NaN \n", + "2419 NaN NaN NaN \n", + "\n", + " msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "... ... ... ... \n", + "2415 NaN NaN NaN \n", + "2416 NaN NaN NaN \n", + "2417 NaN NaN NaN \n", + "2418 NaN NaN NaN \n", + "2419 NaN NaN NaN \n", + "\n", + " ryan.finley@ferguson.com rater_0 \\\n", + "0 NaN epugh@opensourceconnections.com \n", + "1 NaN epugh@opensourceconnections.com \n", + "2 NaN epugh@opensourceconnections.com \n", + "3 NaN epugh@opensourceconnections.com \n", + "4 NaN epugh@opensourceconnections.com \n", + "... ... ... \n", + "2415 NaN epugh@opensourceconnections.com \n", + "2416 NaN epugh@opensourceconnections.com \n", + "2417 NaN epugh@opensourceconnections.com \n", + "2418 NaN epugh@opensourceconnections.com \n", + "2419 NaN epugh@opensourceconnections.com \n", + "\n", + " rater_1 rater_2 \\\n", + "0 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "1 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "3 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "4 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "... ... ... \n", + "2415 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2416 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2417 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2418 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "2419 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "\n", + " nb_distinct_ratings \n", + "0 3 \n", + "1 3 \n", + "2 3 \n", + "3 3 \n", + "4 3 \n", + "... ... 
\n", + "2415 3 \n", + "2416 3 \n", + "2417 3 \n", + "2418 3 \n", + "2419 3 \n", + "\n", + "[2148 rows x 19 columns]" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('All disagree:')\n", + "df[df['nb_distinct_ratings']==3]" + ] + }, + { + "cell_type": "markdown", + "id": "b4a60ac4-4c2b-4477-b7c9-285fb29f843c", + "metadata": {}, + "source": [ + "Some cases where all raters agree, we can have good confidence in the rating:" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "8b065a8e-c76c-4e8b-ab5e-e1ec25db6a60", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "def big_discrepancy(r):\n \"\"\"returns 1 if there is at least one 2 grades between 1 rating and another, 0 otherwise\"\"\"\n ratings = [r[f'rating_{i}'] for i in range(nb_raters)]\n return 1 if max(ratings) - min(ratings) >=2 else 0\ndf['big_discrepancy'] = df.apply(big_discrepancy, axis=1)\ndf[df['big_discrepancy']==1].sample(2)", - "metadata": { - "trusted": true - }, - "execution_count": 81, - "outputs": [ - { - "ename": "", - "evalue": "a must be greater than 0 unless no samples are taken", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[81], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mmax\u001b[39m(ratings) \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mmin\u001b[39m(ratings) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 5\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbig_discrepancy\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mapply(big_discrepancy, 
axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbig_discrepancy\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/generic.py:5773\u001b[0m, in \u001b[0;36mNDFrame.sample\u001b[0;34m(self, n, frac, replace, weights, random_state, axis, ignore_index)\u001b[0m\n\u001b[1;32m 5770\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m weights \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 5771\u001b[0m weights \u001b[38;5;241m=\u001b[39m sample\u001b[38;5;241m.\u001b[39mpreprocess_weights(\u001b[38;5;28mself\u001b[39m, weights, axis)\n\u001b[0;32m-> 5773\u001b[0m sampled_indices \u001b[38;5;241m=\u001b[39m \u001b[43msample\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5774\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(sampled_indices, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 5776\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ignore_index:\n", - "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/sample.py:150\u001b[0m, in \u001b[0;36msample\u001b[0;34m(obj_len, size, replace, weights, 
random_state)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid weights: weights sum to zero\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchoice\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mastype(\n\u001b[1;32m 151\u001b[0m np\u001b[38;5;241m.\u001b[39mintp, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 152\u001b[0m )\n", - "File \u001b[0;32mmtrand.pyx:928\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: a must be greater than 0 unless no samples are taken" - ], - "output_type": "error" - } - ], - "id": "a784ae64-cb70-43dd-8bca-459a60663987" + "name": "stdout", + "output_type": "stream", + "text": [ + "All agree:\n" + ] }, { - "cell_type": "markdown", - "source": "### Overall confusion between raters\nConfusion analysis allows to understand the types of disagreements between raters. 
\nIn a health rating setup we would expect to have most of the confusions between `0` and `1` or `1` and `2`.\n", - "metadata": {}, - "id": "9df4a62c-df0d-46fd-bfa0-c96c9158542c" - }, + "ename": "", + "evalue": "a must be greater than 0 unless no samples are taken", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[79], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# We have none that everyone agrees on\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAll agree:\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnb_distinct_ratings\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/generic.py:5773\u001b[0m, in \u001b[0;36mNDFrame.sample\u001b[0;34m(self, n, frac, replace, weights, random_state, axis, ignore_index)\u001b[0m\n\u001b[1;32m 5770\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m weights \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 5771\u001b[0m weights \u001b[38;5;241m=\u001b[39m sample\u001b[38;5;241m.\u001b[39mpreprocess_weights(\u001b[38;5;28mself\u001b[39m, weights, axis)\n\u001b[0;32m-> 5773\u001b[0m sampled_indices \u001b[38;5;241m=\u001b[39m 
\u001b[43msample\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5774\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(sampled_indices, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 5776\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ignore_index:\n", + "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/sample.py:150\u001b[0m, in \u001b[0;36msample\u001b[0;34m(obj_len, size, replace, weights, random_state)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid weights: weights sum to zero\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchoice\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mastype(\n\u001b[1;32m 151\u001b[0m np\u001b[38;5;241m.\u001b[39mintp, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 152\u001b[0m )\n", + "File \u001b[0;32mmtrand.pyx:928\u001b[0m, in 
\u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: a must be greater than 0 unless no samples are taken" + ] + } + ], + "source": [ + "# We have none that everyone agrees on\n", + "print('All agree:')\n", + "df[df['nb_distinct_ratings']==1].sample(5)" + ] + }, + { + "cell_type": "markdown", + "id": "294c4445-1c81-4c73-96ac-6001f3f83ff2", + "metadata": {}, + "source": [ + "Not perfect but 2 / 3 raters agreed on the rating value:" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "2b300fa0-4876-46c6-9526-91afe73b1d7a", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "y1 = []\ny2 = []\nfor i,r in df.iterrows():\n #12\n y1.append(r['rating_0'])\n y2.append(r['rating_1'])\n\n ", - "metadata": { - "trusted": true - }, - "execution_count": 82, - "outputs": [], - "id": "6d8a99b9-f823-4829-aed8-9e376a0dfa73" + "name": "stdout", + "output_type": "stream", + "text": [ + "Majority agree:\n" + ] }, { - "cell_type": "code", - "source": "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\ncm = confusion_matrix(y1, y2, labels=range(len(ratings)))\ndisp = ConfusionMatrixDisplay(confusion_matrix=cm,\n display_labels=ratings)\nplt.figure(figsize=(8,8))\ndisp.plot(xticks_rotation=45, colorbar=False, cmap=plt.cm.Blues, ax=plt.gca(), values_format='d')\nplt.xlabel('');plt.ylabel('')\nplt.gca().xaxis.tick_top()", - "metadata": { - "trusted": true - }, - "execution_count": 83, - "outputs": [ - { - "ename": "", - "evalue": "Input y_true contains NaN.", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[83], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m 
\u001b[38;5;28;01mimport\u001b[39;00m confusion_matrix, ConfusionMatrixDisplay\n\u001b[0;32m----> 2\u001b[0m cm \u001b[38;5;241m=\u001b[39m \u001b[43mconfusion_matrix\u001b[49m\u001b[43m(\u001b[49m\u001b[43my1\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mrange\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mratings\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m disp \u001b[38;5;241m=\u001b[39m ConfusionMatrixDisplay(confusion_matrix\u001b[38;5;241m=\u001b[39mcm,\n\u001b[1;32m 4\u001b[0m display_labels\u001b[38;5;241m=\u001b[39mratings)\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mfigure(figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m8\u001b[39m,\u001b[38;5;241m8\u001b[39m))\n", - "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/metrics/_classification.py:317\u001b[0m, in \u001b[0;36mconfusion_matrix\u001b[0;34m(y_true, y_pred, labels, sample_weight, normalize)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconfusion_matrix\u001b[39m(\n\u001b[1;32m 233\u001b[0m y_true, y_pred, \u001b[38;5;241m*\u001b[39m, labels\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, sample_weight\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, normalize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 234\u001b[0m ):\n\u001b[1;32m 235\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Compute confusion matrix to evaluate the accuracy of a classification.\u001b[39;00m\n\u001b[1;32m 236\u001b[0m \n\u001b[1;32m 237\u001b[0m \u001b[38;5;124;03m By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;124;03m (0, 2, 1, 
1)\u001b[39;00m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 317\u001b[0m y_type, y_true, y_pred \u001b[38;5;241m=\u001b[39m \u001b[43m_check_targets\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 318\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y_type \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbinary\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmulticlass\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m is not supported\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m y_type)\n", - "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/metrics/_classification.py:87\u001b[0m, in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Check that y_true and y_pred belong to the same classification task.\u001b[39;00m\n\u001b[1;32m 61\u001b[0m \n\u001b[1;32m 62\u001b[0m \u001b[38;5;124;03mThis converts multiclass or binary types to a common shape, and raises a\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;124;03my_pred : array or indicator matrix\u001b[39;00m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 86\u001b[0m check_consistent_length(y_true, y_pred)\n\u001b[0;32m---> 87\u001b[0m type_true \u001b[38;5;241m=\u001b[39m \u001b[43mtype_of_target\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43minput_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43my_true\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m type_pred \u001b[38;5;241m=\u001b[39m type_of_target(y_pred, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124my_pred\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 90\u001b[0m y_type \u001b[38;5;241m=\u001b[39m {type_true, type_pred}\n", - "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/utils/multiclass.py:381\u001b[0m, in \u001b[0;36mtype_of_target\u001b[0;34m(y, input_name)\u001b[0m\n\u001b[1;32m 379\u001b[0m data \u001b[38;5;241m=\u001b[39m y\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;28;01mif\u001b[39;00m issparse(y) \u001b[38;5;28;01melse\u001b[39;00m y\n\u001b[1;32m 380\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m xp\u001b[38;5;241m.\u001b[39many(data \u001b[38;5;241m!=\u001b[39m data\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mint\u001b[39m)):\n\u001b[0;32m--> 381\u001b[0m \u001b[43m_assert_all_finite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontinuous\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m suffix\n\u001b[1;32m 384\u001b[0m \u001b[38;5;66;03m# Check multiclass\u001b[39;00m\n", - "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/utils/validation.py:161\u001b[0m, in \u001b[0;36m_assert_all_finite\u001b[0;34m(X, allow_nan, msg_dtype, estimator_name, input_name)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m estimator_name \u001b[38;5;129;01mand\u001b[39;00m input_name \u001b[38;5;241m==\u001b[39m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m has_nan_error:\n\u001b[1;32m 145\u001b[0m \u001b[38;5;66;03m# Improve the error message on how to handle missing values in\u001b[39;00m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# scikit-learn.\u001b[39;00m\n\u001b[1;32m 147\u001b[0m msg_err \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 148\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mestimator_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not accept missing values\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m encoded as NaN natively. For supervised learning, you might want\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m#estimators-that-handle-nan-values\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 160\u001b[0m )\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg_err)\n", - "\u001b[0;31mValueError\u001b[0m: Input y_true contains NaN." - ], - "output_type": "error" - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
querydocidcharlie@flax.co.ukrating_0eschramma@cas.orgdtaivpp@gmail.comrating_1cmcollier@gmail.comrating_2jeff@vin.comcmarino@enterprise-knowledge.commsfroh@gmail.competer@searchintuition.commaximilian.werk@jina.airyan.finley@ferguson.comrater_0rater_1rater_2nb_distinct_ratings
6projector screen549808NaN3.0NaNNaN3.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
19laptop77031393NaN3.0NaNNaNNaNNaN3.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
20iphone 879283963NaN0.0NaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
21iphone 879284190NaN0.0NaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
24iphone 877911774NaN0.0NaNNaN0.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
............................................................
1330coffee656359NaN3.0NaNNaN3.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1331coffee77265396NaN2.0NaNNaN2.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1334coffee2102472NaN2.0NaNNaNNaNNaN2.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1340vans77129498NaN0.0NaNNaN0.0NaNNaNNaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
1342vans77388459NaN0.0NaNNaNNaNNaN0.0NaNNaNNaNNaNNaNNaNepugh@opensourceconnections.comaarora@opensourceconnections.comben.w.trent@gmail.com2
\n", + "

272 rows × 19 columns

\n", + "
" ], - "id": "1a4f7484-185c-42a3-8931-32a28a6d6964" - }, + "text/plain": [ + " query docid charlie@flax.co.uk rating_0 \\\n", + "6 projector screen 549808 NaN 3.0 \n", + "19 laptop 77031393 NaN 3.0 \n", + "20 iphone 8 79283963 NaN 0.0 \n", + "21 iphone 8 79284190 NaN 0.0 \n", + "24 iphone 8 77911774 NaN 0.0 \n", + "... ... ... ... ... \n", + "1330 coffee 656359 NaN 3.0 \n", + "1331 coffee 77265396 NaN 2.0 \n", + "1334 coffee 2102472 NaN 2.0 \n", + "1340 vans 77129498 NaN 0.0 \n", + "1342 vans 77388459 NaN 0.0 \n", + "\n", + " eschramma@cas.org dtaivpp@gmail.com rating_1 cmcollier@gmail.com \\\n", + "6 NaN NaN 3.0 NaN \n", + "19 NaN NaN NaN NaN \n", + "20 NaN NaN NaN NaN \n", + "21 NaN NaN NaN NaN \n", + "24 NaN NaN 0.0 NaN \n", + "... ... ... ... ... \n", + "1330 NaN NaN 3.0 NaN \n", + "1331 NaN NaN 2.0 NaN \n", + "1334 NaN NaN NaN NaN \n", + "1340 NaN NaN 0.0 NaN \n", + "1342 NaN NaN NaN NaN \n", + "\n", + " rating_2 jeff@vin.com cmarino@enterprise-knowledge.com \\\n", + "6 NaN NaN NaN \n", + "19 3.0 NaN NaN \n", + "20 0.0 NaN NaN \n", + "21 0.0 NaN NaN \n", + "24 NaN NaN NaN \n", + "... ... ... ... \n", + "1330 NaN NaN NaN \n", + "1331 NaN NaN NaN \n", + "1334 2.0 NaN NaN \n", + "1340 NaN NaN NaN \n", + "1342 0.0 NaN NaN \n", + "\n", + " msfroh@gmail.com peter@searchintuition.com maximilian.werk@jina.ai \\\n", + "6 NaN NaN NaN \n", + "19 NaN NaN NaN \n", + "20 NaN NaN NaN \n", + "21 NaN NaN NaN \n", + "24 NaN NaN NaN \n", + "... ... ... ... \n", + "1330 NaN NaN NaN \n", + "1331 NaN NaN NaN \n", + "1334 NaN NaN NaN \n", + "1340 NaN NaN NaN \n", + "1342 NaN NaN NaN \n", + "\n", + " ryan.finley@ferguson.com rater_0 \\\n", + "6 NaN epugh@opensourceconnections.com \n", + "19 NaN epugh@opensourceconnections.com \n", + "20 NaN epugh@opensourceconnections.com \n", + "21 NaN epugh@opensourceconnections.com \n", + "24 NaN epugh@opensourceconnections.com \n", + "... ... ... 
\n", + "1330 NaN epugh@opensourceconnections.com \n", + "1331 NaN epugh@opensourceconnections.com \n", + "1334 NaN epugh@opensourceconnections.com \n", + "1340 NaN epugh@opensourceconnections.com \n", + "1342 NaN epugh@opensourceconnections.com \n", + "\n", + " rater_1 rater_2 \\\n", + "6 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "19 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "20 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "21 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "24 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "... ... ... \n", + "1330 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "1331 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "1334 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "1340 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "1342 aarora@opensourceconnections.com ben.w.trent@gmail.com \n", + "\n", + " nb_distinct_ratings \n", + "6 2 \n", + "19 2 \n", + "20 2 \n", + "21 2 \n", + "24 2 \n", + "... ... \n", + "1330 2 \n", + "1331 2 \n", + "1334 2 \n", + "1340 2 \n", + "1342 2 \n", + "\n", + "[272 rows x 19 columns]" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We have none\n", + "print('Majority agree:')\n", + "df[df['nb_distinct_ratings']==2]" + ] + }, + { + "cell_type": "markdown", + "id": "a2b7880d-4707-4cf0-ad2e-20f5ee1f912e", + "metadata": {}, + "source": [ + "### Big discrepancies\n", + "\n", + "Those cases are the most suspicious ones. 
Some raters rated the pair as `3-Perfect` while others rated it as '1-Fair' or '0-Bad' (or at least a difference of 2 grades).\n", + "There can be several reasons for that:\n", + " - there is an issue with the pair: query not clear, document not clear\n", + " - guidelines not well specified: a very common case is when the guidelines are not 100% clear to the raters. 
For example, what happens if the image of the results is relevant but the text is not? Or if some document fields are missing?\n", + " - no clear intent. Was the intent of the query clear enough? This can cause confusion to the raters. It's important to give an opportunity to the rater to say `I don't know how to rate this pair!`\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "a784ae64-cb70-43dd-8bca-459a60663987", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "source": "This work has been provided by Wallapop Search team (http://www.wallapop.com/).\n\n_This notebook was last updated 17-FEB-2024_", - "metadata": {}, - "id": "7c08fac4-ea2e-4aae-92e8-cde6739dc131" - }, + "ename": "", + "evalue": "a must be greater than 0 unless no samples are taken", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[81], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mmax\u001b[39m(ratings) \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mmin\u001b[39m(ratings) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 5\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbig_discrepancy\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mapply(big_discrepancy, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m 
\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbig_discrepancy\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/generic.py:5773\u001b[0m, in \u001b[0;36mNDFrame.sample\u001b[0;34m(self, n, frac, replace, weights, random_state, axis, ignore_index)\u001b[0m\n\u001b[1;32m 5770\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m weights \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 5771\u001b[0m weights \u001b[38;5;241m=\u001b[39m sample\u001b[38;5;241m.\u001b[39mpreprocess_weights(\u001b[38;5;28mself\u001b[39m, weights, axis)\n\u001b[0;32m-> 5773\u001b[0m sampled_indices \u001b[38;5;241m=\u001b[39m \u001b[43msample\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5774\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(sampled_indices, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 5776\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ignore_index:\n", + "File \u001b[0;32m/lib/python3.11/site-packages/pandas/core/sample.py:150\u001b[0m, in \u001b[0;36msample\u001b[0;34m(obj_len, size, replace, weights, random_state)\u001b[0m\n\u001b[1;32m 147\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid weights: weights sum to zero\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchoice\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreplace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mastype(\n\u001b[1;32m 151\u001b[0m np\u001b[38;5;241m.\u001b[39mintp, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 152\u001b[0m )\n", + "File \u001b[0;32mmtrand.pyx:928\u001b[0m, in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: a must be greater than 0 unless no samples are taken" + ] + } + ], + "source": [ + "def big_discrepancy(r):\n", + " \"\"\"returns 1 if there is at least one 2 grades between 1 rating and another, 0 otherwise\"\"\"\n", + " ratings = [r[f'rating_{i}'] for i in range(nb_raters)]\n", + " return 1 if max(ratings) - min(ratings) >=2 else 0\n", + "df['big_discrepancy'] = df.apply(big_discrepancy, axis=1)\n", + "df[df['big_discrepancy']==1].sample(2)" + ] + }, + { + "cell_type": "markdown", + "id": "9df4a62c-df0d-46fd-bfa0-c96c9158542c", + "metadata": {}, + "source": [ + "### Overall confusion between raters\n", + "Confusion analysis allows to understand the types of disagreements between raters. 
\n", + "In a health rating setup we would expect to have most of the confusions between `0` and `1` or `1` and `2`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "6d8a99b9-f823-4829-aed8-9e376a0dfa73", + "metadata": {}, + "outputs": [], + "source": [ + "y1 = []\n", + "y2 = []\n", + "for i,r in df.iterrows():\n", + " #12\n", + " y1.append(r['rating_0'])\n", + " y2.append(r['rating_1'])\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "1a4f7484-185c-42a3-8931-32a28a6d6964", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "", - "metadata": {}, - "execution_count": null, - "outputs": [], - "id": "349421e4-dab9-42ef-afba-c2d9df1a9929" + "ename": "", + "evalue": "Input y_true contains NaN.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[83], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m confusion_matrix, ConfusionMatrixDisplay\n\u001b[0;32m----> 2\u001b[0m cm \u001b[38;5;241m=\u001b[39m \u001b[43mconfusion_matrix\u001b[49m\u001b[43m(\u001b[49m\u001b[43my1\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mrange\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mratings\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m disp \u001b[38;5;241m=\u001b[39m ConfusionMatrixDisplay(confusion_matrix\u001b[38;5;241m=\u001b[39mcm,\n\u001b[1;32m 4\u001b[0m 
display_labels\u001b[38;5;241m=\u001b[39mratings)\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mfigure(figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m8\u001b[39m,\u001b[38;5;241m8\u001b[39m))\n", + "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/metrics/_classification.py:317\u001b[0m, in \u001b[0;36mconfusion_matrix\u001b[0;34m(y_true, y_pred, labels, sample_weight, normalize)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconfusion_matrix\u001b[39m(\n\u001b[1;32m 233\u001b[0m y_true, y_pred, \u001b[38;5;241m*\u001b[39m, labels\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, sample_weight\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, normalize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 234\u001b[0m ):\n\u001b[1;32m 235\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Compute confusion matrix to evaluate the accuracy of a classification.\u001b[39;00m\n\u001b[1;32m 236\u001b[0m \n\u001b[1;32m 237\u001b[0m \u001b[38;5;124;03m By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;124;03m (0, 2, 1, 1)\u001b[39;00m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 317\u001b[0m y_type, y_true, y_pred \u001b[38;5;241m=\u001b[39m \u001b[43m_check_targets\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 318\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y_type \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbinary\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmulticlass\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m is not supported\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m y_type)\n", + "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/metrics/_classification.py:87\u001b[0m, in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Check that y_true and y_pred belong to the same classification task.\u001b[39;00m\n\u001b[1;32m 61\u001b[0m \n\u001b[1;32m 62\u001b[0m \u001b[38;5;124;03mThis converts multiclass or binary types to a common shape, and raises a\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;124;03my_pred : array or indicator matrix\u001b[39;00m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 86\u001b[0m check_consistent_length(y_true, y_pred)\n\u001b[0;32m---> 87\u001b[0m type_true \u001b[38;5;241m=\u001b[39m \u001b[43mtype_of_target\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43my_true\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m type_pred \u001b[38;5;241m=\u001b[39m type_of_target(y_pred, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124my_pred\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 90\u001b[0m y_type \u001b[38;5;241m=\u001b[39m {type_true, type_pred}\n", + "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/utils/multiclass.py:381\u001b[0m, in \u001b[0;36mtype_of_target\u001b[0;34m(y, input_name)\u001b[0m\n\u001b[1;32m 379\u001b[0m data \u001b[38;5;241m=\u001b[39m y\u001b[38;5;241m.\u001b[39mdata \u001b[38;5;28;01mif\u001b[39;00m issparse(y) \u001b[38;5;28;01melse\u001b[39;00m y\n\u001b[1;32m 380\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m xp\u001b[38;5;241m.\u001b[39many(data \u001b[38;5;241m!=\u001b[39m data\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mint\u001b[39m)):\n\u001b[0;32m--> 381\u001b[0m \u001b[43m_assert_all_finite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontinuous\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m suffix\n\u001b[1;32m 384\u001b[0m \u001b[38;5;66;03m# Check multiclass\u001b[39;00m\n", + "File \u001b[0;32m/lib/python3.11/site-packages/sklearn/utils/validation.py:161\u001b[0m, in \u001b[0;36m_assert_all_finite\u001b[0;34m(X, allow_nan, msg_dtype, estimator_name, input_name)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m estimator_name \u001b[38;5;129;01mand\u001b[39;00m input_name \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m has_nan_error:\n\u001b[1;32m 145\u001b[0m \u001b[38;5;66;03m# Improve the error message on how to handle missing values in\u001b[39;00m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# scikit-learn.\u001b[39;00m\n\u001b[1;32m 147\u001b[0m msg_err \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 148\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mestimator_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not accept missing values\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m encoded as NaN natively. 
For supervised learning, you might want\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m#estimators-that-handle-nan-values\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 160\u001b[0m )\n\u001b[0;32m--> 161\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg_err)\n", + "\u001b[0;31mValueError\u001b[0m: Input y_true contains NaN." + ] } - ] -} \ No newline at end of file + ], + "source": [ + "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", + "cm = confusion_matrix(y1, y2, labels=range(len(ratings)))\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n", + " display_labels=ratings)\n", + "plt.figure(figsize=(8,8))\n", + "disp.plot(xticks_rotation=45, colorbar=False, cmap=plt.cm.Blues, ax=plt.gca(), values_format='d')\n", + "plt.xlabel('');plt.ylabel('')\n", + "plt.gca().xaxis.tick_top()" + ] + }, + { + "cell_type": "markdown", + "id": "7c08fac4-ea2e-4aae-92e8-cde6739dc131", + "metadata": {}, + "source": [ + "This work has been provided by Wallapop Search team (http://www.wallapop.com/).\n", + "\n", + "_This notebook was last updated 16_January_2025_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "349421e4-dab9-42ef-afba-c2d9df1a9929", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyterlite/files/examples/Scoring Comparison.ipynb b/jupyterlite/files/examples/Scoring Comparison.ipynb index c5c5df6..b7e779a 100644 --- 
a/jupyterlite/files/examples/Scoring Comparison.ipynb +++ b/jupyterlite/files/examples/Scoring Comparison.ipynb @@ -1,131 +1,303 @@ { - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "python", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8" - }, - "kernelspec": { - "name": "python", - "display_name": "Python (Pyodide)", - "language": "python" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Scoring Comparison\n", + "To understand the magnitude of changes, you can compare multiple snapshots of a case against each other. The final visualization shows you a histogram of your snapshots overlaid with each other so you can see how your individual query scores changed.\n", + "This visualization assumes you are using the same scorer and query set for all the snapshots. It also assumes the snapshots come from the same case!\n", + "\n", + "Please copy this example and customize it for your own purposes!" + ] }, - "nbformat_minor": 4, - "nbformat": 4, - "cells": [ - { - "cell_type": "markdown", - "source": "# Scoring Comparison\nTo understand the magnatude of changes, you can compare multiple snapshots of a case against each other. The final visualization shows you a histogram of your snapshots overlaid with each other so you can see how your individual query scorers changed.\nThis visualization assumes you are using the same scorer and query set for all the snapshots. 
It also assumes the snapshots come from the same case!\n\nPlease copy this example and customize it for your own purposes!", - "metadata": {} - }, - { - "cell_type": "markdown", - "source": "### Imports", - "metadata": {} - }, - { - "cell_type": "code", - "source": "from js import fetch\nimport pandas as pd\nfrom datetime import datetime\nimport random\nfrom matplotlib import pyplot\n%matplotlib inline", - "metadata": { - "trusted": true - }, - "execution_count": 1, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": "## Define the Data You Want", - "metadata": {} - }, - { - "cell_type": "code", - "source": "CASE_ID = 6789 # Your Case\nSNAPSHOT_IDS = [2471,2473] # Your Snapshots. Use the Compare Snapshot function in Quepid to see what the specific ID's are of your snapshots.", - "metadata": { - "trusted": true - }, - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": "### Pull data directly from Quepid's snapshot repository", - "metadata": {} - }, - { - "cell_type": "code", - "source": "\n# Retrieve from Quepid API all the snapshots\nsnapshots = []\nfor snapshot_id in SNAPSHOT_IDS:\n res = await fetch(f'/api/cases/{CASE_ID}/snapshots/{snapshot_id}.json')\n snapshots.append(await res.json())", - "metadata": { - "trusted": true - }, - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": "### Read in data to a dataframe", - "metadata": {} - }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from js import fetch\n", + "import pandas as pd\n", + "from datetime import datetime\n", + "import random\n", + "from matplotlib import pyplot\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define the Data You Want" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": 
[], + "source": [ + "CASE_ID = 6789 # Your Case\n", + "SNAPSHOT_IDS = [2471,2473] # Your Snapshots. Use the Compare Snapshot function in Quepid to see what the specific ID's are of your snapshots." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pull data directly from Quepid's snapshot repository" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Retrieve from Quepid API all the snapshots\n", + "snapshots = []\n", + "for snapshot_id in SNAPSHOT_IDS:\n", + " res = await fetch(f'/api/cases/{CASE_ID}/snapshots/{snapshot_id}.json')\n", + " snapshots.append(await res.json())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read in data to a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "queryData = []\nsnapshotNames = {}\n\nfor snapshot in snapshots:\n queryToScoreDict = {}\n queryToNumFoundDict = {}\n snapshotNames[snapshot.id] = snapshot.name\n for snapshotScore in snapshot.scores:\n queryToScoreDict[snapshotScore.query_id] = snapshotScore.score\n queryToNumFoundDict[snapshotScore.query_id] = snapshotScore.number_of_results\n for snapshotQuery in snapshot.queries:\n queryData.append({'snapshot_id':snapshot.id, 'query':snapshotQuery.query_text, 'numfound':queryToNumFoundDict[snapshotQuery.query_id], 'score':queryToScoreDict[snapshotQuery.query_id]})\n \ndf = pd.DataFrame(queryData)\ndf", - "metadata": { - "trusted": true - }, - "execution_count": 4, - "outputs": [ - { - "execution_count": 4, - "output_type": "execute_result", - "data": { - "text/plain": " snapshot_id query numfound score\n0 2471 projector screen 1 1\n1 2471 notebook 1 1\n2 2471 iphone 8 1 1\n3 2471 printer 1 1\n4 2471 computer 1 1\n.. ... ... ... 
...\n265 2473 windows 10 1 1\n266 2473 microwave 1 1\n267 2473 bluetooth speakers 1 1\n268 2473 coffee 1 1\n269 2473 vans 1 1\n\n[270 rows x 4 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
snapshot_idquerynumfoundscore
02471projector screen11
12471notebook11
22471iphone 811
32471printer11
42471computer11
...............
2652473windows 1011
2662473microwave11
2672473bluetooth speakers11
2682473coffee11
2692473vans11
\n

270 rows × 4 columns

\n
" - }, - "metadata": {} - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
snapshot_idquerynumfoundscore
02471projector screen11
12471notebook11
22471iphone 811
32471printer11
42471computer11
...............
2652473windows 1011
2662473microwave11
2672473bluetooth speakers11
2682473coffee11
2692473vans11
\n", + "

270 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " snapshot_id query numfound score\n", + "0 2471 projector screen 1 1\n", + "1 2471 notebook 1 1\n", + "2 2471 iphone 8 1 1\n", + "3 2471 printer 1 1\n", + "4 2471 computer 1 1\n", + ".. ... ... ... ...\n", + "265 2473 windows 10 1 1\n", + "266 2473 microwave 1 1\n", + "267 2473 bluetooth speakers 1 1\n", + "268 2473 coffee 1 1\n", + "269 2473 vans 1 1\n", + "\n", + "[270 rows x 4 columns]" ] - }, - { - "cell_type": "markdown", - "source": "## Create a histogram to compare snapshots\n\nThe snapshots that are represented in fill color.", - "metadata": {} - }, + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "queryData = []\n", + "snapshotNames = {}\n", + "\n", + "for snapshot in snapshots:\n", + " queryToScoreDict = {}\n", + " queryToNumFoundDict = {}\n", + " snapshotNames[snapshot.id] = snapshot.name\n", + " for snapshotScore in snapshot.scores:\n", + " queryToScoreDict[snapshotScore.query_id] = snapshotScore.score\n", + " queryToNumFoundDict[snapshotScore.query_id] = snapshotScore.number_of_results\n", + " for snapshotQuery in snapshot.queries:\n", + " queryData.append({'snapshot_id':snapshot.id, 'query':snapshotQuery.query_text, 'numfound':queryToNumFoundDict[snapshotQuery.query_id], 'score':queryToScoreDict[snapshotQuery.query_id]})\n", + " \n", + "df = pd.DataFrame(queryData)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a histogram to compare snapshots\n", + "\n", + "The snapshots that are represented in fill color." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "source": "for snapshot_id in SNAPSHOT_IDS:\n pyplot.hist(df.loc[df['snapshot_id'] == snapshot_id]['score'], 20, alpha=0.5, label=f'{snapshotNames[snapshot_id]} ({snapshot_id})')\npyplot.legend(loc='upper right')\npyplot.show()", - "metadata": { - "trusted": true - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/png": "\n" - }, - "metadata": {} - } + "data": { + "image/png": "\n", + "text/plain": [ + "
" ] - }, - { - "cell_type": "markdown", - "source": "_This notebook was last updated 16-FEB-2024_", - "metadata": {} - }, - { - "cell_type": "code", - "source": "", - "metadata": {}, - "execution_count": null, - "outputs": [] + }, + "metadata": {}, + "output_type": "display_data" } - ] -} \ No newline at end of file + ], + "source": [ + "for snapshot_id in SNAPSHOT_IDS:\n", + " pyplot.hist(df.loc[df['snapshot_id'] == snapshot_id]['score'], 20, alpha=0.5, label=f'{snapshotNames[snapshot_id]} ({snapshot_id})')\n", + "pyplot.legend(loc='upper right')\n", + "pyplot.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_This notebook was last updated 16-FEB-2024_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}