-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
David Fisher
committed
Jan 16, 2025
1 parent
5c6665a
commit 84d89eb
Showing
4 changed files
with
3,687 additions
and
981 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,151 +1,217 @@ | ||
{ | ||
"metadata": { | ||
"kernelspec": { | ||
"name": "python", | ||
"display_name": "Python (Pyodide)", | ||
"language": "python" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "python", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8" | ||
} | ||
}, | ||
"nbformat_minor": 5, | ||
"nbformat": 4, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"source": "# Fleiss' Kappa \nTo understand how much your judges agree with each other. It is meant to be used with more than two judges.\n\nRead https://www.datanovia.com/en/blog/kappa-coefficient-interpretation/ to learn more.\n\nPlease copy this example and customize it for your own purposes!", | ||
"metadata": {}, | ||
"id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Imports", | ||
"metadata": {}, | ||
"id": "e3412382" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "import pandas as pd\nfrom js import fetch\nimport json\n\nfrom collections import defaultdict\nfrom statsmodels.stats.inter_rater import aggregate_raters\nfrom statsmodels.stats.inter_rater import fleiss_kappa\nfrom IPython.display import display, Markdown", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 1, | ||
"outputs": [], | ||
"id": "4972936a" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 0: Configuration", | ||
"metadata": {}, | ||
"id": "6da26c5e" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "QUEPID_BOOK_NUM = 25", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 2, | ||
"outputs": [], | ||
"id": "71803a49-4065-4adf-a69e-cb0fe2d00f22" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 1: Download the Quepid Book", | ||
"metadata": {}, | ||
"id": "420416df-9e6a-41b4-987b-7a03c9dd38b3" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "# Generic GET call to a JSON endpoint \nasync def get_json(url):\n resp = await fetch(url)\n resp_text = await resp.text()\n return json.loads(resp_text)\n\n", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 3, | ||
"outputs": [], | ||
"id": "31193536-98eb-4b46-ab98-af04ee07c6d3" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "data = await get_json(f'/api/export/books/{QUEPID_BOOK_NUM}')", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": null, | ||
"outputs": [], | ||
"id": "8fef6231-daa8-467f-ac57-13a144e8a356" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 2: Extract and Prepare Data", | ||
"metadata": {}, | ||
"id": "79d985ad-cd11-44a9-a7e1-0851bc99aef3" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "# Initialize a list to hold the tuples of (doc_id, rating, count)\nratings_data = []\n\n# Iterate through each query-doc pair\nfor pair in data['query_doc_pairs']:\n # Initialize a dictionary to count the ratings for this pair\n ratings_count = defaultdict(int)\n \n # Extract judgements and count the ratings\n for judgement in pair['judgements']:\n rating = judgement['rating']\n ratings_count[rating] += 1\n\n # Append the counts to the ratings_data list\n for rating, count in ratings_count.items():\n ratings_data.append((pair['doc_id'], rating, count))\n", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": null, | ||
"outputs": [], | ||
"id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 3: Aggregate Raters' Data", | ||
"metadata": {}, | ||
"id": "caf5632b-132a-4e1b-80fe-c8c5ab7f2f3a" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "# Convert ratings_data to a DataFrame\ndf = pd.DataFrame(ratings_data, columns=['doc_id', 'rating', 'count'])\n\n# Use crosstab to create a contingency table\ndata_crosstab = pd.crosstab(index=df['doc_id'], columns=df['rating'], values=df['count'], aggfunc='sum')\n\n# Drop any rows missing judgements\ndata_crosstab = data_crosstab.dropna(how='any')\n\n# Convert the DataFrame to the format expected by aggregate_raters\ndata_for_aggregation = data_crosstab.values\n\n# Aggregate the raters' data\ntable, _ = aggregate_raters(data_for_aggregation)", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": null, | ||
"outputs": [], | ||
"id": "a7598308-129b-4628-ad3a-fc3d703f8205" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 4: Compute Fleiss' Kappa", | ||
"metadata": {}, | ||
"id": "25c79fbc" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "kappa = fleiss_kappa(table, method='fleiss')\ndisplay(Markdown(f\"## Fleiss' Kappa: {kappa:.4f}\"))", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": null, | ||
"outputs": [], | ||
"id": "25a613f9" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "_This notebook was last updated 19-FEB-2024_", | ||
"metadata": {}, | ||
"id": "5704579e-2321-4629-8de0-6608b428e2b6" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "", | ||
"metadata": {}, | ||
"execution_count": null, | ||
"outputs": [], | ||
"id": "7203f6cc-c068-4f75-a59a-1f49c5555319" | ||
} | ||
] | ||
} | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f", | ||
"metadata": {}, | ||
"source": [ | ||
"# Fleiss' Kappa \n", | ||
"Use this notebook to understand how much your judges agree with each other. It is meant to be used with more than two judges.\n", | ||
"\n", | ||
"Read https://www.datanovia.com/en/blog/kappa-coefficient-interpretation/ to learn more.\n", | ||
"\n", | ||
"Please copy this example and customize it for your own purposes!" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "e3412382", | ||
"metadata": {}, | ||
"source": [ | ||
"## Imports" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "4972936a", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"from js import fetch\n", | ||
"import json\n", | ||
"\n", | ||
"from collections import defaultdict\n", | ||
"from statsmodels.stats.inter_rater import aggregate_raters\n", | ||
"from statsmodels.stats.inter_rater import fleiss_kappa\n", | ||
"from IPython.display import display, Markdown" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "6da26c5e", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 0: Configuration" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "71803a49-4065-4adf-a69e-cb0fe2d00f22", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"QUEPID_BOOK_NUM = 25" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "420416df-9e6a-41b4-987b-7a03c9dd38b3", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 1: Download the Quepid Book" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "31193536-98eb-4b46-ab98-af04ee07c6d3", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Generic GET call to a JSON endpoint \n", | ||
"async def get_json(url):\n", | ||
" resp = await fetch(url)\n", | ||
" resp_text = await resp.text()\n", | ||
" return json.loads(resp_text)\n", | ||
"\n", | ||
"async def get_text(url):\n", | ||
" resp = await fetch(url)\n", | ||
" resp_text = await resp.text()\n", | ||
" return resp_text\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "8fef6231-daa8-467f-ac57-13a144e8a356", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"data = await get_text(f'/api/books/{QUEPID_BOOK_NUM}.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "79d985ad-cd11-44a9-a7e1-0851bc99aef3", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 2: Extract and Prepare Data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Initialize a list to hold the tuples of (doc_id, rating, count)\n", | ||
"ratings_data = []\n", | ||
"\n", | ||
"# Iterate through each query-doc pair\n", | ||
"for pair in data['query_doc_pairs']:\n", | ||
" # Initialize a dictionary to count the ratings for this pair\n", | ||
" ratings_count = defaultdict(int)\n", | ||
" \n", | ||
" # Extract judgements and count the ratings\n", | ||
" for judgement in pair['judgements']:\n", | ||
" rating = judgement['rating']\n", | ||
" ratings_count[rating] += 1\n", | ||
"\n", | ||
" # Append the counts to the ratings_data list\n", | ||
" for rating, count in ratings_count.items():\n", | ||
" ratings_data.append((pair['doc_id'], rating, count))\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "caf5632b-132a-4e1b-80fe-c8c5ab7f2f3a", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 3: Aggregate Raters' Data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "a7598308-129b-4628-ad3a-fc3d703f8205", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Convert ratings_data to a DataFrame\n", | ||
"df = pd.DataFrame(ratings_data, columns=['doc_id', 'rating', 'count'])\n", | ||
"\n", | ||
"# Use crosstab to create a contingency table\n", | ||
"data_crosstab = pd.crosstab(index=df['doc_id'], columns=df['rating'], values=df['count'], aggfunc='sum')\n", | ||
"\n", | ||
"# Drop any rows missing judgements\n", | ||
"data_crosstab = data_crosstab.dropna(how='any')\n", | ||
"\n", | ||
"# Convert the DataFrame to the format expected by aggregate_raters\n", | ||
"data_for_aggregation = data_crosstab.values\n", | ||
"\n", | ||
"# Aggregate the raters' data\n", | ||
"table, _ = aggregate_raters(data_for_aggregation)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "25c79fbc", | ||
"metadata": {}, | ||
"source": [ | ||
"## Step 4: Compute Fleiss' Kappa" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "25a613f9", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"kappa = fleiss_kappa(table, method='fleiss')\n", | ||
"display(Markdown(f\"## Fleiss' Kappa: {kappa:.4f}\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "5704579e-2321-4629-8de0-6608b428e2b6", | ||
"metadata": {}, | ||
"source": [ | ||
"_This notebook was last updated 16-JAN-2025_" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7203f6cc-c068-4f75-a59a-1f49c5555319", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.