-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from o19s/add_book_related_notebooks
Still need some work, but at least got them in
- Loading branch information
Showing
4 changed files
with
752 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
{ | ||
"metadata": { | ||
"kernelspec": { | ||
"name": "python", | ||
"display_name": "Python (Pyodide)", | ||
"language": "python" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "python", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8" | ||
} | ||
}, | ||
"nbformat_minor": 5, | ||
"nbformat": 4, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"source": "# Fleiss' Kappa \nTo understand how much your raters what? Scott, need some text!\n\nPlease copy this example and customize it for your own purposes!", | ||
"metadata": {}, | ||
"id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Imports", | ||
"metadata": {}, | ||
"id": "e3412382" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "import pandas as pd\nfrom js import fetch\nimport json\n\nfrom collections import defaultdict\nfrom statsmodels.stats.inter_rater import aggregate_raters\nfrom statsmodels.stats.inter_rater import fleiss_kappa\nfrom IPython.display import display, Markdown", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 1, | ||
"outputs": [], | ||
"id": "4972936a" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 0: Configuration", | ||
"metadata": {}, | ||
"id": "6da26c5e" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "QUEPID_BOOK_NUM = 25\n\n# Not needed if running within Quepid JupyterLite\n# QUEPID_API_TOKEN = \"\"", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 3, | ||
"outputs": [], | ||
"id": "71803a49-4065-4adf-a69e-cb0fe2d00f22" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 1: Download the Quepid Book", | ||
"metadata": {}, | ||
"id": "420416df-9e6a-41b4-987b-7a03c9dd38b3" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "# Generic GET call to a JSON endpoint \nasync def get_json(url):\n resp = await fetch(url)\n resp_text = await resp.text()\n return json.loads(resp_text)\n\n", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 4, | ||
"outputs": [], | ||
"id": "31193536-98eb-4b46-ab98-af04ee07c6d3" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "data = await get_json(f'/api/export/books/{QUEPID_BOOK_NUM}')", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 5, | ||
"outputs": [], | ||
"id": "8fef6231-daa8-467f-ac57-13a144e8a356" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 2: Extract and Prepare Data", | ||
"metadata": {}, | ||
"id": "79d985ad-cd11-44a9-a7e1-0851bc99aef3" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "# Initialize a list to hold the tuples of (doc_id, rating, count)\nratings_data = []\n\n# Iterate through each query-doc pair\nfor pair in data['query_doc_pairs']:\n # Initialize a dictionary to count the ratings for this pair\n ratings_count = defaultdict(int)\n \n # Extract judgements and count the ratings\n for judgement in pair['judgements']:\n rating = judgement['rating']\n ratings_count[rating] += 1\n\n # Append the counts to the ratings_data list\n for rating, count in ratings_count.items():\n ratings_data.append((pair['doc_id'], rating, count))\n", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 6, | ||
"outputs": [], | ||
"id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 3: Aggregate Raters' Data", | ||
"metadata": {}, | ||
"id": "caf5632b-132a-4e1b-80fe-c8c5ab7f2f3a" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "# Convert ratings_data to a DataFrame\ndf = pd.DataFrame(ratings_data, columns=['doc_id', 'rating', 'count'])\n\n# Use crosstab to create a contingency table\ndata_crosstab = pd.crosstab(index=df['doc_id'], columns=df['rating'], values=df['count'], aggfunc='sum')\n\n# Drop any rows missing judgements\ndata_crosstab = data_crosstab.dropna(how='any')\n\n# Convert the DataFrame to the format expected by aggregate_raters\ndata_for_aggregation = data_crosstab.values\n\n# Aggregate the raters' data\ntable, _ = aggregate_raters(data_for_aggregation)", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 7, | ||
"outputs": [], | ||
"id": "a7598308-129b-4628-ad3a-fc3d703f8205" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "## Step 4: Compute Fleiss' Kappa", | ||
"metadata": {}, | ||
"id": "25c79fbc" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "kappa = fleiss_kappa(table, method='fleiss')\ndisplay(Markdown(f\"## Fleiss' Kappa: {kappa:.4f}\"))", | ||
"metadata": { | ||
"trusted": true | ||
}, | ||
"execution_count": 8, | ||
"outputs": [ | ||
{ | ||
"output_type": "display_data", | ||
"data": { | ||
"text/plain": "<IPython.core.display.Markdown object>", | ||
"text/markdown": "## Fleiss' Kappa: -0.3333" | ||
}, | ||
"metadata": {} | ||
} | ||
], | ||
"id": "25a613f9" | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": "_This notebook was last updated 17-FEB-2024_", | ||
"metadata": {}, | ||
"id": "5704579e-2321-4629-8de0-6608b428e2b6" | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": "", | ||
"metadata": {}, | ||
"execution_count": null, | ||
"outputs": [], | ||
"id": "7203f6cc-c068-4f75-a59a-1f49c5555319" | ||
} | ||
] | ||
} |
Oops, something went wrong.