fix a couple of notebook bugs

o19s · Jan 16, 2025 · 84d89eb · 84d89eb
1 parent 5c6665a
commit 84d89eb
Show file tree

Hide file tree

Showing 4 changed files with 3,687 additions and 981 deletions.
diff --git a/jupyterlite/files/examples/Fleiss Kappa.ipynb b/jupyterlite/files/examples/Fleiss Kappa.ipynb
@@ -1,151 +1,217 @@
 {
-  "metadata": {
-    "kernelspec": {
-      "name": "python",
-      "display_name": "Python (Pyodide)",
-      "language": "python"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "python",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.8"
-    }
-  },
-  "nbformat_minor": 5,
-  "nbformat": 4,
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "source": "# Fleiss' Kappa \nTo understand how much your judges agree with each other.  It is meant to be used with more than two judges.\n\nRead https://www.datanovia.com/en/blog/kappa-coefficient-interpretation/ to learn more.\n\nPlease copy this example and customize it for your own purposes!",
-      "metadata": {},
-      "id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f"
-    },
-    {
-      "cell_type": "markdown",
-      "source": "## Imports",
-      "metadata": {},
-      "id": "e3412382"
-    },
-    {
-      "cell_type": "code",
-      "source": "import pandas as pd\nfrom js import fetch\nimport json\n\nfrom collections import defaultdict\nfrom statsmodels.stats.inter_rater import aggregate_raters\nfrom statsmodels.stats.inter_rater import fleiss_kappa\nfrom IPython.display import display, Markdown",
-      "metadata": {
-        "trusted": true
-      },
-      "execution_count": 1,
-      "outputs": [],
-      "id": "4972936a"
-    },
-    {
-      "cell_type": "markdown",
-      "source": "## Step 0: Configuration",
-      "metadata": {},
-      "id": "6da26c5e"
-    },
-    {
-      "cell_type": "code",
-      "source": "QUEPID_BOOK_NUM = 25",
-      "metadata": {
-        "trusted": true
-      },
-      "execution_count": 2,
-      "outputs": [],
-      "id": "71803a49-4065-4adf-a69e-cb0fe2d00f22"
-    },
-    {
-      "cell_type": "markdown",
-      "source": "## Step 1: Download the Quepid Book",
-      "metadata": {},
-      "id": "420416df-9e6a-41b4-987b-7a03c9dd38b3"
-    },
-    {
-      "cell_type": "code",
-      "source": "# Generic GET call to a JSON endpoint \nasync def get_json(url):\n    resp = await fetch(url)\n    resp_text = await resp.text()\n    return json.loads(resp_text)\n\n",
-      "metadata": {
-        "trusted": true
-      },
-      "execution_count": 3,
-      "outputs": [],
-      "id": "31193536-98eb-4b46-ab98-af04ee07c6d3"
-    },
-    {
-      "cell_type": "code",
-      "source": "data = await get_json(f'/api/export/books/{QUEPID_BOOK_NUM}')",
-      "metadata": {
-        "trusted": true
-      },
-      "execution_count": null,
-      "outputs": [],
-      "id": "8fef6231-daa8-467f-ac57-13a144e8a356"
-    },
-    {
-      "cell_type": "markdown",
-      "source": "## Step 2: Extract and Prepare Data",
-      "metadata": {},
-      "id": "79d985ad-cd11-44a9-a7e1-0851bc99aef3"
-    },
-    {
-      "cell_type": "code",
-      "source": "# Initialize a list to hold the tuples of (doc_id, rating, count)\nratings_data = []\n\n# Iterate through each query-doc pair\nfor pair in data['query_doc_pairs']:\n    # Initialize a dictionary to count the ratings for this pair\n    ratings_count = defaultdict(int)\n    \n    # Extract judgements and count the ratings\n    for judgement in pair['judgements']:\n        rating = judgement['rating']\n        ratings_count[rating] += 1\n\n    # Append the counts to the ratings_data list\n    for rating, count in ratings_count.items():\n        ratings_data.append((pair['doc_id'], rating, count))\n",
-      "metadata": {
-        "trusted": true
-      },
-      "execution_count": null,
-      "outputs": [],
-      "id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91"
-    },
-    {
-      "cell_type": "markdown",
-      "source": "## Step 3: Aggregate Raters' Data",
-      "metadata": {},
-      "id": "caf5632b-132a-4e1b-80fe-c8c5ab7f2f3a"
-    },
-    {
-      "cell_type": "code",
-      "source": "# Convert ratings_data to a DataFrame\ndf = pd.DataFrame(ratings_data, columns=['doc_id', 'rating', 'count'])\n\n# Use crosstab to create a contingency table\ndata_crosstab = pd.crosstab(index=df['doc_id'], columns=df['rating'], values=df['count'], aggfunc='sum')\n\n# Drop any rows missing judgements\ndata_crosstab = data_crosstab.dropna(how='any')\n\n# Convert the DataFrame to the format expected by aggregate_raters\ndata_for_aggregation = data_crosstab.values\n\n# Aggregate the raters' data\ntable, _ = aggregate_raters(data_for_aggregation)",
-      "metadata": {
-        "trusted": true
-      },
-      "execution_count": null,
-      "outputs": [],
-      "id": "a7598308-129b-4628-ad3a-fc3d703f8205"
-    },
-    {
-      "cell_type": "markdown",
-      "source": "## Step 4: Compute Fleiss' Kappa",
-      "metadata": {},
-      "id": "25c79fbc"
-    },
-    {
-      "cell_type": "code",
-      "source": "kappa = fleiss_kappa(table, method='fleiss')\ndisplay(Markdown(f\"## Fleiss' Kappa: {kappa:.4f}\"))",
-      "metadata": {
-        "trusted": true
-      },
-      "execution_count": null,
-      "outputs": [],
-      "id": "25a613f9"
-    },
-    {
-      "cell_type": "markdown",
-      "source": "_This notebook was last updated 19-FEB-2024_",
-      "metadata": {},
-      "id": "5704579e-2321-4629-8de0-6608b428e2b6"
-    },
-    {
-      "cell_type": "code",
-      "source": "",
-      "metadata": {},
-      "execution_count": null,
-      "outputs": [],
-      "id": "7203f6cc-c068-4f75-a59a-1f49c5555319"
-    }
-  ]
-}
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "bd7e4efa-eb00-451e-984d-ed6646d8e25f",
+   "metadata": {},
+   "source": [
+    "# Fleiss' Kappa \n",
+    "To understand how much your judges agree with each other.  It is meant to be used with more than two judges.\n",
+    "\n",
+    "Read https://www.datanovia.com/en/blog/kappa-coefficient-interpretation/ to learn more.\n",
+    "\n",
+    "Please copy this example and customize it for your own purposes!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e3412382",
+   "metadata": {},
+   "source": [
+    "## Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4972936a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from js import fetch\n",
+    "import json\n",
+    "\n",
+    "from collections import defaultdict\n",
+    "from statsmodels.stats.inter_rater import aggregate_raters\n",
+    "from statsmodels.stats.inter_rater import fleiss_kappa\n",
+    "from IPython.display import display, Markdown"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6da26c5e",
+   "metadata": {},
+   "source": [
+    "## Step 0: Configuration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "71803a49-4065-4adf-a69e-cb0fe2d00f22",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "QUEPID_BOOK_NUM = 25"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "420416df-9e6a-41b4-987b-7a03c9dd38b3",
+   "metadata": {},
+   "source": [
+    "## Step 1: Download the Quepid Book"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "31193536-98eb-4b46-ab98-af04ee07c6d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generic GET call to a JSON endpoint \n",
+    "async def get_json(url):\n",
+    "    resp = await fetch(url)\n",
+    "    resp_text = await resp.text()\n",
+    "    return json.loads(resp_text)\n",
+    "\n",
+    "async def get_text(url):\n",
+    "    resp = await fetch(url)\n",
+    "    resp_text = await resp.text()\n",
+    "    return resp_text\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8fef6231-daa8-467f-ac57-13a144e8a356",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = await get_text(f'/api/books/{QUEPID_BOOK_NUM}.csv')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "79d985ad-cd11-44a9-a7e1-0851bc99aef3",
+   "metadata": {},
+   "source": [
+    "## Step 2: Extract and Prepare Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize a list to hold the tuples of (doc_id, rating, count)\n",
+    "ratings_data = []\n",
+    "\n",
+    "# Iterate through each query-doc pair\n",
+    "for pair in data['query_doc_pairs']:\n",
+    "    # Initialize a dictionary to count the ratings for this pair\n",
+    "    ratings_count = defaultdict(int)\n",
+    "    \n",
+    "    # Extract judgements and count the ratings\n",
+    "    for judgement in pair['judgements']:\n",
+    "        rating = judgement['rating']\n",
+    "        ratings_count[rating] += 1\n",
+    "\n",
+    "    # Append the counts to the ratings_data list\n",
+    "    for rating, count in ratings_count.items():\n",
+    "        ratings_data.append((pair['doc_id'], rating, count))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "caf5632b-132a-4e1b-80fe-c8c5ab7f2f3a",
+   "metadata": {},
+   "source": [
+    "## Step 3: Aggregate Raters' Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7598308-129b-4628-ad3a-fc3d703f8205",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert ratings_data to a DataFrame\n",
+    "df = pd.DataFrame(ratings_data, columns=['doc_id', 'rating', 'count'])\n",
+    "\n",
+    "# Use crosstab to create a contingency table\n",
+    "data_crosstab = pd.crosstab(index=df['doc_id'], columns=df['rating'], values=df['count'], aggfunc='sum')\n",
+    "\n",
+    "# Drop any rows missing judgements\n",
+    "data_crosstab = data_crosstab.dropna(how='any')\n",
+    "\n",
+    "# Convert the DataFrame to the format expected by aggregate_raters\n",
+    "data_for_aggregation = data_crosstab.values\n",
+    "\n",
+    "# Aggregate the raters' data\n",
+    "table, _ = aggregate_raters(data_for_aggregation)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "25c79fbc",
+   "metadata": {},
+   "source": [
+    "## Step 4: Compute Fleiss' Kappa"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "25a613f9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kappa = fleiss_kappa(table, method='fleiss')\n",
+    "display(Markdown(f\"## Fleiss' Kappa: {kappa:.4f}\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5704579e-2321-4629-8de0-6608b428e2b6",
+   "metadata": {},
+   "source": [
+    "_This notebook was last updated 16_January_2025_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7203f6cc-c068-4f75-a59a-1f49c5555319",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}