From 3a5541d83c1ec20ce3851ec079fadee067794c13 Mon Sep 17 00:00:00 2001
From: Kartik Choudhary
Date: Tue, 17 Oct 2023 14:00:14 -0400
Subject: [PATCH] Improvements in HuggingFace QA example notebook (#2391)

* Added info about required packages

* Update responsibleaidashboard-question-answering-model-debugging.ipynb

* show example prediction

* Update responsibleaidashboard-question-answering-model-debugging.ipynb
---
 ...d-question-answering-model-debugging.ipynb | 85 +++++++++++++------
 1 file changed, 58 insertions(+), 27 deletions(-)

diff --git a/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb b/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb
index 3b663cfc61..dbcb6b8dc9 100644
--- a/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb
+++ b/notebooks/responsibleaidashboard/text/responsibleaidashboard-question-answering-model-debugging.ipynb
@@ -42,6 +42,31 @@
     "The following section examines the code necessary to create datasets and a model. It then generates insights using the `responsibleai` API that can be visually analyzed."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "6174bcad",
+   "metadata": {},
+   "source": [
+    "### Prepare\n",
+    "\n",
+    "To run this notebook, we need to install the following packages:\n",
+    "\n",
+    "```\n",
+    "raiutils\n",
+    "raiwidgets\n",
+    "datasets\n",
+    "transformers\n",
+    "responsibleai_text\n",
+    "torch\n",
+    "```\n",
+    "\n",
+    "Run the following command to load the spacy pipeline:\n",
+    "\n",
+    "```bash\n",
+    "python -m spacy download en_core_web_sm\n",
+    "```"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "40739025",
@@ -86,16 +111,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dataset = datasets.load_dataset(\"squad\", split=\"train\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a0eef443",
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "dataset = datasets.load_dataset(\"squad\", split=\"train\")\n",
     "dataset"
    ]
   },
@@ -130,17 +146,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "data = pd.DataFrame({'context': context, 'questions': questions, 'answers': answers})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e6f87e9c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data"
+    "data = pd.DataFrame({'context': context, 'questions': questions, 'answers': answers})\n",
+    "data = data.sample(frac=1.0, random_state=42).reset_index(drop=True)\n",
+    "data.head()"
    ]
   },
@@ -159,18 +167,42 @@
    "outputs": [],
    "source": [
     "# load the question-answering model\n",
-    "pmodel = pipeline('question-answering')"
+    "pipeline_model = pipeline('question-answering')\n",
+    "test_size = 5\n",
+    "\n",
+    "train_data = data\n",
+    "test_data = data[:test_size]"
    ]
   },
   {
+   "cell_type": "markdown",
+   "id": "7cf8327b",
+   "metadata": {},
+   "source": [
+    "See an example of the model's predictions"
+   ]
+  },
+  {
    "cell_type": "code",
    "execution_count": null,
-   "id": "04801887",
+   "id": "ce087699",
    "metadata": {},
    "outputs": [],
    "source": [
-    "train_data = data\n",
-    "test_data = data[:5]"
+    "def get_answer(dataset, idx):\n",
+    "    model_output = pipeline_model(question=dataset['questions'][idx], \n",
+    "                                  context=dataset['context'][idx])\n",
+    "    pred = model_output['answer']\n",
+    "    return pred\n",
+    "\n",
+    "def check_answer(dataset, idx):\n",
+    "    pred = get_answer(dataset, idx)\n",
+    "    print('Question : ', dataset['questions'][idx])\n",
+    "    print('Answer : ', dataset['answers'][idx])\n",
+    "    print('Predicted : ', pred)\n",
+    "    print('Correct : ', pred == dataset['answers'][idx])\n",
+    "\n",
+    "check_answer(test_data, 0)\n"
    ]
   },
   {
@@ -209,8 +241,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "rai_insights = RAITextInsights(pmodel, test_data,\n",
-    "                               \"answers\",\n",
+    "rai_insights = RAITextInsights(pipeline_model, test_data, \"answers\",\n",
     "                               task_type=ModelTask.QUESTION_ANSWERING)"
    ]
   },