-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: hand symptoms to cTAKES in a format it expects
Ever since the March 17th, 2023 commit that fed cTAKES a custom BSV file, our COVID NLP performance has taken a hit (unbeknownst to us). cTAKES actually expects a slightly different format for symptoms BSV files, which we now enforce as we send it files. This gets our COVID NLP performance on a standard BCH set of 200-odd Cerner HTML notes from 0.784 to 0.813 F1. This commit bumps the covid tasks' task_version from 3 to 4.
- Loading branch information
Showing
6 changed files
with
52 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 4 additions & 4 deletions
8
tests/data/covid/output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
{"id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d.0", "docref_id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d", "encounter_id": "b3d0707624491d8b71a808bd20b63625981af48f526b95214146de2a15f7dd43", "subject_id": "00680c7c0e2e1712e9c4a01eb5c6dfb8949871faef6337c5db204d19e1d9ca58", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 3, "match": {"begin": 6, "end": 9, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "386661006", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}, {"code": "50177009", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d.1", "docref_id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d", "encounter_id": "b3d0707624491d8b71a808bd20b63625981af48f526b95214146de2a15f7dd43", "subject_id": "00680c7c0e2e1712e9c4a01eb5c6dfb8949871faef6337c5db204d19e1d9ca58", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 3, "match": {"begin": 6, "end": 9, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "422587007", "cui": "C0027497", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e.0", "docref_id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e", "encounter_id": "58a65c6cc5693a507af44f25f062171898aa6bc469766956b2c802d39fc6d4a7", "subject_id": "84cc1e7381070fda74a80df28a29323101be3b2c26b4d604abf43946ab1759f6", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 3, "match": {"begin": 7, "end": 10, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "386661006", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}, {"code": "50177009", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e.1", "docref_id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e", "encounter_id": "58a65c6cc5693a507af44f25f062171898aa6bc469766956b2c802d39fc6d4a7", "subject_id": "84cc1e7381070fda74a80df28a29323101be3b2c26b4d604abf43946ab1759f6", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 3, "match": {"begin": 7, "end": 10, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "422587007", "cui": "C0027497", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d.0", "docref_id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d", "encounter_id": "b3d0707624491d8b71a808bd20b63625981af48f526b95214146de2a15f7dd43", "subject_id": "00680c7c0e2e1712e9c4a01eb5c6dfb8949871faef6337c5db204d19e1d9ca58", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 4, "match": {"begin": 6, "end": 9, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "386661006", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}, {"code": "50177009", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d.1", "docref_id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d", "encounter_id": "b3d0707624491d8b71a808bd20b63625981af48f526b95214146de2a15f7dd43", "subject_id": "00680c7c0e2e1712e9c4a01eb5c6dfb8949871faef6337c5db204d19e1d9ca58", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 4, "match": {"begin": 6, "end": 9, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "422587007", "cui": "C0027497", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e.0", "docref_id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e", "encounter_id": "58a65c6cc5693a507af44f25f062171898aa6bc469766956b2c802d39fc6d4a7", "subject_id": "84cc1e7381070fda74a80df28a29323101be3b2c26b4d604abf43946ab1759f6", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 4, "match": {"begin": 7, "end": 10, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "386661006", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}, {"code": "50177009", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e.1", "docref_id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e", "encounter_id": "58a65c6cc5693a507af44f25f062171898aa6bc469766956b2c802d39fc6d4a7", "subject_id": "84cc1e7381070fda74a80df28a29323101be3b2c26b4d604abf43946ab1759f6", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 4, "match": {"begin": 7, "end": 10, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "422587007", "cui": "C0027497", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} |
4 changes: 2 additions & 2 deletions
4
.../covid_symptom__nlp_results_term_exists/covid_symptom__nlp_results_term_exists.000.ndjson
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
{"id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d.1", "docref_id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d", "encounter_id": "b3d0707624491d8b71a808bd20b63625981af48f526b95214146de2a15f7dd43", "subject_id": "00680c7c0e2e1712e9c4a01eb5c6dfb8949871faef6337c5db204d19e1d9ca58", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 3, "match": {"begin": 6, "end": 9, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "422587007", "cui": "C0027497", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e.1", "docref_id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e", "encounter_id": "58a65c6cc5693a507af44f25f062171898aa6bc469766956b2c802d39fc6d4a7", "subject_id": "84cc1e7381070fda74a80df28a29323101be3b2c26b4d604abf43946ab1759f6", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 3, "match": {"begin": 7, "end": 10, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "422587007", "cui": "C0027497", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d.1", "docref_id": "c31a3dbf188ed241b2c06b2475cd56159017fa1df1ea882d3fc4beab860fc24d", "encounter_id": "b3d0707624491d8b71a808bd20b63625981af48f526b95214146de2a15f7dd43", "subject_id": "00680c7c0e2e1712e9c4a01eb5c6dfb8949871faef6337c5db204d19e1d9ca58", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 4, "match": {"begin": 6, "end": 9, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "422587007", "cui": "C0027497", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} | ||
{"id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e.1", "docref_id": "eb30741bbb9395fc3da72d02fd29b96e2e4c0c2592c3ae997d80bf522c80070e", "encounter_id": "58a65c6cc5693a507af44f25f062171898aa6bc469766956b2c802d39fc6d4a7", "subject_id": "84cc1e7381070fda74a80df28a29323101be3b2c26b4d604abf43946ab1759f6", "generated_on": "2021-09-14T21:23:45+00:00", "task_version": 4, "match": {"begin": 7, "end": 10, "text": "for", "polarity": 0, "conceptAttributes": [{"code": "422587007", "cui": "C0027497", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters