diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index b45f16b6..a9132aac 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -69,8 +69,11 @@ jobs:
           pip install bandit[toml] pycodestyle pylint black==22.12.0
 
       - name: Run pycodestyle
+        # E203: pycodestyle is a little too rigid about slices & whitespace
+        #  See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices
+        # W503: a default ignore that we are restoring
         run: |
-          pycodestyle --max-line-length=120 .
+          pycodestyle --max-line-length=120 --ignore=E203,W503 .
 
       - name: Run pylint
         if: success() || failure() # still run pylint if above checks fail
diff --git a/compose.yaml b/compose.yaml
index e598087a..ff6038c5 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -51,12 +51,15 @@ services:
   ctakes-covid:
     extends: ctakes-covid-base
     profiles:
+      - chart-review
+      - chart-review-gpu
       - covid-symptom
       - covid-symptom-gpu
 
   cnlpt-negation:
     image: smartonfhir/cnlp-transformers:negation-0.6.1-cpu
     profiles:
+      - chart-review
       - covid-symptom
     networks:
       - cumulus-etl
@@ -64,6 +67,7 @@ services:
   cnlpt-negation-gpu:
     image: smartonfhir/cnlp-transformers:negation-0.6.1-gpu
     profiles:
+      - chart-review-gpu
       - covid-symptom-gpu
     networks:
       - cumulus-etl
diff --git a/cumulus_etl/chart_review/cli.py b/cumulus_etl/chart_review/cli.py
index a30f910b..cb7f1c33 100644
--- a/cumulus_etl/chart_review/cli.py
+++ b/cumulus_etl/chart_review/cli.py
@@ -20,7 +20,7 @@ def init_checks(args: argparse.Namespace):
 
     if args.nlp:
         nlp.check_ctakes()
-        nlp.check_cnlpt()
+        nlp.check_negation_cnlpt()
 
     if not cli_utils.is_url_available(args.label_studio_url, retry=False):
         errors.fatal(
@@ -80,8 +80,18 @@ async def read_notes_from_ndjson(
         anon_enc_id = codebook.fake_id("Encounter", enc_id)
         doc_id = docrefs[i]["id"]
         doc_mappings = {doc_id: codebook.fake_id("DocumentReference", doc_id)}
-
-        notes.append(LabelStudioNote(enc_id, anon_enc_id, doc_mappings, title, text))
+        doc_spans = {doc_id: (0, len(text))}
+
+        notes.append(
+            LabelStudioNote(
+                enc_id,
+                anon_enc_id,
+                doc_mappings=doc_mappings,
+                doc_spans=doc_spans,
+                title=title,
+                text=text,
+            )
+        )
 
     return notes
 
@@ -101,7 +111,12 @@ async def run_nlp(notes: Collection[LabelStudioNote], args: argparse.Namespace)
         ctakes_json = await ctakesclient.client.extract(note.text, client=http_client)
         matches = ctakes_json.list_match(polarity=Polarity.pos)
         spans = ctakes_json.list_spans(matches)
-        cnlpt_results = await ctakesclient.transformer.list_polarity(note.text, spans, client=http_client)
+        cnlpt_results = await ctakesclient.transformer.list_polarity(
+            note.text,
+            spans,
+            client=http_client,
+            model=nlp.TransformerModel.NEGATION,
+        )
         note.matches = [match for i, match in enumerate(matches) if cnlpt_results[i] == Polarity.pos]
 
 
@@ -133,6 +148,7 @@ def group_notes_by_encounter(notes: Collection[LabelStudioNote]) -> list[LabelSt
         grouped_text = ""
         grouped_matches = []
         grouped_doc_mappings = {}
+        grouped_doc_spans = {}
 
         for note in enc_notes:
             grouped_doc_mappings.update(note.doc_mappings)
@@ -145,14 +161,24 @@ def group_notes_by_encounter(notes: Collection[LabelStudioNote]) -> list[LabelSt
             offset = len(grouped_text)
             grouped_text += note.text
 
+            offset_doc_spans = {k: (v[0] + offset, v[1] + offset) for k, v in note.doc_spans.items()}
+            grouped_doc_spans.update(offset_doc_spans)
+
             for match in note.matches:
                 match.begin += offset
                 match.end += offset
                 grouped_matches.append(match)
 
-        grouped_note = LabelStudioNote(enc_id, enc_notes[0].anon_id, grouped_doc_mappings, "", grouped_text)
-        grouped_note.matches = grouped_matches
-        grouped_notes.append(grouped_note)
+        grouped_notes.append(
+            LabelStudioNote(
+                enc_id,
+                enc_notes[0].anon_id,
+                text=grouped_text,
+                doc_mappings=grouped_doc_mappings,
+                doc_spans=grouped_doc_spans,
+                matches=grouped_matches,
+            )
+        )
 
     return grouped_notes
 
diff --git a/cumulus_etl/chart_review/downloader.py b/cumulus_etl/chart_review/downloader.py
index cb6a0c40..27f3cb90 100644
--- a/cumulus_etl/chart_review/downloader.py
+++ b/cumulus_etl/chart_review/downloader.py
@@ -68,7 +68,7 @@ async def _download_docrefs_from_real_ids(
 
     # Grab identifiers for which specific docrefs we need
     with common.read_csv(docref_csv) as reader:
-        docref_ids = {row["docref_id"] for row in reader}
+        docref_ids = sorted({row["docref_id"] for row in reader})
 
     # Kick off a bunch of requests to the FHIR server for these documents
     coroutines = [_request_docref(client, docref_id) for docref_id in docref_ids]
diff --git a/cumulus_etl/chart_review/labelstudio.py b/cumulus_etl/chart_review/labelstudio.py
index a7939b94..e34686aa 100644
--- a/cumulus_etl/chart_review/labelstudio.py
+++ b/cumulus_etl/chart_review/labelstudio.py
@@ -1,5 +1,6 @@
 """LabelStudio document annotation"""
 
+import dataclasses
 from collections.abc import Collection, Iterable
 
 import ctakesclient.typesystem
@@ -17,14 +18,26 @@
 ###############################################################################
 
 
+@dataclasses.dataclass
 class LabelStudioNote:
-    def __init__(self, enc_id: str, anon_id: str, doc_mappings: dict[str, str], title: str, text: str):
-        self.enc_id = enc_id
-        self.anon_id = anon_id
-        self.doc_mappings = doc_mappings
-        self.title = title
-        self.text = text
-        self.matches: list[ctakesclient.typesystem.MatchText] = []
+    """Holds all the data that Label Studio will need for a single note (or a single grouped encounter note)"""
+
+    enc_id: str  # real Encounter ID
+    anon_id: str  # anonymized Encounter ID
+    text: str = ""  # text of the note, sent to Label Studio
+
+    # A title is only used when combining notes into one big encounter note. It's not sent to Label Studio.
+    title: str = ""
+
+    # Doc mappings is a dict of real DocRef ID -> anonymized DocRef ID of all contained notes, in order
+    doc_mappings: dict[str, str] = dataclasses.field(default_factory=dict)
+
+    # Doc spans indicate which bits of the text come from which DocRef - it will map real DocRef ID to a pair of
+    # "first character" (0-based) and "last character" (0-based, exclusive) - just like cTAKES match text spans.
+    doc_spans: dict[str, tuple[int, int]] = dataclasses.field(default_factory=dict)
+
+    # Matches found by cTAKES
+    matches: list[ctakesclient.typesystem.MatchText] = dataclasses.field(default_factory=list)
 
 
 class LabelStudioClient:
@@ -84,6 +97,7 @@ def _format_task_for_note(self, note: LabelStudioNote) -> dict:
                 "enc_id": note.enc_id,
                 "anon_id": note.anon_id,
                 "docref_mappings": note.doc_mappings,
+                "docref_spans": {k: list(v) for k, v in note.doc_spans.items()},  # json doesn't natively have tuples
             },
         }
 
diff --git a/cumulus_etl/nlp/__init__.py b/cumulus_etl/nlp/__init__.py
index 541fa689..783a16a7 100644
--- a/cumulus_etl/nlp/__init__.py
+++ b/cumulus_etl/nlp/__init__.py
@@ -1,6 +1,6 @@
 """Support code for NLP servers"""
 
-from .extract import ctakes_extract, ctakes_httpx_client, list_polarity
+from .extract import TransformerModel, ctakes_extract, ctakes_httpx_client, list_polarity
 from .huggingface import hf_prompt, hf_info, llama2_prompt
 from .utils import cache_wrapper, is_docref_valid
 from .watcher import check_negation_cnlpt, check_term_exists_cnlpt, check_ctakes, restart_ctakes_with_bsv
diff --git a/docs/chart-review.md b/docs/chart-review.md
index 336fbda2..0392aab6 100644
--- a/docs/chart-review.md
+++ b/docs/chart-review.md
@@ -17,7 +17,9 @@ Along the way, it can mark the note with NLP results and/or anonymize the note w
 This is useful for not just actual chart reviews, but also for developing a custom NLP dictionary.
 You can feed Cumulus ETL a custom NLP dictionary, review how it performs, and iterate upon it.
 
-## Preliminary Label Studio Setup
+## Preliminaries
+
+### Label Studio Setup
 
 This guide assumes you already have a local instance of Label Studio running.
 They offer Docker images and reasonable
@@ -27,6 +29,19 @@ If you haven't set that up yet, go do that and come back.
 The Cumulus team can help you with setting it up if you come talk to us,
 but the rest of this guide will mostly deal with chart review mode itself.
 
+### Dependent Services
+
+Some features of chart review mode need external services (like cTAKES to run NLP).
+Launch those before you begin using chart review:
+
+```shell
+export UMLS_API_KEY=your-umls-api-key
+docker compose --profile chart-review up -d
+```
+
+Or if you have access to a GPU,
+you can speed up the NLP by launching the GPU profile instead with `--profile chart-review-gpu`.
+
 ## Basic Operation
 
 At its core, chart review mode is just another ETL (extract, transform, load) operation.
diff --git a/tests/chart_review/__init__.py b/tests/chart_review/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_chart_cli.py b/tests/chart_review/test_chart_cli.py
similarity index 86%
rename from tests/test_chart_cli.py
rename to tests/chart_review/test_chart_cli.py
index 989cedfb..38b5bd43 100644
--- a/tests/test_chart_cli.py
+++ b/tests/chart_review/test_chart_cli.py
@@ -105,7 +105,7 @@ async def run_chart_review(
         await cli.main(args)
 
     @staticmethod
-    def make_docref(doc_id: str, text: str = None, content: list[dict] = None) -> dict:
+    def make_docref(doc_id: str, text: str = None, content: list[dict] = None, enc_id: str = None) -> dict:
         if content is None:
             text = text or "What's up doc?"
             content = [
@@ -117,11 +117,12 @@ def make_docref(doc_id: str, text: str = None, content: list[dict] = None) -> di
                 }
             ]
 
+        enc_id = enc_id or f"enc-{doc_id}"
         return {
             "resourceType": "DocumentReference",
             "id": doc_id,
             "content": content,
-            "context": {"encounter": [{"reference": f"Encounter/enc-{doc_id}"}]},
+            "context": {"encounter": [{"reference": f"Encounter/{enc_id}"}]},
         }
 
     @staticmethod
@@ -332,9 +333,7 @@ async def test_disabled_nlp(self):
 
     @ddt.data(True, False)
     async def test_philter(self, run_philter):
-        notes = [
-            LabelStudioNote("EncID", "EncAnon", {"DocID": "DocAnon"}, "My Title", "John Smith called on 10/13/2010")
-        ]
+        notes = [LabelStudioNote("EncID", "EncAnon", title="My Title", text="John Smith called on 10/13/2010")]
         with mock.patch("cumulus_etl.chart_review.cli.read_notes_from_ndjson", return_value=notes):
             await self.run_chart_review(philter=run_philter)
 
@@ -350,3 +349,44 @@ async def test_philter(self, run_philter):
         else:
             expected_text = "John Smith called on 10/13/2010"
         self.assertEqual(self.wrap_note("My Title", expected_text), task.text)
+
+    @respx.mock(assert_all_mocked=False)
+    async def test_combined_encounter_offsets(self, respx_mock):
+        # use server notes just for ease of making fake ones
+        self.mock_read_url(respx_mock, "D1", enc_id="43")
+        self.mock_read_url(respx_mock, "D2", enc_id="43")
+        respx_mock.post(os.environ["URL_CTAKES_REST"]).pass_through()  # ignore cTAKES
+
+        with tempfile.NamedTemporaryFile() as file:
+            self.write_real_docrefs(file.name, ["D1", "D2"])
+            await self.run_chart_review(input_path="https://localhost", docrefs=file.name)
+
+        notes = self.ls_client.push_tasks.call_args[0][0]
+        self.assertEqual(1, len(notes))
+        note = notes[0]
+
+        # Did we mark that both IDs occur in one note correctly?
+        self.assertEqual({"D1": ANON_D1, "D2": ANON_D2}, note.doc_mappings)
+
+        # Did we mark the internal docref spans correctly?
+        first_span = (93, 107)
+        second_span = (285, 299)
+        self.assertEqual("What's up doc?", note.text[first_span[0] : first_span[1]])
+        self.assertEqual("What's up doc?", note.text[second_span[0] : second_span[1]])
+        self.assertEqual({"D1": first_span, "D2": second_span}, note.doc_spans)
+
+        # Did we edit cTAKES results correctly?
+        match1a = (93, 99)
+        match1b = (100, 102)
+        match1c = (103, 107)
+        match2a = (285, 291)
+        match2b = (292, 294)
+        match2c = (295, 299)
+        self.assertEqual("What's", note.text[match1a[0] : match1a[1]])
+        self.assertEqual("up", note.text[match1b[0] : match1b[1]])
+        self.assertEqual("doc?", note.text[match1c[0] : match1c[1]])
+        self.assertEqual("What's", note.text[match2a[0] : match2a[1]])
+        self.assertEqual("up", note.text[match2b[0] : match2b[1]])
+        self.assertEqual("doc?", note.text[match2c[0] : match2c[1]])
+        spans = {x.span().key() for x in note.matches}
+        self.assertEqual({match1a, match1b, match1c, match2a, match2b, match2c}, spans)
diff --git a/tests/test_chart_labelstudio.py b/tests/chart_review/test_chart_labelstudio.py
similarity index 93%
rename from tests/test_chart_labelstudio.py
rename to tests/chart_review/test_chart_labelstudio.py
index 5c59a882..a7eee71b 100644
--- a/tests/test_chart_labelstudio.py
+++ b/tests/chart_review/test_chart_labelstudio.py
@@ -26,7 +26,14 @@ def setUp(self):
 
     @staticmethod
     def make_note(*, enc_id: str = "enc", matches: bool = True) -> LabelStudioNote:
-        note = LabelStudioNote(enc_id, "enc-anon", {"doc": "doc-anon"}, "Ignored Title", "Normal note text")
+        text = "Normal note text"
+        note = LabelStudioNote(
+            enc_id,
+            "enc-anon",
+            doc_mappings={"doc": "doc-anon"},
+            doc_spans={"doc": (0, len(text))},
+            text=text,
+        )
         if matches:
             note.matches = ctakesmock.fake_ctakes_extract(note.text).list_match(polarity=Polarity.pos)
         return note
@@ -61,6 +68,7 @@ def test_basic_push(self):
                     "enc_id": "enc",
                     "anon_id": "enc-anon",
                     "docref_mappings": {"doc": "doc-anon"},
+                    "docref_spans": {"doc": [0, 16]},
                 },
                 "predictions": [
                     {
@@ -109,6 +117,7 @@ def test_no_matches(self):
                     "enc_id": "enc",
                     "anon_id": "enc-anon",
                     "docref_mappings": {"doc": "doc-anon"},
+                    "docref_spans": {"doc": [0, 16]},
                 },
                 "predictions": [
                     {
@@ -132,6 +141,7 @@ def test_dynamic_labels(self, label_type):
                 "enc_id": "enc",
                 "anon_id": "enc-anon",
                 "docref_mappings": {"doc": "doc-anon"},
+                "docref_spans": {"doc": [0, 16]},
                 "mylabel": [
                     {"value": "Itch"},
                     {"value": "Nausea"},
@@ -151,6 +161,7 @@ def test_dynamic_labels_no_matches(self):
                 "enc_id": "enc",
                 "anon_id": "enc-anon",
                 "docref_mappings": {"doc": "doc-anon"},
+                "docref_spans": {"doc": [0, 16]},
                 "mylabel": [],  # this needs to be sent, or the server will complain
             },
             self.get_pushed_task()["data"],