Merge pull request #3637 from zachlindsey/improve_csv_reader_quote_handling_and_separators

FIX: Parse commas in CSV fields
nipy · Mar 17, 2024 · 58d4fc7
2 parents f277d18 + f746c34
commit 58d4fc7
Show file tree

Hide file tree

Showing 3 changed files with 51 additions and 8 deletions.
diff --git a/nipype/interfaces/utility/csv.py b/nipype/interfaces/utility/csv.py
@@ -2,6 +2,7 @@
 # vi: set ft=python sts=4 ts=4 sw=4 et:
 """CSV Handling utilities
 """
+import csv
 from ..base import traits, TraitedSpec, DynamicTraitedSpec, File, BaseInterface
 from ..io import add_traits
 
@@ -13,6 +14,7 @@ class CSVReaderInputSpec(DynamicTraitedSpec, TraitedSpec):
     header = traits.Bool(
         False, usedefault=True, desc="True if the first line is a column header"
     )
+    delimiter = traits.String(",", usedefault=True, desc="Delimiter to use.")
 
 
 class CSVReader(BaseInterface):
@@ -52,14 +54,11 @@ def _append_entry(self, outputs, entry):
             outputs[key].append(value)
         return outputs
 
-    def _parse_line(self, line):
-        line = line.replace("\n", "")
-        entry = [x.strip() for x in line.split(",")]
-        return entry
-
     def _get_outfields(self):
         with open(self.inputs.in_file) as fid:
-            entry = self._parse_line(fid.readline())
+            reader = csv.reader(fid, delimiter=self.inputs.delimiter)
+
+            entry = next(reader)
             if self.inputs.header:
                 self._outfields = tuple(entry)
             else:
@@ -82,10 +81,10 @@ def _list_outputs(self):
         for key in self._outfields:
             outputs[key] = []  # initialize outfields
         with open(self.inputs.in_file) as fid:
-            for line in fid.readlines():
+            reader = csv.reader(fid, delimiter=self.inputs.delimiter)
+            for entry in reader:
                 if self.inputs.header and isHeader:  # skip header line
                     isHeader = False
                     continue
-                entry = self._parse_line(line)
                 outputs = self._append_entry(outputs, entry)
         return outputs
diff --git a/nipype/interfaces/utility/tests/test_auto_CSVReader.py b/nipype/interfaces/utility/tests/test_auto_CSVReader.py
@@ -4,6 +4,9 @@
 
 def test_CSVReader_inputs():
     input_map = dict(
+        delimiter=dict(
+            usedefault=True,
+        ),
         header=dict(
             usedefault=True,
         ),

diff --git a/nipype/interfaces/utility/tests/test_csv.py b/nipype/interfaces/utility/tests/test_csv.py
@@ -26,3 +26,44 @@ def test_csvReader(tmpdir):
                 assert out.outputs.column_0 == ["foo", "bar", "baz"]
                 assert out.outputs.column_1 == ["hello", "world", "goodbye"]
                 assert out.outputs.column_2 == ["300.1", "5", "0.3"]
+
+
+def test_csvReader_quoted(tmpdir):
+    header = "files,labels,erosion\n"
+    lines = ['foo,"hello, world",300.1\n']
+
+    name = tmpdir.join("testfile.csv").strpath
+    with open(name, "w") as fid:
+        reader = utility.CSVReader()
+        fid.writelines(lines)
+        fid.flush()
+        reader.inputs.in_file = name
+        out = reader.run()
+
+        assert out.outputs.column_0 == ["foo"]
+        assert out.outputs.column_1 == ["hello, world"]
+        assert out.outputs.column_2 == ["300.1"]
+
+
+def test_csvReader_tabs(tmpdir):
+    header = "files\tlabels\terosion\n"
+    lines = ["foo\thello\t300.1\n", "bar\tworld\t5\n", "baz\tgoodbye\t0.3\n"]
+    for x in range(2):
+        name = tmpdir.join("testfile.csv").strpath
+        with open(name, "w") as fid:
+            reader = utility.CSVReader(delimiter="\t")
+            if x % 2 == 0:
+                fid.write(header)
+                reader.inputs.header = True
+            fid.writelines(lines)
+            fid.flush()
+            reader.inputs.in_file = name
+            out = reader.run()
+            if x % 2 == 0:
+                assert out.outputs.files == ["foo", "bar", "baz"]
+                assert out.outputs.labels == ["hello", "world", "goodbye"]
+                assert out.outputs.erosion == ["300.1", "5", "0.3"]
+            else:
+                assert out.outputs.column_0 == ["foo", "bar", "baz"]
+                assert out.outputs.column_1 == ["hello", "world", "goodbye"]
+                assert out.outputs.column_2 == ["300.1", "5", "0.3"]