Skip to content

Commit

Permalink
Merge pull request #3637 from zachlindsey/improve_csv_reader_quote_handling_and_separators
Browse files Browse the repository at this point in the history

FIX: Parse commas in CSV fields
  • Loading branch information
effigies authored Mar 17, 2024
2 parents f277d18 + f746c34 commit 58d4fc7
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 8 deletions.
15 changes: 7 additions & 8 deletions nipype/interfaces/utility/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""CSV Handling utilities
"""
import csv
from ..base import traits, TraitedSpec, DynamicTraitedSpec, File, BaseInterface
from ..io import add_traits

Expand All @@ -13,6 +14,7 @@ class CSVReaderInputSpec(DynamicTraitedSpec, TraitedSpec):
header = traits.Bool(
False, usedefault=True, desc="True if the first line is a column header"
)
delimiter = traits.String(",", usedefault=True, desc="Delimiter to use.")


class CSVReader(BaseInterface):
Expand Down Expand Up @@ -52,14 +54,11 @@ def _append_entry(self, outputs, entry):
outputs[key].append(value)
return outputs

def _parse_line(self, line):
line = line.replace("\n", "")
entry = [x.strip() for x in line.split(",")]
return entry

def _get_outfields(self):
with open(self.inputs.in_file) as fid:
entry = self._parse_line(fid.readline())
reader = csv.reader(fid, delimiter=self.inputs.delimiter)

entry = next(reader)
if self.inputs.header:
self._outfields = tuple(entry)
else:
Expand All @@ -82,10 +81,10 @@ def _list_outputs(self):
for key in self._outfields:
outputs[key] = [] # initialize outfields
with open(self.inputs.in_file) as fid:
for line in fid.readlines():
reader = csv.reader(fid, delimiter=self.inputs.delimiter)
for entry in reader:
if self.inputs.header and isHeader: # skip header line
isHeader = False
continue
entry = self._parse_line(line)
outputs = self._append_entry(outputs, entry)
return outputs
3 changes: 3 additions & 0 deletions nipype/interfaces/utility/tests/test_auto_CSVReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

def test_CSVReader_inputs():
input_map = dict(
delimiter=dict(
usedefault=True,
),
header=dict(
usedefault=True,
),
Expand Down
41 changes: 41 additions & 0 deletions nipype/interfaces/utility/tests/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,44 @@ def test_csvReader(tmpdir):
assert out.outputs.column_0 == ["foo", "bar", "baz"]
assert out.outputs.column_1 == ["hello", "world", "goodbye"]
assert out.outputs.column_2 == ["300.1", "5", "0.3"]


def test_csvReader_quoted(tmpdir):
    """Check that a quoted field containing the delimiter stays one value.

    Writes a single headerless row whose middle field is ``"hello, world"``
    and asserts that the reader yields three columns, with the embedded
    comma preserved inside ``column_1``.
    """
    lines = ['foo,"hello, world",300.1\n']

    name = tmpdir.join("testfile.csv").strpath
    # Close the file before reading it back so the reader sees the complete
    # content without depending on an explicit flush of an open handle.
    with open(name, "w") as fid:
        fid.writelines(lines)

    reader = utility.CSVReader()
    reader.inputs.in_file = name
    out = reader.run()

    assert out.outputs.column_0 == ["foo"]
    assert out.outputs.column_1 == ["hello, world"]
    assert out.outputs.column_2 == ["300.1"]


def test_csvReader_tabs(tmpdir):
    """Read tab-delimited rows, once with a header row and once without.

    With a header, the outputs are named after the header fields; without
    one, they are named ``column_0``, ``column_1``, ``column_2``.
    """
    header_row = "files\tlabels\terosion\n"
    data_rows = ["foo\thello\t300.1\n", "bar\tworld\t5\n", "baz\tgoodbye\t0.3\n"]
    # First pass writes the header row; second pass writes data rows only.
    for with_header in (True, False):
        csv_path = tmpdir.join("testfile.csv").strpath
        with open(csv_path, "w") as handle:
            reader = utility.CSVReader(delimiter="\t")
            if with_header:
                handle.write(header_row)
                reader.inputs.header = True
            handle.writelines(data_rows)
            # Flush so the reader sees the rows while the handle is still open.
            handle.flush()
            reader.inputs.in_file = csv_path
            result = reader.run()
            columns = result.outputs
            if with_header:
                assert columns.files == ["foo", "bar", "baz"]
                assert columns.labels == ["hello", "world", "goodbye"]
                assert columns.erosion == ["300.1", "5", "0.3"]
            else:
                assert columns.column_0 == ["foo", "bar", "baz"]
                assert columns.column_1 == ["hello", "world", "goodbye"]
                assert columns.column_2 == ["300.1", "5", "0.3"]

0 comments on commit 58d4fc7

Please sign in to comment.