Skip to content

Commit

Permalink
renamed scrub -> sanitize; added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
crowemi committed May 17, 2023
1 parent 39a35d4 commit da8bb0b
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions target_s3/formats/format_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def validate_list(value, fields):

return value

def clean(self, value):
def sanitize(self, value):
if isinstance(value, dict) and not value:
# pyarrow can't process empty struct
return None
Expand All @@ -174,14 +174,15 @@ def create_dataframe(self) -> Table:
schema = dict()
input = {
f: [
self.validate(schema, self.clean(f), row.get(f))
self.validate(schema, self.sanitize(f), row.get(f))
for row in self.records
]
for f in fields
}
else:
input = {
f: [self.clean(row.get(f)) for row in self.records] for f in fields
f: [self.sanitize(row.get(f)) for row in self.records]
for f in fields
}

ret = Table.from_pydict(mapping=input)
Expand Down

0 comments on commit da8bb0b

Please sign in to comment.