Skip to content

Commit

Permalink
feat(bigquery): add PreserveAsciiControlCharacters support for CSV (#6448)
Browse files Browse the repository at this point in the history

* feat(bigquery): add PreserveAsciiControlCharacters support for CSV
  • Loading branch information
shollyman authored Aug 23, 2022
1 parent 74da335 commit b7bac2f
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 37 deletions.
32 changes: 19 additions & 13 deletions bigquery/external.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,17 +230,22 @@ type CSVOptions struct {
// An optional custom string that will represent a NULL
// value in CSV import data.
NullMarker string

// PreserveASCIIControlCharacters preserves embedded ASCII control characters
// (the first 32 characters in the ASCII table, from '\x00' to '\x1F') when
// loading from CSV. Only applicable to CSV, ignored for other formats.
PreserveASCIIControlCharacters bool
}

// populateExternalDataConfig sets c.CsvOptions to a newly allocated
// bq.CsvOptions whose fields are copied from o. Encoding is converted to a
// plain string, and Quote comes from o.quote() rather than from a field read.
//
// NOTE(review): this span is a rendered diff without +/- markers, so the field
// list appears twice: first the pre-change list, then the post-change list that
// adds PreserveAsciiControlCharacters. Presumably only the second list exists
// in the committed file — confirm before treating this text as compilable Go.
func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) {
c.CsvOptions = &bq.CsvOptions{
AllowJaggedRows: o.AllowJaggedRows,
AllowQuotedNewlines: o.AllowQuotedNewlines,
Encoding: string(o.Encoding),
FieldDelimiter: o.FieldDelimiter,
Quote: o.quote(),
SkipLeadingRows: o.SkipLeadingRows,
NullMarker: o.NullMarker,
AllowJaggedRows: o.AllowJaggedRows,
AllowQuotedNewlines: o.AllowQuotedNewlines,
Encoding: string(o.Encoding),
FieldDelimiter: o.FieldDelimiter,
Quote: o.quote(),
SkipLeadingRows: o.SkipLeadingRows,
NullMarker: o.NullMarker,
// The Go-side field uses the initialism spelling "ASCII"; the bq API struct spells it "Ascii".
PreserveAsciiControlCharacters: o.PreserveASCIIControlCharacters,
}
}

Expand All @@ -267,12 +272,13 @@ func (o *CSVOptions) setQuote(ps *string) {

func bqToCSVOptions(q *bq.CsvOptions) *CSVOptions {
o := &CSVOptions{
AllowJaggedRows: q.AllowJaggedRows,
AllowQuotedNewlines: q.AllowQuotedNewlines,
Encoding: Encoding(q.Encoding),
FieldDelimiter: q.FieldDelimiter,
SkipLeadingRows: q.SkipLeadingRows,
NullMarker: q.NullMarker,
AllowJaggedRows: q.AllowJaggedRows,
AllowQuotedNewlines: q.AllowQuotedNewlines,
Encoding: Encoding(q.Encoding),
FieldDelimiter: q.FieldDelimiter,
SkipLeadingRows: q.SkipLeadingRows,
NullMarker: q.NullMarker,
PreserveASCIIControlCharacters: q.PreserveAsciiControlCharacters,
}
o.setQuote(q.Quote)
return o
Expand Down
2 changes: 2 additions & 0 deletions bigquery/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
conf.MaxBadRecords = fc.MaxBadRecords
conf.NullMarker = fc.NullMarker
conf.PreserveAsciiControlCharacters = fc.PreserveASCIIControlCharacters
if fc.Schema != nil {
conf.Schema = fc.Schema.toBQ()
}
Expand Down Expand Up @@ -120,6 +121,7 @@ func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
fc.Encoding = Encoding(conf.Encoding)
fc.FieldDelimiter = conf.FieldDelimiter
fc.CSVOptions.NullMarker = conf.NullMarker
fc.CSVOptions.PreserveASCIIControlCharacters = conf.PreserveAsciiControlCharacters
fc.CSVOptions.setQuote(conf.Quote)
}

Expand Down
51 changes: 27 additions & 24 deletions bigquery/file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@ var (
nestedFieldSchema(),
},
CSVOptions: CSVOptions{
Quote: hyphen,
FieldDelimiter: "\t",
SkipLeadingRows: 8,
AllowJaggedRows: true,
AllowQuotedNewlines: true,
Encoding: UTF_8,
NullMarker: "marker",
Quote: hyphen,
FieldDelimiter: "\t",
SkipLeadingRows: 8,
AllowJaggedRows: true,
AllowQuotedNewlines: true,
Encoding: UTF_8,
NullMarker: "marker",
PreserveASCIIControlCharacters: true,
},
}
)
Expand All @@ -63,16 +64,17 @@ func TestFileConfigPopulateLoadConfig(t *testing.T) {
description: "csv",
fileConfig: &fc,
want: &bq.JobConfigurationLoad{
SourceFormat: "CSV",
FieldDelimiter: "\t",
SkipLeadingRows: 8,
AllowJaggedRows: true,
AllowQuotedNewlines: true,
Autodetect: true,
Encoding: "UTF-8",
MaxBadRecords: 7,
IgnoreUnknownValues: true,
NullMarker: "marker",
SourceFormat: "CSV",
FieldDelimiter: "\t",
SkipLeadingRows: 8,
AllowJaggedRows: true,
AllowQuotedNewlines: true,
Autodetect: true,
Encoding: "UTF-8",
MaxBadRecords: 7,
IgnoreUnknownValues: true,
NullMarker: "marker",
PreserveAsciiControlCharacters: true,
Schema: &bq.TableSchema{
Fields: []*bq.TableFieldSchema{
bqStringFieldSchema(),
Expand Down Expand Up @@ -150,13 +152,14 @@ func TestFileConfigPopulateExternalDataConfig(t *testing.T) {
bqNestedFieldSchema(),
}},
CsvOptions: &bq.CsvOptions{
AllowJaggedRows: true,
AllowQuotedNewlines: true,
Encoding: "UTF-8",
FieldDelimiter: "\t",
Quote: &hyphen,
SkipLeadingRows: 8,
NullMarker: "marker",
AllowJaggedRows: true,
AllowQuotedNewlines: true,
Encoding: "UTF-8",
FieldDelimiter: "\t",
Quote: &hyphen,
SkipLeadingRows: 8,
NullMarker: "marker",
PreserveAsciiControlCharacters: true,
},
},
},
Expand Down

0 comments on commit b7bac2f

Please sign in to comment.