Skip to content

Commit

Permalink
individual instument tar file working
Browse files Browse the repository at this point in the history
  • Loading branch information
mdye committed Jul 31, 2020
1 parent 2bfe01e commit 993f5b8
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 34 deletions.
97 changes: 71 additions & 26 deletions app/jobs/create_bulk_download_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,54 +20,99 @@ def perform(*args)


# track all the files that are created
temp_files = Array.new
temp_file_paths = Array.new


# Create the header for the master/final csv file
# (If one will be created)
header_row_zip_file_path = bd.create_master_csv_header_zip_file
temp_files.push(header_row_zip_file_path)
if (bd.create_single_master_file)
# Create the header for the master/final csv file
# (If one will be created)
header_row_zip_file_path = bd.create_master_csv_header_zip_file
temp_file_paths.push(header_row_zip_file_path)
end



# Create zip files for each variable of each desired instrument
bd.instruments.each do |instrument|

# define a few file names
if (bd.create_separate_instrument_files)
var_zip_files = Array.new

instrument_header_row_zip_file_path = bd.create_instrument_csv_header_zip_file(instrument)
var_zip_files.push(instrument_header_row_zip_file_path)

# Rails.logger.debug "*" * 80
# Rails.logger.debug var_zip_files
# Rails.logger.debug "*" * 80

end

# TheTZVOLCANOGNSSNetwork_Volcanoflank_OLO1
# instrument_header_row_file_name = "#{bd.random_job_id}_header_row.csv"
# instrument_header_row_file_path = "#{bd.processing_dir}/#{header_row_file_name}"
# instrument_header_row_zip_file_path = "#{bd.processing_dir}/#{header_row_file_name}.gz"

instrument.vars.each do |var|

var_output_file_path = bd.var_temp_output_file_path(var)

zipped_var_file_path = ExportTsPointsToFile.call(
var,
bd,
var_output_file_path
)
zipped_var_file_path = ExportTsPointsToFile.call(var, bd, var_output_file_path)

if zipped_var_file_path # Make sure the file is created - it was not if there were no data point
temp_files.push(zipped_var_file_path)

if (bd.create_separate_instrument_files)
var_zip_files.push(zipped_var_file_path)
else
temp_file_paths.push(zipped_var_file_path)
end
end
end

# If separate instrument files are being created, create the instrument-specific zip file
if (bd.create_separate_instrument_files)

instrument_zip_file_path = bd.instrument_zip_file_path(instrument)

# create the instrument zip file by merging the var files
files_string = var_zip_files.join(" ")
# command = "cat #{files_string} > #{final_file_path}"
command = "zcat #{files_string} | gzip -c > #{instrument_zip_file_path}"
system(command)

# Add the instrument file to the list of files to tar at the end of the process
temp_file_paths.push(instrument_zip_file_path)


# Remove the temp var files
var_zip_files.each do |file_path|
Rails.logger.debug file_path

File.delete(file_path)
end

end
end


# Merge the zip files together
files_string = temp_files.join(" ")
# command = "cat #{files_string} > #{final_file_path}"
command = "zcat #{files_string} | gzip -c > #{bd.final_file_path}"
system(command)
if (bd.create_separate_instrument_files)

temp_file_names = Array.new
temp_file_paths.each do |temp_file_path|
temp_file_names.push(File.basename(temp_file_path))
end
files_string = temp_file_names.join(" ")

# create a tar file of all the per-instrument zip files
command = "tar czf #{bd.final_tar_file_path} -C #{BulkDownload.processing_dir} #{files_string}"
results = system(command)
else
# Merge the zip files together (this is for the single-file creation)
files_string = temp_file_paths.join(" ")
# command = "cat #{files_string} > #{final_file_path}"
command = "zcat #{files_string} | gzip -c > #{bd.final_gz_file_path}"
system(command)
end

# Remove the temp files
temp_file_paths.each do |file_path|
File.delete(file_path)
end

# Remove the temp files
temp_files.each do |file_path|
File.delete(file_path)
end

File.delete(bd.placeholder_file_path)

Expand Down
65 changes: 58 additions & 7 deletions app/models/concerns/bulk_download.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class BulkDownload
attr_accessor :instrument_fields
attr_accessor :var_fields
attr_accessor :create_separate_instrument_files
attr_accessor :create_single_master_file

attr_accessor :instruments

Expand All @@ -32,6 +33,8 @@ def initialize(*args)
@var_fields = args[6]
@create_separate_instrument_files = args[7]

@create_single_master_file = ! create_separate_instrument_files


@instruments = Instrument.where(id: self.instrument_ids)

Expand Down Expand Up @@ -59,10 +62,15 @@ def final_file_name_base
end


def final_file_path
def final_gz_file_path
return "#{BulkDownload.tmp_dir}/#{self.final_file_name_base}.csv.gz"
end

# Location of the final ".tar.gz" output file.
#
# @return [String] "<BulkDownload.tmp_dir>/<final_file_name_base>.tar.gz"
def final_tar_file_path
  output_dir = BulkDownload.tmp_dir
  "#{output_dir}/#{final_file_name_base}.tar.gz"
end


# Location of the ".temp" placeholder file that shares the final file's base name.
#
# @return [String] "<BulkDownload.tmp_dir>/<final_file_name_base>.temp"
def placeholder_file_path
  output_dir = BulkDownload.tmp_dir
  "#{output_dir}/#{final_file_name_base}.temp"
end
Expand All @@ -79,33 +87,76 @@ def var_temp_output_file_path(var)



def instrument_final_file_name_base(instrument)


# Builds the path of the gzipped CSV file for a single instrument.
#
# The file name combines the profile string, the parameterized site and
# instrument names, the instrument id and the creation time string.
#
# Previously a stale `return` of the bare file name sat in the middle of this
# method, so the ".csv.gz" path built below it was never reached.
#
# @param instrument an object responding to `site.name`, `name` and `id`
# @return [String] "<BulkDownload.processing_dir>/<file name>.csv.gz"
def instrument_zip_file_path(instrument)
  site_string = instrument.site.name.parameterize
  instrument_string = instrument.name.parameterize
  instrument_id_string = "inst-id-#{instrument.id}"

  file_name = "#{profile_string}_#{site_string}_#{instrument_string}_#{instrument_id_string}_#{creation_time_string}"

  "#{BulkDownload.processing_dir}/#{file_name}.csv.gz"
end


def master_csv_header

# Builds the header text for a single instrument's CSV file.
#
# The header holds the comment rows describing the request (creation time,
# date range, test-data flag), the id and name of this one instrument, and
# then the column labels from `row_labels`.
#
# The aggregate "Instrument IDs" / "Instrument Names" rows for the whole
# request are left out here: alongside the per-instrument rows they produced
# two "# Instrument Names:" entries in the same header.
#
# @param instrument an object responding to `id` and `name`
# @return [String] the comment rows followed by `row_labels`
def instrument_csv_header(instrument)
  csv_header_rows = "# CSV file creation initiated at: #{creation_time}\n"
  csv_header_rows += "# Start Date (inclusive): #{start_time.strftime('%Y-%m-%d')}\n"
  csv_header_rows += "# End Date (inclusive): #{end_time.strftime('%Y-%m-%d')}\n"
  csv_header_rows += "# Include Test Data: #{include_test_data}\n"
  csv_header_rows += "# Instrument ID: #{instrument.id}\n"
  csv_header_rows += "# Instrument Names: #{instrument.name}\n"

  csv_header_rows + row_labels
end


def instrument_csv_header
# Path of the uncompressed header-row CSV for one instrument.
#
# @param instrument an object responding to `id`
# @return [String] "<BulkDownload.processing_dir>/<random_job_id>_instrument_<id>_header_row.csv"
def instrument_header_row_file_path(instrument)
  csv_name = "#{random_job_id}_instrument_#{instrument.id}_header_row.csv"
  "#{BulkDownload.processing_dir}/#{csv_name}"
end

# Path of the gzipped header-row CSV for one instrument.
#
# @param instrument an object responding to `id`
# @return [String] "<BulkDownload.processing_dir>/<random_job_id>_instrument_<id>_header_row.csv.gz"
def instrument_header_row_zip_file_path(instrument)
  gz_name = "#{random_job_id}_instrument_#{instrument.id}_header_row.csv.gz"
  "#{BulkDownload.processing_dir}/#{gz_name}"
end


# Writes the CSV header for one instrument to its own file and gzips it.
#
# @param instrument passed through to instrument_header_row_file_path,
#   instrument_csv_header and instrument_header_row_zip_file_path
# @return the result of instrument_header_row_zip_file_path(instrument)
# @raise [RuntimeError] if the gzip command fails or cannot be executed
def create_instrument_csv_header_zip_file(instrument)
  header_path = instrument_header_row_file_path(instrument)

  # Write the header rows to their own file.
  File.write(header_path, instrument_csv_header(instrument))

  # Compress the file in place (-f overwrites an existing .gz). The arguments
  # are passed separately so no shell is involved: a path containing spaces or
  # shell metacharacters is handed to gzip verbatim.
  unless system("gzip", "-f", header_path)
    # Fail loudly rather than returning the path of an archive that was never created.
    raise "gzip failed for #{header_path}"
  end

  instrument_header_row_zip_file_path(instrument)
end



# Builds the header text for the master CSV file: one comment row per request
# attribute (creation time, date range, test-data flag, instrument ids and
# names), followed by the column labels from `row_labels`.
#
# @return [String] the comment rows, each newline-terminated, plus `row_labels`
def master_csv_header
  comment_rows = [
    "# CSV file creation initiated at: #{creation_time}",
    "# Start Date (inclusive): #{start_time.strftime('%Y-%m-%d')}",
    "# End Date (inclusive): #{end_time.strftime('%Y-%m-%d')}",
    "# Include Test Data: #{include_test_data}",
    "# Instrument IDs: #{instrument_ids.join(', ')}",
    "# Instrument Names: #{instruments.pluck(:name).join(', ')}"
  ]

  comment_rows.map { |row| "#{row}\n" }.join + row_labels
end


def header_row_file_path
header_row_file_name = "#{self.random_job_id}_header_row.csv"

Expand Down
3 changes: 2 additions & 1 deletion app/views/bulk_download/_bulk_download.html.haml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@
include_test_data: $('#include_test_data').is(":checked"),
site_fields: site_fields.join(),
instrument_fields: instrument_fields.join(),
var_fields: var_fields.join()
var_fields: var_fields.join(),
create_separate_instrument_files: $('#create_separate_instrument_files').is(":checked")
}, // data to be submit
function(data, status, jqXHR) {// success callback
$('.bulk_data_notifications').html('Your bulk download is now being created. Please reload this page to check its status.');
Expand Down
Binary file added z
Binary file not shown.

0 comments on commit 993f5b8

Please sign in to comment.