individual instrument tar file working

earthcubeprojects-chords · Jul 31, 2020 · 993f5b8 · 993f5b8
1 parent 2bfe01e
commit 993f5b8
Show file tree

Hide file tree

Showing 4 changed files with 131 additions and 34 deletions.
diff --git a/app/jobs/create_bulk_download_job.rb b/app/jobs/create_bulk_download_job.rb
@@ -20,54 +20,99 @@ def perform(*args)
 
 
     # track all the files that are created
-    temp_files = Array.new
+    temp_file_paths = Array.new
 
 
-    # Create the header for the master/final csv file 
-    # (If one will be created)
-    header_row_zip_file_path = bd.create_master_csv_header_zip_file
-  	temp_files.push(header_row_zip_file_path)
+    if (bd.create_single_master_file)
+      # Create the header for the master/final csv file 
+      # (If one will be created)
+      header_row_zip_file_path = bd.create_master_csv_header_zip_file
+      temp_file_paths.push(header_row_zip_file_path)
+    end
 
 
 
   	# Create zip files for each variable of each desired instrument
 		bd.instruments.each do |instrument|
 
-      # define a few file names
+      if (bd.create_separate_instrument_files)
+        var_zip_files = Array.new
+
+        instrument_header_row_zip_file_path = bd.create_instrument_csv_header_zip_file(instrument)
+        var_zip_files.push(instrument_header_row_zip_file_path)
+
+        # Rails.logger.debug "*" * 80
+        # Rails.logger.debug var_zip_files
+        # Rails.logger.debug "*" * 80
+
+      end
 
-      # TheTZVOLCANOGNSSNetwork_Volcanoflank_OLO1
-      # instrument_header_row_file_name       = "#{bd.random_job_id}_header_row.csv"
-      # instrument_header_row_file_path       = "#{bd.processing_dir}/#{header_row_file_name}"
-      # instrument_header_row_zip_file_path   = "#{bd.processing_dir}/#{header_row_file_name}.gz"
 
 			instrument.vars.each do |var|
-
 		    var_output_file_path = bd.var_temp_output_file_path(var)
 
-				zipped_var_file_path = ExportTsPointsToFile.call(
-					var,
-          bd,
-					var_output_file_path
-				)
+				zipped_var_file_path = ExportTsPointsToFile.call(var, bd, var_output_file_path)
 
 				if zipped_var_file_path # Make sure the file is created - it was not if there were no data point
-					temp_files.push(zipped_var_file_path)
+
+          if (bd.create_separate_instrument_files)
+            var_zip_files.push(zipped_var_file_path)
+          else
+            temp_file_paths.push(zipped_var_file_path)
+          end					
 				end
 			end
+
+      # If separate instrument files are being created, create the instrument-specific zip file
+      if (bd.create_separate_instrument_files)
+
+        instrument_zip_file_path = bd.instrument_zip_file_path(instrument)
+
+        # create the instrument zip file by merging the var files
+        files_string = var_zip_files.join(" ")
+        # command = "cat  #{files_string} > #{final_file_path}"
+        command = "zcat #{files_string} | gzip -c > #{instrument_zip_file_path}"
+        system(command)
+
+        # Add the instrument file to the list of files to tar at the end of the process
+        temp_file_paths.push(instrument_zip_file_path)
+
+
+        # Remove the temp var files
+        var_zip_files.each do |file_path|
+          Rails.logger.debug file_path
+
+          File.delete(file_path)
+        end
+
+      end
 		end
 
 
-		# Merge the zip files together
-		files_string = temp_files.join(" ")
-		# command = "cat  #{files_string} > #{final_file_path}"
-    command = "zcat #{files_string} | gzip -c > #{bd.final_file_path}"
-		system(command)
+    if (bd.create_separate_instrument_files)
+
+      temp_file_names = Array.new
+      temp_file_paths.each do |temp_file_path|
+        temp_file_names.push(File.basename(temp_file_path))
+      end
+      files_string = temp_file_names.join(" ")
+
+      # create a tar file of all the collected files (listed by basename, relative to the processing dir)
+      command = "tar czf #{bd.final_tar_file_path} -C #{BulkDownload.processing_dir} #{files_string}"
+      results = system(command)
+    else
+      # Merge the zip files together (this is for the single-file creation)
+      files_string = temp_file_paths.join(" ")
+      # command = "cat  #{files_string} > #{final_file_path}"
+      command = "zcat #{files_string} | gzip -c > #{bd.final_gz_file_path}"
+      system(command)
+    end
 
+    # Remove the temp files
+    temp_file_paths.each do |file_path|
+      File.delete(file_path)
+    end
 
-  	# Remove the temp files
-  	temp_files.each do |file_path|
-  		File.delete(file_path)
-  	end
 
   	File.delete(bd.placeholder_file_path)
 

diff --git a/app/models/concerns/bulk_download.rb b/app/models/concerns/bulk_download.rb
@@ -12,6 +12,7 @@ class BulkDownload
   attr_accessor :instrument_fields
   attr_accessor :var_fields
   attr_accessor :create_separate_instrument_files
+  attr_accessor :create_single_master_file
 
   attr_accessor :instruments
 
@@ -32,6 +33,8 @@ def initialize(*args)
     @var_fields                       = args[6]
     @create_separate_instrument_files = args[7]
 
+    @create_single_master_file = ! create_separate_instrument_files
+
 
     @instruments = Instrument.where(id: self.instrument_ids) 
 
@@ -59,10 +62,15 @@ def final_file_name_base
   end
 
 
-  def final_file_path
+  def final_gz_file_path
     return "#{BulkDownload.tmp_dir}/#{self.final_file_name_base}.csv.gz" 
   end
 
+  def final_tar_file_path
+    return "#{BulkDownload.tmp_dir}/#{self.final_file_name_base}.tar.gz" 
+  end
+
+
   def placeholder_file_path
     return "#{BulkDownload.tmp_dir}/#{self.final_file_name_base}.temp"
   end
@@ -79,33 +87,76 @@ def var_temp_output_file_path(var)
 
 
 
-  def instrument_final_file_name_base(instrument)
+
+
+  def instrument_zip_file_path(instrument)
     site_string           = instrument.site.name.parameterize
     instrument_string     = instrument.name.parameterize
     instrument_id_string  = "inst-id-#{instrument.id}"
 
-    return "#{self.profile_string}_#{site_string}_#{instrument_string}_#{instrument_id_string}_#{self.creation_time_string}"
+    file_name = "#{self.profile_string}_#{site_string}_#{instrument_string}_#{instrument_id_string}_#{self.creation_time_string}"
+    file_path = "#{BulkDownload.processing_dir}/#{file_name}.csv.gz" 
+
+    return file_path
   end
 
 
-  def master_csv_header
+
+  def instrument_csv_header(instrument)
+
     # Get the header rows for this instrument's csv file
     csv_header_rows =  "# CSV file creation initiated at: #{self.creation_time.to_s}\n"
     csv_header_rows += "# Start Date (inclusive): #{self.start_time.strftime('%Y-%m-%d')}\n"
     csv_header_rows += "# End Date (inclusive):   #{self.end_time.strftime('%Y-%m-%d')}\n"
     csv_header_rows += "# Include Test Data: #{self.include_test_data}\n"
-    csv_header_rows += "# Instrument IDs: #{self.instrument_ids.join(', ')}\n"
-    csv_header_rows += "# Instrument Names: #{self.instruments.pluck(:name).join(', ')}\n"
+    csv_header_rows += "# Instrument ID: #{instrument.id}\n"
+    csv_header_rows += "# Instrument Names: #{instrument.name}\n"
 
     csv_header_rows += self.row_labels
   end
 
 
-  def instrument_csv_header
+  def instrument_header_row_file_path(instrument)
+    header_row_file_name = "#{self.random_job_id}_instrument_#{instrument.id}_header_row.csv"
+
+    return "#{BulkDownload.processing_dir}/#{header_row_file_name}"
+  end  
+
+  def instrument_header_row_zip_file_path(instrument)
+    header_row_file_name = "#{self.random_job_id}_instrument_#{instrument.id}_header_row.csv.gz"
+
+    return "#{BulkDownload.processing_dir}/#{header_row_file_name}"
+  end  
+
+
+  def create_instrument_csv_header_zip_file(instrument)
+
+
+    # write the header rows to their own file
+    File.write(self.instrument_header_row_file_path(instrument), self.instrument_csv_header(instrument) )
+
+    # zip the temp file
+    command = "gzip -f #{self.instrument_header_row_file_path(instrument)}"
+    system(command)
+
+    return self.instrument_header_row_zip_file_path(instrument)
   end
 
 
 
+  def master_csv_header
+    # Get the header rows for the master csv file
+    csv_header_rows =  "# CSV file creation initiated at: #{self.creation_time.to_s}\n"
+    csv_header_rows += "# Start Date (inclusive): #{self.start_time.strftime('%Y-%m-%d')}\n"
+    csv_header_rows += "# End Date (inclusive):   #{self.end_time.strftime('%Y-%m-%d')}\n"
+    csv_header_rows += "# Include Test Data: #{self.include_test_data}\n"
+    csv_header_rows += "# Instrument IDs: #{self.instrument_ids.join(', ')}\n"
+    csv_header_rows += "# Instrument Names: #{self.instruments.pluck(:name).join(', ')}\n"
+
+    csv_header_rows += self.row_labels
+  end
+
+
   def header_row_file_path
     header_row_file_name = "#{self.random_job_id}_header_row.csv"
 

diff --git a/app/views/bulk_download/_bulk_download.html.haml b/app/views/bulk_download/_bulk_download.html.haml
@@ -54,7 +54,8 @@
             include_test_data: $('#include_test_data').is(":checked"),
             site_fields: site_fields.join(),
             instrument_fields: instrument_fields.join(),
-            var_fields: var_fields.join()
+            var_fields: var_fields.join(),
+            create_separate_instrument_files: $('#create_separate_instrument_files').is(":checked")
           }, // data to be submit
           function(data, status, jqXHR) {// success callback
             $('.bulk_data_notifications').html('Your bulk download is now being created. Please reload this page to check its status.');

diff --git a/z b/z