Skip to content

Commit

Permalink
Merge pull request #10115 from ErykKul/10114_validate_file_retry
Browse files Browse the repository at this point in the history
Single retry when datafile checksum validation fails
  • Loading branch information
landreev authored Nov 16, 2023
2 parents 8c27a8b + 7715ff9 commit 8d8f35f
Showing 1 changed file with 27 additions and 34 deletions.
61 changes: 27 additions & 34 deletions src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -1449,6 +1449,17 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) {
return s3io;
}

/**
 * Opens the supplied storage for reading and returns a stream over the
 * originally uploaded bytes of the file.
 *
 * For a tabular file the stream is taken from the auxiliary file stored under
 * {@code FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION} (the preserved original);
 * for any other file it is the storage's main input stream.
 *
 * This method does not close the returned stream; the caller is expected to.
 *
 * @param storage       storage handle of the data file; it is opened with
 *                      {@code DataAccessOption.READ_ACCESS} by this method
 * @param isTabularData whether the data file is tabular
 * @return an input stream over the original file content
 * @throws IOException if opening the storage or obtaining the stream fails
 */
private static InputStream getOriginalFileInputStream(StorageIO<DataFile> storage, boolean isTabularData) throws IOException {
    storage.open(DataAccessOption.READ_ACCESS);
    // Tabular files keep their original upload as an auxiliary file, so read
    // that one; everything else is read from the main stream.
    return isTabularData
            ? storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION)
            : storage.getInputStream();
}

public static void validateDataFileChecksum(DataFile dataFile) throws IOException {
DataFile.ChecksumType checksumType = dataFile.getChecksumType();
if (checksumType == null) {
Expand All @@ -1458,35 +1469,24 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio
}

StorageIO<DataFile> storage = dataFile.getStorageIO();
InputStream in = null;

try {
storage.open(DataAccessOption.READ_ACCESS);
String recalculatedChecksum = null;

if (!dataFile.isTabularData()) {
in = storage.getInputStream();
} else {
// if this is a tabular file, read the preserved original "auxiliary file"
// instead:
in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
}
try (InputStream inputStream = getOriginalFileInputStream(storage, dataFile.isTabularData())) {
recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType);
} catch (IOException ioex) {
in = null;
}

if (in == null) {
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString()));
logger.log(Level.INFO, info);
throw new IOException(info);
}

String recalculatedChecksum = null;
try {
recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
} catch (RuntimeException rte) {
logger.log(Level.SEVERE, "failed to calculated checksum, one retry", rte);
recalculatedChecksum = null;
} finally {
IOUtils.closeQuietly(in);
}

if (recalculatedChecksum == null) { //retry once
storage = dataFile.getStorageIO();
try (InputStream inputStream = getOriginalFileInputStream(storage, dataFile.isTabularData())) {
recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType);
}
}

if (recalculatedChecksum == null) {
Expand All @@ -1504,19 +1504,12 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio
boolean fixed = false;
if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) {
// try again, see if the .orig file happens to be there:
try {
in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
} catch (IOException ioex) {
in = null;
try (InputStream in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION)) {
recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
} catch (RuntimeException rte) {
recalculatedChecksum = null;
}
if (in != null) {
try {
recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
} catch (RuntimeException rte) {
recalculatedChecksum = null;
} finally {
IOUtils.closeQuietly(in);
}
if (recalculatedChecksum != null) {
// try again:
if (recalculatedChecksum.equals(dataFile.getChecksumValue())) {
fixed = true;
Expand Down

0 comments on commit 8d8f35f

Please sign in to comment.