diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java
index b7fc71cc5..ef9111285 100644
--- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java
@@ -13,20 +13,18 @@
 import com.conveyal.file.FileUtils;
 import com.conveyal.gtfs.GTFSCache;
 import com.conveyal.gtfs.GTFSFeed;
-import com.conveyal.gtfs.error.GTFSError;
 import com.conveyal.gtfs.error.GeneralError;
 import com.conveyal.gtfs.model.Stop;
 import com.conveyal.gtfs.validator.PostLoadValidator;
 import com.conveyal.osmlib.Node;
 import com.conveyal.osmlib.OSM;
-import com.conveyal.r5.analyst.progress.ProgressInputStream;
 import com.conveyal.r5.analyst.cluster.TransportNetworkConfig;
+import com.conveyal.r5.analyst.progress.ProgressInputStream;
 import com.conveyal.r5.analyst.progress.Task;
 import com.conveyal.r5.streets.OSMCache;
 import com.conveyal.r5.util.ExceptionUtils;
 import com.mongodb.QueryBuilder;
 import org.apache.commons.fileupload.FileItem;
-import org.apache.commons.fileupload.disk.DiskFileItem;
 import org.bson.types.ObjectId;
 import org.locationtech.jts.geom.Envelope;
 import org.slf4j.Logger;
@@ -107,19 +105,25 @@ private Bundle create (Request req, Response res) {
         // Do some initial synchronous work setting up the bundle to fail fast if the request is bad.
         final Map<String, List<FileItem>> files = HttpUtils.getRequestFiles(req.raw());
         final Bundle bundle = new Bundle();
+        final File osmPbfFile;
+        final List<File> gtfsZipFiles;
         try {
             bundle.name = files.get("bundleName").get(0).getString("UTF-8");
             bundle.regionId = files.get("regionId").get(0).getString("UTF-8");
             if (files.get("osmId") != null) {
+                osmPbfFile = null;
                 bundle.osmId = files.get("osmId").get(0).getString("UTF-8");
                 Bundle bundleWithOsm = Persistence.bundles.find(QueryBuilder.start("osmId").is(bundle.osmId).get()).next();
                 if (bundleWithOsm == null) {
                     throw AnalysisServerException.badRequest("Selected OSM does not exist.");
                 }
+            } else {
+                osmPbfFile = HttpUtils.saveFileItemLocally(files.get("osm").get(0));
             }
             if (files.get("feedGroupId") != null) {
+                gtfsZipFiles = null;
                 bundle.feedGroupId = files.get("feedGroupId").get(0).getString("UTF-8");
                 Bundle bundleWithFeed = Persistence.bundles.find(QueryBuilder.start("feedGroupId").is(bundle.feedGroupId).get()).next();
                 if (bundleWithFeed == null) {
@@ -134,6 +138,8 @@ private Bundle create (Request req, Response res) {
                 bundle.feeds = bundleWithFeed.feeds;
                 bundle.feedsComplete = bundleWithFeed.feedsComplete;
                 bundle.totalFeeds = bundleWithFeed.totalFeeds;
+            } else {
+                gtfsZipFiles = HttpUtils.saveFileItemsLocally(files.get("feedGroup"));
             }
             UserPermissions userPermissions = UserPermissions.from(req);
             bundle.accessGroup = userPermissions.accessGroup;
@@ -155,16 +161,15 @@ private Bundle create (Request req, Response res) {
             .withWorkProduct(BUNDLE, bundle._id, bundle.regionId)
             .withAction(progressListener -> {
                 try {
-                    if (bundle.osmId == null) {
+                    if (osmPbfFile != null) {
                         // Process uploaded OSM.
                         bundle.osmId = new ObjectId().toString();
-                        DiskFileItem fi = (DiskFileItem) files.get("osm").get(0);
                         // Here we perform minimal validation by loading the OSM, but don't retain the resulting MapDB.
                         OSM osm = new OSM(null);
                         osm.intersectionDetection = true;
                         // Number of entities in an OSM file is unknown, so derive progress from the number of bytes read.
                         // Wrapping in buffered input stream should reduce number of progress updates.
-                        osm.readPbf(ProgressInputStream.forFileItem(fi, progressListener));
+                        osm.readPbf(ProgressInputStream.forFile(osmPbfFile, progressListener));
                         // osm.readPbf(new BufferedInputStream(fi.getInputStream()));
                         Envelope osmBounds = new Envelope();
                         for (Node n : osm.nodes.values()) {
@@ -173,10 +178,10 @@ private Bundle create (Request req, Response res) {
                         osm.close();
                         checkWgsEnvelopeSize(osmBounds, "OSM data");
                         // Store the source OSM file. Note that we're not storing the derived MapDB file here.
-                        fileStorage.moveIntoStorage(OSMCache.getKey(bundle.osmId), fi.getStoreLocation());
+                        fileStorage.moveIntoStorage(OSMCache.getKey(bundle.osmId), osmPbfFile);
                     }
-                    if (bundle.feedGroupId == null) {
+                    if (gtfsZipFiles != null) {
                         // Process uploaded GTFS files
                         bundle.feedGroupId = new ObjectId().toString();
@@ -186,8 +191,7 @@ private Bundle create (Request req, Response res) {
                         bundle.feeds = new ArrayList<>();
                         bundle.totalFeeds = files.get("feedGroup").size();
-                        for (FileItem fileItem : files.get("feedGroup")) {
-                            File feedFile = ((DiskFileItem) fileItem).getStoreLocation();
+                        for (File feedFile : gtfsZipFiles) {
                             ZipFile zipFile = new ZipFile(feedFile);
                             File tempDbFile = FileUtils.createScratchFile("db");
                             File tempDbpFile = new File(tempDbFile.getAbsolutePath() + ".p");
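The gist of the BundleController change: multipart uploads are pinned to scratch files during the synchronous part of the request, so the async task only ever touches plain `File`s. A minimal hedged sketch of the pattern, assuming `files`, `bundle`, `progressListener`, and `fileStorage` are in scope as in the hunks above:

```java
// Synchronous part of the request: persist uploads immediately.
File osmPbfFile = HttpUtils.saveFileItemLocally(files.get("osm").get(0));
List<File> gtfsZipFiles = HttpUtils.saveFileItemsLocally(files.get("feedGroup"));

// Asynchronous part: read and store plain Files, with no dependence on the
// servlet request or commons-fileupload internals still being alive.
OSM osm = new OSM(null); // null storage path: validate without retaining a MapDB
osm.intersectionDetection = true;
osm.readPbf(ProgressInputStream.forFile(osmPbfFile, progressListener));
fileStorage.moveIntoStorage(OSMCache.getKey(bundle.osmId), osmPbfFile);
```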
diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java
index b1afcfc05..3e320d43b 100644
--- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java
@@ -3,6 +3,7 @@
 import com.conveyal.analysis.AnalysisServerException;
 import com.conveyal.analysis.UserPermissions;
 import com.conveyal.analysis.components.TaskScheduler;
+import com.conveyal.analysis.datasource.DataSourceUtil;
 import com.conveyal.analysis.grids.SeamlessCensusGridExtractor;
 import com.conveyal.analysis.models.DataGroup;
 import com.conveyal.analysis.models.OpportunityDataset;
@@ -11,7 +12,6 @@
 import com.conveyal.analysis.persistence.AnalysisCollection;
 import com.conveyal.analysis.persistence.AnalysisDB;
 import com.conveyal.analysis.persistence.Persistence;
-import com.conveyal.analysis.util.FileItemInputStreamProvider;
 import com.conveyal.analysis.util.HttpUtils;
 import com.conveyal.analysis.util.JsonUtil;
 import com.conveyal.file.FileStorage;
@@ -25,7 +25,6 @@
 import com.conveyal.r5.analyst.progress.Task;
 import com.conveyal.r5.analyst.progress.WorkProduct;
 import com.conveyal.r5.util.ExceptionUtils;
-import com.conveyal.r5.util.InputStreamProvider;
 import com.conveyal.r5.util.ProgressListener;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.io.Files;
@@ -56,7 +55,6 @@
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
-import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate;
 import static com.conveyal.analysis.util.JsonUtil.toJson;
 import static com.conveyal.file.FileCategory.GRIDS;
 import static com.conveyal.r5.analyst.WebMercatorExtents.parseZoom;
@@ -275,7 +273,7 @@ private static FileStorageFormat getFormatCode (PointSet pointSet) {
      * This method executes in a blocking (synchronous) manner, but it can take a while so should be called within a
      * non-blocking asynchronous task.
      */
-    private List<FreeFormPointSet> createFreeFormPointSetsFromCsv(FileItem csvFileItem, Map<String, String> params) {
+    private List<FreeFormPointSet> createFreeFormPointSetsFromCsv(File csvFile, Map<String, String> params) {
         String latField = params.get("latField");
         String lonField = params.get("lonField");
@@ -296,12 +294,11 @@ private List<FreeFormPointSet> createFreeFormPointSetsFromCsv(FileItem csvFileIt
         try {
             List<FreeFormPointSet> pointSets = new ArrayList<>();
-            InputStreamProvider csvStreamProvider = new FileItemInputStreamProvider(csvFileItem);
-            pointSets.add(FreeFormPointSet.fromCsv(csvStreamProvider, latField, lonField, idField, countField));
+            pointSets.add(FreeFormPointSet.fromCsv(csvFile, latField, lonField, idField, countField));
             // The second pair of lat and lon fields allow creating two matched pointsets from the same CSV.
             // This is used for one-to-one travel times between specific origins/destinations.
             if (latField2 != null && lonField2 != null) {
-                pointSets.add(FreeFormPointSet.fromCsv(csvStreamProvider, latField2, lonField2, idField, countField));
+                pointSets.add(FreeFormPointSet.fromCsv(csvFile, latField2, lonField2, idField, countField));
             }
             return pointSets;
         } catch (Exception e) {
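Passing a `File` instead of an `InputStreamProvider` matters here because `fromCsv` opens its input twice (one pass to size the arrays, one to fill them), and the caller may parse the same CSV again for a second lat/lon pair. A hedged sketch of the call pattern (file path and field names illustrative):

```java
File csv = new File("/tmp/origins.csv"); // illustrative path
// Each call re-opens the file as needed; there is no single-use stream to exhaust.
FreeFormPointSet origins = FreeFormPointSet.fromCsv(csv, "lat", "lon", "id", "count");
// A second, matched pointset can be built from the same file.
FreeFormPointSet destinations = FreeFormPointSet.fromCsv(csv, "lat2", "lon2", "id", "count");
```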
@@ -329,22 +326,25 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res
         // are recorded in a persistent purpose-built way rather than falling back on the UI's catch-all error window.
         // TODO more standardized mechanism for tracking asynchronous tasks and catching exceptions on them
         OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, sourceName);
-        addStatusAndRemoveOldStatuses(status);
-        final List<FileItem> fileItems;
+        final List<File> files;
         final FileStorageFormat uploadFormat;
         final Map<String, String> parameters;
         try {
             // Validate inputs and parameters, which will throw an exception if there's anything wrong with them.
             // Call remove() rather than get() so that subsequent code will see only string parameters, not the files.
-            fileItems = formFields.remove("files");
-            uploadFormat = detectUploadFormatAndValidate(fileItems);
+            files = HttpUtils.extractFilesFromFileItemsAndUnzip(formFields.remove("files"));
+            uploadFormat = DataSourceUtil.detectUploadFormatAndValidate(files);
             parameters = extractStringParameters(formFields);
         } catch (Exception e) {
             status.completeWithError(e);
+            res.status(400);
             return status;
         }
+
+        // Add the status to the region-wide tracker before we begin the heavy tasks.
+        addStatusAndRemoveOldStatuses(status);
+
         // We are going to call several potentially slow blocking methods to create and persist new pointsets.
         // This whole series of actions will be run sequentially but within an asynchronous Executor task.
         // After enqueueing, the status is returned so the UI can track progress.
@@ -354,35 +354,35 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res
             List<PointSet> pointsets = new ArrayList<>();
             if (uploadFormat == FileStorageFormat.GRID) {
                 LOG.info("Detected opportunity dataset stored in Conveyal binary format.");
-                pointsets.addAll(createGridsFromBinaryGridFiles(fileItems, status));
+                pointsets.addAll(createGridsFromBinaryGridFiles(files, status));
             } else if (uploadFormat == FileStorageFormat.SHP) {
                 LOG.info("Detected opportunity dataset stored as ESRI shapefile.");
-                pointsets.addAll(createGridsFromShapefile(fileItems, zoom, status));
+                pointsets.addAll(createGridsFromShapefile(files, zoom, status));
             } else if (uploadFormat == FileStorageFormat.CSV) {
                 LOG.info("Detected opportunity dataset stored as CSV");
                 // Create a grid even when user has requested a freeform pointset so we have something to visualize.
-                FileItem csvFileItem = fileItems.get(0);
+                File csvFile = files.get(0);
                 // FIXME why were we uploading to S3 using the file path not the UUID?
                 // writeFileToS3(csvFile);
                 // TODO report progress / status as with grids. That involves pre-scanning the CSV which would be
                 // facilitated by retaining the CSV server side and later converting to pointset.
                 boolean requestedFreeForm = Boolean.parseBoolean(parameters.get("freeform"));
                 // Hack to enable freeform pointset building without exposing a UI element, via file name.
-                if (csvFileItem.getName().contains("FREEFORM_PS.")) {
+                if (csvFile.getName().contains("FREEFORM_PS.")) {
                     requestedFreeForm = true;
                 }
                 if (requestedFreeForm) {
                     LOG.info("Processing CSV as freeform (rather than gridded) pointset as requested.");
                     // This newer process creates a FreeFormPointSet only for the specified count fields,
                     // as well as a Grid to assist in visualization of the uploaded data.
-                    for (FreeFormPointSet freeForm : createFreeFormPointSetsFromCsv(csvFileItem, parameters)) {
+                    for (FreeFormPointSet freeForm : createFreeFormPointSetsFromCsv(csvFile, parameters)) {
                         Grid gridFromFreeForm = Grid.fromFreeForm(freeForm, zoom);
                         pointsets.add(freeForm);
                         pointsets.add(gridFromFreeForm);
                     }
                 } else {
                     // This is the common default process: create a grid for every non-ignored field in the CSV.
-                    pointsets.addAll(createGridsFromCsv(csvFileItem, formFields, zoom, status));
+                    pointsets.addAll(createGridsFromCsv(csvFile, formFields, zoom, status));
                 }
             }
             if (pointsets.isEmpty()) {
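The reordering in this hunk is the behavioral change to review: extraction and format validation now happen synchronously, so a bad upload yields HTTP 400 with the error recorded on the status object, and only validated uploads are registered in the region-wide tracker. Restated as an annotated sketch:

```java
// Fail fast while still on the request thread...
try {
    files = HttpUtils.extractFilesFromFileItemsAndUnzip(formFields.remove("files"));
    uploadFormat = DataSourceUtil.detectUploadFormatAndValidate(files);
} catch (Exception e) {
    status.completeWithError(e); // client sees message + stackTrace right away
    res.status(400);             // a client error, not a silent async failure
    return status;
}
// ...and only then track the status and enqueue the slow work.
addStatusAndRemoveOldStatuses(status);
```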
@@ -473,7 +473,7 @@ private OpportunityDataset deleteDataset(String id, UserPermissions userPermissi
      * TODO explain latField2 usage
      * @return one or two Grids for each numeric column in the CSV input.
      */
-    private List<Grid> createGridsFromCsv(FileItem csvFileItem,
+    private List<Grid> createGridsFromCsv(File csvFile,
                                           Map<String, List<FileItem>> query,
                                           int zoom,
                                           OpportunityDatasetUploadStatus status) throws Exception {
@@ -488,12 +488,11 @@ private List<Grid> createGridsFromCsv(FileItem csvFileItem,
         String lonField2 = HttpUtils.getFormField(query, "lonField2", false);
         List<String> ignoreFields = Arrays.asList(idField, latField2, lonField2);
-        InputStreamProvider csvStreamProvider = new FileItemInputStreamProvider(csvFileItem);
-        List<Grid> grids = Grid.fromCsv(csvStreamProvider, latField, lonField, ignoreFields, zoom, status);
+        List<Grid> grids = Grid.fromCsv(csvFile, latField, lonField, ignoreFields, zoom, status);
         // TODO verify correctness of this second pass
         if (latField2 != null && lonField2 != null) {
             ignoreFields = Arrays.asList(idField, latField, lonField);
-            grids.addAll(Grid.fromCsv(csvStreamProvider, latField2, lonField2, ignoreFields, zoom, status));
+            grids.addAll(Grid.fromCsv(csvFile, latField2, lonField2, ignoreFields, zoom, status));
         }
         return grids;
@@ -503,14 +502,14 @@ private List<Grid> createGridsFromCsv(FileItem csvFileItem,
     }
     /**
      * Create a grid from an input stream containing a binary grid file.
      * For those in the know, we can upload manually created binary grid files.
      */
-    private List<Grid> createGridsFromBinaryGridFiles(List<FileItem> uploadedFiles,
+    private List<Grid> createGridsFromBinaryGridFiles(List<File> uploadedFiles,
                                                       OpportunityDatasetUploadStatus status) throws Exception {
         List<Grid> grids = new ArrayList<>();
         status.totalFeatures = uploadedFiles.size();
-        for (FileItem fileItem : uploadedFiles) {
-            Grid grid = Grid.read(fileItem.getInputStream());
-            String name = fileItem.getName();
+        for (File file : uploadedFiles) {
+            Grid grid = Grid.read(FileUtils.getInputStream(file));
+            String name = file.getName();
             // Remove ".grid" from the name
             if (name.contains(".grid")) name = name.split(".grid")[0];
             grid.name = name;
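A pre-existing context line in the loop above deserves a flag while this code is being touched: `String.split` takes a regular expression, so `name.split(".grid")[0]` treats the dot as "any character" (for example, "my_grid.grid" splits at "_grid" and yields "my"). If it gets cleaned up, a sketch using commons-io (already on the classpath in this PR) could be:

```java
// "jobs.grid" -> "jobs"; names without a .grid extension pass through unchanged.
if ("grid".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
    name = FilenameUtils.removeExtension(name);
}
```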
@@ -522,37 +521,37 @@
     }
     /**
-     * Preconditions: fileItems must contain SHP, DBF, and PRJ files, and optionally SHX. All files should have the
+     * Preconditions: files must contain SHP, DBF, and PRJ files, and optionally SHX. All files should have the
      * same base name, and should not contain any other files but these three or four.
      */
-    private List<Grid> createGridsFromShapefile(List<FileItem> fileItems,
+    private List<Grid> createGridsFromShapefile(List<File> files,
                                                 int zoom,
                                                 OpportunityDatasetUploadStatus status) throws Exception {
         // In the caller, we should have already verified that all files have the same base name and have an extension.
         // Extract the relevant files: .shp, .prj, .dbf, and .shx.
         // We need the SHX even though we're looping over every feature as they might be sparse.
-        Map<String, FileItem> filesByExtension = new HashMap<>();
-        for (FileItem fileItem : fileItems) {
-            filesByExtension.put(FilenameUtils.getExtension(fileItem.getName()).toUpperCase(), fileItem);
+        Map<String, File> filesByExtension = new HashMap<>();
+        for (File file : files) {
+            filesByExtension.put(FilenameUtils.getExtension(file.getName()).toUpperCase(), file);
         }
         // Copy the shapefile component files into a temporary directory with a fixed base name.
-        File tempDir = Files.createTempDir();
+        File tempDir = FileUtils.createScratchDirectory();
         File shpFile = new File(tempDir, "grid.shp");
-        filesByExtension.get("SHP").write(shpFile);
+        Files.copy(filesByExtension.get("SHP"), shpFile);
         File prjFile = new File(tempDir, "grid.prj");
-        filesByExtension.get("PRJ").write(prjFile);
+        Files.copy(filesByExtension.get("PRJ"), prjFile);
         File dbfFile = new File(tempDir, "grid.dbf");
-        filesByExtension.get("DBF").write(dbfFile);
+        Files.copy(filesByExtension.get("DBF"), dbfFile);
         // The .shx file is an index. It is optional, and not needed for dense shapefiles.
         if (filesByExtension.containsKey("SHX")) {
             File shxFile = new File(tempDir, "grid.shx");
-            filesByExtension.get("SHX").write(shxFile);
+            Files.copy(filesByExtension.get("SHX"), shxFile);
         }
         List<Grid> grids = Grid.fromShapefile(shpFile, zoom, status);
@@ -625,6 +624,7 @@ public static class OpportunityDatasetUploadStatus implements ProgressListener {
     public Status status = Status.PROCESSING;
     public String name;
     public String message;
+    public String stackTrace;
     public Date createdAt;
     public Date completedAt;
@@ -641,7 +641,8 @@ private void completed (Status status) {
     }
     public void completeWithError (Exception e) {
-        message = "Unable to create opportunity dataset. " + ExceptionUtils.stackTraceString(e);
+        stackTrace = ExceptionUtils.stackTraceString(e);
+        message = "Unable to create opportunity dataset. " + e.getMessage();
         completed(Status.ERROR);
     }
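Splitting `stackTrace` out of `message` changes what status consumers see: the summary is now just the exception message, with the full trace in a separate field. A small hedged illustration (values abbreviated):

```java
OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, sourceName);
status.completeWithError(new DataSourceException("Filename has no extension: points"));
// status.message    -> "Unable to create opportunity dataset. Filename has no extension: points"
// status.stackTrace -> full ExceptionUtils.stackTraceString(e) output, for logs or debug UIs
```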
" + e.getMessage(); completed(Status.ERROR); } diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java index 39f6740ab..28028f861 100644 --- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java @@ -3,13 +3,13 @@ import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.models.DataSource; import com.conveyal.analysis.persistence.AnalysisCollection; +import com.conveyal.analysis.util.HttpUtils; import com.conveyal.file.FileStorage; import com.conveyal.file.FileStorageFormat; import com.conveyal.file.FileStorageKey; import com.conveyal.r5.analyst.progress.ProgressListener; import com.conveyal.r5.analyst.progress.TaskAction; import org.apache.commons.fileupload.FileItem; -import org.apache.commons.fileupload.disk.DiskFileItem; import org.apache.commons.io.FilenameUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,11 +21,8 @@ import java.util.stream.Collectors; import static com.conveyal.analysis.util.HttpUtils.getFormField; -import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate; import static com.conveyal.file.FileCategory.DATASOURCES; import static com.conveyal.file.FileStorageFormat.SHP; -import static com.google.common.base.Preconditions.checkNotNull; -import static com.google.common.base.Preconditions.checkState; /** * Given a batch of uploaded files, put them into FileStorage, categorize and validate them, and record metadata as @@ -42,7 +39,7 @@ public class DataSourceUploadAction implements TaskAction { private final AnalysisCollection dataSourceCollection; /** The files provided in the HTTP post form. These will be moved into storage. */ - private final List fileItems; + private final List files; /** * This DataSourceIngester provides encapsulated loading and validation logic for a single format, by composition @@ -50,12 +47,6 @@ public class DataSourceUploadAction implements TaskAction { */ private DataSourceIngester ingester; - /** - * The file to be ingested, after it has been moved into storage. For Shapefiles and other such "sidecar" formats, - * this is the main file (.shp), with the same base name and in the same directory as all its sidecar files. - */ - private File file; - // This method is a stopgaps - it seems like this should be done differently. 
     public String getDataSourceName () {
         return ingester.dataSource().name;
     }
@@ -64,20 +55,20 @@
     public DataSourceUploadAction (
             FileStorage fileStorage,
             AnalysisCollection<DataSource> dataSourceCollection,
-            List<FileItem> fileItems,
+            List<File> files,
             DataSourceIngester ingester
     ) {
         this.fileStorage = fileStorage;
         this.dataSourceCollection = dataSourceCollection;
-        this.fileItems = fileItems;
+        this.files = files;
         this.ingester = ingester;
     }
     @Override
     public final void action (ProgressListener progressListener) throws Exception {
         progressListener.setWorkProduct(ingester.dataSource().toWorkProduct());
-        moveFilesIntoStorage(progressListener);
-        ingester.ingest(file, progressListener);
+        File dataSourceFile = moveFilesIntoStorage(progressListener);
+        ingester.ingest(dataSourceFile, progressListener);
         dataSourceCollection.insert(ingester.dataSource());
     }
@@ -89,28 +80,31 @@ public final void action (ProgressListener progressListener) throws Exception {
      * We should also consider whether preprocessing like conversion of GTFS to MapDBs should happen at this upload
      * stage. If so, then this logic needs to change a bit.
      */
-    private final void moveFilesIntoStorage (ProgressListener progressListener) {
+    private File moveFilesIntoStorage (ProgressListener progressListener) {
         // Loop through uploaded files, registering the extensions and writing to storage
         // (with filenames that correspond to the source id)
         progressListener.beginTask("Moving files into storage...", 1);
         final String dataSourceId = ingester.dataSource()._id.toString();
-        for (FileItem fileItem : fileItems) {
-            DiskFileItem dfi = (DiskFileItem) fileItem;
+        File dataSourceFile = null;
+        for (File file : files) {
             // Use canonical extension from file type - files may be uploaded with e.g. tif instead of tiff or geotiff.
             String extension = ingester.dataSource().fileFormat.extension;
-            if (fileItems.size() > 1) {
+            if (files.size() > 1) {
                 // If we have multiple files, as with Shapefile, just keep the original extension for each file.
                 // This could lead to orphaned files after a deletion, we might want to implement wildcard deletion.
-                extension = FilenameUtils.getExtension(fileItem.getName()).toLowerCase(Locale.ROOT);
+                extension = FilenameUtils.getExtension(file.getName()).toLowerCase(Locale.ROOT);
             }
             FileStorageKey key = new FileStorageKey(DATASOURCES, dataSourceId, extension);
-            fileStorage.moveIntoStorage(key, dfi.getStoreLocation());
-            if (fileItems.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) {
-                file = fileStorage.getFile(key);
+            fileStorage.moveIntoStorage(key, file);
+            if (files.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) {
+                dataSourceFile = fileStorage.getFile(key);
             }
         }
-        checkNotNull(file);
-        checkState(file.exists());
+
+        if (dataSourceFile == null || !dataSourceFile.exists()) {
+            throw new DataSourceException("Uploaded file cannot be found.");
+        }
+        return dataSourceFile;
     }
     /**
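With the mutable `file` field gone, the main-file rule is easier to state: a single upload is its own main file, and multi-file uploads (shapefiles) hand the `.shp` to the ingester. A hedged distillation of that rule (hypothetical helper, not part of this PR):

```java
// Hypothetical restatement of the selection logic in moveFilesIntoStorage().
static File pickMainFile (List<File> storedFiles) {
    if (storedFiles.size() == 1) return storedFiles.get(0);
    // For sidecar formats like Shapefile, the .shp is the file handed to the ingester.
    return storedFiles.stream()
            .filter(f -> "shp".equalsIgnoreCase(FilenameUtils.getExtension(f.getName())))
            .findFirst()
            .orElseThrow(() -> new DataSourceException("Uploaded file cannot be found."));
}
```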
@@ -127,15 +121,15 @@ public static DataSourceUploadAction forFormFields (
         // Extract required parameters. Throws AnalysisServerException on failure, e.g. if a field is missing.
         final String sourceName = getFormField(formFields, "sourceName", true);
         final String regionId = getFormField(formFields, "regionId", true);
-        final List<FileItem> fileItems = formFields.get("sourceFiles");
+        final List<File> files = HttpUtils.extractFilesFromFileItemsAndUnzip(formFields.get("sourceFiles"));
-        FileStorageFormat format = detectUploadFormatAndValidate(fileItems);
+        FileStorageFormat format = DataSourceUtil.detectUploadFormatAndValidate(files);
         DataSourceIngester ingester = DataSourceIngester.forFormat(format);
-        String originalFileNames = fileItems.stream().map(FileItem::getName).collect(Collectors.joining(", "));
+        String originalFileNames = files.stream().map(File::getName).collect(Collectors.joining(", "));
         ingester.initializeDataSource(sourceName, originalFileNames, regionId, userPermissions);
         DataSourceUploadAction dataSourceUploadAction =
-                new DataSourceUploadAction(fileStorage, dataSourceCollection, fileItems, ingester);
+                new DataSourceUploadAction(fileStorage, dataSourceCollection, files, ingester);
         return dataSourceUploadAction;
     }
diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java
index d85b883e9..01222c210 100644
--- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java
+++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java
@@ -2,8 +2,6 @@
 import com.conveyal.file.FileStorageFormat;
 import com.google.common.collect.Sets;
-import org.apache.commons.fileupload.FileItem;
-import org.apache.commons.fileupload.disk.DiskFileItem;
 import org.apache.commons.io.FilenameUtils;
 import java.io.File;
@@ -29,15 +27,15 @@ public abstract class DataSourceUtil {
      * @throws DataSourceException if the type of the upload can't be detected or preconditions are violated.
      * @return the expected type of the uploaded file or files, never null.
      */
-    public static FileStorageFormat detectUploadFormatAndValidate (List<FileItem> fileItems) {
-        if (isNullOrEmpty(fileItems)) {
+    public static FileStorageFormat detectUploadFormatAndValidate (List<File> files) {
+        if (isNullOrEmpty(files)) {
             throw new DataSourceException("You must select some files to upload.");
         }
-        Set<String> fileExtensions = extractFileExtensions(fileItems);
+        Set<String> fileExtensions = extractFileExtensions(files);
         if (fileExtensions.isEmpty()) {
             throw new DataSourceException("No file extensions seen, cannot detect upload type.");
         }
-        checkFileCharacteristics(fileItems);
+        checkFileCharacteristics(files);
         if (fileExtensions.contains("zip")) {
             throw new DataSourceException("Upload of spatial .zip files not yet supported");
             // TODO unzip and process unzipped files - will need to peek inside to detect GTFS uploads first.
@@ -47,7 +45,7 @@ public static FileStorageFormat detectUploadFormatAndValidate (List<FileItem> fi
         final Set<String> shapefileExtensions = Sets.newHashSet("shp", "dbf", "prj");
         if ( ! Sets.intersection(fileExtensions, shapefileExtensions).isEmpty()) {
             if (fileExtensions.containsAll(shapefileExtensions)) {
-                verifyBaseNamesSame(fileItems);
+                verifyBaseNamesSame(files);
                 // TODO check that any additional file is .shx, and that there are no more than 4 files.
             } else {
                 throw new DataSourceException("You must multi-select at least SHP, DBF, and PRJ files for shapefile upload.");
             }
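Since `detectUploadFormatAndValidate` now takes plain `File`s, it can be exercised without any multipart plumbing, which should make unit testing simpler. A hedged sketch (paths illustrative; the files must exist, be readable, and be non-empty to pass `checkFileCharacteristics`):

```java
List<File> upload = List.of(
        new File("/tmp/tracts.shp"),
        new File("/tmp/tracts.dbf"),
        new File("/tmp/tracts.prj"));
// Matching base names plus the required sidecars -> SHP.
// A lone .csv would yield CSV; a .zip still throws DataSourceException.
FileStorageFormat format = DataSourceUtil.detectUploadFormatAndValidate(upload);
assert format == FileStorageFormat.SHP;
```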
@@ -78,10 +76,8 @@ public static FileStorageFormat detectUploadFormatAndValidate (List<FileItem> fi
      * Check that all FileItems supplied are stored in disk files (not memory), that they are all readable and all
      * have nonzero size.
      */
-    private static void checkFileCharacteristics (List<FileItem> fileItems) {
-        for (FileItem fileItem : fileItems) {
-            checkState(fileItem instanceof DiskFileItem, "Uploaded file was not stored to disk.");
-            File diskFile = ((DiskFileItem)fileItem).getStoreLocation();
+    private static void checkFileCharacteristics (List<File> files) {
+        for (File diskFile : files) {
             checkState(diskFile.exists(), "Uploaded file does not exist on filesystem as expected.");
             checkState(diskFile.canRead(), "Read permissions were not granted on uploaded file.");
             checkState(diskFile.length() > 0, "Uploaded file was empty (contained no data).");
@@ -92,10 +88,10 @@ private static void checkFileCharacteristics (List<File> files) {
      * Given a list of FileItems, return a set of all unique file extensions present, normalized to lower case.
      * Always returns a set instance which may be empty, but never null.
      */
-    private static Set<String> extractFileExtensions (List<FileItem> fileItems) {
+    private static Set<String> extractFileExtensions (List<File> files) {
         Set<String> fileExtensions = new HashSet<>();
-        for (FileItem fileItem : fileItems) {
-            String fileName = fileItem.getName();
+        for (File file : files) {
+            String fileName = file.getName();
             String extension = FilenameUtils.getExtension(fileName);
             if (extension.isEmpty()) {
                 throw new DataSourceException("Filename has no extension: " + fileName);
@@ -106,10 +102,10 @@ private static Set<String> extractFileExtensions (List<File> files) {
     }
     /** In uploads containing more than one file, all files are expected to have the same name before the extension. */
-    private static void verifyBaseNamesSame (List<FileItem> fileItems) {
+    private static void verifyBaseNamesSame (List<File> files) {
         String firstBaseName = null;
-        for (FileItem fileItem : fileItems) {
-            String baseName = FilenameUtils.getBaseName(fileItem.getName());
+        for (File file : files) {
+            String baseName = FilenameUtils.getBaseName(file.getName());
             if (firstBaseName == null) {
                 firstBaseName = baseName;
             } else if (!firstBaseName.equals(baseName)) {
@@ -117,5 +113,4 @@ private static void verifyBaseNamesSame (List<File> files) {
             }
         }
     }
-
 }
diff --git a/src/main/java/com/conveyal/analysis/util/HttpUtils.java b/src/main/java/com/conveyal/analysis/util/HttpUtils.java
index 53cb2a04a..74fef654e 100644
--- a/src/main/java/com/conveyal/analysis/util/HttpUtils.java
+++ b/src/main/java/com/conveyal/analysis/util/HttpUtils.java
@@ -1,14 +1,16 @@
 package com.conveyal.analysis.util;
 import com.conveyal.analysis.AnalysisServerException;
+import com.conveyal.file.FileUtils;
 import com.conveyal.r5.util.ExceptionUtils;
 import org.apache.commons.fileupload.FileItem;
-import org.apache.commons.fileupload.FileItemFactory;
 import org.apache.commons.fileupload.disk.DiskFileItemFactory;
 import org.apache.commons.fileupload.servlet.ServletFileUpload;
 import javax.servlet.http.HttpServletRequest;
+import java.io.File;
 import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -36,8 +38,8 @@ public static Map<String, List<FileItem>> getRequestFiles (HttpServletRequest re
         // If we always saved the FileItems via write() or read them with getInputStream() they would not all need to
         // be on disk.
         try {
-            FileItemFactory fileItemFactory = new DiskFileItemFactory(0, null);
-            ServletFileUpload sfu = new ServletFileUpload(fileItemFactory);
+            DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(0, null);
+            ServletFileUpload sfu = new ServletFileUpload(diskFileItemFactory);
             return sfu.parseParameterMap(req);
         } catch (Exception e) {
             throw AnalysisServerException.fileUpload(ExceptionUtils.stackTraceString(e));
@@ -66,4 +68,56 @@ public static String getFormField(Map<String, List<FileItem>> formFields, String
                     fieldName));
         }
     }
+
+    /**
+     * Extract each `FileItem`'s contents into a temporary directory, automatically unzipping any .zip files.
+     * Return the list of new `File` handles.
+     */
+    public static List<File> extractFilesFromFileItemsAndUnzip(List<FileItem> fileItems) {
+        File directory = FileUtils.createScratchDirectory();
+        List<File> files = new ArrayList<>();
+        for (FileItem fi : fileItems) {
+            File file = moveFileItemIntoDirectory(fi, directory);
+            String name = file.getName();
+            if (name.toLowerCase().endsWith(".zip")) {
+                files.addAll(FileUtils.unzipFileIntoDirectory(file, directory));
+            } else {
+                files.add(file);
+            }
+        }
+        return files;
+    }
+
+    /**
+     * Move each `FileItem`'s contents into a temporary directory. Return the list of new `File` handles.
+     */
+    public static List<File> saveFileItemsLocally(List<FileItem> fileItems) {
+        File directory = FileUtils.createScratchDirectory();
+        List<File> files = new ArrayList<>();
+        for (FileItem fileItem : fileItems) {
+            files.add(moveFileItemIntoDirectory(fileItem, directory));
+        }
+        return files;
+    }
+
+    /**
+     * Save the contents of a `FileItem` in a temporary directory and return the `File`.
+     */
+    public static File saveFileItemLocally(FileItem fileItem) {
+        return moveFileItemIntoDirectory(fileItem, FileUtils.createScratchDirectory());
+    }
+
+    /**
+     * Move the contents of a `FileItem` into the given directory by calling `write`. A `DiskFileItem` whose
+     * contents are already on disk will use `renameTo` rather than copying.
+     */
+    public static File moveFileItemIntoDirectory(FileItem fileItem, File directory) {
+        File file = new File(directory, fileItem.getName());
+        try {
+            fileItem.write(file);
+        } catch (Exception e) {
+            throw new AnalysisServerException(e, "Error storing file in directory on disk. FileItem.write() failed.");
+        }
+        return file;
+    }
 }
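A quick sketch of how these helpers compose in the controllers above (request plumbing elided; `formFields` comes from `getRequestFiles`):

```java
List<FileItem> items = formFields.remove("files");
// Plain files are moved into one scratch directory; any .zip is expanded
// in place, so downstream code sees a flat List<File> either way.
List<File> files = HttpUtils.extractFilesFromFileItemsAndUnzip(items);
```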
diff --git a/src/main/java/com/conveyal/file/FileUtils.java b/src/main/java/com/conveyal/file/FileUtils.java
index f2eb94842..5890d18bd 100644
--- a/src/main/java/com/conveyal/file/FileUtils.java
+++ b/src/main/java/com/conveyal/file/FileUtils.java
@@ -1,17 +1,13 @@
 package com.conveyal.file;
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.RandomAccessFile;
+import java.io.*;
 import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.zip.GZIPInputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
 public abstract class FileUtils {
     /**
@@ -123,4 +119,51 @@ public static boolean isGzip (File file) {
             throw new RuntimeException(e);
         }
     }
+
+    /**
+     * Extract the files inside a zipped file into a given directory and return the `File` handles to the new files.
+     */
+    public static List<File> unzipFileIntoDirectory(File zipFile, File directory) {
+        List<File> files = new ArrayList<>();
+        ZipInputStream zis = new ZipInputStream(FileUtils.getInputStream(zipFile));
+        ZipEntry zipEntry;
+        byte[] buffer = new byte[1024];
+        try {
+            while ((zipEntry = zis.getNextEntry()) != null) {
+                String entryName = zipEntry.getName();
+                File newFile = new File(directory, entryName);
+                if (zipEntry.isDirectory()) {
+                    // Skip special `__MACOSX` directories.
+                    if (entryName.toUpperCase().contains("__MACOSX")) continue;
+                    if (!newFile.isDirectory() && !newFile.mkdirs()) {
+                        throw new IOException("Failed to create directory " + newFile);
+                    }
+                } else {
+                    // Skip file names beginning with a "." (getFileName() yields a Path, so compare as a String;
+                    // Path.startsWith performs path-component matching, not string-prefix matching).
+                    if (Path.of(entryName).getFileName().toString().startsWith(".")) continue;
+
+                    // fix for Windows-created archives
+                    File parent = newFile.getParentFile();
+                    if (!parent.isDirectory() && !parent.mkdirs()) {
+                        throw new IOException("Failed to create directory " + parent);
+                    }
+
+                    // write file content
+                    FileOutputStream fos = new FileOutputStream(newFile);
+                    int len;
+                    while ((len = zis.read(buffer)) > 0) {
+                        fos.write(buffer, 0, len);
+                    }
+                    fos.close();
+                    files.add(newFile);
+                }
+            }
+            zis.closeEntry();
+            zis.close();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+
+        return files;
+    }
 }
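One hardening suggestion while `unzipFileIntoDirectory` is new: entry names come from the archive, and `new File(directory, entryName)` will follow `../` segments. The usual zip-slip guard, as a hedged sketch (helper name hypothetical):

```java
// Hypothetical guard: reject entries that would resolve outside the target directory.
private static void checkZipSlip (File directory, File newFile) throws IOException {
    String directoryPath = directory.getCanonicalPath();
    if (!newFile.getCanonicalPath().startsWith(directoryPath + File.separator)) {
        throw new IOException("Zip entry resolves outside target directory: " + newFile);
    }
}
```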
countField : "[COUNT]"; diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java index ad9faf1ed..0ec13b52f 100644 --- a/src/main/java/com/conveyal/r5/analyst/Grid.java +++ b/src/main/java/com/conveyal/r5/analyst/Grid.java @@ -1,8 +1,8 @@ package com.conveyal.r5.analyst; import com.conveyal.analysis.datasource.DataSourceException; +import com.conveyal.file.FileUtils; import com.conveyal.r5.common.GeometryUtils; -import com.conveyal.r5.util.InputStreamProvider; import com.conveyal.r5.util.ProgressListener; import com.conveyal.r5.util.ShapefileReader; import com.csvreader.CsvReader; @@ -554,7 +554,7 @@ public static Polygon getPixelGeometry (int localX, int localY, WebMercatorExten * @param ignoreFields if this is non-null, the fields with these names will not be considered when looking for * numeric opportunity count fields. Null strings in the collection are ignored. */ - public static List fromCsv(InputStreamProvider csvInputStreamProvider, + public static List fromCsv(File csvFile, String latField, String lonField, Collection ignoreFields, @@ -563,7 +563,7 @@ public static List fromCsv(InputStreamProvider csvInputStreamProvider, // Read through the CSV file once to establish its structure (which fields are numeric). // TODO factor out this logic for all CSV loading, reuse for freeform and grids, set progress properly. - CsvReader reader = new CsvReader(csvInputStreamProvider.getInputStream(), StandardCharsets.UTF_8); + CsvReader reader = new CsvReader(FileUtils.getInputStream(csvFile), StandardCharsets.UTF_8); reader.readHeaders(); List headers = Arrays.asList(reader.getHeaders()); if (!headers.contains(latField)) { @@ -646,7 +646,7 @@ public static List fromCsv(InputStreamProvider csvInputStreamProvider, // The first read through the CSV just established its structure (e.g. which fields were numeric). // Now, re-read the CSV from the beginning to load the values and populate the grids. - reader = new CsvReader(csvInputStreamProvider.getInputStream(), StandardCharsets.UTF_8); + reader = new CsvReader(FileUtils.getInputStream(csvFile), StandardCharsets.UTF_8); reader.readHeaders(); int i = 0; diff --git a/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java b/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java index bc02502d2..417ae6de9 100644 --- a/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java +++ b/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java @@ -1,8 +1,9 @@ package com.conveyal.r5.analyst.progress; -import org.apache.commons.fileupload.FileItem; +import com.conveyal.file.FileUtils; import org.apache.commons.io.input.ProxyInputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -13,7 +14,7 @@ * This will report progress as the total number of bytes that have passed through the stream, like CountingInputStream. * This can exceed 100% of the file size if the caller uses mark and reset. The progressListener should be * pre-configured with the total number of bytes expected and a detail message using ProgressListener::beginTask. - * The static method forFileItem() demonstrates usage when reading from a file of known length. + * The static method forFile() demonstrates usage when reading from a file of known length. 
diff --git a/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java b/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java
index bc02502d2..417ae6de9 100644
--- a/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java
+++ b/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java
@@ -1,8 +1,9 @@
 package com.conveyal.r5.analyst.progress;
-import org.apache.commons.fileupload.FileItem;
+import com.conveyal.file.FileUtils;
 import org.apache.commons.io.input.ProxyInputStream;
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
@@ -13,7 +14,7 @@
  * This will report progress as the total number of bytes that have passed through the stream, like CountingInputStream.
  * This can exceed 100% of the file size if the caller uses mark and reset. The progressListener should be
  * pre-configured with the total number of bytes expected and a detail message using ProgressListener::beginTask.
- * The static method forFileItem() demonstrates usage when reading from a file of known length.
+ * The static method forFile() demonstrates usage when reading from a file of known length.
  */
 public class ProgressInputStream extends ProxyInputStream {
@@ -40,18 +41,10 @@ public synchronized long skip (final long length) throws IOException {
     /**
      * Given an uploaded file, report progress on reading it.
-     * Incrementing the progress seems to introduce some inefficiency when performing unbuffered small reads, such as
-     * calls to InputStream.read() which are used by DataInputStream to read numbers.
-     * TODO wrap in buffered input stream to reduce small read calls, or tune to only report once per percentage?
      */
-    public static ProgressInputStream forFileItem (FileItem fileItem, ProgressListener progressListener) {
-        try {
-            checkArgument(fileItem.getSize() < Integer.MAX_VALUE);
-            progressListener.beginTask("Reading file " + fileItem.getName(), (int)(fileItem.getSize()));
-            return new ProgressInputStream(progressListener, fileItem.getInputStream());
-        } catch (IOException e) {
-            throw new RuntimeException(e);
-        }
+    public static ProgressInputStream forFile (File file, ProgressListener progressListener) {
+        checkArgument(file.length() < Integer.MAX_VALUE);
+        progressListener.beginTask("Reading file " + file.getName(), (int)(file.length()));
+        return new ProgressInputStream(progressListener, FileUtils.getInputStream(file));
     }
-
 }
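Callers of `forFile` may still want the buffering the removed TODO suggested: unbuffered single-byte reads (as issued by `DataInputStream`) trigger one progress increment each. A hedged sketch of a buffered call site, matching the comment in BundleController above:

```java
// Buffering coalesces small reads, so the listener fires roughly once per
// 8 KB block instead of once per byte.
InputStream in = new BufferedInputStream(ProgressInputStream.forFile(osmPbfFile, progressListener));
osm.readPbf(in);
```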