diff --git a/src/main/java/org/globalbioticinteractions/elton/cmd/CmdStream.java b/src/main/java/org/globalbioticinteractions/elton/cmd/CmdStream.java index acc28a0..2babe59 100644 --- a/src/main/java/org/globalbioticinteractions/elton/cmd/CmdStream.java +++ b/src/main/java/org/globalbioticinteractions/elton/cmd/CmdStream.java @@ -14,7 +14,8 @@ import org.eol.globi.data.NodeFactory; import org.eol.globi.domain.LogContext; import org.eol.globi.tool.NullImportLogger; - +import org.globalbioticinteractions.dataset.Dataset; +import org.globalbioticinteractions.dataset.DatasetImpl; import org.globalbioticinteractions.elton.util.DatasetRegistryUtil; import org.globalbioticinteractions.elton.util.ProgressCursor; import org.globalbioticinteractions.elton.util.ProgressCursorFactory; @@ -29,9 +30,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; -import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Supplier; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -46,10 +45,6 @@ public class CmdStream extends CmdDefaultParams { public static final String DESCRIPTION = "stream interactions associated with dataset configuration provided by globi.json line-json as input.\n" + "example input:" + "{ \"namespace\": \"hash://sha256/9cd053d40ef148e16389982ea16d724063b82567f7ba1799962670fc97876fbf\", \"citation\": \"hash://sha256/9cd053d40ef148e16389982ea16d724063b82567f7ba1799962670fc97876fbf\", \"format\": \"dwca\", \"url\": \"https://linker.bio/hash://sha256/9cd053d40ef148e16389982ea16d724063b82567f7ba1799962670fc97876fbf\" }\n"; - public static final String ASSOCIATED_WITH = " "; - public static final String FORMAT = " "; - public static final String HAS_VERSION = " "; - public static final String URN_LSID_GLOBALBIOTICINTERACTIONS_ORG = "urn:lsid:globalbioticinteractions.org:"; public void setRecordType(String recordType) { this.recordType = recordType; @@ -73,12 +68,14 @@ public void doRun() { String line; while ((line = reader.readLine()) != null) { - boolean handled = jsonLineHandler.processLine(line, shouldWriteHeader.get()); - if (!handled) { - provLineHandler.processLine(line, shouldWriteHeader.get()); + Dataset dataset = jsonLineHandler.extractDataset(line, shouldWriteHeader.get()); + if (dataset == null) { + dataset = provLineHandler.extractDataset(line, shouldWriteHeader.get()); } - if (handled) { - shouldWriteHeader.set(false); + if (dataset != null) { + if (handleDatasetConfig(dataset.getNamespace(), shouldWriteHeader.get(), dataset.getConfig())) { + shouldWriteHeader.set(false); + } } } } catch (IOException ex) { @@ -87,21 +84,7 @@ public void doRun() { } - private boolean handleAsGloBIJson(String line, boolean isFirst) throws IOException { - boolean handled = false; - try { - JsonNode jsonNode = new ObjectMapper().readTree(line); - String namespace = jsonNode.at("/namespace").asText(DatasetRegistryUtil.NAMESPACE_LOCAL); - if (StringUtils.isNotBlank(namespace)) { - handled = handleGloBIJson(namespace, isFirst, jsonNode); - } - } catch (JsonProcessingException e) { - // ignore non-json lines - } - return handled; - } - - private boolean handleGloBIJson(final String namespace, boolean shouldWriteHeader, JsonNode jsonNode) throws IOException { + private boolean handleDatasetConfig(final String namespace, boolean shouldWriteHeader, JsonNode jsonNode) throws IOException { boolean handled = false; ImportLoggerFactory loggerFactory = new ImportLoggerFactoryImpl( recordType, @@ -214,20 +197,36 @@ public NodeFactory createNodeFactory() { public class LineHandlerJson implements LineHandler { @Override - public boolean processLine(String line, boolean isFirstLine) throws IOException { - return handleAsGloBIJson(line, isFirstLine); + public Dataset extractDataset(String line, boolean isFirstLine) throws IOException { + Dataset dataset = null; + try { + JsonNode jsonNode = new ObjectMapper().readTree(line); + String namespace = jsonNode.at("/namespace").asText(DatasetRegistryUtil.NAMESPACE_LOCAL); + if (StringUtils.isNotBlank(namespace)) { + dataset = new DatasetImpl(namespace, null, null); + dataset.setConfig(jsonNode); + } + } catch (JsonProcessingException e) { + // ignore non-json lines + } + return dataset; } } public class LineHandlerProv implements LineHandler { + static final String ASSOCIATED_WITH = " "; + static final String FORMAT = " "; + static final String HAS_VERSION = " "; + static final String URN_LSID_GLOBALBIOTICINTERACTIONS_ORG = "urn:lsid:globalbioticinteractions.org:"; + IRI resourceLocation = null; IRI resourceNamespace = null; String resourceFormat = null; IRI resourceVersion = null; @Override - public boolean processLine(String line, boolean isFirstLine) throws IOException { - boolean handled = false; + public Dataset extractDataset(String line, boolean isFirstLine) throws IOException { + Dataset dataset = null; if (StringUtils.contains(line, ASSOCIATED_WITH)) { // possible namespace statement Pattern namespacePattern = Pattern.compile("<(?" + URN_LSID_GLOBALBIOTICINTERACTIONS_ORG + "[^>]+)>" + ASSOCIATED_WITH + "<(?[^>]+)>.*"); @@ -270,10 +269,11 @@ public boolean processLine(String line, boolean isFirstLine) throws IOException objectNode.put(resourceLocation.getIRIString(), "https://linker.bio/" + resourceVersion.getIRIString()); resourceMapping.add(objectNode); //globiConfig.set("resources", resourceMapping); - handled = handleGloBIJson(namespace, isFirstLine, globiConfig); + dataset = new DatasetImpl(namespace, null, null); + dataset.setConfig(globiConfig); resetContext(); } - return handled; + return dataset; } private void resetOnLocationSwitch(Matcher matcher) { diff --git a/src/main/java/org/globalbioticinteractions/elton/cmd/CmdUtil.java b/src/main/java/org/globalbioticinteractions/elton/cmd/CmdUtil.java index 8537a78..b0cd8ce 100644 --- a/src/main/java/org/globalbioticinteractions/elton/cmd/CmdUtil.java +++ b/src/main/java/org/globalbioticinteractions/elton/cmd/CmdUtil.java @@ -71,7 +71,7 @@ static DatasetRegistry createDataFinderLoggingCaching( ActivityListener activityListener, ActivityContext ctx, Supplier iriFactory, - DatasetRegistry registryProvenanceLogger) { + DatasetRegistry datasetRegistry) { CacheFactory cacheFactory = createCacheFactory( namespace, @@ -85,7 +85,7 @@ static DatasetRegistry createDataFinderLoggingCaching( iriFactory ); return new DatasetRegistryWithCache( - registryProvenanceLogger, + datasetRegistry, cacheFactory ); } diff --git a/src/main/java/org/globalbioticinteractions/elton/cmd/LineHandler.java b/src/main/java/org/globalbioticinteractions/elton/cmd/LineHandler.java index f2a2617..cac6f71 100644 --- a/src/main/java/org/globalbioticinteractions/elton/cmd/LineHandler.java +++ b/src/main/java/org/globalbioticinteractions/elton/cmd/LineHandler.java @@ -1,8 +1,10 @@ package org.globalbioticinteractions.elton.cmd; +import org.globalbioticinteractions.dataset.Dataset; + import java.io.IOException; public interface LineHandler { - boolean processLine(String line, boolean isFirstLine) throws IOException; + Dataset extractDataset(String line, boolean isFirstLine) throws IOException; } diff --git a/src/main/java/org/globalbioticinteractions/elton/cmd/StreamingDatasetsHandler.java b/src/main/java/org/globalbioticinteractions/elton/cmd/StreamingDatasetsHandler.java index 40f3b11..3d44d94 100644 --- a/src/main/java/org/globalbioticinteractions/elton/cmd/StreamingDatasetsHandler.java +++ b/src/main/java/org/globalbioticinteractions/elton/cmd/StreamingDatasetsHandler.java @@ -30,7 +30,7 @@ class StreamingDatasetsHandler implements NamespaceHandler { private final static Logger LOG = LoggerFactory.getLogger(StreamingDatasetsHandler.class); - private final String cacheDir; + private final String dataDir; private final PrintStream stderr; private InputStreamFactory factory; @@ -44,7 +44,7 @@ class StreamingDatasetsHandler implements NamespaceHandler { private Supplier activityIdFactory; public StreamingDatasetsHandler(JsonNode config, - String cacheDir, + String dataDir, String provDir, PrintStream stderr, InputStreamFactory inputStreamFactory, @@ -55,7 +55,7 @@ public StreamingDatasetsHandler(JsonNode config, ActivityContext ctx, Supplier activityIdFactory) { this.factory = inputStreamFactory; - this.cacheDir = cacheDir; + this.dataDir = dataDir; this.provDir = provDir; this.stderr = stderr; this.config = config; @@ -74,7 +74,7 @@ public void onNamespace(String namespace) throws Exception { CacheFactory cacheFactory = CmdUtil.createCacheFactory( namespace, - cacheDir, + dataDir, provDir, factory, contentPathFactory, @@ -101,7 +101,7 @@ public void onNamespace(String namespace) throws Exception { datasetWithCache, nodeFactory, loggerFactory.createImportLogger(), - new File(cacheDir) + new File(dataDir) ); stderr.println("done."); } catch (StudyImporterException ex) {