add [elton log] and [elton cat] cmds to let elton speak preston; related to #52
Jorrit Poelen committed on Nov 30, 2023
1 parent (2377e26), commit 3fd598b
Showing 8 changed files with 389 additions and 4 deletions.
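Taken together, the new [elton log] command prints Preston-style hasVersion statements for the resources behind a namespace, and the new [elton cat] command reads such statements from stdin and streams the matching cached content to stdout. A pipeline along the lines of elton log <namespace> | elton cat <namespace> should therefore let the two commands exchange Preston provenance; that exact invocation is an assumption based on the code below, not something documented in this commit.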
src/main/java/org/globalbioticinteractions/elton/cmd/CmdGet.java (103 additions, 0 deletions)
package org.globalbioticinteractions.elton.cmd;

import bio.guoda.preston.HashType;
import bio.guoda.preston.Hasher;
import bio.guoda.preston.RefNodeConstants;
import bio.guoda.preston.RefNodeFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.rdf.api.IRI;
import org.apache.commons.rdf.api.Quad;
import org.eol.globi.data.NodeFactory;
import org.eol.globi.service.CacheService;
import org.eol.globi.service.ResourceService;
import org.eol.globi.tool.NullImportLogger;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetProxy;
import org.globalbioticinteractions.dataset.DatasetRegistry;
import org.globalbioticinteractions.dataset.DatasetRegistryException;
import org.globalbioticinteractions.dataset.DatasetRegistryProxy;
import org.globalbioticinteractions.elton.util.DatasetRegistryUtil;
import org.globalbioticinteractions.elton.util.NamespaceHandler;
import org.globalbioticinteractions.elton.util.NodeFactoryNull;
import org.nanopub.Run;
import picocli.CommandLine;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.URI;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static bio.guoda.preston.RefNodeConstants.HAS_VERSION;

@CommandLine.Command(
        name = "cat",
        aliases = {"get"},
        description = "gets resource by hash uri"
)
public class CmdGet extends CmdDefaultParams {

    private static final Pattern PATTERN_OBJECT_NEWER = Pattern.compile(".* (" + HAS_VERSION.toString() + ") <(?<obj>[^>]*)>(.*) [.]$");

    @Override
    public void run() {
        run(getStdout());
    }

    void run(PrintStream out) {
        DatasetRegistry registry = DatasetRegistryUtil.forCacheDirOrLocalDir(
                getCacheDir(),
                getWorkDir(),
                createInputStreamFactory()
        );

        final List<String> actualNamespaces = new ArrayList<>();
        try {
            CmdUtil.handleNamespaces(registry, actualNamespaces::add, getNamespaces());
        } catch (DatasetRegistryException e) {
            throw new RuntimeException("failed to handle namespaces", e);
        }

        BufferedReader reader = new BufferedReader(new InputStreamReader(getStdin()));
        String line;
        try {
            while ((line = reader.readLine()) != null) {
                Matcher matcher = PATTERN_OBJECT_NEWER.matcher(line);
                if (matcher.matches()) {
                    IRI contentId = RefNodeFactory.toIRI(matcher.group("obj"));
                    File file = null;
                    for (String namespace : actualNamespaces) {
                        File namespaceDir = new File(getCacheDir(), namespace);
                        File fileCandidate = new File(namespaceDir, StringUtils.substring(contentId.getIRIString(), "hash://sha256/".length()));
                        if (fileCandidate.exists()) {
                            file = fileCandidate;
                            break;
                        }
                    }
                    if (file == null) {
                        throw new RuntimeException("cannot resolve [" + contentId + "]");
                    }
                    IOUtils.copy(new FileInputStream(file), out);
                }
            }
        } catch (IOException ex) {
            throw new RuntimeException("failed to read from stdin", ex);
        }
    }

}
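For context, here is a minimal, dependency-free sketch of the lookup [elton cat] performs: match a Preston-style hasVersion statement on stdin and map the object's hash URI onto Elton's per-namespace cache layout. The expanded predicate IRI (pav:hasVersion), the namespace, the cache directory, and the hash value are illustrative assumptions, not values taken from this commit.

import java.io.File;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HashUriResolutionSketch {
    public static void main(String[] args) {
        // assumed expansion of RefNodeConstants.HAS_VERSION (pav:hasVersion)
        String hasVersion = "<http://purl.org/pav/hasVersion>";
        Pattern pattern = Pattern.compile(".* (" + hasVersion + ") <(?<obj>[^>]*)>(.*) [.]$");

        // example statement as [elton log] might emit it (truncated, made-up hash)
        String line = "<https://example.org/interactions.tsv> " + hasVersion + " <hash://sha256/1234abcd> .";

        Matcher matcher = pattern.matcher(line);
        if (matcher.matches()) {
            String contentId = matcher.group("obj");
            // drop the "hash://sha256/" prefix to get the file name used in the cache
            String sha256Hex = contentId.substring("hash://sha256/".length());
            File cacheDir = new File("datasets"); // hypothetical cache dir
            File cached = new File(new File(cacheDir, "globalbioticinteractions/template-dataset"), sha256Hex);
            System.out.println(contentId + " -> " + cached);
        }
    }
}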
src/main/java/org/globalbioticinteractions/elton/cmd/CmdLog.java (186 additions, 0 deletions)
package org.globalbioticinteractions.elton.cmd;

import bio.guoda.preston.HashType;
import bio.guoda.preston.Hasher;
import bio.guoda.preston.RefNodeConstants;
import bio.guoda.preston.RefNodeFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.NullAppendable;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.rdf.api.IRI;
import org.apache.commons.rdf.api.Quad;
import org.apache.jena.tdb.store.Hash;
import org.eol.globi.data.NodeFactory;
import org.eol.globi.service.ResourceService;
import org.eol.globi.tool.NullImportLogger;
import org.eol.globi.util.ResourceServiceLocal;
import org.eol.globi.util.ResourceServiceLocalAndRemote;
import org.eol.globi.util.ResourceUtil;
import org.globalbioticinteractions.cache.CacheUtil;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetFinderUtil;
import org.globalbioticinteractions.dataset.DatasetProxy;
import org.globalbioticinteractions.dataset.DatasetRegistry;
import org.globalbioticinteractions.dataset.DatasetRegistryException;
import org.globalbioticinteractions.dataset.DatasetRegistryProxy;
import org.globalbioticinteractions.dataset.DatasetUtil;
import org.globalbioticinteractions.elton.util.DatasetRegistryUtil;
import org.globalbioticinteractions.elton.util.NodeFactoryNull;
import picocli.CommandLine;

import javax.validation.constraints.Null;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URI;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Collections;
import java.util.concurrent.atomic.AtomicBoolean;

@CommandLine.Command(
        name = "log",
        aliases = {"prov"},
        description = "lists provenance of original resources"
)
public class CmdLog extends CmdDefaultParams {

    @Override
    public void run() {
        run(System.out);
    }

    void run(PrintStream out) {
        DatasetRegistry registry = DatasetRegistryUtil.forCacheDirOrLocalDir(
                getCacheDir(),
                getWorkDir(),
                createInputStreamFactory()
        );

        DatasetRegistry proxy = new DatasetRegistryProxy(Collections.singletonList(registry)) {
            public Dataset datasetFor(String namespace) throws DatasetRegistryException {
                Dataset dataset = super.datasetFor(namespace);
                return new DatasetProxy(dataset) {
                    ResourceService service = new LoggingResourceService(out, dataset);

                    public InputStream retrieve(URI resourcePath) throws IOException {
                        return service.retrieve(resourcePath);
                    }
                };
            }

            ;
        };

        NodeFactory nodeFactory = new NodeFactoryNull();
        CmdUtil.handleNamespaces(
                proxy,
                nodeFactory,
                getNamespaces(),
                "logging provenance",
                NullAppendable.INSTANCE,
                new NullImportLogger());
    }

    private static class LoggingResourceService implements ResourceService {
        private final PrintStream out;
        private HashType sha256;
        private final ResourceService local;

        public LoggingResourceService(PrintStream out, ResourceService resourceService) {
            this.out = out;
            this.local = resourceService;
            sha256 = HashType.sha256;
        }

        @Override
        public InputStream retrieve(URI uri) throws IOException {
            return logVersion(uri, this.local.retrieve(uri));
        }

        private InputStream logVersion(URI uri, InputStream retrieve) throws IOException {
            try {
                final MessageDigest md = MessageDigest.getInstance(sha256.getAlgorithm());
                final URI resource = local instanceof Dataset
                        ? getLocationInDataset(uri, (Dataset) local) : uri;

                return new DigestLoggingInputStream(retrieve, md, resource);
            } catch (NoSuchAlgorithmException e) {
                throw new RuntimeException("cannot calculate sha256 hashes", e);
            }
        }

        private URI getLocationInDataset(URI uri, Dataset dataset) throws IOException {
            URI archiveURI = dataset.getArchiveURI();
            URI resourceLocation = uri;
            if (!uri.isAbsolute()) {
                if (CacheUtil.isLocalDir(archiveURI)) {
                    resourceLocation = ResourceUtil.getAbsoluteResourceURI(archiveURI, uri);
                } else {
                    IRI archiveContentId = Hasher.calcHashIRI(local.retrieve(archiveURI), NullOutputStream.NULL_OUTPUT_STREAM, sha256);
                    Quad quad = RefNodeFactory.toStatement(
                            RefNodeFactory.toIRI(archiveURI),
                            RefNodeConstants.HAS_VERSION,
                            archiveContentId
                    );
                    out.println(quad.toString());

                    String localDatasetRoot = DatasetFinderUtil.getLocalDatasetURIRoot(local.retrieve(archiveURI));

                    URI localArchiveRoot = URI.create("zip:" + archiveContentId.getIRIString() + "!/" + localDatasetRoot);
                    resourceLocation = ResourceUtil.getAbsoluteResourceURI(localArchiveRoot, uri);
                }
            }
            return resourceLocation;
        }

        private class DigestLoggingInputStream extends DigestInputStream {
            final AtomicBoolean isEOF;
            final AtomicBoolean hasLogged;
            final URI resourceLocation;
            private final MessageDigest md;
            private final URI resource;

            public DigestLoggingInputStream(InputStream retrieve, MessageDigest md, URI resource) {
                super(retrieve, md);
                this.md = md;
                this.resource = resource;
                isEOF = new AtomicBoolean(false);
                hasLogged = new AtomicBoolean(false);
                resourceLocation = resource;
            }

            public int read() throws IOException {
                return setEOFIfEncountered(super.read());
            }

            public int read(byte[] var1, int var2, int var3) throws IOException {
                return setEOFIfEncountered(super.read(var1, var2, var3));
            }

            private int setEOFIfEncountered(int read) {
                if (read == -1) {
                    isEOF.set(true);
                }
                return read;
            }

            public void close() throws IOException {
                this.in.close();
                Quad quad = RefNodeFactory.toStatement(
                        RefNodeFactory.toIRI(resourceLocation),
                        RefNodeConstants.HAS_VERSION,
                        Hasher.toHashIRI(md, HashType.sha256)
                );
                if (isEOF.get() && !hasLogged.get()) {
                    out.println(quad.toString());
                    hasLogged.set(true);
                }
            }
        }
    }
}
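To illustrate the hashing idea behind CmdLog's DigestLoggingInputStream, here is a self-contained sketch: wrap a resource stream in java.security.DigestInputStream and, once the stream has been fully consumed, report the sha256 content id alongside the resource location. The statement printed here only approximates what Preston's RefNodeFactory.toStatement emits, and the URI and content are made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class DigestLoggingSketch {

    static String toHashUri(MessageDigest md) {
        StringBuilder hex = new StringBuilder();
        for (byte b : md.digest()) {
            hex.append(String.format("%02x", b & 0xff));
        }
        return "hash://sha256/" + hex;
    }

    public static void main(String[] args) throws IOException, NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("SHA-256");
        InputStream resource = new ByteArrayInputStream(
                "sourceTaxonName\tinteractionTypeName\ttargetTaxonName\n".getBytes(StandardCharsets.UTF_8));

        try (InputStream in = new DigestInputStream(resource, md)) {
            // reading is enough to feed the digest; a real import would also parse the bytes
            while (in.read() != -1) {
                // no-op
            }
        }

        // once the content has been streamed, emit a hasVersion statement, as [elton log] does
        System.out.println("<https://example.org/interactions.tsv> <http://purl.org/pav/hasVersion> <" + toHashUri(md) + "> .");
    }
}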