
Commit

add [elton log] and [elton cat] cmds to let elton speak preston; related to #52
Jorrit Poelen committed Nov 30, 2023
1 parent 2377e26 commit 3fd598b
Showing 8 changed files with 389 additions and 4 deletions.
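The two new subcommands appear designed to work as a pair: [elton log] (alias [prov]) prints Preston-style provenance statements (hasVersion quads) for the resources of a dataset, and [elton cat] (alias [get]) reads such statements from stdin and streams the referenced content from the local cache to stdout. A hypothetical pipeline, assuming the namespace has already been cached (e.g. via [elton update]), would look something like: elton log &lt;namespace&gt; | elton cat &lt;namespace&gt;.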
4 changes: 4 additions & 0 deletions src/main/java/org/globalbioticinteractions/elton/Elton.java
@@ -6,10 +6,12 @@

import org.apache.commons.lang.StringUtils;
import org.globalbioticinteractions.elton.cmd.CmdDatasets;
import org.globalbioticinteractions.elton.cmd.CmdGet;
import org.globalbioticinteractions.elton.cmd.CmdInit;
import org.globalbioticinteractions.elton.cmd.CmdInstallManual;
import org.globalbioticinteractions.elton.cmd.CmdInteractions;
import org.globalbioticinteractions.elton.cmd.CmdList;
import org.globalbioticinteractions.elton.cmd.CmdLog;
import org.globalbioticinteractions.elton.cmd.CmdNames;
import org.globalbioticinteractions.elton.cmd.CmdNanoPubs;
import org.globalbioticinteractions.elton.cmd.CmdReview;
@@ -26,6 +28,8 @@
        subcommands = {
                CmdInit.class,
                CmdInteractions.class,
                CmdLog.class,
                CmdGet.class,
                CmdNames.class,
                CmdReview.class,
                CmdDatasets.class,
src/main/java/org/globalbioticinteractions/elton/cmd/CmdDefaultParams.java
@@ -5,6 +5,7 @@
import org.globalbioticinteractions.elton.util.ProgressCursorRotating;
import picocli.CommandLine;

import java.io.InputStream;
import java.io.PrintStream;
import java.net.URI;
import java.nio.file.Paths;
@@ -38,6 +39,7 @@ public void setNamespaces(List<String> namespaces) {

    private PrintStream stderr = System.err;
    private PrintStream stdout = System.out;
    private InputStream stdin = System.in;

    final private ProgressCursorFactory cursorFactory = new ProgressCursorFactory() {
        private final ProgressCursor cursor = new ProgressCursorRotating(stderr);
@@ -67,6 +69,14 @@ public void setStdout(PrintStream out) {
        this.stdout = out;
    }

    public void setStdin(InputStream stdin) {
        this.stdin = stdin;
    }

    public InputStream getStdin() {
        return this.stdin;
    }

    public URI getWorkDir() {
        return workDir == null
                ? Paths.get("").toUri()
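The new stdin accessor mirrors the existing stdout hook, so a command that consumes piped statements can be fed and captured programmatically. A minimal sketch (not part of the commit) driving the CmdGet command shown below; the class name, namespace, statement, and hash are invented, HAS_VERSION is assumed to serialize as pav:hasVersion, and the referenced content is assumed to already exist in the local cache:

import org.globalbioticinteractions.elton.cmd.CmdGet;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;

class CmdGetStdinSketch {
    public static void main(String[] args) {
        // a single hasVersion statement of the kind [elton log] emits
        String statement = "<https://example.org/archive.zip> "
                + "<http://purl.org/pav/hasVersion> "
                + "<hash://sha256/1a2b3c> .\n";

        CmdGet cmd = new CmdGet();
        cmd.setNamespaces(Collections.singletonList("example/namespace"));
        cmd.setStdin(new ByteArrayInputStream(statement.getBytes(StandardCharsets.UTF_8)));

        ByteArrayOutputStream captured = new ByteArrayOutputStream();
        cmd.setStdout(new PrintStream(captured));

        // resolves the hash under <cacheDir>/<namespace> and copies the cached bytes to stdout
        cmd.run();
    }
}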
103 changes: 103 additions & 0 deletions src/main/java/org/globalbioticinteractions/elton/cmd/CmdGet.java
@@ -0,0 +1,103 @@
package org.globalbioticinteractions.elton.cmd;

import bio.guoda.preston.HashType;
import bio.guoda.preston.Hasher;
import bio.guoda.preston.RefNodeConstants;
import bio.guoda.preston.RefNodeFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.rdf.api.IRI;
import org.apache.commons.rdf.api.Quad;
import org.eol.globi.data.NodeFactory;
import org.eol.globi.service.CacheService;
import org.eol.globi.service.ResourceService;
import org.eol.globi.tool.NullImportLogger;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetProxy;
import org.globalbioticinteractions.dataset.DatasetRegistry;
import org.globalbioticinteractions.dataset.DatasetRegistryException;
import org.globalbioticinteractions.dataset.DatasetRegistryProxy;
import org.globalbioticinteractions.elton.util.DatasetRegistryUtil;
import org.globalbioticinteractions.elton.util.NamespaceHandler;
import org.globalbioticinteractions.elton.util.NodeFactoryNull;
import org.nanopub.Run;
import picocli.CommandLine;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.URI;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static bio.guoda.preston.RefNodeConstants.HAS_VERSION;

@CommandLine.Command(
        name = "cat",
        aliases = {"get"},
        description = "gets resource by hash uri"
)
public class CmdGet extends CmdDefaultParams {

    private static final Pattern PATTERN_OBJECT_NEWER = Pattern.compile(".* (" + HAS_VERSION.toString() + ") <(?<obj>[^>]*)>(.*) [.]$");

    @Override
    public void run() {
        run(getStdout());
    }

    void run(PrintStream out) {
        DatasetRegistry registry = DatasetRegistryUtil.forCacheDirOrLocalDir(
                getCacheDir(),
                getWorkDir(),
                createInputStreamFactory()
        );

        final List<String> actualNamespaces = new ArrayList<>();
        try {
            CmdUtil.handleNamespaces(registry, actualNamespaces::add, getNamespaces());
        } catch (DatasetRegistryException e) {
            throw new RuntimeException("failed to handle namespaces", e);
        }

        BufferedReader reader = new BufferedReader(new InputStreamReader(getStdin()));
        String line;
        try {
            while ((line = reader.readLine()) != null) {
                Matcher matcher = PATTERN_OBJECT_NEWER.matcher(line);
                if (matcher.matches()) {
                    IRI contentId = RefNodeFactory.toIRI(matcher.group("obj"));
                    File file = null;
                    for (String namespace : actualNamespaces) {
                        File namespaceDir = new File(getCacheDir(), namespace);
                        File fileCandidate = new File(namespaceDir, StringUtils.substring(contentId.getIRIString(), "hash://sha256/".length()));
                        if (fileCandidate.exists()) {
                            file = fileCandidate;
                            break;
                        }
                    }
                    if (file == null) {
                        throw new RuntimeException("cannot resolve [" + contentId + "]");
                    }
                    IOUtils.copy(new FileInputStream(file), out);
                }
            }
        } catch (IOException ex) {
            throw new RuntimeException("failed to read from stdin", ex);
        }
    }

}
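In other words, [elton cat] resolves each logged hash against the namespace's cache directory and expects the content to sit there as a plain file named by the bare digest, along the lines of (an illustrative path):

    <cacheDir>/<namespace>/1a2b3c...   (the hash URI without its "hash://sha256/" prefix)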


186 changes: 186 additions & 0 deletions src/main/java/org/globalbioticinteractions/elton/cmd/CmdLog.java
@@ -0,0 +1,186 @@
package org.globalbioticinteractions.elton.cmd;

import bio.guoda.preston.HashType;
import bio.guoda.preston.Hasher;
import bio.guoda.preston.RefNodeConstants;
import bio.guoda.preston.RefNodeFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.NullAppendable;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.rdf.api.IRI;
import org.apache.commons.rdf.api.Quad;
import org.apache.jena.tdb.store.Hash;
import org.eol.globi.data.NodeFactory;
import org.eol.globi.service.ResourceService;
import org.eol.globi.tool.NullImportLogger;
import org.eol.globi.util.ResourceServiceLocal;
import org.eol.globi.util.ResourceServiceLocalAndRemote;
import org.eol.globi.util.ResourceUtil;
import org.globalbioticinteractions.cache.CacheUtil;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetFinderUtil;
import org.globalbioticinteractions.dataset.DatasetProxy;
import org.globalbioticinteractions.dataset.DatasetRegistry;
import org.globalbioticinteractions.dataset.DatasetRegistryException;
import org.globalbioticinteractions.dataset.DatasetRegistryProxy;
import org.globalbioticinteractions.dataset.DatasetUtil;
import org.globalbioticinteractions.elton.util.DatasetRegistryUtil;
import org.globalbioticinteractions.elton.util.NodeFactoryNull;
import picocli.CommandLine;

import javax.validation.constraints.Null;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URI;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Collections;
import java.util.concurrent.atomic.AtomicBoolean;

@CommandLine.Command(
        name = "log",
        aliases = {"prov"},
        description = "lists provenance of original resources"
)
public class CmdLog extends CmdDefaultParams {

    @Override
    public void run() {
        run(System.out);
    }

    void run(PrintStream out) {
        DatasetRegistry registry = DatasetRegistryUtil.forCacheDirOrLocalDir(
                getCacheDir(),
                getWorkDir(),
                createInputStreamFactory()
        );

        DatasetRegistry proxy = new DatasetRegistryProxy(Collections.singletonList(registry)) {
            public Dataset datasetFor(String namespace) throws DatasetRegistryException {
                Dataset dataset = super.datasetFor(namespace);
                return new DatasetProxy(dataset) {
                    ResourceService service = new LoggingResourceService(out, dataset);

                    public InputStream retrieve(URI resourcePath) throws IOException {
                        return service.retrieve(resourcePath);
                    }
                };
            }

            ;
        };

        NodeFactory nodeFactory = new NodeFactoryNull();
        CmdUtil.handleNamespaces(
                proxy,
                nodeFactory,
                getNamespaces(),
                "logging provenance",
                NullAppendable.INSTANCE,
                new NullImportLogger());
    }

    private static class LoggingResourceService implements ResourceService {
        private final PrintStream out;
        private HashType sha256;
        private final ResourceService local;

        public LoggingResourceService(PrintStream out, ResourceService resourceService) {
            this.out = out;
            this.local = resourceService;
            sha256 = HashType.sha256;
        }

        @Override
        public InputStream retrieve(URI uri) throws IOException {
            return logVersion(uri, this.local.retrieve(uri));
        }

        private InputStream logVersion(URI uri, InputStream retrieve) throws IOException {
            try {
                final MessageDigest md = MessageDigest.getInstance(sha256.getAlgorithm());
                final URI resource = local instanceof Dataset
                        ? getLocationInDataset(uri, (Dataset) local) : uri;

                return new DigestLoggingInputStream(retrieve, md, resource);
            } catch (NoSuchAlgorithmException e) {
                throw new RuntimeException("cannot calculate sha256 hashes", e);
            }
        }

        private URI getLocationInDataset(URI uri, Dataset dataset) throws IOException {
            URI archiveURI = dataset.getArchiveURI();
            URI resourceLocation = uri;
            if (!uri.isAbsolute()) {
                if (CacheUtil.isLocalDir(archiveURI)) {
                    resourceLocation = ResourceUtil.getAbsoluteResourceURI(archiveURI, uri);
                } else {
                    IRI archiveContentId = Hasher.calcHashIRI(local.retrieve(archiveURI), NullOutputStream.NULL_OUTPUT_STREAM, sha256);
                    Quad quad = RefNodeFactory.toStatement(
                            RefNodeFactory.toIRI(archiveURI),
                            RefNodeConstants.HAS_VERSION,
                            archiveContentId
                    );
                    out.println(quad.toString());

                    String localDatasetRoot = DatasetFinderUtil.getLocalDatasetURIRoot(local.retrieve(archiveURI));

                    URI localArchiveRoot = URI.create("zip:" + archiveContentId.getIRIString() + "!/" + localDatasetRoot);
                    resourceLocation = ResourceUtil.getAbsoluteResourceURI(localArchiveRoot, uri);
                }
            }
            return resourceLocation;
        }

        private class DigestLoggingInputStream extends DigestInputStream {
            final AtomicBoolean isEOF;
            final AtomicBoolean hasLogged;
            final URI resourceLocation;
            private final MessageDigest md;
            private final URI resource;

            public DigestLoggingInputStream(InputStream retrieve, MessageDigest md, URI resource) {
                super(retrieve, md);
                this.md = md;
                this.resource = resource;
                isEOF = new AtomicBoolean(false);
                hasLogged = new AtomicBoolean(false);
                resourceLocation = resource;
            }

            public int read() throws IOException {
                return setEOFIfEncountered(super.read());
            }

            public int read(byte[] var1, int var2, int var3) throws IOException {
                return setEOFIfEncountered(super.read(var1, var2, var3));
            }

            private int setEOFIfEncountered(int read) {
                if (read == -1) {
                    isEOF.set(true);
                }
                return read;
            }

            public void close() throws IOException {
                this.in.close();
                Quad quad = RefNodeFactory.toStatement(
                        RefNodeFactory.toIRI(resourceLocation),
                        RefNodeConstants.HAS_VERSION,
                        Hasher.toHashIRI(md, HashType.sha256)
                );
                if (isEOF.get() && !hasLogged.get()) {
                    out.println(quad.toString());
                    hasLogged.set(true);
                }
            }
        }
    }
}
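For orientation, the statements this writes look roughly as follows (values invented; the exact form comes from Quad.toString(), and HAS_VERSION is assumed to serialize as pav:hasVersion). Resources addressed by absolute URIs are logged as-is; for a relative resource read from a remotely cached archive, the archive's own content hash is logged first, and once the resource stream has been fully read, the resource is logged at its location inside that archive using a zip: URI:

<https://example.org/dataset/archive.zip> <http://purl.org/pav/hasVersion> <hash://sha256/aaa111> .
<zip:hash://sha256/aaa111!/example-dataset-main/interactions.tsv> <http://purl.org/pav/hasVersion> <hash://sha256/bbb222> .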


src/main/java/org/globalbioticinteractions/elton/cmd/CmdUtil.java
@@ -111,7 +111,8 @@ public static void handleNamespaces(DatasetRegistry registry, NodeFactory nodeFa
    }

    private static void handleSingleNamespace(DatasetRegistry registry,
                                              NodeFactory nodeFactory, String namespace,
                                              NodeFactory nodeFactory,
                                              String namespace,
                                              ImportLogger logger) throws DatasetRegistryException, StudyImporterException {
        Dataset dataset = new DatasetFactory(registry).datasetFor(namespace);
src/main/java/org/globalbioticinteractions/elton/util/DatasetRegistryUtil.java
@@ -36,7 +36,7 @@ private static CacheFactory getCacheFactoryLocal(String cacheDir,
    }

    public static DatasetRegistry forCacheDir(String cacheDir,
                                              ResourceServiceLocal resourceServiceLocal) {
                                              ResourceService resourceServiceLocal) {
        return new DatasetRegistryLocal(
                cacheDir,
                getCacheFactoryLocal(cacheDir, resourceServiceLocal),
@@ -52,12 +52,16 @@ private static boolean isEmpty(DatasetRegistry registry) {
    }

    public static DatasetRegistry forCacheDirOrLocalDir(String cacheDir, URI workDir, InputStreamFactory streamFactory) {
        DatasetRegistry registry = forCacheDir(cacheDir, new ResourceServiceLocal(streamFactory));
        return forCacheDirOrLocalDir(cacheDir, workDir, new ResourceServiceLocal(streamFactory), new ResourceServiceLocalAndRemote(streamFactory));
    }

    public static DatasetRegistry forCacheDirOrLocalDir(String cacheDir, URI workDir, ResourceService resourceServiceLocal, ResourceService resourceServiceRemote) {
        DatasetRegistry registry = forCacheDir(cacheDir, resourceServiceLocal);
        if (isEmpty(registry)) {
            registry = forLocalDir(
                    workDir,
                    cacheDir,
                    new ResourceServiceLocalAndRemote(streamFactory)
                    resourceServiceRemote
            );
        }
        return registry;
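The new overload makes the resource services injectable instead of being derived from the stream factory. A sketch of a possible caller (not from the commit; the class name, import paths, and the lambda-compatible InputStreamFactory are assumptions):

import org.eol.globi.service.ResourceService;
import org.eol.globi.util.InputStreamFactory;
import org.eol.globi.util.ResourceServiceLocal;
import org.globalbioticinteractions.dataset.DatasetRegistry;
import org.globalbioticinteractions.elton.util.DatasetRegistryUtil;

import java.net.URI;
import java.nio.file.Paths;

class RegistrySketch {
    static DatasetRegistry offlineRegistry(String cacheDir) {
        InputStreamFactory passThrough = is -> is; // no-op stream wrapping
        URI workDir = Paths.get("").toUri();
        // passing a local-only service for both roles keeps resolution strictly offline;
        // the existing 3-argument overload instead pairs local with local-and-remote
        ResourceService localOnly = new ResourceServiceLocal(passThrough);
        return DatasetRegistryUtil.forCacheDirOrLocalDir(cacheDir, workDir, localOnly, localOnly);
    }
}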