Skip to content

Commit

Permalink
Merge pull request #242 from pdowler/master
Browse files Browse the repository at this point in the history
Entity metaChecksum algorithm
  • Loading branch information
pdowler authored Feb 15, 2024
2 parents d789051 + 035ebb8 commit a86a5fb
Show file tree
Hide file tree
Showing 5 changed files with 300 additions and 60 deletions.
2 changes: 1 addition & 1 deletion cadc-util/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ sourceCompatibility = 1.8

group = 'org.opencadc'

version = '1.10.5'
version = '1.10.6'

description = 'OpenCADC core utility library'
def git_url = 'https://github.com/opencadc/core'
Expand Down
162 changes: 120 additions & 42 deletions cadc-util/src/main/java/org/opencadc/persist/Entity.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
******************* CANADIAN ASTRONOMY DATA CENTRE *******************
************** CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES **************
*
* (c) 2023. (c) 2023.
* (c) 2024. (c) 2024.
* Government of Canada Gouvernement du Canada
* National Research Council Conseil national de recherches
* Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6
Expand Down Expand Up @@ -91,7 +91,12 @@
import org.apache.log4j.Logger;

/**
* Base class for entity persistence.
* Base class for entity persistence. The metaChecksum algorithm implemented here has a
* flaw where moving a value from one optional field to another (with no values contributing
* bytes in between) does not cause the computed metaChecksum to change. If a specific
* data model is susceptible to this, it can use the "digestFieldNames" option to prevent
* it, but changing options will change existing (stored) metaChecksum values so a change
* like this has an operational impact that needs to be evaluated.
*
* @author pdowler
*/
Expand All @@ -101,6 +106,7 @@ public abstract class Entity {
private final String localPackage;
public static boolean MCS_DEBUG = false; // way to much debug when true

private final boolean digestFieldNames;
private final boolean truncateDateToSec;
private UUID id;
private Date lastModified;
Expand All @@ -124,24 +130,57 @@ static final void assertNotNull(Class caller, String name, Object test)
}

/**
* Constructor.
* Backwards compatible constructor: digestFieldNames==false.
*
* @param truncateDateToSec truncate Date values to seconds when converting to bytes for meta checksum calculation
* @deprecated hard code Entity(boolean, boolean) in model
*/
@Deprecated
protected Entity(boolean truncateDateToSec) {
this(UUID.randomUUID(), truncateDateToSec);
this(truncateDateToSec, false);
}

/**
* Constructor.
* Backwards compatible constructor: digestFieldNames==false.
*
* @param id assign the specified Entity.id
* @param truncateDateToSec truncate Date values to seconds when converting to bytes for meta checksum calculation
* @deprecated hard code Entity(UUID, boolean, boolean) in model
*/
@Deprecated
protected Entity(UUID id, boolean truncateDateToSec) {
this(id, truncateDateToSec, false);
}

/**
* Constructor. This creates a new entity with a random UUID.
*
* @param truncateDateToSec truncate Date values to seconds when converting to bytes for meta checksum calculation
* @param digestFieldNames when a field is not null (or collection is non-empty), include the field name in the
* metaChecksum calculation
*/
protected Entity(boolean truncateDateToSec, boolean digestFieldNames) {
this(UUID.randomUUID(), truncateDateToSec, digestFieldNames);
}

/**
* Constructor. This creates an entity with an existing UUID when reconstructing an instance. The
* truncateDateToSec option should be used if instances of the model are to be serialised or stored
* in a way that does not recover the exact timestamp to milliseconds. The digestFieldNames option
* is needed for any model with "adjacent" fields that could contain the same value; this option
* ensures that "moving" the value from one field to another will change the checksum by changing
* the sequence of bytes that are digested.
*
* @param id unique ID value to assign/restore
* @param truncateDateToSec truncate Date values to seconds when converting to bytes for meta checksum calculation
* @param digestFieldNames when a field is not null (or collection is non-empty), include the field name in the
* metaChecksum calculation
*/
protected Entity(UUID id, boolean truncateDateToSec) {
protected Entity(UUID id, boolean truncateDateToSec, boolean digestFieldNames) {
Entity.assertNotNull(Entity.class, "id", id);
this.id = id;
this.truncateDateToSec = truncateDateToSec;
this.digestFieldNames = digestFieldNames;
this.localPackage = this.getClass().getPackage().getName();
}

Expand Down Expand Up @@ -212,7 +251,11 @@ public String toString() {
*/
public URI computeMetaChecksum(MessageDigest digest) {
try {
calcMetaChecksum(this.getClass(), this, digest);
MessageDigestWrapper mdw = new MessageDigestWrapper(digest);
calcMetaChecksum(this.getClass(), this, mdw);
if (MCS_DEBUG) {
log.debug("computeMetaChecksum: " + mdw.getNumBytes() + " bytes");
}
byte[] metaChecksumBytes = digest.digest();
String hexMetaChecksum = HexUtil.toHex(metaChecksumBytes);
String alg = digest.getAlgorithm().toLowerCase();
Expand All @@ -232,35 +275,41 @@ public URI computeMetaChecksum(MessageDigest digest) {
* @param o
* @param digest
*/
protected void calcMetaChecksum(Class c, Object o, MessageDigest digest) {
protected final void calcMetaChecksum(Class c, Object o, MessageDigestWrapper digest) {
// calculation order:
// 1. Entity.id for entities
// 2. Entity.metaProducer
// 3. state fields in alphabetic order; depth-first recursion
// value handling:
// enum: find and call getValue() by reflection and continue
// Date: truncate time to whole number of seconds and treat as a long
// Date: normally milliseconds to long
// optional Date handling: truncate time to whole number of seconds
// String: UTF-8 encoded bytes
// URI: UTF-8 encoded bytes of string representation
// float: IEEE754 single (4 bytes)
// double: IEEE754 double (8 bytes)
// boolean: convert to single byte, false=0, true=1 (1 bytes)
// byte: as-is (1 byte)
// boolean: convert to single byte, false=0, true=1 (1 byte)
// byte: as-is
// short: (2 bytes, network byte order == big endian))
// integer: (4 bytes, network byte order == big endian)
// long: (8 bytes, network byte order == big endian)
// UUID: 8 most-significant bytes + 8 least significant bytes (16 bytes)
// optional for ALL fields: if non-zero bytes updated the digest, UTF-8 encoded bytes of the field name
try {
if (o instanceof Entity) {
Entity ce = (Entity) o;
digest.update(primitiveValueToBytes(ce.id, "Entity.id", digest.getAlgorithm()));
digest.update(primitiveValueToBytes(ce.id, "Entity.id"));
if (ce.metaProducer != null) {
digest.update(primitiveValueToBytes(ce.metaProducer, "Entity.metaProducer", digest.getAlgorithm()));
digest.update(primitiveValueToBytes(ce.metaProducer, "Entity.metaProducer"));
if (digestFieldNames) {
digest.update(primitiveValueToBytes("Entity.metaProducer", "Entity.metaProducer"));
}
}
}

SortedSet<Field> fields = getStateFields(c);
for (Field f : fields) {
String cf = c.getSimpleName() + "." + f.getName();
String cf = f.getDeclaringClass().getSimpleName() + "." + f.getName();
f.setAccessible(true);
Object fo = f.get(o);
if (fo != null) {
Expand All @@ -269,36 +318,51 @@ protected void calcMetaChecksum(Class c, Object o, MessageDigest digest) {
try {
Method m = ac.getMethod("getValue");
Object val = m.invoke(fo);
digest.update(primitiveValueToBytes(val, cf, digest.getAlgorithm()));
digest.update(primitiveValueToBytes(val, cf));
if (digestFieldNames) {
digest.update(primitiveValueToBytes(cf, cf)); // field name
}
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException ex) {
throw new RuntimeException("BUG - enum " + ac.getName() + " does not have getValue()", ex);
}
} else if (isDataModelClass(ac)) {
// depth-first recursion
int num = digest.getNumBytes();
calcMetaChecksum(ac, fo, digest);
if (digestFieldNames && num < digest.getNumBytes()) {
digest.update(primitiveValueToBytes(cf, cf)); // field name
}
} else if (fo instanceof Collection) {
Collection stuff = (Collection) fo;
Iterator i = stuff.iterator();
while (i.hasNext()) {
Object co = i.next();
Class cc = co.getClass();
if (cc.isEnum()) {
try {
Method m = cc.getMethod("getValue");
Object val = m.invoke(co);
digest.update(primitiveValueToBytes(val, cf, digest.getAlgorithm()));
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException ex) {
throw new RuntimeException("BUG", ex);
if (!stuff.isEmpty()) {
Iterator i = stuff.iterator();
while (i.hasNext()) {
Object co = i.next();
Class cc = co.getClass();
if (cc.isEnum()) {
try {
Method m = cc.getMethod("getValue");
Object val = m.invoke(co);
digest.update(primitiveValueToBytes(val, cf));
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException ex) {
throw new RuntimeException("BUG", ex);
}
} else if (isDataModelClass(cc)) {
// depth-first recursion
calcMetaChecksum(cc, co, digest);
} else {
digest.update(primitiveValueToBytes(co, cf));
}
} else if (isDataModelClass(cc)) {
// depth-first recursion
calcMetaChecksum(cc, co, digest);
} else {
digest.update(primitiveValueToBytes(co, cf, digest.getAlgorithm()));
}
if (digestFieldNames) {
digest.update(primitiveValueToBytes(cf, cf)); // field name
}
}
} else {
digest.update(primitiveValueToBytes(fo, cf, digest.getAlgorithm()));
digest.update(primitiveValueToBytes(fo, cf));
if (digestFieldNames) {
digest.update(primitiveValueToBytes(cf, cf)); // field name
}
}
} else if (MCS_DEBUG) {
log.debug("skip null: " + cf);
Expand All @@ -310,6 +374,24 @@ protected void calcMetaChecksum(Class c, Object o, MessageDigest digest) {
}
}

private static class MessageDigestWrapper {
private MessageDigest digest;
private int numBytes = 0;

public MessageDigestWrapper(MessageDigest digest) {
this.digest = digest;
}

public void update(byte[] b) {
digest.update(b);
numBytes += b.length;
}

public int getNumBytes() {
return numBytes;
}
}

/**
* Determine if the argument type is part of a data model implementation
* so reflection can be used to drill down into the structure. The standard
Expand Down Expand Up @@ -392,7 +474,7 @@ public static boolean isChildCollection(Field f) throws IllegalAccessException {
return false;
}

protected byte[] primitiveValueToBytes(Object o, String name, String digestAlg) {
protected byte[] primitiveValueToBytes(Object o, String name) {
byte[] ret = null;
if (o instanceof Byte) {
ret = HexUtil.toBytes((Byte) o); // auto-unbox
Expand Down Expand Up @@ -449,20 +531,16 @@ protected byte[] primitiveValueToBytes(Object o, String name, String digestAlg)
System.arraycopy(msb, 0, ret, 0, 8);
System.arraycopy(lsb, 0, ret, 8, 8);
} else if (o instanceof byte[]) {
byte[] bytes = (byte[]) o;
return bytes;
ret = (byte[]) o;
}

if (ret != null) {
if (MCS_DEBUG) {
try {
MessageDigest md = MessageDigest.getInstance(digestAlg);
byte[] dig = md.digest(ret);
log.debug(o.getClass().getSimpleName() + " " + name + " = " + o.toString()
+ " -- " + HexUtil.toHex(dig));
} catch (Exception ignore) {
log.debug("OOPS", ignore);
String dfn = "";
if (o == name) {
dfn = " digest-field-name";
}
log.debug(o.getClass().getSimpleName() + " " + name + " = " + o.toString() + " " + ret.length + " bytes" + dfn);
}
return ret;
}
Expand Down
Loading

0 comments on commit a86a5fb

Please sign in to comment.