diff --git a/cadc-util/build.gradle b/cadc-util/build.gradle index 5fd0e7f8..04e27d08 100644 --- a/cadc-util/build.gradle +++ b/cadc-util/build.gradle @@ -16,7 +16,7 @@ sourceCompatibility = 1.8 group = 'org.opencadc' -version = '1.10.5' +version = '1.10.6' description = 'OpenCADC core utility library' def git_url = 'https://github.com/opencadc/core' diff --git a/cadc-util/src/main/java/org/opencadc/persist/Entity.java b/cadc-util/src/main/java/org/opencadc/persist/Entity.java index 4de5b19c..1034da4e 100644 --- a/cadc-util/src/main/java/org/opencadc/persist/Entity.java +++ b/cadc-util/src/main/java/org/opencadc/persist/Entity.java @@ -3,7 +3,7 @@ ******************* CANADIAN ASTRONOMY DATA CENTRE ******************* ************** CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** * -* (c) 2023. (c) 2023. +* (c) 2024. (c) 2024. * Government of Canada Gouvernement du Canada * National Research Council Conseil national de recherches * Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 @@ -91,7 +91,12 @@ import org.apache.log4j.Logger; /** - * Base class for entity persistence. + * Base class for entity persistence. The metaChecksum algorithm implemented here has a + * flaw where moving a value from one optional field to another (with no values contributing + * bytes in between) does not cause the computed metaChecksum to change. If a specific + * data model is susceptible to this, it can use the "digestFieldNames" option to prevent + * it, but changing options will change existing (stored) metaChecksum values so a change + * like this has an operational impact that needs to be evaluated. 
* * @author pdowler */ @@ -101,6 +106,7 @@ public abstract class Entity { private final String localPackage; public static boolean MCS_DEBUG = false; // way to much debug when true + private final boolean digestFieldNames; private final boolean truncateDateToSec; private UUID id; private Date lastModified; @@ -124,24 +130,57 @@ static final void assertNotNull(Class caller, String name, Object test) } /** - * Constructor. + * Backwards compatible constructor: digestFieldNames==false. * * @param truncateDateToSec truncate Date values to seconds when converting to bytes for meta checksum calculation + * @deprecated hard code Entity(boolean, boolean) in model */ + @Deprecated protected Entity(boolean truncateDateToSec) { - this(UUID.randomUUID(), truncateDateToSec); + this(truncateDateToSec, false); } /** - * Constructor. + * Backwards compatible constructor: digestFieldNames==false. + * + * @param id assign the specified Entity.id + * @param truncateDateToSec truncate Date values to seconds when converting to bytes for meta checksum calculation + * @deprecated hard code Entity(UUID, boolean, boolean) in model + */ + @Deprecated + protected Entity(UUID id, boolean truncateDateToSec) { + this(id, truncateDateToSec, false); + } + + /** + * Constructor. This creates a new entity with a random UUID. + * + * @param truncateDateToSec truncate Date values to seconds when converting to bytes for meta checksum calculation + * @param digestFieldNames when a field is not null (or collection is non-empty), include the field name in the + * metaChecksum calculation + */ + protected Entity(boolean truncateDateToSec, boolean digestFieldNames) { + this(UUID.randomUUID(), truncateDateToSec, digestFieldNames); + } + + /** + * Constructor. This creates an entity with an existing UUID when reconstructing an instance. 
The + * truncateDateToSec option should be used if instances of the model are to be serialised or stored + * in a way that does not recover the exact timestamp to milliseconds. The digestFieldNames option + * is needed for any model with "adjacent" fields that could contain the same value; this option + * ensures that "moving" the value from one field to another will change the checksum by changing + * the sequence of bytes that are digested. * * @param id unique ID value to assign/restore * @param truncateDateToSec truncate Date values to seconds when converting to bytes for meta checksum calculation + * @param digestFieldNames when a field is not null (or collection is non-empty), include the field name in the + * metaChecksum calculation */ - protected Entity(UUID id, boolean truncateDateToSec) { + protected Entity(UUID id, boolean truncateDateToSec, boolean digestFieldNames) { Entity.assertNotNull(Entity.class, "id", id); this.id = id; this.truncateDateToSec = truncateDateToSec; + this.digestFieldNames = digestFieldNames; this.localPackage = this.getClass().getPackage().getName(); } @@ -212,7 +251,11 @@ public String toString() { */ public URI computeMetaChecksum(MessageDigest digest) { try { - calcMetaChecksum(this.getClass(), this, digest); + MessageDigestWrapper mdw = new MessageDigestWrapper(digest); + calcMetaChecksum(this.getClass(), this, mdw); + if (MCS_DEBUG) { + log.debug("computeMetaChecksum: " + mdw.getNumBytes() + " bytes"); + } byte[] metaChecksumBytes = digest.digest(); String hexMetaChecksum = HexUtil.toHex(metaChecksumBytes); String alg = digest.getAlgorithm().toLowerCase(); @@ -232,35 +275,41 @@ public URI computeMetaChecksum(MessageDigest digest) { * @param o * @param digest */ - protected void calcMetaChecksum(Class c, Object o, MessageDigest digest) { + protected final void calcMetaChecksum(Class c, Object o, MessageDigestWrapper digest) { // calculation order: // 1. Entity.id for entities // 2. Entity.metaProducer // 3. 
state fields in alphabetic order; depth-first recursion // value handling: // enum: find and call getValue() by reflection and continue - // Date: truncate time to whole number of seconds and treat as a long + // Date: normally milliseconds to long + // optional Date handling: truncate time to whole number of seconds // String: UTF-8 encoded bytes // URI: UTF-8 encoded bytes of string representation // float: IEEE754 single (4 bytes) // double: IEEE754 double (8 bytes) - // boolean: convert to single byte, false=0, true=1 (1 bytes) - // byte: as-is (1 byte) + // boolean: convert to single byte, false=0, true=1 (1 byte) + // byte: as-is // short: (2 bytes, network byte order == big endian)) // integer: (4 bytes, network byte order == big endian) // long: (8 bytes, network byte order == big endian) + // UUID: 8 most-significant bytes + 8 least significant bytes (16 bytes) + // optional for ALL fields: if non-zero bytes updated the digest, UTF-8 encoded bytes of the field name try { if (o instanceof Entity) { Entity ce = (Entity) o; - digest.update(primitiveValueToBytes(ce.id, "Entity.id", digest.getAlgorithm())); + digest.update(primitiveValueToBytes(ce.id, "Entity.id")); if (ce.metaProducer != null) { - digest.update(primitiveValueToBytes(ce.metaProducer, "Entity.metaProducer", digest.getAlgorithm())); + digest.update(primitiveValueToBytes(ce.metaProducer, "Entity.metaProducer")); + if (digestFieldNames) { + digest.update(primitiveValueToBytes("Entity.metaProducer", "Entity.metaProducer")); + } } } SortedSet fields = getStateFields(c); for (Field f : fields) { - String cf = c.getSimpleName() + "." + f.getName(); + String cf = f.getDeclaringClass().getSimpleName() + "." 
+ f.getName(); f.setAccessible(true); Object fo = f.get(o); if (fo != null) { @@ -269,36 +318,51 @@ protected void calcMetaChecksum(Class c, Object o, MessageDigest digest) { try { Method m = ac.getMethod("getValue"); Object val = m.invoke(fo); - digest.update(primitiveValueToBytes(val, cf, digest.getAlgorithm())); + digest.update(primitiveValueToBytes(val, cf)); + if (digestFieldNames) { + digest.update(primitiveValueToBytes(cf, cf)); // field name + } } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException ex) { throw new RuntimeException("BUG - enum " + ac.getName() + " does not have getValue()", ex); } } else if (isDataModelClass(ac)) { // depth-first recursion + int num = digest.getNumBytes(); calcMetaChecksum(ac, fo, digest); + if (digestFieldNames && num < digest.getNumBytes()) { + digest.update(primitiveValueToBytes(cf, cf)); // field name + } } else if (fo instanceof Collection) { Collection stuff = (Collection) fo; - Iterator i = stuff.iterator(); - while (i.hasNext()) { - Object co = i.next(); - Class cc = co.getClass(); - if (cc.isEnum()) { - try { - Method m = cc.getMethod("getValue"); - Object val = m.invoke(co); - digest.update(primitiveValueToBytes(val, cf, digest.getAlgorithm())); - } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException ex) { - throw new RuntimeException("BUG", ex); + if (!stuff.isEmpty()) { + Iterator i = stuff.iterator(); + while (i.hasNext()) { + Object co = i.next(); + Class cc = co.getClass(); + if (cc.isEnum()) { + try { + Method m = cc.getMethod("getValue"); + Object val = m.invoke(co); + digest.update(primitiveValueToBytes(val, cf)); + } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException ex) { + throw new RuntimeException("BUG", ex); + } + } else if (isDataModelClass(cc)) { + // depth-first recursion + calcMetaChecksum(cc, co, digest); + } else { + digest.update(primitiveValueToBytes(co, cf)); } - } else if (isDataModelClass(cc)) { - // 
depth-first recursion - calcMetaChecksum(cc, co, digest); - } else { - digest.update(primitiveValueToBytes(co, cf, digest.getAlgorithm())); + } + if (digestFieldNames) { + digest.update(primitiveValueToBytes(cf, cf)); // field name } } } else { - digest.update(primitiveValueToBytes(fo, cf, digest.getAlgorithm())); + digest.update(primitiveValueToBytes(fo, cf)); + if (digestFieldNames) { + digest.update(primitiveValueToBytes(cf, cf)); // field name + } } } else if (MCS_DEBUG) { log.debug("skip null: " + cf); @@ -310,6 +374,24 @@ protected void calcMetaChecksum(Class c, Object o, MessageDigest digest) { } } + private static class MessageDigestWrapper { + private MessageDigest digest; + private int numBytes = 0; + + public MessageDigestWrapper(MessageDigest digest) { + this.digest = digest; + } + + public void update(byte[] b) { + digest.update(b); + numBytes += b.length; + } + + public int getNumBytes() { + return numBytes; + } + } + /** * Determine if the argument type is part of a data model implementation * so reflection can be used to drill down into the structure. 
The standard @@ -392,7 +474,7 @@ public static boolean isChildCollection(Field f) throws IllegalAccessException { return false; } - protected byte[] primitiveValueToBytes(Object o, String name, String digestAlg) { + protected byte[] primitiveValueToBytes(Object o, String name) { byte[] ret = null; if (o instanceof Byte) { ret = HexUtil.toBytes((Byte) o); // auto-unbox @@ -449,20 +531,16 @@ protected byte[] primitiveValueToBytes(Object o, String name, String digestAlg) System.arraycopy(msb, 0, ret, 0, 8); System.arraycopy(lsb, 0, ret, 8, 8); } else if (o instanceof byte[]) { - byte[] bytes = (byte[]) o; - return bytes; + ret = (byte[]) o; } if (ret != null) { if (MCS_DEBUG) { - try { - MessageDigest md = MessageDigest.getInstance(digestAlg); - byte[] dig = md.digest(ret); - log.debug(o.getClass().getSimpleName() + " " + name + " = " + o.toString() - + " -- " + HexUtil.toHex(dig)); - } catch (Exception ignore) { - log.debug("OOPS", ignore); + String dfn = ""; + if (o == name) { + dfn = " digest-field-name"; } + log.debug(o.getClass().getSimpleName() + " " + name + " = " + o.toString() + " " + ret.length + " bytes" + dfn); } return ret; } diff --git a/cadc-util/src/test/java/org/opencadc/persist/EntityTest.java b/cadc-util/src/test/java/org/opencadc/persist/EntityTest.java index e9deb65d..043c7683 100644 --- a/cadc-util/src/test/java/org/opencadc/persist/EntityTest.java +++ b/cadc-util/src/test/java/org/opencadc/persist/EntityTest.java @@ -71,7 +71,6 @@ import java.net.URI; import java.security.MessageDigest; import java.util.Date; -import java.util.UUID; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.junit.Assert; @@ -85,7 +84,11 @@ public class EntityTest { private static final Logger log = Logger.getLogger(EntityTest.class); static { - Log4jInit.setLevel("org.opencadc.persist", Level.INFO); + Log4jInit.setLevel("org.opencadc.persist", Level.DEBUG); + // this actually controls the large amount of debug output from checksum + // algorithm, 
but it affects the whole jvm so only enable when running + // these tests specifically and looking at output + //Entity.MCS_DEBUG = true; } public EntityTest() { @@ -105,9 +108,35 @@ public void testTemplate() { @Test public void testEntity() { - + // base: the cadc-inventory-0.x configuration + doEntityTest(false, false); + doNewVersionTest(false, false); + } + + @Test + public void testEntityTruncateDates() { + // the caom2-2.4 configuration + doEntityTest(true, false); + doNewVersionTest(true, false); + } + + @Test + public void testEntityDigestFieldNames() { + // the cadc-vos-2.x configuration + doEntityTest(false, true); + doNewVersionTest(false, true); + } + + @Test + public void testEntitySafeMode() { + // no known use, but truncateDates and digestFieldNames is the safest mode + doEntityTest(true, true); + doNewVersionTest(true, true); + } + + private void doEntityTest(boolean trunc, boolean dig) { try { - SampleEntity sample = new SampleEntity("name-of-this-entity"); + SampleEntity sample = new SampleEntity("name-of-this-entity", trunc, dig); log.info("created: " + sample); URI mcs1 = sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); @@ -148,14 +177,22 @@ public void testEntity() { URI mcs10 = sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); Assert.assertEquals(mcs7, mcs10); - // entities do not get included in metaChecksum - sample.children.add(new SampleEntity("flibble")); + // nested object + sample.nested = new SampleEntity.Nested(); URI mcs11 = sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); - Assert.assertEquals(mcs10, mcs11); + Assert.assertEquals(mcs7, mcs11); + sample.nested.nstr = "boo"; + URI mcs12 = sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); + Assert.assertNotEquals(mcs7, mcs12); - sample.relation = new SampleEntity("flibble"); + // entities do not get included in metaChecksum + sample.children.add(new SampleEntity("flibble", trunc, dig)); + URI tcs1 = 
sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); + Assert.assertEquals(mcs12, tcs1); + + sample.relation = new SampleEntity("flibble", trunc, dig); mcs11 = sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); - Assert.assertEquals(mcs10, mcs11); + Assert.assertEquals(mcs12, mcs11); // revert sample.dateVal = null; @@ -164,8 +201,9 @@ public void testEntity() { sample.uriVal = null; sample.sampleSE = null; sample.sampleIE = null; - URI mcs12 = sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); - Assert.assertEquals(mcs1, mcs12); + sample.nested = null; + URI clearCS = sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); + Assert.assertEquals(mcs1, clearCS); } catch (Exception ex) { log.error("unexpected exception", ex); @@ -173,16 +211,35 @@ public void testEntity() { } } + // also doubles as a sub-class/extension test + private void doNewVersionTest(boolean trunc, boolean dig) { + try { + SampleEntity v1 = new SampleEntity("name-of-this-entity", trunc, dig); + log.info("created: " + v1); + URI mcs1 = v1.computeMetaChecksum(MessageDigest.getInstance("MD5")); + + SampleEntityV2 v2 = new SampleEntityV2(v1.getID(), v1.getName(), trunc, dig); + log.info("created: " + v1); + URI mcs2 = v2.computeMetaChecksum(MessageDigest.getInstance("MD5")); + + Assert.assertEquals(mcs1, mcs2); + } catch (Exception ex) { + log.error("unexpected exception", ex); + Assert.fail("unexpected exception: " + ex); + } + } + @Test - public void testArtifactTransientState() { + public void testNonState() { try { - SampleEntity sample = new SampleEntity("name-of-this-entity"); + SampleEntity sample = new SampleEntity("name-of-this-entity", false, false); log.info("created: " + sample); URI mcs1 = sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); - sample.transientVal = "mrs flibble"; - SampleEntity.staticVal = "electricity"; + sample.transientVal = "mrs flibble"; // transient + SampleEntity.staticVal = "electricity"; // static + URI mcs2 = 
sample.computeMetaChecksum(MessageDigest.getInstance("MD5")); Assert.assertEquals(mcs1, mcs2); diff --git a/cadc-util/src/test/java/org/opencadc/persist/SampleEntity.java b/cadc-util/src/test/java/org/opencadc/persist/SampleEntity.java index 02d9d9fd..b8d4806d 100644 --- a/cadc-util/src/test/java/org/opencadc/persist/SampleEntity.java +++ b/cadc-util/src/test/java/org/opencadc/persist/SampleEntity.java @@ -72,6 +72,7 @@ import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.UUID; import org.apache.log4j.Logger; /** @@ -90,6 +91,7 @@ public class SampleEntity extends Entity implements Comparable { public final SortedSet strList = new TreeSet<>(); public SampleStringEnum sampleSE; public SampleIntEnum sampleIE; + public Nested nested; // not included public Set children = new TreeSet<>(); @@ -98,11 +100,20 @@ public class SampleEntity extends Entity implements Comparable { public transient String transientVal; - public SampleEntity(String name) { - super(false); + public SampleEntity(String name, boolean truncateDateToSec, boolean digestFieldNames) { + super(truncateDateToSec, digestFieldNames); this.name = name; } + public SampleEntity(UUID id, String name, boolean truncateDateToSec, boolean digestFieldNames) { + super(id, truncateDateToSec, digestFieldNames); + this.name = name; + } + + public String getName() { + return name; + } + public String toString() { return "SampleEntity[" + name + "]"; } @@ -114,4 +125,8 @@ public int compareTo(SampleEntity se) { } return name.compareTo(se.name); } + + static class Nested { + public String nstr; + } } diff --git a/cadc-util/src/test/java/org/opencadc/persist/SampleEntityV2.java b/cadc-util/src/test/java/org/opencadc/persist/SampleEntityV2.java new file mode 100644 index 00000000..b7a3c2e9 --- /dev/null +++ b/cadc-util/src/test/java/org/opencadc/persist/SampleEntityV2.java @@ -0,0 +1,90 @@ +/* +************************************************************************ 
+******************* CANADIAN ASTRONOMY DATA CENTRE ******************* +************** CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** +* +* (c) 2024. (c) 2024. +* Government of Canada Gouvernement du Canada +* National Research Council Conseil national de recherches +* Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 +* All rights reserved Tous droits réservés +* +* NRC disclaims any warranties, Le CNRC dénie toute garantie +* expressed, implied, or énoncée, implicite ou légale, +* statutory, of any kind with de quelque nature que ce +* respect to the software, soit, concernant le logiciel, +* including without limitation y compris sans restriction +* any warranty of merchantability toute garantie de valeur +* or fitness for a particular marchande ou de pertinence +* purpose. NRC shall not be pour un usage particulier. +* liable in any event for any Le CNRC ne pourra en aucun cas +* damages, whether direct or être tenu responsable de tout +* indirect, special or general, dommage, direct ou indirect, +* consequential or incidental, particulier ou général, +* arising from the use of the accessoire ou fortuit, résultant +* software. Neither the name de l'utilisation du logiciel. Ni +* of the National Research le nom du Conseil National de +* Council of Canada nor the Recherches du Canada ni les noms +* names of its contributors may de ses participants ne peuvent +* be used to endorse or promote être utilisés pour approuver ou +* products derived from this promouvoir les produits dérivés +* software without specific prior de ce logiciel sans autorisation +* written permission. préalable et particulière +* par écrit. +* +* This file is part of the Ce fichier fait partie du projet +* OpenCADC project. OpenCADC. 
+* +* OpenCADC is free software: OpenCADC est un logiciel libre ; +* you can redistribute it and/or vous pouvez le redistribuer ou le +* modify it under the terms of modifier suivant les termes de +* the GNU Affero General Public la “GNU Affero General Public +* License as published by the License” telle que publiée +* Free Software Foundation, par la Free Software Foundation +* either version 3 of the : soit la version 3 de cette +* License, or (at your option) licence, soit (à votre gré) +* any later version. toute version ultérieure. +* +* OpenCADC is distributed in the OpenCADC est distribué +* hope that it will be useful, dans l’espoir qu’il vous +* but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE +* without even the implied GARANTIE : sans même la garantie +* warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ +* or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF +* PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence +* General Public License for Générale Publique GNU Affero +* more details. pour plus de détails. +* +* You should have received Vous devriez avoir reçu une +* a copy of the GNU Affero copie de la Licence Générale +* General Public License along Publique GNU Affero avec +* with OpenCADC. If not, see OpenCADC ; si ce n’est +* . pas le cas, consultez : +* . +* +************************************************************************ +*/ + +package org.opencadc.persist; + +import java.util.UUID; +import org.apache.log4j.Logger; + +/** + * + * @author pdowler + */ +public class SampleEntityV2 extends SampleEntity { + private static final Logger log = Logger.getLogger(SampleEntityV2.class); + + public Integer optionalInt; + public String optionalString; + + public SampleEntityV2(UUID id, String name, boolean trunc, boolean dig) { + super(id, name, trunc, dig); + } + + public String toString() { + return "SampleEntityV2[" + getName() + "]"; + } +}