From 9fb1b2e6ee04905dbc5c7acbe884ff62e1b5a88c Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Thu, 2 Jan 2025 10:01:33 -0500 Subject: [PATCH] simple truth efficiency calculator (#396) * update URL * truth efficiency matrix calculator * cleanup, add warning printout * rename, cleanup, add json * merge scripts * use an array * parallelize * cleanup * actually loop * now it works * use gemc files with truth-matching enabled * cleanup, run truth efficiency * bugfix * cleanup and bugfix * bugfix, now it works * avoid unnecessary lookups * more convenient JSON structure * cleanup for thread safety * make adding threadsafe --- bin/trutheff | 10 + .../org/jlab/analysis/efficiency/Truth.java | 183 ++++++++++++++++++ .../org/jlab/utils/options/OptionParser.java | 26 +-- install-clara | 2 +- validation/advanced-tests/run-eb-tests.sh | 24 +-- .../advanced-tests/src/eb/scripts/gemc-all.sh | 9 - .../advanced-tests/src/eb/scripts/gemc.sh | 81 ++++++-- 7 files changed, 279 insertions(+), 56 deletions(-) create mode 100755 bin/trutheff create mode 100644 common-tools/clas-analysis/src/main/java/org/jlab/analysis/efficiency/Truth.java delete mode 100755 validation/advanced-tests/src/eb/scripts/gemc-all.sh diff --git a/bin/trutheff b/bin/trutheff new file mode 100755 index 0000000000..5ba7ba36a9 --- /dev/null +++ b/bin/trutheff @@ -0,0 +1,10 @@ +#!/bin/bash + +. `dirname $0`/../libexec/env.sh + +export MALLOC_ARENA_MAX=1 + +java -Xmx1536m -Xms1024m -XX:+UseSerialGC \ + -cp "$CLAS12DIR/lib/clas/*:$CLAS12DIR/lib/services/*:$CLAS12DIR/lib/utils/*" \ + org.jlab.analysis.efficiency.Truth \ + $* diff --git a/common-tools/clas-analysis/src/main/java/org/jlab/analysis/efficiency/Truth.java b/common-tools/clas-analysis/src/main/java/org/jlab/analysis/efficiency/Truth.java new file mode 100644 index 0000000000..6442e69d31 --- /dev/null +++ b/common-tools/clas-analysis/src/main/java/org/jlab/analysis/efficiency/Truth.java @@ -0,0 +1,183 @@ +package org.jlab.analysis.efficiency; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.TreeMap; +import org.jlab.jnp.hipo4.data.Bank; +import org.jlab.jnp.hipo4.data.Event; +import org.jlab.jnp.hipo4.data.Schema; +import org.jlab.jnp.hipo4.data.SchemaFactory; +import org.jlab.jnp.hipo4.io.HipoReader; +import org.jlab.jnp.utils.json.JsonArray; +import org.jlab.jnp.utils.json.JsonObject; +import org.jlab.utils.options.OptionParser; + +/** + * Efficiency matrix calculator based solely on the MC::GenMatch truth-matching + * bank (which is purely hit-based), and a pid assignment match in MC::Particle + * and REC::Particle. + * + * @author baltzell + */ +public class Truth { + + static final int UDF = 0; + static final List NEGATIVES = Arrays.asList(11, -211, -321, -2212); + static final List POSITIVES = Arrays.asList(-11, 211, 321, 2212, 45); + static final List NEUTRALS = Arrays.asList(22, 2112); + + List validPids; + Schema mcGenMatch; + Schema mcParticle; + Schema recParticle; + long[][] recTallies; + long[] mcTallies; + + public static void main(String[] args) { + OptionParser o = new OptionParser("trutheff"); + o.setRequiresInputList(true); + o.parse(args); + Truth t = new Truth(o.getInputList().get(0)); + t.add(o.getInputList()); + System.out.println(t.toTable()); + System.out.println(t.toJson()); + } + + public Truth(SchemaFactory s) { + init(s); + } + + public Truth(HipoReader r) { + init(r.getSchemaFactory()); + } + + public Truth(String filename) { + HipoReader r = new HipoReader(); + r.open(filename); + init(r.getSchemaFactory()); + } + + private void init(SchemaFactory schema) { + validPids = new ArrayList(NEGATIVES); + validPids.addAll(POSITIVES); + validPids.addAll(NEUTRALS); + validPids.add(UDF); + mcTallies = new long[validPids.size()]; + recTallies = new long[validPids.size()][validPids.size()]; + mcGenMatch = schema.getSchema("MC::GenMatch"); + mcParticle = schema.getSchema("MC::Particle"); + recParticle = schema.getSchema("REC::Particle"); + } + + /** + * Get one element of the efficiency matrix. + * @param truth true PID + * @param rec reconstructed PID + * @return probability + */ + public float get(int truth, int rec) { + long sum = mcTallies[validPids.indexOf(truth)]; + return sum>0 ? ((float)recTallies[validPids.indexOf(truth)][validPids.indexOf(rec)])/sum : 0; + } + + /** + * Add an event in the form of truth and reconstructed particle species. + * @param truth truth PID + * @param rec reconstructed PID + */ + public synchronized void add(int truth, int rec) { + final int t = validPids.indexOf(truth); + if (t < 0) return; + final int r = validPids.indexOf(rec); + mcTallies[t]++; + if (r < 0) recTallies[t][UDF]++; + else recTallies[t][r]++; + } + + /** + * Add a HIPO event. + * @param e + */ + public void add(Event e) { + Bank bm = new Bank(mcParticle); + Bank br = new Bank(recParticle); + e.read(bm); + e.read(br); + TreeMap good = getMapping(e); + for (short row=0; row filenames) { + Event e = new Event(); + for (String f : filenames) { + HipoReader r = new HipoReader(); + r.open(f); + while (r.hasNext()) { + r.nextEvent(e); + add(e); + } + } + } + + /** + * Truth-matching banks contain pointers to MC::Particle and REC::Particle, + * and here we cache that mapping to avoid nested loops. + */ + private TreeMap getMapping(Event e) { + Bank b = new Bank(mcGenMatch); + e.read(b); + TreeMap m = new TreeMap<>(); + for (int row=0; row optionsDescriptors = new TreeMap(); - private Map requiredOptions = new TreeMap(); - private Map parsedOptions = new TreeMap(); - private List parsedInputList = new ArrayList(); + private Map optionsDescriptors = new TreeMap<>(); + private Map requiredOptions = new TreeMap<>(); + private Map parsedOptions = new TreeMap<>(); + private List parsedInputList = new ArrayList<>(); private String program = "undefined"; private boolean requiresInputList = true; private String programDescription = ""; - public OptionParser(){ - - } + public OptionParser(){} public OptionParser(String pname){ this.program = pname; @@ -130,7 +123,6 @@ public void parse(String[] args){ System.exit(0); } -//this.show(arguments); for(Map.Entry entry : this.requiredOptions.entrySet()){ boolean status = entry.getValue().parse(arguments); if(status==false) { @@ -153,7 +145,11 @@ public void parse(String[] args){ this.parsedInputList.add(item); } } - //this.show(arguments); + + // FIXME: seems like we should really be throwing a RuntimeException ... + if (this.requiresInputList && this.parsedInputList.isEmpty()) { + System.err.println(" \n*** ERROR *** Empty Input List."); + } } public List getInputList(){ @@ -167,9 +163,7 @@ public static void main(String[] args){ parser.addOption("-r", "10"); parser.addOption("-t", "25.0"); parser.addOption("-d", "35"); - parser.parse(options); - parser.show(); } } diff --git a/install-clara b/install-clara index ffa63d1e7e..b62a16954b 100755 --- a/install-clara +++ b/install-clara @@ -59,7 +59,7 @@ function cleanup() { function build_clara { export CLARA_HOME=$1 - git clone https://github.com/baltzell/clara-java + git clone https://code.jlab.org/hallb/clas12/clara-java cd clara-java git checkout java21 ./gradlew && ./gradlew deploy diff --git a/validation/advanced-tests/run-eb-tests.sh b/validation/advanced-tests/run-eb-tests.sh index 70b776ba26..6f2ba34d58 100755 --- a/validation/advanced-tests/run-eb-tests.sh +++ b/validation/advanced-tests/run-eb-tests.sh @@ -1,7 +1,7 @@ #!/bin/bash webDir=http://clasweb.jlab.org/clas12offline/distribution/coatjava/validation_files/eb -webVersion=5.10-fid-r11 +webVersion=5.10-fid-tm-r11 webDir=$webDir/$webVersion # coatjava must already be built at ../../coatjava/ @@ -40,11 +40,11 @@ do done # last argument is input file stub: -webFileStub="${@: -1}" +stub="${@: -1}" # sanity check on filestub name, # just to error with reasonable message before proceeding: -case $webFileStub in +case $stub in # electron in forward, hadron in forward: electronproton) ;; @@ -79,7 +79,7 @@ case $webFileStub in electrondeuteronC) ;; *) - echo Invalid input evio file: $webFileStub + echo Invalid input evio file: $stub exit 1 esac @@ -122,20 +122,20 @@ then fi # download test files, if necessary: - wget -N --no-check-certificate $webDir/${webFileStub}.hipo + wget -N --no-check-certificate $webDir/${stub}.hipo if [ $? != 0 ] ; then echo "wget validation files failure" ; exit 1 ; fi # update the schema dictionary: (no longer necessary now that recon-util does it) - #rm -f up_${webFileStub}.hipo - #../../coatjava/bin/hipo-utils -update -d ../../coatjava/etc/bankdefs/hipo4/ -o up_${webFileStub}.hipo ${webFileStub}.hipo + #rm -f up_${stub}.hipo + #../../coatjava/bin/hipo-utils -update -d ../../coatjava/etc/bankdefs/hipo4/ -o up_${stub}.hipo ${stub}.hipo # run reconstruction: - rm -f out_${webFileStub}.hipo + rm -f out_${stub}.hipo if [ $useClara -eq 0 ] then GEOMDBVAR=$geoDbVariation export GEOMDBVAR - ../../coatjava/bin/recon-util -i ${webFileStub}.hipo -o out_${webFileStub}.hipo -c 2 + ../../coatjava/bin/recon-util -i ${stub}.hipo -o out_${stub}.hipo -c 2 else echo "set inputDir $PWD/" > cook.clara echo "set outputDir $PWD/" >> cook.clara @@ -143,7 +143,7 @@ then echo "set javaMemory 2" >> cook.clara echo "set session s_cook" >> cook.clara echo "set description d_cook" >> cook.clara - ls ${webFileStub}.hipo > files.list + ls ${stub}.hipo > files.list echo "set fileList $PWD/files.list" >> cook.clara echo "run local" >> cook.clara echo "exit" >> cook.clara @@ -152,8 +152,10 @@ then fi # run Event Builder tests: -java -DCLAS12DIR="$COAT" -Xmx1536m -Xms1024m -cp $classPath2 -DINPUTFILE=out_${webFileStub}.hipo org.junit.runner.JUnitCore eb.EBTwoTrackTest +java -DCLAS12DIR="$COAT" -Xmx1536m -Xms1024m -cp $classPath2 -DINPUTFILE=out_${stub}.hipo org.junit.runner.JUnitCore eb.EBTwoTrackTest if [ $? != 0 ] ; then echo "EBTwoTrackTest unit test failure" ; exit 1 ; else echo "EBTwoTrackTest passed unit tests" ; fi +$COAT/bin/trutheff ./out_${stub}.hipo + exit 0 diff --git a/validation/advanced-tests/src/eb/scripts/gemc-all.sh b/validation/advanced-tests/src/eb/scripts/gemc-all.sh deleted file mode 100755 index 179b95b388..0000000000 --- a/validation/advanced-tests/src/eb/scripts/gemc-all.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -gemc=${DIR}/gemc.sh -list=${DIR}/list.txt - -for xx in `awk '{print$1}' $list` -do - $gemc $xx -done diff --git a/validation/advanced-tests/src/eb/scripts/gemc.sh b/validation/advanced-tests/src/eb/scripts/gemc.sh index f6f9045203..a721a35b1a 100755 --- a/validation/advanced-tests/src/eb/scripts/gemc.sh +++ b/validation/advanced-tests/src/eb/scripts/gemc.sh @@ -1,28 +1,71 @@ #!/bin/sh -if ! [ -e "$1.txt" ] +usage() { echo "Usage: $0 [-g GEMC] [-n NEV] [-p PARTS] [-c GCARD] [-m]" 1>&2; exit $1; } + +run=11 +gemc=5.10 +nevents=100 +particles=() + +while getopts "g:n:c:p:mdh" o +do + case ${o} in + g) gemc=${OPTARG} ;; + n) nevents=${OPTARG} ;; + c) gcard=${OPTARG} ;; + p) particles+=(${OPTARG}) ;; + m) multithread=yes ;; + d) dryrun=echo ;; + h) usage 0 ;; + *) usage 1 ;; + esac +done + +if [ ${#particles[@]} -eq 0 ] then - echo Missing input file: $1.txt - exit + top=$(cd $(dirname ${BASH_SOURCE[0]}) && pwd) + for x in $(awk '{print$1}' $top/list.txt) + do + particles+=($x) + done fi -if [ -e "$1.evio" ] || [ -e "$1.hipo" ] + +if [ -z ${gcard+x} ] then - echo Output file already exists: ${1}.evio/hipo - exit + test -d clas12-config || git clone https://github.com/jeffersonlab/clas12-config + gcard=clas12-config/gemc/$gemc/clas12-default.gcard fi -v=5.10 -test -d clas12-config || git clone https://github.com/jeffersonlab/clas12-config +function run_gemc () { + local OPTIND + while getopts "r:g:n:c:i:o:d" o + do + case ${o} in + r) _run=${OPTARG} ;; + g) _gemc=${OPTARG} ;; + n) _nevents=${OPTARG} ;; + c) _gcard=${OPTARG} ;; + i) _input=${OPTARG} ;; + o) _output=${OPTARG} ;; + esac + done + ! [ -e "$_input" ] && echo Missing input file: $_input && exit 2 + [ -e "$_output" ] && echo Output file already exists: $_output && exit 3 + [ -z ${dryrun+x} ] && set -x + $dryrun gemc \ + $_gcard -RUNNO=$_run -USE_GUI=0 -N=$_nevents \ + -SAVE_ALL_MOTHERS=1 -SKIPREJECTEDHITS=1 -INTEGRATEDRAW="*" -NGENP=50 \ + -INPUT_GEN_FILE="LUND, $_input" \ + -OUTPUT="hipo, $_output" & + pid=$! + [ -z ${dryrun+x} ] && set +x + [ -z ${multithread+x} ] && wait $pid +} -run=11 -nEvents=1000 -gcard=clas12-config/gemc/$v/clas12-default.gcard - -gemc \ - $gcard \ - -INPUT_GEN_FILE="LUND, $1.txt" \ - -OUTPUT="hipo, $1.hipo" \ - -RUNNO=$run \ - -USE_GUI=0 \ - -N=$nEvents +for p in "${particles[@]}" +do + [ -z ${multithread+} ] && args= || args=-m + run_gemc -r $run -g $gemc -n $nevents -c $gcard -i $p.txt -o $p.hipo $args +done +[ -z ${multithread+x} ] || wait