Skip to content

Commit

Permalink
Merge pull request #717 from gundam-organization/feature/writeTreeInd…
Browse files Browse the repository at this point in the history
…icesOfEvent

Feature/write tree indices of event
  • Loading branch information
nadrino authored Dec 10, 2024
2 parents acdf511 + 92c1279 commit c7705c5
Show file tree
Hide file tree
Showing 10 changed files with 109 additions and 60 deletions.
14 changes: 14 additions & 0 deletions src/Applications/src/gundamFitter.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ int main(int argc, char** argv){
clParser.addOption("kickMc", {"--kick-mc"}, "Amount to push the starting parameters away from their prior values (default: 0)", 1, true);
clParser.addTriggerOption("ignoreVersionCheck", {"--ignore-version"}, "Don't check GUNDAM version with config request");
clParser.addOption("debugMaxNbEventToLoad", {"-me", "--max-events"}, "Set the maximum number of events to load per dataset", 1);
clParser.addOption("debugFracOfEntries", {"-fe", "--fraction-of-entries"}, "Set the fraction of the total entries of each TTree that will be read", 1);
clParser.addDummyOption();

#ifdef GUNDAM_USING_CACHE_MANAGER
Expand Down Expand Up @@ -357,6 +358,19 @@ int main(int argc, char** argv){
}
}

// Debug option: only read a fraction of the entries of each input TTree.
// The value is validated here and then forwarded to every dataset of the fitter.
if( clParser.isOptionTriggered("debugFracOfEntries") ){
  LogThrowIf(clParser.getNbValueSet("debugFracOfEntries") != 1, "Fraction of entries not specified.");

  auto fractionOfEntries{clParser.getOptionVal<double>("debugFracOfEntries")};
  // written as a negated in-range test so that a NaN value is rejected as well
  // (NaN compares false with both "> 1" and "< 0")
  LogThrowIf(not (fractionOfEntries >= 0 and fractionOfEntries <= 1), "fractionOfEntries should be between 0 and 1");

  LogDebug << "Will load " << fractionOfEntries*100. << "% of the datasets." << std::endl;
  for( auto& dataset : fitter.getLikelihoodInterface().getDatasetList() ){
    dataset.setFractionOfEntriesToLoad(fractionOfEntries);
  }
}

// --------------------------
// Load:
// --------------------------
Expand Down
19 changes: 0 additions & 19 deletions src/DatasetManager/include/DataDispenser.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,25 +68,6 @@ class DataDispenser : public JsonBaseClass {
void runEventFillThreads(int iThread_);
void loadEvent(int iThread_);

struct ThreadSharedData{
Long64_t currentEntryIndex{0};
Long64_t nbEntries{0};

std::shared_ptr<TChain> treeChain{nullptr};

std::vector<const GenericToolbox::LeafForm*> leafFormIndexingList{};
std::vector<const GenericToolbox::LeafForm*> leafFormStorageList{};

// has to be hooked to the TChain
TTreeFormula* dialIndexTreeFormula{nullptr};
TTreeFormula* nominalWeightTreeFormula{nullptr};

// thread communication
GenericToolbox::Atomic<bool> requestReadNextEntry{false};
GenericToolbox::Atomic<bool> isEntryBufferReady{false};
GenericToolbox::Atomic<bool> isDoneReading{false};
GenericToolbox::Atomic<bool> isEventFillerReady{false};
};
std::vector<ThreadSharedData> threadSharedDataList{};


Expand Down
26 changes: 25 additions & 1 deletion src/DatasetManager/include/DataDispenserUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
#include "EventVarTransformLib.h"

#include "GenericToolbox.Wrappers.h"
#include "GenericToolbox.Root.h"

#include "TChain.h"
#include "TTreeFormula.h"


#include "string"
Expand All @@ -21,9 +25,10 @@ struct DataDispenserParameters{
bool useReweightEngine{false};
bool isData{false}; // shall fetch split vars?
size_t debugNbMaxEventsToLoad{0};
double fractionOfEntries{1.};

std::string name{};
std::string treePath{};
std::string globalTreePath{};
std::string dialIndexFormula{};
std::string nominalWeightFormulaStr{};
std::string selectionCutFormulaStr{};
Expand Down Expand Up @@ -98,6 +103,25 @@ struct DataDispenserCache{

};

// State exchanged between the TChain reader loop (DataDispenser::runEventFillThreads)
// and the event filler (DataDispenser::loadEvent).
// NOTE(review): presumably one instance per worker index, stored in
// DataDispenser::threadSharedDataList -- confirm against the code that fills that list.
struct ThreadSharedData{
// total number of entries; used as the denominator of the progress bar in the reader loop
Long64_t nbEntries{0};

// chain the reader calls LoadTree()/GetEntry() on; the event filler queries it for
// GetReadEntry(), GetTreeNumber() and GetTree()->GetReadEntry() to tag each event
std::shared_ptr<TChain> treeChain{nullptr};

// leaf accessors for the indexing / storage variables
// NOTE(review): raw const pointers, so presumably non-owning views -- confirm who owns the LeafForm objects
std::vector<const GenericToolbox::LeafForm*> leafFormIndexingList{};
std::vector<const GenericToolbox::LeafForm*> leafFormStorageList{};

// has to be hooked to the TChain
// NOTE(review): ownership of these raw pointers is not visible from this header -- confirm
TTreeFormula* dialIndexTreeFormula{nullptr};
TTreeFormula* nominalWeightTreeFormula{nullptr};

// thread communication
// requestReadNextEntry: the reader waits for it to become true, then toggles it back, before moving to the next entry
GenericToolbox::Atomic<bool> requestReadNextEntry{false};
// isEntryBufferReady: set to true by the reader once GetEntry() returned; the reader then waits for it to be false again
GenericToolbox::Atomic<bool> isEntryBufferReady{false};
// NOTE(review): the two flags below are not exercised in the visible code; by their names they
// signal the end of reading and the readiness of the event filler -- confirm
GenericToolbox::Atomic<bool> isDoneReading{false};
GenericToolbox::Atomic<bool> isEventFillerReady{false};
};



#endif //GUNDAM_DATA_DISPENSER_UTILS_H
6 changes: 6 additions & 0 deletions src/DatasetManager/include/DatasetDefinition.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ class DatasetDefinition : public JsonBaseClass {
dd.second.getParameters().debugNbMaxEventsToLoad = nbMaxEventToLoad_;
}
}
/// Set the fraction of entries to load on the model dispenser and on
/// every dispenser stored in _dataDispenserDict_.
/// @param fraction_ value copied as-is into each dispenser's `fractionOfEntries` parameter
void setFractionOfEntriesToLoad(double fraction_){
  auto& modelParameters = _modelDispenser_.getParameters();
  modelParameters.fractionOfEntries = fraction_;

  for( auto& dispenserEntry : _dataDispenserDict_ ){
    auto& dispenserParameters = dispenserEntry.second.getParameters();
    dispenserParameters.fractionOfEntries = fraction_;
  }
}

private:
// config
Expand Down
74 changes: 47 additions & 27 deletions src/DatasetManager/src/DataDispenser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,8 @@ void DataDispenser::configureImpl(){
_parameters_.variableDict[ varName ] = varExpr;
}

// TODO: better implementation of those
_parameters_.selectionCutFormulaStr = GenericToolbox::Json::buildFormula(_config_, "selectionCutFormula", "&&", _parameters_.selectionCutFormulaStr);
_parameters_.nominalWeightFormulaStr = GenericToolbox::Json::buildFormula(_config_, "nominalWeightFormula", "*", _parameters_.nominalWeightFormulaStr);

// options
GenericToolbox::Json::fillValue(_config_, _parameters_.treePath, "tree");
GenericToolbox::Json::fillValue(_config_, _parameters_.globalTreePath, "tree");
GenericToolbox::Json::fillValue(_config_, _parameters_.filePathList, "filePathList");
GenericToolbox::Json::fillValue(_config_, _parameters_.additionalVarsStorage, {{"additionalLeavesStorage"}, {"additionalVarsStorage"}});
GenericToolbox::Json::fillValue(_config_, _parameters_.dummyVariablesList, "dummyVariablesList");
Expand All @@ -95,6 +91,9 @@ void DataDispenser::configureImpl(){
GenericToolbox::Json::fillValue(_config_, _parameters_.dialIndexFormula, "dialIndexFormula");
GenericToolbox::Json::fillValue(_config_, _parameters_.overridePropagatorConfig, "overridePropagatorConfig");

GenericToolbox::Json::fillFormula(_config_, _parameters_.selectionCutFormulaStr, "selectionCutFormula", "&&");
GenericToolbox::Json::fillFormula(_config_, _parameters_.nominalWeightFormulaStr, "nominalWeightFormula", "*");

}
void DataDispenser::initializeImpl(){
// Nothing else to do other than read config?
Expand Down Expand Up @@ -148,7 +147,7 @@ void DataDispenser::load(Propagator& propagator_){

for( const auto& file: _parameters_.filePathList){
std::string path = GenericToolbox::expandEnvironmentVariables(file);
LogThrowIf(not GenericToolbox::doesTFileIsValid(path, {_parameters_.treePath}), "Invalid file: " << path);
LogThrowIf(not GenericToolbox::doesTFileIsValid(path, {_parameters_.globalTreePath}), "Invalid file: " << path);
}

this->parseStringParameters();
Expand Down Expand Up @@ -419,18 +418,10 @@ void DataDispenser::preAllocateMemory(){
/// of a vector memory. This is not thread safe, so better ensure the vector
/// won't have to do this by allocating the right event size.

// MEMORY CLAIM?
TChain treeChain(_parameters_.treePath.c_str());
for( const auto& file: _parameters_.filePathList){
std::string name = GenericToolbox::expandEnvironmentVariables(file);
if (name != file) {
LogWarning << "Filename expanded to: " << name << std::endl;
}
treeChain.Add(name.c_str());
}
auto treeChain = openChain();

GenericToolbox::LeafCollection lCollection;
lCollection.setTreePtr( &treeChain );
lCollection.setTreePtr( treeChain.get() );
for( auto& var : _cache_.varsRequestedForIndexing ){
// look for override requests
lCollection.addLeafExpression(
Expand Down Expand Up @@ -627,15 +618,41 @@ void DataDispenser::loadFromHistContent(){
std::shared_ptr<TChain> DataDispenser::openChain(bool verbose_){
LogInfoIf(verbose_) << "Opening ROOT files containing events..." << std::endl;

std::shared_ptr<TChain> treeChain(std::make_unique<TChain>(_parameters_.treePath.c_str()));
std::shared_ptr<TChain> treeChain(std::make_unique<TChain>());
for( const auto& file: _parameters_.filePathList){
std::string name = GenericToolbox::expandEnvironmentVariables(file);
GenericToolbox::replaceSubstringInsideInputString(name, "//", "/");

if( verbose_ ){
LogScopeIndent;
LogWarning << name << std::endl;
}
treeChain->Add(name.c_str());

std::string treePath{_parameters_.globalTreePath};
auto chunks = GenericToolbox::splitString(name, ":", true);
if( chunks.size() > 1 ){ treePath = chunks[1]; name = chunks[0]; }

LogThrowIf( treePath.empty(), "TTree path not set." );

LogThrowIf( not GenericToolbox::doesTFileIsValid(name, {treePath}), "Could not open TFile: " << name << " with TTree " << treePath);

Long64_t nMaxEntries{TTree::kMaxEntries};
if( _parameters_.fractionOfEntries != 1. ){
std::unique_ptr<TFile> temp{TFile::Open(name.c_str())};
LogThrowIf(temp== nullptr, "Error while opening TFile: " << name);

auto* tree = temp->Get<TTree>(treePath.c_str());
LogThrowIf(tree== nullptr, "Error while opening TTree: " << treePath << " in " << name);

nMaxEntries = Long64_t( double(tree->GetEntries()) * _parameters_.fractionOfEntries );
if( verbose_ ){
LogScopeIndent;
LogWarning << "Max entries: " << nMaxEntries << std::endl;
}

}
treeChain->AddFile(name.c_str(), nMaxEntries, treePath.c_str());

}

return treeChain;
Expand All @@ -647,7 +664,7 @@ void DataDispenser::eventSelectionFunction(int iThread_){
if( iThread_ == -1 ){ iThread_ = 0; nThreads = 1; }

// Opening ROOT files and make a TChain
auto treeChain{this->openChain(false)};
auto treeChain{this->openChain()};

// Create the memory buffer for the TChain
GenericToolbox::LeafCollection lCollection;
Expand Down Expand Up @@ -858,19 +875,20 @@ void DataDispenser::runEventFillThreads(int iThread_){
// Load the first TTree / need to wait for the event filler to finish hooking branches
threadSharedData.treeChain->LoadTree(bounds.beginIndex);

for( threadSharedData.currentEntryIndex = bounds.beginIndex ; threadSharedData.currentEntryIndex < bounds.endIndex ; threadSharedData.currentEntryIndex++ ){
for( Long64_t iEntry = bounds.beginIndex ; iEntry < bounds.endIndex ; iEntry++ ){

// before load, check if it has a sample
bool hasSample = _cache_.entrySampleIndexList[threadSharedData.currentEntryIndex] != -1;
bool hasSample = _cache_.entrySampleIndexList[iEntry] != -1;
if( not hasSample ){ continue; }

Int_t nBytes{ threadSharedData.treeChain->GetEntry(threadSharedData.currentEntryIndex) };
Int_t nBytes{ threadSharedData.treeChain->GetEntry(iEntry) };

threadSharedData.isEntryBufferReady.setValue(true); // loaded! -> let the other thread get everything it needs

if( iThread_ == 0 ){
readSpeed.addQuantity(nBytes * nThreads);

if( GenericToolbox::showProgressBar(threadSharedData.currentEntryIndex*nThreads, threadSharedData.nbEntries) ){
if( GenericToolbox::showProgressBar(iEntry*nThreads, threadSharedData.nbEntries) ){

ssProgressBar.str("");

Expand All @@ -884,7 +902,7 @@ void DataDispenser::runEventFillThreads(int iThread_){

ssProgressBar << LogInfo.getPrefixString() << progressTitle;
GenericToolbox::displayProgressBar(
threadSharedData.currentEntryIndex*nThreads,
iEntry*nThreads,
threadSharedData.nbEntries,
ssProgressBar.str()
);
Expand All @@ -894,7 +912,7 @@ void DataDispenser::runEventFillThreads(int iThread_){
// make sure the event filler thread has received the signal for the last entry
threadSharedData.isEntryBufferReady.waitUntilEqual( false );

// make sure currentEntryIndex don't get updated while it hasn't been read by the other thread
// make sure the current entry doesn't get updated while it hasn't been read by the other thread
threadSharedData.requestReadNextEntry.waitUntilEqualThenToggle( true );

// was the event loader stopped?
Expand Down Expand Up @@ -1087,8 +1105,10 @@ void DataDispenser::loadEvent(int iThread_){
if( eventIndexingBuffer.getWeights().base == 0 ){ continue; }
}

// currentEntryIndex is modified by the TChain reader
eventIndexingBuffer.getIndices().entry = threadSharedData.currentEntryIndex;
// grab data from TChain
eventIndexingBuffer.getIndices().entry = threadSharedData.treeChain->GetReadEntry();
eventIndexingBuffer.getIndices().treeFile = threadSharedData.treeChain->GetTreeNumber();
eventIndexingBuffer.getIndices().treeEntry = threadSharedData.treeChain->GetTree()->GetReadEntry();

// get sample index / all -1 samples have been ruled out by the chain reader
iSample = _cache_.entrySampleIndexList[eventIndexingBuffer.getIndices().entry];
Expand Down
2 changes: 1 addition & 1 deletion src/DatasetManager/src/DataDispenserUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ std::string DataDispenserParameters::getSummary() const{
std::stringstream ss;
ss << GET_VAR_NAME_VALUE(useReweightEngine);
ss << std::endl << GET_VAR_NAME_VALUE(name);
ss << std::endl << GET_VAR_NAME_VALUE(treePath);
ss << std::endl << GET_VAR_NAME_VALUE(globalTreePath);
ss << std::endl << GET_VAR_NAME_VALUE(nominalWeightFormulaStr);
ss << std::endl << GET_VAR_NAME_VALUE(selectionCutFormulaStr);
ss << std::endl << "activeLeafNameList = " << GenericToolbox::toString(activeLeafNameList, true);
Expand Down
12 changes: 7 additions & 5 deletions src/Propagator/src/EventTreeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,13 @@ template<typename T> void EventTreeWriter::writeEventsTemplate(const GenericTool

GenericToolbox::RawDataArray privateMemberArr;
std::map<std::string, std::function<void(GenericToolbox::RawDataArray&, const Event&)>> leafDictionary;
leafDictionary["eventWeight/D"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getWeights().current); };
leafDictionary["treeWeight/D"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getWeights().base); };
leafDictionary["sampleBinIndex/I"]= [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getIndices().bin); };
leafDictionary["dataSetIndex/I"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getIndices().dataset); };
leafDictionary["entryIndex/L"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getIndices().entry); };
leafDictionary["eventWeight/D"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getWeights().current); };
leafDictionary["treeWeight/D"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getWeights().base); };
leafDictionary["sampleBinIndex/I"]= [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getIndices().bin); };
leafDictionary["dataSetIndex/I"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getIndices().dataset); };
leafDictionary["entryIndex/L"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getIndices().entry); };
leafDictionary["treeEntryIndex/L"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getIndices().treeEntry); };
leafDictionary["treeFileIndex/I"] = [](GenericToolbox::RawDataArray& arr_, const Event& ev_){ arr_.writeRawData(ev_.getIndices().treeFile); };
std::string branchDefStr;
for( auto& leafDef : leafDictionary ){
if( not branchDefStr.empty() ) branchDefStr += ":";
Expand Down
12 changes: 7 additions & 5 deletions src/SamplesManager/include/EventUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@ namespace EventUtils{

struct Indices{

// declaring variables ordered by type (struct 8 bytes lighter because of alignment)

// source
int dataset{-1}; // which DatasetDefinition?
long long entry{-1}; // which entry of the TChain?

// destination
int sample{-1}; // this information is lost in the EventDialCache manager
int bin{-1}; // which bin of the sample?
int treeFile{-1}; // which file?
int sample{-1}; // this information is lost in the EventDialCache manager
int bin{-1}; // which bin of the sample?
long entry{-1}; // which entry of the TChain?
long treeEntry{-1}; // which entry of the TTree?

[[nodiscard]] std::string getSummary() const;
friend std::ostream& operator <<( std::ostream& o, const Indices& this_ ){ o << this_.getSummary(); return o; }
Expand Down
2 changes: 1 addition & 1 deletion src/SamplesManager/src/EventUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace EventUtils{
std::string Indices::getSummary() const{
std::stringstream ss;
ss << "dataset(" << dataset << ")";
ss << ", " << "entry(" << entry << ")";
ss << ", " << "entry(" << entry << " / " << treeFile << "-" << treeEntry << ")";
ss << ", " << "sample(" << sample << ")";
ss << ", " << "bin(" << bin << ")";
return ss.str();
Expand Down
2 changes: 1 addition & 1 deletion submodules/cpp-generic-toolbox

0 comments on commit c7705c5

Please sign in to comment.