Skip to content

Commit

Permalink
WIP: trying to load rntuple into a jagged array data structure
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Dec 3, 2021
1 parent e91b86d commit eb74c37
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 11 deletions.
5 changes: 3 additions & 2 deletions examples/common/ttjet_13tev_june2019.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

using bit = bool;
using byte = unsigned char;
using Index = std::uint64_t;
using Index = std::uint32_t;

// clang-format off
struct run {};
Expand Down Expand Up @@ -1538,7 +1538,7 @@ using Electron = llama::Record<
llama::Field<Electron_pdgId, std::int32_t>,
llama::Field<Electron_photonIdx, std::int32_t>,
llama::Field<Electron_tightCharge, std::int32_t>,
llama::Field<Electron_vidNestedWPbitmap, std::int32_t>,
//llama::Field<Electron_vidNestedWPbitmap, std::int32_t>,
llama::Field<Electron_convVeto, bit>,
llama::Field<Electron_cutBased_HEEP, bit>,
llama::Field<Electron_isPFcand, bit>,
Expand Down Expand Up @@ -1947,6 +1947,7 @@ using Event = llama::Record<
llama::Field<ChsMET_sumEt, float>,
//llama::Field<nCorrT1METJet, Index>,
//llama::Field<nElectron, Index>,
llama::Field<nElectron, Electron[]>,
llama::Field<Flag_ecalBadCalibFilterV2, bit>,
//llama::Field<nFatJet, Index>,
//llama::Field<nGenJetAK8, Index>,
Expand Down
82 changes: 73 additions & 9 deletions examples/hep_rntuple/hep_rntuple.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// This example uses a non-public CMS NanoAOD file called: ttjet_13tev_june2019_lzma.
// Please ask contact us if you need it.
// Please contact us if you need it.

#include "../common/ttjet_13tev_june2019.hpp"

Expand All @@ -14,6 +14,8 @@
#include <llama/DumpMapping.hpp>
#include <llama/llama.hpp>

// Reduced record dimension: mp_take_c keeps only the first 100 elements of Event's field list.
// NOTE(review): presumably a temporary cut-down to keep this WIP example manageable — confirm before relying on it.
using SmallEvent = boost::mp11::mp_take_c<Event, 100>;

int main(int argc, const char* argv[])
{
if (argc != 2)
Expand All @@ -25,27 +27,89 @@ int main(int argc, const char* argv[])
using namespace std::chrono;
using namespace ROOT::Experimental;

// auto ntuple
// = RNTupleReader::Open(RNTupleModel::Create(), "NTuple", "/mnt/c/dev/llama/ttjet_13tev_june2019_lzma.root");
auto ntuple = RNTupleReader::Open(RNTupleModel::Create(), "NTuple", argv[1]);
const auto n = ntuple->GetNEntries();
// try
//{
// ntuple->PrintInfo(ROOT::Experimental::ENTupleInfo::kStorageDetails);
//}
// catch (const std::exception& e)
//{
// fmt::print("PrintInfo error: {}", e.what());
//}
const auto eventCount = ntuple->GetNEntries();
const auto& d = ntuple->GetDescriptor();
const auto electronCount
= d.GetNElements(d.FindColumnId(d.FindFieldId("nElectron.nElectron.Electron_deltaEtaSC"), 0));
fmt::print("File contains {} events with {} electrons\n", eventCount, electronCount);

auto start = steady_clock::now();
auto view = llama::allocView(llama::mapping::SoA<llama::ArrayDims<1>, Event, true>{llama::ArrayDims{n}});
auto mapping = llama::mapping::OffsetTable<llama::ArrayDims<1>, SmallEvent>{
llama::ArrayDims{eventCount},
llama::ArrayDims{electronCount}};
auto view = llama::allocView(mapping);
fmt::print("Alloc LLAMA view: {}ms\n", duration_cast<milliseconds>(steady_clock::now() - start).count());

std::size_t totalSize = 0;
for (auto i = 0u; i < view.mapping.blobCount; i++)
totalSize += view.mapping.blobSize(i);
fmt::print("Total LLAMA view memory: {}MiB in {} blobs\n", totalSize / 1024 / 1024, view.mapping.blobCount);

// fill offset table
start = steady_clock::now();
llama::forEachLeaf<Event>(
std::size_t offset = 0;
auto electronViewCollection = ntuple->GetViewCollection("nElectron");
// Build the offset table: for every event, add the value read from the nElectron collection view
// (presumably the number of electrons in event i — confirm against the RNTuple API) to a running
// total and store that total as the event's end offset.
for (std::size_t i = 0; i < eventCount; i++)
{
offset += electronViewCollection(i);
view(i)(llama::EndOffset<nElectron>{}) = offset;
// Sanity check: the running total must never exceed the electron count read from the descriptor.
assert(offset <= electronCount);
}
fmt::print("Fill offset table: {}ms\n", duration_cast<milliseconds>(steady_clock::now() - start).count());

using AugmentedSmallEvent = typename decltype(mapping)::RecordDim;
start = steady_clock::now();
llama::forEachLeaf<AugmentedSmallEvent>(
[&](auto coord)
{
using Name = llama::GetTag<Event, decltype(coord)>;
using Type = llama::GetType<Event, decltype(coord)>;
auto column = ntuple->GetView<Type>(llama::structName<Name>());
for (std::size_t i = 0; i < n; i++)
view(i)(coord) = column(i);
using Coord = decltype(coord);
using LeafTag = llama::GetTag<AugmentedSmallEvent, Coord>;
using Type = llama::GetType<AugmentedSmallEvent, Coord>;

fmt::print("Copying {}\n", llama::structName<LeafTag>());
if constexpr (
!llama::mapping::internal::isEndOffsetField<LeafTag> && !llama::mapping::internal::isSizeField<LeafTag>)
{
if constexpr (boost::mp11::mp_contains<typename Coord::List, boost::mp11::mp_size_t<llama::dynamic>>::
value)
{
using Before = llama::mapping::internal::BeforeDynamic<Coord>;
using BeforeBefore = llama::RecordCoordFromList<boost::mp11::mp_pop_front<typename Before::List>>;
using After = llama::mapping::internal::AfterDynamic<Coord>;
using SubCollectionTag = llama::GetTag<AugmentedSmallEvent, Before>;

auto collectionColumn = ntuple->GetViewCollection(llama::structName<SubCollectionTag>());
auto column = collectionColumn.template GetView<Type>(
llama::structName<SubCollectionTag>() + "." + llama::structName<LeafTag>());
// Copy one leaf of a sub-collection: for each event, read the sub-collection size from the view
// and write that many values into the event's sub-collection elements.
for (std::size_t i = 0; i < eventCount; i++)
{
// Number of sub-collection elements of event i, as stored in the view's size field.
const auto subCollectionCount = view(i)(BeforeBefore{})(llama::Size<SubCollectionTag>{});
for (std::size_t j = 0; j < subCollectionCount; j++)
{
// NOTE(review): column is indexed with j, which restarts at 0 for every event, so each event
// reads column elements 0..subCollectionCount-1. Presumably this should be a collection-global
// index (e.g. the event's begin offset + j) — confirm against the RNTuple collection view API.
const auto value = column(j);
// Destination: element j of event i's sub-collection, then the leaf field selected by After.
auto& dst = view(i)(Before{})(j) (After{});
dst = value;
}
}
}
else
{
auto column = ntuple->GetView<Type>(llama::structName<LeafTag>());
for (std::size_t i = 0; i < eventCount; i++)
view(i)(coord) = column(i);
}
}
});
fmt::print("Copy RNTuple -> LLAMA view: {}ms\n", duration_cast<milliseconds>(steady_clock::now() - start).count());

Expand Down

0 comments on commit eb74c37

Please sign in to comment.