Add method to get every record's disk size
Summary:
Understanding data usage within VRS files is difficult because of record-level compression.
This diff gives useful insight into stream sizes.
- add an API to get each record's disk size
- display stream sizes in vrstool by default

Differential Revision: D67431185

fbshipit-source-id: 217fa544c07c49a00c9b9a003d6553e66733cefb
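
For illustration only, a minimal sketch of how the new per-record size API might be used to tally per-stream disk usage (hypothetical file path; it mirrors the new GetRecordSizeTest below and is not part of this commit):

#include <cstdint>
#include <iostream>
#include <map>

#include <vrs/RecordFileReader.h>

int main() {
  vrs::RecordFileReader reader;
  // "recording.vrs" is a placeholder path.
  if (reader.openFile("recording.vrs") != 0) {
    return 1;
  }
  // Sum every record's on-disk size, grouped by stream.
  std::map<vrs::StreamId, uint64_t> streamSizes;
  for (const auto& record : reader.getIndex()) {
    streamSizes[record.streamId] += reader.getRecordSize(reader.getRecordIndex(&record));
  }
  for (const auto& [id, size] : streamSizes) {
    std::cout << static_cast<int>(id.getTypeId()) << "-" << id.getInstanceId() << ": " << size
              << " bytes\n";
  }
  return 0;
}
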
Georges Berenger authored and facebook-github-bot committed Dec 19, 2024
1 parent b0d241b commit 9413f4d
Showing 7 changed files with 105 additions and 34 deletions.
18 changes: 18 additions & 0 deletions vrs/MultiRecordFileReader.cpp
@@ -315,6 +315,24 @@ const IndexRecord::RecordInfo* MultiRecordFileReader::getRecord(uint32_t globalI
return globalIndex < recordIndex_->size() ? (*recordIndex_)[globalIndex] : nullptr;
}

uint32_t MultiRecordFileReader::getRecordSize(uint32_t globalIndex) const {
if (!isOpened_) {
return 0;
}
if (hasSingleFile()) {
return readers_.front()->getRecordSize(globalIndex);
}
const IndexRecord::RecordInfo* record = getRecord(globalIndex);
if (record == nullptr) {
return 0;
}
auto reader = getReader(record);
if (reader == nullptr) {
return 0;
}
return reader->getRecordSize(reader->getRecordIndex(record));
}

const IndexRecord::RecordInfo* MultiRecordFileReader::getRecord(
UniqueStreamId streamId,
uint32_t indexNumber) const {
5 changes: 5 additions & 0 deletions vrs/MultiRecordFileReader.h
@@ -219,6 +219,11 @@ class MultiRecordFileReader {
/// @return Corresponding record if present or nullptr if the given index is invalid.
const IndexRecord::RecordInfo* getRecord(uint32_t globalIndex) const;

/// Get a record's size in the global index.
/// @param globalIndex: Position in the global index to look up.
/// @return The size of the record on disk, all included, or 0 if the index is invalid.
uint32_t getRecordSize(uint32_t globalIndex) const;

/// Find a specific record for a specific stream, regardless of type, by index number.
/// @param streamId: UniqueStreamId of the record stream to consider.
/// @param indexNumber: Index position (for streamId - not global index) of the record to look
16 changes: 15 additions & 1 deletion vrs/RecordFileReader.cpp
@@ -519,6 +519,20 @@ const vector<int64_t>& RecordFileReader::getRecordBoundaries() const {
return recordBoundaries_;
}

uint32_t RecordFileReader::getRecordSize(uint32_t recordIndex) const {
if (recordIndex >= recordIndex_.size()) {
return 0;
}
const IndexRecord::RecordInfo& record = recordIndex_[recordIndex];
const vector<int64_t>& boundaries = getRecordBoundaries();
auto nextBoundary = upper_bound(boundaries.begin(), boundaries.end(), record.fileOffset);
if (!XR_VERIFY(nextBoundary != boundaries.end()) ||
!XR_VERIFY(*nextBoundary > record.fileOffset)) {
return 0;
}
return *nextBoundary - record.fileOffset;
}

bool RecordFileReader::prefetchRecordSequence(
const vector<const IndexRecord::RecordInfo*>& records,
bool clearSequence) {
@@ -901,7 +915,7 @@ bool RecordFileReader::getRecordFormat(
Record::Type recordType,
uint32_t formatVersion,
RecordFormat& outFormat) const {
string formatStr = getTag(
const string& formatStr = getTag(
getTags(streamId).vrs, RecordFormat::getRecordFormatTagName(recordType, formatVersion));
if (formatStr.empty()) {
outFormat = ContentType::CUSTOM;
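
The size computation in RecordFileReader::getRecordSize leans on getRecordBoundaries(), the sorted list of every record's start offset plus the end of the file: the first boundary strictly greater than a record's offset marks where that record ends on disk. A standalone sketch of that upper_bound lookup, with made-up offsets:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Sorted boundaries: each record's start offset plus the end of the file (made-up values).
  const std::vector<int64_t> boundaries = {0, 1200, 4800, 9500, 12000};
  const int64_t recordOffset = 4800;  // start offset of the record being measured
  // First boundary strictly greater than the record's offset = where the record ends on disk.
  auto next = std::upper_bound(boundaries.begin(), boundaries.end(), recordOffset);
  if (next != boundaries.end()) {
    std::cout << "record size on disk: " << (*next - recordOffset) << " bytes\n";  // prints 4700
  }
  return 0;
}
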
10 changes: 6 additions & 4 deletions vrs/RecordFileReader.h
@@ -348,6 +348,9 @@ class RecordFileReader {
/// @return Index in the record's stream index, or getRecordCount() if record is nullptr.
uint32_t getRecordStreamIndex(const IndexRecord::RecordInfo* record) const;

/// Get a record's disk size.
uint32_t getRecordSize(uint32_t recordIndex) const;

/// Timestamp for the first data record in the whole file.
/// @return The timestamp for the file data record, or 0, if the file contains no data record.
double getFirstDataRecordTime() const;
@@ -486,10 +489,6 @@ uint32_t getRecordFormats(StreamId streamId, RecordFormatMap& outFormats) const;
uint32_t getRecordFormats(StreamId streamId, RecordFormatMap& outFormats) const;
std::unique_ptr<DataLayout> getDataLayout(StreamId streamId, const ContentBlockId& blockId) const;

/// The offset of each record + the end of the file, sorted.
/// Useful to know where records end without reading the record.
const vector<int64_t>& getRecordBoundaries() const;

/// Option to control logging when opening a file.
/// @param progressLogger: a logger implementation, or nullptr, to disable logging.
void setOpenProgressLogger(ProgressLogger* progressLogger);
@@ -569,6 +568,9 @@ class RecordFileReader {
static const string& getTag(const map<string, string>& tags, const string& name); ///< private
bool mightContainContentTypeInDataRecord(StreamId streamId, ContentType type) const; ///< private

/// Record boundaries, in sequential order, but not necessarily in record order!
const vector<int64_t>& getRecordBoundaries() const; ///< private

// Members to read an open VRS file
std::unique_ptr<FileHandler> file_;
UncompressedRecordReader uncompressedRecordReader_;
15 changes: 15 additions & 0 deletions vrs/test/GetRecordTest.cpp
@@ -359,3 +359,18 @@ TEST_F(GetRecordTester, GetRecordForwardBackwardTest) {
}
}
}

TEST_F(GetRecordTester, GetRecordSizeTest) {
vrs::RecordFileReader file;
ASSERT_EQ(file.openFile(kTestFile2), 0);
const auto& index = file.getIndex();
map<StreamId, uint64_t> streamSizes;
for (const auto& record : index) {
streamSizes[record.streamId] += file.getRecordSize(file.getRecordIndex(&record));
}
map<StreamId, uint64_t> actualSizes = {
{StreamId(RecordableTypeId::RgbCameraRecordableClass, 1), 13522400},
{StreamId(RecordableTypeId::SlamCameraData, 1), 6106787},
{StreamId(RecordableTypeId::SlamImuData, 1), 1673048}};
EXPECT_EQ(streamSizes, actualSizes);
}
72 changes: 44 additions & 28 deletions vrs/utils/RecordFileInfo.cpp
@@ -148,8 +148,15 @@ void overView(ostream& out, RecordFileReader& file, StreamId id, Details details
if (!nowKnownAsName.empty()) {
out << " (device now known as \"" << nowKnownAsName << "\")";
}
if (details & Details::StreamRecordSizes) {
size_t size = 0;
for (const auto& record : index) {
size += file.getRecordSize(file.getRecordIndex(record));
}
out << ", " << humanReadableFileSize(size);
}
out << ".\n";
if (details && Details::StreamTags) {
if (details & Details::StreamTags) {
const StreamTags& tags = file.getTags(id);
for (const auto& iter : tags.vrs) {
stringstream ss;
@@ -163,7 +170,7 @@ }
}
printTags(out, tags.user);
}
if (details && Details::StreamRecordCounts) {
if (details & Details::StreamRecordCounts) {
RecordCounter configRecords, stateRecords, dataRecords;
for (const auto& record : index) {
switch (record->recordType) {
@@ -223,7 +230,7 @@ void printOverview(
}
out << "VRS file with " << chunks.size() << " chunks, " << humanReadableFileSize(totalSize)
<< " total";
if (details && Details::ChunkList) {
if (details & Details::ChunkList) {
out << ":\n";
for (size_t index = 0; index < chunks.size(); index++) {
const pair<string, int64_t>& chunk = chunks[index];
@@ -241,7 +248,7 @@ void printOverview(
recordCounter.count(&record);
}
}
if (details && Details::MainCounters) {
if (details & Details::MainCounters) {
out << "Found ";
printCountedName(out, streamIds.size(), "stream");
out << ", ";
@@ -272,11 +279,11 @@ void printOverview(
}
out << ".\n";
}
if (details && Details::ListFileTags) {
if (details & Details::ListFileTags) {
const auto& tags = recordFile.getTags();
printTags(out, tags);
}
if (details && (Details::StreamNames | Details::StreamTags | Details::StreamRecordCounts)) {
if (details & (Details::StreamNames | Details::StreamTags | Details::StreamRecordCounts)) {
for (auto id : streamIds) {
overView(out, recordFile, id, details);
}
@@ -314,33 +321,33 @@ static JsonValue devicesOverView(
StreamId id,
Details details,
JsonDocument::AllocatorType& allocator) {
JsonValue recordData(kObjectType);
JsonValue streamData(kObjectType);
const auto& index = file.getIndex(id);

if (details && Details::StreamNames) {
bool pub = details && Details::UsePublicNames;
if (details & Details::StreamNames) {
bool pub = details & Details::UsePublicNames;
JsonValue recName = stringToJvalue(pub ? "device_name" : "recordable_name", allocator);
recordData.AddMember(recName, stringToJvalue(id.getTypeName(), allocator), allocator);
streamData.AddMember(recName, stringToJvalue(id.getTypeName(), allocator), allocator);
JsonValue recTypId = stringToJvalue(pub ? "device_type_id" : "recordable_id", allocator);
recordData.AddMember(recTypId, static_cast<int>(id.getTypeId()), allocator);
streamData.AddMember(recTypId, static_cast<int>(id.getTypeId()), allocator);
JsonValue recInstId = stringToJvalue(pub ? "device_instance_id" : "instance_id", allocator);
recordData.AddMember(recInstId, id.getInstanceId(), allocator);
streamData.AddMember(recInstId, id.getInstanceId(), allocator);
const string& flavor = file.getFlavor(id);
if (!flavor.empty()) {
JsonValue recFlavor = stringToJvalue(pub ? "device_flavor" : "recordable_flavor", allocator);
recordData.AddMember(recFlavor, stringToJvalue(flavor, allocator), allocator);
streamData.AddMember(recFlavor, stringToJvalue(flavor, allocator), allocator);
}

const string& name = file.getOriginalRecordableTypeName(id);
if (name != id.getTypeName()) {
recordData.AddMember(
streamData.AddMember(
stringToJvalue(pub ? "device_original_name" : "recordable_original_name", allocator),
stringToJvalue(name, allocator),
allocator);
}
}

if (details && Details::StreamTags) {
if (details & Details::StreamTags) {
const StreamTags& tags = file.getTags(id);

JsonValue recordTags(kObjectType);
@@ -350,7 +357,7 @@ static JsonValue devicesOverView(
stringToJvalue(make_printable(iter.second), allocator),
allocator);
}
recordData.AddMember(stringToJvalue("tags", allocator), recordTags, allocator);
streamData.AddMember(stringToJvalue("tags", allocator), recordTags, allocator);

JsonValue VRStags(kObjectType);
for (const auto& iter : tags.vrs) {
@@ -359,10 +366,10 @@ static JsonValue devicesOverView(
stringToJvalue(make_printable(iter.second), allocator),
allocator);
}
recordData.AddMember(stringToJvalue("vrs_tag", allocator), VRStags, allocator);
streamData.AddMember(stringToJvalue("vrs_tag", allocator), VRStags, allocator);
}

if (details && Details::StreamRecordCounts) {
if (details & Details::StreamRecordCounts) {
RecordCounter configRecords, stateRecords, dataRecords;
JsonValue configRecordsValue(kObjectType);
JsonValue stateRecordsValue(kObjectType);
@@ -386,12 +393,21 @@ static JsonValue devicesOverView(
addTimeFrameMembers(configRecordsValue, configRecords, allocator);
addTimeFrameMembers(stateRecordsValue, stateRecords, allocator);

recordData.AddMember(stringToJvalue("configuration", allocator), configRecordsValue, allocator);
recordData.AddMember(stringToJvalue("state", allocator), stateRecordsValue, allocator);
recordData.AddMember(stringToJvalue("data", allocator), dataRecordsValue, allocator);
streamData.AddMember(stringToJvalue("configuration", allocator), configRecordsValue, allocator);
streamData.AddMember(stringToJvalue("state", allocator), stateRecordsValue, allocator);
streamData.AddMember(stringToJvalue("data", allocator), dataRecordsValue, allocator);
}

if (details & Details::StreamRecordSizes) {
int64_t size = 0;
for (const auto& record : index) {
size += file.getRecordSize(file.getRecordIndex(record));
}
JsonValue streamSize = stringToJvalue("stream_size", allocator);
streamData.AddMember(streamSize, size, allocator);
}

return recordData;
return streamData;
}

string jsonOverview(const string& path, Details details) {
@@ -427,7 +443,7 @@ string jsonOverview(RecordFileReader& recordFile, const set<StreamId>& streams,
int64_t fileSize = 0;
const vector<pair<string, int64_t>> chunks = recordFile.getFileChunks();
const pair<string, int64_t>& file = chunks[0];
if (details && Details::Basics) {
if (details & Details::Basics) {
doc.AddMember(
stringToJvalue("file_name", allocator), stringToJvalue(file.first, allocator), allocator);
}
@@ -439,23 +455,23 @@ string jsonOverview(RecordFileReader& recordFile, const set<StreamId>& streams,
fileSize += chunk.second;
}
}
if (details && Details::ChunkList) {
if (details & Details::ChunkList) {
JsonValue fileChunks(kArrayType);
fileChunks.Reserve(static_cast<SizeType>(chunks.size()), allocator);
for (const auto& chunk : chunks) {
fileChunks.PushBack(stringToJvalue(chunk.first, allocator), allocator);
}
doc.AddMember(stringToJvalue("file_chunks", allocator), fileChunks, allocator);
}
if (details && Details::Basics) {
if (details & Details::Basics) {
JsonValue fileSizeShortId = stringToJvalue("file_size_short", allocator);
doc.AddMember(
fileSizeShortId, stringToJvalue(humanReadableFileSize(fileSize), allocator), allocator);
JsonValue fileSizeId = stringToJvalue("file_size", allocator);
doc.AddMember(fileSizeId, fileSize, allocator);
}

if (details && Details::ListFileTags) {
if (details & Details::ListFileTags) {
JsonValue recordTags(kObjectType);
const auto& tags = recordFile.getTags();
for (const auto& tag : tags) {
@@ -466,7 +482,7 @@ string jsonOverview(RecordFileReader& recordFile, const set<StreamId>& streams,
doc.AddMember(stringToJvalue("tags", allocator), recordTags, allocator);
}

if (details && Details::MainCounters) {
if (details & Details::MainCounters) {
JsonValue numOfDevices = stringToJvalue("number_of_devices", allocator);
doc.AddMember(numOfDevices, static_cast<uint64_t>(streams.size()), allocator);
size_t recordCount = 0;
@@ -496,7 +512,7 @@ string jsonOverview(RecordFileReader& recordFile, const set<StreamId>& streams,
}
}

if (details && (Details::StreamNames | Details::StreamTags | Details::StreamRecordCounts)) {
if (details & (Details::StreamNames | Details::StreamTags | Details::StreamRecordCounts)) {
JsonValue devices(kArrayType);
for (auto id : streams) {
devices.PushBack(devicesOverView(recordFile, id, details, allocator), allocator);
3 changes: 2 additions & 1 deletion vrs/utils/RecordFileInfo.h
@@ -44,6 +44,7 @@ enum class Details : uint32_t {
StreamNames = 1 << 4,
StreamTags = 1 << 5,
StreamRecordCounts = 1 << 6,
StreamRecordSizes = 1 << 7,

Overview = MainCounters,
Counters = MainCounters | StreamRecordCounts,
Expand All @@ -60,7 +61,7 @@ inline Details operator|(const Details& lhs, const Details& rhs) {
}

/// Test that two sets of flags intersect (at least one flag is set in both sets)
inline bool operator&&(const Details& lhs, const Details& rhs) {
inline bool operator&(const Details& lhs, const Details& rhs) {
return (static_cast<uint32_t>(lhs) & static_cast<uint32_t>(rhs)) != 0;
}

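The operator change above matters because the Details values are bit flags: the call sites updated throughout RecordFileInfo.cpp now read "if (details & Details::X)", so the intersection test is spelled as operator& rather than operator&&. A self-contained sketch of the same flag pattern (illustrative names and values, not the actual header):

#include <cstdint>
#include <iostream>

// Minimal sketch of the bit-flag pattern used by RecordFileInfo's Details enum.
enum class Flags : uint32_t {
  Names = 1 << 0,
  Tags = 1 << 1,
  Sizes = 1 << 2,
};

// Combine flags.
inline Flags operator|(Flags lhs, Flags rhs) {
  return static_cast<Flags>(static_cast<uint32_t>(lhs) | static_cast<uint32_t>(rhs));
}

// Test that two sets of flags intersect (at least one flag is set in both sets).
inline bool operator&(Flags lhs, Flags rhs) {
  return (static_cast<uint32_t>(lhs) & static_cast<uint32_t>(rhs)) != 0;
}

int main() {
  Flags details = Flags::Names | Flags::Sizes;
  std::cout << std::boolalpha << (details & Flags::Sizes) << "\n";  // true
  std::cout << std::boolalpha << (details & Flags::Tags) << "\n";   // false
  return 0;
}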
