From 05a29ba8fc27f360edbfab0560a1b47374bfe88f Mon Sep 17 00:00:00 2001 From: JackieTien97 Date: Thu, 9 Jan 2025 15:27:18 +0800 Subject: [PATCH 1/2] Correct the retained size calculation for BinaryColumn and BinaryColumnBuilder --- .../org/apache/tsfile/utils/RamUsageEstimator.java | 12 ++++++++++++ .../apache/tsfile/file/header/ChunkGroupHeader.java | 2 -- .../tsfile/read/common/block/TsBlockBuilder.java | 4 ---- .../read/common/block/column/BinaryColumn.java | 6 ++---- .../common/block/column/BinaryColumnBuilder.java | 5 +---- .../tsfile/read/common/block/column/TimeColumn.java | 3 +-- .../read/common/block/column/TsBlockSerde.java | 1 - 7 files changed, 16 insertions(+), 17 deletions(-) diff --git a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java index 3a35d2269..06edd9484 100644 --- a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java +++ b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java @@ -271,6 +271,18 @@ public static long sizeOf(double[] arr) { : alignObjectSize(NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES * arr.length); } + public static long sizeOf(Accountable[] arr) { + if (arr == null) { + return 0; + } else { + long size = shallowSizeOf(arr); + for (Accountable obj : arr) { + size += obj.ramBytesUsed(); + } + return size; + } + } + /** Returns the size in bytes of the String[] object. 
*/ public static long sizeOf(String[] arr) { long size = shallowSizeOf(arr); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java b/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java index b46f0e0c3..ba3371e9d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java @@ -53,7 +53,6 @@ public int getSerializedSize() { } private int getSerializedSize(IDeviceID deviceID) { - // TODO: add an interface in IDeviceID int length = deviceID.serializedSize(); return Byte.BYTES + ReadWriteForEncodingUtils.varIntSize(length) + length; } @@ -73,7 +72,6 @@ public static ChunkGroupHeader deserializeFrom( } } - // TODO: add an interface in IDeviceID final IDeviceID deviceID = deserializeDeviceID(inputStream, versionNumber); return new ChunkGroupHeader(deviceID); } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java index 918551d9b..028bffc99 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java @@ -104,8 +104,6 @@ private TsBlockBuilder(int initialExpectedEntries, int maxTsBlockBytes, List types) { valueColumnBuilders = new ColumnBuilder[types.size()]; int initialExpectedEntries = timeColumnBuilder.getPositionCount(); for (int i = 0; i < valueColumnBuilders.length; i++) { - // TODO use Type interface to encapsulate createColumnBuilder to each concrete type class - // instead of switch-case switch (types.get(i)) { case BOOLEAN: valueColumnBuilders[i] = diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java index 
ec36fc4f1..9b8a82cc4 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java @@ -32,8 +32,8 @@ import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; +import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; -import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray; public class BinaryColumn implements Column { @@ -75,9 +75,7 @@ public BinaryColumn(int positionCount, Optional valueIsNull, Binary[] } this.valueIsNull = valueIsNull; - // TODO we need to sum up all the Binary's retainedSize here - retainedSizeInBytes = - INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOfObjectArray(positionCount); + retainedSizeInBytes = INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOf(values); } @Override diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java index a82d82fc0..d9c560dc6 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java @@ -32,7 +32,6 @@ import static java.lang.Math.max; import static org.apache.tsfile.read.common.block.column.ColumnUtil.calculateBlockResetSize; -import static org.apache.tsfile.utils.RamUsageEstimator.shallowSizeOf; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf; public class BinaryColumnBuilder implements ColumnBuilder { @@ -129,7 +128,6 @@ public TSDataType getDataType() { @Override 
public long getRetainedSizeInBytes() { - // TODO we need to sum up all the Binary's retainedSize here long size = INSTANCE_SIZE + arraysRetainedSizeInBytes; if (columnBuilderStatus != null) { size += ColumnBuilderStatus.INSTANCE_SIZE; @@ -139,7 +137,6 @@ public long getRetainedSizeInBytes() { @Override public ColumnBuilder newColumnBuilderLike(ColumnBuilderStatus columnBuilderStatus) { - // TODO we should take retain size into account here return new BinaryColumnBuilder(columnBuilderStatus, calculateBlockResetSize(positionCount)); } @@ -158,6 +155,6 @@ private void growCapacity() { } private void updateArraysDataSize() { - arraysRetainedSizeInBytes = sizeOf(valueIsNull) + shallowSizeOf(values); + arraysRetainedSizeInBytes = sizeOf(valueIsNull) + sizeOf(values); } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java index cce059d71..f3411c18c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java @@ -100,8 +100,7 @@ public boolean isNull(int position) { @Override public boolean[] isNull() { - // todo - return null; + throw new UnsupportedOperationException("isNull is not supported for TimeColumn"); } @Override diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java index e887a5773..e7a503e6e 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java @@ -67,7 +67,6 @@ public TsBlock deserialize(ByteBuffer byteBuffer) { } // Time column. 
- // TODO: a TimeColumn will be deserialized as a LongColumn Column timeColumn = ColumnEncoderFactory.get(columnEncodings.get(0)) .readColumn(byteBuffer, TSDataType.INT64, positionCount); From 999bad45abf92ff599f1b1b49be20bec40cd7095 Mon Sep 17 00:00:00 2001 From: JackieTien97 Date: Thu, 9 Jan 2025 16:12:59 +0800 Subject: [PATCH 2/2] consider trailing nulls --- .../main/java/org/apache/tsfile/utils/RamUsageEstimator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java index 06edd9484..d357a42ea 100644 --- a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java +++ b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java @@ -277,7 +277,7 @@ public static long sizeOf(Accountable[] arr) { } else { long size = shallowSizeOf(arr); for (Accountable obj : arr) { - size += obj.ramBytesUsed(); + size += obj != null ? obj.ramBytesUsed() : 0; } return size; }