Fix float encoder overflow (#342)

* try one try * Fix FloatEncoder overflow problem * add more UT * add more UT
apache · Dec 25, 2024 · 654e509 · 654e509
1 parent 33ea485
commit 654e509
Show file tree

Hide file tree

Showing 5 changed files with 153 additions and 9 deletions.
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java
@@ -24,6 +24,7 @@
 import org.apache.tsfile.exception.encoding.TsFileDecodingException;
 import org.apache.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.tsfile.utils.Binary;
+import org.apache.tsfile.utils.BitMap;
 import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
 
 import org.slf4j.Logger;
@@ -47,6 +48,9 @@ public class FloatDecoder extends Decoder {
   /** flag that indicates whether we have read maxPointNumber and calculated maxPointValue. */
   private boolean isMaxPointNumberRead;
 
+  private BitMap useMaxPointNumber;
+  private int position = 0;
+
   public FloatDecoder(TSEncoding encodingType, TSDataType dataType) {
     super(encodingType);
     if (encodingType == TSEncoding.RLE) {
@@ -93,21 +97,39 @@ public FloatDecoder(TSEncoding encodingType, TSDataType dataType) {
   public float readFloat(ByteBuffer buffer) {
     readMaxPointValue(buffer);
     int value = decoder.readInt(buffer);
-    double result = value / maxPointValue;
+    double result = value / getMaxPointValue();
+    position++;
     return (float) result;
   }
 
   @Override
   public double readDouble(ByteBuffer buffer) {
     readMaxPointValue(buffer);
     long value = decoder.readLong(buffer);
-    return value / maxPointValue;
+    double result = value / getMaxPointValue();
+    position++;
+    return result;
+  }
+
+  private double getMaxPointValue() {
+    if (useMaxPointNumber == null) {
+      return maxPointValue;
+    } else {
+      return useMaxPointNumber.isMarked(position) ? maxPointValue : 1;
+    }
   }
 
   private void readMaxPointValue(ByteBuffer buffer) {
     if (!isMaxPointNumberRead) {
       int maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
-      if (maxPointNumber <= 0) {
+      if (maxPointNumber == Integer.MAX_VALUE) {
+        int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        byte[] tmp = new byte[size / 8 + 1];
+        buffer.get(tmp, 0, size / 8 + 1);
+        useMaxPointNumber = new BitMap(size, tmp);
+        maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        maxPointValue = Math.pow(10, maxPointNumber);
+      } else if (maxPointNumber <= 0) {
         maxPointValue = 1;
       } else {
         maxPointValue = Math.pow(10, maxPointNumber);
@@ -153,5 +175,6 @@ public long readLong(ByteBuffer buffer) {
   public void reset() {
     this.decoder.reset();
     this.isMaxPointNumberRead = false;
+    this.position = 0;
   }
 }
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java
@@ -22,10 +22,13 @@
 import org.apache.tsfile.enums.TSDataType;
 import org.apache.tsfile.exception.encoding.TsFileEncodingException;
 import org.apache.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.tsfile.utils.BitMap;
 import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
 /**
  * Encoder for float or double value using rle or two-diff according to following grammar.
@@ -49,11 +52,14 @@ public class FloatEncoder extends Encoder {
   /** flag to check whether maxPointNumber is saved in the stream. */
   private boolean isMaxPointNumberSaved;
 
+  private final List<Boolean> useMaxPointNumber;
+
   public FloatEncoder(TSEncoding encodingType, TSDataType dataType, int maxPointNumber) {
     super(encodingType);
     this.maxPointNumber = maxPointNumber;
-    calculateMaxPonitNum();
+    calculateMaxPointNum();
     isMaxPointNumberSaved = false;
+    useMaxPointNumber = new ArrayList<>();
     if (encodingType == TSEncoding.RLE) {
       if (dataType == TSDataType.FLOAT) {
         encoder = new IntRleEncoder();
@@ -101,7 +107,7 @@ public void encode(double value, ByteArrayOutputStream out) {
     encoder.encode(valueLong, out);
   }
 
-  private void calculateMaxPonitNum() {
+  private void calculateMaxPointNum() {
     if (maxPointNumber <= 0) {
       maxPointNumber = 0;
       maxPointValue = 1;
@@ -111,21 +117,57 @@ private void calculateMaxPonitNum() {
   }
 
   private int convertFloatToInt(float value) {
-    return (int) Math.round(value * maxPointValue);
+    if (value * maxPointValue > Integer.MAX_VALUE || value * maxPointValue < Integer.MIN_VALUE) {
+      useMaxPointNumber.add(false);
+      return Math.round(value);
+    } else {
+      useMaxPointNumber.add(true);
+      return (int) Math.round(value * maxPointValue);
+    }
   }
 
   private long convertDoubleToLong(double value) {
-    return Math.round(value * maxPointValue);
+    if (value * maxPointValue > Long.MAX_VALUE || value * maxPointValue < Long.MIN_VALUE) {
+      useMaxPointNumber.add(false);
+      return Math.round(value);
+    } else {
+      useMaxPointNumber.add(true);
+      return Math.round(value * maxPointValue);
+    }
   }
 
   @Override
   public void flush(ByteArrayOutputStream out) throws IOException {
     encoder.flush(out);
+    if (pointsNotUseMaxPointNumber()) {
+      byte[] ba = out.toByteArray();
+      out.reset();
+      ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out);
+      BitMap bitMap = new BitMap(useMaxPointNumber.size());
+      for (int i = 0; i < useMaxPointNumber.size(); i++) {
+        if (useMaxPointNumber.get(i)) {
+          bitMap.mark(i);
+        }
+      }
+      ReadWriteForEncodingUtils.writeUnsignedVarInt(useMaxPointNumber.size(), out);
+      out.write(bitMap.getByteArray());
+      out.write(ba);
+    }
     reset();
   }
 
   private void reset() {
     isMaxPointNumberSaved = false;
+    useMaxPointNumber.clear();
+  }
+
+  private boolean pointsNotUseMaxPointNumber() {
+    for (boolean info : useMaxPointNumber) {
+      if (!info) {
+        return true;
+      }
+    }
+    return false;
   }
 
   private void saveMaxPointNumber(ByteArrayOutputStream out) {

diff --git a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java
@@ -34,6 +34,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import static org.apache.tsfile.utils.EncodingUtils.roundWithGivenPrecision;
 import static org.junit.Assert.assertEquals;
 
 public class FloatDecoderTest {
@@ -203,6 +204,38 @@ private void testDoubleLength(
     }
   }
 
+  @Test
+  public void testBigFloat() throws Exception {
+    float a = 0.333F;
+    float b = 6.5536403E8F;
+    Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT, 2);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    encoder.encode(a, baos);
+    encoder.encode(b, baos);
+    encoder.flush(baos);
+
+    ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+    Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT);
+    assertEquals(roundWithGivenPrecision(a, 2), decoder.readFloat(buffer), delta);
+    assertEquals(roundWithGivenPrecision(b, 2), decoder.readFloat(buffer), delta);
+  }
+
+  @Test
+  public void testBigDouble() throws Exception {
+    double a = 0.333;
+    double b = 9.223372036854E18;
+    Encoder encoder = new FloatEncoder(TSEncoding.RLE, TSDataType.DOUBLE, 2);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    encoder.encode(a, baos);
+    encoder.encode(b, baos);
+    encoder.flush(baos);
+
+    ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
+    Decoder decoder = new FloatDecoder(TSEncoding.RLE, TSDataType.DOUBLE);
+    assertEquals(roundWithGivenPrecision(a, 2), decoder.readDouble(buffer), delta);
+    assertEquals(roundWithGivenPrecision(b, 2), decoder.readDouble(buffer), delta);
+  }
+
   // private void testDecimalLenght(TSEncoding encoding, List<Double> valueList,
   // int maxPointValue,
   // boolean isDebug, int repeatCount) throws Exception {

diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile.utils;
+
+public class EncodingUtils {
+
+  // Copied from org.apache.iotdb.db.utils.MathUtils
+  public static float roundWithGivenPrecision(float data, int size) {
+    if (size == 0) {
+      return Math.round(data);
+    }
+    return Math.round(data)
+        + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / (float) Math.pow(10, size);
+  }
+
+  // Copied from org.apache.iotdb.db.utils.MathUtils
+  public static double roundWithGivenPrecision(double data, int size) {
+    if (size == 0) {
+      return Math.round(data);
+    }
+    return Math.round(data)
+        + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / Math.pow(10, size);
+  }
+}
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java
@@ -134,8 +134,13 @@ public void floatTest() throws IOException, WriteProcessException {
 
   public void floatTest(TSEncoding encoding) throws IOException, WriteProcessException {
     writeDataByTSRecord(
-        TSDataType.FLOAT, (i) -> new FloatDataPoint("sensor_1", (float) i), encoding);
-    readData((i, field, delta) -> assertEquals(i, field.getFloatV(), delta));
+        TSDataType.FLOAT,
+        (i) -> new FloatDataPoint("sensor_1", i % 2 == 0 ? 6.55364032E8F : i),
+        encoding);
+    readData(
+        (i, field, delta) ->
+            assertEquals(
+                encoding.toString(), i % 2 == 0 ? 6.55364032E8F : i, field.getFloatV(), delta));
   }
 
   @Test