branchless/batchy parse digits

franz1981 · franz1981 · commit cf176f47c270 · 2024-04-05T10:38:31.000+02:00
diff --git a/vertx-sql-client/src/main/java/io/vertx/sqlclient/impl/codec/CommonCodec.java b/vertx-sql-client/src/main/java/io/vertx/sqlclient/impl/codec/CommonCodec.java
@@ -12,35 +12,185 @@
 package io.vertx.sqlclient.impl.codec;
 
 import io.netty.buffer.ByteBuf;
+import io.netty.buffer.Unpooled;
 
+/**
+ * This is based on this algorithm: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
+ * Which can be explained as follows:
+ * <pre>
+ *
+ * Given 8 ASCII digits as: b1b2b3b4b5b6b7b8
+ *
+ * eg: "12345678"
+ *
+ * which byte[] := { 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38 }
+ *                    "1"   "2"   "3"   "4"   "5"   "6"   "7"   "8"
+ *
+ * reading is in Little Endian form will read the lower indexes first placing them as lower addresses
+ * as can be seen in the hex representation, where "1" is now in the rightmost position.
+ *
+ * hex:
+ * 0x38_37_36_35_34_33_32_31-
+ * 0x30_30_30_30_30_30_30_30=
+ * 0x80_07_06_05_04_03_02_01
+ *
+ * digits = (digits * 10) + (digits >> 8);
+ *
+ * 0x50_46_3c_32_28_1e_14_0a +	~	digits * 10 +
+ * 0x00_80_70_60_50_40_30_21 =	~	digits >> 8 =
+ * 0x50_4e_43_38_2d_22_17_0c
+ * now   ^     ^     ^     ^
+ *       |     |	    |     |
+ * g4=10*b7+b8 |     |     |
+ *       g3=10*b5+b6 |     |
+ *             g2=10*b3+b4 |
+ *                   g1=10*b1+b2
+ *
+ * These are the relevant results we care about, while the others are useless
+ * and subsequent masks will take care to ignore them.
+ *
+ * Now the aggregation parts:
+ *
+ * digits & U64_MASK := 0x00_00_00_38_00_00_00_0c
+ *
+ * This will isolate 10*b3+b4 and 10*b7+b8, trying to correctly compute:
+ *
+ * 1000000*(10*b1+b2) + 100*(10*b5+b6), somehow.
+ *
+ * The mask used to perform the multiplication is
+ *
+ * U64_FIRST_THIRD := (1000000L << 32) + 100 := 0x00_0f_42_40_00_00_00_64
+ *
+ * 0x00_00_00_38_00_00_00_0c *
+ * 0x00_0f_42_40_00_00_00_64 =
+ * 0x00_b7_30_e0_00_00_04_b0
+ *
+ *
+ * which 0x00_b7_30_e0 part (let's ignore the second half 00_00_04_b0, which is 1200)
+ *
+ * is, in decimal:
+ *
+ * 12005600 (!!!) === 1000000*(10*1 + 2) + 100*(10*5 + 6) = 1*10000000 + 2*1000000 + 5*1000 + 6*100
+ *
+ * For the second part
+ *
+ * ie 10000*(10*b3 + b4) + 10*b7 + b8
+ *
+ * we first isolate the 2 paris (g2 and g4) with (digits >> 16) & U64_MASK (which move them to the right by 2 bytes), getting
+ *
+ * 0x00_00_4e_00_00_00_22 *
+ * 0x00_27_10_00_00_00_01 =
+ * 0x05_30_6e_00_00_00_22
+ *
+ * which, once again, has it leftmost part
+ *
+ * 0x05_30_6e === 340078 (!!!!) === 10000*(10*3 + 4) + 10*7 + 8 = 3*100000 + 4*10000 + 7*10 + 8
+ *
+ *
+ * shifting both left by 32 and adding them, the total digit is done.
+ * </pre>
+ */
 public class CommonCodec {
-  /**
-   * Decode the specified {@code buff} formatted as a decimal string starting at the readable index
-   * with the specified {@code length} to a long.
-   *
-   * @param index the hex string index
-   * @param len   the hex string length
-   * @param buff  the byte buff to read from
-   * @return the decoded value as a long
-   */
+
+  // https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
+  private static final long U64_MASK = 0x000000FF000000FFL;
+  private static final long U64_FIRST_THIRD = (1000000L << 32) + 100;
+  private static final long U64_SECOND_FOURTH = (10000L << 32) + 1;
+
+  private static long parseEigthDigitsLE(long digits) {
+    digits -= 0x3030303030303030L;
+    digits = (digits * 10) + (digits >> 8);
+    return (((digits & U64_MASK) * U64_FIRST_THIRD) + (((digits >> 16) & U64_MASK) * U64_SECOND_FOURTH)) >> 32;
+  }
+
+  private static final int U32_MASK = 0x00FF00FF;
+  private static final int U32_FIRST_SECOND = (100 << 16) + 1;
+
+  private static int parseFourDigitsLE(int digits) {
+    digits -= 0x30303030;
+    digits = (digits * 10) + (digits >> 8);
+    return ((digits & U32_MASK) * U32_FIRST_SECOND) >> 16;
+  }
+
+  private static short parseTwoDigitsLE(short digits) {
+    digits -= 0x3030;
+    return (short) ((digits & 0xFF) * 10 + ((digits >> 8) & 0xFF));
+  }
+
+  private static byte parseOneDigit(byte digit) {
+    return (byte) (digit - 0x30);
+  }
+
+  public static void main(String[] args) {
+    ByteBuf buff = Unpooled.buffer();
+    buff.writeCharSequence("-123", java.nio.charset.StandardCharsets.UTF_8);
+    System.out.println(decodeDecStringToLong(0, buff.readableBytes(), buff));
+  }
+
+  public static int decodeDecStringToInt(int index, int len, ByteBuf buff) {
+    // 10 + sign = 32bit
+    return 0;
+  }
+
+  public static int decodeDecStringToShort(int index, int len, ByteBuf buff) {
+    // 5 + sign = 16bit
+    return 0;
+  }
+
   public static long decodeDecStringToLong(int index, int len, ByteBuf buff) {
-    long value = 0;
-    if (len > 0) {
-      int to = index + len;
-      boolean neg = false;
-      if (buff.getByte(index) == '-') {
-        neg = true;
-        index++;
-      }
-      while (index < to) {
-        byte ch = buff.getByte(index++);
-        byte nibble = (byte) (ch - '0');
-        value = value * 10 + nibble;
+    byte firstByte = buff.getByte(index);
+    final boolean negative = firstByte == '-';
+    // handling these fast-path to avoid using get<something>LE which is not free
+    if (len <= 2) {
+      if (len == 1) {
+        if (negative) {
+          throw new IllegalArgumentException("Invalid negative number: missing digits");
+        }
+        return parseOneDigit(firstByte);
       }
-      if (neg) {
-        value = -value;
+      assert len == 2;
+      if (negative) {
+        return -parseOneDigit(buff.getByte(index + 1));
       }
+      return parseOneDigit(firstByte) * 10 + parseOneDigit(buff.getByte(index + 1));
+    }
+    if (negative) {
+      index++;
+      len--;
+    }
+    long lessThanEight = len % 8;
+    if (lessThanEight > 0) {
+      return lessThanEightDigitsUnrolled(negative, index, len, buff);
+    }
+    throw new UnsupportedOperationException("Not implemented yet");
+  }
+
+  private static int lessThanEightDigitsUnrolled(boolean negative, int index, int len, ByteBuf buff) {
+    assert len > 0 && len < 8;
+    int digits = 0;
+    int multiplier = 1;
+    // len >= 4
+    if ((len & Integer.BYTES) != 0) {
+      digits = parseFourDigitsLE(buff.getIntLE(index));
+      index += Integer.BYTES;
+      multiplier = 100;
+    }
+    // len >= 2
+    if ((len & Short.BYTES) != 0) {
+      digits = digits * multiplier + parseTwoDigitsLE(buff.getShortLE(index));
+      index += Short.BYTES;
+      multiplier = 10;
+    }
+    // len >= 1
+    if ((len & Byte.BYTES) != 0) {
+      digits = digits * multiplier + parseOneDigit(buff.getByte(index));
     }
-    return value;
+    return negative ? -digits : digits;
   }
+
+  public static byte decodeDecStringToByte(int index, int len, ByteBuf buff) {
+    // 3 + sign = 8bit
+    return 0;
+  }
+
 }