From f22be73b42976940832df65b67c61b6e8bd94ded Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 13 Apr 2017 20:19:29 +0200 Subject: [PATCH] switch the byte prefix of DATE_INCREMENT and MEASUREMENT Date increments usually have higher values. I had hoped to reduce the file size by a lot. But in my example data with 44 million entries (real-life data) it only reduced the storage size by 1.5%. Also fixed a bug in PdbReader that prevented other values for the CONTINUATION byte. Also added a small testing tool that prints the content of a pdb file. It is not (yet) made available as a standalone tool, but during debugging sessions it is very useful. --- .../org/lucares/performance/db/ByteType.java | 10 +++-- .../lucares/performance/db/PdbFileViewer.java | 42 +++++++++++++++++++ .../org/lucares/performance/db/PdbReader.java | 19 +++++---- .../org/lucares/performance/db/PdbWriter.java | 12 +++--- 4 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 performanceDb/src/main/java/org/lucares/performance/db/PdbFileViewer.java diff --git a/performanceDb/src/main/java/org/lucares/performance/db/ByteType.java b/performanceDb/src/main/java/org/lucares/performance/db/ByteType.java index 1581411..cbe693e 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/ByteType.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/ByteType.java @@ -4,9 +4,9 @@ enum ByteType { CONTINUATION(ContinuationByte.CONTINUATION_BYTE_PREFIX), // 10000000 - MEASUREMENT(1 << 6), // 01000000 + DATE_INCREMENT(1 << 6), // 01000000 - DATE_INCREMENT(1 << 5), // 00100000 + MEASUREMENT(1 << 5), // 00100000 DATE_OFFSET(1 << 4), // 00010000 @@ -64,7 +64,7 @@ enum ByteType { return firstBytePrefix == (theByte & prefixBits); } - private long getPrefixBits() { + public long getPrefixBits() { return (~getValueBits()) & 0xff; } @@ -77,4 +77,8 @@ enum ByteType { } return null; } + + public long getValue(final int aByte) { + return aByte & getValueBits(); + } } diff --git 
a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileViewer.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileViewer.java new file mode 100644 index 0000000..a7f6f8c --- /dev/null +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileViewer.java @@ -0,0 +1,42 @@ +package org.lucares.performance.db; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; + +import org.lucares.pdb.api.Tags; + +public class PdbFileViewer { + private static final Tags TAGS = Tags.create(); + + public static void main(final String[] args) throws FileNotFoundException, IOException { + final File file = new File(args[0]); + final PdbFile pdbFile = new PdbFile(file.toPath(), TAGS); + + try (final PdbReader reader = new PdbReader(pdbFile, false)) { + + long value = 0; + int nextByte; + while ((nextByte = reader.readNextByte()) >= 0) { + + final ByteType type = ByteType.getType(nextByte); + final long bytesValue = type.getValue(nextByte); + + if (type == ByteType.CONTINUATION) { + value = value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS; + value = value | type.getValue(nextByte); + } else { + value = bytesValue; + } + + System.out.printf("%s %3d %3d %-14s %14d\n", toBinary(nextByte), nextByte, bytesValue, type, value); + } + + } + } + + private static String toBinary(final int b) { + return String.format("%8s", Integer.toBinaryString(b)).replace(" ", "0"); + } + +} diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java index 037cddc..d7f48bf 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java @@ -28,6 +28,10 @@ class PdbReader implements AutoCloseable { private final PdbFile pdbFile; public PdbReader(final PdbFile pdbFile) throws ReadException { + this(pdbFile, true); + } + + PdbReader(final PdbFile 
pdbFile, final boolean initialize) throws ReadException { super(); try { this.pdbFile = pdbFile; @@ -35,11 +39,12 @@ class PdbReader implements AutoCloseable { this.data = new BufferedInputStream(new FileInputStream(storageFile)); - init(); + if (initialize) { + init(); + } } catch (final FileNotFoundException e) { throw new ReadException(e); } - } private void init() { @@ -138,7 +143,7 @@ class PdbReader implements AutoCloseable { return readContinuationBytes(value); } - private int readNextByte() throws IOException { + int readNextByte() throws IOException { final int result; if (peekedByte == PEEK_NOT_SET) { @@ -151,7 +156,7 @@ class PdbReader implements AutoCloseable { return result; } - private int peekNextByte() throws IOException { + int peekNextByte() throws IOException { if (peekedByte == PEEK_NOT_SET) { peekedByte = data.read(); } @@ -160,7 +165,7 @@ class PdbReader implements AutoCloseable { private long readContinuationBytes(long value) throws IOException { int nextByte; - while ((nextByte = peekNextByte()) >= 0 && isContinuationByte(nextByte)) { + while ((nextByte = peekNextByte()) >= 0 && ByteType.CONTINUATION.isValid(nextByte)) { value = value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS; value = value | (nextByte & ByteType.CONTINUATION.getValueBits()); readNextByte(); @@ -169,8 +174,4 @@ class PdbReader implements AutoCloseable { return value; } - private static boolean isContinuationByte(final int nextByte) { - final long bytePrefix = ByteType.CONTINUATION.getBytePrefix(); - return bytePrefix == (nextByte & bytePrefix); - } } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java index d76d823..072a96b 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java @@ -40,8 +40,8 @@ import org.lucares.pdb.api.Entry; *
  • version, start with 000001 *
  • number of entries up until this point in this file, 00001 *
  • date offsets with absolute values for epoch milli, start with 0001 - *
  • date increments to the previous date value, start with 001 - *
  • measurements, start with 01 + *
  • date increments to the previous date value, start with 01 + *
  • measurements, start with 001 *
  • continuation bytes, start with 1 * * @@ -60,12 +60,12 @@ import org.lucares.pdb.api.Entry; * used for the version number. 001 in our case. So the first byte looks like * this. 00001001 *

    - * The second byte starts with 0001 for date offsets, 001 for date increments - * and 01 for measurements. All continuation bytes start with 1. E.g. The + * The second byte starts with 0001 for date offsets, 01 for date increments and + * 001 for measurements. All continuation bytes start with 1. E.g. The * measurement 202 has the unsigned bit representation 11001010. The first byte - * of a measurement value starts with 01, so we have room for the first 6 bits. + * of a measurement value starts with 001, so we have room for the first 5 bits. * But we need 8 bits. So we must add another byte. The second byte starts with - * 1 and has room for 7 bits. The result looks like this: 01000001 + * 1 and has room for 7 bits. The result looks like this: 00100001 * 11001010 */ class PdbWriter implements AutoCloseable, Flushable {