switch the byte prefix of DATE_INCREMENT and MEASUREMENT
Date increments usually have higher values, so giving them the shorter prefix leaves more value bits in their first byte. I had hoped this would reduce the file size by a lot, but in my example data with 44 million entries (real-life data) it only reduced the storage size by 1.5%. Also fixed a bug in PdbReader that prevented other values for the CONTINUATION byte. Also added a small testing tool that prints the content of a pdb file. It is not (yet) made available as a standalone tool, but it is very useful during debugging sessions.
This commit is contained in:
@@ -4,9 +4,9 @@ enum ByteType {
|
||||
|
||||
CONTINUATION(ContinuationByte.CONTINUATION_BYTE_PREFIX), // 10000000
|
||||
|
||||
MEASUREMENT(1 << 6), // 01000000
|
||||
DATE_INCREMENT(1 << 6), // 01000000
|
||||
|
||||
DATE_INCREMENT(1 << 5), // 00100000
|
||||
MEASUREMENT(1 << 5), // 00100000
|
||||
|
||||
DATE_OFFSET(1 << 4), // 00010000
|
||||
|
||||
@@ -64,7 +64,7 @@ enum ByteType {
|
||||
return firstBytePrefix == (theByte & prefixBits);
|
||||
}
|
||||
|
||||
private long getPrefixBits() {
|
||||
public long getPrefixBits() {
|
||||
return (~getValueBits()) & 0xff;
|
||||
}
|
||||
|
||||
@@ -77,4 +77,8 @@ enum ByteType {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Extracts the value portion of a byte by masking it with this type's value
 * bits, i.e. everything that is not part of the type prefix.
 *
 * @param aByte the byte to decode, given as an int
 * @return {@code aByte & getValueBits()}
 */
public long getValue(final int aByte) {
|
||||
return aByte & getValueBits();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
package org.lucares.performance.db;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.lucares.pdb.api.Tags;
|
||||
|
||||
public class PdbFileViewer {
|
||||
private static final Tags TAGS = Tags.create();
|
||||
|
||||
public static void main(final String[] args) throws FileNotFoundException, IOException {
|
||||
final File file = new File(args[0]);
|
||||
final PdbFile pdbFile = new PdbFile(file.toPath(), TAGS);
|
||||
|
||||
try (final PdbReader reader = new PdbReader(pdbFile, false)) {
|
||||
|
||||
long value = 0;
|
||||
int nextByte;
|
||||
while ((nextByte = reader.readNextByte()) >= 0) {
|
||||
|
||||
final ByteType type = ByteType.getType(nextByte);
|
||||
final long bytesValue = type.getValue(nextByte);
|
||||
|
||||
if (type == ByteType.CONTINUATION) {
|
||||
value = value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS;
|
||||
value = value | type.getValue(nextByte);
|
||||
} else {
|
||||
value = bytesValue;
|
||||
}
|
||||
|
||||
System.out.printf("%s %3d %3d %-14s %14d\n", toBinary(nextByte), nextByte, bytesValue, type, value);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private static String toBinary(final int b) {
|
||||
return String.format("%8s", Integer.toBinaryString(b)).replace(" ", "0");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -28,6 +28,10 @@ class PdbReader implements AutoCloseable {
|
||||
private final PdbFile pdbFile;
|
||||
|
||||
/**
 * Creates a reader for the given file and runs the initialization step
 * (delegates to the two-argument constructor with {@code initialize = true}).
 *
 * @param pdbFile the file to read
 * @throws ReadException propagated from the delegated constructor
 */
public PdbReader(final PdbFile pdbFile) throws ReadException {
|
||||
this(pdbFile, true);
|
||||
}
|
||||
|
||||
PdbReader(final PdbFile pdbFile, final boolean initialize) throws ReadException {
|
||||
super();
|
||||
try {
|
||||
this.pdbFile = pdbFile;
|
||||
@@ -35,11 +39,12 @@ class PdbReader implements AutoCloseable {
|
||||
|
||||
this.data = new BufferedInputStream(new FileInputStream(storageFile));
|
||||
|
||||
if (initialize) {
|
||||
init();
|
||||
}
|
||||
} catch (final FileNotFoundException e) {
|
||||
throw new ReadException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void init() {
|
||||
@@ -138,7 +143,7 @@ class PdbReader implements AutoCloseable {
|
||||
return readContinuationBytes(value);
|
||||
}
|
||||
|
||||
private int readNextByte() throws IOException {
|
||||
int readNextByte() throws IOException {
|
||||
|
||||
final int result;
|
||||
if (peekedByte == PEEK_NOT_SET) {
|
||||
@@ -151,7 +156,7 @@ class PdbReader implements AutoCloseable {
|
||||
return result;
|
||||
}
|
||||
|
||||
private int peekNextByte() throws IOException {
|
||||
int peekNextByte() throws IOException {
|
||||
if (peekedByte == PEEK_NOT_SET) {
|
||||
peekedByte = data.read();
|
||||
}
|
||||
@@ -160,7 +165,7 @@ class PdbReader implements AutoCloseable {
|
||||
|
||||
private long readContinuationBytes(long value) throws IOException {
|
||||
int nextByte;
|
||||
while ((nextByte = peekNextByte()) >= 0 && isContinuationByte(nextByte)) {
|
||||
while ((nextByte = peekNextByte()) >= 0 && ByteType.CONTINUATION.isValid(nextByte)) {
|
||||
value = value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS;
|
||||
value = value | (nextByte & ByteType.CONTINUATION.getValueBits());
|
||||
readNextByte();
|
||||
@@ -169,8 +174,4 @@ class PdbReader implements AutoCloseable {
|
||||
return value;
|
||||
}
|
||||
|
||||
private static boolean isContinuationByte(final int nextByte) {
|
||||
final long bytePrefix = ByteType.CONTINUATION.getBytePrefix();
|
||||
return bytePrefix == (nextByte & bytePrefix);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,8 +40,8 @@ import org.lucares.pdb.api.Entry;
|
||||
* <li>version, start with 000001
|
||||
* <li>number of entries up until this point in this file, 00001
|
||||
* <li>date offsets with absolute values for epoch milli, start with 0001
|
||||
* <li>date increments to the previous date value, start with 001
|
||||
* <li>measurements, start with 01
|
||||
* <li>date increments to the previous date value, start with 01
|
||||
* <li>measurements, start with 001
|
||||
* <li>continuation bytes, start with 1
|
||||
* </ol>
|
||||
*
|
||||
@@ -60,12 +60,12 @@ import org.lucares.pdb.api.Entry;
|
||||
* used for the version number. 001 in our case. So the first byte looks like
|
||||
* this. 00001001
|
||||
* <p>
|
||||
* The second byte starts with 0001 for date offsets, 001 for date increments
|
||||
* and 01 for measurements. All continuation bytes start with 1. E.g. The
|
||||
* The second byte starts with 0001 for date offsets, 01 for date increments and
|
||||
* 001 for measurements. All continuation bytes start with 1. E.g. The
|
||||
* measurement 202 has the unsigned bit representation 11001010. The first byte
|
||||
* of a measurement value starts with 01, so we have room for the first 6 bits.
|
||||
* of a measurement value starts with 001, so we have room for the first 5 bits.
|
||||
* But we need 8 bits. So we must add another byte. The second byte starts with
|
||||
* 1 and has room for 7 bits. The result looks like this: <b>01</b><i>000001<i>
|
||||
* 1 and has room for 7 bits. The result looks like this: <b>001</b><i>00001</i>
|
||||
* <b>1</b><i>1001010</i>
|
||||
*/
|
||||
class PdbWriter implements AutoCloseable, Flushable {
|
||||
|
||||
Reference in New Issue
Block a user