switch the byte prefix of DATE_INCREMENT and MEASUREMENT

Date increments usually have higher values.
I had hoped this would reduce the file size by a lot, but in my example data
with 44 million entries (real-life data) it only reduced the storage
size by 1.5%.
Also fixed a bug in PdbReader that prevented other values for the 
CONTINUATION byte.
Also added a small testing tool that prints the content of a pdb file.
It is not (yet) made available as a standalone tool, but it is very
useful during debugging sessions.
This commit is contained in:
2017-04-13 20:19:29 +02:00
parent 1163c1ca22
commit f22be73b42
4 changed files with 65 additions and 18 deletions

View File

@@ -4,9 +4,9 @@ enum ByteType {
CONTINUATION(ContinuationByte.CONTINUATION_BYTE_PREFIX), // 10000000
MEASUREMENT(1 << 6), // 01000000
DATE_INCREMENT(1 << 6), // 01000000
DATE_INCREMENT(1 << 5), // 00100000
MEASUREMENT(1 << 5), // 00100000
DATE_OFFSET(1 << 4), // 00010000
@@ -64,7 +64,7 @@ enum ByteType {
return firstBytePrefix == (theByte & prefixBits);
}
private long getPrefixBits() {
public long getPrefixBits() {
return (~getValueBits()) & 0xff;
}
@@ -77,4 +77,8 @@ enum ByteType {
}
return null;
}
/**
 * Extracts the value part of a byte by masking it with this type's value
 * bits.
 *
 * @param aByte the byte to mask
 * @return the bits of {@code aByte} selected by {@code getValueBits()}
 */
public long getValue(final int aByte) {
return aByte & getValueBits();
}
}

View File

@@ -0,0 +1,42 @@
package org.lucares.performance.db;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.lucares.pdb.api.Tags;
/**
 * Debugging aid that dumps the content of a pdb file to standard out, one
 * line per byte.
 * <p>
 * Each line shows the byte in binary, the byte as a decimal number, the value
 * bits of the byte, the detected {@link ByteType} and the value accumulated so
 * far (continuation bytes are appended to the value of the preceding bytes).
 */
public class PdbFileViewer {

    private static final Tags TAGS = Tags.create();

    /**
     * Prints every byte of the given pdb file.
     *
     * @param args exactly the first element is used: the path of the pdb file
     * @throws IllegalArgumentException if no file was given
     * @throws FileNotFoundException    declared for callers; see PdbReader
     * @throws IOException              if reading from the file fails
     */
    public static void main(final String[] args) throws FileNotFoundException, IOException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException("usage: PdbFileViewer <pdb-file>");
        }

        final File file = new File(args[0]);
        final PdbFile pdbFile = new PdbFile(file.toPath(), TAGS);

        // 'false' skips the reader's init() step, so the dump starts at the
        // very first byte of the file.
        try (final PdbReader reader = new PdbReader(pdbFile, false)) {

            long value = 0;
            int nextByte;
            while ((nextByte = reader.readNextByte()) >= 0) {

                final ByteType type = ByteType.getType(nextByte);
                if (type == null) {
                    // NOTE(review): the type lookup presumably returns null for a
                    // byte without a known prefix - print it instead of dying
                    // with a NullPointerException in the middle of the dump.
                    // The accumulated value is left untouched.
                    System.out.printf("%s %3d %3s %-14s %14s\n", toBinary(nextByte), nextByte, "?", "UNKNOWN", "?");
                    continue;
                }

                final long bytesValue = type.getValue(nextByte);

                if (type == ByteType.CONTINUATION) {
                    // a continuation byte extends the value of the previous bytes
                    value = (value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS) | bytesValue;
                } else {
                    // every other type starts a new value
                    value = bytesValue;
                }

                System.out.printf("%s %3d %3d %-14s %14d\n", toBinary(nextByte), nextByte, bytesValue, type, value);
            }
        }
    }

    /**
     * Formats a byte as binary string that is left padded with zeros to a
     * width of 8 characters.
     */
    private static String toBinary(final int b) {
        return String.format("%8s", Integer.toBinaryString(b)).replace(" ", "0");
    }
}

View File

@@ -28,6 +28,10 @@ class PdbReader implements AutoCloseable {
private final PdbFile pdbFile;
/**
 * Creates a reader for the given file. Delegates to the two-argument
 * constructor with {@code initialize} set to {@code true}.
 *
 * @param pdbFile the file to read
 * @throws ReadException thrown by the two-argument constructor, which wraps
 *                       a {@code FileNotFoundException} in it
 */
public PdbReader(final PdbFile pdbFile) throws ReadException {
this(pdbFile, true);
}
PdbReader(final PdbFile pdbFile, final boolean initialize) throws ReadException {
super();
try {
this.pdbFile = pdbFile;
@@ -35,11 +39,12 @@ class PdbReader implements AutoCloseable {
this.data = new BufferedInputStream(new FileInputStream(storageFile));
init();
if (initialize) {
init();
}
} catch (final FileNotFoundException e) {
throw new ReadException(e);
}
}
private void init() {
@@ -138,7 +143,7 @@ class PdbReader implements AutoCloseable {
return readContinuationBytes(value);
}
private int readNextByte() throws IOException {
int readNextByte() throws IOException {
final int result;
if (peekedByte == PEEK_NOT_SET) {
@@ -151,7 +156,7 @@ class PdbReader implements AutoCloseable {
return result;
}
private int peekNextByte() throws IOException {
int peekNextByte() throws IOException {
if (peekedByte == PEEK_NOT_SET) {
peekedByte = data.read();
}
@@ -160,7 +165,7 @@ class PdbReader implements AutoCloseable {
private long readContinuationBytes(long value) throws IOException {
int nextByte;
while ((nextByte = peekNextByte()) >= 0 && isContinuationByte(nextByte)) {
while ((nextByte = peekNextByte()) >= 0 && ByteType.CONTINUATION.isValid(nextByte)) {
value = value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS;
value = value | (nextByte & ByteType.CONTINUATION.getValueBits());
readNextByte();
@@ -169,8 +174,4 @@ class PdbReader implements AutoCloseable {
return value;
}
private static boolean isContinuationByte(final int nextByte) {
final long bytePrefix = ByteType.CONTINUATION.getBytePrefix();
return bytePrefix == (nextByte & bytePrefix);
}
}

View File

@@ -40,8 +40,8 @@ import org.lucares.pdb.api.Entry;
* <li>version, start with 000001
* <li>number of entries up until this point in this file, 00001
* <li>date offsets with absolute values for epoch milli, start with 0001
* <li>date increments to the previous date value, start with 001
* <li>measurements, start with 01
* <li>date increments to the previous date value, start with 01
* <li>measurements, start with 001
* <li>continuation bytes, start with 1
* </ol>
*
@@ -60,12 +60,12 @@ import org.lucares.pdb.api.Entry;
* used for the version number. 001 in our case. So the first byte looks like
* this. 00001001
* <p>
* The second byte starts with 0001 for date offsets, 001 for date increments
* and 01 for measurements. All continuation bytes start with 1. E.g. The
* The second byte starts with 0001 for date offsets, 01 for date increments and
* 001 for measurements. All continuation bytes start with 1. E.g. The
* measurement 202 has the unsigned bit representation 11001010. The first byte
* of a measurement value starts with 01, so we have room for the first 6 bits.
* of a measurement value starts with 001, so we have room for the first 5 bits.
* But we need 8 bits. So we must add another byte. The second byte starts with
* 1 and has room for 7 bits. The result looks like this: <b>01</b><i>000001</i>
* 1 and has room for 7 bits. The result looks like this: <b>001</b><i>00001</i>
* <b>1</b><i>1001010</i>
*/
class PdbWriter implements AutoCloseable, Flushable {