switch the byte prefix of DATE_INCREMENT and MEASUREMENT
Date increments usually have higher values than measurements, so they now get the shorter prefix, which leaves one more value bit in the first byte. I had hoped this would reduce the file size by a lot, but with my example data of 44 million entries (real-life data) it only reduced the storage size by 1.5%. Also fixed a bug in PdbReader that prevented using other values for the CONTINUATION byte. Also added a small testing tool that prints the content of a pdb file. It is not (yet) available as a standalone tool, but it is very useful during debugging sessions.
This commit is contained in:
@@ -4,9 +4,9 @@ enum ByteType {
|
|||||||
|
|
||||||
CONTINUATION(ContinuationByte.CONTINUATION_BYTE_PREFIX), // 10000000
|
CONTINUATION(ContinuationByte.CONTINUATION_BYTE_PREFIX), // 10000000
|
||||||
|
|
||||||
MEASUREMENT(1 << 6), // 01000000
|
DATE_INCREMENT(1 << 6), // 01000000
|
||||||
|
|
||||||
DATE_INCREMENT(1 << 5), // 00100000
|
MEASUREMENT(1 << 5), // 00100000
|
||||||
|
|
||||||
DATE_OFFSET(1 << 4), // 00010000
|
DATE_OFFSET(1 << 4), // 00010000
|
||||||
|
|
||||||
@@ -64,7 +64,7 @@ enum ByteType {
|
|||||||
return firstBytePrefix == (theByte & prefixBits);
|
return firstBytePrefix == (theByte & prefixBits);
|
||||||
}
|
}
|
||||||
|
|
||||||
private long getPrefixBits() {
|
public long getPrefixBits() {
|
||||||
return (~getValueBits()) & 0xff;
|
return (~getValueBits()) & 0xff;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -77,4 +77,8 @@ enum ByteType {
|
|||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getValue(final int aByte) {
|
||||||
|
return aByte & getValueBits();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
package org.lucares.performance.db;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.lucares.pdb.api.Tags;
|
||||||
|
|
||||||
|
public class PdbFileViewer {
|
||||||
|
private static final Tags TAGS = Tags.create();
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws FileNotFoundException, IOException {
|
||||||
|
final File file = new File(args[0]);
|
||||||
|
final PdbFile pdbFile = new PdbFile(file.toPath(), TAGS);
|
||||||
|
|
||||||
|
try (final PdbReader reader = new PdbReader(pdbFile, false)) {
|
||||||
|
|
||||||
|
long value = 0;
|
||||||
|
int nextByte;
|
||||||
|
while ((nextByte = reader.readNextByte()) >= 0) {
|
||||||
|
|
||||||
|
final ByteType type = ByteType.getType(nextByte);
|
||||||
|
final long bytesValue = type.getValue(nextByte);
|
||||||
|
|
||||||
|
if (type == ByteType.CONTINUATION) {
|
||||||
|
value = value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS;
|
||||||
|
value = value | type.getValue(nextByte);
|
||||||
|
} else {
|
||||||
|
value = bytesValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.printf("%s %3d %3d %-14s %14d\n", toBinary(nextByte), nextByte, bytesValue, type, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String toBinary(final int b) {
|
||||||
|
return String.format("%8s", Integer.toBinaryString(b)).replace(" ", "0");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -28,6 +28,10 @@ class PdbReader implements AutoCloseable {
|
|||||||
private final PdbFile pdbFile;
|
private final PdbFile pdbFile;
|
||||||
|
|
||||||
public PdbReader(final PdbFile pdbFile) throws ReadException {
|
public PdbReader(final PdbFile pdbFile) throws ReadException {
|
||||||
|
this(pdbFile, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
PdbReader(final PdbFile pdbFile, final boolean initialize) throws ReadException {
|
||||||
super();
|
super();
|
||||||
try {
|
try {
|
||||||
this.pdbFile = pdbFile;
|
this.pdbFile = pdbFile;
|
||||||
@@ -35,11 +39,12 @@ class PdbReader implements AutoCloseable {
|
|||||||
|
|
||||||
this.data = new BufferedInputStream(new FileInputStream(storageFile));
|
this.data = new BufferedInputStream(new FileInputStream(storageFile));
|
||||||
|
|
||||||
|
if (initialize) {
|
||||||
init();
|
init();
|
||||||
|
}
|
||||||
} catch (final FileNotFoundException e) {
|
} catch (final FileNotFoundException e) {
|
||||||
throw new ReadException(e);
|
throw new ReadException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void init() {
|
private void init() {
|
||||||
@@ -138,7 +143,7 @@ class PdbReader implements AutoCloseable {
|
|||||||
return readContinuationBytes(value);
|
return readContinuationBytes(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int readNextByte() throws IOException {
|
int readNextByte() throws IOException {
|
||||||
|
|
||||||
final int result;
|
final int result;
|
||||||
if (peekedByte == PEEK_NOT_SET) {
|
if (peekedByte == PEEK_NOT_SET) {
|
||||||
@@ -151,7 +156,7 @@ class PdbReader implements AutoCloseable {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int peekNextByte() throws IOException {
|
int peekNextByte() throws IOException {
|
||||||
if (peekedByte == PEEK_NOT_SET) {
|
if (peekedByte == PEEK_NOT_SET) {
|
||||||
peekedByte = data.read();
|
peekedByte = data.read();
|
||||||
}
|
}
|
||||||
@@ -160,7 +165,7 @@ class PdbReader implements AutoCloseable {
|
|||||||
|
|
||||||
private long readContinuationBytes(long value) throws IOException {
|
private long readContinuationBytes(long value) throws IOException {
|
||||||
int nextByte;
|
int nextByte;
|
||||||
while ((nextByte = peekNextByte()) >= 0 && isContinuationByte(nextByte)) {
|
while ((nextByte = peekNextByte()) >= 0 && ByteType.CONTINUATION.isValid(nextByte)) {
|
||||||
value = value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS;
|
value = value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS;
|
||||||
value = value | (nextByte & ByteType.CONTINUATION.getValueBits());
|
value = value | (nextByte & ByteType.CONTINUATION.getValueBits());
|
||||||
readNextByte();
|
readNextByte();
|
||||||
@@ -169,8 +174,4 @@ class PdbReader implements AutoCloseable {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isContinuationByte(final int nextByte) {
|
|
||||||
final long bytePrefix = ByteType.CONTINUATION.getBytePrefix();
|
|
||||||
return bytePrefix == (nextByte & bytePrefix);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,8 +40,8 @@ import org.lucares.pdb.api.Entry;
|
|||||||
* <li>version, start with 000001
|
* <li>version, start with 000001
|
||||||
* <li>number of entries up until this point in this file, 00001
|
* <li>number of entries up until this point in this file, 00001
|
||||||
* <li>date offsets with absolute values for epoch milli, start with 0001
|
* <li>date offsets with absolute values for epoch milli, start with 0001
|
||||||
* <li>date increments to the previous date value, start with 001
|
* <li>date increments to the previous date value, start with 01
|
||||||
* <li>measurements, start with 01
|
* <li>measurements, start with 001
|
||||||
* <li>continuation bytes, start with 1
|
* <li>continuation bytes, start with 1
|
||||||
* </ol>
|
* </ol>
|
||||||
*
|
*
|
||||||
@@ -60,12 +60,12 @@ import org.lucares.pdb.api.Entry;
|
|||||||
* used for the version number. 001 in our case. So the first byte looks like
|
* used for the version number. 001 in our case. So the first byte looks like
|
||||||
* this. 00001001
|
* this. 00001001
|
||||||
* <p>
|
* <p>
|
||||||
* The second byte starts with 0001 for date offsets, 001 for date increments
|
* The second byte starts with 0001 for date offsets, 01 for date increments and
|
||||||
* and 01 for measurements. All continuation bytes start with 1. E.g. The
|
* 001 for measurements. All continuation bytes start with 1. E.g. The
|
||||||
* measurement 202 has the unsigned bit representation 11001010. The first byte
|
* measurement 202 has the unsigned bit representation 11001010. The first byte
|
||||||
* of a measurement value starts with 01, so we have room for the first 6 bits.
|
* of a measurement value starts with 001, so we have room for the first 5 bits.
|
||||||
* But we need 8 bits. So we must add another byte. The second byte starts with
|
* But we need 8 bits. So we must add another byte. The second byte starts with
|
||||||
* 1 and has room for 7 bits. The result looks like this: <b>01</b><i>000001<i>
|
* 1 and has room for 7 bits. The result looks like this: <b>001</b><i>00001</i>
|
||||||
* <b>1</b><i>1001010</i>
|
* <b>1</b><i>1001010</i>
|
||||||
*/
|
*/
|
||||||
class PdbWriter implements AutoCloseable, Flushable {
|
class PdbWriter implements AutoCloseable, Flushable {
|
||||||
|
|||||||
Reference in New Issue
Block a user