From db0b3d6d24e2fcba53e7429e9266b367ed0290db Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 27 Dec 2016 10:24:56 +0100 Subject: [PATCH] new file format Store values in sequences of variable length. Instead of using 8 bytes per entry we are now using between 2 and 20 bytes. But we are also able to store every non-negative long value. --- .../main/java/org/lucares/pdb/api/Entry.java | 6 +- .../main/java/org/lucares/pdb/api/Tags.java | 4 +- performanceDb/.gitignore | 1 + .../org/lucares/performance/db/ByteType.java | 80 ++++++ .../performance/db/FileCorruptException.java | 10 + .../org/lucares/performance/db/PdbFile.java | 47 +--- .../performance/db/PdbFileByTimeAsc.java | 8 +- .../performance/db/PdbFileOffsetTime.java | 29 +++ .../lucares/performance/db/PdbFileUtils.java | 12 +- .../org/lucares/performance/db/PdbReader.java | 231 +++++++++--------- .../org/lucares/performance/db/PdbWriter.java | 169 ++++++++++--- .../lucares/performance/db/StorageUtils.java | 47 ++-- .../lucares/performance/db/TagsToFile.java | 35 ++- .../performance/db/BitFiddlingTest.java | 42 ++++ .../performance/db/PdbReaderWriterTest.java | 117 ++++----- .../performance/db/PdbWriterManagerTest.java | 2 +- .../performance/db/PerformanceDbTest.java | 5 +- .../performance/db/StorageUtilsTest.java | 16 +- .../performance/db/TagsToFilesTest.java | 11 +- 19 files changed, 522 insertions(+), 350 deletions(-) create mode 100644 performanceDb/src/main/java/org/lucares/performance/db/ByteType.java create mode 100644 performanceDb/src/main/java/org/lucares/performance/db/FileCorruptException.java create mode 100644 performanceDb/src/main/java/org/lucares/performance/db/PdbFileOffsetTime.java create mode 100644 performanceDb/src/test/java/org/lucares/performance/db/BitFiddlingTest.java diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java b/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java index 28c5252..c714e89 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java +++ 
b/pdb-api/src/main/java/org/lucares/pdb/api/Entry.java @@ -13,8 +13,6 @@ public class Entry { */ public static final Entry POISON = new Entry(0, -1); - public static final long MAX_VALUE = 0xFF_FF_FF_FFL; - private final long epochMilli; private final long value; @@ -28,8 +26,8 @@ public class Entry { } public Entry(final long epochMilli, final long value, final Tags tags) { - if (value < 0 || value > MAX_VALUE) { - throw new IllegalArgumentException("value must be between 0 and " + MAX_VALUE + ", but was " + value); + if (value < 0) { + throw new IllegalArgumentException("value must be between 0 and " + Long.MAX_VALUE + ", but was " + value); } this.epochMilli = epochMilli; diff --git a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java index 00be70c..a73a536 100644 --- a/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java +++ b/pdb-api/src/main/java/org/lucares/pdb/api/Tags.java @@ -10,7 +10,9 @@ import java.util.TreeSet; import java.util.function.BiConsumer; public class Tags { - static final Tags EMPTY = new Tags(); + // TODO @ahr move class to org.lcuares.performance.db and make this package + // private + public static final Tags EMPTY = new Tags(); private final Map tags; diff --git a/performanceDb/.gitignore b/performanceDb/.gitignore index 0e895c8..b2922e7 100644 --- a/performanceDb/.gitignore +++ b/performanceDb/.gitignore @@ -4,3 +4,4 @@ /.classpath /.project /test-output +/doc/ diff --git a/performanceDb/src/main/java/org/lucares/performance/db/ByteType.java b/performanceDb/src/main/java/org/lucares/performance/db/ByteType.java new file mode 100644 index 0000000..1581411 --- /dev/null +++ b/performanceDb/src/main/java/org/lucares/performance/db/ByteType.java @@ -0,0 +1,80 @@ +package org.lucares.performance.db; + +enum ByteType { + + CONTINUATION(ContinuationByte.CONTINUATION_BYTE_PREFIX), // 10000000 + + MEASUREMENT(1 << 6), // 01000000 + + DATE_INCREMENT(1 << 5), // 00100000 + + DATE_OFFSET(1 << 
4), // 00010000 + + VERSION(1);// 00000001 + + interface ContinuationByte { + long NUMBER_OF_VALUES_BITS = 7; + + long CONTINUATION_BYTE_PREFIX = 1 << NUMBER_OF_VALUES_BITS; // 10000000 + } + + interface VersionByte { + /** + * The version uses at least two bytes. The first byte is the prefix + * which cannot hold any value (unless it is 0). And the second byte is + * the actual value. + */ + long MIN_LENGTH = 2; + } + + private final long firstBytePrefix; + + private ByteType(final long firstBytePrefix) { + this.firstBytePrefix = firstBytePrefix; + } + + public long getBytePrefix() { + return firstBytePrefix; + } + + /** + * the max value for the first byte is the prefix minus 1, because prefixes + * start with 0⋯010⋯0, so prefix -1 is 0⋯01⋯1 which is exactly the max value + * + * @return the maximum value for the first byte of this sequence + */ + public long getFirstByteMaxValue() { + return firstBytePrefix - 1; + } + + /** + * the value bits are the prefix minus 1, because prefixes start with + * 0⋯010⋯0, so prefix -1 is 0⋯01⋯1 which exactly represents the value bits. 
+ * + * @return bitmap with the value bits set + */ + public long getValueBits() { + return firstBytePrefix - 1; + } + + public boolean isValid(final int theByte) { + + final long prefixBits = getPrefixBits(); + + return firstBytePrefix == (theByte & prefixBits); + } + + private long getPrefixBits() { + return (~getValueBits()) & 0xff; + } + + public static ByteType getType(final int aByte) { + + for (final ByteType byteType : values()) { + if (byteType.isValid(aByte)) { + return byteType; + } + } + return null; + } +} diff --git a/performanceDb/src/main/java/org/lucares/performance/db/FileCorruptException.java b/performanceDb/src/main/java/org/lucares/performance/db/FileCorruptException.java new file mode 100644 index 0000000..3abfd44 --- /dev/null +++ b/performanceDb/src/main/java/org/lucares/performance/db/FileCorruptException.java @@ -0,0 +1,10 @@ +package org.lucares.performance.db; + +public class FileCorruptException extends RuntimeException { + + private static final long serialVersionUID = -4194021585305770924L; + + public FileCorruptException(final String message) { + super(message); + } +} diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbFile.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbFile.java index 65a0dcc..c5219c0 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbFile.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbFile.java @@ -8,30 +8,17 @@ import org.lucares.pdb.api.Tags; class PdbFile { private final Tags tags; - private final Day day; - private final Path path; - private final long offsetInEpochMilli; - @Deprecated - public PdbFile(final Day day, final File file, final Tags tags) { - this.day = day; + public PdbFile(final File file, final Tags tags) { this.path = file.toPath(); this.tags = tags; - offsetInEpochMilli = day.getOffsetInEpochMilli(); } - public PdbFile(final Day day, final Path path, final Tags tags) { - this.day = day; + public PdbFile(final Path 
path, final Tags tags) { this.path = path; this.tags = tags; - offsetInEpochMilli = day.getOffsetInEpochMilli(); - } - - public static PdbFile today(final File file, final Tags tags) { - final Day day = new Day(); - return new PdbFile(day, file, tags); } public Tags getTags() { @@ -47,30 +34,15 @@ class PdbFile { return path; } - public Day getDay() { - return day; - } - - public TimeRange getTimeRange() { - - return day.toTimeRange(); - } - - public long getOffsetInEpochMilli() { - return offsetInEpochMilli; - } - @Override public String toString() { - return "PdbFile [" + path + " " + getTimeRange() + " " + tags + "]\n"; + return "PdbFile [" + path + " " + tags + "]\n"; } @Override public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + ((day == null) ? 0 : day.hashCode()); - result = prime * result + (int) (offsetInEpochMilli ^ (offsetInEpochMilli >>> 32)); result = prime * result + ((path == null) ? 0 : path.hashCode()); result = prime * result + ((tags == null) ? 
0 : tags.hashCode()); return result; @@ -85,13 +57,6 @@ class PdbFile { if (getClass() != obj.getClass()) return false; final PdbFile other = (PdbFile) obj; - if (day == null) { - if (other.day != null) - return false; - } else if (!day.equals(other.day)) - return false; - if (offsetInEpochMilli != other.offsetInEpochMilli) - return false; if (path == null) { if (other.path != null) return false; @@ -104,4 +69,10 @@ class PdbFile { return false; return true; } + + public TimeRange getTimeRange() { + // TODO @ahr should return the minimal date that can be added + return null; + } + } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileByTimeAsc.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileByTimeAsc.java index 7a1c4ac..57fa754 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileByTimeAsc.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileByTimeAsc.java @@ -3,15 +3,15 @@ package org.lucares.performance.db; import java.time.OffsetDateTime; import java.util.Comparator; -public class PdbFileByTimeAsc implements Comparator { +public class PdbFileByTimeAsc implements Comparator { public static final PdbFileByTimeAsc INSTANCE = new PdbFileByTimeAsc(); @Override - public int compare(final PdbFile o1, final PdbFile o2) { + public int compare(final PdbFileOffsetTime o1, final PdbFileOffsetTime o2) { - final OffsetDateTime o1From = o1.getTimeRange().getFrom(); - final OffsetDateTime o2From = o2.getTimeRange().getFrom(); + final OffsetDateTime o1From = o1.getOffsetTime(); + final OffsetDateTime o2From = o2.getOffsetTime(); return o1From.compareTo(o2From); } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileOffsetTime.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileOffsetTime.java new file mode 100644 index 0000000..3ed4a12 --- /dev/null +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileOffsetTime.java @@ -0,0 +1,29 @@ 
+package org.lucares.performance.db; + +import java.time.OffsetDateTime; + +public class PdbFileOffsetTime { + private final PdbFile pdbFile; + + private final OffsetDateTime offsetTime; + + public PdbFileOffsetTime(final PdbFile pdbFile, final OffsetDateTime offsetTime) { + super(); + this.pdbFile = pdbFile; + this.offsetTime = offsetTime; + } + + public PdbFile getPdbFile() { + return pdbFile; + } + + public OffsetDateTime getOffsetTime() { + return offsetTime; + } + + @Override + public String toString() { + return "PdbFileOffsetTime [pdbFile=" + pdbFile + ", offsetTime=" + offsetTime + "]"; + } + +} diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileUtils.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileUtils.java index 2385fbe..de621fb 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbFileUtils.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbFileUtils.java @@ -5,17 +5,11 @@ import java.io.IOException; import java.time.OffsetDateTime; class PdbFileUtils { - static TimeRange getAvailableTimeRange(final PdbFile pdbFile) throws FileNotFoundException, IOException { + static OffsetDateTime dateOffset(final PdbFile pdbFile) throws FileNotFoundException, IOException { try (PdbReader reader = new PdbReader(pdbFile)) { - if (reader.canSeekTail(2)) { - reader.seekTail(2); - final OffsetDateTime lastWrittenDate = reader.readDate(); - - return new TimeRange(lastWrittenDate, pdbFile.getTimeRange().getTo()); - } else { - return pdbFile.getTimeRange(); - } + reader.seekToLastValue(); + return reader.getDateOffsetAtCurrentPosition(); } } } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java index b4a37d1..ab83fb9 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbReader.java @@ -1,12 +1,14 @@ 
package org.lucares.performance.db; +import java.io.BufferedInputStream; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.RandomAccessFile; +import java.io.InputStream; import java.time.Instant; import java.time.OffsetDateTime; -import java.time.ZoneOffset; +import java.time.ZoneId; import java.util.Optional; import org.lucares.pdb.api.Entry; @@ -14,134 +16,54 @@ import org.lucares.pdb.api.Tags; class PdbReader implements AutoCloseable { - private static final int BYTES_PER_VALUE = 4; - private final RandomAccessFile data; - private final byte[] buffer = new byte[BYTES_PER_VALUE]; - private final PdbFile pdbFile; + private static final int PEEK_NOT_SET = Integer.MIN_VALUE; + + static final long VERSION = 1; + + private final InputStream data; + private long dateOffsetAtCurrentLocation = 0; + private long index = 0; + private int peekedByte = PEEK_NOT_SET; public PdbReader(final PdbFile pdbFile) throws FileNotFoundException { super(); - this.pdbFile = pdbFile; - File storageFile = pdbFile.getPath().toFile(); - this.data = new RandomAccessFile(storageFile, "r"); + final File storageFile = pdbFile.getPath().toFile(); + + this.data = new BufferedInputStream(new FileInputStream(storageFile)); + + init(); } - /** - * Reads the next value. - *

- * All values are non-negative. A negative return value indicates that the - * end of the file has been reached - * - * @return the value or -1 if end of stream has been reached - */ - public long readValue() { - return read(); - } - - /** - * Reads the next date value. - * - * @return the date, or {@code -1} if end of stream has been reached - * @throws IOException - */ - public long readEpochMilli() { - final long value = read(); - if (value < 0) { - return -1; - } - return pdbFile.getOffsetInEpochMilli() + value; - } - - public OffsetDateTime readDate() { - final long epochMilli = readEpochMilli(); - - if (epochMilli < 0) { - return null; - } - return Instant.ofEpochMilli(epochMilli).atOffset(ZoneOffset.UTC); - } - - // visible for test - long read() { + private void init() { try { - final int read = data.read(buffer); - - if (read != BYTES_PER_VALUE) { - if (read < 0) { - return -1; - } else { - throw new IllegalStateException("invalid file"); - } + final long version = readValue(ByteType.VERSION); + if (version == -1) { + throw new IllegalStateException("Cannot read empty file. The file must have at least a version. " + + "Otherwise we don't know in which version a writer might append data."); + } else if (version != VERSION) { + throw new IllegalStateException( + "The file is not of version " + VERSION + ". Actual version: " + version); } - return BitFiddling.makeLong(buffer[0], buffer[1], buffer[2], buffer[3]); } catch (final IOException e) { - throw new ReadRuntimeException(e); - } - } - - /** - * Seek to the n-th value. 
- * - * @param n - */ - public void seek(final long n) { - try { - if (n < 0) { - throw new IllegalArgumentException("n must be non-negative, but was " + n); - } - - final long pos = n * BYTES_PER_VALUE; - - if (pos >= data.length()) { - throw new IllegalArgumentException("cannot seek to value " + n + ", because the file only has " - + (data.length() / BYTES_PER_VALUE) + " values"); - } - - data.seek(pos); - } catch (final IOException e) { - throw new ReadRuntimeException(e); + throw new ReadException(e); } } /** * Seek to the end of the file. - *

- * After this operation you can read {@code n} values. * - * @param n - * seek to the n-th last value * @throws ReadRuntimeException - * if the file does not have {@code n} entries + * if an IOException occurs */ - public void seekTail(final long n) { - try { - if (n < 0) { - throw new IllegalArgumentException("n must be non-negative, but was " + n); - } + public void seekToLastValue() { - final long pos = computeSeekPosition(n); - - data.seek(pos); - } catch (final IOException e) { - throw new ReadRuntimeException(e); + while (readEntry(Tags.EMPTY).isPresent()) { + // seek to the end + // TODO @ahr add date offsets every x kb, so we don't have + // to read the whole file } } - public boolean canSeekTail(final long n) { - try { - final long pos = computeSeekPosition(n); - return pos >= 0; - } catch (final IOException e) { - throw new ReadRuntimeException(e); - } - } - - private long computeSeekPosition(final long n) throws IOException { - final long length = data.length(); - final long pos = length - BYTES_PER_VALUE * n; - return pos; - } - @Override public void close() { try { @@ -152,21 +74,92 @@ class PdbReader implements AutoCloseable { } Entry readNullableEntry(final Tags tags) throws ReadRuntimeException { - final long epochMilli = readEpochMilli(); - if (epochMilli < 0) { - return null; - } - final long value = readValue(); + try { + final long epochMilliIncrement = readValue(ByteType.DATE_INCREMENT); + if (epochMilliIncrement < 0) { + return null; + } + final long epochMilli = dateOffsetAtCurrentLocation + epochMilliIncrement; + final long value = readValue(ByteType.MEASUREMENT); - if (value < 0) { - return null; + if (value < 0) { + return null; + } + dateOffsetAtCurrentLocation = epochMilli; + return new Entry(epochMilli, value, tags); + } catch (final IOException e) { + throw new ReadException(e); } - return new Entry(epochMilli, value, tags); } public Optional readEntry(final Tags tags) throws ReadRuntimeException { - return 
Optional.ofNullable(readNullableEntry(tags)); + final Entry entry = readNullableEntry(tags); + return Optional.ofNullable(entry); + } + + public OffsetDateTime getDateOffsetAtCurrentPosition() { + return OffsetDateTime.ofInstant(Instant.ofEpochMilli(dateOffsetAtCurrentLocation), ZoneId.of("UTC")); + } + + public long readValue(final ByteType byteType) throws IOException { + + final long firstByteValueBits = byteType.getValueBits(); + + int firstByte = readNextByte(); + + if (!byteType.isValid(firstByte)) { + if (firstByte < 0) { + return -1; + } else if (ByteType.DATE_OFFSET.isValid(firstByte)) { + final long dateOffsetInit = firstByte & ByteType.DATE_OFFSET.getValueBits(); + this.dateOffsetAtCurrentLocation = readContinuationBytes(dateOffsetInit); + firstByte = readNextByte(); + } else { + throw new FileCorruptException( + "File corrupt at " + index + ". Byte type was " + ByteType.getType(firstByte)); + } + } + + final long value = firstByte & firstByteValueBits; + + return readContinuationBytes(value); + } + + private int readNextByte() throws IOException { + + final int result; + if (peekedByte == PEEK_NOT_SET) { + result = data.read(); + } else { + result = peekedByte; + peekedByte = PEEK_NOT_SET; + } + index++; + return result; + } + + private int peekNextByte() throws IOException { + if (peekedByte == PEEK_NOT_SET) { + peekedByte = data.read(); + } + return peekedByte; + } + + private long readContinuationBytes(long value) throws IOException { + int nextByte; + while ((nextByte = peekNextByte()) >= 0 && isContinuationByte(nextByte)) { + value = value << ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS; + value = value | (nextByte & ByteType.CONTINUATION.getValueBits()); + readNextByte(); + } + + return value; + } + + private static boolean isContinuationByte(final int nextByte) { + final long bytePrefix = ByteType.CONTINUATION.getBytePrefix(); + return bytePrefix == (nextByte & bytePrefix); } } diff --git 
a/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java b/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java index 6c3bdd2..63cc1d5 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/PdbWriter.java @@ -5,18 +5,82 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.time.OffsetDateTime; import java.util.logging.Logger; import org.lucares.pdb.api.Entry; +/** + * File format description: + *

+ * We store non-negative long values for epoch milli and a measurement (usually + * duration in ms). Both values are stored as pairs, so that we get + * date-measurement-date-measurement-date... . The date values are stored as + * difference to the previous date. Every few kilobytes we add an absolute + * offset, so that we can synchronize and don't have to read the whole file when + * we want to append. + * + *

+ * For example we want to store the following values: + * + *

+ * 1970-01-15T07:56:07.890+0100 123 the date is 1234567890 in epoch millis
+ * 1970-01-15T07:56:09.999+0100 456 the date is 1234569999 in epoch millis
+ * 
+ *

+ * We would first store the offset 1234567890, then the first pair. The date is + * stored as the offset to the last value (which was the offset), so it is 0. + * Then we store the measurement. Next we store the second pair. The date + * difference is 2109 and the measurement is 456. + *

+ * Each value is stored with a variable length byte sequence. The idea is + * similar to the encoding of UTF-8. But we differentiate between several + * different types of values. + *

    + *
  1. version, start with 00000001 + *
  2. number of entries up until this point in this file, 00001 + *
  3. date offsets with absolute values for epoch milli, start with 0001 + *
  4. date increments to the previous date value, start with 001 + *
  5. measurements, start with 01 + *
  6. continuation bytes, start with 1 + *
+ + * This is different from UTF-8. We do not encode the number of continuation + * bytes. Therefore we lose UTF-8's self-validation feature and we cannot skip + * to the next value without reading all continuation bytes. But it is a little + * bit more efficient, because each continuation byte can store 7 bits instead of + * 6. A four byte sequence in UTF-8 can store 21 bits whereas a four byte + * sequence in this scheme stores 27 bits for values and 26 bits for date + * increments. But it is not as efficient for one byte sequences. On the other + * hand we also encode five different value types. + *

+ * The encoding looks as follows: + *

+ * The file starts with the version sequence. Its first byte is the prefix + * 00000001, which has no room for value bits, so the version number (1 in our + * case) follows in a continuation byte: 00000001 10000001 + *

+ * The second byte starts with 0001 for date offsets, 001 for date increments + * and 01 for measurements. All continuation bytes start with 1. E.g. The + * measurement 202 has the unsigned bit representation 11001010. The first byte + * of a measurement value starts with 01, so we have room for the first 6 bits. + * But we need 8 bits. So we must add another byte. The second byte starts with + * 1 and has room for 7 bits. The result looks like this: 01000001 + * 11001010 + */ class PdbWriter implements AutoCloseable { private final static Logger LOGGER = Logger.getLogger(PdbWriter.class.getCanonicalName()); private static final boolean APPEND = true; + + private static final int MAX_BYTES_PER_VALUE = 10; + + private final byte[] buffer = new byte[MAX_BYTES_PER_VALUE]; + private final OutputStream outputStream; private final PdbFile pdbFile; - private long minimalEpochMilli; + private long lastEpochMilli; PdbWriter(final PdbFile pdbFile) throws IOException { this.pdbFile = pdbFile; @@ -24,10 +88,15 @@ class PdbWriter implements AutoCloseable { this.outputStream = new BufferedOutputStream(new FileOutputStream(storageFile, APPEND)); if (storageFile.exists() && storageFile.length() > 0) { - final TimeRange availableTimeRange = PdbFileUtils.getAvailableTimeRange(pdbFile); - minimalEpochMilli = availableTimeRange.getFrom().toInstant().toEpochMilli(); + // TODO @ahr check version + + final OffsetDateTime dateOffset = PdbFileUtils.dateOffset(pdbFile); + lastEpochMilli = dateOffset.toInstant().toEpochMilli(); } else { - minimalEpochMilli = pdbFile.getTimeRange().getFrom().toInstant().toEpochMilli(); + writeValue(PdbReader.VERSION, ByteType.VERSION, outputStream); + writeValue(0, ByteType.DATE_OFFSET, outputStream); + + lastEpochMilli = 0; } } @@ -36,24 +105,38 @@ class PdbWriter implements AutoCloseable { } public void write(final Entry entry) throws WriteException { - write(entry.getEpochMilli(), entry.getValue()); + final long epochMilli = entry.getEpochMilli(); + final 
long value = entry.getValue(); + write(epochMilli, value); } private void write(final long epochMilli, final long value) throws WriteException { - final long offsetEpochMilli = pdbFile.getOffsetInEpochMilli(); - final long adjustedValue = epochMilli - offsetEpochMilli; - assertValueInRange(adjustedValue); - assertValueInRange(value); - assertEpochMilliInRange(epochMilli); + try { - write(adjustedValue); - write(value); - minimalEpochMilli = epochMilli; + if (epochMilli < lastEpochMilli) { + LOGGER.info("epochMilli must not be smaller than " + lastEpochMilli + ", but was " + epochMilli + + ". We'll accept this for now. " + + "Currently there is no code that relies on monotonically increasing date values. " + + "Log4j does not guarantee it either."); + return; + } + + final long epochMilliIncrement = epochMilli - lastEpochMilli; + assertValueInRange(epochMilliIncrement); + assertValueInRange(value); + assertEpochMilliInRange(epochMilli); + + writeValue(epochMilliIncrement, ByteType.DATE_INCREMENT, outputStream); + writeValue(value, ByteType.MEASUREMENT, outputStream); + lastEpochMilli = epochMilli; + } catch (final IOException e) { + throw new WriteException(e); + } } private void assertEpochMilliInRange(final long epochMilli) { - if (epochMilli < minimalEpochMilli) { - LOGGER.fine("epochMilli must not be smaller than " + minimalEpochMilli + ", but was " + epochMilli + if (epochMilli < lastEpochMilli) { + LOGGER.info("epochMilli must not be smaller than " + lastEpochMilli + ", but was " + epochMilli + ". We'll accept this for now. " + "Currently there is no code that relies on monotonically increasing date values. 
" + "Log4j does not guarantee it either."); @@ -64,30 +147,6 @@ class PdbWriter implements AutoCloseable { if (value < 0) { throw new IllegalArgumentException("value must not be negative: " + value); } - if (value > Entry.MAX_VALUE) { - throw new IllegalArgumentException("max value is " + Entry.MAX_VALUE + " value was: " + value); - } - } - - // visible for test - void write(final long value) throws WriteException { - assertValueInRange(value); - try { - outputStream.write(BitFiddling.long3(value)); - outputStream.write(BitFiddling.long2(value)); - outputStream.write(BitFiddling.long1(value)); - outputStream.write(BitFiddling.long0(value)); - } catch (final IOException e) { - throw new WriteException(e); - } - } - - public static void writeEntry(final PdbFile pdbFile, final Entry... entries) throws IOException { - try (PdbWriter writer = new PdbWriter(pdbFile)) { - for (final Entry entry : entries) { - writer.write(entry); - } - } } @Override @@ -99,4 +158,36 @@ class PdbWriter implements AutoCloseable { public void flush() throws IOException { outputStream.flush(); } + + public void writeValue(final long value, final ByteType byteSequenceType, final OutputStream output) + throws IOException { + + int index = buffer.length - 1; + + final long maxFirstByteValue = byteSequenceType.getFirstByteMaxValue(); + long val = value; + while (val > maxFirstByteValue) { + // handles continuation bytes + buffer[index] = (byte) ((val & ByteType.CONTINUATION.getValueBits()) + | ByteType.CONTINUATION.getBytePrefix()); + index--; + val = val >> ByteType.ContinuationByte.NUMBER_OF_VALUES_BITS; + } + + buffer[index] = (byte) (val | byteSequenceType.getBytePrefix()); + + output.write(buffer, index, buffer.length - index); + } + + public static void writeEntry(final PdbFile pdbFile, final Entry... 
entries) throws IOException { + try (PdbWriter writer = new PdbWriter(pdbFile)) { + for (final Entry entry : entries) { + writer.write(entry); + } + } + } + + public static void init(final PdbFile result) throws IOException { + writeEntry(result); + } } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/StorageUtils.java b/performanceDb/src/main/java/org/lucares/performance/db/StorageUtils.java index 1e47d78..480da84 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/StorageUtils.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/StorageUtils.java @@ -7,32 +7,34 @@ import org.lucares.pdb.api.Tags; public class StorageUtils { - public static Path createStorageFile(final Path tagSpecificStorageFolder, final Day day) { + public static Path createStorageFile(final Path tagSpecificStorageFolder) { - final Path dateSpecificFolder = tagSpecificStorageFolder.resolve(day.format("/")); - final Path storageFile = dateSpecificFolder.resolve(UUID.randomUUID().toString()); + final Path storageFile = tagSpecificStorageFolder.resolve(UUID.randomUUID().toString()); return storageFile; } - public static Day getDateOffset(final Path pathToStorageFile) { - - try { - final Path pathDay = pathToStorageFile.getParent(); - final Path pathMonth = pathDay.getParent(); - final Path pathYear = pathMonth.getParent(); - - final int day = Integer.parseInt(pathDay.getFileName().toString(), 10); - final int month = Integer.parseInt(pathMonth.getFileName().toString(), 10); - final int year = Integer.parseInt(pathYear.getFileName().toString(), 10); - - final Day result = new Day(year, month, day); - return result; - } catch (final NumberFormatException e) { - throw new IllegalStateException(pathToStorageFile.toUri().getPath() + " is not a path to a storage file", - e); - } - } + // TODO @ahr remove + // public static Day getDateOffset(final Path pathToStorageFile) { + // + // try { + // final Path pathDay = pathToStorageFile.getParent(); + // final 
Path pathMonth = pathDay.getParent(); + // final Path pathYear = pathMonth.getParent(); + // + // final int day = Integer.parseInt(pathDay.getFileName().toString(), 10); + // final int month = Integer.parseInt(pathMonth.getFileName().toString(), + // 10); + // final int year = Integer.parseInt(pathYear.getFileName().toString(), 10); + // + // final Day result = new Day(year, month, day); + // return result; + // } catch (final NumberFormatException e) { + // throw new IllegalStateException(pathToStorageFile.toUri().getPath() + " + // is not a path to a storage file", + // e); + // } + // } public static Path createTagSpecificStorageFolder(final Path dataDirectory, final Tags tags) { @@ -47,9 +49,6 @@ public class StorageUtils { public static Path getTagSpecificStorageFolder(final Path storageFilePath) { return storageFilePath // - .getParent() // day - .getParent() // month - .getParent() // year .getParent(); // tag specific } } diff --git a/performanceDb/src/main/java/org/lucares/performance/db/TagsToFile.java b/performanceDb/src/main/java/org/lucares/performance/db/TagsToFile.java index 9e63a32..3f086fa 100644 --- a/performanceDb/src/main/java/org/lucares/performance/db/TagsToFile.java +++ b/performanceDb/src/main/java/org/lucares/performance/db/TagsToFile.java @@ -6,6 +6,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.time.OffsetDateTime; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -67,8 +68,7 @@ public class TagsToFile implements CollectionUtils { final List storageFiles = FileUtils.listRecursively(tagSpecific.getPath()); for (final Path storageFile : storageFiles) { - final Day day = StorageUtils.getDateOffset(storageFile); - final PdbFile pdbFile = new PdbFile(day, storageFile, fileSpecificTags); + final PdbFile pdbFile = new PdbFile(storageFile, fileSpecificTags); result.add(pdbFile); } @@ -110,21 +110,20 @@ public class TagsToFile 
implements CollectionUtils { public PdbFile getFile(final OffsetDateTime date, final Tags tags) throws FileNotFoundException, IOException { final List pdbFiles = getFilesMatchingTagsExactly(tags); - final List preResult = new ArrayList<>(); + final List preResult = new ArrayList<>(); assertAllFilesHaveSameFolder(pdbFiles); PdbFile result; for (final PdbFile pdbFile : pdbFiles) { - // TODO @ahr should compare with the last written date - final boolean inRange = pdbFile.getTimeRange().inRange(date); + if (Files.isRegularFile(pdbFile.getPath()) + && Files.size(pdbFile.getPath()) >= ByteType.VersionByte.MIN_LENGTH) { - if (inRange) { - final TimeRange availableTimeRange = PdbFileUtils.getAvailableTimeRange(pdbFile); + final OffsetDateTime offsetTime = PdbFileUtils.dateOffset(pdbFile); - if (availableTimeRange.inRange(date)) { - preResult.add(pdbFile); + if (!offsetTime.isAfter(date)) { + preResult.add(new PdbFileOffsetTime(pdbFile, offsetTime)); } } } @@ -139,9 +138,10 @@ public class TagsToFile implements CollectionUtils { tagSpecificStorageFolder = StorageUtils.getTagSpecificStorageFolder(storageFilePath); } - result = createNewPdbFile(date, tags, tagSpecificStorageFolder); + result = createNewPdbFile(tags, tagSpecificStorageFolder); } else { - result = preResult.get(0); + Collections.sort(preResult, PdbFileByTimeAsc.INSTANCE.reversed()); + result = preResult.get(0).getPdbFile(); } return result; @@ -159,11 +159,10 @@ public class TagsToFile implements CollectionUtils { } } - private PdbFile createNewPdbFile(final OffsetDateTime date, final Tags tags, final Path tagSpecificStorageFolder) { + private PdbFile createNewPdbFile(final Tags tags, final Path tagSpecificStorageFolder) throws IOException { final Path storageFile; PdbFile result; - storageFile = createNewFile(date, tagSpecificStorageFolder); - final Day day = new Day(date); + storageFile = createNewFile(tagSpecificStorageFolder); final Document document = db.getDocument(tagSpecificStorageFolder.toFile()); if 
(document == null) { @@ -174,14 +173,14 @@ public class TagsToFile implements CollectionUtils { }); } - result = new PdbFile(day, storageFile, tags); + result = new PdbFile(storageFile, tags); + PdbWriter.init(result); return result; } - private Path createNewFile(final OffsetDateTime date, final Path tagSpecificStorageFolder) { - final Day day = new Day(date); + private Path createNewFile(final Path tagSpecificStorageFolder) { - final Path result = StorageUtils.createStorageFile(tagSpecificStorageFolder, day); + final Path result = StorageUtils.createStorageFile(tagSpecificStorageFolder); try { Files.createDirectories(result.getParent()); Files.createFile(result); diff --git a/performanceDb/src/test/java/org/lucares/performance/db/BitFiddlingTest.java b/performanceDb/src/test/java/org/lucares/performance/db/BitFiddlingTest.java new file mode 100644 index 0000000..4a40546 --- /dev/null +++ b/performanceDb/src/test/java/org/lucares/performance/db/BitFiddlingTest.java @@ -0,0 +1,42 @@ +package org.lucares.performance.db; + +import org.testng.annotations.Test; + +@Test +public class BitFiddlingTest { + + // TODO @ahr remove or move + // public void testEncodingMeasurement() throws Exception { + // + // final List types = Arrays.asList(ByteType.DATE_INCREMENT, + // ByteType.DATE_OFFSET, ByteType.MEASUREMENT, + // ByteType.VERSION); + // + // final List values = Arrays.asList(0L, 1L, 63L, 64L, 127L, 128L, + // 202L, 255L, 256L, 8191L, 8192L, 1048575L, + // 1048576L, 134217728L, 17179869183L, 17179869184L, 2199023255551L, + // 2199023255552L, 281474976710655L, + // 281474976710656L, 36028797018963967L, 36028797018963968L, + // 4611686018427387901L, 4611686018427387904L); + // + // for (final Long value : values) { + // for (final ByteType type : types) { + // encodeDecode(value, type); + // } + // } + // } + // + // private void encodeDecode(final Long value, final ByteType byteType) + // throws Exception { + // + // final ByteArrayOutputStream output = new 
ByteArrayOutputStream(); + // BitFiddling.writeValue(value, byteType, output); + // + // final byte[] byteArray = output.toByteArray(); + // final ByteArrayInputStream input = new ByteArrayInputStream(byteArray); + // final Long readValue = BitFiddling.readValue(byteType, input); + // + // Assert.assertEquals(readValue, value); + // } + +} diff --git a/performanceDb/src/test/java/org/lucares/performance/db/PdbReaderWriterTest.java b/performanceDb/src/test/java/org/lucares/performance/db/PdbReaderWriterTest.java index 9c31aa3..e1d0f99 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/PdbReaderWriterTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/PdbReaderWriterTest.java @@ -4,7 +4,13 @@ import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.time.Instant; import java.time.OffsetDateTime; +import java.time.ZoneId; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; import org.lucares.pdb.api.Entry; import org.lucares.pdb.api.Tags; @@ -19,6 +25,8 @@ public class PdbReaderWriterTest { private Path dataDirectory; + private static final Tags TAGS = Tags.create(); + @BeforeMethod public void beforeMethod() throws IOException { dataDirectory = Files.createTempDirectory("pdb"); @@ -30,90 +38,57 @@ public class PdbReaderWriterTest { } @DataProvider(name = "providerWriteRead") - public Object[][] providerWriteRead() { - return new Object[][] { // - { 1 }, // - { 6 }, // - { 0xffffffffL },// - }; + public Iterator providerWriteRead() { + + final OffsetDateTime two_sixteen = DateUtils.getDate(2016, 1, 1, 1, 1, 1); + + final List values = Arrays.asList(0L, 1L, 63L, 64L, 127L, 128L, 202L, 255L, 256L, 8191L, 8192L, 1048575L, + 1048576L, 134217728L, 17179869183L, 17179869184L, 2199023255551L, 2199023255552L, 281474976710655L, + 281474976710656L, 36028797018963967L, 36028797018963968L, 4611686018427387901L, 
4611686018427387904L); + + final List result = new ArrayList<>(); + + // single values + for (final Long value : values) { + result.add(new Object[] { Arrays.asList(new Entry(two_sixteen, value, TAGS)) }); + } + + // multivalues + result.clear(); // TODO @ahr remove this line + final List entries = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + + final long epochMilli = 123456 * i; + + final OffsetDateTime date = OffsetDateTime.ofInstant(Instant.ofEpochMilli(epochMilli), ZoneId.of("UTC")); + + entries.add(new Entry(date, i, TAGS)); + } + result.add(new Object[] { entries }); + + return result.iterator(); } @Test(dataProvider = "providerWriteRead") - public void testWriteRead(final long value) throws Exception { + public void testWriteRead(final List entries) throws Exception { final File file = Files.createTempFile(dataDirectory, "pdb", ".db").toFile(); - final Tags tags = Tags.create(); - final PdbFile pdbFile = PdbFile.today(file, tags); - final OffsetDateTime now = OffsetDateTime.now(); // TODO @ahr might fail - // at midnight - final Entry entry = new Entry(now, value, tags); + final PdbFile pdbFile = new PdbFile(file.toPath(), TAGS); try (PdbWriter writer = new PdbWriter(pdbFile)) { - writer.write(entry); + for (final Entry entry : entries) { + writer.write(entry); + } } try (final PdbReader reader = new PdbReader(pdbFile)) { - final Entry actual = reader.readEntry(tags).orElseThrow(() -> new AssertionError()); - Assert.assertEquals(actual, entry); - } - } + for (final Entry entry : entries) { - public void testSeekTail() throws Exception { - - final File file = Files.createTempFile(dataDirectory, "pdb", ".db").toFile(); - final PdbFile pdbFile = PdbFile.today(file, Tags.create()); - - try (PdbWriter writer = new PdbWriter(pdbFile)) { - writer.write(1); - writer.write(2); - writer.write(3); - writer.write(4); - writer.write(5); - } - - try (final PdbReader reader = new PdbReader(pdbFile)) { - reader.seekTail(2); - - final long four = reader.read(); - 
final long five = reader.read(); - - Assert.assertEquals(four, 4, "second last value"); - Assert.assertEquals(five, 5, "last value"); - - final long eof = reader.read(); - Assert.assertEquals(eof, -1, "end of file"); - } - } - - public void testSeek() throws Exception { - - final File file = Files.createTempFile(dataDirectory, "pdb", ".db").toFile(); - final PdbFile pdbFile = PdbFile.today(file, Tags.create()); - - try (PdbWriter writer = new PdbWriter(pdbFile)) { - writer.write(1); - writer.write(2); - writer.write(3); - writer.write(4); - writer.write(5); - } - - try (final PdbReader reader = new PdbReader(pdbFile)) { - reader.seek(2); - - final long three = reader.read(); - final long four = reader.read(); - final long five = reader.read(); - - Assert.assertEquals(three, 3, "third value"); - Assert.assertEquals(four, 4, "fourth value"); - Assert.assertEquals(five, 5, "fifth value"); - - reader.seek(0); - final long first = reader.read(); - Assert.assertEquals(first, 1, "first value"); + final Entry actual = reader.readEntry(TAGS).orElseThrow(() -> new AssertionError()); + Assert.assertEquals(actual, entry); + } } } } diff --git a/performanceDb/src/test/java/org/lucares/performance/db/PdbWriterManagerTest.java b/performanceDb/src/test/java/org/lucares/performance/db/PdbWriterManagerTest.java index b5e8851..5a9fcdc 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/PdbWriterManagerTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/PdbWriterManagerTest.java @@ -34,7 +34,7 @@ public class PdbWriterManagerTest { Path path; try { path = Files.createTempFile(dataDirectory, "pdb", ".data"); - return new PdbWriter(new PdbFile(new Day(date), path, tags)); + return new PdbWriter(new PdbFile(path, tags)); } catch (final IOException e) { throw new AssertionError(e.getMessage(), e); } diff --git a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java 
b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java index d9a0c20..aba752e 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/PerformanceDbTest.java @@ -115,10 +115,9 @@ public class PerformanceDbTest { .collect(Collectors.toList()); Assert.assertEquals(foldersInStorage.size(), 1); - final Path dateSpecificFolder = foldersInStorage.get(0) - .resolve(new Day(timeRange.getFrom()).format(File.separator)); + final Path tagSpecificFolder = foldersInStorage.get(0); - final File[] filesInStorage = dateSpecificFolder.toFile().listFiles(); + final File[] filesInStorage = tagSpecificFolder.toFile().listFiles(); Assert.assertEquals(filesInStorage.length, 1, "one file in storage, but was: " + Arrays.asList(filesInStorage)); } diff --git a/performanceDb/src/test/java/org/lucares/performance/db/StorageUtilsTest.java b/performanceDb/src/test/java/org/lucares/performance/db/StorageUtilsTest.java index ca97869..a6cf664 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/StorageUtilsTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/StorageUtilsTest.java @@ -10,30 +10,16 @@ import org.testng.annotations.Test; @Test public class StorageUtilsTest { - public void testExtractDateOffsetFromPath() throws Exception { - - final Path tagSpecificStorageFolder = Paths.get("/tmp"); - final Day day = new Day(2016, 1, 1); - - final Path storageFile = StorageUtils.createStorageFile(tagSpecificStorageFolder, day); - - final Day extractedDateOffset = StorageUtils.getDateOffset(storageFile); - - Assert.assertEquals(extractedDateOffset, day); - } - public void testGetTagSpecificStorageFolder() { final Path dataDirectory = Paths.get("/tmp"); final Tags tags = Tags.create("key", "value"); - final Day day = new Day(2016, 1, 1); final Path tagSpecifiStorageFolder = StorageUtils.createTagSpecificStorageFolder(dataDirectory, tags); - final Path 
storageFile = StorageUtils.createStorageFile(tagSpecifiStorageFolder, day); + final Path storageFile = StorageUtils.createStorageFile(tagSpecifiStorageFolder); final Path extractedTagSpecifiStorageFolder = StorageUtils.getTagSpecificStorageFolder(storageFile); Assert.assertEquals(extractedTagSpecifiStorageFolder, extractedTagSpecifiStorageFolder); } - } diff --git a/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java b/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java index fd38139..a5b0aaa 100644 --- a/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java +++ b/performanceDb/src/test/java/org/lucares/performance/db/TagsToFilesTest.java @@ -40,13 +40,15 @@ public class TagsToFilesTest { final Tags tags = Tags.create("myKey", "myValue"); final PdbFile newFileForTags = tagsToFile.getFile(date, tags); + PdbWriter.writeEntry(newFileForTags); + final PdbFile existingFileForTags = tagsToFile.getFile(date, tags); Assert.assertEquals(newFileForTags, existingFileForTags); } } - public void testMultipleFilesForTag() throws Exception { + public void testAppendingToSameFileIfNewDateIsAfter() throws Exception { try (H2DB db = new H2DB(new File(dataDirectory.toFile(), "lu.db"))) { @@ -60,13 +62,14 @@ public class TagsToFilesTest { final PdbFile fileForDay1 = tagsToFile.getFile(day1, tags); final PdbFile fileForDay2 = tagsToFile.getFile(day2, tags); - Assert.assertNotEquals(fileForDay1, fileForDay2); + Assert.assertEquals(fileForDay1, fileForDay2); final PdbFile existingFileForDay1 = tagsToFile.getFile(day1, tags); Assert.assertEquals(fileForDay1, existingFileForDay1); } } + @Test(invocationCount = 1) public void testNewFileIfDateIsTooOld() throws Exception { try (H2DB db = new H2DB(new File(dataDirectory.toFile(), "lu.db"))) { @@ -84,12 +87,12 @@ public class TagsToFilesTest { PdbWriter.writeEntry(fileAfternoon, new Entry(afternoon, 1, tags)); final PdbFile fileMorning = tagsToFile.getFile(morning, tags); - 
PdbWriter.writeEntry(fileMorning, new Entry(morning, 1, tags)); + PdbWriter.writeEntry(fileMorning, new Entry(morning, 2, tags)); Assert.assertNotEquals(fileAfternoon, fileMorning); final PdbFile fileEarlyMorning = tagsToFile.getFile(earlyMorning, tags); - PdbWriter.writeEntry(fileMorning, new Entry(morning, 1, tags)); + PdbWriter.writeEntry(fileEarlyMorning, new Entry(earlyMorning, 3, tags)); Assert.assertNotEquals(fileEarlyMorning, fileAfternoon); Assert.assertNotEquals(fileEarlyMorning, fileMorning);