add sorted map for byte offset mapping
This commit is contained in:
70
int2intmaplike.py
Normal file
70
int2intmaplike.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
import math
|
||||||
|
import os
|
||||||
|
from logging import exception
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
class Int2IntMapLike():
    """
    A file used to map byte numbers of the filter view to byte numbers in the original file.

    Each line contains three integers separated by commas: ``start,length,val``.
    An entry matches every key with ``start <= key < start + length``.
    The first column must be added in ascending order. This allows us to do binary searches.
    The file uses fixed-size blocks (4kb by default). That means we add fill bytes (newlines)
    if a line would cross a block boundary, so every block starts with a complete line.
    """

    # Size of one block in bytes; may be overridden per instance (e.g. in tests).
    blocksize = 4096

    def __init__(self, file):
        """Create (or truncate) the backing file at path ``file`` and keep it open."""
        self._file = file
        # ASCII + newline="\n" guarantee that one character written is exactly one
        # byte on disk on every platform; the block arithmetic depends on that.
        self._handle = open(file, "w+t", encoding="ascii", newline="\n")

    def close(self):
        """Close the backing file; calling it more than once is harmless."""
        if not self._handle.closed:
            self._handle.close()

    def reset(self):
        """Remove all entries from the map."""
        # Rewind before truncating: truncate() alone keeps the old file position,
        # so the next write would leave a hole of NUL bytes at the start of the file.
        self._handle.seek(0)
        self._handle.truncate()

    def add(self, start: int, length: int, val: int):
        """
        Append the entry ``start,length,val`` to the end of the file.

        Entries must be added with ascending ``start``; this is not checked here.

        Raises:
            ValueError: if the formatted line does not fit into a single block.
        """
        line = "%d,%d,%d\n" % (start, length, val)
        line_size = len(line)
        if line_size > self.blocksize:
            raise ValueError(
                "entry of %d bytes does not fit into a block of %d bytes"
                % (line_size, self.blocksize)
            )

        # find() moves the file position, so always append at the real end of file.
        self._handle.seek(0, os.SEEK_END)
        offset = self._handle.tell()
        used = offset % self.blocksize
        if used + line_size > self.blocksize:
            # Pad with newlines so the entry starts at the next block boundary.
            self._handle.write("\n" * (self.blocksize - used))
        self._handle.write(line)

    def find(self, key: int) -> Optional[int]:
        """
        Return the ``val`` of the entry whose range contains ``key``.

        Performs a binary search over the blocks of the file and a linear scan
        inside the inspected block.

        Returns:
            The mapped value, or None if no entry contains ``key``.
        """
        # Make pending writes visible before measuring the file.
        self._handle.flush()
        size = os.fstat(self._handle.fileno()).st_size
        if size == 0:
            return None

        low = 0
        high = math.ceil(size / self.blocksize) - 1
        while low <= high:
            mid = (low + high) // 2
            self._handle.seek(mid * self.blocksize)
            block = self._handle.read(self.blocksize)

            # None until the first entry of the block has been compared.
            key_is_before = None
            for line in block.split("\n"):
                if not line:
                    # fill bytes / trailing newline
                    continue
                token = line.split(",")
                start = int(token[0])
                length = int(token[1])
                val = int(token[2])

                if start <= key < start + length:
                    return val

                before = key < start
                if key_is_before is not None and before != key_is_before:
                    # The key lies in the gap between two neighbouring entries.
                    return None
                key_is_before = before

            if key_is_before:
                high = mid - 1
            else:
                low = mid + 1

        return None
|
||||||
46
testint2intmaplike.py
Normal file
46
testint2intmaplike.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
from int2intmaplike import Int2IntMapLike
|
||||||
|
|
||||||
|
|
||||||
|
class Int2IntMapLikeTest(unittest.TestCase):
    """Tests for Int2IntMapLike backed by a file in a temporary directory."""

    def setUp(self):
        # Each test gets a fresh directory and a fresh, empty map file.
        self.test_dir = tempfile.TemporaryDirectory()
        self.tmpfile = join(self.test_dir.name, "my.log")
        self.map = Int2IntMapLike(self.tmpfile)

    def tearDown(self):
        # Close the handle before the directory is removed.
        self.map.close()
        self.test_dir.cleanup()

    def test_fill_map(self):
        """Entries spread over several blocks are found; keys in gaps yield None."""
        mapping = self.map
        # A tiny block size forces the 19 entries below into three blocks.
        mapping.blocksize = 64

        # fill map with
        # 10,5,1
        # 20,5,2
        # 30,5,3
        # ...
        for i in range(1, 20):
            mapping.add(i * 10, 5, i)

        self.assertEqual(2, mapping.find(20))
        self.assertEqual(7, mapping.find(71))
        self.assertEqual(13, mapping.find(134))
        self.assertEqual(19, mapping.find(194))

        # values that are not in the map
        self.assertIsNone(mapping.find(0))
        self.assertIsNone(mapping.find(9))
        self.assertIsNone(mapping.find(15))
        self.assertIsNone(mapping.find(16))
        self.assertIsNone(mapping.find(107))  # a value in the second block
        self.assertIsNone(mapping.find(188))  # a value in the third block
|
||||||
|
|
||||||
|
|
||||||
|
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||||
Reference in New Issue
Block a user