move files into a package structure

This commit is contained in:
2022-02-06 16:02:54 +01:00
parent 8bb4ca0563
commit 5428553a1e
28 changed files with 23 additions and 26 deletions

0
raven/util/__init__.py Normal file
View File

29
raven/util/conversion.py Normal file
View File

@@ -0,0 +1,29 @@
import unittest
def humanbytes(bytes: int) -> str:
    """Format a byte count as a short, non-localized, human readable string.

    Values below 1024 are rendered as ``"<n> bytes"``. Larger values are
    expressed in the largest binary unit (KB up to EB) that does not exceed
    the value, with at most three decimals and without trailing zeros.
    """
    units = ('KB', 'MB', 'GB', 'TB', 'PB', 'EB')
    text = "%d bytes" % bytes
    for exponent, unit in enumerate(units, start=1):
        divisor = 1024 ** exponent
        if bytes < divisor:
            # the previous (smaller) unit was the best fit
            break
        # strip trailing zeros first, then a decimal point that is left dangling
        number = ("%.3f" % (bytes / divisor)).rstrip("0").rstrip(".")
        text = number + " " + unit
    return text
class TestLogFileModel(unittest.TestCase):
    """Unit tests for the byte-count formatting helper ``humanbytes``."""

    def test_humanbytes(self):
        """Check the formatting around the bytes/KB/MB boundaries."""
        expected_by_size = {
            0: "0 bytes",
            1023: "1023 bytes",
            1024: "1 KB",
            1048575: "1023.999 KB",
            1048576: "1 MB",
        }
        for size, expected in expected_by_size.items():
            self.assertEqual(expected, humanbytes(size))

85
raven/util/int2intmap.py Normal file
View File

@@ -0,0 +1,85 @@
import math
import os
from logging import exception
from typing import Optional
class Int2IntMap():
    """
    A file used to map byte numbers of the filter view to byte numbers in the original file.
    Each line contains the two integers separated by a comma.
    The first column is sorted ascending. This allows us to do binary searches.
    The file uses 4kb blocks. That means we add fill bytes (newlines) if a line would cross a 4kb block boundary.

    New entries are collected in memory and appended to the file in batches.
    The caller is expected to add keys in ascending order; this is not checked.
    """

    # Size of one block in bytes. Can be overridden on an instance, but only
    # before the first entry is added (existing data is not re-aligned).
    blocksize = 4096

    def __init__(self, file):
        """Create (or truncate) the backing file ``file`` and start with an empty map."""
        self._file = file
        # Binary mode keeps file offsets equal to byte counts on every platform.
        # Text mode translates "\n" on Windows and only supports opaque seek
        # offsets, which would break the block arithmetic used by find().
        self._handle = open(file, "w+b")
        # Pending lines/fill that are not written yet, plus their total length.
        # A list avoids re-copying one large string on every add().
        self._buffer = []
        self._buffer_len = 0
        # Number of bytes already written to the file (always the append position).
        self._size = 0

    def close(self):
        """Close the backing file. Safe to call more than once."""
        if not self._handle.closed:
            self._handle.close()

    def reset(self):
        """Remove all entries, including the ones that are still buffered."""
        self._buffer = []
        self._buffer_len = 0
        self._size = 0
        # Rewind before truncating: truncate() does not move the file position,
        # so later writes would otherwise leave a gap of NUL bytes.
        self._handle.seek(0)
        self._handle.truncate(0)

    def add(self, key: int, val: int):
        """Append the mapping ``key`` -> ``val``; ``key`` must be larger than all previous keys."""
        line = "%d,%d\n" % (key, val)
        # Logical end of the map: written bytes plus pending bytes. This must not
        # depend on the current file position because find() moves it around.
        used = (self._size + self._buffer_len) % self.blocksize
        if used + len(line) > self.blocksize:
            # end of block: fill block so the line starts at a block boundary
            fill_bytes = self.blocksize - used
            self._buffer.append("\n" * fill_bytes)
            self._buffer_len += fill_bytes
        self._buffer.append(line)
        self._buffer_len += len(line)
        if self._buffer_len > self.blocksize * 100:
            self._flush_buffer()

    def _flush_buffer(self):
        """Append all pending entries to the end of the file."""
        if not self._buffer:
            return
        data = "".join(self._buffer).encode("ascii")
        # Always write at the end, no matter where the last find() left the position.
        self._handle.seek(self._size)
        self._handle.write(data)
        self._handle.flush()
        self._size += len(data)
        self._buffer = []
        self._buffer_len = 0

    def find(self, key: int) -> Optional[int]:
        """Return the value stored for ``key`` or ``None`` if the key is not in the map.

        Pending entries are written to the file first. The lookup is a binary
        search over the blocks of the file.
        """
        self._flush_buffer()
        if self._size == 0:
            return None
        low = 0
        high = math.ceil(self._size / self.blocksize) - 1
        while low <= high:
            mid = (low + high) // 2
            self._handle.seek(mid * self.blocksize)
            block = self._handle.read(self.blocksize).decode("ascii")
            # True if key is smaller than the entries seen so far in this block,
            # False if it is larger, None as long as no entry has been seen.
            is_before = None
            for line in block.split("\n"):
                if not line:
                    # fill bytes or the end of the last line
                    continue
                fields = line.split(",")
                current = int(fields[0])
                if current == key:
                    return int(fields[1])
                key_is_smaller = key < current
                if is_before is not None and key_is_smaller != is_before:
                    # key lies between two neighbouring entries: it is not in the map
                    return None
                is_before = key_is_smaller
            if is_before:
                high = mid - 1
            else:
                low = mid + 1
        return None

    def total_blocks(self) -> int:
        """Return the number of blocks needed for all entries added so far, buffered ones included."""
        return math.ceil((self._size + self._buffer_len) / self.blocksize)

View File

@@ -0,0 +1,76 @@
import tempfile
import unittest
from os.path import join
from raven.util.int2intmap import Int2IntMap
class Int2IntMapLike(unittest.TestCase):
    """Tests for Int2IntMap, using a map file inside a temporary directory."""

    def setUp(self):
        """Create a fresh temporary directory and an empty map in it."""
        self.test_dir = tempfile.TemporaryDirectory()
        self.tmpfile = join(self.test_dir.name, "my.log")
        self.map = Int2IntMap(self.tmpfile)

    def tearDown(self):
        """Close the map before its directory is removed."""
        self.map.close()
        self.test_dir.cleanup()

    def test_empty_map(self):
        self.assertIsNone(self.map.find(0))

    def test_one_line_one_byte(self):
        mapping = self.map
        mapping.add(10, 1)  # add the key 10

        self.assertIsNone(mapping.find(9))  # directly before
        self.assertEqual(1, mapping.find(10))
        self.assertIsNone(mapping.find(11))  # directly after

    def test_one_line_two_bytes(self):
        mapping = self.map
        mapping.add(10, 1)  # added key 10
        mapping.add(11, 2)  # added key 11

        self.assertIsNone(mapping.find(9))  # directly before
        self.assertEqual(1, mapping.find(10))
        self.assertEqual(2, mapping.find(11))
        self.assertIsNone(mapping.find(12))  # directly after

    def test_two_lines(self):
        mapping = self.map
        mapping.add(10, 1)  # added key 10
        mapping.add(12, 2)  # added key 12

        self.assertIsNone(mapping.find(9))  # directly before
        self.assertEqual(1, mapping.find(10))
        self.assertIsNone(mapping.find(11))  # between
        self.assertEqual(2, mapping.find(12))
        self.assertIsNone(mapping.find(13))  # directly after

    def test_fill_map(self):
        mapping = self.map
        # small blocks, so that the map spans several of them
        mapping.blocksize = 64

        # Fill the map with the entries
        #   10 -> 1
        #   20 -> 2
        #   30 -> 3
        #   ...
        # range(1, 50) results in 6 blocks of 64 bytes each.
        # After every insert, all numbers below the new key are looked up.
        for i in range(1, 50):
            mapping.add(i * 10, i)

            for j in range(1, i * 10):
                if j % 10 != 0:
                    # values that are not in the map
                    self.assertIsNone(mapping.find(j))
                else:
                    # values that are in the map
                    self.assertEqual(j / 10, mapping.find(j))
# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

28
raven/util/urlutils.py Normal file
View File

@@ -0,0 +1,28 @@
import os
from urllib.parse import urlparse
import sys
def urls_to_path(urls: str) -> [str]:
    """Convert a text with one URL per line into a list of local paths.

    The result has one path for every line of ``urls``, in the same order.
    """
    return [url_to_path(line) for line in urls.splitlines()]
def url_to_path(url: str) -> str:
    """Convert a ``file://`` URL into an absolute local path.

    Percent-escapes in the URL (for example ``%20`` for a space) are decoded,
    so the result names the real file on disk.
    """
    # imported here because the module only imports urlparse at file level
    from urllib.parse import unquote

    parsed = urlparse(url)
    path = unquote(parsed.path)
    if sys.platform == 'win32' or sys.platform == 'cygwin':
        # presumably the URL path looks like "/C:/dir/file" here: drop the
        # leading slash in front of the drive letter
        return os.path.abspath(path[1:])
    # an absolute URL path makes os.path.join() ignore the netloc part
    return os.path.abspath(os.path.join(parsed.netloc, path))
def url_is_file(string: str) -> bool:
    """Check that every ``file://`` URL in ``string`` points to an existing file.

    ``string`` holds one URL candidate per line. Lines that do not start with
    ``file://`` are skipped, so a text without any file URL yields ``True``.
    """
    for candidate in string.splitlines():
        if not candidate.startswith("file://"):
            continue
        if not os.path.isfile(url_to_path(candidate)):
            return False
    return True