move files into a package structure
This commit is contained in:
0
raven/util/__init__.py
Normal file
0
raven/util/__init__.py
Normal file
29
raven/util/conversion.py
Normal file
29
raven/util/conversion.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import unittest
|
||||
|
||||
def humanbytes(bytes: int) -> str:
    """non-localized conversion of bytes to human readable strings

    Values below 1024 are rendered as "<n> bytes". Larger values are divided
    by the biggest power of 1024 that still fits (KB up to EB), printed with
    at most three decimals, and trailing zeros (and a dangling decimal point)
    are removed. Values beyond the EB range are still expressed in EB.
    """
    units = ('KB', 'MB', 'GB', 'TB', 'PB', 'EB')
    text = "%d bytes" % bytes
    for exponent, unit in enumerate(units, start=1):
        divisor = 1024 ** exponent
        if bytes < divisor:
            # the previous (smaller) unit was the best fit
            break
        number = ("%.3f" % (bytes / divisor)).rstrip("0").rstrip(".")
        text = number + " " + unit
    return text
|
||||
|
||||
|
||||
class TestLogFileModel(unittest.TestCase):
    """Unit tests for the humanbytes conversion."""

    def test_humanbytes(self):
        """Check values around the bytes/KB and KB/MB boundaries."""
        expectations = [
            (0, "0 bytes"),
            (1023, "1023 bytes"),
            (1024, "1 KB"),
            (1048575, "1023.999 KB"),
            (1048576, "1 MB"),
        ]
        for byte_count, expected in expectations:
            actual = humanbytes(byte_count)
            self.assertEqual(expected, actual)
|
||||
85
raven/util/int2intmap.py
Normal file
85
raven/util/int2intmap.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import math
|
||||
import os
|
||||
from logging import exception
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class Int2IntMap():
    """
    A file used to map byte numbers of the filter view to byte numbers in the original file.
    Each line contains the two integers separated by a comma.
    The first column is sorted ascending. This allows us to do binary searches.
    The file uses fixed-size blocks (``blocksize``, 4kb by default). That means we add fill
    bytes (newlines) if a line would cross a block boundary, so every block starts with a
    complete line and can be parsed without looking at its neighbours.

    New entries are collected in an in-memory buffer and appended to the file in batches.
    """
    blocksize = 4096

    def __init__(self, file):
        """Create (or truncate) the backing file ``file`` and start with an empty map."""
        self._file = file
        # Everything written is ASCII (digits, sign, comma, newline), so character offsets
        # equal byte offsets as long as newlines are not translated. newline="\n" keeps
        # a newline one byte on every platform; the block arithmetic depends on that.
        self._handle = open(file, "w+t", encoding="utf-8", newline="\n")
        self._buffer = ""
        # number of characters already written to the file (the pending buffer is not included)
        self._written = 0

    def close(self):
        """Close the backing file; entries that are still only buffered are not written."""
        if not self._handle.closed:
            self._handle.close()

    def reset(self):
        """Remove all entries, including the ones that are still buffered."""
        self._buffer = ""
        self._written = 0
        # truncate() does not move the stream position, so rewind explicitly. Otherwise the
        # next write would land at the old offset and leave a gap at the start of the file.
        self._handle.seek(0)
        self._handle.truncate(0)

    def add(self, key: int, val: int):
        """
        Append the mapping ``key`` -> ``val``.

        Keys have to be added in ascending order because find() does a binary search
        on the first column.
        """
        line = "%d,%d\n" % (key, val)
        # Logical end of the map: what is on disk plus what is still buffered. This must not
        # be derived from the handle position, because find() seeks around in the file.
        offset = self._written + len(self._buffer)
        used = offset % self.blocksize
        if used + len(line) > self.blocksize:
            # end of block: fill block
            self._buffer += "\n" * (self.blocksize - used)
        self._buffer += line
        if len(self._buffer) > self.blocksize * 100:
            self._flush_buffer()

    def _flush_buffer(self):
        """Append the buffered lines to the end of the file and empty the buffer."""
        # find() may have left the handle somewhere in the middle of the file;
        # always append at the end so existing blocks are never overwritten.
        self._handle.seek(0, os.SEEK_END)
        self._handle.write(self._buffer)
        self._written += len(self._buffer)
        self._buffer = ""
        self._handle.flush()

    def find(self, key: int) -> Optional[int]:
        """
        Return the value stored for ``key`` or None if the key is not in the map.

        Pending entries are flushed first, then a binary search over the blocks of the
        file is done. Each visited block is read and scanned line by line.
        """
        if self._buffer:
            self._flush_buffer()
        size = os.stat(self._file).st_size
        if size == 0:
            return None
        low = 0
        high = math.ceil(size / self.blocksize) - 1
        while low <= high:
            mid = (low + high) // 2

            self._handle.seek(mid * self.blocksize)
            block = self._handle.read(self.blocksize)
            # True: key is smaller than the keys seen so far in this block,
            # False: key is bigger, None: no line parsed yet
            is_before = None
            for line in block.split("\n"):
                if not line:
                    # fill bytes at the end of a block
                    continue
                token = line.split(",")
                k = int(token[0])
                if key == k:
                    return int(token[1])
                current = key < k
                if is_before is not None and current != is_before:
                    # key lies between two neighbouring keys of this block -> not in the map
                    return None
                is_before = current

            if is_before:
                high = mid - 1
            else:
                low = mid + 1
        return None

    def total_blocks(self) -> int:
        """
        Return the number of blocks the file currently occupies.

        Entries that are still buffered in memory are not counted.
        """
        size = os.stat(self._file).st_size
        return math.ceil(size / self.blocksize)
|
||||
76
raven/util/testint2intmaplike.py
Normal file
76
raven/util/testint2intmaplike.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import tempfile
|
||||
import unittest
|
||||
from os.path import join
|
||||
|
||||
from raven.util.int2intmap import Int2IntMap
|
||||
|
||||
|
||||
class Int2IntMapLike(unittest.TestCase):
    """Tests for Int2IntMap; every test gets a fresh map backed by a file in a temporary directory."""

    def setUp(self):
        """Create the temporary directory and an empty map in it."""
        self.test_dir = tempfile.TemporaryDirectory()
        self.tmpfile = join(self.test_dir.name, "my.log")
        self.map = Int2IntMap(self.tmpfile)

    def tearDown(self):
        """Close the map before the directory that holds its file is removed."""
        self.map.close()
        self.test_dir.cleanup()

    def _assert_lookups(self, expectations):
        """Look up every key of the (key, expected value) pairs, in the given order."""
        for key, expected in expectations:
            self.assertEqual(expected, self.map.find(key))

    def test_empty_map(self):
        self._assert_lookups([(0, None)])

    def test_one_line_one_byte(self):
        self.map.add(10, 1)  # add the key 10
        self._assert_lookups([
            (9, None),  # directly before
            (10, 1),
            (11, None),  # directly after
        ])

    def test_one_line_two_bytes(self):
        self.map.add(10, 1)  # added key 10
        self.map.add(11, 2)  # added key 11
        self._assert_lookups([
            (9, None),  # directly before
            (10, 1),
            (11, 2),
            (12, None),  # directly after
        ])

    def test_two_lines(self):
        self.map.add(10, 1)  # added key 10
        self.map.add(12, 2)  # added key 12
        self._assert_lookups([
            (9, None),  # directly before
            (10, 1),
            (11, None),  # between
            (12, 2),
            (13, None),  # directly after
        ])

    def test_fill_map(self):
        mapping = self.map
        # small blocks, so that the entries below are spread over several blocks
        mapping.blocksize = 64

        # fill the map with the entries
        # 10,1
        # 20,2
        # 30,3
        # ...
        #
        # range(1,50) results in 6 blocks a 64 byte
        for count in range(1, 50):
            mapping.add(count * 10, count)

            # after every insert re-check all numbers below the key that was just added
            for probe in range(1, count * 10):
                # multiples of ten are in the map, everything else is not
                expected = probe / 10 if probe % 10 == 0 else None
                self.assertEqual(expected, mapping.find(probe))
|
||||
|
||||
|
||||
# run the tests of this module when it is executed as a script
if __name__ == "__main__":
    unittest.main()
|
||||
28
raven/util/urlutils.py
Normal file
28
raven/util/urlutils.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
import sys
|
||||
|
||||
|
||||
def urls_to_path(urls: str) -> [str]:
    """Convert a newline separated list of URLs into a list of local paths.

    Every line of ``urls`` is passed to url_to_path; the result keeps the
    order of the lines.
    """
    return [url_to_path(single_url) for single_url in urls.splitlines()]
|
||||
|
||||
def url_to_path(url: str) -> str:
    """Convert a ``file://`` URL into an absolute local path.

    Percent-encoded characters in the path (for example ``%20`` for a space)
    are decoded, so the result names the real file on disk.

    On Windows and Cygwin the leading slash of the URL path is removed
    (``/C:/dir/file`` becomes ``C:/dir/file``). On other platforms the
    network location and the path of the URL are joined.
    """
    # imported locally so this function does not depend on a changed module import block
    from urllib.parse import unquote

    parsed = urlparse(url)
    path = unquote(parsed.path)
    if sys.platform in ('win32', 'cygwin'):
        return os.path.abspath(path[1:])
    return os.path.abspath(os.path.join(parsed.netloc, path))
|
||||
|
||||
|
||||
def url_is_file(string: str) -> bool:
    """Check that every ``file://`` URL in ``string`` points to an existing file.

    ``string`` is split into lines. Lines that do not start with ``file://``
    are ignored. Returns False as soon as one file URL does not resolve to a
    regular file, True otherwise (also when there is no file URL at all).
    """
    for candidate in string.splitlines():
        if not candidate.startswith("file://"):
            continue
        if not os.path.isfile(url_to_path(candidate)):
            return False
    return True
|
||||
Reference in New Issue
Block a user