move files into a package structure

2022-02-06 16:02:54 +01:00
parent 8bb4ca0563
commit 5428553a1e
28 changed files with 23 additions and 26 deletions
--- a/raven/util/__init__.py
+++ b/raven/util/__init__.py
--- a/raven/util/conversion.py
+++ b/raven/util/conversion.py
@@ -0,0 +1,29 @@
+import unittest
+
+def humanbytes(bytes: int) -> str:
+    """Convert a byte count to a short, non-localized, human readable string.
+
+    Counts below 1024 are rendered as "<n> bytes". Larger counts are scaled
+    to the largest fitting binary unit (KB = 1024 bytes ... EB = 1024**6
+    bytes) and shown with at most three decimals; trailing zeros and a
+    dangling decimal point are dropped, e.g. 1024 -> "1 KB" and
+    1048575 -> "1023.999 KB".
+
+    The parameter name shadows the ``bytes`` builtin; it is kept so that
+    callers passing it by keyword keep working.
+    """
+    units = ('bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB')
+
+    # pick the largest unit that is not bigger than the value
+    power = 0
+    while power + 1 < len(units) and bytes >= 1024 ** (power + 1):
+        power += 1
+    if power == 0:
+        return "%d bytes" % bytes
+
+    number = "%.3f" % (bytes / (1024 ** power))
+    if number == "1024.000" and power + 1 < len(units):
+        # Rounding to three decimals reached the next unit (e.g. 1024**3 - 1
+        # bytes would read "1024 MB"): show it as one of the next unit.
+        power += 1
+        number = "1.000"
+    # "1.500" -> "1.5", "1.000" -> "1"; the decimal point protects integer zeros
+    number = number.rstrip("0").rstrip(".")
+    return number + " " + units[power]
+
+
+class TestLogFileModel(unittest.TestCase):
+    """Unit tests for humanbytes()."""
+
+    def test_humanbytes(self):
+        """Check values around the bytes/KB and KB/MB unit boundaries."""
+        expectations = {
+            0: "0 bytes",
+            1023: "1023 bytes",
+            1024: "1 KB",
+            1048575: "1023.999 KB",
+            1048576: "1 MB",
+        }
+        for byte_count, expected in expectations.items():
+            # subTest reports every failing input instead of stopping at the first one
+            with self.subTest(byte_count=byte_count):
+                self.assertEqual(expected, humanbytes(byte_count))
--- a/raven/util/int2intmap.py
+++ b/raven/util/int2intmap.py
@@ -0,0 +1,85 @@
+import math
+import os
+from logging import exception
+from typing import Optional
+
+
+class Int2IntMap():
+    """
+    A file used to map byte numbers of the filter view to byte numbers in the original file.
+    Each line contains the two integers separated by a comma.
+    The first column is sorted ascending. This allows us to do binary searches.
+    The file uses 4kb blocks. That means we add fill bytes (newlines) if a line would cross a 4kb block boundary.
+
+    Entries are collected in an in-memory buffer and appended to the file when the buffer
+    grows large or when a lookup needs them. The file is (re)created empty by the constructor.
+    Instances can be used as context managers; leaving the block closes the file.
+    """
+    blocksize = 4096
+
+    def __init__(self, file):
+        """Create (or truncate) the map file at the path ``file``."""
+        self._file = file
+        # Binary mode: block offsets are computed arithmetically, which is only
+        # reliable when one written character is exactly one byte in the file
+        # (no newline translation, no opaque text-mode tell() values).
+        self._handle = open(file, "w+b")
+        # entries that have been added but not yet written to the file
+        self._buffer = bytearray()
+        # number of bytes already written to the file
+        self._flushed_size = 0
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def close(self):
+        """Close the underlying file; calling it more than once is harmless."""
+        if not self._handle.closed:
+            self._handle.close()
+
+    def reset(self):
+        """Remove all entries, including the ones that are still buffered."""
+        self._buffer = bytearray()
+        # rewind first, otherwise the next write would happen at the old
+        # position and leave a gap at the start of the file
+        self._handle.seek(0)
+        self._handle.truncate()
+        self._flushed_size = 0
+
+    def add(self, key: int, val: int):
+        """Append the mapping ``key`` -> ``val``.
+
+        Keys must be added in ascending order, because find() relies on the
+        first column being sorted.
+        """
+        line = ("%d,%d\n" % (key, val)).encode("ascii")
+        # Position at which this line will end up in the file. It must not be
+        # derived from the handle position, because find() moves the handle.
+        offset = self._flushed_size + len(self._buffer)
+        used = offset % self.blocksize
+        if used + len(line) > self.blocksize:
+            # end of block: fill block
+            self._buffer += b"\n" * (self.blocksize - used)
+        self._buffer += line
+        if len(self._buffer) > self.blocksize * 100:
+            self._flush_buffer()
+
+    def _flush_buffer(self):
+        """Append the buffered entries to the end of the file."""
+        # find() may have left the handle in the middle of the file; writing
+        # there would overwrite existing entries
+        self._handle.seek(0, os.SEEK_END)
+        self._handle.write(self._buffer)
+        self._flushed_size += len(self._buffer)
+        self._buffer = bytearray()
+        self._handle.flush()
+
+    def find(self, key: int) -> Optional[int]:
+        """Return the value stored for ``key`` or None if the key is not in the map.
+
+        Performs a binary search over the blocks of the file and a linear scan
+        inside each visited block.
+        """
+        if self._buffer:
+            self._flush_buffer()
+        size = os.stat(self._file).st_size
+        if size == 0:
+            return None
+        low = 0
+        high = math.ceil(size / self.blocksize) - 1
+        while low <= high:
+            mid = low + (high - low) // 2
+
+            self._handle.seek(mid * self.blocksize)
+            block = self._handle.read(self.blocksize)
+            # True if the key is smaller than the entries seen so far in this
+            # block, False if it is greater, None as long as no entry was seen
+            is_before = None
+            for line in block.split(b"\n"):
+                if not line:
+                    # fill bytes or the end of the block
+                    continue
+                token = line.split(b",")
+                k = int(token[0])
+                val = int(token[1])
+
+                if key == k:
+                    return val
+                tmp = key < k
+                if is_before is not None and tmp != is_before:
+                    # the key lies between two neighbouring entries of this block
+                    return None
+                is_before = tmp
+
+            if is_before:
+                high = mid - 1
+            else:
+                low = mid + 1
+        return None
+
+    def total_blocks(self) -> int:
+        """Return the number of blocks currently in the file (buffered entries are not counted)."""
+        size = os.stat(self._file).st_size
+        return math.ceil(size / self.blocksize)
--- a/raven/util/testint2intmaplike.py
+++ b/raven/util/testint2intmaplike.py
@@ -0,0 +1,76 @@
+import tempfile
+import unittest
+from os.path import join
+
+from raven.util.int2intmap import Int2IntMap
+
+
+class Int2IntMapLike(unittest.TestCase):
+    """Tests for Int2IntMap lookups on a map file in a temporary directory."""
+
+    def setUp(self):
+        self.test_dir = tempfile.TemporaryDirectory()
+        # registered before the map is created, so the directory is removed
+        # even if creating the map fails; cleanups run after tearDown()
+        self.addCleanup(self.test_dir.cleanup)
+        self.tmpfile = join(self.test_dir.name, "my.log")
+        self.map = Int2IntMap(self.tmpfile)
+
+    def tearDown(self):
+        self.map.close()
+
+    def test_empty_map(self):
+        int_map = self.map
+        self.assertIsNone(int_map.find(0))
+
+    def test_one_line_one_byte(self):
+        int_map = self.map
+        int_map.add(10, 1)  # add the key 10
+        self.assertIsNone(int_map.find(9))  # directly before
+        self.assertEqual(1, int_map.find(10))
+        self.assertIsNone(int_map.find(11))  # directly after
+
+    def test_one_line_two_bytes(self):
+        int_map = self.map
+        int_map.add(10, 1)  # added key 10
+        int_map.add(11, 2)  # added key 11
+        self.assertIsNone(int_map.find(9))  # directly before
+        self.assertEqual(1, int_map.find(10))
+        self.assertEqual(2, int_map.find(11))
+        self.assertIsNone(int_map.find(12))  # directly after
+
+    def test_two_lines(self):
+        int_map = self.map
+        int_map.add(10, 1)  # added key 10
+        int_map.add(12, 2)  # added key 12
+        self.assertIsNone(int_map.find(9))  # directly before
+        self.assertEqual(1, int_map.find(10))
+        self.assertIsNone(int_map.find(11))  # between
+        self.assertEqual(2, int_map.find(12))
+        self.assertIsNone(int_map.find(13))  # directly after
+
+    def test_fill_map(self):
+        """Look up all keys and all gaps between them while the map grows."""
+        int_map = self.map
+        # small blocks, so that the entries spread over several blocks
+        int_map.blocksize = 64
+
+        # fill the map with the lines
+        # 10,1
+        # 20,2
+        # 30,3
+        # ...
+        #
+        # range(1, 50) results in 6 blocks of 64 bytes
+        for i in range(1, 50):
+            int_map.add(i * 10, i)
+
+            for j in range(1, i * 10):
+                if j % 10 == 0:
+                    # keys that are in the map
+                    self.assertEqual(j // 10, int_map.find(j))
+                else:
+                    # keys that are not in the map
+                    self.assertIsNone(int_map.find(j))
+
+
+# Run the test cases of this module when the file is executed directly.
+if __name__ == '__main__':
+    unittest.main()
--- a/raven/util/urlutils.py
+++ b/raven/util/urlutils.py
@@ -0,0 +1,28 @@
+import os
+from urllib.parse import urlparse
+import sys
+
+
+def urls_to_path(urls: str) -> "list[str]":
+    """Convert a newline separated list of URLs to a list of file system paths.
+
+    Every line of ``urls`` is converted with url_to_path(); the result keeps
+    the order of the input lines. An empty string yields an empty list.
+    """
+    return [url_to_path(url) for url in urls.splitlines()]
+
+def url_to_path(url: str) -> str:
+    """Convert a file URL to an absolute local file system path.
+
+    Percent-encoded characters in the URL path are decoded, so
+    "file:///tmp/my%20file.log" refers to the file "my file.log" in "/tmp".
+    """
+    # imported locally so that the function does not rely on a changed import block
+    from urllib.parse import unquote
+
+    p = urlparse(url)
+    path = unquote(p.path)
+    if sys.platform == 'win32' or sys.platform == 'cygwin':
+        # presumably the URL path looks like "/C:/dir/file" here, so the
+        # leading slash is dropped - TODO confirm for cygwin
+        return os.path.abspath(path[1:])
+    return os.path.abspath(os.path.join(p.netloc, path))
+
+
+def url_is_file(string: str) -> bool:
+    """Check that every "file://" URL in ``string`` points to an existing file.
+
+    ``string`` is examined line by line. Lines that do not start with
+    "file://" are ignored, so the result is True when there is no such line.
+    """
+    file_urls = (line for line in string.splitlines() if line.startswith("file://"))
+    # all() stops at the first URL whose path is not an existing regular file
+    return all(os.path.isfile(url_to_path(file_url)) for file_url in file_urls)