Handle characters in the Unicode category 'nonspacing mark' (Mn)
These characters do not get a column of their own, because they decorate the previous character.
This commit is contained in:
@@ -308,7 +308,7 @@ class InnerBigText(QWidget):
|
|||||||
column_in_line = self.x_pos_to_column(e.pos().x()) + self._left_offset
|
column_in_line = self.x_pos_to_column(e.pos().x()) + self._left_offset
|
||||||
column_in_line = min(column_in_line, line.length_in_columns()) # x was behind the last column of this line
|
column_in_line = min(column_in_line, line.length_in_columns()) # x was behind the last column of this line
|
||||||
char_in_line = line.column_to_char(column_in_line)
|
char_in_line = line.column_to_char(column_in_line)
|
||||||
print("%s in line %s lcolumn_in_line=%s" % (char_in_line, line_number, column_in_line))
|
# print("%s in line %s lcolumn_in_line=%s" % (char_in_line, line_number, column_in_line))
|
||||||
byte_in_line = line.char_index_to_byte(char_in_line)
|
byte_in_line = line.char_index_to_byte(char_in_line)
|
||||||
current_byte = line.byte_offset() + byte_in_line
|
current_byte = line.byte_offset() + byte_in_line
|
||||||
# print("%s + %s = %s" % (line.byte_offset(), char_in_line, current_byte))
|
# print("%s + %s = %s" % (line.byte_offset(), char_in_line, current_byte))
|
||||||
@@ -401,6 +401,7 @@ class InnerBigText(QWidget):
|
|||||||
# draw hightlights first - some characters may overlap to the next line
|
# draw hightlights first - some characters may overlap to the next line
|
||||||
# by drawing the background hightlights first we prevent that the hightlight
|
# by drawing the background hightlights first we prevent that the hightlight
|
||||||
# draws over a character
|
# draws over a character
|
||||||
|
start = time.time()
|
||||||
y_line_offset = self.char_height;
|
y_line_offset = self.char_height;
|
||||||
for l in self.lines:
|
for l in self.lines:
|
||||||
highlight_ranges = []
|
highlight_ranges = []
|
||||||
@@ -412,6 +413,9 @@ class InnerBigText(QWidget):
|
|||||||
self.draw_highlights(highlight_ranges, painter, y_line_offset)
|
self.draw_highlights(highlight_ranges, painter, y_line_offset)
|
||||||
y_line_offset = y_line_offset + self.char_height
|
y_line_offset = y_line_offset + self.char_height
|
||||||
|
|
||||||
|
end = time.time()
|
||||||
|
# print("highlight duration: %.3f" %((end-start)*1000))
|
||||||
|
|
||||||
left_offset = int(-1 * self._left_offset * self.char_width)
|
left_offset = int(-1 * self._left_offset * self.char_width)
|
||||||
y_line_offset = self.char_height;
|
y_line_offset = self.char_height;
|
||||||
for l in self.lines:
|
for l in self.lines:
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
01234
|
01234
|
||||||
01234567890123456789
|
01234567890123456789
|
||||||
012345678901234567890123456789012345678901234567890123456789
|
012345678901234567890123456789012345678901234567890123456789
|
||||||
0123456789012345678901234567890123456789
|
|
||||||
tab indentation:
|
tab indentation:
|
||||||
1 2 3 4 5
|
1 2 3 4 5
|
||||||
- 1-- 2-- 3- 4---5
|
- 1-- 2-- 3- 4---5
|
||||||
@@ -15,8 +14,12 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa|
|
|||||||
...............................|
|
...............................|
|
||||||
|
|
|
|
||||||
ääääääääääääää♥ääääääääääääääää|
|
ääääääääääääää♥ääääääääääääääää|
|
||||||
|
nonspacing marks:
|
||||||
next line consists of x%CC%88
|
next line consists of x%CC%88
|
||||||
ẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍ|
|
ẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍẍ|
|
||||||
|
Combining Double Circumflex Above: “◌᷍◌” (U+1DCD)
|
||||||
|
x◌᷍◌◌᷍◌x
|
||||||
|
👍🏿 dark thumbs up (U+1F44D + U+1F3FF - THUMBS UP SIGN + EMOJI MODIFIER FITZPATRICK TYPE-6)
|
||||||
2019-08-07 00:00:10,391 [catalina-exec-40] INFO c.r.c.u.l.PerformancePointcut - Executed HealthCheckController.checkOperativeness in 1 ms successful. [jv3fw7r2.m1u5]
|
2019-08-07 00:00:10,391 [catalina-exec-40] INFO c.r.c.u.l.PerformancePointcut - Executed HealthCheckController.checkOperativeness in 1 ms successful. [jv3fw7r2.m1u5]
|
||||||
2019-08-07 00:00:16,377 [catalina-exec-56] INFO c.r.c.u.l.PerformancePointcut - Executed HealthCheckController.checkOperativeness in 1 ms successful. [jv3fw7r2.m1u6]
|
2019-08-07 00:00:16,377 [catalina-exec-56] INFO c.r.c.u.l.PerformancePointcut - Executed HealthCheckController.checkOperativeness in 1 ms successful. [jv3fw7r2.m1u6]
|
||||||
2019-08-07 00:00:40,403 [catalina-exec-26] ERROR c.r.c.u.l.PerformancePointcut - Executed HealthCheckController.checkOperativeness in 1 ms successful. [jv3fw7r2.m1ud]
|
2019-08-07 00:00:40,403 [catalina-exec-26] ERROR c.r.c.u.l.PerformancePointcut - Executed HealthCheckController.checkOperativeness in 1 ms successful. [jv3fw7r2.m1ud]
|
||||||
|
|||||||
@@ -22,6 +22,8 @@ class HighlightRegex(Highlight):
|
|||||||
self.regex = self._get_regex()
|
self.regex = self._get_regex()
|
||||||
self.hit_background_color = hit_background_color
|
self.hit_background_color = hit_background_color
|
||||||
self.line_background_color = line_background_color
|
self.line_background_color = line_background_color
|
||||||
|
self._brush_hit = self.brush(self.hit_background_color)
|
||||||
|
self._brush_line = self.brush(self.line_background_color)
|
||||||
|
|
||||||
def _get_regex(self):
|
def _get_regex(self):
|
||||||
flags = re.IGNORECASE if self.ignore_case else 0
|
flags = re.IGNORECASE if self.ignore_case else 0
|
||||||
@@ -46,8 +48,8 @@ class HighlightRegex(Highlight):
|
|||||||
start_column,
|
start_column,
|
||||||
end_column - start_column,
|
end_column - start_column,
|
||||||
highlight_full_line=True,
|
highlight_full_line=True,
|
||||||
brush=self.brush(self.hit_background_color),
|
brush=self._brush_hit,
|
||||||
brush_full_line=self.brush(self.line_background_color)
|
brush_full_line=self._brush_line
|
||||||
))
|
))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|||||||
49
line.py
49
line.py
@@ -1,3 +1,4 @@
|
|||||||
|
import unicodedata
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
import constants
|
import constants
|
||||||
@@ -8,7 +9,8 @@ class Line:
|
|||||||
self._byte_offset = byte_offset
|
self._byte_offset = byte_offset
|
||||||
self._byte_end = byte_end
|
self._byte_end = byte_end
|
||||||
self._line = line
|
self._line = line
|
||||||
self._char_to_column_cache = self._cache_char_to_column()
|
|
||||||
|
self._cache_char_to_column()
|
||||||
|
|
||||||
def byte_offset(self) -> int:
|
def byte_offset(self) -> int:
|
||||||
return self._byte_offset
|
return self._byte_offset
|
||||||
@@ -23,9 +25,11 @@ class Line:
|
|||||||
return len(self._line)
|
return len(self._line)
|
||||||
|
|
||||||
def length_in_columns(self) -> int:
|
def length_in_columns(self) -> int:
|
||||||
return self.char_to_column(len(self._line))
|
return self.char_to_column(len(self._line) - 1)
|
||||||
|
|
||||||
def char_index_to_byte(self, char_in_line: int) -> int:
|
def char_index_to_byte(self, char_in_line: int) -> int:
|
||||||
|
# todo this does not work with multibyte characters
|
||||||
|
# should probably be len(self.prefix(char_in_line-1).encode("utf8"))
|
||||||
return len(self.prefix(char_in_line).encode("utf8"))
|
return len(self.prefix(char_in_line).encode("utf8"))
|
||||||
|
|
||||||
def byte_index_to_char_index(self, byte_index: int) -> int:
|
def byte_index_to_char_index(self, byte_index: int) -> int:
|
||||||
@@ -52,19 +56,7 @@ class Line:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
def column_to_char(self, column_in_line: int) -> int:
|
def column_to_char(self, column_in_line: int) -> int:
|
||||||
i = 0
|
return self._column_to_char_cache[column_in_line]
|
||||||
result = 0
|
|
||||||
while i < column_in_line:
|
|
||||||
char = self._line[result]
|
|
||||||
if char == "\t":
|
|
||||||
i = i + constants.tab_width - i % constants.tab_width # jump the additional columns to complete the tab
|
|
||||||
if i > column_in_line:
|
|
||||||
break;
|
|
||||||
else:
|
|
||||||
i = i + 1
|
|
||||||
result = result + 1
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def char_to_column(self, char_in_line: int) -> int:
|
def char_to_column(self, char_in_line: int) -> int:
|
||||||
if not char_in_line in self._char_to_column_cache:
|
if not char_in_line in self._char_to_column_cache:
|
||||||
@@ -72,19 +64,34 @@ class Line:
|
|||||||
return -1
|
return -1
|
||||||
return self._char_to_column_cache[char_in_line]
|
return self._char_to_column_cache[char_in_line]
|
||||||
|
|
||||||
def _cache_char_to_column(self) -> Dict[int, int]:
|
def _cache_char_to_column(self):
|
||||||
char_to_column_cache = {}
|
self._char_to_column_cache = {}
|
||||||
|
self._column_to_char_cache = {}
|
||||||
result = 0
|
result = 0
|
||||||
i = 0
|
i = 0
|
||||||
char_to_column_cache[0] = 0
|
self._char_to_column_cache[0] = 0
|
||||||
while i < len(self._line):
|
while i < len(self._line):
|
||||||
if i < len(self._line) and self._line[i] == "\t":
|
self._char_to_column_cache[i] = result
|
||||||
|
if not result in self._column_to_char_cache:
|
||||||
|
self._column_to_char_cache[result] = i
|
||||||
|
current_char = self._line[i]
|
||||||
|
if current_char == "\t":
|
||||||
result = result + constants.tab_width - result % constants.tab_width
|
result = result + constants.tab_width - result % constants.tab_width
|
||||||
else:
|
else:
|
||||||
result = result + 1
|
result = result + 1
|
||||||
i = i + 1
|
i = i + 1
|
||||||
char_to_column_cache[i] = result
|
|
||||||
return char_to_column_cache
|
# ignore: Nonspacing Mark characters are decorations for the previous character.
|
||||||
|
# They do not take up space.
|
||||||
|
# For example the character Combining Diaeresis (U+0308, %CC%88) that adds two
|
||||||
|
# dots above the previous character. It can be used to create an 'ä' from an 'a'+'◌̈'.
|
||||||
|
# In url encoding this looks like: a%CC%88.
|
||||||
|
# todo there are many other character combinations that should be skipped
|
||||||
|
while i < len(self._line) and unicodedata.category(self._line[i]) == "Mn":
|
||||||
|
self._char_to_column_cache[i] = result - 1
|
||||||
|
if not result in self._column_to_char_cache:
|
||||||
|
self._column_to_char_cache[result - 1] = i
|
||||||
|
i = i + 1
|
||||||
|
|
||||||
def includes_byte(self, byte: int) -> bool:
|
def includes_byte(self, byte: int) -> bool:
|
||||||
return self._byte_offset <= byte <= self._byte_end
|
return self._byte_offset <= byte <= self._byte_end
|
||||||
|
|||||||
2
main.py
2
main.py
@@ -64,6 +64,8 @@ if __name__ == "__main__":
|
|||||||
# PluginRegistry.execute("open_file", "/home/andi/ws/ravenlog/example.log")
|
# PluginRegistry.execute("open_file", "/home/andi/ws/ravenlog/example.log")
|
||||||
PluginRegistry.execute("open_file",
|
PluginRegistry.execute("open_file",
|
||||||
"/home/andi/ws/performanceDb/data/production/lt_axc_21.4_133.02_maxInstance/lt_axc_21.4_133.02_maxInstance/app/axcng-service_i-0a69bd43d3624a5bc_172_28_60_222_VADPERFO01AA001_2021-09-21_091717/service/service.log");
|
"/home/andi/ws/performanceDb/data/production/lt_axc_21.4_133.02_maxInstance/lt_axc_21.4_133.02_maxInstance/app/axcng-service_i-0a69bd43d3624a5bc_172_28_60_222_VADPERFO01AA001_2021-09-21_091717/service/service.log");
|
||||||
|
PluginRegistry.execute("open_file",
|
||||||
|
"/home/andi/ws/ravenlog/example.log");
|
||||||
# window.open_file("C:\\Users\\andi\\ws\\some.log")
|
# window.open_file("C:\\Users\\andi\\ws\\some.log")
|
||||||
|
|
||||||
signal.signal(signal.SIGINT, stop_signal)
|
signal.signal(signal.SIGINT, stop_signal)
|
||||||
|
|||||||
35
scribble.py
35
scribble.py
@@ -1,4 +1,37 @@
|
|||||||
# extract icons from dll on windows
|
# extract icons from dll on windows
|
||||||
# https://mail.python.org/pipermail/python-win32/2009-April/009078.html
|
# https://mail.python.org/pipermail/python-win32/2009-April/009078.html
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
print(min(2290538861, 2342622222))
|
from highlighted_range import HighlightedRange
|
||||||
|
from line import Line
|
||||||
|
|
||||||
|
result = []
|
||||||
|
|
||||||
|
byte_offset = 123
|
||||||
|
text = "2021-09-21 08:40:38,187 [catalina-exec-37] INFO c.r.c.s.l.SearchAdapter - Fetched 0 fields, 1 folders, 0 content requests for 1 documents; took 763ms (including 0ms to fetch field names). [project=axcelerate.lds_5m_review2, session=37891bc0-a67e-4c43-90c0-c20da567f491, user=r-162] [kttsjx2h.48z.9ls] 2021-09-21 08:47:16,529 [BravaJobHandler-12] INFO c.r.b.c.f.i.DoneNotifierRunnable - CORE job for 'complete document request with output format xdl (source n.pdf)' complete. Notifying Brava server about completion for numId=LDS_001:00095883.. Extracting and moving XDL and SVG/Thumbs took 31ms (from remote stream from 172.28.60.208:51048,com.recommind.rmi.ssl.SslRmiExporter$CheckedSslRmiClientSocketFactory). Notifying with URL https://localhost:8889/BravaServer/done/xc_E4E99FE32A313D2FBA8D29F846C0EF439E8AE2BE159164D04B2AFD862F714BED_ (context switch time 0ms) [project=axcelerate.lds_5m_review2, session=500380b9-94c5-4740-b30a-81e9f6cd071d, user=r-377] [kttsjx2h.8ys.kai]"
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
|
||||||
|
regex = re.compile(r"\w", flags=re.IGNORECASE)
|
||||||
|
match_iter = re.finditer(regex, line.line())
|
||||||
|
for match in match_iter:
|
||||||
|
start_char = match.start(0)
|
||||||
|
end_char = match.end(0)
|
||||||
|
|
||||||
|
start_column = line.char_to_column(start_char)
|
||||||
|
end_column = line.char_to_column(end_char)
|
||||||
|
|
||||||
|
result.append(HighlightedRange(start_column, end_column - start_column, highlight_full_line=True, brush=None,
|
||||||
|
brush_full_line=None))
|
||||||
|
|
||||||
|
end = time.time()
|
||||||
|
print("duration: %.3f" % ((end - start) * 1000))
|
||||||
|
|
||||||
|
result = []
|
||||||
|
start = time.time()
|
||||||
|
for i in range(0, 10000):
|
||||||
|
result.append(i)
|
||||||
|
|
||||||
|
end = time.time()
|
||||||
|
print("duration: %.3f" % ((end - start) * 1000))
|
||||||
|
|||||||
55
testline.py
55
testline.py
@@ -1,5 +1,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
from line import Line
|
from line import Line
|
||||||
|
|
||||||
|
|
||||||
@@ -27,6 +29,20 @@ class MyTestCase(unittest.TestCase):
|
|||||||
self.assertEqual(9, line.column_to_char(15)) # tab
|
self.assertEqual(9, line.column_to_char(15)) # tab
|
||||||
self.assertEqual(10, line.column_to_char(16)) # g
|
self.assertEqual(10, line.column_to_char(16)) # g
|
||||||
|
|
||||||
|
def test_column_to_char_ignore_nonspacing_mark_charaters(self):
|
||||||
|
"""
|
||||||
|
nonspacing mark characters are those little decorations that are applied to the previous character,
|
||||||
|
e.g. x\u0308 to make ẍ
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
byte_offset = 123
|
||||||
|
text = "x\u0308y\u0308z\u0308"
|
||||||
|
line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
|
||||||
|
|
||||||
|
self.assertEqual(0, line.column_to_char(0)) # ẍ
|
||||||
|
self.assertEqual(2, line.column_to_char(1)) # ÿ
|
||||||
|
self.assertEqual(4, line.column_to_char(2)) # z̈
|
||||||
|
|
||||||
def test_char_to_column(self):
|
def test_char_to_column(self):
|
||||||
byte_offset = 123
|
byte_offset = 123
|
||||||
text = "\tabc\td\tef\tg" # will be rendered as: ....abc.d...ef..g where . represents a whitespace column
|
text = "\tabc\td\tef\tg" # will be rendered as: ....abc.d...ef..g where . represents a whitespace column
|
||||||
@@ -43,6 +59,23 @@ class MyTestCase(unittest.TestCase):
|
|||||||
self.assertEqual(14, line.char_to_column(9)) # tab
|
self.assertEqual(14, line.char_to_column(9)) # tab
|
||||||
self.assertEqual(16, line.char_to_column(10)) # g
|
self.assertEqual(16, line.char_to_column(10)) # g
|
||||||
|
|
||||||
|
def test_char_to_column_ignore_nonspacing_mark_charaters(self):
|
||||||
|
"""
|
||||||
|
nonspacing mark characters are those little decorations that are applied to the previous character,
|
||||||
|
e.g. x\u0308 to make ẍ
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
byte_offset = 123
|
||||||
|
text = "x\u0308y\u0308z\u0308"
|
||||||
|
print(text)
|
||||||
|
line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
|
||||||
|
self.assertEqual(0, line.char_to_column(0)) # ẍ
|
||||||
|
self.assertEqual(0, line.char_to_column(1)) # ẍ
|
||||||
|
self.assertEqual(1, line.char_to_column(2)) # ÿ
|
||||||
|
self.assertEqual(1, line.char_to_column(3)) # ÿ
|
||||||
|
self.assertEqual(2, line.char_to_column(4)) # z̈
|
||||||
|
self.assertEqual(2, line.char_to_column(5)) # z̈
|
||||||
|
|
||||||
def test_line_tabs_replaced(self):
|
def test_line_tabs_replaced(self):
|
||||||
byte_offset = 123
|
byte_offset = 123
|
||||||
text = "\ta\tb" # will be rendered as: ....abc where . represents a whitespace column
|
text = "\ta\tb" # will be rendered as: ....abc where . represents a whitespace column
|
||||||
@@ -57,5 +90,27 @@ class MyTestCase(unittest.TestCase):
|
|||||||
line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
|
line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
|
||||||
self.assertEqual(expected, line.line_tabs_replaced())
|
self.assertEqual(expected, line.line_tabs_replaced())
|
||||||
|
|
||||||
|
def test_byte_index_to_char_index(self):
|
||||||
|
byte_offset = 123
|
||||||
|
text = "x\u0308y\u0308z\u0308\t\u0308a"
|
||||||
|
print(text)
|
||||||
|
line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
|
||||||
|
self.assertEqual(0, line.byte_index_to_char_index(0)) # x
|
||||||
|
self.assertEqual(0, line.byte_index_to_char_index(1)) # first byte of diacritical mark belonging to x
|
||||||
|
self.assertEqual(0, line.byte_index_to_char_index(2)) # second byte of diacritical mark belonging to x
|
||||||
|
|
||||||
|
def test_diacritical_marks(self):
|
||||||
|
text = "̈ẍỏôŏ̮👍🏿"
|
||||||
|
text = "\U0001F9D9\u200D\u2642\uFE0F - \U0001F44D\U0001F3FF - a\u02c3 - ẍ - y\u0308 - w\u200D\u00A8"
|
||||||
|
text = unicodedata.normalize("NFD", text)
|
||||||
|
i = 0
|
||||||
|
print("%s" % text)
|
||||||
|
print("length: %s" % len(text))
|
||||||
|
while i < len(text):
|
||||||
|
c = text[i]
|
||||||
|
print("%s %s cat: %s" % (c, unicodedata.name(c), unicodedata.category(c)))
|
||||||
|
i = i + 1
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user