Handle characters in the category 'nonspacing mark'
Those characters are ignored, because they decorate the previous character.
This commit is contained in:
35
scribble.py
35
scribble.py
@@ -1,4 +1,37 @@
|
||||
# extract icons from dll on windows
|
||||
# https://mail.python.org/pipermail/python-win32/2009-April/009078.html
|
||||
import re
|
||||
import time
|
||||
|
||||
# Scratch check: print the smaller of the two literal values.
print(min((2290538861, 2342622222)))
|
||||
from highlighted_range import HighlightedRange
|
||||
from line import Line
|
||||
|
||||
# Benchmark 1: time how long it takes to turn every regex match in a long
# log line into a HighlightedRange.
result = []

byte_offset = 123
text = "2021-09-21 08:40:38,187 [catalina-exec-37] INFO c.r.c.s.l.SearchAdapter - Fetched 0 fields, 1 folders, 0 content requests for 1 documents; took 763ms (including 0ms to fetch field names). [project=axcelerate.lds_5m_review2, session=37891bc0-a67e-4c43-90c0-c20da567f491, user=r-162] [kttsjx2h.48z.9ls] 2021-09-21 08:47:16,529 [BravaJobHandler-12] INFO c.r.b.c.f.i.DoneNotifierRunnable - CORE job for 'complete document request with output format xdl (source n.pdf)' complete. Notifying Brava server about completion for numId=LDS_001:00095883.. Extracting and moving XDL and SVG/Thumbs took 31ms (from remote stream from 172.28.60.208:51048,com.recommind.rmi.ssl.SslRmiExporter$CheckedSslRmiClientSocketFactory). Notifying with URL https://localhost:8889/BravaServer/done/xc_E4E99FE32A313D2FBA8D29F846C0EF439E8AE2BE159164D04B2AFD862F714BED_ (context switch time 0ms) [project=axcelerate.lds_5m_review2, session=500380b9-94c5-4740-b30a-81e9f6cd071d, user=r-377] [kttsjx2h.8ys.kai]"

# perf_counter() is the monotonic, high-resolution clock intended for
# measuring short intervals; time.time() can jump when the wall clock changes.
start = time.perf_counter()

# byte_end is the offset just past the UTF-8 encoding of the text.
line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)

# r"\w" matches each word character individually, so this produces one match
# (and one HighlightedRange) per word character in the text.
regex = re.compile(r"\w", flags=re.IGNORECASE)
match_iter = regex.finditer(line.line())
for match in match_iter:
    start_char = match.start(0)
    end_char = match.end(0)

    # NOTE(review): char_to_column presumably maps a character index to a
    # display column -- confirm against the Line class.
    start_column = line.char_to_column(start_char)
    end_column = line.char_to_column(end_char)

    result.append(
        HighlightedRange(
            start_column,
            end_column - start_column,
            highlight_full_line=True,
            brush=None,
            brush_full_line=None,
        )
    )

end = time.perf_counter()
# Elapsed seconds are scaled by 1000, so the printed value is in milliseconds.
print("duration: %.3f" % ((end - start) * 1000))
|
||||
|
||||
# Benchmark 2: time 10000 plain list appends.
# NOTE(review): presumably a baseline to compare the benchmark above
# against -- confirm. The explicit append loop is kept on purpose, because
# the loop itself is what is being timed.
result = []
# perf_counter() is the monotonic, high-resolution clock intended for
# measuring short intervals; time.time() can jump when the wall clock changes.
start = time.perf_counter()
for i in range(0, 10000):
    result.append(i)

end = time.perf_counter()
# Elapsed seconds are scaled by 1000, so the printed value is in milliseconds.
print("duration: %.3f" % ((end - start) * 1000))
|
||||
|
||||
Reference in New Issue
Block a user