fix the way tabs are used

The previous code assumed that a tab is always 4 columns wide.
This is not true: a tab advances to the next tab stop, so it occupies between 1 and 4 columns depending on the column in which it starts.
This commit is contained in:
2021-12-20 19:44:46 +01:00
parent 3c27913e75
commit ffea831e2d
4 changed files with 75 additions and 20 deletions

View File

@@ -3,7 +3,7 @@ import sys
import math import math
import os import os
import time import time
from typing import Callable from typing import Callable, List
from PyQt6 import QtGui from PyQt6 import QtGui
from PyQt6.QtCore import * from PyQt6.QtCore import *
@@ -17,6 +17,7 @@ from conversion import humanbytes
from highlight_selection import HighlightSelection from highlight_selection import HighlightSelection
from highlighted_range import HighlightedRange from highlighted_range import HighlightedRange
from highlightingdialog import HighlightingDialog from highlightingdialog import HighlightingDialog
from line import Line
from logFileModel import LogFileModel from logFileModel import LogFileModel
@@ -126,7 +127,7 @@ class InnerBigText(QWidget):
self.customContextMenuRequested.connect(self._open_menu) self.customContextMenuRequested.connect(self._open_menu)
self.update_font_metrics(QPainter(self)) self.update_font_metrics(QPainter(self))
self.lines = [] self.lines = List[Line]
self.selection_highlight = HighlightSelection() self.selection_highlight = HighlightSelection()
self._last_double_click_time = 0 self._last_double_click_time = 0
self._last_double_click_line_number = -1 self._last_double_click_line_number = -1
@@ -213,7 +214,7 @@ class InnerBigText(QWidget):
# triple click: select line # triple click: select line
line_number = self.y_pos_to_line(e.pos().y()) line_number = self.y_pos_to_line(e.pos().y())
if line_number == self._last_double_click_line_number and line_number < len(self.lines): if line_number == self._last_double_click_line_number and line_number < len(self.lines):
line = self.lines[line_number] line: Line = self.lines[line_number]
self.selection_highlight.set_start(line.byte_offset()) self.selection_highlight.set_start(line.byte_offset())
self.selection_highlight.set_end_byte(line.byte_end()) self.selection_highlight.set_end_byte(line.byte_end())
self.update() self.update()
@@ -227,7 +228,7 @@ class InnerBigText(QWidget):
line_number = self.y_pos_to_line(e.pos().y()) line_number = self.y_pos_to_line(e.pos().y())
if line_number < len(self.lines): if line_number < len(self.lines):
line = self.lines[line_number] line: Line = self.lines[line_number]
for listener in self.line_click_listeners: for listener in self.line_click_listeners:
listener(line.byte_offset()) listener(line.byte_offset())
@@ -303,11 +304,11 @@ class InnerBigText(QWidget):
line_number = self.y_pos_to_line(e.pos().y()) line_number = self.y_pos_to_line(e.pos().y())
if line_number < len(self.lines): if line_number < len(self.lines):
line = self.lines[line_number] line: Line = self.lines[line_number]
column_in_line = self.x_pos_to_column(e.pos().x()) + self._left_offset column_in_line = self.x_pos_to_column(e.pos().x()) + self._left_offset
column_in_line = min(column_in_line, line.length()) # x was behind the last column of this line column_in_line = min(column_in_line, line.length_in_columns()) # x was behind the last column of this line
char_in_line = line.column_to_char(column_in_line) char_in_line = line.column_to_char(column_in_line)
# print("%s in line %s lcolumn_in_line=%s" % (char_in_line, line_number, column_in_line)) print("%s in line %s lcolumn_in_line=%s" % (char_in_line, line_number, column_in_line))
byte_in_line = line.char_index_to_byte(char_in_line) byte_in_line = line.char_index_to_byte(char_in_line)
current_byte = line.byte_offset() + byte_in_line current_byte = line.byte_offset() + byte_in_line
# print("%s + %s = %s" % (line.byte_offset(), char_in_line, current_byte)) # print("%s + %s = %s" % (line.byte_offset(), char_in_line, current_byte))
@@ -414,7 +415,7 @@ class InnerBigText(QWidget):
left_offset = int(-1 * self._left_offset * self.char_width) left_offset = int(-1 * self._left_offset * self.char_width)
y_line_offset = self.char_height; y_line_offset = self.char_height;
for l in self.lines: for l in self.lines:
text = l.line() # .replace("\t", tab_string) text = l.line_tabs_replaced()
painter.drawText(left_offset, y_line_offset, text) painter.drawText(left_offset, y_line_offset, text)
y_line_offset = y_line_offset + self.char_height y_line_offset = y_line_offset + self.char_height

View File

@@ -4,7 +4,7 @@
0123456789012345678901234567890123456789 0123456789012345678901234567890123456789
tab indentation: tab indentation:
1 2 3 4 5 1 2 3 4 5
- 1-- 2--- 3---- 4-------5 - 1-- 2-- 3- 4---5
--------1-------1-------1-------1-------1 --------1-------1-------1-------1-------1
mmmmmmmm1mmmmmmm1mmmmmmm1mmmmmmm1mmmmmmm1 mmmmmmmm1mmmmmmm1mmmmmmm1mmmmmmm1mmmmmmm1
1 1 1 1 1 1 1 1 1 1

31
line.py
View File

@@ -16,9 +16,12 @@ class Line:
def line(self) -> str: def line(self) -> str:
return self._line return self._line
def length(self) -> int: def length_in_charaters(self) -> int:
return len(self._line) return len(self._line)
def length_in_columns(self) -> int:
    """Return the length of this line measured in columns.

    The character count of the line is converted to a column position
    via ``char_to_column``.
    """
    char_count = len(self._line)
    return self.char_to_column(char_count)
def char_index_to_byte(self, char_in_line: int) -> int: def char_index_to_byte(self, char_in_line: int) -> int:
return len(self.prefix(char_in_line).encode("utf8")) return len(self.prefix(char_in_line).encode("utf8"))
@@ -27,26 +30,46 @@ class Line:
prefix_chars = prefix_bytes.decode("utf8", errors="ignore") prefix_chars = prefix_bytes.decode("utf8", errors="ignore")
return len(prefix_chars) return len(prefix_chars)
def line_tabs_replaced(self) -> str:
    """Return the line with every tab replaced by spaces up to the next tab stop.

    Tab stops are placed every ``constants.tab_width`` columns, so a tab
    expands to between 1 and ``constants.tab_width`` spaces depending on
    the column in which it starts.

    Returns:
        The expanded text; a line without tabs is returned unchanged.
    """
    tab_width = constants.tab_width
    parts = []
    column = 0  # width of the text emitted so far
    # str.expandtabs is deliberately not used: it resets the column at "\r"
    # and "\n", whereas this method counts every character as one column.
    for index, segment in enumerate(self._line.split("\t")):
        if index > 0:
            # Every segment after the first was preceded by a tab: pad to
            # the next tab stop.
            padding = tab_width - column % tab_width
            parts.append(" " * padding)
            column += padding
        parts.append(segment)
        column += len(segment)
    return "".join(parts)
def column_to_char(self, column_in_line: int) -> int: def column_to_char(self, column_in_line: int) -> int:
i = 0 i = 0
result = 0 result = 0
while i < column_in_line: while i < column_in_line:
char = self._line[result] char = self._line[result]
if char == "\t": if char == "\t":
i = i + constants.tab_width - 1 # jump the additional 7 columns of the tab width i = i + constants.tab_width - i % constants.tab_width # jump the additional columns to complete the tab
if i >= column_in_line: if i > column_in_line:
break; break;
else:
i = i + 1 i = i + 1
result = result + 1 result = result + 1
return result return result
# todo this method is slow
def char_to_column(self, char_in_line: int) -> int: def char_to_column(self, char_in_line: int) -> int:
result = 0 result = 0
i = 0 i = 0
while i < char_in_line: while i < char_in_line:
if i < len(self._line) and self._line[i] == "\t": if i < len(self._line) and self._line[i] == "\t":
result = result + constants.tab_width result = result + constants.tab_width - result % constants.tab_width
else: else:
result = result + 1 result = result + 1
i = i + 1 i = i + 1

View File

@@ -6,25 +6,56 @@ from line import Line
class MyTestCase(unittest.TestCase): class MyTestCase(unittest.TestCase):
def test_column_to_char(self): def test_column_to_char(self):
byte_offset = 123 byte_offset = 123
text = "\tabc" # will be rendered as: ........abc where . represents a whitespace column text = "\tabc\td\tef\tg" # will be rendered as: ....abc.d...ef..g where . represents a whitespace column
line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text) line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
self.assertEqual(0, line.column_to_char(0)) # the tab self.assertEqual(0, line.column_to_char(0)) # the tab
self.assertEqual(0, line.column_to_char(1)) # the tab self.assertEqual(0, line.column_to_char(1)) # the tab
self.assertEqual(0, line.column_to_char(2)) # the tab
self.assertEqual(0, line.column_to_char(3)) # last column of the tab self.assertEqual(0, line.column_to_char(3)) # last column of the tab
self.assertEqual(1, line.column_to_char(4)) # a self.assertEqual(1, line.column_to_char(4)) # a
self.assertEqual(2, line.column_to_char(5)) # b self.assertEqual(2, line.column_to_char(5)) # b
self.assertEqual(3, line.column_to_char(6)) # c self.assertEqual(3, line.column_to_char(6)) # c
self.assertEqual(4, line.column_to_char(7)) # tab
self.assertEqual(5, line.column_to_char(8)) # d
self.assertEqual(6, line.column_to_char(9)) # tab
self.assertEqual(6, line.column_to_char(10)) # tab
self.assertEqual(6, line.column_to_char(11)) # tab
self.assertEqual(7, line.column_to_char(12)) # e
self.assertEqual(8, line.column_to_char(13)) # f
self.assertEqual(9, line.column_to_char(14)) # tab
self.assertEqual(9, line.column_to_char(15)) # tab
self.assertEqual(10, line.column_to_char(16)) # g
def test_char_to_column(self): def test_char_to_column(self):
byte_offset = 123 byte_offset = 123
text = "\tabc" # will be rendered as: ........abc where . represents a whitespace column text = "\tabc\td\tef\tg" # will be rendered as: ....abc.d...ef..g where . represents a whitespace column
line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text) line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
self.assertEqual(0, line.char_to_column(0)) self.assertEqual(0, line.char_to_column(0)) # tab
self.assertEqual(4, line.char_to_column(1)) self.assertEqual(4, line.char_to_column(1)) # a
self.assertEqual(5, line.char_to_column(2)) self.assertEqual(5, line.char_to_column(2)) # b
self.assertEqual(6, line.char_to_column(3)) self.assertEqual(6, line.char_to_column(3)) # c
self.assertEqual(7, line.char_to_column(4)) # tab
self.assertEqual(8, line.char_to_column(5)) # d
self.assertEqual(9, line.char_to_column(6)) # tab
self.assertEqual(12, line.char_to_column(7)) # e
self.assertEqual(13, line.char_to_column(8)) # f
self.assertEqual(14, line.char_to_column(9)) # tab
self.assertEqual(16, line.char_to_column(10)) # g
def test_line_tabs_replaced(self):
    """Tabs are replaced by spaces up to the next tab stop (tab width 4)."""
    byte_offset = 123
    text = "\ta\tb"  # will be rendered as: ....a...b where . represents a whitespace column
    # First tab starts at column 0 -> 4 spaces; second starts at column 5 -> 3 spaces.
    expected = "    a   b"
    line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
    self.assertEqual(expected, line.line_tabs_replaced())
def test_line_tabs_replaced_performance(self):
    """A line containing 10000 tabs is expanded correctly."""
    byte_offset = 123
    text = "a\t" * 10000
    # With the tab width of 4 that the sibling tests assume, every "a" starts
    # on a tab stop, so each following tab expands to exactly 3 spaces.
    expected = "a   " * 10000
    line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text)
    self.assertEqual(expected, line.line_tabs_replaced())
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()