From ffea831e2dadea8e2c1a2968db97bf2b4f533c98 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Mon, 20 Dec 2021 19:44:46 +0100 Subject: [PATCH] fix the way tabs are used The previous code just assumed a tab was 4 spaces wide. This is not true, because a tab can be between 1 and 4 spaces wide. --- bigtext.py | 17 +++++++++-------- example.log | 2 +- line.py | 33 ++++++++++++++++++++++++++++----- testline.py | 43 +++++++++++++++++++++++++++++++++++++------ 4 files changed, 75 insertions(+), 20 deletions(-) diff --git a/bigtext.py b/bigtext.py index 8faa0e7..29b8c79 100644 --- a/bigtext.py +++ b/bigtext.py @@ -3,7 +3,7 @@ import sys import math import os import time -from typing import Callable +from typing import Callable, List from PyQt6 import QtGui from PyQt6.QtCore import * @@ -17,6 +17,7 @@ from conversion import humanbytes from highlight_selection import HighlightSelection from highlighted_range import HighlightedRange from highlightingdialog import HighlightingDialog +from line import Line from logFileModel import LogFileModel @@ -126,7 +127,7 @@ class InnerBigText(QWidget): self.customContextMenuRequested.connect(self._open_menu) self.update_font_metrics(QPainter(self)) - self.lines = [] + self.lines = List[Line] self.selection_highlight = HighlightSelection() self._last_double_click_time = 0 self._last_double_click_line_number = -1 @@ -213,7 +214,7 @@ class InnerBigText(QWidget): # triple click: select line line_number = self.y_pos_to_line(e.pos().y()) if line_number == self._last_double_click_line_number and line_number < len(self.lines): - line = self.lines[line_number] + line: Line = self.lines[line_number] self.selection_highlight.set_start(line.byte_offset()) self.selection_highlight.set_end_byte(line.byte_end()) self.update() @@ -227,7 +228,7 @@ class InnerBigText(QWidget): line_number = self.y_pos_to_line(e.pos().y()) if line_number < len(self.lines): - line = self.lines[line_number] + line: Line = self.lines[line_number] for listener in 
self.line_click_listeners: listener(line.byte_offset()) @@ -303,11 +304,11 @@ class InnerBigText(QWidget): line_number = self.y_pos_to_line(e.pos().y()) if line_number < len(self.lines): - line = self.lines[line_number] + line: Line = self.lines[line_number] column_in_line = self.x_pos_to_column(e.pos().x()) + self._left_offset - column_in_line = min(column_in_line, line.length()) # x was behind the last column of this line + column_in_line = min(column_in_line, line.length_in_columns()) # x was behind the last column of this line char_in_line = line.column_to_char(column_in_line) - # print("%s in line %s lcolumn_in_line=%s" % (char_in_line, line_number, column_in_line)) + print("%s in line %s lcolumn_in_line=%s" % (char_in_line, line_number, column_in_line)) byte_in_line = line.char_index_to_byte(char_in_line) current_byte = line.byte_offset() + byte_in_line # print("%s + %s = %s" % (line.byte_offset(), char_in_line, current_byte)) @@ -414,7 +415,7 @@ class InnerBigText(QWidget): left_offset = int(-1 * self._left_offset * self.char_width) y_line_offset = self.char_height; for l in self.lines: - text = l.line() # .replace("\t", tab_string) + text = l.line_tabs_replaced() painter.drawText(left_offset, y_line_offset, text) y_line_offset = y_line_offset + self.char_height diff --git a/example.log b/example.log index 21d9ae4..8499413 100644 --- a/example.log +++ b/example.log @@ -4,7 +4,7 @@ 0123456789012345678901234567890123456789 tab indentation: 1 2 3 4 5 -- 1-- 2--- 3---- 4-------5 +- 1-- 2-- 3- 4---5 --------1-------1-------1-------1-------1 mmmmmmmm1mmmmmmm1mmmmmmm1mmmmmmm1mmmmmmm1 1 1 1 1 1 diff --git a/line.py b/line.py index 542c72b..a738970 100644 --- a/line.py +++ b/line.py @@ -16,9 +16,12 @@ class Line: def line(self) -> str: return self._line - def length(self) -> int: + def length_in_charaters(self) -> int: return len(self._line) + def length_in_columns(self) -> int: + return self.char_to_column(len(self._line)) + def char_index_to_byte(self, 
char_in_line: int) -> int: return len(self.prefix(char_in_line).encode("utf8")) @@ -27,26 +30,46 @@ class Line: prefix_chars = prefix_bytes.decode("utf8", errors="ignore") return len(prefix_chars) + def line_tabs_replaced(self): + line = self._line; + i = 0 + offset = 0 + result = "" + length = len(line) + while True: + tab_index = line.find("\t", offset) + if tab_index < 0: + break + result = result + line[offset:tab_index] + result = result + " " * (constants.tab_width - len(result) % constants.tab_width) + offset = tab_index + 1 + + result = result + line[offset:] + + return result + def column_to_char(self, column_in_line: int) -> int: i = 0 result = 0 while i < column_in_line: char = self._line[result] if char == "\t": - i = i + constants.tab_width - 1 # jump the additional 7 columns of the tab width - if i >= column_in_line: + i = i + constants.tab_width - i % constants.tab_width # jump the additional columns to complete the tab + if i > column_in_line: break; - i = i + 1 + else: + i = i + 1 result = result + 1 return result + # todo this method is slow def char_to_column(self, char_in_line: int) -> int: result = 0 i = 0 while i < char_in_line: if i < len(self._line) and self._line[i] == "\t": - result = result + constants.tab_width + result = result + constants.tab_width - result % constants.tab_width else: result = result + 1 i = i + 1 diff --git a/testline.py b/testline.py index 2229a2b..917ebda 100644 --- a/testline.py +++ b/testline.py @@ -6,25 +6,56 @@ from line import Line class MyTestCase(unittest.TestCase): def test_column_to_char(self): byte_offset = 123 - text = "\tabc" # will be rendered as: ........abc where . represents a whitespace column + text = "\tabc\td\tef\tg" # will be rendered as: ....abc.d...ef..g where . 
represents a whitespace column line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text) self.assertEqual(0, line.column_to_char(0)) # the tab self.assertEqual(0, line.column_to_char(1)) # the tab + self.assertEqual(0, line.column_to_char(2)) # the tab self.assertEqual(0, line.column_to_char(3)) # last column of the tab self.assertEqual(1, line.column_to_char(4)) # a self.assertEqual(2, line.column_to_char(5)) # b self.assertEqual(3, line.column_to_char(6)) # c + self.assertEqual(4, line.column_to_char(7)) # tab + self.assertEqual(5, line.column_to_char(8)) # d + self.assertEqual(6, line.column_to_char(9)) # tab + self.assertEqual(6, line.column_to_char(10)) # tab + self.assertEqual(6, line.column_to_char(11)) # tab + self.assertEqual(7, line.column_to_char(12)) # e + self.assertEqual(8, line.column_to_char(13)) # f + self.assertEqual(9, line.column_to_char(14)) # tab + self.assertEqual(9, line.column_to_char(15)) # tab + self.assertEqual(10, line.column_to_char(16)) # g def test_char_to_column(self): byte_offset = 123 - text = "\tabc" # will be rendered as: ........abc where . represents a whitespace column + text = "\tabc\td\tef\tg" # will be rendered as: ....abc.d...ef..g where . 
represents a whitespace column line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text) - self.assertEqual(0, line.char_to_column(0)) - self.assertEqual(4, line.char_to_column(1)) - self.assertEqual(5, line.char_to_column(2)) - self.assertEqual(6, line.char_to_column(3)) + self.assertEqual(0, line.char_to_column(0)) # tab + self.assertEqual(4, line.char_to_column(1)) # a + self.assertEqual(5, line.char_to_column(2)) # b + self.assertEqual(6, line.char_to_column(3)) # c + self.assertEqual(7, line.char_to_column(4)) # tab + self.assertEqual(8, line.char_to_column(5)) # d + self.assertEqual(9, line.char_to_column(6)) # tab + self.assertEqual(12, line.char_to_column(7)) # e + self.assertEqual(13, line.char_to_column(8)) # f + self.assertEqual(14, line.char_to_column(9)) # tab + self.assertEqual(16, line.char_to_column(10)) # g + def test_line_tabs_replaced(self): + byte_offset = 123 + text = "\ta\tb" # will be rendered as: ....a...b where . represents a whitespace column + expected = " a b" + line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text) + self.assertEqual(expected, line.line_tabs_replaced()) + + def test_line_tabs_replaced_performance(self): + byte_offset = 123 + text = "a\t" * 10000 + expected = "a " * 10000 + line = Line(byte_offset=byte_offset, byte_end=byte_offset + len(text.encode("utf8")), line=text) + self.assertEqual(expected, line.line_tabs_replaced()) if __name__ == '__main__': unittest.main()