fix: graphemes are not correctly highlighted

Graphemes don't all have the same width, not even when you use a monospace font. For Latin characters it usually works fine to assume the same width. But emojis and Japanese or Chinese characters have different widths. There are even some ultra-wide characters like 𒐫 or ﷽. There are also so-called 'half-width' characters, e.g. the Japanese 'a' can be written as ア or ｱ. Fixed by actually computing the width of graphemes and using pixels.
2025-03-23 21:00:53 +01:00
parent 21b2da1e69
commit 61132d242f
6 changed files with 181 additions and 86 deletions
--- a/src/ui/bigtext/line.py
+++ b/src/ui/bigtext/line.py
@@ -1,16 +1,22 @@
 import unicodedata

+from PySide6.QtGui import QFontMetrics
+
 import constants


 class Line:
-    def __init__(self, byte_offset: int, byte_end: int, line: str):
+    def __init__(self, byte_offset: int, byte_end: int, line: str, bytes: str):
        self._byte_offset = byte_offset
        self._byte_end = byte_end
        self._line = line
+        self._bytes = bytes

        self._cache_char_to_column()

+    def get_width_in_px(self, font_metric: QFontMetrics):
+        return font_metric.horizontalAdvance(self._line)
+
    def byte_offset(self) -> int:
        return self._byte_offset

@@ -131,11 +137,20 @@ class Line:
    def prefix(self, index: int) -> str:
        return self._line[0:index]

+    def prefix_bytes(self, byte_index: int) -> str:
+        return self._bytes[0:byte_index]
+
    def substr(self, offset: int, length: int) -> str:
        return self._line[offset:offset+length]

+    def substr_bytes(self, byte_offset: int, byte_length: int) -> str:
+        return self._bytes[byte_offset:byte_offset + byte_length]
+
    def suffix(self, index: int) -> str:
        return self._line[index:]

+    def suffix_bytes(self, byte_index: int) -> str:
+        return self._bytes[byte_index:]
+
    def __str__(self):
        return "%s (%d->%d)" % (self._line, self._byte_offset, self._byte_end)