fix: graphemes are not correctly highlighted

Graphemes don't all have the same width, not even when you use a monospace font.
For Latin characters it usually works fine to assume the same width. But emojis and
Japanese or Chinese characters have a different width. There are even some
ultra-wide characters like 𒐫 or ﷽. There are also so-called
'half-width' characters, e.g. the Japanese 'a' can be written as ア (full-width) or ｱ (half-width).

Fixed by actually computing the width of graphemes and using pixel widths instead of assuming a fixed width per character.
This commit is contained in:
2025-03-23 21:00:53 +01:00
parent 21b2da1e69
commit 61132d242f
6 changed files with 181 additions and 86 deletions

View File

@@ -1,16 +1,22 @@
import unicodedata
from PySide6.QtGui import QFontMetrics
import constants
class Line:
def __init__(self, byte_offset: int, byte_end: int, line: str):
def __init__(self, byte_offset: int, byte_end: int, line: str, bytes: str):
    """Store the line's byte range and content, then build the column cache.

    Args:
        byte_offset: Stored as ``_byte_offset``.
        byte_end: Stored as ``_byte_end``.
        line: Text of the line, stored as ``_line``.
        bytes: Byte-level representation of the line, stored as ``_bytes``.
            NOTE(review): annotated as ``str`` although the name suggests a
            ``bytes`` object -- confirm against the caller. The name also
            shadows the builtin inside this method; it is kept because it is
            part of the public signature.
    """
    self._line, self._bytes = line, bytes
    self._byte_offset, self._byte_end = byte_offset, byte_end
    # Called last so every field above is already set. The helper is defined
    # elsewhere in the class; presumably it derives its cache from these
    # fields -- verify.
    self._cache_char_to_column()
def get_width_in_px(self, font_metric: QFontMetrics):
    """Return the horizontal advance of the whole line text for a font.

    Args:
        font_metric: Metrics object whose ``horizontalAdvance`` is called
            with the stored line text.

    Returns:
        Whatever ``font_metric.horizontalAdvance`` reports for ``self._line``.
    """
    text = self._line
    return font_metric.horizontalAdvance(text)
def byte_offset(self) -> int:
    """Return the value stored in ``_byte_offset``."""
    offset = self._byte_offset
    return offset
@@ -131,11 +137,20 @@ class Line:
def prefix(self, index: int) -> str:
    """Return the part of the line text before position ``index``.

    Uses ordinary slice semantics: an ``index`` past the end yields the
    whole text, and a negative ``index`` counts from the end.
    """
    text = self._line
    return text[:index]
def prefix_bytes(self, byte_index: int) -> str:
    """Return the part of ``_bytes`` before position ``byte_index``.

    The result has the same type as ``self._bytes``, since it is a plain
    slice. NOTE(review): the ``-> str`` annotation may not match that type
    if ``_bytes`` holds a ``bytes`` object -- confirm.
    """
    raw = self._bytes
    return raw[:byte_index]
def substr(self, offset: int, length: int) -> str:
    """Return up to ``length`` characters of the line text from ``offset``.

    The slice is clamped to the end of the text, so the result may be
    shorter than ``length``.
    """
    end = offset + length
    return self._line[offset:end]
def substr_bytes(self, byte_offset: int, byte_length: int) -> str:
    """Return up to ``byte_length`` items of ``_bytes`` from ``byte_offset``.

    The slice is clamped to the end of ``_bytes``. The result has the same
    type as ``self._bytes``. NOTE(review): the ``-> str`` annotation may not
    match that type if ``_bytes`` holds a ``bytes`` object -- confirm.
    """
    byte_stop = byte_offset + byte_length
    return self._bytes[byte_offset:byte_stop]
def suffix(self, index: int) -> str:
    """Return the part of the line text from position ``index`` to the end.

    Uses ordinary slice semantics: an ``index`` past the end yields an
    empty string, and a negative ``index`` counts from the end.
    """
    text = self._line
    return text[index:]
def suffix_bytes(self, byte_index: int) -> str:
    """Return the part of ``_bytes`` from position ``byte_index`` to the end.

    The result has the same type as ``self._bytes``, since it is a plain
    slice. NOTE(review): the ``-> str`` annotation may not match that type
    if ``_bytes`` holds a ``bytes`` object -- confirm.
    """
    raw = self._bytes
    return raw[byte_index:]
def __str__(self):
    """Format the line as ``<text> (<byte_offset>-><byte_end>)``."""
    # %-formatting is kept deliberately: an f-string with ":d" would treat
    # non-int offsets differently from "%d".
    fields = (self._line, self._byte_offset, self._byte_end)
    return "%s (%d->%d)" % fields