53 lines
1.8 KiB
Python
53 lines
1.8 KiB
Python
"""Helpers for shaping outgoing mesh messages."""
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
def trim_to_bytes(text: str, max_bytes: int) -> str:
    """Return ``text`` truncated so its UTF-8 encoding is at most ``max_bytes`` bytes.

    The cut never lands inside a multi-byte UTF-8 sequence, so we never emit
    invalid UTF-8 to the radio. A character whose encoding ends exactly on the
    byte budget is kept; only a character that would straddle the budget is
    dropped.

    Args:
        text: The string to shorten.
        max_bytes: Maximum size of the UTF-8 encoded result, in bytes.

    Returns:
        ``""`` if ``max_bytes`` is not positive, ``text`` unchanged if its
        encoding already fits, otherwise the longest prefix of ``text`` whose
        UTF-8 encoding is at most ``max_bytes`` bytes.
    """
    if max_bytes <= 0:
        return ""
    encoded = text.encode("utf-8")
    if len(encoded) <= max_bytes:
        return text
    end = max_bytes
    # encoded[end] is the first byte that would be dropped (it exists because
    # len(encoded) > max_bytes). If it is a continuation byte (10xxxxxx), the
    # cut is inside a character: rewind to that character's leader byte.
    # Looking at the first dropped byte rather than the last kept one is what
    # lets a character that ends exactly on the budget survive.
    while end > 0 and (encoded[end] & 0xC0) == 0x80:
        end -= 1
    # The slice now ends on a character boundary, so a strict decode is safe.
    return encoded[:end].decode("utf-8")
|
|
|
|
|
|
def split_to_bytes(text: str, max_bytes: int, max_chunks: int = 2) -> list[str]:
    """Split ``text`` into up to ``max_chunks`` UTF-8-safe chunks of ``max_bytes`` each.

    Anything past ``max_chunks * max_bytes`` is dropped. Splits never land inside a
    multi-byte UTF-8 sequence on either side of the boundary. A character that
    ends exactly on the byte budget stays in the current chunk; only a
    character that would straddle the boundary is moved to the next chunk.

    Args:
        text: The string to split.
        max_bytes: Maximum UTF-8 encoded size of each chunk, in bytes.
        max_chunks: Maximum number of chunks to return.

    Returns:
        The chunks in order. Empty if ``text`` is empty or either limit is not
        positive. If the next character is wider than ``max_bytes`` no further
        progress is possible, so splitting stops there and the remaining text
        is dropped.
    """
    if max_bytes <= 0 or max_chunks <= 0 or not text:
        return []
    encoded = text.encode("utf-8")
    n = len(encoded)
    chunks: list[str] = []
    pos = 0
    for _ in range(max_chunks):
        if pos >= n:
            break
        end = min(pos + max_bytes, n)
        # encoded[end] is the first byte of what would become the next chunk.
        # If it is a continuation byte (10xxxxxx), the boundary splits a
        # character: rewind to that character's leader byte. Checking the byte
        # after the boundary (not the last byte before it) keeps a character
        # that ends exactly on the budget in this chunk.
        while pos < end < n and (encoded[end] & 0xC0) == 0x80:
            end -= 1
        if end == pos:
            # The next character alone exceeds max_bytes; cannot advance.
            break
        chunks.append(encoded[pos:end].decode("utf-8"))
        pos = end
    return chunks
|