Files
lorabot/src/lorabot/messages.py
T
2026-05-04 20:52:51 +02:00

53 lines
1.8 KiB
Python

"""Helpers for shaping outgoing mesh messages."""
from __future__ import annotations
def trim_to_bytes(text: str, max_bytes: int) -> str:
    """Return the longest prefix of ``text`` that fits in ``max_bytes`` of UTF-8.

    The cut is always placed on a character boundary, so the result never
    contains a partial multi-byte sequence, and a character that fits
    entirely within the budget is always kept.

    Args:
        text: The string to shorten.
        max_bytes: Maximum size of the UTF-8 encoded result, in bytes.

    Returns:
        ``text`` unchanged if it already fits; otherwise the longest prefix
        whose UTF-8 encoding is at most ``max_bytes`` bytes. An empty string
        when ``max_bytes`` is not positive or not even the first character
        fits.
    """
    if max_bytes <= 0:
        return ""
    encoded = text.encode("utf-8")
    if len(encoded) <= max_bytes:
        return text
    # ``encoded[end]`` is the first byte that would be dropped (it exists
    # because the text is longer than the budget). If it is a continuation
    # byte (10xxxxxx) the cut would split a character, so back off until the
    # first dropped byte starts a new character. Inspecting the byte *after*
    # the cut, rather than the last byte kept, preserves a multi-byte
    # character that ends exactly at the budget.
    end = max_bytes
    while end > 0 and (encoded[end] & 0xC0) == 0x80:
        end -= 1
    return encoded[:end].decode("utf-8")
def split_to_bytes(text: str, max_bytes: int, max_chunks: int = 2) -> list[str]:
    """Split ``text`` into at most ``max_chunks`` UTF-8-safe chunks.

    Chunks are filled greedily: each one takes as many whole characters as
    fit in ``max_bytes`` bytes of UTF-8. A split never lands inside a
    multi-byte sequence, and a character that fits entirely within the
    current chunk's budget is kept in that chunk.

    Args:
        text: The string to split.
        max_bytes: Maximum UTF-8 size of each chunk, in bytes.
        max_chunks: Maximum number of chunks to return.

    Returns:
        The chunks in order. Text left over after ``max_chunks`` chunks is
        dropped. The list is empty when ``text`` is empty or either limit is
        not positive, and splitting stops early at any character that is
        wider than ``max_bytes`` on its own.
    """
    if max_bytes <= 0 or max_chunks <= 0 or not text:
        return []
    encoded = text.encode("utf-8")
    n = len(encoded)
    chunks: list[str] = []
    pos = 0
    for _ in range(max_chunks):
        if pos >= n:
            break
        end = min(pos + max_bytes, n)
        # A cut at ``end`` splits a character only if the byte just past it
        # is a continuation byte (10xxxxxx). Back off until that byte starts
        # a new character. ``pos`` itself is always a character boundary, so
        # the loop stops there at the latest.
        while pos < end < n and (encoded[end] & 0xC0) == 0x80:
            end -= 1
        if end == pos:
            # The next character alone exceeds ``max_bytes``; no progress
            # is possible without emitting invalid UTF-8.
            break
        chunks.append(encoded[pos:end].decode("utf-8"))
        pos = end
    return chunks