# lorabot/tests/test_messages.py

from lorabot.messages import split_to_bytes, trim_to_bytes


def test_short_ascii_passthrough():
    """ASCII text well under the byte limit is expected back unchanged."""
    text = "hello"
    trimmed = trim_to_bytes(text, 184)
    assert trimmed == "hello"


def test_exact_fit_passthrough():
    """Text whose encoded size equals the limit is expected back unchanged."""
    limit = 184
    exact_fit = "a" * limit  # ASCII only, so one byte per character
    assert trim_to_bytes(exact_fit, limit) == exact_fit


def test_long_ascii_clean_cut():
    """Over-long ASCII input is expected to be cut to exactly the byte limit."""
    limit = 184
    trimmed = trim_to_bytes("x" * 200, limit)
    encoded_size = len(trimmed.encode("utf-8"))
    assert encoded_size == limit
    assert trimmed == "x" * limit


def test_emoji_does_not_split():
    """A 4-byte emoji that does not fit entirely is dropped, never cut mid-sequence."""
    # Each 🎉 encodes to 4 UTF-8 bytes, so a 5-byte limit has room for one
    # whole emoji only; the result must be 4 bytes long, not 5.
    two_emoji = "🎉🎉"
    trimmed = trim_to_bytes(two_emoji, 5)
    assert trimmed == "🎉"
    encoded_size = len(trimmed.encode("utf-8"))
    assert encoded_size == 4


def test_multibyte_at_boundary():
    """A 2-byte character is kept only when the budget covers both of its bytes."""
    text = "aä"  # 1-byte "a" followed by 2-byte "ä": 3 UTF-8 bytes in total
    cases = (
        (3, "aä"),  # the whole string fits
        (2, "a"),   # only the leading "a" fits; "ä" would need a third byte
    )
    for budget, expected in cases:
        assert trim_to_bytes(text, budget) == expected


def test_zero_or_negative_max_bytes():
    """A zero or negative byte budget is expected to produce the empty string."""
    for budget in (0, -1):
        assert trim_to_bytes("anything", budget) == ""


def test_empty_input():
    """Trimming the empty string is expected to give the empty string."""
    trimmed = trim_to_bytes("", 184)
    assert trimmed == ""


# split_to_bytes


def test_split_short_input_single_chunk():
    """Input that fits in one chunk is expected back as a one-element list."""
    parts = split_to_bytes("hello", 184)
    assert parts == ["hello"]


def test_split_long_input_two_chunks_drops_rest():
    """With max_chunks=2, only the first two full chunks of a long input are returned."""
    full_chunk = "x" * 180
    parts = split_to_bytes("x" * 500, 180, max_chunks=2)
    assert parts == [full_chunk, full_chunk]
    # 500 bytes went in; two 180-byte chunks come out and the rest is dropped.
    delivered_bytes = sum(len(part.encode("utf-8")) for part in parts)
    assert delivered_bytes == 360


def test_split_exact_two_chunks_no_third():
    """Input of exactly two chunk sizes is expected to yield two full chunks and nothing else."""
    full_chunk = "x" * 180
    parts = split_to_bytes(full_chunk * 2, 180, max_chunks=2)
    assert parts == [full_chunk, full_chunk]


def test_split_does_not_break_multibyte():
    """Chunk boundaries are expected to fall between whole emoji, never inside one."""
    # Four emoji at 4 bytes each is 16 bytes in total. A 5-byte budget per
    # chunk leaves room for exactly one emoji in each of the two chunks.
    parts = split_to_bytes("🎉🎉🎉🎉", 5, max_chunks=2)
    assert parts == ["🎉", "🎉"]
    assert all(len(part.encode("utf-8")) == 4 for part in parts)


def test_split_two_byte_char_at_boundary():
    """A 2-byte character straddling a chunk boundary moves whole into the next chunk.

    "abäcd" encodes to 6 UTF-8 bytes (a, b, ä = 2 bytes, c, d). With a budget
    of 3 bytes per chunk and at most two chunks:

    * chunk 1 must end at "ab": its third byte would be the first byte of
      "ä", which cannot be kept without its continuation byte;
    * chunk 2 is "äc", exactly 3 bytes;
    * "d" is over the two-chunk budget and is dropped.
    """
    chunks = split_to_bytes("abäcd", 3, max_chunks=2)
    # Compare the whole list rather than indexing: this also verifies that
    # "d" was dropped (no third chunk) and reports short output as a normal
    # assertion failure instead of an IndexError.
    assert chunks == ["ab", "äc"]


def test_split_empty_input():
    """Splitting the empty string is expected to give no chunks at all."""
    parts = split_to_bytes("", 184)
    assert parts == []


def test_split_zero_max_bytes():
    """A zero-byte chunk budget is expected to give no chunks."""
    parts = split_to_bytes("hi", 0)
    assert parts == []


def test_split_zero_chunks():
    """Allowing zero chunks is expected to give an empty list even for text that fits."""
    parts = split_to_bytes("hi", 184, max_chunks=0)
    assert parts == []


def test_split_concat_is_prefix_of_input():
    """Joined chunks must form a non-empty prefix of the input, within the limits.

    The source mixes ASCII with a 4-byte emoji and is longer than two
    20-byte chunks, so part of it has to be dropped.
    """
    src = "Hello world! 🎉 This is a longer message that should be split."
    max_bytes = 20
    max_chunks = 2
    chunks = split_to_bytes(src, max_bytes, max_chunks=max_chunks)

    # Without these checks the prefix assertion below is vacuous: an empty
    # result (or chunks larger than the budget) would still pass it.
    assert 1 <= len(chunks) <= max_chunks
    assert all(len(c.encode("utf-8")) <= max_bytes for c in chunks)

    # The delivered text must always be a prefix of the original (no rearrangement).
    delivered = "".join(chunks)
    assert src.startswith(delivered)