"""Tests for the byte-budget helpers ``trim_to_bytes`` and ``split_to_bytes``."""

from lorabot.messages import split_to_bytes, trim_to_bytes


def _utf8_len(text):
    """Return the number of bytes *text* occupies once encoded as UTF-8."""
    return len(text.encode("utf-8"))


# trim_to_bytes


def test_short_ascii_passthrough():
    """Text far below the byte budget is expected back unchanged."""
    result = trim_to_bytes("hello", 184)
    assert result == "hello"


def test_exact_fit_passthrough():
    """Text whose encoded size equals the budget is expected back unchanged."""
    text = "a" * 184
    assert trim_to_bytes(text, 184) == text


def test_long_ascii_clean_cut():
    """200 ASCII characters with a 184-byte budget yield exactly 184 of them."""
    trimmed = trim_to_bytes("x" * 200, 184)
    assert _utf8_len(trimmed) == 184
    assert trimmed == "x" * 184


def test_emoji_does_not_split():
    """A 4-byte character is kept whole or not at all."""
    # Two 🎉 occupy 8 bytes; a budget of 5 only has room for one complete
    # emoji, so the result must be 4 bytes long rather than 5.
    trimmed = trim_to_bytes("🎉🎉", 5)
    assert trimmed == "🎉"
    assert _utf8_len(trimmed) == 4


def test_multibyte_at_boundary():
    """A 2-byte character sitting on the budget boundary is kept or dropped whole."""
    # "aä" encodes to 3 bytes (1 + 2): a budget of 3 fits both characters,
    # while a budget of 2 leaves room for the leading "a" only.
    for budget, expected in ((3, "aä"), (2, "a")):
        assert trim_to_bytes("aä", budget) == expected


def test_zero_or_negative_max_bytes():
    """A budget of zero or less is expected to produce an empty string."""
    for budget in (0, -1):
        assert trim_to_bytes("anything", budget) == ""


def test_empty_input():
    """Empty input is expected to stay empty."""
    result = trim_to_bytes("", 184)
    assert result == ""


# split_to_bytes


def test_split_short_input_single_chunk():
    """Input that fits the budget is expected as a single chunk."""
    chunks = split_to_bytes("hello", 184)
    assert chunks == ["hello"]


def test_split_long_input_two_chunks_drops_rest():
    """With ``max_chunks=2`` only the first two full chunks are expected."""
    full_chunk = "x" * 180
    chunks = split_to_bytes("x" * 500, 180, max_chunks=2)
    assert chunks == [full_chunk, full_chunk]
    assert sum(map(_utf8_len, chunks)) == 360


def test_split_exact_two_chunks_no_third():
    """Input that exactly fills two chunks is expected as exactly those two."""
    full_chunk = "x" * 180
    chunks = split_to_bytes(full_chunk * 2, 180, max_chunks=2)
    assert chunks == [full_chunk, full_chunk]


def test_split_does_not_break_multibyte():
    """Chunk boundaries are expected to fall between whole characters."""
    # Four emoji at 4 bytes each make 16 bytes; a 5-byte budget per chunk
    # leaves room for exactly one emoji in each of the two chunks.
    chunks = split_to_bytes("🎉🎉🎉🎉", 5, max_chunks=2)
    assert chunks == ["🎉", "🎉"]
    assert all(_utf8_len(chunk) == 4 for chunk in chunks)


def test_split_two_byte_char_at_boundary():
    """A 2-byte character straddling a boundary is expected in the next chunk."""
    # "abäcd" encodes to 6 bytes: a, b, ä (2 bytes), c, d. With 3 bytes per
    # chunk the third byte would be only the first half of "ä", so chunk one
    # stops after "ab" and chunk two holds "äc" (3 bytes).
    chunks = split_to_bytes("abäcd", 3, max_chunks=2)
    first, second = chunks[0], chunks[1]
    assert first == "ab"
    assert second == "äc"
    # The trailing "d" does not fit into the two allowed chunks.


def test_split_empty_input():
    """Empty input is expected to produce no chunks."""
    chunks = split_to_bytes("", 184)
    assert chunks == []


def test_split_zero_max_bytes():
    """A zero byte budget is expected to produce no chunks."""
    chunks = split_to_bytes("hi", 0)
    assert chunks == []


def test_split_zero_chunks():
    """``max_chunks=0`` is expected to produce no chunks."""
    chunks = split_to_bytes("hi", 184, max_chunks=0)
    assert chunks == []


def test_split_concat_is_prefix_of_input():
    """Joined chunks are expected to form a prefix of the original text."""
    # Whatever is delivered must appear in the original order, starting at
    # the beginning of the message (no rearrangement).
    original = "Hello world! 🎉 This is a longer message that should be split."
    pieces = split_to_bytes(original, 20, max_chunks=2)
    assert original.startswith("".join(pieces))