@@ -915,15 +915,24 @@ def test_utf8_2byte_sequence_complete_coverage():
 
     result_1 = one_byte_max.decode("utf-8")
     result_2 = two_byte_min.decode("utf-8")
-    print(f" 1-byte max: {one_byte_max.hex()} -> U+007F: {repr(result_1)}")
-    print(f" 2-byte min: {two_byte_min.hex()} -> U+0080: {repr(result_2)}")
+    try:
+        print(f" 1-byte max: {one_byte_max.hex()} -> U+007F: {repr(result_1)}")
+    except UnicodeEncodeError:
+        print(f" 1-byte max: {one_byte_max.hex()} -> U+007F: <result>")
+    try:
+        print(f" 2-byte min: {two_byte_min.hex()} -> U+0080: {repr(result_2)}")
+    except UnicodeEncodeError:
+        print(f" 2-byte min: {two_byte_min.hex()} -> U+0080: <result>")
     assert ord(result_1) == 0x7F
     assert ord(result_2) == 0x80
 
     # Boundary between 2-byte and 3-byte (0x7FF vs 0x800)
     two_byte_max = b"\xdf\xbf"  # U+07FF - last 2-byte character
     result_3 = two_byte_max.decode("utf-8")
-    print(f" 2-byte max: {two_byte_max.hex()} -> U+07FF: {repr(result_3)}")
+    try:
+        print(f" 2-byte max: {two_byte_max.hex()} -> U+07FF: {repr(result_3)}")
+    except UnicodeEncodeError:
+        print(f" 2-byte max: {two_byte_max.hex()} -> U+07FF: <result>")
     assert ord(result_3) == 0x7FF
 
     try:
@@ -962,7 +971,6 @@ def test_utf8_2byte_sequence_complete_coverage():
     assert True, "Complete 2-byte sequence coverage validated"
 
 
-@pytest.mark.skip(reason="Skipping UTF-8 3-byte sequence test")
 def test_utf8_3byte_sequence_complete_coverage():
     """
     Comprehensive test for 3-byte UTF-8 sequence handling in ddbc_bindings.h lines 490-506.
@@ -1061,7 +1069,10 @@ def test_utf8_3byte_sequence_complete_coverage():
     for test_bytes, expected_char, codepoint, desc in valid_3byte:
         # Test decoding
         result = test_bytes.decode("utf-8")
-        print(f" {test_bytes.hex()}: U+{codepoint:04X} -> {repr(result)} ({desc})")
+        try:
+            print(f" {test_bytes.hex()}: U+{codepoint:04X} -> {repr(result)} ({desc})")
+        except UnicodeEncodeError:
+            print(f" {test_bytes.hex()}: U+{codepoint:04X} -> <result> ({desc})")
         assert result == expected_char, f"Should decode to {expected_char!r}"
         assert "\ufffd" not in result, f"Should NOT contain U+FFFD for valid sequence"
 
@@ -1152,8 +1163,14 @@ def test_utf8_3byte_sequence_complete_coverage():
 
     result_2 = two_byte_max.decode("utf-8")
     result_3 = three_byte_min.decode("utf-8")
-    print(f" 2-byte max: {two_byte_max.hex()} -> U+07FF: {repr(result_2)}")
-    print(f" 3-byte min: {three_byte_min.hex()} -> U+0800: {repr(result_3)}")
+    try:
+        print(f" 2-byte max: {two_byte_max.hex()} -> U+07FF: {repr(result_2)}")
+    except UnicodeEncodeError:
+        print(f" 2-byte max: {two_byte_max.hex()} -> U+07FF: <result>")
+    try:
+        print(f" 3-byte min: {three_byte_min.hex()} -> U+0800: {repr(result_3)}")
+    except UnicodeEncodeError:
+        print(f" 3-byte min: {three_byte_min.hex()} -> U+0800: <result>")
     assert ord(result_2) == 0x7FF
     assert ord(result_3) == 0x800
 
@@ -1163,15 +1180,24 @@ def test_utf8_3byte_sequence_complete_coverage():
 
     result_before = before_surrogate.decode("utf-8")
     result_after = after_surrogate.decode("utf-8")
-    print(f" Before surrogates: {before_surrogate.hex()} -> U+D7FF: {repr(result_before)}")
-    print(f" After surrogates: {after_surrogate.hex()} -> U+E000: {repr(result_after)}")
+    try:
+        print(f" Before surrogates: {before_surrogate.hex()} -> U+D7FF: {repr(result_before)}")
+    except UnicodeEncodeError:
+        print(f" Before surrogates: {before_surrogate.hex()} -> U+D7FF: <result>")
+    try:
+        print(f" After surrogates: {after_surrogate.hex()} -> U+E000: {repr(result_after)}")
+    except UnicodeEncodeError:
+        print(f" After surrogates: {after_surrogate.hex()} -> U+E000: <result>")
     assert ord(result_before) == 0xD7FF
     assert ord(result_after) == 0xE000
 
     # Maximum 3-byte
     three_byte_max = b"\xef\xbf\xbf"  # U+FFFF - last 3-byte
     result_max = three_byte_max.decode("utf-8")
-    print(f" 3-byte max: {three_byte_max.hex()} -> U+FFFF: {repr(result_max)}")
+    try:
+        print(f" 3-byte max: {three_byte_max.hex()} -> U+FFFF: {repr(result_max)}")
+    except UnicodeEncodeError:
+        print(f" 3-byte max: {three_byte_max.hex()} -> U+FFFF: <result>")
     assert ord(result_max) == 0xFFFF
 
     try:
@@ -1216,7 +1242,6 @@ def test_utf8_3byte_sequence_complete_coverage():
     assert True, "Complete 3-byte sequence coverage validated"
 
 
-@pytest.mark.skip(reason="Skipping UTF-8 4-byte sequence test")
 def test_utf8_4byte_sequence_complete_coverage():
     """
     Comprehensive test for 4-byte UTF-8 sequence handling in ddbc_bindings.h lines 508-530.
@@ -1338,7 +1363,10 @@ def test_utf8_4byte_sequence_complete_coverage():
     for test_bytes, expected_char, codepoint, desc in valid_4byte:
         # Test decoding
         result = test_bytes.decode("utf-8")
-        print(f" {test_bytes.hex()}: U+{codepoint:06X} -> {repr(result)} ({desc})")
+        try:
+            print(f" {test_bytes.hex()}: U+{codepoint:06X} -> {repr(result)} ({desc})")
+        except UnicodeEncodeError:
+            print(f" {test_bytes.hex()}: U+{codepoint:06X} -> <result> ({desc})")
         assert result == expected_char, f"Should decode to {expected_char!r}"
         assert "\ufffd" not in result, f"Should NOT contain U+FFFD for valid sequence"
 
@@ -1445,8 +1473,14 @@ def test_utf8_4byte_sequence_complete_coverage():
 
     result_3 = three_byte_max.decode("utf-8")
     result_4 = four_byte_min.decode("utf-8")
-    print(f" 3-byte max: {three_byte_max.hex()} -> U+FFFF: {repr(result_3)}")
-    print(f" 4-byte min: {four_byte_min.hex()} -> U+10000: {repr(result_4)}")
+    try:
+        print(f" 3-byte max: {three_byte_max.hex()} -> U+FFFF: {repr(result_3)}")
+    except UnicodeEncodeError:
+        print(f" 3-byte max: {three_byte_max.hex()} -> U+FFFF: <result>")
+    try:
+        print(f" 4-byte min: {four_byte_min.hex()} -> U+10000: {repr(result_4)}")
+    except UnicodeEncodeError:
+        print(f" 4-byte min: {four_byte_min.hex()} -> U+10000: <result>")
     assert ord(result_3) == 0xFFFF
     assert ord(result_4) == 0x10000
 
@@ -1456,8 +1490,14 @@ def test_utf8_4byte_sequence_complete_coverage():
 
     result_max = max_unicode.decode("utf-8")
     result_beyond = beyond_max.decode("utf-8", errors="replace")
-    print(f" Max Unicode: {max_unicode.hex()} -> U+10FFFF: {repr(result_max)}")
-    print(f" Beyond max: {beyond_max.hex()} -> Invalid: {repr(result_beyond)}")
+    try:
+        print(f" Max Unicode: {max_unicode.hex()} -> U+10FFFF: {repr(result_max)}")
+    except UnicodeEncodeError:
+        print(f" Max Unicode: {max_unicode.hex()} -> U+10FFFF: <result>")
+    try:
+        print(f" Beyond max: {beyond_max.hex()} -> Invalid: {repr(result_beyond)}")
+    except UnicodeEncodeError:
+        print(f" Beyond max: {beyond_max.hex()} -> Invalid: <result>")
     assert ord(result_max) == 0x10FFFF
     # Beyond max may be handled differently on different platforms
     assert len(result_beyond) > 0, "Should produce some output for beyond-max sequence"
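
Note: the repeated try/except UnicodeEncodeError wrapping in this diff exists so the diagnostic prints do not abort the run on consoles whose encoding cannot represent the decoded characters (for example, a non-UTF-8 Windows code page); the fallback keeps the hex prefix and substitutes a plain-ASCII placeholder for the repr. A minimal sketch of how that pattern could be factored into a shared helper is shown below; the helper name safe_print and its parameters are hypothetical and are not part of this test suite.

def safe_print(prefix: str, value: str, placeholder: str = "<result>") -> None:
    # Print the prefix followed by repr(value); if stdout's encoding cannot
    # represent the character, print an ASCII placeholder instead of letting
    # UnicodeEncodeError propagate and fail the test.
    try:
        print(f"{prefix}{repr(value)}")
    except UnicodeEncodeError:
        print(f"{prefix}{placeholder}")

# Hypothetical usage mirroring one of the prints above:
# safe_print(f" 3-byte max: {three_byte_max.hex()} -> U+FFFF: ", result_max)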