Skip to content

Commit 88b57ef

Browse files
authored
Merge pull request #3785 from stevenjohnstone/bug/3784
Avoid out-of-bounds reads
2 parents c4c0c81 + 3fe8625 commit 88b57ef

File tree

1 file changed

+58
-26
lines changed

1 file changed

+58
-26
lines changed

src/encoding.c

Lines changed: 58 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -2377,6 +2377,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
23772377
*/
23782378
size_t
23792379
pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2380+
if (n == 0) {
2381+
return 0;
2382+
}
2383+
23802384
if (*b < 0x80) {
23812385
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
23822386
}
@@ -2397,6 +2401,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
23972401
*/
23982402
size_t
23992403
pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2404+
if (n == 0) {
2405+
return 0;
2406+
}
2407+
24002408
if (*b < 0x80) {
24012409
return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
24022410
}
@@ -2417,6 +2425,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
24172425
*/
24182426
bool
24192427
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2428+
if (n == 0) {
2429+
return 0;
2430+
}
2431+
24202432
if (*b < 0x80) {
24212433
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
24222434
}
@@ -2435,7 +2447,8 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
24352447

24362448
static pm_unicode_codepoint_t
24372449
pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2438-
if (b[0] < 0x80) {
2450+
2451+
if ((n > 0) && (b[0] < 0x80)) {
24392452
*width = 1;
24402453
return (pm_unicode_codepoint_t) b[0];
24412454
}
@@ -2474,13 +2487,21 @@ pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
24742487

24752488
static size_t
24762489
pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
2490+
if (n == 0) {
2491+
return 0;
2492+
}
2493+
24772494
size_t width;
24782495
pm_cesu_8_codepoint(b, n, &width);
24792496
return width;
24802497
}
24812498

24822499
static size_t
24832500
pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2501+
if (n == 0) {
2502+
return 0;
2503+
}
2504+
24842505
if (*b < 0x80) {
24852506
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
24862507
}
@@ -2497,6 +2518,10 @@ pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
24972518

24982519
static size_t
24992520
pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2521+
if (n == 0) {
2522+
return 0;
2523+
}
2524+
25002525
if (*b < 0x80) {
25012526
return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
25022527
}
@@ -2513,6 +2538,10 @@ pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
25132538

25142539
static bool
25152540
pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2541+
if (n == 0) {
2542+
return 0;
2543+
}
2544+
25162545
if (*b < 0x80) {
25172546
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
25182547
}
@@ -3928,14 +3957,14 @@ static const uint8_t pm_encoding_windows_874_table[256] = {
39283957
};
39293958

39303959
#define PRISM_ENCODING_TABLE(name) \
3931-
static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
3932-
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
3960+
static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, ptrdiff_t n) { \
3961+
return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT)); \
39333962
} \
3934-
static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
3935-
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
3963+
static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, ptrdiff_t n) { \
3964+
return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0; \
39363965
} \
3937-
static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
3938-
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
3966+
static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, ptrdiff_t n) { \
3967+
return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT)); \
39393968
}
39403969

39413970
PRISM_ENCODING_TABLE(cp850)
@@ -4004,17 +4033,17 @@ PRISM_ENCODING_TABLE(windows_874)
40044033
* means that if the top bit is not set, the character is 1 byte long.
40054034
*/
40064035
static size_t
4007-
pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
4008-
return *b < 0x80 ? 1 : 0;
4036+
pm_encoding_ascii_char_width(const uint8_t *b, ptrdiff_t n) {
4037+
return ((n > 0) && (*b < 0x80)) ? 1 : 0;
40094038
}
40104039

40114040
/**
40124041
* Return the size of the next character in the ASCII encoding if it is an
40134042
* alphabetical character.
40144043
*/
40154044
static size_t
4016-
pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
4017-
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
4045+
pm_encoding_ascii_alpha_char(const uint8_t *b, ptrdiff_t n) {
4046+
return (n > 0) ? (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) : 0;
40184047
}
40194048

40204049
/**
@@ -4024,16 +4053,16 @@ pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
40244053
*/
40254054
static size_t
40264055
pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
4027-
return (*b < 0x80) ? pm_encoding_ascii_alpha_char(b, n) : 0;
4056+
return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alpha_char(b, n) : 0;
40284057
}
40294058

40304059
/**
40314060
* Return the size of the next character in the ASCII encoding if it is an
40324061
* alphanumeric character.
40334062
*/
40344063
static size_t
4035-
pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
4036-
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
4064+
pm_encoding_ascii_alnum_char(const uint8_t *b, ptrdiff_t n) {
4065+
return ((n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
40374066
}
40384067

40394068
/**
@@ -4043,16 +4072,16 @@ pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
40434072
*/
40444073
static size_t
40454074
pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
4046-
return (*b < 0x80) ? pm_encoding_ascii_alnum_char(b, n) : 0;
4075+
return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alnum_char(b, n) : 0;
40474076
}
40484077

40494078
/**
40504079
* Return true if the next character in the ASCII encoding is an uppercase
40514080
* character.
40524081
*/
40534082
static bool
4054-
pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
4055-
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
4083+
pm_encoding_ascii_isupper_char(const uint8_t *b, ptrdiff_t n) {
4084+
return (n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
40564085
}
40574086

40584087
/**
@@ -4071,7 +4100,7 @@ pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATT
40714100
static size_t
40724101
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
40734102
// These are the single byte characters.
4074-
if (*b < 0x80) {
4103+
if ((n > 0) && (*b < 0x80)) {
40754104
return 1;
40764105
}
40774106

@@ -4115,6 +4144,9 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
41154144
*/
41164145
static size_t
41174146
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
4147+
if (n == 0) {
4148+
return 0;
4149+
}
41184150
// These are the single byte characters.
41194151
if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
41204152
return 1;
@@ -4178,7 +4210,7 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
41784210
*/
41794211
static bool
41804212
pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
4181-
return (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
4213+
return (n > 0) && (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
41824214
}
41834215

41844216
/**
@@ -4188,7 +4220,7 @@ pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
41884220
static size_t
41894221
pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
41904222
// These are the single byte characters.
4191-
if (*b < 0x80) {
4223+
if ((n > 0) && (*b < 0x80)) {
41924224
return 1;
41934225
}
41944226

@@ -4207,7 +4239,7 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
42074239
static size_t
42084240
pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
42094241
// These are the single byte characters
4210-
if (*b <= 0x80) {
4242+
if ((n > 0) && (*b <= 0x80)) {
42114243
return 1;
42124244
}
42134245

@@ -4226,7 +4258,7 @@ pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
42264258
static size_t
42274259
pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
42284260
// These are the 1 byte characters.
4229-
if (*b < 0x80) {
4261+
if ((n > 0) && (*b < 0x80)) {
42304262
return 1;
42314263
}
42324264

@@ -4269,7 +4301,7 @@ pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
42694301
static size_t
42704302
pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
42714303
// These are the single byte characters.
4272-
if (*b < 0x80) {
4304+
if ((n > 0) && (*b < 0x80)) {
42734305
return 1;
42744306
}
42754307

@@ -4288,7 +4320,7 @@ pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
42884320
static size_t
42894321
pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
42904322
// These are the single byte characters.
4291-
if (*b < 0x80) {
4323+
if ((n > 0) && (*b < 0x80)) {
42924324
return 1;
42934325
}
42944326

@@ -4312,7 +4344,7 @@ pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
43124344
static size_t
43134345
pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
43144346
// These are the 1 byte characters.
4315-
if (*b < 0x80) {
4347+
if ((n > 0) && (*b < 0x80)) {
43164348
return 1;
43174349
}
43184350

@@ -4336,7 +4368,7 @@ pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
43364368
static size_t
43374369
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
43384370
// These are the single byte characters.
4339-
if (*b <= 0x80) {
4371+
if ((n > 0) && (*b <= 0x80)) {
43404372
return 1;
43414373
}
43424374

0 commit comments

Comments
 (0)