diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 6f1aa41acaaac..50867a2f2f05c 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2561,7 +2561,7 @@ my_mb_wc_filename(CHARSET_INFO *cs __attribute__((unused)), (byte2= hexlo(byte2)) >= 0) { int byte3= hexlo(s[3]); - int byte4= hexlo(s[3] ? s[4] : 0); + int byte4= hexlo((s[3] && s + 4 < e) ? s[4] : 0); if (byte3 >=0 && byte4 >=0) { *pwc= (byte1 << 12) + (byte2 << 8) + (byte3 << 4) + byte4; diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c index d4bf7a0ccda0e..cfb1d085708fc 100644 --- a/unittest/strings/strings-t.c +++ b/unittest/strings/strings-t.c @@ -1704,13 +1704,30 @@ test_strnncollsp_char() } +/* + my_mb_wc_filename() decodes a 5-byte '@HHHH' escape but only checked that + 4 bytes were available before reading the 4th hex digit at s[4], which is + the byte at the end pointer itself. "@000" is such a truncated escape + ('0','0' has no entry in the 3-byte table, so it reaches the hex branch). + With end= buf + 4, buf[4] must not be consumed; returns 0 on MY_CS_ILSEQ. +*/ +static int test_mb_wc_filename_truncated() +{ + CHARSET_INFO *cs= &my_charset_filename; + uchar buf[5]= {'@', '0', '0', '0', '1'}; + my_wc_t wc= 0; + int rc= my_ci_mb_wc(cs, &wc, buf, buf + 4); + return rc != MY_CS_ILSEQ; +} + + int main(int ac, char **av) { size_t i, failed= 0; MY_INIT(av[0]); - plan(4); + plan(5); diag("Testing my_like_range_xxx() functions"); for (i= 0; i < array_elements(charset_list); i++) @@ -1736,6 +1753,10 @@ int main(int ac, char **av) failed= test_strnncollsp_char(); ok(failed == 0, "Testing cs->coll->strnncollsp_char()"); + diag("Testing my_charset_filename mb_wc end-pointer bounds"); + ok(test_mb_wc_filename_truncated() == 0, + "filename decoder does not read past the end pointer"); + my_end(0); return exit_status();