diff -r fd801ec0737c -r 172fc01ce997 components/vim/vim72-patches/7.2.312 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/vim/vim72-patches/7.2.312 Thu Apr 07 16:25:07 2011 -0700 @@ -0,0 +1,312 @@ +To: vim-dev@vim.org +Subject: Patch 7.2.312 +Fcc: outbox +From: Bram Moolenaar +Mime-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +------------ + +Patch 7.2.312 +Problem: iconv() returns an invalid character sequence when conversion + fails. It should return an empty string. (Yongwei Wu) +Solution: Be more strict about invalid characters in the input. +Files: src/mbyte.c + + +*** ../vim-7.2.311/src/mbyte.c 2009-06-16 15:23:07.000000000 +0200 +--- src/mbyte.c 2009-11-25 16:10:44.000000000 +0100 +*************** +*** 133,154 **** + static int dbcs_ptr2cells_len __ARGS((char_u *p, int size)); + static int dbcs_ptr2char __ARGS((char_u *p)); + +! /* Lookup table to quickly get the length in bytes of a UTF-8 character from +! * the first byte of a UTF-8 string. Bytes which are illegal when used as the +! * first byte have a one, because these will be used separately. */ + static char utf8len_tab[256] = + { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/ +! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/ + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, + }; + + /* + * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks + * in the "xim.log" file. + */ +--- 133,172 ---- + static int dbcs_ptr2cells_len __ARGS((char_u *p, int size)); + static int dbcs_ptr2char __ARGS((char_u *p)); + +! /* +! 
* Lookup table to quickly get the length in bytes of a UTF-8 character from +! * the first byte of a UTF-8 string. +! * Bytes which are illegal when used as the first byte have a 1. +! * The NUL byte has length 1. +! */ + static char utf8len_tab[256] = + { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, + }; + + /* ++ * Like utf8len_tab above, but using a zero for illegal lead bytes. ++ */ ++ static char utf8len_tab_zero[256] = ++ { ++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ++ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ++ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ++ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, ++ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0, ++ }; ++ ++ /* + * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks + * in the "xim.log" file. + */ +*************** +*** 1352,1358 **** + if (size > 0 && *p >= 0x80) + { + if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) +! return 1; + c = utf_ptr2char(p); + /* An illegal byte is displayed as <xx>. */ + if (utf_ptr2len(p) == 1 || c == NUL) +--- 1370,1376 ---- + if (size > 0 && *p >= 0x80) + { + if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) +! 
return 1; /* truncated */ + c = utf_ptr2char(p); + /* An illegal byte is displayed as <xx>. */ + if (utf_ptr2len(p) == 1 || c == NUL) +*************** +*** 1473,1479 **** + if (p[0] < 0x80) /* be quick for ASCII */ + return p[0]; + +! len = utf8len_tab[p[0]]; + if (len > 1 && (p[1] & 0xc0) == 0x80) + { + if (len == 2) +--- 1491,1497 ---- + if (p[0] < 0x80) /* be quick for ASCII */ + return p[0]; + +! len = utf8len_tab_zero[p[0]]; + if (len > 1 && (p[1] & 0xc0) == 0x80) + { + if (len == 2) +*************** +*** 1723,1728 **** +--- 1741,1747 ---- + /* + * Return length of UTF-8 character, obtained from the first byte. + * "b" must be between 0 and 255! ++ * Returns 1 for an invalid first byte value. + */ + int + utf_byte2len(b) +*************** +*** 1737,1742 **** +--- 1756,1762 ---- + * Returns 1 for "". + * Returns 1 for an illegal byte sequence (also in incomplete byte seq.). + * Returns number > "size" for an incomplete byte sequence. ++ * Never returns zero. + */ + int + utf_ptr2len_len(p, size) +*************** +*** 1747,1757 **** + int i; + int m; + +! if (*p == NUL) +! return 1; +! m = len = utf8len_tab[*p]; + if (len > size) + m = size; /* incomplete byte sequence. */ + for (i = 1; i < m; ++i) + if ((p[i] & 0xc0) != 0x80) + return 1; +--- 1767,1779 ---- + int i; + int m; + +! len = utf8len_tab[*p]; +! if (len == 1) +! return 1; /* NUL, ascii or illegal lead byte */ + if (len > size) + m = size; /* incomplete byte sequence. */ ++ else ++ m = len; + for (i = 1; i < m; ++i) + if ((p[i] & 0xc0) != 0x80) + return 1; +*************** +*** 2505,2510 **** +--- 2527,2533 ---- + /* + * mb_head_off() function pointer. + * Return offset from "p" to the first byte of the character it points into. ++ * If "p" points to the NUL at the end of the string return 0. + * Returns 0 when already at the first byte of a character. 
+ */ + int +*************** +*** 2524,2530 **** + + /* It can't be a trailing byte when not using DBCS, at the start of the + * string or the previous byte can't start a double-byte. */ +! if (p <= base || MB_BYTE2LEN(p[-1]) == 1) + return 0; + + /* This is slow: need to start at the base and go forward until the +--- 2547,2553 ---- + + /* It can't be a trailing byte when not using DBCS, at the start of the + * string or the previous byte can't start a double-byte. */ +! if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL) + return 0; + + /* This is slow: need to start at the base and go forward until the +*************** +*** 2552,2558 **** + * lead byte in the current cell. */ + if (p <= base + || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e) +! || MB_BYTE2LEN(p[-1]) == 1) + return 0; + + /* This is slow: need to start at the base and go forward until the +--- 2575,2582 ---- + * lead byte in the current cell. */ + if (p <= base + || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e) +! || MB_BYTE2LEN(p[-1]) == 1 +! || *p == NUL) + return 0; + + /* This is slow: need to start at the base and go forward until the +*************** +*** 2578,2583 **** +--- 2602,2608 ---- + char_u *q; + char_u *s; + int c; ++ int len; + #ifdef FEAT_ARABIC + char_u *j; + #endif +*************** +*** 2597,2604 **** + --q; + /* Check for illegal sequence. Do allow an illegal byte after where we + * started. */ +! if (utf8len_tab[*q] != (int)(s - q + 1) +! && utf8len_tab[*q] != (int)(p - q + 1)) + return 0; + + if (q <= base) +--- 2622,2629 ---- + --q; + /* Check for illegal sequence. Do allow an illegal byte after where we + * started. */ +! len = utf8len_tab[*q]; +! if (len != (int)(s - q + 1) && len != (int)(p - q + 1)) + return 0; + + if (q <= base) +*************** +*** 2810,2818 **** + + while (end == NULL ? *p != NUL : p < end) + { +! 
if ((*p & 0xc0) == 0x80) + return FALSE; /* invalid lead byte */ +- l = utf8len_tab[*p]; + if (end != NULL && p + l > end) + return FALSE; /* incomplete byte sequence */ + ++p; +--- 2835,2843 ---- + + while (end == NULL ? *p != NUL : p < end) + { +! l = utf8len_tab_zero[*p]; +! if (l == 0) + return FALSE; /* invalid lead byte */ + if (end != NULL && p + l > end) + return FALSE; /* incomplete byte sequence */ + ++p; +*************** +*** 6117,6128 **** + d = retval; + for (i = 0; i < len; ++i) + { +! l = utf_ptr2len(ptr + i); + if (l == 0) + *d++ = NUL; + else if (l == 1) + { +! if (unconvlenp != NULL && utf8len_tab[ptr[i]] > len - i) + { + /* Incomplete sequence at the end. */ + *unconvlenp = len - i; +--- 6142,6161 ---- + d = retval; + for (i = 0; i < len; ++i) + { +! l = utf_ptr2len_len(ptr + i, len - i); + if (l == 0) + *d++ = NUL; + else if (l == 1) + { +! int l_w = utf8len_tab_zero[ptr[i]]; +! +! if (l_w == 0) +! { +! /* Illegal utf-8 byte cannot be converted */ +! vim_free(retval); +! return NULL; +! } +! if (unconvlenp != NULL && l_w > len - i) + { + /* Incomplete sequence at the end. */ + *unconvlenp = len - i; +*** ../vim-7.2.311/src/version.c 2009-12-02 13:32:10.000000000 +0100 +--- src/version.c 2009-12-02 15:00:23.000000000 +0100 +*************** +*** 683,684 **** +--- 683,686 ---- + { /* Add new patch number below this line */ ++ /**/ ++ 312, + /**/ + +-- +hundred-and-one symptoms of being an internet addict: +6. You refuse to go to a vacation spot with no electricity and no phone lines. + + /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ +/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ +\\\ download, build and distribute -- http://www.A-A-P.org /// + \\\ help me help AIDS victims -- http://ICCF-Holland.org ///