components/vim/vim72-patches/7.2.312
changeset 198 172fc01ce997
equal deleted inserted replaced
197:fd801ec0737c 198:172fc01ce997
       
     1 To: vim-dev@vim.org
       
     2 Subject: Patch 7.2.312
       
     3 Fcc: outbox
       
     4 From: Bram Moolenaar <Bram@moolenaar.net>
       
     5 Mime-Version: 1.0
       
     6 Content-Type: text/plain; charset=UTF-8
       
     7 Content-Transfer-Encoding: 8bit
       
     8 ------------
       
     9 
       
    10 Patch 7.2.312
       
    11 Problem:    iconv() returns an invalid character sequence when conversion
       
    12 	    fails.  It should return an empty string. (Yongwei Wu)
       
    13 Solution:   Be more strict about invalid characters in the input.
       
    14 Files:	    src/mbyte.c
       
    15 
       
    16 
       
    17 *** ../vim-7.2.311/src/mbyte.c	2009-06-16 15:23:07.000000000 +0200
       
    18 --- src/mbyte.c	2009-11-25 16:10:44.000000000 +0100
       
    19 ***************
       
    20 *** 133,154 ****
       
    21   static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
       
    22   static int dbcs_ptr2char __ARGS((char_u *p));
       
    23   
       
    24 ! /* Lookup table to quickly get the length in bytes of a UTF-8 character from
       
    25 !  * the first byte of a UTF-8 string.  Bytes which are illegal when used as the
       
    26 !  * first byte have a one, because these will be used separately. */
       
    27   static char utf8len_tab[256] =
       
    28   {
       
    29       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    30       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    31       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    32       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    33 !     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
       
    34 !     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/
       
    35       2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
       
    36       3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
       
    37   };
       
    38   
       
    39   /*
       
    40    * XIM often causes trouble.  Define XIM_DEBUG to get a log of XIM callbacks
       
    41    * in the "xim.log" file.
       
    42    */
       
    43 --- 133,172 ----
       
    44   static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
       
    45   static int dbcs_ptr2char __ARGS((char_u *p));
       
    46   
       
    47 ! /*
       
    48 !  * Lookup table to quickly get the length in bytes of a UTF-8 character from
       
    49 !  * the first byte of a UTF-8 string.
       
    50 !  * Bytes which are illegal when used as the first byte have a 1.
       
    51 !  * The NUL byte has length 1.
       
    52 !  */
       
    53   static char utf8len_tab[256] =
       
    54   {
       
    55       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    56       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    57       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    58       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    59 !     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    60 !     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    61       2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
       
    62       3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
       
    63   };
       
    64   
       
    65   /*
       
    66 +  * Like utf8len_tab above, but using a zero for illegal lead bytes.
       
    67 +  */
       
    68 + static char utf8len_tab_zero[256] =
       
    69 + {
       
    70 +     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    71 +     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    72 +     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    73 +     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
       
    74 +     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
       
    75 +     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
       
    76 +     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
       
    77 +     3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0,
       
    78 + };
       
    79 + 
       
    80 + /*
       
    81    * XIM often causes trouble.  Define XIM_DEBUG to get a log of XIM callbacks
       
    82    * in the "xim.log" file.
       
    83    */
       
    84 ***************
       
    85 *** 1352,1358 ****
       
    86       if (size > 0 && *p >= 0x80)
       
    87       {
       
    88   	if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
       
    89 ! 	    return 1;
       
    90   	c = utf_ptr2char(p);
       
    91   	/* An illegal byte is displayed as <xx>. */
       
    92   	if (utf_ptr2len(p) == 1 || c == NUL)
       
    93 --- 1370,1376 ----
       
    94       if (size > 0 && *p >= 0x80)
       
    95       {
       
    96   	if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
       
    97 ! 	    return 1;  /* truncated */
       
    98   	c = utf_ptr2char(p);
       
    99   	/* An illegal byte is displayed as <xx>. */
       
   100   	if (utf_ptr2len(p) == 1 || c == NUL)
       
   101 ***************
       
   102 *** 1473,1479 ****
       
   103       if (p[0] < 0x80)	/* be quick for ASCII */
       
   104   	return p[0];
       
   105   
       
   106 !     len = utf8len_tab[p[0]];
       
   107       if (len > 1 && (p[1] & 0xc0) == 0x80)
       
   108       {
       
   109   	if (len == 2)
       
   110 --- 1491,1497 ----
       
   111       if (p[0] < 0x80)	/* be quick for ASCII */
       
   112   	return p[0];
       
   113   
       
   114 !     len = utf8len_tab_zero[p[0]];
       
   115       if (len > 1 && (p[1] & 0xc0) == 0x80)
       
   116       {
       
   117   	if (len == 2)
       
   118 ***************
       
   119 *** 1723,1728 ****
       
   120 --- 1741,1747 ----
       
   121   /*
       
   122    * Return length of UTF-8 character, obtained from the first byte.
       
   123    * "b" must be between 0 and 255!
       
   124 +  * Returns 1 for an invalid first byte value.
       
   125    */
       
   126       int
       
   127   utf_byte2len(b)
       
   128 ***************
       
   129 *** 1737,1742 ****
       
   130 --- 1756,1762 ----
       
   131    * Returns 1 for "".
       
   132    * Returns 1 for an illegal byte sequence (also in incomplete byte seq.).
       
   133    * Returns number > "size" for an incomplete byte sequence.
       
   134 +  * Never returns zero.
       
   135    */
       
   136       int
       
   137   utf_ptr2len_len(p, size)
       
   138 ***************
       
   139 *** 1747,1757 ****
       
   140       int		i;
       
   141       int		m;
       
   142   
       
   143 !     if (*p == NUL)
       
   144 ! 	return 1;
       
   145 !     m = len = utf8len_tab[*p];
       
   146       if (len > size)
       
   147   	m = size;	/* incomplete byte sequence. */
       
   148       for (i = 1; i < m; ++i)
       
   149   	if ((p[i] & 0xc0) != 0x80)
       
   150   	    return 1;
       
   151 --- 1767,1779 ----
       
   152       int		i;
       
   153       int		m;
       
   154   
       
   155 !     len = utf8len_tab[*p];
       
   156 !     if (len == 1)
       
   157 ! 	return 1;	/* NUL, ascii or illegal lead byte */
       
   158       if (len > size)
       
   159   	m = size;	/* incomplete byte sequence. */
       
   160 +     else
       
   161 + 	m = len;
       
   162       for (i = 1; i < m; ++i)
       
   163   	if ((p[i] & 0xc0) != 0x80)
       
   164   	    return 1;
       
   165 ***************
       
   166 *** 2505,2510 ****
       
   167 --- 2527,2533 ----
       
   168   /*
       
   169    * mb_head_off() function pointer.
       
   170    * Return offset from "p" to the first byte of the character it points into.
       
   171 +  * If "p" points to the NUL at the end of the string return 0.
       
   172    * Returns 0 when already at the first byte of a character.
       
   173    */
       
   174       int
       
   175 ***************
       
   176 *** 2524,2530 ****
       
   177   
       
   178       /* It can't be a trailing byte when not using DBCS, at the start of the
       
   179        * string or the previous byte can't start a double-byte. */
       
   180 !     if (p <= base || MB_BYTE2LEN(p[-1]) == 1)
       
   181   	return 0;
       
   182   
       
   183       /* This is slow: need to start at the base and go forward until the
       
   184 --- 2547,2553 ----
       
   185   
       
   186       /* It can't be a trailing byte when not using DBCS, at the start of the
       
   187        * string or the previous byte can't start a double-byte. */
       
   188 !     if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL)
       
   189   	return 0;
       
   190   
       
   191       /* This is slow: need to start at the base and go forward until the
       
   192 ***************
       
   193 *** 2552,2558 ****
       
   194        * lead byte in the current cell. */
       
   195       if (p <= base
       
   196   	    || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
       
   197 ! 	    || MB_BYTE2LEN(p[-1]) == 1)
       
   198   	return 0;
       
   199   
       
   200       /* This is slow: need to start at the base and go forward until the
       
   201 --- 2575,2582 ----
       
   202        * lead byte in the current cell. */
       
   203       if (p <= base
       
   204   	    || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
       
   205 ! 	    || MB_BYTE2LEN(p[-1]) == 1
       
   206 ! 	    || *p == NUL)
       
   207   	return 0;
       
   208   
       
   209       /* This is slow: need to start at the base and go forward until the
       
   210 ***************
       
   211 *** 2578,2583 ****
       
   212 --- 2602,2608 ----
       
   213       char_u	*q;
       
   214       char_u	*s;
       
   215       int		c;
       
   216 +     int		len;
       
   217   #ifdef FEAT_ARABIC
       
   218       char_u	*j;
       
   219   #endif
       
   220 ***************
       
   221 *** 2597,2604 ****
       
   222   	    --q;
       
   223   	/* Check for illegal sequence. Do allow an illegal byte after where we
       
   224   	 * started. */
       
   225 ! 	if (utf8len_tab[*q] != (int)(s - q + 1)
       
   226 ! 				       && utf8len_tab[*q] != (int)(p - q + 1))
       
   227   	    return 0;
       
   228   
       
   229   	if (q <= base)
       
   230 --- 2622,2629 ----
       
   231   	    --q;
       
   232   	/* Check for illegal sequence. Do allow an illegal byte after where we
       
   233   	 * started. */
       
   234 ! 	len = utf8len_tab[*q];
       
   235 ! 	if (len != (int)(s - q + 1) && len != (int)(p - q + 1))
       
   236   	    return 0;
       
   237   
       
   238   	if (q <= base)
       
   239 ***************
       
   240 *** 2810,2818 ****
       
   241   
       
   242       while (end == NULL ? *p != NUL : p < end)
       
   243       {
       
   244 ! 	if ((*p & 0xc0) == 0x80)
       
   245   	    return FALSE;	/* invalid lead byte */
       
   246 - 	l = utf8len_tab[*p];
       
   247   	if (end != NULL && p + l > end)
       
   248   	    return FALSE;	/* incomplete byte sequence */
       
   249   	++p;
       
   250 --- 2835,2843 ----
       
   251   
       
   252       while (end == NULL ? *p != NUL : p < end)
       
   253       {
       
   254 ! 	l = utf8len_tab_zero[*p];
       
   255 ! 	if (l == 0)
       
   256   	    return FALSE;	/* invalid lead byte */
       
   257   	if (end != NULL && p + l > end)
       
   258   	    return FALSE;	/* incomplete byte sequence */
       
   259   	++p;
       
   260 ***************
       
   261 *** 6117,6128 ****
       
   262   	    d = retval;
       
   263   	    for (i = 0; i < len; ++i)
       
   264   	    {
       
   265 ! 		l = utf_ptr2len(ptr + i);
       
   266   		if (l == 0)
       
   267   		    *d++ = NUL;
       
   268   		else if (l == 1)
       
   269   		{
       
   270 ! 		    if (unconvlenp != NULL && utf8len_tab[ptr[i]] > len - i)
       
   271   		    {
       
   272   			/* Incomplete sequence at the end. */
       
   273   			*unconvlenp = len - i;
       
   274 --- 6142,6161 ----
       
   275   	    d = retval;
       
   276   	    for (i = 0; i < len; ++i)
       
   277   	    {
       
   278 ! 		l = utf_ptr2len_len(ptr + i, len - i);
       
   279   		if (l == 0)
       
   280   		    *d++ = NUL;
       
   281   		else if (l == 1)
       
   282   		{
       
   283 ! 		    int l_w = utf8len_tab_zero[ptr[i]];
       
   284 ! 
       
   285 ! 		    if (l_w == 0)
       
   286 ! 		    {
       
   287 ! 			/* Illegal utf-8 byte cannot be converted */
       
   288 ! 			vim_free(retval);
       
   289 ! 			return NULL;
       
   290 ! 		    }
       
   291 ! 		    if (unconvlenp != NULL && l_w > len - i)
       
   292   		    {
       
   293   			/* Incomplete sequence at the end. */
       
   294   			*unconvlenp = len - i;
       
   295 *** ../vim-7.2.311/src/version.c	2009-12-02 13:32:10.000000000 +0100
       
   296 --- src/version.c	2009-12-02 15:00:23.000000000 +0100
       
   297 ***************
       
   298 *** 683,684 ****
       
   299 --- 683,686 ----
       
   300   {   /* Add new patch number below this line */
       
   301 + /**/
       
   302 +     312,
       
   303   /**/
       
   304 
       
   305 -- 
       
   306 hundred-and-one symptoms of being an internet addict:
       
   307 6. You refuse to go to a vacation spot with no electricity and no phone lines.
       
   308 
       
   309  /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
       
   310 ///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
       
   311 \\\        download, build and distribute -- http://www.A-A-P.org        ///
       
   312  \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///