|
1 To: vim-dev@vim.org |
|
2 Subject: Patch 7.2.312 |
|
3 Fcc: outbox |
|
4 From: Bram Moolenaar <Bram@moolenaar.net> |
|
5 Mime-Version: 1.0 |
|
6 Content-Type: text/plain; charset=UTF-8 |
|
7 Content-Transfer-Encoding: 8bit |
|
8 ------------ |
|
9 |
|
10 Patch 7.2.312 |
|
11 Problem: iconv() returns an invalid character sequence when conversion |
|
12 fails. It should return an empty string. (Yongwei Wu) |
|
13 Solution: Be more strict about invalid characters in the input. |
|
14 Files: src/mbyte.c |
|
15 |
|
16 |
|
17 *** ../vim-7.2.311/src/mbyte.c 2009-06-16 15:23:07.000000000 +0200 |
|
18 --- src/mbyte.c 2009-11-25 16:10:44.000000000 +0100 |
|
19 *************** |
|
20 *** 133,154 **** |
|
21 static int dbcs_ptr2cells_len __ARGS((char_u *p, int size)); |
|
22 static int dbcs_ptr2char __ARGS((char_u *p)); |
|
23 |
|
24 ! /* Lookup table to quickly get the length in bytes of a UTF-8 character from |
|
25 ! * the first byte of a UTF-8 string. Bytes which are illegal when used as the |
|
26 ! * first byte have a one, because these will be used separately. */ |
|
27 static char utf8len_tab[256] = |
|
28 { |
|
29 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
30 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
31 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
32 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
33 ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/ |
|
34 ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /*bogus*/ |
|
35 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
|
36 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, |
|
37 }; |
|
38 |
|
39 /* |
|
40 * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks |
|
41 * in the "xim.log" file. |
|
42 */ |
|
43 --- 133,172 ---- |
|
44 static int dbcs_ptr2cells_len __ARGS((char_u *p, int size)); |
|
45 static int dbcs_ptr2char __ARGS((char_u *p)); |
|
46 |
|
47 ! /* |
|
48 ! * Lookup table to quickly get the length in bytes of a UTF-8 character from |
|
49 ! * the first byte of a UTF-8 string. |
|
50 ! * Bytes which are illegal when used as the first byte have a 1. |
|
51 ! * The NUL byte has length 1. |
|
52 ! */ |
|
53 static char utf8len_tab[256] = |
|
54 { |
|
55 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
56 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
57 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
58 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
59 ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
60 ! 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
61 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
|
62 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, |
|
63 }; |
|
64 |
|
65 /* |
|
66 + * Like utf8len_tab above, but using a zero for illegal lead bytes. |
|
67 + */ |
|
68 + static char utf8len_tab_zero[256] = |
|
69 + { |
|
70 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
71 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
72 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
73 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
74 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
75 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
76 + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
|
77 + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0, |
|
78 + }; |
|
79 + |
|
80 + /* |
|
81 * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks |
|
82 * in the "xim.log" file. |
|
83 */ |
|
84 *************** |
|
85 *** 1352,1358 **** |
|
86 if (size > 0 && *p >= 0x80) |
|
87 { |
|
88 if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) |
|
89 ! return 1; |
|
90 c = utf_ptr2char(p); |
|
91 /* An illegal byte is displayed as <xx>. */ |
|
92 if (utf_ptr2len(p) == 1 || c == NUL) |
|
93 --- 1370,1376 ---- |
|
94 if (size > 0 && *p >= 0x80) |
|
95 { |
|
96 if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) |
|
97 ! return 1; /* truncated */ |
|
98 c = utf_ptr2char(p); |
|
99 /* An illegal byte is displayed as <xx>. */ |
|
100 if (utf_ptr2len(p) == 1 || c == NUL) |
|
101 *************** |
|
102 *** 1473,1479 **** |
|
103 if (p[0] < 0x80) /* be quick for ASCII */ |
|
104 return p[0]; |
|
105 |
|
106 ! len = utf8len_tab[p[0]]; |
|
107 if (len > 1 && (p[1] & 0xc0) == 0x80) |
|
108 { |
|
109 if (len == 2) |
|
110 --- 1491,1497 ---- |
|
111 if (p[0] < 0x80) /* be quick for ASCII */ |
|
112 return p[0]; |
|
113 |
|
114 ! len = utf8len_tab_zero[p[0]]; |
|
115 if (len > 1 && (p[1] & 0xc0) == 0x80) |
|
116 { |
|
117 if (len == 2) |
|
118 *************** |
|
119 *** 1723,1728 **** |
|
120 --- 1741,1747 ---- |
|
121 /* |
|
122 * Return length of UTF-8 character, obtained from the first byte. |
|
123 * "b" must be between 0 and 255! |
|
124 + * Returns 1 for an invalid first byte value. |
|
125 */ |
|
126 int |
|
127 utf_byte2len(b) |
|
128 *************** |
|
129 *** 1737,1742 **** |
|
130 --- 1756,1762 ---- |
|
131 * Returns 1 for "". |
|
132 * Returns 1 for an illegal byte sequence (also in incomplete byte seq.). |
|
133 * Returns number > "size" for an incomplete byte sequence. |
|
134 + * Never returns zero. |
|
135 */ |
|
136 int |
|
137 utf_ptr2len_len(p, size) |
|
138 *************** |
|
139 *** 1747,1757 **** |
|
140 int i; |
|
141 int m; |
|
142 |
|
143 ! if (*p == NUL) |
|
144 ! return 1; |
|
145 ! m = len = utf8len_tab[*p]; |
|
146 if (len > size) |
|
147 m = size; /* incomplete byte sequence. */ |
|
148 for (i = 1; i < m; ++i) |
|
149 if ((p[i] & 0xc0) != 0x80) |
|
150 return 1; |
|
151 --- 1767,1779 ---- |
|
152 int i; |
|
153 int m; |
|
154 |
|
155 ! len = utf8len_tab[*p]; |
|
156 ! if (len == 1) |
|
157 ! return 1; /* NUL, ascii or illegal lead byte */ |
|
158 if (len > size) |
|
159 m = size; /* incomplete byte sequence. */ |
|
160 + else |
|
161 + m = len; |
|
162 for (i = 1; i < m; ++i) |
|
163 if ((p[i] & 0xc0) != 0x80) |
|
164 return 1; |
|
165 *************** |
|
166 *** 2505,2510 **** |
|
167 --- 2527,2533 ---- |
|
168 /* |
|
169 * mb_head_off() function pointer. |
|
170 * Return offset from "p" to the first byte of the character it points into. |
|
171 + * If "p" points to the NUL at the end of the string return 0. |
|
172 * Returns 0 when already at the first byte of a character. |
|
173 */ |
|
174 int |
|
175 *************** |
|
176 *** 2524,2530 **** |
|
177 |
|
178 /* It can't be a trailing byte when not using DBCS, at the start of the |
|
179 * string or the previous byte can't start a double-byte. */ |
|
180 ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1) |
|
181 return 0; |
|
182 |
|
183 /* This is slow: need to start at the base and go forward until the |
|
184 --- 2547,2553 ---- |
|
185 |
|
186 /* It can't be a trailing byte when not using DBCS, at the start of the |
|
187 * string or the previous byte can't start a double-byte. */ |
|
188 ! if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL) |
|
189 return 0; |
|
190 |
|
191 /* This is slow: need to start at the base and go forward until the |
|
192 *************** |
|
193 *** 2552,2558 **** |
|
194 * lead byte in the current cell. */ |
|
195 if (p <= base |
|
196 || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e) |
|
197 ! || MB_BYTE2LEN(p[-1]) == 1) |
|
198 return 0; |
|
199 |
|
200 /* This is slow: need to start at the base and go forward until the |
|
201 --- 2575,2582 ---- |
|
202 * lead byte in the current cell. */ |
|
203 if (p <= base |
|
204 || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e) |
|
205 ! || MB_BYTE2LEN(p[-1]) == 1 |
|
206 ! || *p == NUL) |
|
207 return 0; |
|
208 |
|
209 /* This is slow: need to start at the base and go forward until the |
|
210 *************** |
|
211 *** 2578,2583 **** |
|
212 --- 2602,2608 ---- |
|
213 char_u *q; |
|
214 char_u *s; |
|
215 int c; |
|
216 + int len; |
|
217 #ifdef FEAT_ARABIC |
|
218 char_u *j; |
|
219 #endif |
|
220 *************** |
|
221 *** 2597,2604 **** |
|
222 --q; |
|
223 /* Check for illegal sequence. Do allow an illegal byte after where we |
|
224 * started. */ |
|
225 ! if (utf8len_tab[*q] != (int)(s - q + 1) |
|
226 ! && utf8len_tab[*q] != (int)(p - q + 1)) |
|
227 return 0; |
|
228 |
|
229 if (q <= base) |
|
230 --- 2622,2629 ---- |
|
231 --q; |
|
232 /* Check for illegal sequence. Do allow an illegal byte after where we |
|
233 * started. */ |
|
234 ! len = utf8len_tab[*q]; |
|
235 ! if (len != (int)(s - q + 1) && len != (int)(p - q + 1)) |
|
236 return 0; |
|
237 |
|
238 if (q <= base) |
|
239 *************** |
|
240 *** 2810,2818 **** |
|
241 |
|
242 while (end == NULL ? *p != NUL : p < end) |
|
243 { |
|
244 ! if ((*p & 0xc0) == 0x80) |
|
245 return FALSE; /* invalid lead byte */ |
|
246 - l = utf8len_tab[*p]; |
|
247 if (end != NULL && p + l > end) |
|
248 return FALSE; /* incomplete byte sequence */ |
|
249 ++p; |
|
250 --- 2835,2843 ---- |
|
251 |
|
252 while (end == NULL ? *p != NUL : p < end) |
|
253 { |
|
254 ! l = utf8len_tab_zero[*p]; |
|
255 ! if (l == 0) |
|
256 return FALSE; /* invalid lead byte */ |
|
257 if (end != NULL && p + l > end) |
|
258 return FALSE; /* incomplete byte sequence */ |
|
259 ++p; |
|
260 *************** |
|
261 *** 6117,6128 **** |
|
262 d = retval; |
|
263 for (i = 0; i < len; ++i) |
|
264 { |
|
265 ! l = utf_ptr2len(ptr + i); |
|
266 if (l == 0) |
|
267 *d++ = NUL; |
|
268 else if (l == 1) |
|
269 { |
|
270 ! if (unconvlenp != NULL && utf8len_tab[ptr[i]] > len - i) |
|
271 { |
|
272 /* Incomplete sequence at the end. */ |
|
273 *unconvlenp = len - i; |
|
274 --- 6142,6161 ---- |
|
275 d = retval; |
|
276 for (i = 0; i < len; ++i) |
|
277 { |
|
278 ! l = utf_ptr2len_len(ptr + i, len - i); |
|
279 if (l == 0) |
|
280 *d++ = NUL; |
|
281 else if (l == 1) |
|
282 { |
|
283 ! int l_w = utf8len_tab_zero[ptr[i]]; |
|
284 ! |
|
285 ! if (l_w == 0) |
|
286 ! { |
|
287 ! /* Illegal utf-8 byte cannot be converted */ |
|
288 ! vim_free(retval); |
|
289 ! return NULL; |
|
290 ! } |
|
291 ! if (unconvlenp != NULL && l_w > len - i) |
|
292 { |
|
293 /* Incomplete sequence at the end. */ |
|
294 *unconvlenp = len - i; |
|
295 *** ../vim-7.2.311/src/version.c 2009-12-02 13:32:10.000000000 +0100 |
|
296 --- src/version.c 2009-12-02 15:00:23.000000000 +0100 |
|
297 *************** |
|
298 *** 683,684 **** |
|
299 --- 683,686 ---- |
|
300 { /* Add new patch number below this line */ |
|
301 + /**/ |
|
302 + 312, |
|
303 /**/ |
|
304 |
|
305 -- |
|
306 hundred-and-one symptoms of being an internet addict: |
|
307 6. You refuse to go to a vacation spot with no electricity and no phone lines. |
|
308 |
|
309 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ |
|
310 /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ |
|
311 \\\ download, build and distribute -- http://www.A-A-P.org /// |
|
312 \\\ help me help AIDS victims -- http://ICCF-Holland.org /// |