|
1 /* |
|
2 * Copyright (c) 1988, 1993 |
|
3 * The Regents of the University of California. All rights reserved. |
|
4 * |
|
5 * Redistribution and use in source and binary forms, with or without |
|
6 * modification, are permitted provided that the following conditions |
|
7 * are met: |
|
8 * 1. Redistributions of source code must retain the above copyright |
|
9 * notice, this list of conditions and the following disclaimer. |
|
10 * 2. Redistributions in binary form must reproduce the above copyright |
|
11 * notice, this list of conditions and the following disclaimer in the |
|
12 * documentation and/or other materials provided with the distribution. |
|
13 * 3. All advertising materials mentioning features or use of this software |
|
14 * must display the following acknowledgement: |
|
15 * This product includes software developed by the University of |
|
16 * California, Berkeley and its contributors. |
|
17 * 4. Neither the name of the University nor the names of its contributors |
|
18 * may be used to endorse or promote products derived from this software |
|
19 * without specific prior written permission. |
|
20 * |
|
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
31 * SUCH DAMAGE. |
|
32 */ |
|
33 |
|
34 #include <sys/types.h> |
|
35 |
|
36 #include <ctype.h> |
|
37 #include <err.h> |
|
38 #include <limits.h> |
|
39 #include <locale.h> |
|
40 #include <stdio.h> |
|
41 #include <stdlib.h> |
|
42 #include <string.h> |
|
43 #include <unistd.h> |
|
44 #include <wchar.h> |
|
45 #include <wctype.h> |
|
46 |
|
47 #include "cmap.h" |
|
48 #include "cset.h" |
|
49 #include "extern.h" |
|
50 |
|
51 STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; |
|
52 STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; |
|
53 |
|
54 static struct cset *setup(char *, STR *, int, int); |
|
55 static void usage(void); |
|
56 |
|
57 static wint_t |
|
58 cmap_lookup(struct cmap *cm, wint_t from) |
|
59 { |
|
60 |
|
61 if (from < CM_CACHE_SIZE && cm->cm_havecache) |
|
62 return (cm->cm_cache[from]); |
|
63 return (cmap_lookup_hard(cm, from)); |
|
64 } |
|
65 |
|
66 static wint_t |
|
67 cmap_max(struct cmap *cm) |
|
68 { |
|
69 return (cm->cm_max); |
|
70 } |
|
71 |
|
72 static inline bool |
|
73 cset_in(struct cset *cs, wchar_t ch) |
|
74 { |
|
75 |
|
76 if (ch < CS_CACHE_SIZE && cs->cs_havecache) |
|
77 return (cs->cs_cache[ch]); |
|
78 return (cset_in_hard(cs, ch)); |
|
79 } |
|
80 |
|
81 int |
|
82 main(int argc, char **argv) |
|
83 { |
|
84 static int carray[NCHARS_SB]; |
|
85 struct cmap *map; |
|
86 struct cset *delete, *squeeze; |
|
87 int n, *p; |
|
88 int Cflag, cflag, dflag, sflag, isstring2; |
|
89 wint_t ch, cnt, lastch; |
|
90 |
|
91 (void) setlocale(LC_ALL, ""); |
|
92 |
|
93 Cflag = cflag = dflag = sflag = 0; |
|
94 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) |
|
95 switch ((char)ch) { |
|
96 case 'C': |
|
97 Cflag = 1; |
|
98 cflag = 0; |
|
99 break; |
|
100 case 'c': |
|
101 cflag = 1; |
|
102 Cflag = 0; |
|
103 break; |
|
104 case 'd': |
|
105 dflag = 1; |
|
106 break; |
|
107 case 's': |
|
108 sflag = 1; |
|
109 break; |
|
110 case 'u': |
|
111 setbuf(stdout, (char *)NULL); |
|
112 break; |
|
113 case '?': |
|
114 default: |
|
115 usage(); |
|
116 } |
|
117 argc -= optind; |
|
118 argv += optind; |
|
119 |
|
120 switch (argc) { |
|
121 case 0: |
|
122 default: |
|
123 usage(); |
|
124 /* NOTREACHED */ |
|
125 case 1: |
|
126 isstring2 = 0; |
|
127 break; |
|
128 case 2: |
|
129 isstring2 = 1; |
|
130 break; |
|
131 } |
|
132 |
|
133 /* |
|
134 * tr -ds [-Cc] string1 string2 |
|
135 * Delete all characters (or complemented characters) in string1. |
|
136 * Squeeze all characters in string2. |
|
137 */ |
|
138 if (dflag && sflag) { |
|
139 if (!isstring2) |
|
140 usage(); |
|
141 |
|
142 delete = setup(argv[0], &s1, cflag, Cflag); |
|
143 squeeze = setup(argv[1], &s2, 0, 0); |
|
144 |
|
145 for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) |
|
146 if (!cset_in(delete, ch) && |
|
147 (lastch != ch || !cset_in(squeeze, ch))) { |
|
148 lastch = ch; |
|
149 (void) putwchar(ch); |
|
150 } |
|
151 if (ferror(stdin)) |
|
152 err(1, NULL); |
|
153 exit(0); |
|
154 } |
|
155 |
|
156 /* |
|
157 * tr -d [-Cc] string1 |
|
158 * Delete all characters (or complemented characters) in string1. |
|
159 */ |
|
160 if (dflag) { |
|
161 if (isstring2) |
|
162 usage(); |
|
163 |
|
164 delete = setup(argv[0], &s1, cflag, Cflag); |
|
165 |
|
166 while ((ch = getwchar()) != WEOF) |
|
167 if (!cset_in(delete, ch)) |
|
168 (void) putwchar(ch); |
|
169 if (ferror(stdin)) |
|
170 err(1, NULL); |
|
171 exit(0); |
|
172 } |
|
173 |
|
174 /* |
|
175 * tr -s [-Cc] string1 |
|
176 * Squeeze all characters (or complemented characters) in string1. |
|
177 */ |
|
178 if (sflag && !isstring2) { |
|
179 squeeze = setup(argv[0], &s1, cflag, Cflag); |
|
180 |
|
181 for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) |
|
182 if (lastch != ch || !cset_in(squeeze, ch)) { |
|
183 lastch = ch; |
|
184 (void) putwchar(ch); |
|
185 } |
|
186 if (ferror(stdin)) |
|
187 err(1, NULL); |
|
188 exit(0); |
|
189 } |
|
190 |
|
191 /* |
|
192 * tr [-Ccs] string1 string2 |
|
193 * Replace all characters (or complemented characters) in string1 with |
|
194 * the character in the same position in string2. If the -s option is |
|
195 * specified, squeeze all the characters in string2. |
|
196 */ |
|
197 if (!isstring2) |
|
198 usage(); |
|
199 |
|
200 map = cmap_alloc(); |
|
201 if (map == NULL) |
|
202 err(1, NULL); |
|
203 squeeze = cset_alloc(); |
|
204 if (squeeze == NULL) |
|
205 err(1, NULL); |
|
206 |
|
207 s1.str = argv[0]; |
|
208 |
|
209 if (Cflag || cflag) { |
|
210 (void) cmap_default(map, OOBCH); |
|
211 if ((s2.str = strdup(argv[1])) == NULL) |
|
212 errx(1, "strdup(argv[1])"); |
|
213 } else |
|
214 s2.str = argv[1]; |
|
215 |
|
216 if (!next(&s2)) |
|
217 errx(1, "empty string2"); |
|
218 |
|
219 /* |
|
220 * For -s result will contain only those characters defined |
|
221 * as the second characters in each of the toupper or tolower |
|
222 * pairs. |
|
223 */ |
|
224 |
|
225 /* If string2 runs out of characters, use the last one specified. */ |
|
226 while (next(&s1)) { |
|
227 again: |
|
228 if (s1.state == CCLASS_LOWER && |
|
229 s2.state == CCLASS_UPPER && |
|
230 s1.cnt == 1 && s2.cnt == 1) { |
|
231 do { |
|
232 ch = towupper(s1.lastch); |
|
233 (void) cmap_add(map, s1.lastch, ch); |
|
234 if (sflag && iswupper(ch)) |
|
235 (void) cset_add(squeeze, ch); |
|
236 if (!next(&s1)) |
|
237 goto endloop; |
|
238 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); |
|
239 /* skip upper set */ |
|
240 do { |
|
241 if (!next(&s2)) |
|
242 break; |
|
243 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); |
|
244 goto again; |
|
245 } else if (s1.state == CCLASS_UPPER && |
|
246 s2.state == CCLASS_LOWER && |
|
247 s1.cnt == 1 && s2.cnt == 1) { |
|
248 do { |
|
249 ch = towlower(s1.lastch); |
|
250 (void) cmap_add(map, s1.lastch, ch); |
|
251 if (sflag && iswlower(ch)) |
|
252 (void) cset_add(squeeze, ch); |
|
253 if (!next(&s1)) |
|
254 goto endloop; |
|
255 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); |
|
256 /* skip lower set */ |
|
257 do { |
|
258 if (!next(&s2)) |
|
259 break; |
|
260 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); |
|
261 goto again; |
|
262 } else { |
|
263 (void) cmap_add(map, s1.lastch, s2.lastch); |
|
264 if (sflag) |
|
265 (void) cset_add(squeeze, s2.lastch); |
|
266 } |
|
267 (void) next(&s2); |
|
268 } |
|
269 endloop: |
|
270 if (cflag || (Cflag && MB_CUR_MAX > 1)) { |
|
271 /* |
|
272 * This is somewhat tricky: since the character set is |
|
273 * potentially huge, we need to avoid allocating a map |
|
274 * entry for every character. Our strategy is to set the |
|
275 * default mapping to the last character of string #2 |
|
276 * (= the one that gets automatically repeated), then to |
|
277 * add back identity mappings for characters that should |
|
278 * remain unchanged. We don't waste space on identity mappings |
|
279 * for non-characters with the -C option; those are simulated |
|
280 * in the I/O loop. |
|
281 */ |
|
282 s2.str = argv[1]; |
|
283 s2.state = NORMAL; |
|
284 for (cnt = 0; cnt < WCHAR_MAX; cnt++) { |
|
285 if (Cflag && !iswrune(cnt)) |
|
286 continue; |
|
287 if (cmap_lookup(map, cnt) == OOBCH) { |
|
288 if (next(&s2)) |
|
289 (void) cmap_add(map, cnt, s2.lastch); |
|
290 if (sflag) |
|
291 (void) cset_add(squeeze, s2.lastch); |
|
292 } else |
|
293 (void) cmap_add(map, cnt, cnt); |
|
294 if ((s2.state == EOS || s2.state == INFINITE) && |
|
295 cnt >= cmap_max(map)) |
|
296 break; |
|
297 } |
|
298 (void) cmap_default(map, s2.lastch); |
|
299 } else if (Cflag) { |
|
300 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { |
|
301 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) |
|
302 *p++ = cnt; |
|
303 else |
|
304 (void) cmap_add(map, cnt, cnt); |
|
305 } |
|
306 n = p - carray; |
|
307 if (Cflag && n > 1) |
|
308 (void) qsort(carray, n, sizeof (*carray), charcoll); |
|
309 |
|
310 s2.str = argv[1]; |
|
311 s2.state = NORMAL; |
|
312 for (cnt = 0; cnt < n; cnt++) { |
|
313 (void) next(&s2); |
|
314 (void) cmap_add(map, carray[cnt], s2.lastch); |
|
315 /* |
|
316 * Chars taken from s2 can be different this time |
|
317 * due to lack of complex upper/lower processing, |
|
318 * so fill string2 again to not miss some. |
|
319 */ |
|
320 if (sflag) |
|
321 (void) cset_add(squeeze, s2.lastch); |
|
322 } |
|
323 } |
|
324 |
|
325 cset_cache(squeeze); |
|
326 cmap_cache(map); |
|
327 |
|
328 if (sflag) |
|
329 for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) { |
|
330 if (!Cflag || iswrune(ch)) |
|
331 ch = cmap_lookup(map, ch); |
|
332 if (lastch != ch || !cset_in(squeeze, ch)) { |
|
333 lastch = ch; |
|
334 (void) putwchar(ch); |
|
335 } |
|
336 } |
|
337 else |
|
338 while ((ch = getwchar()) != WEOF) { |
|
339 if (!Cflag || iswrune(ch)) |
|
340 ch = cmap_lookup(map, ch); |
|
341 (void) putwchar(ch); |
|
342 } |
|
343 if (ferror(stdin)) |
|
344 err(1, NULL); |
|
345 exit(0); |
|
346 } |
|
347 |
|
348 static struct cset * |
|
349 setup(char *arg, STR *str, int cflag, int Cflag) |
|
350 { |
|
351 struct cset *cs; |
|
352 |
|
353 cs = cset_alloc(); |
|
354 if (cs == NULL) |
|
355 err(1, NULL); |
|
356 str->str = arg; |
|
357 while (next(str)) |
|
358 (void) cset_add(cs, str->lastch); |
|
359 if (Cflag) |
|
360 (void) cset_addclass(cs, wctype("rune"), true); |
|
361 if (cflag || Cflag) |
|
362 cset_invert(cs); |
|
363 cset_cache(cs); |
|
364 return (cs); |
|
365 } |
|
366 |
|
/*
 * qsort() comparator ordering two single-byte characters by the current
 * locale's collation.
 *
 * a, b  point to int objects holding the character values
 *
 * Each value is narrowed to char, wrapped in a one-character string and
 * compared with strcoll(); the strcoll() result is returned unchanged.
 */
int
charcoll(const void *a, const void *b)
{
	const char lhs[2] = { (char)*(const int *)a, '\0' };
	const char rhs[2] = { (char)*(const int *)b, '\0' };

	return (strcoll(lhs, rhs));
}
|
376 |
|
/*
 * Print the four-line command synopsis on stderr and exit with status 1.
 * Does not return.
 */
static void
usage(void)
{
	static const char *const synopsis[] = {
		"usage: tr [-Ccsu] string1 string2",
		" tr [-Ccu] -d string1",
		" tr [-Ccu] -s string1",
		" tr [-Ccu] -ds string1 string2"
	};

	(void) fprintf(stderr, "%s\n%s\n%s\n%s\n",
	    synopsis[0], synopsis[1], synopsis[2], synopsis[3]);
	exit(1);
}