usr/src/cmd/tr/tr.c
changeset 12790 e1c710858516
equal deleted inserted replaced
12789:0b7f2daa174e 12790:e1c710858516
       
     1 /*
       
     2  * Copyright (c) 1988, 1993
       
     3  *	The Regents of the University of California.  All rights reserved.
       
     4  *
       
     5  * Redistribution and use in source and binary forms, with or without
       
     6  * modification, are permitted provided that the following conditions
       
     7  * are met:
       
     8  * 1. Redistributions of source code must retain the above copyright
       
     9  *    notice, this list of conditions and the following disclaimer.
       
    10  * 2. Redistributions in binary form must reproduce the above copyright
       
    11  *    notice, this list of conditions and the following disclaimer in the
       
    12  *    documentation and/or other materials provided with the distribution.
       
    13  * 3. All advertising materials mentioning features or use of this software
       
    14  *    must display the following acknowledgement:
       
    15  *	This product includes software developed by the University of
       
    16  *	California, Berkeley and its contributors.
       
    17  * 4. Neither the name of the University nor the names of its contributors
       
    18  *    may be used to endorse or promote products derived from this software
       
    19  *    without specific prior written permission.
       
    20  *
       
    21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       
    22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       
    25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       
    26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       
    27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       
    28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       
    29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       
    30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       
    31  * SUCH DAMAGE.
       
    32  */
       
    33 
       
    34 #include <sys/types.h>
       
    35 
       
    36 #include <ctype.h>
       
    37 #include <err.h>
       
    38 #include <limits.h>
       
    39 #include <locale.h>
       
    40 #include <stdio.h>
       
    41 #include <stdlib.h>
       
    42 #include <string.h>
       
    43 #include <unistd.h>
       
    44 #include <wchar.h>
       
    45 #include <wctype.h>
       
    46 
       
    47 #include "cmap.h"
       
    48 #include "cset.h"
       
    49 #include "extern.h"
       
    50 
       
    51 STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };
       
    52 STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };
       
    53 
       
    54 static struct cset *setup(char *, STR *, int, int);
       
    55 static void usage(void);
       
    56 
       
    57 static wint_t
       
    58 cmap_lookup(struct cmap *cm, wint_t from)
       
    59 {
       
    60 
       
    61         if (from < CM_CACHE_SIZE && cm->cm_havecache)
       
    62 		return (cm->cm_cache[from]);
       
    63         return (cmap_lookup_hard(cm, from));
       
    64 }
       
    65 
       
    66 static wint_t
       
    67 cmap_max(struct cmap *cm)
       
    68 {
       
    69 	return (cm->cm_max);
       
    70 }
       
    71 
       
    72 static inline bool
       
    73 cset_in(struct cset *cs, wchar_t ch)
       
    74 {
       
    75 
       
    76  	if (ch < CS_CACHE_SIZE && cs->cs_havecache)
       
    77 		return (cs->cs_cache[ch]);
       
    78 	return (cset_in_hard(cs, ch));
       
    79 }
       
    80 
       
    81 int
       
    82 main(int argc, char **argv)
       
    83 {
       
    84 	static int carray[NCHARS_SB];
       
    85 	struct cmap *map;
       
    86 	struct cset *delete, *squeeze;
       
    87 	int n, *p;
       
    88 	int Cflag, cflag, dflag, sflag, isstring2;
       
    89 	wint_t ch, cnt, lastch;
       
    90 
       
    91 	(void) setlocale(LC_ALL, "");
       
    92 
       
    93 	Cflag = cflag = dflag = sflag = 0;
       
    94 	while ((ch = getopt(argc, argv, "Ccdsu")) != -1)
       
    95 		switch ((char)ch) {
       
    96 		case 'C':
       
    97 			Cflag = 1;
       
    98 			cflag = 0;
       
    99 			break;
       
   100 		case 'c':
       
   101 			cflag = 1;
       
   102 			Cflag = 0;
       
   103 			break;
       
   104 		case 'd':
       
   105 			dflag = 1;
       
   106 			break;
       
   107 		case 's':
       
   108 			sflag = 1;
       
   109 			break;
       
   110 		case 'u':
       
   111 			setbuf(stdout, (char *)NULL);
       
   112 			break;
       
   113 		case '?':
       
   114 		default:
       
   115 			usage();
       
   116 		}
       
   117 	argc -= optind;
       
   118 	argv += optind;
       
   119 
       
   120 	switch (argc) {
       
   121 	case 0:
       
   122 	default:
       
   123 		usage();
       
   124 		/* NOTREACHED */
       
   125 	case 1:
       
   126 		isstring2 = 0;
       
   127 		break;
       
   128 	case 2:
       
   129 		isstring2 = 1;
       
   130 		break;
       
   131 	}
       
   132 
       
   133 	/*
       
   134 	 * tr -ds [-Cc] string1 string2
       
   135 	 * Delete all characters (or complemented characters) in string1.
       
   136 	 * Squeeze all characters in string2.
       
   137 	 */
       
   138 	if (dflag && sflag) {
       
   139 		if (!isstring2)
       
   140 			usage();
       
   141 
       
   142 		delete = setup(argv[0], &s1, cflag, Cflag);
       
   143 		squeeze = setup(argv[1], &s2, 0, 0);
       
   144 
       
   145 		for (lastch = OOBCH; (ch = getwchar()) != WEOF; )
       
   146 			if (!cset_in(delete, ch) &&
       
   147 			    (lastch != ch || !cset_in(squeeze, ch))) {
       
   148 				lastch = ch;
       
   149 				(void) putwchar(ch);
       
   150 			}
       
   151 		if (ferror(stdin))
       
   152 			err(1, NULL);
       
   153 		exit(0);
       
   154 	}
       
   155 
       
   156 	/*
       
   157 	 * tr -d [-Cc] string1
       
   158 	 * Delete all characters (or complemented characters) in string1.
       
   159 	 */
       
   160 	if (dflag) {
       
   161 		if (isstring2)
       
   162 			usage();
       
   163 
       
   164 		delete = setup(argv[0], &s1, cflag, Cflag);
       
   165 
       
   166 		while ((ch = getwchar()) != WEOF)
       
   167 			if (!cset_in(delete, ch))
       
   168 				(void) putwchar(ch);
       
   169 		if (ferror(stdin))
       
   170 			err(1, NULL);
       
   171 		exit(0);
       
   172 	}
       
   173 
       
   174 	/*
       
   175 	 * tr -s [-Cc] string1
       
   176 	 * Squeeze all characters (or complemented characters) in string1.
       
   177 	 */
       
   178 	if (sflag && !isstring2) {
       
   179 		squeeze = setup(argv[0], &s1, cflag, Cflag);
       
   180 
       
   181 		for (lastch = OOBCH; (ch = getwchar()) != WEOF; )
       
   182 			if (lastch != ch || !cset_in(squeeze, ch)) {
       
   183 				lastch = ch;
       
   184 				(void) putwchar(ch);
       
   185 			}
       
   186 		if (ferror(stdin))
       
   187 			err(1, NULL);
       
   188 		exit(0);
       
   189 	}
       
   190 
       
   191 	/*
       
   192 	 * tr [-Ccs] string1 string2
       
   193 	 * Replace all characters (or complemented characters) in string1 with
       
   194 	 * the character in the same position in string2.  If the -s option is
       
   195 	 * specified, squeeze all the characters in string2.
       
   196 	 */
       
   197 	if (!isstring2)
       
   198 		usage();
       
   199 
       
   200 	map = cmap_alloc();
       
   201 	if (map == NULL)
       
   202 		err(1, NULL);
       
   203 	squeeze = cset_alloc();
       
   204 	if (squeeze == NULL)
       
   205 		err(1, NULL);
       
   206 
       
   207 	s1.str = argv[0];
       
   208 
       
   209 	if (Cflag || cflag) {
       
   210 		(void) cmap_default(map, OOBCH);
       
   211 		if ((s2.str = strdup(argv[1])) == NULL)
       
   212 			errx(1, "strdup(argv[1])");
       
   213 	} else
       
   214 		s2.str = argv[1];
       
   215 
       
   216 	if (!next(&s2))
       
   217 		errx(1, "empty string2");
       
   218 
       
   219 	/*
       
   220 	 * For -s result will contain only those characters defined
       
   221 	 * as the second characters in each of the toupper or tolower
       
   222 	 * pairs.
       
   223 	 */
       
   224 
       
   225 	/* If string2 runs out of characters, use the last one specified. */
       
   226 	while (next(&s1)) {
       
   227 	again:
       
   228 		if (s1.state == CCLASS_LOWER &&
       
   229 		    s2.state == CCLASS_UPPER &&
       
   230 		    s1.cnt == 1 && s2.cnt == 1) {
       
   231 			do {
       
   232 				ch = towupper(s1.lastch);
       
   233 				(void) cmap_add(map, s1.lastch, ch);
       
   234 				if (sflag && iswupper(ch))
       
   235 					(void) cset_add(squeeze, ch);
       
   236 				if (!next(&s1))
       
   237 					goto endloop;
       
   238 			} while (s1.state == CCLASS_LOWER && s1.cnt > 1);
       
   239 			/* skip upper set */
       
   240 			do {
       
   241 				if (!next(&s2))
       
   242 					break;
       
   243 			} while (s2.state == CCLASS_UPPER && s2.cnt > 1);
       
   244 			goto again;
       
   245 		} else if (s1.state == CCLASS_UPPER &&
       
   246 		    s2.state == CCLASS_LOWER &&
       
   247 		    s1.cnt == 1 && s2.cnt == 1) {
       
   248 			do {
       
   249 				ch = towlower(s1.lastch);
       
   250 				(void) cmap_add(map, s1.lastch, ch);
       
   251 				if (sflag && iswlower(ch))
       
   252 					(void) cset_add(squeeze, ch);
       
   253 				if (!next(&s1))
       
   254 					goto endloop;
       
   255 			} while (s1.state == CCLASS_UPPER && s1.cnt > 1);
       
   256 			/* skip lower set */
       
   257 			do {
       
   258 				if (!next(&s2))
       
   259 					break;
       
   260 			} while (s2.state == CCLASS_LOWER && s2.cnt > 1);
       
   261 			goto again;
       
   262 		} else {
       
   263 			(void) cmap_add(map, s1.lastch, s2.lastch);
       
   264 			if (sflag)
       
   265 				(void) cset_add(squeeze, s2.lastch);
       
   266 		}
       
   267 		(void) next(&s2);
       
   268 	}
       
   269 endloop:
       
   270 	if (cflag || (Cflag && MB_CUR_MAX > 1)) {
       
   271 		/*
       
   272 		 * This is somewhat tricky: since the character set is
       
   273 		 * potentially huge, we need to avoid allocating a map
       
   274 		 * entry for every character. Our strategy is to set the
       
   275 		 * default mapping to the last character of string #2
       
   276 		 * (= the one that gets automatically repeated), then to
       
   277 		 * add back identity mappings for characters that should
       
   278 		 * remain unchanged. We don't waste space on identity mappings
       
   279 		 * for non-characters with the -C option; those are simulated
       
   280 		 * in the I/O loop.
       
   281 		 */
       
   282 		s2.str = argv[1];
       
   283 		s2.state = NORMAL;
       
   284 		for (cnt = 0; cnt < WCHAR_MAX; cnt++) {
       
   285 			if (Cflag && !iswrune(cnt))
       
   286 				continue;
       
   287 			if (cmap_lookup(map, cnt) == OOBCH) {
       
   288 				if (next(&s2))
       
   289 					(void) cmap_add(map, cnt, s2.lastch);
       
   290 				if (sflag)
       
   291 					(void) cset_add(squeeze, s2.lastch);
       
   292 			} else
       
   293 				(void) cmap_add(map, cnt, cnt);
       
   294 			if ((s2.state == EOS || s2.state == INFINITE) &&
       
   295 			    cnt >= cmap_max(map))
       
   296 				break;
       
   297 		}
       
   298 		(void) cmap_default(map, s2.lastch);
       
   299 	} else if (Cflag) {
       
   300 		for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) {
       
   301 			if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt))
       
   302 				*p++ = cnt;
       
   303 			else
       
   304 				(void) cmap_add(map, cnt, cnt);
       
   305 		}
       
   306 		n = p - carray;
       
   307 		if (Cflag && n > 1)
       
   308 			(void) qsort(carray, n, sizeof (*carray), charcoll);
       
   309 
       
   310 		s2.str = argv[1];
       
   311 		s2.state = NORMAL;
       
   312 		for (cnt = 0; cnt < n; cnt++) {
       
   313 			(void) next(&s2);
       
   314 			(void) cmap_add(map, carray[cnt], s2.lastch);
       
   315 			/*
       
   316 			 * Chars taken from s2 can be different this time
       
   317 			 * due to lack of complex upper/lower processing,
       
   318 			 * so fill string2 again to not miss some.
       
   319 			 */
       
   320 			if (sflag)
       
   321 				(void) cset_add(squeeze, s2.lastch);
       
   322 		}
       
   323 	}
       
   324 
       
   325 	cset_cache(squeeze);
       
   326 	cmap_cache(map);
       
   327 
       
   328 	if (sflag)
       
   329 		for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) {
       
   330 			if (!Cflag || iswrune(ch))
       
   331 				ch = cmap_lookup(map, ch);
       
   332 			if (lastch != ch || !cset_in(squeeze, ch)) {
       
   333 				lastch = ch;
       
   334 				(void) putwchar(ch);
       
   335 			}
       
   336 		}
       
   337 	else
       
   338 		while ((ch = getwchar()) != WEOF) {
       
   339 			if (!Cflag || iswrune(ch))
       
   340 				ch = cmap_lookup(map, ch);
       
   341 			(void) putwchar(ch);
       
   342 		}
       
   343 	if (ferror(stdin))
       
   344 		err(1, NULL);
       
   345 	exit(0);
       
   346 }
       
   347 
       
   348 static struct cset *
       
   349 setup(char *arg, STR *str, int cflag, int Cflag)
       
   350 {
       
   351 	struct cset *cs;
       
   352 
       
   353 	cs = cset_alloc();
       
   354 	if (cs == NULL)
       
   355 		err(1, NULL);
       
   356 	str->str = arg;
       
   357 	while (next(str))
       
   358 		(void) cset_add(cs, str->lastch);
       
   359 	if (Cflag)
       
   360 		(void) cset_addclass(cs, wctype("rune"), true);
       
   361 	if (cflag || Cflag)
       
   362 		cset_invert(cs);
       
   363 	cset_cache(cs);
       
   364 	return (cs);
       
   365 }
       
   366 
       
   /*
 * Comparison callback for qsort(): `a' and `b' point at ints holding
 * character codes.  Each code is narrowed to a char, turned into a
 * one-character string, and the two strings are compared with strcoll(),
 * so the ordering follows the current LC_COLLATE locale.
 *
 * Returns the strcoll() result: negative, zero or positive.
 */
int
charcoll(const void *a, const void *b)
{
	char lhs[2], rhs[2];

	lhs[0] = *(const int *)a;
	lhs[1] = '\0';
	rhs[0] = *(const int *)b;
	rhs[1] = '\0';

	return (strcoll(lhs, rhs));
}
       
   376 
       
   /*
 * Print the four-line synopsis to stderr and exit with status 1.
 * Never returns.
 */
static void
usage(void)
{
	static const char *const synopsis[] = {
		"usage: tr [-Ccsu] string1 string2",
		"       tr [-Ccu] -d string1",
		"       tr [-Ccu] -s string1",
		"       tr [-Ccu] -ds string1 string2"
	};

	(void) fprintf(stderr, "%s\n%s\n%s\n%s\n",
	    synopsis[0], synopsis[1], synopsis[2], synopsis[3]);
	exit(1);
}