--- a/usr/src/Makefile.lint Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/Makefile.lint Wed Aug 18 14:41:42 2010 -0700
@@ -293,6 +293,7 @@
cmd/th_tools \
cmd/tip \
cmd/touch \
+ cmd/tr \
cmd/truss \
cmd/tty \
cmd/tzreload \
@@ -471,7 +472,6 @@
$(CLOSED)/cmd/pax \
$(CLOSED)/cmd/sed_xpg4 \
$(CLOSED)/cmd/tail \
- $(CLOSED)/cmd/tr_xpg4 \
$(CLOSED)/lib/libc_i18n
i386_SUBDIRS= \
--- a/usr/src/cmd/Makefile Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/cmd/Makefile Wed Aug 18 14:41:42 2010 -0700
@@ -402,6 +402,7 @@
touch \
tplot \
tput \
+ tr \
trapstat \
troff \
true \
@@ -467,8 +468,7 @@
$(CLOSED)/cmd/printf \
$(CLOSED)/cmd/sed \
$(CLOSED)/cmd/sed_xpg4 \
- $(CLOSED)/cmd/tail \
- $(CLOSED)/cmd/tr_xpg4
+ $(CLOSED)/cmd/tail
i386_SUBDIRS= \
acpihpd \
@@ -749,8 +749,7 @@
$(CLOSED)/cmd/printf \
$(CLOSED)/cmd/sed \
$(CLOSED)/cmd/sed_xpg4 \
- $(CLOSED)/cmd/tail \
- $(CLOSED)/cmd/tr_xpg4
+ $(CLOSED)/cmd/tail
sparc_MSGSUBDIRS= \
fruadm \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/Makefile Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,70 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#
+# Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+
+PROG= tr
+XPG4PROG= $(PROG)
+XPG6PROG= $(PROG)
+
+OBJS= tr.o str.o cset.o cmap.o
+SRCS= $(OBJS:%.o=%.c)
+
+include ../Makefile.cmd
+
+CLOBBERFILES= $(PROG)
+
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+CPPFLAGS += -D_ILLUMOS_PRIVATE -I.
+LINTFLAGS += -D_ILLUMOS_PRIVATE -I.
+
+# install rules
+$(ROOTINC)/% : %
+ $(INS.file)
+
+.KEEP_STATE:
+
+.PARALLEL: $(OBJS)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+install: all .WAIT $(ROOTPROG) $(ROOTXPG4PROG) $(ROOTXPG6PROG)
+
+$(ROOTXPG4PROG) $(ROOTXPG6PROG):
+ -$(RM) $@
+ -$(LN) -s ../../bin/$(PROG) $@
+
+lint: lint_SRCS
+
+clean:
+ $(RM) $(OBJS)
+
+include ../Makefile.targ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/THIRDPARTYLICENSE Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,88 @@
+
+Copyright (c) 2004 Tim J. Robbins.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+
+Copyright (c) 1991, 1993
+ The Regents of the University of California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. All advertising materials mentioning features or use of this software
+ must display the following acknowledgement:
+ This product includes software developed by the University of
+ California, Berkeley and its contributors.
+4. Neither the name of the University nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+
+Copyright (c) 1988, 1993
+ The Regents of the University of California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. All advertising materials mentioning features or use of this software
+ must display the following acknowledgement:
+ This product includes software developed by the University of
+ California, Berkeley and its contributors.
+4. Neither the name of the University nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/THIRDPARTYLICENSE.descrip Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,1 @@
+TR UTILITY
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/cmap.c Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * "Character map" ADT. Stores mappings between pairs of characters in a
+ * splay tree, with a lookup table cache to simplify looking up the first
+ * bunch of characters (which are presumably more common than others).
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include "cmap.h"
+
+static struct cmapnode *cmap_splay(struct cmapnode *, wint_t);
+
+/*
+ * cmap_alloc --
+ * Allocate a character map.
+ */
+struct cmap *
+cmap_alloc(void)
+{
+ struct cmap *cm;
+
+ cm = malloc(sizeof (*cm));
+ if (cm == NULL)
+ return (NULL);
+ cm->cm_root = NULL;
+ cm->cm_def = CM_DEF_SELF;
+ cm->cm_havecache = false;
+ cm->cm_min = cm->cm_max = 0;
+ return (cm);
+}
+
+/*
+ * cmap_add --
+ * Add a mapping from "from" to "to" to the map.
+ */
+bool
+cmap_add(struct cmap *cm, wint_t from, wint_t to)
+{
+ struct cmapnode *cmn, *ncmn;
+
+ cm->cm_havecache = false;
+
+ if (cm->cm_root == NULL) {
+ cmn = malloc(sizeof (*cmn));
+ if (cmn == NULL)
+ return (false);
+ cmn->cmn_from = from;
+ cmn->cmn_to = to;
+ cmn->cmn_left = cmn->cmn_right = NULL;
+ cm->cm_root = cmn;
+ cm->cm_min = cm->cm_max = from;
+ return (true);
+ }
+
+ cmn = cm->cm_root = cmap_splay(cm->cm_root, from);
+
+ if (cmn->cmn_from == from) {
+ cmn->cmn_to = to;
+ return (true);
+ }
+
+ ncmn = malloc(sizeof (*ncmn));
+ if (ncmn == NULL)
+ return (false);
+ ncmn->cmn_from = from;
+ ncmn->cmn_to = to;
+ if (from < cmn->cmn_from) {
+ ncmn->cmn_left = cmn->cmn_left;
+ ncmn->cmn_right = cmn;
+ cmn->cmn_left = NULL;
+ } else {
+ ncmn->cmn_right = cmn->cmn_right;
+ ncmn->cmn_left = cmn;
+ cmn->cmn_right = NULL;
+ }
+ if (from < cm->cm_min)
+ cm->cm_min = from;
+ if (from > cm->cm_max)
+ cm->cm_max = from;
+ cm->cm_root = ncmn;
+
+ return (true);
+}
+
+/*
+ * cmap_lookup_hard --
+ * Look up the mapping for a character without using the cache.
+ */
+wint_t
+cmap_lookup_hard(struct cmap *cm, wint_t ch)
+{
+
+ if (cm->cm_root != NULL) {
+ cm->cm_root = cmap_splay(cm->cm_root, ch);
+ if (cm->cm_root->cmn_from == ch)
+ return (cm->cm_root->cmn_to);
+ }
+ return (cm->cm_def == CM_DEF_SELF ? ch : cm->cm_def);
+}
+
+/*
+ * cmap_cache --
+ * Update the cache.
+ */
+void
+cmap_cache(struct cmap *cm)
+{
+ wint_t ch;
+
+ for (ch = 0; ch < CM_CACHE_SIZE; ch++)
+ cm->cm_cache[ch] = cmap_lookup_hard(cm, ch);
+
+ cm->cm_havecache = true;
+}
+
+/*
+ * cmap_default --
+ * Change the value that characters without mappings map to, and
+ * return the old value. The special character value CM_MAP_SELF
+ * means characters map to themselves.
+ */
+wint_t
+cmap_default(struct cmap *cm, wint_t def)
+{
+ wint_t old;
+
+ old = cm->cm_def;
+ cm->cm_def = def;
+ cm->cm_havecache = false;
+ return (old);
+}
+
+static struct cmapnode *
+cmap_splay(struct cmapnode *t, wint_t ch)
+{
+ struct cmapnode N, *l, *r, *y;
+
+ /*
+ * Based on public domain code from Sleator.
+ */
+
+ assert(t != NULL);
+
+ N.cmn_left = N.cmn_right = NULL;
+ l = r = &N;
+ for (;;) {
+ if (ch < t->cmn_from) {
+ if (t->cmn_left != NULL &&
+ ch < t->cmn_left->cmn_from) {
+ y = t->cmn_left;
+ t->cmn_left = y->cmn_right;
+ y->cmn_right = t;
+ t = y;
+ }
+ if (t->cmn_left == NULL)
+ break;
+ r->cmn_left = t;
+ r = t;
+ t = t->cmn_left;
+ } else if (ch > t->cmn_from) {
+ if (t->cmn_right != NULL &&
+ ch > t->cmn_right->cmn_from) {
+ y = t->cmn_right;
+ t->cmn_right = y->cmn_left;
+ y->cmn_left = t;
+ t = y;
+ }
+ if (t->cmn_right == NULL)
+ break;
+ l->cmn_right = t;
+ l = t;
+ t = t->cmn_right;
+ } else
+ break;
+ }
+ l->cmn_right = t->cmn_left;
+ r->cmn_left = t->cmn_right;
+ t->cmn_left = N.cmn_right;
+ t->cmn_right = N.cmn_left;
+ return (t);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/cmap.h Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef CMAP_H
+#define CMAP_H
+
+#include <limits.h>
+#include <stdbool.h>
+#include <wchar.h>
+
+struct cmapnode {
+ wint_t cmn_from;
+ wint_t cmn_to;
+ struct cmapnode *cmn_left;
+ struct cmapnode *cmn_right;
+};
+
+struct cmap {
+#define CM_CACHE_SIZE 128
+ wint_t cm_cache[CM_CACHE_SIZE];
+ bool cm_havecache;
+ struct cmapnode *cm_root;
+#define CM_DEF_SELF -2
+ wint_t cm_def;
+ wint_t cm_min;
+ wint_t cm_max;
+};
+
+struct cmap *cmap_alloc(void);
+bool cmap_add(struct cmap *, wint_t, wint_t);
+wint_t cmap_lookup_hard(struct cmap *, wint_t);
+void cmap_cache(struct cmap *);
+wint_t cmap_default(struct cmap *, wint_t);
+
+#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/cset.c Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * "Set of characters" ADT implemented as a splay tree of extents, with
+ * a lookup table cache to simplify looking up the first bunch of
+ * characters (which are presumably more common than others).
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
+#include "cset.h"
+
+static struct csnode *cset_delete(struct csnode *, wchar_t);
+static int cset_rangecmp(struct csnode *, wchar_t);
+static struct csnode *cset_splay(struct csnode *, wchar_t);
+
+/*
+ * cset_alloc --
+ * Allocate a set of characters.
+ */
+struct cset *
+cset_alloc(void)
+{
+ struct cset *cs;
+
+ if ((cs = malloc(sizeof (*cs))) == NULL)
+ return (NULL);
+ cs->cs_root = NULL;
+ cs->cs_classes = NULL;
+ cs->cs_havecache = false;
+ cs->cs_invert = false;
+ return (cs);
+}
+
+/*
+ * cset_add --
+ * Add a character to the set.
+ */
+bool
+cset_add(struct cset *cs, wchar_t ch)
+{
+ struct csnode *csn, *ncsn;
+ wchar_t oval;
+
+ cs->cs_havecache = false;
+
+ /*
+ * Inserting into empty tree; new item becomes the root.
+ */
+ if (cs->cs_root == NULL) {
+ csn = malloc(sizeof (*cs->cs_root));
+ if (csn == NULL)
+ return (false);
+ csn->csn_left = csn->csn_right = NULL;
+ csn->csn_min = csn->csn_max = ch;
+ cs->cs_root = csn;
+ return (true);
+ }
+
+ /*
+ * Splay to check whether the item already exists, and otherwise,
+ * where we should put it.
+ */
+ csn = cs->cs_root = cset_splay(cs->cs_root, ch);
+
+ /*
+ * Avoid adding duplicate nodes.
+ */
+ if (cset_rangecmp(csn, ch) == 0)
+ return (true);
+
+ /*
+ * Allocate a new node and make it the new root.
+ */
+ ncsn = malloc(sizeof (*ncsn));
+ if (ncsn == NULL)
+ return (false);
+ ncsn->csn_min = ncsn->csn_max = ch;
+ if (cset_rangecmp(csn, ch) < 0) {
+ ncsn->csn_left = csn->csn_left;
+ ncsn->csn_right = csn;
+ csn->csn_left = NULL;
+ } else {
+ ncsn->csn_right = csn->csn_right;
+ ncsn->csn_left = csn;
+ csn->csn_right = NULL;
+ }
+ cs->cs_root = ncsn;
+
+ /*
+ * Coalesce with left and right neighbours if possible.
+ */
+ if (ncsn->csn_left != NULL) {
+ ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
+ if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
+ oval = ncsn->csn_left->csn_min;
+ ncsn->csn_left = cset_delete(ncsn->csn_left,
+ ncsn->csn_left->csn_min);
+ ncsn->csn_min = oval;
+ }
+ }
+ if (ncsn->csn_right != NULL) {
+ ncsn->csn_right = cset_splay(ncsn->csn_right,
+ ncsn->csn_max + 1);
+ if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
+ oval = ncsn->csn_right->csn_max;
+ ncsn->csn_right = cset_delete(ncsn->csn_right,
+ ncsn->csn_right->csn_min);
+ ncsn->csn_max = oval;
+ }
+ }
+
+ return (true);
+}
+
+/*
+ * cset_in_hard --
+ * Determine whether a character is in the set without using
+ * the cache.
+ */
+bool
+cset_in_hard(struct cset *cs, wchar_t ch)
+{
+ struct csclass *csc;
+
+ for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
+ if ((csc->csc_invert ^ iswctype(ch, csc->csc_type)) != 0)
+ return (cs->cs_invert ^ true);
+ if (cs->cs_root != NULL) {
+ cs->cs_root = cset_splay(cs->cs_root, ch);
+ return ((cs->cs_invert ^ cset_rangecmp(cs->cs_root, ch)) == 0);
+ }
+ return (cs->cs_invert ^ false);
+}
+
+/*
+ * cset_cache --
+ * Update the cache.
+ */
+void
+cset_cache(struct cset *cs)
+{
+ wchar_t i;
+
+ for (i = 0; i < CS_CACHE_SIZE; i++)
+ cs->cs_cache[i] = cset_in_hard(cs, i);
+
+ cs->cs_havecache = true;
+}
+
+/*
+ * cset_invert --
+ * Invert the character set.
+ */
+void
+cset_invert(struct cset *cs)
+{
+
+ cs->cs_invert ^= true;
+ cs->cs_havecache = false;
+}
+
+/*
+ * cset_addclass --
+ * Add a wctype()-style character class to the set, optionally
+ * inverting it.
+ */
+bool
+cset_addclass(struct cset *cs, wctype_t type, bool invert)
+{
+ struct csclass *csc;
+
+ csc = malloc(sizeof (*csc));
+ if (csc == NULL)
+ return (false);
+ csc->csc_type = type;
+ csc->csc_invert = invert;
+ csc->csc_next = cs->cs_classes;
+ cs->cs_classes = csc;
+ cs->cs_havecache = false;
+ return (true);
+}
+
+static int
+cset_rangecmp(struct csnode *t, wchar_t ch)
+{
+
+ if (ch < t->csn_min)
+ return (-1);
+ if (ch > t->csn_max)
+ return (1);
+ return (0);
+}
+
+static struct csnode *
+cset_splay(struct csnode *t, wchar_t ch)
+{
+ struct csnode N, *l, *r, *y;
+
+ /*
+ * Based on public domain code from Sleator.
+ */
+
+ assert(t != NULL);
+
+ N.csn_left = N.csn_right = NULL;
+ l = r = &N;
+ for (;;) {
+ if (cset_rangecmp(t, ch) < 0) {
+ if (t->csn_left != NULL &&
+ cset_rangecmp(t->csn_left, ch) < 0) {
+ y = t->csn_left;
+ t->csn_left = y->csn_right;
+ y->csn_right = t;
+ t = y;
+ }
+ if (t->csn_left == NULL)
+ break;
+ r->csn_left = t;
+ r = t;
+ t = t->csn_left;
+ } else if (cset_rangecmp(t, ch) > 0) {
+ if (t->csn_right != NULL &&
+ cset_rangecmp(t->csn_right, ch) > 0) {
+ y = t->csn_right;
+ t->csn_right = y->csn_left;
+ y->csn_left = t;
+ t = y;
+ }
+ if (t->csn_right == NULL)
+ break;
+ l->csn_right = t;
+ l = t;
+ t = t->csn_right;
+ } else
+ break;
+ }
+ l->csn_right = t->csn_left;
+ r->csn_left = t->csn_right;
+ t->csn_left = N.csn_right;
+ t->csn_right = N.csn_left;
+ return (t);
+}
+
+static struct csnode *
+cset_delete(struct csnode *t, wchar_t ch)
+{
+ struct csnode *x;
+
+ assert(t != NULL);
+ t = cset_splay(t, ch);
+ assert(cset_rangecmp(t, ch) == 0);
+ if (t->csn_left == NULL)
+ x = t->csn_right;
+ else {
+ x = cset_splay(t->csn_left, ch);
+ x->csn_right = t->csn_right;
+ }
+ free(t);
+ return (x);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/cset.h Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef CSET_H
+#define CSET_H
+
+#include <stdbool.h>
+#include <wchar.h>
+#include <wctype.h>
+
+struct csnode {
+ wchar_t csn_min;
+ wchar_t csn_max;
+ struct csnode *csn_left;
+ struct csnode *csn_right;
+};
+
+struct csclass {
+ wctype_t csc_type;
+ bool csc_invert;
+ struct csclass *csc_next;
+};
+
+struct cset {
+#define CS_CACHE_SIZE 256
+ bool cs_cache[CS_CACHE_SIZE];
+ bool cs_havecache;
+ struct csclass *cs_classes;
+ struct csnode *cs_root;
+ bool cs_invert;
+};
+
+bool cset_addclass(struct cset *, wctype_t, bool);
+struct cset *cset_alloc(void);
+bool cset_add(struct cset *, wchar_t);
+void cset_invert(struct cset *);
+bool cset_in_hard(struct cset *, wchar_t);
+void cset_cache(struct cset *);
+
+#endif /* CSET_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/extern.h Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <limits.h>
+
+#define NCHARS_SB (UCHAR_MAX + 1) /* Number of single-byte characters. */
+#define OOBCH -1 /* Out of band character value. */
+
+typedef struct {
+ enum { STRING1, STRING2 } which;
+ enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE,
+ CCLASS, CCLASS_UPPER, CCLASS_LOWER, SET } state;
+ int cnt; /* character count */
+ wint_t lastch; /* last character */
+ wctype_t cclass; /* character class from wctype() */
+ wint_t equiv[NCHARS_SB]; /* equivalence set */
+ wint_t *set; /* set of characters */
+ char *str; /* user's string */
+} STR;
+
+wint_t next(STR *);
+int charcoll(const void *, const void *);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/str.c Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,399 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "extern.h"
+
+static int backslash(STR *, int *);
+static int bracket(STR *);
+static void genclass(STR *);
+static void genequiv(STR *);
+static int genrange(STR *, int);
+static void genseq(STR *);
+
+wint_t
+next(s)
+ STR *s;
+{
+ int is_octal;
+ wint_t ch;
+ wchar_t wch;
+ size_t clen;
+
+ switch (s->state) {
+ case EOS:
+ return (0);
+ case INFINITE:
+ return (1);
+ case NORMAL:
+ switch (*s->str) {
+ case '\0':
+ s->state = EOS;
+ return (0);
+ case '\\':
+ s->lastch = backslash(s, &is_octal);
+ break;
+ case '[':
+ if (bracket(s))
+ return (next(s));
+ /* FALLTHROUGH */
+ default:
+ clen = mbrtowc(&wch, s->str, MB_LEN_MAX, NULL);
+ if (clen == (size_t)-1 || clen == (size_t)-2 ||
+ clen == 0) {
+ (void) fprintf(stderr, "Illegal seqeunce.\n");
+ exit(1);
+ }
+ is_octal = 0;
+ s->lastch = wch;
+ s->str += clen;
+ break;
+ }
+
+ /* We can start a range at any time. */
+ if (s->str[0] == '-' && genrange(s, is_octal))
+ return (next(s));
+ return (1);
+ case RANGE:
+ if (s->cnt-- == 0) {
+ s->state = NORMAL;
+ return (next(s));
+ }
+ ++s->lastch;
+ return (1);
+ case SEQUENCE:
+ if (s->cnt-- == 0) {
+ s->state = NORMAL;
+ return (next(s));
+ }
+ return (1);
+ case CCLASS:
+ case CCLASS_UPPER:
+ case CCLASS_LOWER:
+ s->cnt++;
+ ch = nextwctype(s->lastch, s->cclass);
+ if (ch == -1) {
+ s->state = NORMAL;
+ return (next(s));
+ }
+ s->lastch = ch;
+ return (1);
+ case SET:
+ if ((ch = s->set[s->cnt++]) == OOBCH) {
+ s->state = NORMAL;
+ return (next(s));
+ }
+ s->lastch = ch;
+ return (1);
+ default:
+ return (0);
+ }
+ /* NOTREACHED */
+}
+
+static int
+bracket(s)
+ STR *s;
+{
+ char *p;
+
+ switch (s->str[1]) {
+ case ':': /* "[:class:]" */
+ if ((p = strchr(s->str + 2, ']')) == NULL)
+ return (0);
+ if (*(p - 1) != ':' || p - s->str < 4)
+ goto repeat;
+ *(p - 1) = '\0';
+ s->str += 2;
+ genclass(s);
+ s->str = p + 1;
+ return (1);
+ case '=': /* "[=equiv=]" */
+ if ((p = strchr(s->str + 2, ']')) == NULL)
+ return (0);
+ if (*(p - 1) != '=' || p - s->str < 4)
+ goto repeat;
+ s->str += 2;
+ genequiv(s);
+ return (1);
+ default: /* "[\###*n]" or "[#*n]" */
+ repeat:
+ if ((p = strpbrk(s->str + 2, "*]")) == NULL)
+ return (0);
+ if (p[0] != '*' || index(p, ']') == NULL)
+ return (0);
+ s->str += 1;
+ genseq(s);
+ return (1);
+ }
+ /* NOTREACHED */
+}
+
+static void
+genclass(s)
+ STR *s;
+{
+
+ if ((s->cclass = wctype(s->str)) == 0)
+ errx(1, "unknown class %s", s->str);
+ s->cnt = 0;
+ s->lastch = -1; /* incremented before check in next() */
+ if (strcmp(s->str, "upper") == 0)
+ s->state = CCLASS_UPPER;
+ else if (strcmp(s->str, "lower") == 0)
+ s->state = CCLASS_LOWER;
+ else
+ s->state = CCLASS;
+}
+
+static void
+genequiv(s)
+ STR *s;
+{
+ int i, p, pri;
+ char src[2], dst[3];
+ size_t clen;
+ wchar_t wc;
+
+ if (*s->str == '\\') {
+ s->equiv[0] = backslash(s, NULL);
+ if (*s->str != '=')
+ errx(1, "misplaced equivalence equals sign");
+ s->str += 2;
+ } else {
+ clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
+ if (clen == (size_t)-1 || clen == (size_t)-2 || clen == 0) {
+ errno = EILSEQ;
+ err(1, NULL);
+ }
+ s->equiv[0] = wc;
+ if (s->str[clen] != '=')
+ errx(1, "misplaced equivalence equals sign");
+ s->str += clen + 2;
+ }
+
+ /*
+ * Calculate the set of all characters in the same equivalence class
+ * as the specified character (they will have the same primary
+ * collation weights).
+ * XXX Knows too much about how strxfrm() is implemented. Assumes
+ * it fills the string with primary collation weight bytes. Only one-
+ * to-one mappings are supported.
+ * XXX Equivalence classes not supported in multibyte locales.
+ */
+ src[0] = (char)s->equiv[0];
+ src[1] = '\0';
+ if (MB_CUR_MAX == 1 && strxfrm(dst, src, sizeof (dst)) == 1) {
+ pri = (unsigned char)*dst;
+ for (p = 1, i = 1; i < NCHARS_SB; i++) {
+ *src = i;
+ if (strxfrm(dst, src, sizeof (dst)) == 1 && pri &&
+ pri == (unsigned char)*dst)
+ s->equiv[p++] = i;
+ }
+ s->equiv[p] = OOBCH;
+ }
+
+ s->cnt = 0;
+ s->state = SET;
+ s->set = s->equiv;
+}
+
+static int
+genrange(STR *s, int was_octal)
+{
+ int stopval, octal;
+ char *savestart;
+ int n, cnt, *p;
+ size_t clen;
+ wchar_t wc;
+
+ octal = 0;
+ savestart = s->str;
+ if (*++s->str == '\\')
+ stopval = backslash(s, &octal);
+ else {
+ clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
+ if (clen == (size_t)-1 || clen == (size_t)-2) {
+ errno = EILSEQ;
+ err(1, NULL);
+ }
+ stopval = wc;
+ s->str += clen;
+ }
+ /*
+ * XXX Characters are not ordered according to collating sequence in
+ * multibyte locales.
+ */
+ if (octal || was_octal || MB_CUR_MAX > 1) {
+ if (stopval < s->lastch) {
+ s->str = savestart;
+ return (0);
+ }
+ s->cnt = stopval - s->lastch + 1;
+ s->state = RANGE;
+ --s->lastch;
+ return (1);
+ }
+ if (charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0) {
+ s->str = savestart;
+ return (0);
+ }
+ p = malloc((NCHARS_SB + 1) * sizeof (int));
+ if ((s = (void *)p) == NULL)
+ err(1, "genrange() malloc");
+ for (cnt = 0; cnt < NCHARS_SB; cnt++)
+ if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >=
+ 0 &&
+ charcoll((const void *)&cnt, (const void *)&stopval) <= 0)
+ *p++ = cnt;
+ *p = OOBCH;
+ n = (int *)p - (int *)s->set;
+
+ s->cnt = 0;
+ s->state = SET;
+ if (n > 1)
+ qsort(s->set, n, sizeof (*(s->set)), charcoll);
+ return (1);
+}
+
+static void
+genseq(s)
+ STR *s;
+{
+ char *ep;
+ wchar_t wc;
+ size_t clen;
+
+ if (s->which == STRING1)
+ errx(1, "sequences only valid in string2");
+
+ if (*s->str == '\\')
+ s->lastch = backslash(s, NULL);
+ else {
+ clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
+ if (clen == (size_t)-1 || clen == (size_t)-2) {
+ errno = EILSEQ;
+ err(1, NULL);
+ }
+ s->lastch = wc;
+ s->str += clen;
+ }
+ if (*s->str != '*')
+ errx(1, "misplaced sequence asterisk");
+
+ switch (*++s->str) {
+ case '\\':
+ s->cnt = backslash(s, NULL);
+ break;
+ case ']':
+ s->cnt = 0;
+ ++s->str;
+ break;
+ default:
+ if (isdigit((uchar_t)*s->str)) {
+ s->cnt = strtol(s->str, &ep, 0);
+ if (*ep == ']') {
+ s->str = ep + 1;
+ break;
+ }
+ }
+ errx(1, "illegal sequence count");
+ /* NOTREACHED */
+ }
+
+ s->state = s->cnt ? SEQUENCE : INFINITE;
+}
+
+/*
+ * Translate \??? into a character. Up to 3 octal digits, if no digits either
+ * an escape code or a literal character.
+ */
+static int
+backslash(STR *s, int *is_octal)
+{
+ int ch, cnt, val;
+
+ if (is_octal != NULL)
+ *is_octal = 0;
+ for (cnt = val = 0; ; ) {
+ ch = (uchar_t)*++s->str;
+ if (!isdigit(ch) || ch > '7')
+ break;
+ val = val * 8 + ch - '0';
+ if (++cnt == 3) {
+ ++s->str;
+ break;
+ }
+ }
+ if (cnt) {
+ if (is_octal != NULL)
+ *is_octal = 1;
+ return (val);
+ }
+ if (ch != '\0')
+ ++s->str;
+ switch (ch) {
+ case 'a': /* escape characters */
+ return ('\7');
+ case 'b':
+ return ('\b');
+ case 'f':
+ return ('\f');
+ case 'n':
+ return ('\n');
+ case 'r':
+ return ('\r');
+ case 't':
+ return ('\t');
+ case 'v':
+ return ('\13');
+ case '\0': /* \" -> \ */
+ s->state = EOS;
+ return ('\\');
+ default: /* \x" -> x */
+ return (ch);
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/tr/tr.c Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "cmap.h"
+#include "cset.h"
+#include "extern.h"
+
+STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };
+STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL };
+
+static struct cset *setup(char *, STR *, int, int);
+static void usage(void);
+
+static wint_t
+cmap_lookup(struct cmap *cm, wint_t from)
+{
+
+ if (from < CM_CACHE_SIZE && cm->cm_havecache)
+ return (cm->cm_cache[from]);
+ return (cmap_lookup_hard(cm, from));
+}
+
+static wint_t
+cmap_max(struct cmap *cm)
+{
+ return (cm->cm_max);
+}
+
+static inline bool
+cset_in(struct cset *cs, wchar_t ch)
+{
+
+ if (ch < CS_CACHE_SIZE && cs->cs_havecache)
+ return (cs->cs_cache[ch]);
+ return (cset_in_hard(cs, ch));
+}
+
+int
+main(int argc, char **argv)
+{
+ static int carray[NCHARS_SB];
+ struct cmap *map;
+ struct cset *delete, *squeeze;
+ int n, *p;
+ int Cflag, cflag, dflag, sflag, isstring2;
+ wint_t ch, cnt, lastch;
+
+ (void) setlocale(LC_ALL, "");
+
+ Cflag = cflag = dflag = sflag = 0;
+ while ((ch = getopt(argc, argv, "Ccdsu")) != -1)
+ switch ((char)ch) {
+ case 'C':
+ Cflag = 1;
+ cflag = 0;
+ break;
+ case 'c':
+ cflag = 1;
+ Cflag = 0;
+ break;
+ case 'd':
+ dflag = 1;
+ break;
+ case 's':
+ sflag = 1;
+ break;
+ case 'u':
+ setbuf(stdout, (char *)NULL);
+ break;
+ case '?':
+ default:
+ usage();
+ }
+ argc -= optind;
+ argv += optind;
+
+ switch (argc) {
+ case 0:
+ default:
+ usage();
+ /* NOTREACHED */
+ case 1:
+ isstring2 = 0;
+ break;
+ case 2:
+ isstring2 = 1;
+ break;
+ }
+
+ /*
+ * tr -ds [-Cc] string1 string2
+ * Delete all characters (or complemented characters) in string1.
+ * Squeeze all characters in string2.
+ */
+ if (dflag && sflag) {
+ if (!isstring2)
+ usage();
+
+ delete = setup(argv[0], &s1, cflag, Cflag);
+ squeeze = setup(argv[1], &s2, 0, 0);
+
+ for (lastch = OOBCH; (ch = getwchar()) != WEOF; )
+ if (!cset_in(delete, ch) &&
+ (lastch != ch || !cset_in(squeeze, ch))) {
+ lastch = ch;
+ (void) putwchar(ch);
+ }
+ if (ferror(stdin))
+ err(1, NULL);
+ exit(0);
+ }
+
+ /*
+ * tr -d [-Cc] string1
+ * Delete all characters (or complemented characters) in string1.
+ */
+ if (dflag) {
+ if (isstring2)
+ usage();
+
+ delete = setup(argv[0], &s1, cflag, Cflag);
+
+ while ((ch = getwchar()) != WEOF)
+ if (!cset_in(delete, ch))
+ (void) putwchar(ch);
+ if (ferror(stdin))
+ err(1, NULL);
+ exit(0);
+ }
+
+ /*
+ * tr -s [-Cc] string1
+ * Squeeze all characters (or complemented characters) in string1.
+ */
+ if (sflag && !isstring2) {
+ squeeze = setup(argv[0], &s1, cflag, Cflag);
+
+ for (lastch = OOBCH; (ch = getwchar()) != WEOF; )
+ if (lastch != ch || !cset_in(squeeze, ch)) {
+ lastch = ch;
+ (void) putwchar(ch);
+ }
+ if (ferror(stdin))
+ err(1, NULL);
+ exit(0);
+ }
+
+ /*
+ * tr [-Ccs] string1 string2
+ * Replace all characters (or complemented characters) in string1 with
+ * the character in the same position in string2. If the -s option is
+ * specified, squeeze all the characters in string2.
+ */
+ if (!isstring2)
+ usage();
+
+ map = cmap_alloc();
+ if (map == NULL)
+ err(1, NULL);
+ squeeze = cset_alloc();
+ if (squeeze == NULL)
+ err(1, NULL);
+
+ s1.str = argv[0];
+
+ if (Cflag || cflag) {
+ (void) cmap_default(map, OOBCH);
+ if ((s2.str = strdup(argv[1])) == NULL)
+ errx(1, "strdup(argv[1])");
+ } else
+ s2.str = argv[1];
+
+ if (!next(&s2))
+ errx(1, "empty string2");
+
+ /*
+ * For -s result will contain only those characters defined
+ * as the second characters in each of the toupper or tolower
+ * pairs.
+ */
+
+ /* If string2 runs out of characters, use the last one specified. */
+ while (next(&s1)) {
+ again:
+ if (s1.state == CCLASS_LOWER &&
+ s2.state == CCLASS_UPPER &&
+ s1.cnt == 1 && s2.cnt == 1) {
+ do {
+ ch = towupper(s1.lastch);
+ (void) cmap_add(map, s1.lastch, ch);
+ if (sflag && iswupper(ch))
+ (void) cset_add(squeeze, ch);
+ if (!next(&s1))
+ goto endloop;
+ } while (s1.state == CCLASS_LOWER && s1.cnt > 1);
+ /* skip upper set */
+ do {
+ if (!next(&s2))
+ break;
+ } while (s2.state == CCLASS_UPPER && s2.cnt > 1);
+ goto again;
+ } else if (s1.state == CCLASS_UPPER &&
+ s2.state == CCLASS_LOWER &&
+ s1.cnt == 1 && s2.cnt == 1) {
+ do {
+ ch = towlower(s1.lastch);
+ (void) cmap_add(map, s1.lastch, ch);
+ if (sflag && iswlower(ch))
+ (void) cset_add(squeeze, ch);
+ if (!next(&s1))
+ goto endloop;
+ } while (s1.state == CCLASS_UPPER && s1.cnt > 1);
+ /* skip lower set */
+ do {
+ if (!next(&s2))
+ break;
+ } while (s2.state == CCLASS_LOWER && s2.cnt > 1);
+ goto again;
+ } else {
+ (void) cmap_add(map, s1.lastch, s2.lastch);
+ if (sflag)
+ (void) cset_add(squeeze, s2.lastch);
+ }
+ (void) next(&s2);
+ }
+endloop:
+ if (cflag || (Cflag && MB_CUR_MAX > 1)) {
+ /*
+ * This is somewhat tricky: since the character set is
+ * potentially huge, we need to avoid allocating a map
+ * entry for every character. Our strategy is to set the
+ * default mapping to the last character of string #2
+ * (= the one that gets automatically repeated), then to
+ * add back identity mappings for characters that should
+ * remain unchanged. We don't waste space on identity mappings
+ * for non-characters with the -C option; those are simulated
+ * in the I/O loop.
+ */
+ s2.str = argv[1];
+ s2.state = NORMAL;
+ for (cnt = 0; cnt < WCHAR_MAX; cnt++) {
+ if (Cflag && !iswrune(cnt))
+ continue;
+ if (cmap_lookup(map, cnt) == OOBCH) {
+ if (next(&s2))
+ (void) cmap_add(map, cnt, s2.lastch);
+ if (sflag)
+ (void) cset_add(squeeze, s2.lastch);
+ } else
+ (void) cmap_add(map, cnt, cnt);
+ if ((s2.state == EOS || s2.state == INFINITE) &&
+ cnt >= cmap_max(map))
+ break;
+ }
+ (void) cmap_default(map, s2.lastch);
+ } else if (Cflag) {
+ for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) {
+ if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt))
+ *p++ = cnt;
+ else
+ (void) cmap_add(map, cnt, cnt);
+ }
+ n = p - carray;
+ if (Cflag && n > 1)
+ (void) qsort(carray, n, sizeof (*carray), charcoll);
+
+ s2.str = argv[1];
+ s2.state = NORMAL;
+ for (cnt = 0; cnt < n; cnt++) {
+ (void) next(&s2);
+ (void) cmap_add(map, carray[cnt], s2.lastch);
+ /*
+ * Chars taken from s2 can be different this time
+ * due to lack of complex upper/lower processing,
+ * so fill string2 again to not miss some.
+ */
+ if (sflag)
+ (void) cset_add(squeeze, s2.lastch);
+ }
+ }
+
+ cset_cache(squeeze);
+ cmap_cache(map);
+
+ if (sflag)
+ for (lastch = OOBCH; (ch = getwchar()) != WEOF; ) {
+ if (!Cflag || iswrune(ch))
+ ch = cmap_lookup(map, ch);
+ if (lastch != ch || !cset_in(squeeze, ch)) {
+ lastch = ch;
+ (void) putwchar(ch);
+ }
+ }
+ else
+ while ((ch = getwchar()) != WEOF) {
+ if (!Cflag || iswrune(ch))
+ ch = cmap_lookup(map, ch);
+ (void) putwchar(ch);
+ }
+ if (ferror(stdin))
+ err(1, NULL);
+ exit(0);
+}
+
+static struct cset *
+setup(char *arg, STR *str, int cflag, int Cflag)
+{
+ struct cset *cs;
+
+ cs = cset_alloc();
+ if (cs == NULL)
+ err(1, NULL);
+ str->str = arg;
+ while (next(str))
+ (void) cset_add(cs, str->lastch);
+ if (Cflag)
+ (void) cset_addclass(cs, wctype("rune"), true);
+ if (cflag || Cflag)
+ cset_invert(cs);
+ cset_cache(cs);
+ return (cs);
+}
+
+int
+charcoll(const void *a, const void *b)
+{
+ static char sa[2], sb[2];
+
+ sa[0] = *(const int *)a;
+ sb[0] = *(const int *)b;
+ return (strcoll(sa, sb));
+}
+
+static void
+usage(void)
+{
+ (void) fprintf(stderr, "%s\n%s\n%s\n%s\n",
+ "usage: tr [-Ccsu] string1 string2",
+ " tr [-Ccu] -d string1",
+ " tr [-Ccu] -s string1",
+ " tr [-Ccu] -ds string1 string2");
+ exit(1);
+}
--- a/usr/src/head/wctype.h Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/head/wctype.h Wed Aug 18 14:41:42 2010 -0700
@@ -95,6 +95,14 @@
wchar_t *code; /* conversion code */
};
+
+#ifdef _ILLUMOS_PRIVATE
+extern int __iswrune(wint_t);
+extern wint_t __nextwctype(wint_t, wctype_t);
+#define iswrune(c) __iswrune(c)
+#define nextwctype(c, t) __nextwctype(c, t)
+#endif
+
/* character classification functions */
/* iswascii is still a macro */
--- a/usr/src/lib/libc/amd64/Makefile Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/lib/libc/amd64/Makefile Wed Aug 18 14:41:42 2010 -0700
@@ -733,6 +733,7 @@
mbstowcs.o \
mbtowc.o \
mskanji.o \
+ nextwctype.o \
nl_langinfo.o \
none.o \
regcomp.o \
--- a/usr/src/lib/libc/i386/Makefile.com Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/lib/libc/i386/Makefile.com Wed Aug 18 14:41:42 2010 -0700
@@ -775,6 +775,7 @@
mbstowcs.o \
mbtowc.o \
mskanji.o \
+ nextwctype.o \
nl_langinfo.o \
none.o \
regcomp.o \
--- a/usr/src/lib/libc/port/locale/iswctype.c Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/lib/libc/port/locale/iswctype.c Wed Aug 18 14:41:42 2010 -0700
@@ -214,3 +214,24 @@
{
return (__istype(wc, _CTYPE_N));
}
+
+/*
+ * FreeBSD has iswrune() for use by external programs, and this is used by
+ * the "tr" program. As that program is part of our consolidation, we
+ * provide an _ILLUMOS_PRIVATE version of this function that we can use.
+ *
+ * No programs that are not part of the illumos stack itself should use
+ * this function -- programs that do reference will not be portable to
+ * other versions of SunOS or Solaris.
+ */
+int
+__iswrune(wint_t wc)
+{
+ /*
+ * Note, FreeBSD ignored the low order byte, as they encode their
+ * ctype values differently. We can't do that (ctype is baked into
+ * applications), but instead can just check if *any* bit is set in
+ * the ctype. Any bit being set indicates its a valid rune.
+ */
+ return (__istype(wc, 0xffffffffU));
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/libc/port/locale/nextwctype.c Wed Aug 18 14:41:42 2010 -0700
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2004 Tim J. Robbins.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "lint.h"
+#include "runetype.h"
+#include <wchar.h>
+#include <wctype.h>
+
+wint_t
+__nextwctype(wint_t wc, wctype_t wct)
+{
+ size_t lim;
+ _RuneRange *rr = &_CurrentRuneLocale->__runetype_ext;
+ _RuneEntry *base, *re;
+ int noinc;
+
+ noinc = 0;
+ if (wc < _CACHED_RUNES) {
+ wc++;
+ while (wc < _CACHED_RUNES) {
+ if (_CurrentRuneLocale->__runetype[wc] & wct)
+ return (wc);
+ wc++;
+ }
+ wc--;
+ }
+ if (rr->__ranges != NULL && wc < rr->__ranges[0].__min) {
+ wc = rr->__ranges[0].__min;
+ noinc = 1;
+ }
+
+ /* Binary search -- see bsearch.c for explanation. */
+ base = rr->__ranges;
+ for (lim = rr->__nranges; lim != 0; lim >>= 1) {
+ re = base + (lim >> 1);
+ if (re->__min <= wc && wc <= re->__max)
+ goto found;
+ else if (wc > re->__max) {
+ base = re + 1;
+ lim--;
+ }
+ }
+ return (-1);
+found:
+ if (!noinc)
+ wc++;
+ if (re->__min <= wc && wc <= re->__max) {
+ if (re->__types != NULL) {
+ for (; wc <= re->__max; wc++)
+ if (re->__types[wc - re->__min] & wct)
+ return (wc);
+ } else if (re->__map & wct)
+ return (wc);
+ }
+ while (++re < rr->__ranges + rr->__nranges) {
+ wc = re->__min;
+ if (re->__types != NULL) {
+ for (; wc <= re->__max; wc++)
+ if (re->__types[wc - re->__min] & wct)
+ return (wc);
+ } else if (re->__map & wct)
+ return (wc);
+ }
+ return (-1);
+}
--- a/usr/src/lib/libc/port/mapfile-vers Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/lib/libc/port/mapfile-vers Wed Aug 18 14:41:42 2010 -0700
@@ -2496,6 +2496,7 @@
__inf_written;
__i_size;
_isnanf { TYPE = FUNCTION; FILTER = libm.so.2 };
+ __iswrune;
__libc_threaded;
_lib_version { FLAGS = NODIRECT };
_logb { TYPE = FUNCTION; FILTER = libm.so.2 };
@@ -2510,6 +2511,7 @@
_modff { TYPE = FUNCTION; FILTER = libm.so.2 };
__nan_read;
__nan_written;
+ __nextwctype;
__nis_debug_bind;
__nis_debug_calls;
__nis_debug_file;
--- a/usr/src/lib/libc/sparc/Makefile.com Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/lib/libc/sparc/Makefile.com Wed Aug 18 14:41:42 2010 -0700
@@ -803,6 +803,8 @@
mbstowcs.o \
mbtowc.o \
mskanji.o \
+ nextwctype.o \
+ nl_langinfo.o \
none.o \
regcomp.o \
regfree.o \
--- a/usr/src/lib/libc/sparcv9/Makefile.com Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/lib/libc/sparcv9/Makefile.com Wed Aug 18 14:41:42 2010 -0700
@@ -754,6 +754,8 @@
mbstowcs.o \
mbtowc.o \
mskanji.o \
+ nextwctype.o \
+ nl_langinfo.o \
none.o \
regcomp.o \
regfree.o \
--- a/usr/src/pkg/manifests/SUNWcs.mf Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/pkg/manifests/SUNWcs.mf Wed Aug 18 14:41:42 2010 -0700
@@ -2565,6 +2565,7 @@
license cmd/stat/vmstat/THIRDPARTYLICENSE \
license=cmd/stat/vmstat/THIRDPARTYLICENSE
license cmd/tip/THIRDPARTYLICENSE license=cmd/tip/THIRDPARTYLICENSE
+license cmd/tr/THIRDPARTYLICENSE license=cmd/tr/THIRDPARTYLICENSE
license cmd/vi/THIRDPARTYLICENSE license=cmd/vi/THIRDPARTYLICENSE
license cmd/which/THIRDPARTYLICENSE license=cmd/which/THIRDPARTYLICENSE
license cmd/xstr/THIRDPARTYLICENSE license=cmd/xstr/THIRDPARTYLICENSE
--- a/usr/src/pkg/manifests/system-xopen-xcu4.mf Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/pkg/manifests/system-xopen-xcu4.mf Wed Aug 18 14:41:42 2010 -0700
@@ -71,7 +71,6 @@
file path=usr/xpg4/bin/sort mode=0555
file path=usr/xpg4/bin/stty mode=0555
file path=usr/xpg4/bin/tail mode=0555
-file path=usr/xpg4/bin/tr mode=0555
file path=usr/xpg4/bin/who mode=0555
hardlink path=usr/xpg4/bin/bg target=../../../usr/xpg4/bin/alias
hardlink path=usr/xpg4/bin/cd target=../../../usr/xpg4/bin/alias
@@ -108,3 +107,4 @@
license lic_OSBL license=lic_OSBL
license lic_OSBL_preamble license=lic_OSBL_preamble
link path=usr/xpg4/bin/ipcs target=../../bin/ipcs
+link path=usr/xpg4/bin/tr target=../../bin/tr
--- a/usr/src/pkg/manifests/system-xopen-xcu6.mf Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/pkg/manifests/system-xopen-xcu6.mf Wed Aug 18 14:41:42 2010 -0700
@@ -41,7 +41,6 @@
file path=usr/xpg6/bin/expr mode=0555
file path=usr/xpg6/bin/getconf mode=0555
file path=usr/xpg6/bin/ls mode=0555
-file path=usr/xpg6/bin/tr mode=0555
hardlink path=usr/xpg6/bin/ex target=../../../usr/xpg6/bin/edit
hardlink path=usr/xpg6/bin/vedit target=../../../usr/xpg6/bin/edit
hardlink path=usr/xpg6/bin/vi target=../../../usr/xpg6/bin/edit
@@ -55,3 +54,4 @@
license lic_CDDL license=lic_CDDL
link path=usr/xpg6/bin/stty target=../../bin/stty
link path=usr/xpg6/bin/xargs target=../../bin/xargs
+link path=usr/xpg6/bin/tr target=../../bin/tr
--- a/usr/src/tools/opensolaris/license-list Mon Aug 16 16:22:01 2010 -0700
+++ b/usr/src/tools/opensolaris/license-list Wed Aug 18 14:41:42 2010 -0700
@@ -63,6 +63,7 @@
usr/src/cmd/tcpd/THIRDPARTYLICENSE
usr/src/cmd/terminfo/THIRDPARTYLICENSE
usr/src/cmd/tip/THIRDPARTYLICENSE
+usr/src/cmd/tr/THIRDPARTYLICENSE
usr/src/cmd/ul/THIRDPARTYLICENSE
usr/src/cmd/units/THIRDPARTYLICENSE
usr/src/cmd/vgrind/THIRDPARTYLICENSE