672 xargs doesn't support -0
author Garrett D'Amore <garrett@dey-sys.com>
Tue, 27 Aug 2013 13:00:09 -0700
changeset 14183 f1ab3ae29311
parent 14182 b02c4a353739
child 14184 8ae0bbd9e7cc
672 xargs doesn't support -0
3356 xargs needlessly duplicates stdio buffering
Reviewed by: Prasad Joshi <[email protected]>
Reviewed by: Gary Mills <[email protected]>
Approved by: Robert Mustacchi <[email protected]>
usr/src/cmd/xargs/xargs.c
usr/src/man/man1/xargs.1
--- a/usr/src/cmd/xargs/xargs.c	Fri Aug 23 15:33:55 2013 -0400
+++ b/usr/src/cmd/xargs/xargs.c	Tue Aug 27 13:00:09 2013 -0700
@@ -19,6 +19,16 @@
  * CDDL HEADER END
  */
 /*
+ * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
+ *
+ * Portions of this file developed by DEY Storage Systems, Inc. are licensed
+ * under the terms of the Common Development and Distribution License (CDDL)
+ * version 1.0 only.  The use of subsequent versions of the License are
+ * is specifically prohibited unless those terms are not in conflict with
+ * version 1.0 of the License.  You can find this license on-line at
+ * http://www.illumos.org/license/CDDL
+ */
+/*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
@@ -27,8 +37,6 @@
 /*	  All Rights Reserved  	*/
 
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -62,11 +70,32 @@
 #define	QBUF_STARTLEN 255  /* start size of growable string buffer */
 #define	QBUF_INC 100	   /* how much to grow a growable string by */
 
+/* We use these macros to help make formatting look "consistent" */
+#define	EMSG(s)		ermsg(gettext(s "\n"))
+#define	EMSG2(s, a)	ermsg(gettext(s "\n"), a)
+#define	PERR(s)		perror(gettext("xargs: " s))
+
+/* Some common error messages */
+
+#define	LIST2LONG	"Argument list too long"
+#define	ARG2LONG	"A single argument was greater than %d bytes"
+#define	MALLOCFAIL	"Memory allocation failure"
+#define	CORRUPTFILE	"Corrupt input file"
+#define	WAITFAIL	"Wait failure"
+#define	CHILDSIG	"Child killed with signal %d"
+#define	CHILDFAIL	"Command could not continue processing data"
+#define	FORKFAIL	"Could not fork child"
+#define	EXECFAIL	"Could not exec command"
+#define	MISSQUOTE	"Missing quote"
+#define	BADESCAPE	"Incomplete escape"
+#define	IBUFOVERFLOW	"Insert buffer overflow"
+
+#define	_(x)	gettext(x)
+
 static wctype_t	blank;
 static char	*arglist[MAXARGS+1];
-static char	argbuf[BUFSIZE+1];
-static char	*next = argbuf;
-static char	*lastarg = "";
+static char	argbuf[BUFSIZE * 2 + 1];
+static char	lastarg[BUFSIZE + 1];
 static char	**ARGV = arglist;
 static char	*LEOF = "_";
 static char	*INSPAT = INSPAT_STR;
@@ -78,7 +107,6 @@
 	char	*p_skel;	/* ptr to arg template */
 } saveargv[MAXINSERTS];
 
-static off_t	file_offset = 0;
 static int	PROMPT = -1;
 static int	BUFLIM = BUFSIZE;
 static int	N_ARGS = 0;
@@ -92,28 +120,13 @@
 static int	LEGAL = FALSE;
 static int	TRACE = FALSE;
 static int	INSERT = FALSE;
+static int	ZERO = FALSE;
 static int	linesize = 0;
 static int	ibufsize = 0;
 static int	exitstat = 0;	/* our exit status			*/
 static int	mac;		/* modified argc, after parsing		*/
 static char	**mav;		/* modified argv, after parsing		*/
 static int	n_inserts;	/* # of insertions.			*/
-static int	inquote = 0;	/* processing a quoted string		*/
-static int	save_index = 0;
-
-/*
- * the pio structure is used to save any pending input before the
- * user replies to a prompt. the pending input is saved here,
- * for the appropriate processing later.
- */
-typedef struct pio {
-	struct pio *next;	/* next in stack			*/
-	char *start;		/* starting addr of the buffer		*/
-	char *cur;		/* ptr to current char in buf		*/
-	size_t length;		/* number of bytes remaining		*/
-} pio;
-
-static pio *queued_data = NULL;
 
 /* our usage message:							*/
 #define	USAGEMSG "Usage: xargs: [-t] [-p] [-e[eofstr]] [-E eofstr] "\
@@ -121,21 +134,16 @@
 	"[cmd [args ...]]\n"
 
 static int	echoargs();
-static int	getchr(void);
-static wchar_t	getwchr(void);
-static void	ungetwchr(wchar_t);
+static wint_t	getwchr(char *, size_t *);
 static int	lcall(char *sub, char **subargs);
-static int	xindex(char *as1, char *as2);
 static void	addibuf(struct inserts *p);
 static void	ermsg(char *messages, ...);
 static char	*addarg(char *arg);
-static char	*checklen(char *arg);
-static size_t   store_wchr(char **, size_t *, size_t, wchar_t);
-static char	*getarg();
+static void	store_str(char **, char *, size_t);
+static char	*getarg(char *);
 static char	*insert(char *pattern, char *subst);
 static void	usage();
 static void	parseargs();
-static void	saveinput();
 
 int
 main(int argc, char **argv)
@@ -144,8 +152,9 @@
 	struct inserts *psave;
 	int c;
 	int	initsize;
-	char	*cmdname, *initbuf, **initlist;
-
+	char	*cmdname, **initlist;
+	char	*arg;
+	char	*next;
 
 	/* initialization */
 	blank = wctype("blank");
@@ -157,22 +166,26 @@
 #endif
 	(void) textdomain(TEXT_DOMAIN);
 	if (init_yes() < 0) {
-		ermsg(gettext(ERR_MSG_INIT_YES), strerror(errno));
+		ermsg(_(ERR_MSG_INIT_YES), strerror(errno));
 		exit(1);
 	}
 
 	parseargs(argc, argv);
 
 	/* handling all of xargs arguments:				*/
-	while ((c = getopt(mac, mav, "tpe:E:I:i:L:l:n:s:x")) != EOF) {
+	while ((c = getopt(mac, mav, "0tpe:E:I:i:L:l:n:s:x")) != EOF) {
 		switch (c) {
+		case '0':
+			ZERO = TRUE;
+			break;
+
 		case 't':	/* -t: turn trace mode on		*/
 			TRACE = TRUE;
 			break;
 
 		case 'p':	/* -p: turn on prompt mode.		*/
 			if ((PROMPT = open("/dev/tty", O_RDONLY)) == -1) {
-				perror(gettext("can't read from tty for -p"));
+				PERR("can't read from tty for -p");
 			} else {
 				TRACE = TRUE;
 			}
@@ -202,8 +215,8 @@
 			N_ARGS = 0;
 			INSPAT = optarg;
 			if (*optarg == '\0') {
-				ermsg(gettext(
-				    "Option requires an argument: -%c\n"), c);
+				ermsg(_("Option requires an argument: -%c\n"),
+				    c);
 			}
 			break;
 
@@ -245,8 +258,8 @@
 			N_ARGS = 0;
 			INSERT = FALSE;
 			if ((PER_LINE = atoi(optarg)) <= 0) {
-				ermsg(gettext("#lines must be positive "
-				    "int: %s\n"), optarg);
+				ermsg(_("#lines must be positive int: %s\n"),
+				    optarg);
 			}
 			break;
 
@@ -275,8 +288,8 @@
 			 * number *is* required here:
 			 */
 			if ((N_ARGS = atoi(optarg)) <= 0) {
-				ermsg(gettext("#args must be positive "
-				    "int: %s\n"), optarg);
+				ermsg(_("#args must be positive int: %s\n"),
+				    optarg);
 			} else {
 				LEGAL = DASHX || N_ARGS == 1;
 				INSERT = PER_LINE = FALSE;
@@ -286,9 +299,8 @@
 		case 's':	/* -s size: set max size of each arg list */
 			BUFLIM = atoi(optarg);
 			if (BUFLIM > BUFSIZE || BUFLIM <= 0) {
-				ermsg(gettext(
-				    "0 < max-cmd-line-size <= %d: "
-				    "%s\n"), BUFSIZE, optarg);
+				ermsg(_("0 < max-cmd-line-size <= %d: %s\n"),
+				    BUFSIZE, optarg);
 			}
 			break;
 
@@ -300,9 +312,7 @@
 			/*
 			 * bad argument. complain and get ready to die.
 			 */
-			ERR = TRUE;
 			usage();
-
 			exit(2);
 			break;
 		}
@@ -313,7 +323,6 @@
 	 * we exit early.
 	 */
 	if (OK == FALSE) {
-		ERR = TRUE;
 		usage();
 		exit(2);
 	}
@@ -346,9 +355,9 @@
 			 * work to do:
 			 */
 			if (INSERT && ! ERR) {
-				if (xindex(*mav, INSPAT) != -1) {
+				if (strstr(*mav, INSPAT) != NULL) {
 					if (++n_inserts > MAXINSERTS) {
-						ermsg(gettext("too many args "
+						ermsg(_("too many args "
 						    "with %s\n"), INSPAT);
 						ERR = TRUE;
 					}
@@ -362,39 +371,77 @@
 
 	/* pick up args from standard input */
 
-	initbuf = next;
 	initlist = ARGV;
 	initsize = linesize;
+	lastarg[0] = '\0';
 
-	while (OK && MORE) {
+	while (OK) {
 		N_args = 0;
 		N_lines = 0;
-		next = initbuf;
 		ARGV = initlist;
 		linesize = initsize;
-		if (*lastarg) {
-			*ARGV++ = addarg(lastarg);
-			lastarg = "";
-		}
+		next = argbuf;
+
+		while (MORE || (lastarg[0] != '\0')) {
+			int l;
+
+			if (*lastarg != '\0') {
+				arg = strcpy(next, lastarg);
+				*lastarg = '\0';
+			} else if ((arg = getarg(next)) == NULL) {
+				break;
+			}
+
+			l = strlen(arg) + 1;
+			linesize += l;
+			next += l;
 
-		while (((*ARGV++ = getarg()) != NULL) && OK) {
+			/* Inserts are handled specially later. */
+			if ((n_inserts == 0) && (linesize >= BUFLIM)) {
+				/*
+				 * LEGAL means a list truncated due to size
+				 * is a hard failure.  Fail in that case, and
+				 * also when not even one argument fits
+				 * (N_args == 0), since no list can be built.
+				 */
+				if (LEGAL || N_args == 0) {
+					EMSG(LIST2LONG);
+					exit(2);
+					/* NOTREACHED */
+				}
+
+				/*
+				 * Otherwise just save argument for later.
+				 */
+				(void) strcpy(lastarg, arg);
+				break;
+			}
+
+			*ARGV++ = arg;
+
+			N_args++;
+
+			if ((PER_LINE && N_lines >= PER_LINE) ||
+			    (N_ARGS && (N_args) >= N_ARGS)) {
+				break;
+			}
+
+
 			if ((ARGV - arglist) == MAXARGS) {
-				save_index = ARGV - arglist;
 				break;
 			}
 		}
-		if ((save_index == MAXARGS) && !MORE && (N_args == 0)) {
-			/* there were no more args after filling arglist */
+
+		*ARGV = NULL;
+		if (N_args == 0) {
+			/* Reached the end with no more work. */
 			exit(exitstat);
 		}
 
 		/* insert arg if requested */
 
 		if (!ERR && INSERT) {
-			if ((!MORE) && (N_lines == 0)) {
-				exit(exitstat);
-			}
-					/* no more input lines */
+
 			p_ibuf = ins_buf;
 			ARGV--;
 			j = ibufsize = 0;
@@ -404,31 +451,22 @@
 					break;
 			}
 		}
-		*ARGV = 0;
+		*ARGV = NULL;
 
 		if (n_inserts > 0) {
-			int t_ninserts;
-
 			/*
 			 * if we've done any insertions, re-calculate the
 			 * linesize. bomb out if we've exceeded our length.
 			 */
-			t_ninserts = n_inserts;
-			n_inserts = 0;	/* inserts have been done 	*/
-			linesize = 0;	/* recalculate this		*/
-
-			/* for each current argument in the list:	*/
+			linesize = 0;
 			for (ARGV = arglist; *ARGV != NULL; ARGV++) {
-				/* recalculate everything.		*/
-				if (checklen(*ARGV) != 0) {
-					if (N_ARGS && (N_args >= N_ARGS)) {
-						N_lines = N_args = 0;
-						OK = FALSE;
-						ERR = TRUE;
-					}
-				}
+				linesize += strlen(*ARGV) + 1;
 			}
-			n_inserts = t_ninserts;
+			if (linesize >= BUFLIM) {
+				EMSG(LIST2LONG);
+				exit(2);
+				/* NOTREACHED */
+			}
 		}
 
 		/* exec command */
@@ -446,334 +484,161 @@
 				 * so if we have a non-zero status here,
 				 * quit immediately.
 				 */
-				if ((exitstat |= lcall(cmdname, arglist)) == 0)
-					continue;
+				exitstat |= lcall(cmdname, arglist);
 			}
 		}
 	}
 
-	(void) lseek(0, file_offset, SEEK_SET);
-	if (OK) {
+	if (OK)
 		return (exitstat);
-	} else {
-		/*
-		 * if exitstat was set, to match XCU4 complience,
-		 * return that value, otherwise, return 1.
-		 */
-		return (exitstat ? exitstat : 1);
-	}
-}
-
-static void
-queue(char *buffer, int len, int where)
-{
-	pio *new, *element;
-
-	if ((new = malloc(sizeof (pio))) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
-		exit(1);
-	}
-	new->cur = new->start = buffer;
-	new->length = len;
 
-	if (where == TAIL) {
-		new->next = NULL;
-		if (queued_data == NULL) {
-			queued_data = new;
-		} else {
-			element = queued_data;
-			while (element->next != NULL) {
-				element = element->next;
-			}
-			element->next = new;
-		}
-	} else {
-		file_offset -= len;
-		new->next = queued_data;
-		queued_data = new;
-	}
-}
-
-static char *
-checklen(char *arg)
-{
-	int	oklen;
-
-	oklen = TRUE;
-	linesize += strlen(arg) + 1;
-	if (linesize >= BUFLIM) {
-		/*
-		 * we skip this if there're inserts. we'll handle the
-		 * argument counting after all the insertions have
-		 * been done.
-		 */
-		if (n_inserts == 0) {
-			lastarg = arg;
-			oklen = OK = FALSE;
-
-			if (LEGAL) {
-				ERR = TRUE;
-				ermsg(gettext("arg list too long\n"));
-			} else if (N_args > 1) {
-				N_args = 1;
-			} else {
-				ermsg(gettext("a single arg was greater than "
-				    "the max arglist size of %d characters\n"),
-				    BUFLIM);
-				ERR = TRUE;
-			}
-		}
-	}
-	return (oklen ? arg : 0);
+	/*
+	 * if exitstat was set, to match XCU4 compliance,
+	 * return that value, otherwise, return 1.
+	 */
+	return (exitstat ? exitstat : 1);
 }
 
 static char *
 addarg(char *arg)
 {
-	if (checklen(arg) != 0) {
-		(void) strcpy(next, arg);
-		arg = next;
-		next += strlen(arg) + 1;
-		return (arg);
-	}
-	return ((char *)0);
+	linesize += (strlen(arg) + 1);
+	return (arg);
 }
 
-/*
- * store_wchr() : append a wchar_t to a char buffer, resize buffer if required.
- *
- *     Given a pointer to the beginning of a string buffer, the length of the
- *     buffer and an offset indicating the next place to write within that
- *     buffer, the passed wchar_t will be appended to the buffer if there is
- *     enough space. If there is not enough space, an attempt to reallocate the
- *     buffer will be made and if successful the passed pointer and size will be
- *     updated to describe the reallocated block. Returns the new value for
- *     'offset' (it will be incremented by the number of bytes written).
- */
-static size_t
-store_wchr(char **buffer, size_t *buflen, size_t offset, wchar_t c)
-{
-	int bytes;
 
-	/*
-	 * Make sure that there is enough room in the buffer to store the
-	 * maximum length of c.
-	 */
-	if ((offset + MB_CUR_MAX) > *buflen) {
-		/*
-		 * Not enough room so attempt to reallocate. Add 'MB_CUR_MAX' to
-		 * buffer length to ensure that there is always enough room to
-		 * store 'c' if realloc succeeds, no matter what QBUF_INC is
-		 * defined as.
-		 */
-		*buflen += (QBUF_INC + MB_CUR_MAX);
-		if ((*buffer = realloc(*buffer, *buflen)) == NULL) {
-			perror(gettext("xargs: Memory allocation failure"));
-			exit(1);
-		}
-	}
-	/* store bytes from wchar into buffer */
-	bytes = wctomb(*buffer + offset, c);
-	if (bytes == -1) {
-		/* char was invalid */
-		bytes = 1;
-		*(*buffer + offset) = (char)c;
-	}
+static void
+store_str(char **buffer, char *str, size_t len)
+{
+	(void) memcpy(*buffer, str, len);
+	(*buffer)[len] = '\0';
+	*buffer += len;
+}
 
-	/* return new value for offset */
-	return (offset + bytes);
-}
 
 static char *
-getarg()
+getarg(char *arg)
 {
-	int	bytes;
+	char	*xarg = arg;
 	wchar_t	c;
-	char	*arg;
-	char	*retarg, *requeue_buf;
-	size_t  requeue_offset = 0, requeue_len;
 	char	mbc[MB_LEN_MAX];
-
-	while (iswspace(c = getwchr()) || c == '\n')
-		;
-
-	if (c == '\0') {
-		MORE = FALSE;
-		return (0);
-	}
-
-	/*
-	 * While we are reading in an argument, it is possible that we will
-	 * reach the maximum length of the overflow buffer and we'll have to
-	 * requeue what we have read so far. To handle this we allocate an
-	 * initial buffer here which will keep an unprocessed copy of the data
-	 * that we read in (this buffer will grow as required).
-	 */
-	requeue_len = (size_t)QBUF_STARTLEN;
-	if ((requeue_buf = (char *)malloc(requeue_len)) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
-		exit(1);
-	}
-
-	for (arg = next; ; c = getwchr()) {
-		bytes = wctomb(mbc, c);
+	size_t	len;
+	int	escape = 0;
+	int	inquote = 0;
 
-		/*
-		 * Store the char that we have read before processing it in case
-		 * the current argument needs to be requeued.
-		 */
-		requeue_offset = store_wchr(&requeue_buf, &requeue_len,
-		    requeue_offset, c);
+	arg[0] = '\0';
+
+	while (MORE) {
 
-		/* Check for overflow the input buffer */
-		if ((next + ((bytes == -1) ? 1 : bytes)) >= &argbuf[BUFLIM]) {
-			/*
-			 * It's only an error if there are no Args in buffer
-			 * already.
-			 */
-			if ((N_ARGS || PER_LINE) && LEGAL) {
-				ERR = TRUE;
-				ermsg(gettext("Argument list too long\n"));
-				free(requeue_buf);
-				return (0);
-			} else if (N_args == 0) {
-				lastarg = "";
-				ERR = TRUE;
-				ermsg(gettext("A single arg was greater than "
-				    "the max arglist size of %d characters\n"),
-				    BUFSIZE);
-				free(requeue_buf);
-				return (0);
-			}
-			/*
-			 * Otherwise we put back the current argument
-			 * and use what we have collected so far...
-			 */
-			queue(requeue_buf, requeue_offset, HEAD);
-			/* reset inquote because we have requeued the quotes */
-			inquote = 0;
+		len = 0;
+		c = getwchr(mbc, &len);
+
+		if (((arg - xarg) + len) > BUFLIM) {
+			EMSG2(ARG2LONG, BUFLIM);
+			exit(2);
+			ERR = TRUE;
 			return (NULL);
 		}
 
-
-		if (iswctype(c, blank) && inquote == 0) {
-			if (INSERT) {
-				if (bytes == -1) {
-					*next++ = (char)c;
-				} else {
-					(void) wctomb(next, c);
-					next += bytes;
-				}
+		switch (c) {
+		case '\n':
+			if (ZERO) {
+				store_str(&arg, mbc, len);
 				continue;
 			}
+			/* FALLTHRU */
 
-			/* skip over trailing whitespace till next arg */
-			while (iswctype((c = getwchr()), blank) &&
-			    (c != '\n') && (c != '\0'))
-				;
+		case '\0':
+		case WEOF:	/* Note WEOF == EOF */
 
-			/*
-			 * if there was space till end of line then the last
-			 * character was really a newline...
-			 */
-			if (c == L'\n' || c == L'\0') {
-				ungetwchr(L'\n');
-			} else {
-				/* later code needs to know this was a space */
-				ungetwchr(c);
-				c = L' ';
+			if (escape) {
+				EMSG(BADESCAPE);
+				ERR = TRUE;
+				return (NULL);
 			}
-			goto end_arg;
-		}
-		switch (c) {
-		case L'\0':
-		case L'\n':
 			if (inquote) {
-				*next++ = '\0';
-				ermsg(gettext("Missing quote: %s\n"), arg);
+				EMSG(MISSQUOTE);
 				ERR = TRUE;
-				free(requeue_buf);
-				return (0);
+				return (NULL);
 			}
 
 			N_lines++;
-end_arg:		*next++ = '\0';
-			/* we finished without requeuing so free requeue_buf */
-			free(requeue_buf);
-			if ((strcmp(arg, LEOF) == 0 && *LEOF != '\0') ||
-			    (c == '\0' && strlen(arg) == 0)) {
-				MORE = FALSE;
-				/* absorb the rest of the line */
-				if ((c != '\n') && (c != '\0'))
-					while (c = getwchr())
-						if ((c == '\n') || (c == '\0'))
-							break;
-				if (strcmp(arg, LEOF) == 0 && *LEOF != '\0') {
-					/*
-					 * Encountered EOF string.
-					 * Don't read any more lines.
-					 */
-					N_lines = 0;
-				}
-				return (0);
-			} else {
-				++N_args;
-				if (retarg = checklen(arg)) {
-					if ((PER_LINE &&
-					    N_lines >= PER_LINE &&
-					    (c == '\0' || c == '\n')) ||
-					    (N_ARGS && N_args >= N_ARGS)) {
-						N_lines = N_args = 0;
-						lastarg = "";
-						OK = FALSE;
-					}
-				}
-				return (retarg);
-			}
+			break;
 
 		case '"':
-			if (inquote == 1)	/* in single quoted string */
-				goto is_default;
-			if (inquote == 2)	/* terminating double quote */
+			if (ZERO || escape || (inquote == 1)) {
+				/* treat it literally */
+				escape = 0;
+				store_str(&arg, mbc, len);
+
+			} else if (inquote == 2) {
+				/* terminating double quote */
 				inquote = 0;
-			else			/* starting quoted string */
+
+			} else {
+				/* starting quoted string */
 				inquote = 2;
-			break;
+			}
+			continue;
 
 		case '\'':
-			if (inquote == 2)	/* in double quoted string */
-				goto is_default;
-			if (inquote == 1)	/* terminating single quote */
+			if (ZERO || escape || (inquote == 2)) {
+				/* treat it literally */
+				escape = 0;
+				store_str(&arg, mbc, len);
+
+			} else if (inquote == 1) {
+				/* terminating single quote */
 				inquote = 0;
-			else			/* starting quoted string */
+
+			} else {
+				/* starting quoted string */
 				inquote = 1;
-			break;
+			}
+			continue;
 
-		case L'\\':
+		case '\\':
 			/*
 			 * Any unquoted character can be escaped by
 			 * preceding it with a backslash.
 			 */
-			if (inquote == 0) {
-				c = getwchr();
-				/* store quoted char for potential requeueing */
-				requeue_offset = store_wchr(&requeue_buf,
-				    &requeue_len, requeue_offset, c);
+			if (ZERO || inquote || escape) {
+				escape = 0;
+				store_str(&arg, mbc, len);
+			} else {
+				escape = 1;
 			}
+			continue;
 
 		default:
-is_default:		if (bytes == -1) {
-				*next++ = (char)c;
-			} else {
-				(void) wctomb(next, c);
-				next += bytes;
+			/* most times we will just want to store it */
+			if (inquote || escape || ZERO || !iswctype(c, blank)) {
+				escape = 0;
+				store_str(&arg, mbc, len);
+				continue;
 			}
+			/* unquoted blank */
 			break;
 		}
+
+		/*
+		 * At this point we are processing a complete argument.
+		 */
+		if (strcmp(xarg, LEOF) == 0 && *LEOF != '\0') {
+			MORE = FALSE;
+			return (NULL);
+		}
+		if (c == WEOF) {
+			MORE = FALSE;
+		}
+		if (xarg[0] == '\0')
+			continue;
+		break;
 	}
+
+	return (xarg[0] == '\0' ? NULL : xarg);
 }
 
-
 /*
  * ermsg():	print out an error message, and indicate failure globally.
  *
@@ -825,15 +690,6 @@
 		return (TRUE);
 	}
 
-	/*
-	 * at this point, there may be unexpected input pending on stdin,
-	 * if one has used the -n flag. this presents a problem, because
-	 * if we simply do a read(), we'll get the extra input, instead
-	 * of our desired y/n input. so, we see if there's any extra
-	 * input, and if there is, then we will store it.
-	 */
-	saveinput();
-
 	(void) write(2, "?...", 4);	/* ask the user for input	*/
 
 	for (i = 0; i < LINE_MAX && read(PROMPT, &reply[i], 1) > 0; i++) {
@@ -873,7 +729,7 @@
 	bufend = &buffer[MAXSBUF];
 
 	while (*++pat) {
-		if (xindex(pat, INSPAT) == 0) {
+		if (strncmp(pat, INSPAT, ipatlen) == 0) {
 			if (pbuf + len >= bufend) {
 				break;
 			} else {
@@ -895,7 +751,7 @@
 		ermsg(gettext("Maximum argument size with insertion via %s's "
 		    "exceeded\n"), INSPAT);
 		ERR = TRUE;
-		return (0);
+		return (NULL);
 	}
 }
 
@@ -908,96 +764,62 @@
 
 	skel = p->p_skel;
 	sub = *ARGV;
-	linesize -= strlen(skel) + 1;
 	newarg = insert(skel, sub);
 	if (ERR)
 		return;
 
-	if (checklen(newarg)) {
-		if ((ibufsize += (l = strlen(newarg) + 1)) > MAXIBUF) {
-			ermsg(gettext("Insert buffer overflow\n"));
-			ERR = TRUE;
-		}
-		(void) strcpy(p_ibuf, newarg);
-		*(p->p_ARGV) = p_ibuf;
-		p_ibuf += l;
+	l = strlen(newarg) + 1;
+	if ((ibufsize += l) > MAXIBUF) {
+		EMSG(IBUFOVERFLOW);
+		ERR = TRUE;
 	}
+	(void) strcpy(p_ibuf, newarg);
+	*(p->p_ARGV) = p_ibuf;
+	p_ibuf += l;
 }
 
 
 /*
- * getchr():	get the next character.
+ * getwchr():	get the next wide character.
  * description:
- *	we get the next character from pio.structure, if there's a character
- *	to get. this may happen when we've had to flush stdin=/dev/tty,
- *	but still wanted to preserve the characters for later processing.
- *
- *	otherwise we just get the character from stdin.
+ *	we get the next character from stdin.  This returns WEOF if no
+ *	character is present.  If ZERO is set, it gets a single byte instead
+ *	of a wide character.
  */
-static int
-getchr(void)
+static wint_t
+getwchr(char *mbc, size_t *sz)
 {
-	char	c;
+	size_t		i;
+	int		c;
+	wchar_t		wch;
 
-	do {
-		if (queued_data == NULL) {
-			char	*buffer;
-			int	len;
+	i = 0;
+	while (i < MB_CUR_MAX) {
 
-			if ((buffer = malloc(BUFSIZE)) == NULL) {
-				perror(gettext(
-				    "xargs: Memory allocation failure"));
-				exit(1);
-			}
+		if ((c = fgetc(stdin)) == EOF) {
 
-			if ((len = read(0, buffer, BUFSIZE)) == 0)
-				return (0);
-			if (len == -1) {
-				perror(gettext("xargs: Read failure"));
-				exit(1);
+			if (i == 0) {
+				/* TRUE EOF has been reached */
+				return (WEOF);
 			}
 
-			queue(buffer, len, TAIL);
-		}
-
-		file_offset++;
-		c = *queued_data->cur++;	 /* get the next character */
-		if (--queued_data->length == 0) { /* at the end of buffer? */
-			pio	*nxt = queued_data->next;
-
-			free(queued_data->start);
-			free(queued_data);
-			queued_data = nxt;
-		}
-	} while (c == '\0');
-	return (c);
-}
-
-
-static wchar_t
-getwchr(void)
-{
-	int		i;
-	wchar_t		wch;
-	unsigned char	buffer[MB_LEN_MAX + 1];
-
-	for (i = 0; i < (int)MB_CUR_MAX; ) {
-		if ((buffer[i++] = getchr()) == NULL) {
-			/* We have reached  EOF */
-			if (i == 1) {
-				/* TRUE EOF has been reached */
-				return (NULL);
-			}
 			/*
 			 * We have some characters in our buffer still so it
 			 * must be an invalid character right before EOF.
 			 */
 			break;
 		}
+		mbc[i++] = (char)c;
 
 		/* If this succeeds then we are done */
-		if (mbtowc(&wch, (char *)buffer, i) != -1)
-			return (wch);
+		if (ZERO) {
+			*sz = i;
+			return ((char)c);
+		}
+		if (mbtowc(&wch, mbc, i) != -1) {
+			*sz = i;
+			return ((wint_t)wch);
+		}
 	}
 
 	/*
@@ -1009,67 +831,50 @@
 	 * generated in another locale?
 	 */
 	errno = EILSEQ;
-	perror(gettext("xargs: Corrupt input file"));
+	PERR(CORRUPTFILE);
 	exit(1);
 	/* NOTREACHED */
 }
 
 
-static void
-ungetwchr(wchar_t wch)
-{
-	char	*buffer;
-	int	bytes;
-
-	if ((buffer = malloc(MB_LEN_MAX)) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
-		exit(1);
-	}
-	bytes = wctomb(buffer, wch);
-	queue(buffer, bytes, HEAD);
-}
-
-
 static int
 lcall(char *sub, char **subargs)
 {
 	int retcode, retry = 0;
 	pid_t iwait, child;
 
-	for (; ; ) {
+	for (;;) {
 		switch (child = fork()) {
 		default:
 			while ((iwait = wait(&retcode)) != child &&
 			    iwait != (pid_t)-1)
 				;
 			if (iwait == (pid_t)-1) {
-				perror(gettext("xargs: Wait failure"));
+				PERR(WAITFAIL);
 				exit(122);
 				/* NOTREACHED */
 			}
 			if (WIFSIGNALED(retcode)) {
-				ermsg(gettext("Child killed with signal %d\n"),
-				    WTERMSIG(retcode));
+				EMSG2(CHILDSIG, WTERMSIG(retcode));
 				exit(125);
 				/* NOTREACHED */
 			}
 			if ((WEXITSTATUS(retcode) & 0377) == 0377) {
-				ermsg(gettext("Command could not continue "
-				    "processing data\n"));
+				EMSG(CHILDFAIL);
 				exit(124);
 				/* NOTREACHED */
 			}
 			return (WEXITSTATUS(retcode));
 		case 0:
 			(void) execvp(sub, subargs);
-			perror(gettext("xargs: Could not exec command"));
+			PERR(EXECFAIL);
 			if (errno == EACCES)
 				exit(126);
 			exit(127);
 			/* NOTREACHED */
 		case -1:
 			if (errno != EAGAIN && retry++ < FORK_RETRY) {
-				perror(gettext("xargs: Could not fork child"));
+				PERR(FORKFAIL);
 				exit(123);
 			}
 			(void) sleep(1);
@@ -1078,41 +883,10 @@
 }
 
 
-/*
- * If `s2' is a substring of `s1' return the offset of the first
- * occurrence of `s2' in `s1', else return -1.
- */
-static int
-xindex(char *as1, char *as2)
-{
-	char	*s1, *s2, c;
-	int		offset;
-
-	s1 = as1;
-	s2 = as2;
-	c = *s2;
-
-	while (*s1) {
-		if (*s1++ == c) {
-			offset = s1 - as1 - 1;
-			s2++;
-			while ((c = *s2++) == *s1++ && c)
-				;
-			if (c == 0)
-				return (offset);
-			s1 = offset + as1 + 1;
-			s2 = as2;
-			c = *s2;
-		}
-	}
-	return (-1);
-}
-
-
 static void
 usage()
 {
-	ermsg(gettext(USAGEMSG));
+	ermsg(_(USAGEMSG));
 	OK = FALSE;
 }
 
@@ -1142,14 +916,14 @@
 	int cflag;		/* 0 = not processing cmd arg		*/
 
 	if ((mav = malloc((ac * 2 + 1) * sizeof (char *))) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
+		PERR(MALLOCFAIL);
 		exit(1);
 	}
 
 	/* for each argument, see if we need to change things:		*/
 	for (i = mac = cflag = 0; (av[i] != NULL) && i < ac; i++, mac++) {
 		if ((mav[mac] = strdup(av[i])) == NULL) {
-			perror(gettext("xargs: Memory allocation failure"));
+			PERR(MALLOCFAIL);
 			exit(1);
 		}
 
@@ -1187,8 +961,7 @@
 					mav[++mac] = strdup(&av[i][2]);
 				}
 				if (mav[mac] == NULL) {
-					perror(gettext("xargs: Memory"
-					    " allocation failure"));
+					PERR(MALLOCFAIL);
 					exit(1);
 				}
 				break;
@@ -1225,8 +998,7 @@
 					++mac;	/* inc to next mod'd arg */
 					if ((mav[mac] = strdup(&av[i][2])) ==
 					    NULL) {
-						perror(gettext("xargs: Memory"
-						    " allocation failure"));
+						PERR(MALLOCFAIL);
 						exit(1);
 					}
 					break;
@@ -1239,8 +1011,7 @@
 					return;
 				}
 				if ((mav[mac] = strdup(av[i])) == NULL) {
-					perror(gettext("xargs: Memory"
-					    " allocation failure"));
+					PERR(MALLOCFAIL);
 					exit(1);
 				}
 				break;
@@ -1273,63 +1044,3 @@
 
 	mav[mac] = NULL;
 }
-
-
-/*
- * saveinput(): pick up any pending input, so it can be processed later.
- *
- * description:
- *	the purpose of this routine is to allow us to handle the user
- *	typing in a 'y' or 'n', when there's existing characters already
- *	in stdin. this happens when one gives the "-n" option along with
- *	"-p". the problem occurs when the user first types in more arguments
- *	than specified by the -n number. echoargs() wants to read stdin
- *	in order to get the user's response, but if there's already stuff
- *	there, echoargs() won't read the proper character.
- *
- *	the solution provided by this routine is to pick up all characters
- *	(if any), and store them for later processing.
- */
-
-void
-saveinput()
-{
-	char *buffer;		/* ptr to the floating data buffer	*/
-	struct strpeek speek;	/* to see what's on the queue		*/
-	struct strpeek *ps;
-
-	/* if we're not in -p mode, skip				*/
-	if (PROMPT == -1) {
-		return;
-	}
-
-
-	/* now see if there's any activity pending:			*/
-	ps = &speek;
-	ps->ctlbuf.maxlen = 0;
-	ps->ctlbuf.len = 0;
-	ps->ctlbuf.buf = NULL;
-	ps->flags = 0;
-	ps->databuf.maxlen = MAX_INPUT;
-	ps->databuf.len = 0;
-	if ((buffer = malloc((size_t)MAX_INPUT)) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
-		exit(1);
-	}
-	ps->databuf.buf = (char *)buffer;
-
-	if (ioctl(PROMPT, I_PEEK, ps) == -1) {
-		perror(gettext("xargs: I_PEEK failure"));
-		exit(1);
-	}
-
-	if (ps->databuf.len > 0) {
-		int	len;
-
-		if ((len = read(PROMPT, buffer, ps->databuf.len)) == -1) {
-			perror(gettext("xargs: read failure"));
-			exit(1);
-		}
-		queue(buffer, len, TAIL);
-	}
-}
--- a/usr/src/man/man1/xargs.1	Fri Aug 23 15:33:55 2013 -0400
+++ b/usr/src/man/man1/xargs.1	Tue Aug 27 13:00:09 2013 -0700
@@ -7,13 +7,13 @@
 .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License").  You may not use this file except in compliance with the License.
 .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.  See the License for the specific language governing permissions and limitations under the License.
 .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH XARGS 1 "Jul 17, 2007"
+.TH XARGS 1 "November 24, 2012"
 .SH NAME
 xargs \- construct argument lists and invoke utility
 .SH SYNOPSIS
 .LP
 .nf
-\fBxargs\fR [\fB-t\fR] [\fB-p\fR] [\fB-e\fR[\fIeofstr\fR]] [\fB-E\fR \fIeofstr\fR]
+\fBxargs\fR [\fB-t\fR] [\fB-0\fR] [\fB-p\fR] [\fB-e\fR[\fIeofstr\fR]] [\fB-E\fR \fIeofstr\fR]
      [\fB-I\fR \fIreplstr\fR] [\fB-i\fR[\fIreplstr\fR]] [\fB-L\fR \fInumber\fR] [\fB-l\fR[\fInumber\fR]]
      [\fB-n\fR \fInumber\fR [\fB-x\fR]] [\fB-s\fR \fIsize\fR] [\fIutility\fR [\fIargument\fR...]]
 .fi
@@ -158,7 +158,7 @@
 .sp
 .ne 2
 .na
-\fB\fB-p\fR\fR
+\fB-p\fR
 .ad
 .RS 15n
 Prompt mode. The user is asked whether to execute \fIutility\fR at each
@@ -224,6 +224,17 @@
 does not fit in the implied or specified size (see the \fB-s\fR option above).
 .RE
 
+.sp
+.ne 2
+.na
+\fB-0\fR
+.ad
+.RS 6n
+Null separator mode.  Instead of using white space or new lines to
+delimit arguments, zero bytes are used.  This is suitable for use with
+the \fB-print0\fR argument to \fBfind\fR(1).
+.RE
+
 .SH OPERANDS
 .sp
 .LP