PSARC/2006/475 Clearview: IP Observability Devices
author Philip Kirk <Phil.Kirk@Sun.COM>
Thu, 06 Nov 2008 06:47:54 -0500
changeset 8023 faf256d5c16c
parent 8022 9d1341032585
child 8024 dcdf61401b98
PSARC/2006/475 Clearview: IP Observability Devices
4085089 add a feature to enable 'snooping' of the loopback traffic
6753688 ip netinfo has no need for separate create and dispatch functions
6755448 ifconfig wedged in SIOCLIFREMOVEIF
6756483 incorrect ASSERT() in ip_delmulti[_v6]()
5092073 RFE: allow snoop to filter on zonename or zoneid
6606991 panic assertion failure !ill->ill_join_allmulti for multicast router
6760922 devname doesn't handle stale dev_t's in sdev_node cache entries
usr/src/Targetdirs
usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c
usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c
usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ipaddr.c
usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c
usr/src/cmd/devfsadm/misc_link.c
usr/src/cmd/truss/codes.c
usr/src/lib/brand/native/zone/config.xml
usr/src/lib/brand/native/zone/platform.xml
usr/src/lib/brand/sn1/zone/platform.xml
usr/src/lib/libdladm/common/libdladm.c
usr/src/lib/libdlpi/common/libdlpi.c
usr/src/lib/libdlpi/common/libdlpi.h
usr/src/lib/libsecdb/exec_attr.txt
usr/src/lib/libsecdb/help/profiles/RtNetObservability.html
usr/src/lib/libsecdb/prof_attr.txt
usr/src/pkgdefs/SUNWckr/prototype_com
usr/src/pkgdefs/SUNWckr/prototype_i386
usr/src/pkgdefs/SUNWckr/prototype_sparc
usr/src/pkgdefs/SUNWcsd/prototype_com
usr/src/pkgdefs/SUNWhea/prototype_com
usr/src/pkgdefs/SUNWsvvs/prototype_com
usr/src/pkgdefs/SUNWsvvs/prototype_i386
usr/src/pkgdefs/SUNWsvvs/prototype_sparc
usr/src/pkgdefs/common_files/i.devpolicy
usr/src/pkgdefs/common_files/i.minorperm_i386
usr/src/pkgdefs/common_files/i.minorperm_sparc
usr/src/tools/scripts/bfu.sh
usr/src/uts/common/Makefile.files
usr/src/uts/common/Makefile.rules
usr/src/uts/common/fs/dev/sdev_ipnetops.c
usr/src/uts/common/fs/dev/sdev_netops.c
usr/src/uts/common/fs/dev/sdev_ptsops.c
usr/src/uts/common/fs/dev/sdev_subr.c
usr/src/uts/common/fs/dev/sdev_vtops.c
usr/src/uts/common/inet/Makefile
usr/src/uts/common/inet/arp/arp_netinfo.c
usr/src/uts/common/inet/ip.h
usr/src/uts/common/inet/ip/ip.c
usr/src/uts/common/inet/ip/ip6.c
usr/src/uts/common/inet/ip/ip6_if.c
usr/src/uts/common/inet/ip/ip_if.c
usr/src/uts/common/inet/ip/ip_multi.c
usr/src/uts/common/inet/ip/ip_ndp.c
usr/src/uts/common/inet/ip/ip_netinfo.c
usr/src/uts/common/inet/ip6.h
usr/src/uts/common/inet/ip_if.h
usr/src/uts/common/inet/ip_impl.h
usr/src/uts/common/inet/ip_multi.h
usr/src/uts/common/inet/ip_stack.h
usr/src/uts/common/inet/ipnet.h
usr/src/uts/common/inet/ipnet/ipnet.c
usr/src/uts/common/inet/ipnet/ipnet.conf
usr/src/uts/common/inet/tcp.h
usr/src/uts/common/inet/tcp/tcp.c
usr/src/uts/common/inet/tcp/tcp_fusion.c
usr/src/uts/common/inet/udp/udp.c
usr/src/uts/common/io/neti_impl.c
usr/src/uts/common/io/sundlpi.c
usr/src/uts/common/os/priv_defs
usr/src/uts/common/sys/dlpi.h
usr/src/uts/common/sys/fs/sdev_impl.h
usr/src/uts/common/sys/hook_event.h
usr/src/uts/common/sys/neti.h
usr/src/uts/common/sys/netstack.h
usr/src/uts/common/sys/sysmacros.h
usr/src/uts/intel/Makefile.intel.shared
usr/src/uts/intel/dev/Makefile
usr/src/uts/intel/ia32/ml/modstubs.s
usr/src/uts/intel/ip/ip.global-objs.debug64
usr/src/uts/intel/ip/ip.global-objs.obj64
usr/src/uts/intel/ipnet/Makefile
usr/src/uts/intel/os/device_policy
usr/src/uts/intel/os/minor_perm
usr/src/uts/intel/os/name_to_major
usr/src/uts/sparc/Makefile.sparc.shared
usr/src/uts/sparc/dev/Makefile
usr/src/uts/sparc/ip/ip.global-objs.debug64
usr/src/uts/sparc/ip/ip.global-objs.obj64
usr/src/uts/sparc/ipnet/Makefile
usr/src/uts/sparc/ml/modstubs.s
usr/src/uts/sparc/os/device_policy
usr/src/uts/sparc/os/minor_perm
usr/src/uts/sparc/os/name_to_major
usr/src/xmod/xmod_files
--- a/usr/src/Targetdirs	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/Targetdirs	Thu Nov 06 06:47:54 2008 -0500
@@ -65,6 +65,7 @@
 	/dev \
 	/dev/dsk \
 	/dev/fd \
+	/dev/ipnet \
 	/dev/net \
 	/dev/rdsk \
 	/dev/rmt \
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c	Thu Nov 06 06:47:54 2008 -0500
@@ -20,12 +20,10 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS	*/
-
 #include <stdio.h>
 #include <unistd.h>
 #include <stropts.h>
@@ -80,11 +78,12 @@
 int x_offset = -1;
 int x_length = 0x7fffffff;
 FILE *namefile;
-int Pflg;
-boolean_t qflg = B_FALSE;
-boolean_t rflg = B_FALSE;
+boolean_t Pflg;
+boolean_t Iflg;
+boolean_t qflg;
+boolean_t rflg;
 #ifdef	DEBUG
-boolean_t zflg = B_FALSE;		/* debugging packet corrupt flag */
+boolean_t zflg;
 #endif
 struct Pf_ext_packetfilt pf;
 
@@ -105,12 +104,13 @@
 	struct Pf_ext_packetfilt *fp = NULL;
 	char *icapfile = NULL;
 	char *ocapfile = NULL;
-	int nflg = 0;
-	int Nflg = 0;
+	boolean_t nflg = B_FALSE;
+	boolean_t Nflg = B_FALSE;
 	int Cflg = 0;
+	boolean_t Uflg = B_FALSE;
 	int first = 1;
 	int last  = 0x7fffffff;
-	int use_kern_pf;
+	boolean_t use_kern_pf;
 	char *p, *p2;
 	char names[MAXPATHLEN + 1];
 	char self[MAXHOSTNAMELEN + 1];
@@ -228,8 +228,8 @@
 	}
 	(void) setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
 
-	while ((c = getopt(argc, argv, "at:CPDSi:o:Nn:s:d:vVp:f:c:x:?rqz"))
-				!= EOF) {
+	while ((c = getopt(argc, argv, "at:CPDSi:o:Nn:s:d:I:vVp:f:c:x:U?rqz"))
+	    != EOF) {
 		switch (c) {
 		case 'a':
 			audiodev = getenv("AUDIODEV");
@@ -238,7 +238,7 @@
 			audio = open(audiodev, O_WRONLY);
 			if (audio < 0) {
 				pr_err("Audio device %s: %m",
-					audiodev);
+				    audiodev);
 				exit(1);
 			}
 			break;
@@ -251,8 +251,14 @@
 			default:	usage();
 			}
 			break;
+		case 'I':
+			if (device != NULL)
+				usage();
+			Iflg = B_TRUE;
+			device = optarg;
+			break;
 		case 'P':
-			Pflg++;
+			Pflg = B_TRUE;
 			break;
 		case 'D':
 			flags |= F_DROPS;
@@ -267,16 +273,18 @@
 			ocapfile = optarg;
 			break;
 		case 'N':
-			Nflg++;
+			Nflg = B_TRUE;
 			break;
 		case 'n':
-			nflg++;
+			nflg = B_TRUE;
 			(void) strlcpy(names, optarg, MAXPATHLEN);
 			break;
 		case 's':
 			snaplen = atoi(optarg);
 			break;
 		case 'd':
+			if (Iflg)
+				usage();
 			device = optarg;
 			break;
 		case 'v':
@@ -306,12 +314,12 @@
 				    strcmp(p+1, self) == 0)
 				(void) fprintf(stderr,
 				"Warning: cannot capture packets from %s\n",
-					self);
+				    self);
 				*p = ' ';
 			} else if (strcmp(optarg, self) == 0)
 				(void) fprintf(stderr,
 				"Warning: cannot capture packets from %s\n",
-					self);
+				    self);
 			argstr = optarg;
 			break;
 		case 'x':
@@ -330,7 +338,7 @@
 			maxcount = atoi(optarg);
 			break;
 		case 'C':
-			Cflg++;
+			Cflg = B_TRUE;
 			break;
 		case 'q':
 			qflg = B_TRUE;
@@ -338,6 +346,9 @@
 		case 'r':
 			rflg = B_TRUE;
 			break;
+		case 'U':
+			Uflg = B_TRUE;
+			break;
 #ifdef	DEBUG
 		case 'z':
 			zflg = B_TRUE;
@@ -363,6 +374,7 @@
 	if (!icapfile) {
 		use_kern_pf = check_device(&dh, &device);
 	} else {
+		use_kern_pf = B_FALSE;
 		cap_open_read(icapfile);
 
 		if (!nflg) {
@@ -372,6 +384,9 @@
 		}
 	}
 
+	if (Uflg)
+		use_kern_pf = B_FALSE;
+
 	/* attempt to read .names file if it exists before filtering */
 	if ((!Nflg) && names[0] != '\0') {
 		if (access(names, F_OK) == 0) {
@@ -383,7 +398,7 @@
 	}
 
 	if (argstr) {
-		if (!icapfile && use_kern_pf) {
+		if (use_kern_pf) {
 			ret = pf_compile(argstr, Cflg);
 			switch (ret) {
 			case 0:
@@ -447,7 +462,7 @@
 			}
 			flags = 0;
 			(void) fprintf(stderr,
-				"Creating name file %s\n", names);
+			    "Creating name file %s\n", names);
 		}
 
 		if (flags & F_DTAIL)
@@ -646,7 +661,7 @@
 		sumcount = 0;			/* error recovery */
 		pr_err(
 		    "get_sum_line: sumline overflow (sumcount=%d, MAXSUM=%d)\n",
-			tsumcount, MAXSUM);
+		    tsumcount, MAXSUM);
 	}
 
 	sumline[sumcount][0] = '\0';
@@ -764,10 +779,12 @@
 	(void) fprintf(stderr,
 	"\t[ -a ]			# Listen to packets on audio\n");
 	(void) fprintf(stderr,
-	"\t[ -d device ]		# Listen on interface named device\n");
+	"\t[ -d link ]		# Listen on named link\n");
 	(void) fprintf(stderr,
 	"\t[ -s snaplen ]		# Truncate packets\n");
 	(void) fprintf(stderr,
+	"\t[ -I IP interface ]		# Listen on named IP interface\n");
+	(void) fprintf(stderr,
 	"\t[ -c count ]		# Quit after count packets\n");
 	(void) fprintf(stderr,
 	"\t[ -P ]			# Turn OFF promiscuous mode\n");
@@ -892,7 +909,7 @@
 		} else {
 			if (nalarm == 0 || nalarm > hp->s_time)
 				nalarm = now < hp->s_time ? hp->s_time :
-					now + 1;
+				    now + 1;
 			tp = hp;
 		}
 	}
@@ -974,7 +991,7 @@
 				if ((hp->s_time - now) > 0) {
 					if (nalarm == 0 || nalarm > hp->s_time)
 						nalarm = now < hp->s_time ?
-							hp->s_time : now + 1;
+						    hp->s_time : now + 1;
 				}
 			}
 		}
@@ -1027,8 +1044,8 @@
 		}
 		if (snoop_nrecover >= SNOOP_MAXRECOVER) {
 			(void) fprintf(stderr,
-				"snoop: WARNING: skipping from packet %d\n",
-				count);
+			    "snoop: WARNING: skipping from packet %d\n",
+			    count);
 			snoop_nrecover = 0;
 		} else {
 			/* continue trying */
@@ -1036,7 +1053,7 @@
 		}
 	} else if (snoop_nrecover >= SNOOP_MAXRECOVER) {
 		(void) fprintf(stderr,
-			"snoop: ERROR: cannot recover from packet %d\n", count);
+		    "snoop: ERROR: cannot recover from packet %d\n", count);
 		exit(1);
 	}
 
@@ -1055,7 +1072,7 @@
 		/* Inform user that snoop has taken a fault */
 		(void) fprintf(stderr,
 		    "WARNING: received signal %d from packet %d\n",
-				sig, count);
+		    sig, count);
 	}
 
 	/* Reset interpreter variables */
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h	Thu Nov 06 06:47:54 2008 -0500
@@ -20,15 +20,13 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SNOOP_H
 #define	_SNOOP_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS	*/
-
 #include <rpc/types.h>
 #include <sys/pfmod.h>
 #include <sys/time.h>
@@ -283,12 +281,15 @@
  * cannot be expressed in kernel space.
  */
 typedef struct interface {
-	uint_t	mac_type;
-	uint_t	mtu_size;
-	uint_t  network_type_offset;
-	uint_t	(*header_len)(char *);
-	uint_t 	(*interpreter)(int, char *, int, int);
-	uint_t	try_kernel_filter;
+	uint_t		mac_type;	/* DLPI DL_* type; -1 ends the table */
+	uint_t		mtu_size;
+	uint_t  	network_type_offset;	/* type field offset in pkt */
+	size_t		network_type_len;	/* bytes compared at offset */
+	uint_t		network_type_ip;	/* value matched for IPv4 */
+	uint_t		network_type_ipv6;	/* value matched for IPv6 */
+	uint_t		(*header_len)(char *);
+	uint_t 		(*interpreter)(int, char *, int, int);
+	boolean_t	try_kernel_filter;
 } interface_t;
 
 extern interface_t INTERFACES[], *interface;
@@ -306,6 +307,7 @@
 extern unsigned int encap_levels, total_encap_levels;
 
 extern int quitting;
+extern boolean_t Iflg, Pflg, rflg;
 
 /*
  * Global error recovery routine: used to reset snoop variables after
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c	Thu Nov 06 06:47:54 2008 -0500
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS */
-
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
@@ -52,6 +50,7 @@
 #include <ctype.h>
 #include <values.h>
 #include <libdlpi.h>
+#include <sys/dlpi.h>
 
 #include "snoop.h"
 
@@ -89,6 +88,7 @@
 check_device(dlpi_handle_t *dhp, char **devicep)
 {
 	int	retval;
+	int	flags = DLPI_PASSIVE | DLPI_RAW;
 
 	/*
 	 * Determine which network device
@@ -105,7 +105,7 @@
 		unsigned bufsize;
 
 		if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
-		    pr_err("socket");
+			pr_err("socket");
 
 		if (ioctl(s, SIOCGIFNUM, (char *)&numifs) < 0) {
 			pr_err("check_device: ioctl SIOCGIFNUM");
@@ -139,8 +139,8 @@
 			if (ioctl(s, SIOCGIFFLAGS, (char *)ifr) < 0)
 				pr_err("ioctl SIOCGIFFLAGS");
 			if ((ifr->ifr_flags &
-				(IFF_VIRTUAL|IFF_LOOPBACK|IFF_UP|
-				IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
+			    (IFF_VIRTUAL|IFF_LOOPBACK|IFF_UP|
+			    IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
 				break;
 		}
 
@@ -150,9 +150,11 @@
 		*devicep = ifr->ifr_name;
 		(void) close(s);
 	}
-
-	retval = dlpi_open(*devicep, dhp, DLPI_PASSIVE|DLPI_RAW);
-	if (retval != DLPI_SUCCESS) {
+	if (Iflg)
+		flags |= DLPI_DEVIPNET;
+	if (Iflg || strcmp(*devicep, "lo0") == 0)
+		flags |= DLPI_IPNETINFO;
+	if ((retval = dlpi_open(*devicep, dhp, flags)) != DLPI_SUCCESS) {
 		pr_err("cannot open \"%s\": %s", *devicep,
 		    dlpi_strerror(retval));
 	}
@@ -187,15 +189,20 @@
 initdevice(dlpi_handle_t dh, ulong_t snaplen, ulong_t chunksize,
     struct timeval *timeout, struct Pf_ext_packetfilt *fp)
 {
-	extern int Pflg;
 	int 	retv;
 	int 	netfd;
+	int	val = 1;
 
 	retv = dlpi_bind(dh, DLPI_ANY_SAP, NULL);
 	if (retv != DLPI_SUCCESS)
 		pr_errdlpi(dh, "cannot bind on", retv);
 
-	(void) fprintf(stderr, "Using device %s ", dlpi_linkname(dh));
+	if (Iflg) {
+		(void) fprintf(stderr, "Using device ipnet/%s ",
+		    dlpi_linkname(dh));
+	} else {
+		(void) fprintf(stderr, "Using device %s ", dlpi_linkname(dh));
+	}
 
 	/*
 	 * If Pflg not set - use physical level
@@ -394,9 +401,9 @@
 			nhdrp->sbh_totlen = ntohl(hdrp->sbh_totlen);
 			nhdrp->sbh_drops = ntohl(hdrp->sbh_drops);
 			nhdrp->sbh_timestamp.tv_sec =
-				ntohl(hdrp->sbh_timestamp.tv_sec);
+			    ntohl(hdrp->sbh_timestamp.tv_sec);
 			nhdrp->sbh_timestamp.tv_usec =
-				ntohl(hdrp->sbh_timestamp.tv_usec);
+			    ntohl(hdrp->sbh_timestamp.tv_usec);
 		}
 
 		/* Enhanced check for valid header */
@@ -412,14 +419,15 @@
 		    (nhdrp->sbh_msglen > nhdrp->sbh_origlen) ||
 		    (nhdrp->sbh_totlen < nhdrp->sbh_msglen) ||
 		    (nhdrp->sbh_timestamp.tv_sec == 0)) {
-			if (cap)
+			if (cap) {
 				(void) fprintf(stderr, "(warning) bad packet "
 				    "header in capture file");
-			else
+			} else {
 				(void) fprintf(stderr, "(warning) bad packet "
 				    "header in buffer");
+			}
 			(void) fprintf(stderr, " offset %d: length=%d\n",
-				bp - buf, nhdrp->sbh_totlen);
+			    bp - buf, nhdrp->sbh_totlen);
 			goto err;
 		}
 
@@ -433,7 +441,7 @@
 				    " greater than MTU in buffer");
 
 			(void) fprintf(stderr, " offset %d: length=%d\n",
-				bp - buf, nhdrp->sbh_totlen);
+			    bp - buf, nhdrp->sbh_totlen);
 		}
 
 		/*
@@ -454,16 +462,16 @@
 
 		header_okay = 1;
 		if (!filter ||
-			want_packet(pktp,
-				nhdrp->sbh_msglen,
-				nhdrp->sbh_origlen)) {
+		    want_packet(pktp,
+		    nhdrp->sbh_msglen,
+		    nhdrp->sbh_origlen)) {
 			count++;
 
 			/*
 			 * Start deadman timer for interpreter processing
 			 */
 			(void) snoop_alarm(SNOOP_ALARM_GRAN*SNOOP_MAXRECOVER,
-				NULL);
+			    NULL);
 
 			encap_levels = 0;
 			if (!cap || count >= first)
@@ -507,7 +515,7 @@
 			bp += sizeof (int);
 		} else {
 			for (bp += sizeof (int); bp <= bufstop;
-				bp += sizeof (int)) {
+			    bp += sizeof (int)) {
 				hdrp = (struct sb_hdr *)bp;
 				/* An approximate timestamp located */
 				if ((hdrp->sbh_timestamp.tv_sec >> 8) ==
@@ -528,8 +536,8 @@
 cap_write_error(const char *msgtype)
 {
 	(void) fprintf(stderr,
-		    "snoop: cannot write %s to capture file: %s\n",
-		    msgtype, strerror(errno));
+	    "snoop: cannot write %s to capture file: %s\n",
+	    msgtype, strerror(errno));
 	exit(1);
 }
 
@@ -668,17 +676,17 @@
 
 		default:
 			pr_err("capture file: %s: Version %d unrecognized\n",
-				name, cap_vers);
+			    name, cap_vers);
 		}
 
 		for (interface = &INTERFACES[0]; interface->mac_type != -1;
-				interface++)
+		    interface++)
 			if (interface->mac_type == device_mac_type)
 				break;
 
 		if (interface->mac_type == -1)
 			pr_err("Mac Type = %x is not supported\n",
-				device_mac_type);
+			    device_mac_type);
 	} else {
 		/* Use heuristic to check if it's an old-style file */
 
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -43,36 +41,50 @@
 #include <sys/ib/clients/ibd/ibd.h>
 #include <sys/ethernet.h>
 #include <sys/vlan.h>
+#include <sys/zone.h>
+#include <sys/byteorder.h>
+#include <limits.h>
+#include <inet/ip.h>
+#include <inet/ip6.h>
 
 #include "at.h"
 #include "snoop.h"
 
-static	uint_t ether_header_len(char *), fddi_header_len(char *),
-		tr_header_len(char *), ib_header_len(char *);
+static uint_t ether_header_len(char *), fddi_header_len(char *),
+	tr_header_len(char *), ib_header_len(char *), ipnet_header_len(char *);
 static uint_t interpret_ether(), interpret_fddi(), interpret_tr();
-static uint_t interpret_ib(int, char *, int, int);
+static uint_t interpret_ib(int, char *, int, int),
+	interpret_ipnet(int, char *, int, int);
 static void addr_copy_swap(struct ether_addr *, struct ether_addr *);
 
 interface_t *interface;
 interface_t INTERFACES[] = {
 
 	/* IEEE 802.3 CSMA/CD network */
-	{ DL_CSMACD, 1550, 12, ether_header_len, interpret_ether, B_TRUE },
+	{ DL_CSMACD, 1550, 12, 2, ETHERTYPE_IP, ETHERTYPE_IPV6,
+	    ether_header_len, interpret_ether, B_TRUE },
 
 	/* Ethernet Bus */
-	{ DL_ETHER, 1550, 12, ether_header_len, interpret_ether, B_TRUE },
+	{ DL_ETHER, 1550, 12, 2, ETHERTYPE_IP, ETHERTYPE_IPV6,
+	    ether_header_len, interpret_ether, B_TRUE },
 
 	/* Fiber Distributed data interface */
-	{ DL_FDDI, 4500, 19, fddi_header_len, interpret_fddi, B_FALSE },
+	{ DL_FDDI, 4500, 19, 2, ETHERTYPE_IP, ETHERTYPE_IPV6,
+	    fddi_header_len, interpret_fddi, B_FALSE },
 
 	/* Token Ring interface */
-	{ DL_TPR, 17800, 0, tr_header_len, interpret_tr, B_FALSE },
+	{ DL_TPR, 17800, 0, 2, ETHERTYPE_IP, ETHERTYPE_IPV6,
+	    tr_header_len, interpret_tr, B_FALSE },
 
 	/* Infiniband */
-	{ DL_IB, 4096, 0, ib_header_len, interpret_ib, B_TRUE },
+	{ DL_IB, 4096, 0, 2, ETHERTYPE_IP, ETHERTYPE_IPV6,
+	    ib_header_len, interpret_ib, B_TRUE },
 
-	{ (uint_t)-1, 0, 0, 0, 0, 0 }
+	/* ipnet; NOTE(review): confirm type values vs. the 2-byte type length */
+	{ DL_IPNET, INT_MAX, 0, 2, IPV4_VERSION, IPV6_VERSION,
+	    ipnet_header_len, interpret_ipnet, B_TRUE },
 
+	{ (uint_t)-1, 0, 0, 0, 0, 0, NULL, NULL, B_FALSE }	/* end of table */
 };
 
 /* externals */
@@ -698,7 +710,7 @@
 	static char line[512];
 
 	sprintf(line, "TR Source Route dir=%d, mtu=%d",
-			rh->dir, Mtutab[rh->mtu]);
+	    rh->dir, Mtutab[rh->mtu]);
 
 	hops = (int)(rh->len - 2) / (int)2;
 
@@ -1516,7 +1528,7 @@
 	if (origlen < IPOIB_HDRSIZE) {
 		if (flags & F_SUM)
 			(void) snprintf(get_sum_line(), MAXLINE,
-				"RUNT (short packet - %d bytes)", origlen);
+			    "RUNT (short packet - %d bytes)", origlen);
 		if (flags & F_DTAIL)
 			show_header("RUNT:  ", "Short packet", origlen);
 		return (elen);
@@ -1536,24 +1548,24 @@
 
 	if (flags & F_SUM) {
 		(void) snprintf(get_sum_line(), MAXLINE,
-			"IPIB Type=%04X (%s), size = %d bytes",
-			ethertype,
-			print_ethertype(ethertype),
-			origlen);
+		    "IPIB Type=%04X (%s), size = %d bytes",
+		    ethertype,
+		    print_ethertype(ethertype),
+		    origlen);
 	}
 
 	if (flags & F_DTAIL) {
 		show_header("IPIB:  ", "IPIB Header", elen);
 		show_space();
 		(void) snprintf(get_line(0, 0), get_line_remain(),
-			"Packet %d arrived at %d:%02d:%d.%02d",
-			pi_frame, pi_time_hour, pi_time_min,
-			pi_time_sec, pi_time_usec / 10000);
+		    "Packet %d arrived at %d:%02d:%d.%02d",
+		    pi_frame, pi_time_hour, pi_time_min,
+		    pi_time_sec, pi_time_usec / 10000);
 		(void) snprintf(get_line(0, 0), get_line_remain(),
-			"Packet size = %d bytes", elen, elen);
+		    "Packet size = %d bytes", elen, elen);
 		(void) snprintf(get_line(0, 2), get_line_remain(),
-			"Ethertype = %04X (%s)", ethertype,
-			print_ethertype(ethertype));
+		    "Ethertype = %04X (%s)", ethertype,
+		    print_ethertype(ethertype));
 		show_space();
 	}
 
@@ -1573,3 +1585,85 @@
 
 	return (elen);
 }
+
+static uint_t			/* returns the size of a dl_ipnetinfo_t header */
+ipnet_header_len(char *hdr)	/* hdr is not inspected */
+{
+	return (sizeof (dl_ipnetinfo_t));
+}
+
+#define	MAX_UINT64_STR	22	/* fits any decimal uint64_t plus its NUL */
+/* Show the ipnet header, then decode the IP packet that follows it. */
+static uint_t
+interpret_ipnet(int flags, char *header, int elen, int origlen)
+{
+	dl_ipnetinfo_t dl;
+	size_t len;
+	char *off = (char *)header + sizeof (dl_ipnetinfo_t);
+	int blen = MAX(origlen, 8252);
+	char szone[MAX_UINT64_STR];
+	char dzone[MAX_UINT64_STR];
+
+	/* Reject runts before the unsigned payload length below can wrap. */
+	if (elen < (int)sizeof (dl_ipnetinfo_t)) {
+		if (flags & F_SUM)
+			(void) snprintf(get_sum_line(), MAXLINE,
+			    "RUNT (short packet - %d bytes)", elen);
+		if (flags & F_DTAIL)
+			show_header("RUNT:  ", "Short packet", elen);
+		return (0);
+	}
+	len = elen - sizeof (dl_ipnetinfo_t);
+	(void) memcpy(&dl, header, sizeof (dl));
+	/* Grow the shared data buffer when it cannot hold this packet. */
+	if (data == NULL || datalen < blen) {
+		free(data);
+		data = (char *)malloc(blen);
+		if (!data)
+			pr_err("Warning: malloc failure");
+		datalen = blen;
+	}
+
+	/* Zone ids are byte-swapped with BE_64(); ALL_ZONES prints "Unknown". */
+	if (dl.dli_srczone == ALL_ZONES)
+		sprintf(szone, "Unknown");
+	else
+		sprintf(szone, "%llu", BE_64(dl.dli_srczone));
+	if (dl.dli_dstzone == ALL_ZONES)
+		sprintf(dzone, "Unknown");
+	else
+		sprintf(dzone, "%llu", BE_64(dl.dli_dstzone));
+
+	if (flags & F_SUM) {
+		(void) snprintf(get_sum_line(), MAXLINE,
+		    "IPNET src zone %s dst zone %s", szone, dzone);
+	}
+
+	if (flags & F_DTAIL) {
+		show_header("IPNET:  ", "IPNET Header", elen);
+		show_space();
+		(void) snprintf(get_line(0, 0), get_line_remain(),
+		    "Packet %d arrived at %d:%02d:%d.%05d", pi_frame,
+		    pi_time_hour, pi_time_min, pi_time_sec, pi_time_usec / 10);
+		(void) snprintf(get_line(0, 0), get_line_remain(),
+		    "Packet size = %d bytes", elen);
+		(void) snprintf(get_line(0, 0), get_line_remain(),
+		    "dli_version = %d", dl.dli_version);
+		(void) snprintf(get_line(0, 0), get_line_remain(),
+		    "dli_type = %d", dl.dli_ipver);
+		(void) snprintf(get_line(0, 2), get_line_remain(),
+		    "dli_srczone = %s", szone);
+		(void) snprintf(get_line(0, 2), get_line_remain(),
+		    "dli_dstzone = %s", dzone);
+		show_space();
+	}
+
+	/* Hand a scratch copy of the payload to the matching IP decoder. */
+	(void) memcpy(data, off, len);
+	if (dl.dli_ipver == IPV4_VERSION)
+		(void) interpret_ip(flags, (struct ip *)data, len);
+	else if (dl.dli_ipver == IPV6_VERSION)
+		(void) interpret_ipv6(flags, (ip6_t *)data, len);
+
+	return (0);
+}
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
@@ -50,6 +48,7 @@
 #include <arpa/inet.h>
 #include <rpc/rpc.h>
 #include <rpc/rpcent.h>
+#include <sys/dlpi.h>
 
 #include <snoop.h>
 #include "snoop_vlan.h"
@@ -93,6 +92,12 @@
 #define	AT_DST_NODE_OFFSET	16
 #define	AT_SRC_NODE_OFFSET	17
 
+/*
+ * Offset for the source and destination zoneid in the ipnet header.
+ */
+#define	IPNET_SRCZONE_OFFSET 8
+#define	IPNET_DSTZONE_OFFSET 16
+
 int eaddr;	/* need ethernet addr */
 
 int opstack;	/* operand stack depth */
@@ -368,10 +373,10 @@
 			op++;
 			if ((int)*op < 0)
 				printf("\t%2d:   0x%08x (%d)\n",
-					op - oplist, *op, *op);
+				    op - oplist, *op, *op);
 			else
 				printf("\t%2d:   %d (0x%08x)\n",
-					op - oplist, *op, *op);
+				    op - oplist, *op, *op);
 		}
 	}
 	printf("\t%2d: STOP\n", op - oplist);
@@ -593,21 +598,21 @@
 
 			case 2:
 				*((ushort_t *)(sp)) =
-					*((ushort_t *)(base + off));
+				    *((ushort_t *)(base + off));
 				*(((ushort_t *)sp) + 1) =
-					*((ushort_t *)(base + off) + 1);
+				    *((ushort_t *)(base + off) + 1);
 				break;
 
 			case 1:
 			case 3:
 				*((uchar_t *)(sp)) =
-					*((uchar_t *)(base + off));
+				    *((uchar_t *)(base + off));
 				*(((uchar_t *)sp) + 1) =
-					*((uchar_t *)(base + off) + 1);
+				    *((uchar_t *)(base + off) + 1);
 				*(((uchar_t *)sp) + 2) =
-					*((uchar_t *)(base + off) + 2);
+				    *((uchar_t *)(base + off) + 2);
 				*(((uchar_t *)sp) + 3) =
-					*((uchar_t *)(base + off) + 3);
+				    *((uchar_t *)(base + off) + 3);
 				break;
 			}
 			*sp = ntohl(*sp);
@@ -917,42 +922,39 @@
 			break;
 		case OP_OFFSET_ETHERTYPE:
 			/*
-			 * Set base to the location of the ethertype.
-			 * If the packet is VLAN tagged, move base
-			 * to the ethertype field in the VLAN header.
-			 * Otherwise, set it to the appropriate field
-			 * for this link type.
+			 * Set base to the location of the ethertype as
+			 * appropriate for this link type.  Note that it's
+			 * not called "ethertype" for every link type, but
+			 * we need to call it something.
 			 */
 			if (offp >= &offstack[MAXSS])
 				return (0);
 			*++offp = base;
 			base = pkt + interface->network_type_offset;
+
+			/*
+			 * Below, we adjust the offset for unusual
+			 * link-layer headers that may have the protocol
+			 * type in a variable location beyond what was set
+			 * above.
+			 */
+			switch (interface->mac_type) {
+			case DL_ETHER:
+			case DL_CSMACD:
+				/*
+				 * If this is a VLAN-tagged packet, we need
+				 * to point to the ethertype field in the
+				 * VLAN header.  Move past the ethertype
+				 * field in the ethernet header.
+				 */
+				if (ntohs(get_u16(base)) == ETHERTYPE_VLAN)
+					base += (ENCAP_ETHERTYPE_OFF);
+				break;
+			}
 			if (base > pkt + len) {
 				/* Went too far, drop the packet */
 				return (0);
 			}
-
-			/*
-			 * VLAN links are only supported on Ethernet-like
-			 * links.
-			 */
-			if (interface->mac_type == DL_ETHER ||
-			    interface->mac_type == DL_CSMACD) {
-				if (ntohs(get_u16(base)) == ETHERTYPE_VLAN) {
-					/*
-					 * We need to point to the
-					 * ethertype field in the VLAN
-					 * tag, so also move past the
-					 * ethertype field in the
-					 * ethernet header.
-					 */
-					base += (ENCAP_ETHERTYPE_OFF);
-				}
-				if (base > pkt + len) {
-					/* Went too far, drop the packet */
-					return (0);
-				}
-			}
 			break;
 		}
 	}
@@ -1040,6 +1042,24 @@
 	emitop(OP_EQ);
 }
 
+/*
+ * Compare a zoneid 32 bits at a time; val is in network byte order.
+ */
+static void
+compare_value_zone(uint_t offset, uint64_t val)
+{
+	uint32_t half[2];	/* copy of val; avoids type-punned reads */
+
+	(void) memcpy(half, &val, sizeof (half));
+	load_const(ntohl(half[0]));
+	load_value(offset, 4);
+	emitop(OP_EQ);
+	load_const(ntohl(half[1]));
+	load_value(offset + 4, 4);
+	emitop(OP_EQ);
+	emitop(OP_AND);		/* both halves must match */
+}
+
 /* Emit an operator into the code array */
 static void
 emitop(enum optype opcode)
@@ -1253,7 +1273,7 @@
 				tkp = p;
 			} else if (base == 16) {
 				size = 2 + strspn(token+2,
-					"0123456789abcdefABCDEF");
+				    "0123456789abcdefABCDEF");
 				size1 = p - token;
 				if (size != size1) {
 					tokentype = ALPHA;
@@ -1294,14 +1314,16 @@
 	*tkp = '\0';
 }
 
-static struct match_type {
+typedef struct match_type {
 	char		*m_name;
 	int		m_offset;
 	int		m_size;
 	int		m_value;
 	int		m_depend;
 	enum optype	m_optype;
-} match_types[] = {
+} match_type_t;
+
+static match_type_t ether_match_types[] = {
 	/*
 	 * Table initialized assuming Ethernet data link headers.
 	 * m_offset is an offset beyond the offset op, which is why
@@ -1331,15 +1353,44 @@
 	0,		0,  0, 0,		 0,	0
 };
 
+static match_type_t ipnet_match_types[] = {
+	/*
+	 * Table for ipnet (DL_IPNET) headers.  "ip" and "ip6" compare the
+	 * two-byte type field against DL_IPNETINFO_VERSION and the IP
+	 * version; m_depend is the index of a prerequisite entry, or -1.
+	 */
+	"ip",		0,  2, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	-1,	OP_OFFSET_ETHERTYPE,
+	"ip6",		0,  2, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	-1,	OP_OFFSET_ETHERTYPE,
+	"tcp",		9,  1, IPPROTO_TCP,	 0,	OP_OFFSET_LINK,
+	"tcp",		6,  1, IPPROTO_TCP,	 1,	OP_OFFSET_LINK,
+	"udp",		9,  1, IPPROTO_UDP,	 0,	OP_OFFSET_LINK,
+	"udp",		6,  1, IPPROTO_UDP,	 1,	OP_OFFSET_LINK,
+	"icmp",		9,  1, IPPROTO_ICMP,	 0,	OP_OFFSET_LINK,
+	"icmp6",	6,  1, IPPROTO_ICMPV6,	 1,	OP_OFFSET_LINK,
+	"ospf",		9,  1, IPPROTO_OSPF,	 0,	OP_OFFSET_LINK,
+	"ospf",		6,  1, IPPROTO_OSPF,	 1,	OP_OFFSET_LINK,
+	"ip-in-ip",	9,  1, IPPROTO_ENCAP,	 0,	OP_OFFSET_LINK,
+	"esp",		9,  1, IPPROTO_ESP,	 0,	OP_OFFSET_LINK,
+	"esp",		6,  1, IPPROTO_ESP,	 1,	OP_OFFSET_LINK,
+	"ah",		9,  1, IPPROTO_AH,	 0,	OP_OFFSET_LINK,
+	"ah",		6,  1, IPPROTO_AH,	 1,	OP_OFFSET_LINK,
+	"sctp",		9,  1, IPPROTO_SCTP,	 0,	OP_OFFSET_LINK,
+	"sctp",		6,  1, IPPROTO_SCTP,	 1,	OP_OFFSET_LINK,
+	0,		0,  0, 0,		 0,	0
+};
+
 static void
-generate_check(struct match_type *mtp)
+generate_check(match_type_t match_types[], int index, int type)
 {
+	match_type_t *mtp = &match_types[index];
 	/*
 	 * Note: this code assumes the above dependencies are
 	 * not cyclic.  This *should* always be true.
 	 */
 	if (mtp->m_depend != -1)
-		generate_check(&match_types[mtp->m_depend]);
+		generate_check(match_types, mtp->m_depend, type);
 
 	emitop(mtp->m_optype);
 	load_value(mtp->m_offset, mtp->m_size);
@@ -1364,14 +1415,25 @@
 comparison(char *s)
 {
 	unsigned int	i, n_checks = 0;
+	match_type_t	*match_types;
+
+	switch (interface->mac_type) {
+	case DL_ETHER:
+		match_types = ether_match_types;
+		break;
+	case DL_IPNET:
+		match_types = ipnet_match_types;
+		break;
+	default:
+		return (0);
+	}
 
 	for (i = 0; match_types[i].m_name != NULL; i++) {
-
 		if (strcmp(s, match_types[i].m_name) != 0)
 			continue;
 
 		n_checks++;
-		generate_check(&match_types[i]);
+		generate_check(match_types, i, interface->mac_type);
 		if (n_checks > 1)
 			emitop(OP_OR);
 	}
@@ -1416,11 +1478,9 @@
 	found_host = 0;
 
 	if (tokentype == ADDR_IP) {
-		hp = lgetipnodebyname(hostname, AF_INET,
-					0, &error_num);
+		hp = lgetipnodebyname(hostname, AF_INET, 0, &error_num);
 		if (hp == NULL) {
-			hp = getipnodebyname(hostname, AF_INET,
-							0, &error_num);
+			hp = getipnodebyname(hostname, AF_INET, 0, &error_num);
 			freehp = 1;
 		}
 		if (hp == NULL) {
@@ -1433,11 +1493,9 @@
 		}
 		inet_type = IPV4_ONLY;
 	} else if (tokentype == ADDR_IP6) {
-		hp = lgetipnodebyname(hostname, AF_INET6,
-					0, &error_num);
+		hp = lgetipnodebyname(hostname, AF_INET6, 0, &error_num);
 		if (hp == NULL) {
-			hp = getipnodebyname(hostname, AF_INET6,
-							0, &error_num);
+			hp = getipnodebyname(hostname, AF_INET6, 0, &error_num);
 			freehp = 1;
 		}
 		if (hp == NULL) {
@@ -1454,11 +1512,10 @@
 		switch (inet_type) {
 		case IPV4_ONLY:
 			/* Only IPv4 address is needed */
-			hp = lgetipnodebyname(hostname, AF_INET,
-						0, &error_num);
+			hp = lgetipnodebyname(hostname, AF_INET, 0, &error_num);
 			if (hp == NULL) {
-				hp = getipnodebyname(hostname, AF_INET,
-								0, &error_num);
+				hp = getipnodebyname(hostname, AF_INET,	0,
+				    &error_num);
 				freehp = 1;
 			}
 			if (hp != NULL) {
@@ -1467,11 +1524,11 @@
 			break;
 		case IPV6_ONLY:
 			/* Only IPv6 address is needed */
-			hp = lgetipnodebyname(hostname, AF_INET6,
-						0, &error_num);
+			hp = lgetipnodebyname(hostname, AF_INET6, 0,
+			    &error_num);
 			if (hp == NULL) {
-				hp = getipnodebyname(hostname, AF_INET6,
-								0, &error_num);
+				hp = getipnodebyname(hostname, AF_INET6, 0,
+				    &error_num);
 				freehp = 1;
 			}
 			if (hp != NULL) {
@@ -1481,10 +1538,10 @@
 		case IPV4_AND_IPV6:
 			/* Both IPv4 and IPv6 are needed */
 			hp = lgetipnodebyname(hostname, AF_INET6,
-					AI_ALL | AI_V4MAPPED, &error_num);
+			    AI_ALL | AI_V4MAPPED, &error_num);
 			if (hp == NULL) {
 				hp = getipnodebyname(hostname, AF_INET6,
-					AI_ALL | AI_V4MAPPED, &error_num);
+				    AI_ALL | AI_V4MAPPED, &error_num);
 				freehp = 1;
 			}
 			if (hp != NULL) {
@@ -1524,7 +1581,7 @@
 	 * The code below generates the filter.
 	 */
 	if (hp != NULL && hp->h_addrtype == AF_INET) {
-		ethertype_match(ETHERTYPE_IP);
+		ethertype_match(interface->network_type_ip);
 		emitop(OP_BRFL);
 		n = chain(n);
 		emitop(OP_OFFSET_LINK);
@@ -1560,7 +1617,8 @@
 		while (addr6ptr != NULL) {
 			if (IN6_IS_ADDR_V4MAPPED(addr6ptr)) {
 				if (first) {
-					ethertype_match(ETHERTYPE_IP);
+					ethertype_match(
+					    interface->network_type_ip);
 					emitop(OP_BRFL);
 					n = chain(n);
 					emitop(OP_OFFSET_LINK);
@@ -1604,7 +1662,8 @@
 						resolve_chain(n);
 						n = 0;
 					}
-					ethertype_match(ETHERTYPE_IPV6);
+					ethertype_match(
+					    interface->network_type_ipv6);
 					emitop(OP_BRFL);
 					n = chain(n);
 					emitop(OP_OFFSET_LINK);
@@ -1642,6 +1701,27 @@
 }
 
 /*
+ * Generate code to match on zoneid.  Depending on the requested
+ * direction, the value is compared at the destination zone offset, at
+ * the source zone offset, or at both with the two results OR'ed together.
+ * The arg zone passed in is in network byte order.
+ */
+static void
+zone_match(enum direction which, uint64_t zone)
+{
+	if (which == TO) {
+		compare_value_zone(IPNET_DSTZONE_OFFSET, zone);
+	} else if (which == FROM) {
+		compare_value_zone(IPNET_SRCZONE_OFFSET, zone);
+	} else if (which == ANY) {
+		/* Either endpoint may match: test both, then OR. */
+		compare_value_zone(IPNET_SRCZONE_OFFSET, zone);
+		compare_value_zone(IPNET_DSTZONE_OFFSET, zone);
+		emitop(OP_OR);
+	}
+}
+
+/*
  * Generate code to match an AppleTalk address.  The address
  * must be given as two numbers with a dot between
  *
@@ -1745,7 +1825,7 @@
 		if (ether_hostton(hostname, &e))
 			if (!arp_for_ether(hostname, &e))
 				pr_err("cannot obtain ether addr for %s",
-					hostname);
+				    hostname);
 		ep = &e;
 	}
 	memcpy(&addr, (ushort_t *)ep, 4);
@@ -1809,13 +1889,22 @@
 			emitop(OP_OFFSET_ZERO);
 		}
 	}
-	compare_value(ether_offset, 2, val);
+	compare_value(ether_offset, interface->network_type_len, val);
 	if (interface->mac_type == DL_ETHER ||
 	    interface->mac_type == DL_CSMACD) {
 		emitop(OP_OFFSET_POP);
 	}
 }
 
+/*
+ * Generate code to compare val against the field found at
+ * interface->network_type_offset, after emitting OP_OFFSET_ETHERTYPE.
+ * The literal 2 is passed as the second compare_value() argument,
+ * presumably the field length in bytes.
+ */
+static void
+ipnettype_match(int val)
+{
+	emitop(OP_OFFSET_ETHERTYPE);
+	compare_value(interface->network_type_offset, 2, val);
+}
+
 /*
  * Match a network address.  The host part
  * is masked out.  The network address may
@@ -1890,8 +1979,7 @@
 	} else {
 		sp = getservbyname(portname, NULL);
 		if (sp == NULL)
-			pr_err("invalid port number or name: %s",
-				portname);
+			pr_err("invalid port number or name: %s", portname);
 		port = ntohs(sp->s_port);
 	}
 
@@ -2216,7 +2304,7 @@
 		}
 
 		if (EQ("bootp") || EQ("dhcp")) {
-			ethertype_match(ETHERTYPE_IP);
+			ethertype_match(interface->network_type_ip);
 			emitop(OP_BRFL);
 			m = chain(0);
 			emitop(OP_OFFSET_LINK);
@@ -2241,7 +2329,7 @@
 		}
 
 		if (EQ("dhcp6")) {
-			ethertype_match(ETHERTYPE_IPV6);
+			ethertype_match(interface->network_type_ipv6);
 			emitop(OP_BRFL);
 			m = chain(0);
 			emitop(OP_OFFSET_LINK);
@@ -2343,7 +2431,7 @@
 			emitop(OP_OFFSET_POP);
 			emitop(OP_BRFL);
 			m = chain(0);
-			ethertype_match(ETHERTYPE_IP);
+			ethertype_match(interface->network_type_ip);
 			resolve_chain(m);
 			opstack++;
 			next();
@@ -2405,26 +2493,26 @@
 		}
 
 		if (EQ("slp")) {
-		    /* filter out TCP handshakes */
-		    emitop(OP_OFFSET_LINK);
-		    compare_value(9, 1, IPPROTO_TCP);
-		    emitop(OP_LOAD_CONST);
-		    emitval(52);
-		    emitop(OP_LOAD_CONST);
-		    emitval(2);
-		    emitop(OP_LOAD_SHORT);
-		    emitop(OP_GE);
-		    emitop(OP_AND);	/* proto == TCP && len < 52 */
-		    emitop(OP_NOT);
-		    emitop(OP_BRFL);	/* pkt too short to be a SLP call */
-		    m = chain(0);
+			/* filter out TCP handshakes */
+			emitop(OP_OFFSET_LINK);
+			compare_value(9, 1, IPPROTO_TCP);
+			emitop(OP_LOAD_CONST);
+			emitval(52);
+			emitop(OP_LOAD_CONST);
+			emitval(2);
+			emitop(OP_LOAD_SHORT);
+			emitop(OP_GE);
+			emitop(OP_AND);	/* proto == TCP && len < 52 */
+			emitop(OP_NOT);
+			emitop(OP_BRFL); /* pkt too short to be a SLP call */
+			m = chain(0);
 
-		    emitop(OP_OFFSET_POP);
-		    emitop(OP_OFFSET_SLP);
-		    resolve_chain(m);
-		    opstack++;
-		    next();
-		    break;
+			emitop(OP_OFFSET_POP);
+			emitop(OP_OFFSET_SLP);
+			resolve_chain(m);
+			opstack++;
+			next();
+			break;
 		}
 
 		if (EQ("ldap")) {
@@ -2439,6 +2527,16 @@
 			break;
 		}
 
+		if (EQ("zone")) {
+			next();
+			if (tokentype != NUMBER)
+				pr_err("zoneid expected");
+			zone_match(dir, BE_64((uint64_t)(tokenval)));
+			opstack++;
+			next();
+			break;
+		}
+
 		if (EQ("gateway")) {
 			next();
 			if (eaddr || tokentype != ALPHA)
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ipaddr.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ipaddr.c	Thu Nov 06 06:47:54 2008 -0500
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
@@ -87,7 +85,6 @@
 }
 
 extern char *inet_ntoa();
-extern boolean_t rflg;
 
 static struct hostdata *
 iplookup(struct in_addr ipaddr)
@@ -118,12 +115,12 @@
 	if (! rflg && sigsetjmp(nisjmp, 1) == 0) {
 		(void) snoop_alarm(3, wakeup);
 		hp = getipnodebyaddr((char *)&ipaddr, sizeof (int),
-			AF_INET, &error_num);
+		    AF_INET, &error_num);
 		if (hp == NULL && inet_lnaof(ipaddr) == 0) {
 			np = getnetbyaddr(inet_netof(ipaddr), AF_INET);
 			if (np)
 				return (addhost(AF_INET, &ipaddr, np->n_name,
-					np->n_aliases));
+				    np->n_aliases));
 		}
 		(void) snoop_alarm(0, wakeup);
 	}
@@ -244,14 +241,14 @@
 					    aliases[ind] != NULL;
 					    ind++) {
 						(void) fprintf(namefile, " %s",
-								aliases[ind]);
+						    aliases[ind]);
 					}
 				}
 				(void) fprintf(namefile, "\n");
 			}
 		} else if (family == AF_INET6) {
 			np = (char *)inet_ntop(AF_INET6, (void *)ipaddr, aname,
-					sizeof (aname));
+			    sizeof (aname));
 			if (np) {
 				(void) fprintf(namefile, "%s\t%s", np, name);
 				if (aliases) {
@@ -259,14 +256,14 @@
 					    aliases[ind] != NULL;
 					    ind++) {
 						(void) fprintf(namefile, " %s",
-								aliases[ind]);
+						    aliases[ind]);
 					}
 				}
 				(void) fprintf(namefile, "\n");
 			}
 		} else {
 			(void) fprintf(stderr, "addhost: unknown family %d\n",
-				family);
+			    family);
 		}
 	}
 	return (n);
@@ -386,7 +383,7 @@
 					}
 					/* found ipv6 addr */
 					hp->h_addr_list[ind] =
-						(char *)&h6->h6_addr;
+					    (char *)&h6->h6_addr;
 					ind++;
 				}
 			}
@@ -416,8 +413,8 @@
 						hp->h_addr_list[ind] =
 						    (char *)&h46_addr[ind];
 						IN6_INADDR_TO_V4MAPPED(
-							&h->h4_addr,
-							&h46_addr[ind]);
+						    &h->h4_addr,
+						    &h46_addr[ind]);
 						ind++;
 					}
 				}
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS	*/
-
 #include <stdio.h>
 #include <stddef.h>
 #include <ctype.h>
@@ -44,6 +42,8 @@
 #include <netinet/if_ether.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
+#include <inet/ip.h>
+#include <inet/ip6.h>
 #include <netdb.h>
 #include <rpc/rpc.h>
 #include <setjmp.h>
@@ -82,21 +82,154 @@
 #define	IPV6_ONLY	1
 #define	IPV4_AND_IPV6	2
 
-/*
- * The following constants represent the offsets in bytes from the beginning
- * of the packet of the link and IP(v6) layer source/destination/type fields,
- * initialized for Ethernet. Media specific code can set any unavailable
- * link layer property's offset to -1 to indicate that the property's value
- * is not available from the frame.
- */
-static int link_header_len = 14, link_type_offset = 12;
-static int link_dest_offset = 0, link_src_offset = 6;
-static int link_addr_len = 6;
+/*
+ * One row of a per-media transport lookup table.  Each row pairs a
+ * transport protocol number with the network protocol type it can be
+ * carried in, plus the position of the transport protocol field within
+ * that network header.  A row whose transport_protocol is -1 ends a table.
+ */
+typedef struct {
+	int	transport_protocol;
+	int	network_protocol;
+	/*
+	 * offset is the offset in bytes from the beginning
+	 * of the network protocol header to where the transport
+	 * protocol type is.
+	 */
+	int	offset;
+} transport_protocol_table_t;
+
+/*
+ * One row of a per-media network protocol table: a protocol keyword
+ * (compared with strcmp() by the lookup code) and the type value that is
+ * handed to the media's match function.  A row whose nmt_val is -1 ends
+ * a table.
+ */
+typedef struct network_table {
+	char *nmt_name;
+	int nmt_val;
+} network_table_t;
+
+/*
+ * Protocol keyword to ethertype map used for Ethernet links.
+ *
+ * The keys must be spelled exactly as the lookup code passes them to
+ * strcmp().  pf_primary() looks up "rarp", "pppoed" and "pppoes", so
+ * those are the spellings used here; a key that does not match simply
+ * causes no type match to be emitted.  The entry whose nmt_val is -1
+ * terminates the table.
+ *
+ * NOTE(review): pf_primary() also calls pf_matchfn("pppoe") and
+ * pf_matchfn("vlan-id"), for which there is no entry -- confirm whether
+ * those call sites should look up "pppoed" and "vlan" instead.
+ */
+static network_table_t ether_network_mapping_table[] = {
+	{ "pup", ETHERTYPE_PUP },
+	{ "ip", ETHERTYPE_IP },
+	{ "arp", ETHERTYPE_ARP },
+	{ "rarp", ETHERTYPE_REVARP },
+	{ "at", ETHERTYPE_AT },
+	{ "aarp", ETHERTYPE_AARP },
+	{ "vlan", ETHERTYPE_VLAN },
+	{ "ip6", ETHERTYPE_IPV6 },
+	{ "slow", ETHERTYPE_SLOW },
+	{ "pppoed", ETHERTYPE_PPPOED },
+	{ "pppoes", ETHERTYPE_PPPOES },
+	{ "NULL", -1 }
+
+};
+
+/*
+ * Protocol keyword to type value map used for DL_IB links.  The contents
+ * mirror the Ethernet table.
+ *
+ * The keys must be spelled exactly as the lookup code passes them to
+ * strcmp().  pf_primary() looks up "rarp", "pppoed" and "pppoes", so
+ * those are the spellings used here; a key that does not match simply
+ * causes no type match to be emitted.  The entry whose nmt_val is -1
+ * terminates the table.
+ *
+ * NOTE(review): pf_primary() also calls pf_matchfn("pppoe") and
+ * pf_matchfn("vlan-id"), for which there is no entry -- confirm whether
+ * those call sites should look up "pppoed" and "vlan" instead.
+ */
+static network_table_t ib_network_mapping_table[] = {
+	{ "pup", ETHERTYPE_PUP },
+	{ "ip", ETHERTYPE_IP },
+	{ "arp", ETHERTYPE_ARP },
+	{ "rarp", ETHERTYPE_REVARP },
+	{ "at", ETHERTYPE_AT },
+	{ "aarp", ETHERTYPE_AARP },
+	{ "vlan", ETHERTYPE_VLAN },
+	{ "ip6", ETHERTYPE_IPV6 },
+	{ "slow", ETHERTYPE_SLOW },
+	{ "pppoed", ETHERTYPE_PPPOED },
+	{ "pppoes", ETHERTYPE_PPPOES },
+	{ "NULL", -1 }
+
+};
+
+/*
+ * Protocol keyword to type value map used for DL_IPNET links.  Only "ip"
+ * and "ip6" are present.  Each value is DL_IPNETINFO_VERSION shifted
+ * into the upper byte, OR'ed with the IP version; pf_match_ipnettype()
+ * compares it (after htons()) against two bytes at dl_link_type_offset.
+ * The entry whose nmt_val is -1 terminates the table.
+ */
+static network_table_t ipnet_network_mapping_table[] = {
+	{ "ip", (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION) },
+	{ "ip6", (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION) },
+	{ "NULL", -1 }
+
+};
+
+/*
+ * Transport lookup table for Ethernet links.  pf_check_transport_protocol()
+ * scans it for rows whose transport_protocol equals the requested one; for
+ * each such row it matches the row's ethertype and then compares one byte
+ * at (offset + link header length) with the transport protocol number.
+ */
+static transport_protocol_table_t ether_transport_mapping_table[] = {
+	{IPPROTO_TCP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_TCP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_UDP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_UDP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_OSPF, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_OSPF, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_SCTP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_SCTP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_ICMP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_ICMPV6, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_ENCAP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_ESP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_ESP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_AH, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_AH, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{-1, 0, 0}	/* must be the final entry */
+};
 
-#define	IPV4_SRCADDR_OFFSET	(link_header_len + 12)
-#define	IPV4_DSTADDR_OFFSET	(link_header_len + 16)
-#define	IPV6_SRCADDR_OFFSET	(link_header_len + 8)
-#define	IPV6_DSTADDR_OFFSET	(link_header_len + 24)
+/*
+ * Transport lookup table for DL_IPNET links.  It lists the same
+ * transport protocols as the Ethernet table, but the network protocol
+ * column holds the ipnet type value (DL_IPNETINFO_VERSION in the upper
+ * byte, IP version in the lower byte) instead of an ethertype.
+ */
+static transport_protocol_table_t ipnet_transport_mapping_table[] = {
+	{IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	    IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	    IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	    IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	    IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	    IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	    IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	    IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	    IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_ICMP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	    IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_ICMPV6, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	    IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_ENCAP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	    IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	    IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	    IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	    IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	    IPV6_TYPE_HEADER_OFFSET},
+	{-1, 0, 0}	/* must be the final entry */
+};
+
+/*
+ * Transport lookup table for DL_IB links.  Its rows are the same as those
+ * of the Ethernet table: each transport protocol is paired with an
+ * ethertype and the offset of the protocol field in that network header.
+ */
+static transport_protocol_table_t ib_transport_mapping_table[] = {
+	{IPPROTO_TCP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_TCP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_UDP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_UDP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_OSPF, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_OSPF, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_SCTP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_SCTP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_ICMP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_ICMPV6, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_ENCAP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_ESP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_ESP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{IPPROTO_AH, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
+	{IPPROTO_AH, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
+	{-1, 0, 0}	/* must be the final entry */
+};
+
+/*
+ * Media specific settings used while generating the packet filter.  The
+ * offsets are in bytes from the beginning of the packet.  Media specific
+ * setup code sets a link layer property's offset to -1 to indicate that
+ * the property's value is not available from the frame.
+ */
+typedef struct datalink {
+	uint_t	dl_type;	/* DL_ETHER, DL_IB or DL_IPNET */
+	/* emits the comparison of the link type field for this media */
+	void	(*dl_match_fn)(uint_t datatype);
+	/* transport protocol rows for this media, ended by a -1 row */
+	transport_protocol_table_t *dl_transport_mapping_table;
+	/* protocol keyword rows for this media, ended by a -1 row */
+	network_table_t *dl_net_map_tbl;
+	int dl_link_header_len;		/* added to network header offsets */
+	int dl_link_type_offset;	/* where the link type field starts */
+	int dl_link_dest_offset;	/* destination link address, or -1 */
+	int dl_link_src_offset;		/* source link address, or -1 */
+	int dl_link_addr_len;		/* link address length in bytes */
+} datalink_t;
+
+/* Settings for the interface being filtered; filled in from its mac_type. */
+datalink_t	dl;
+
+#define	IPV4_SRCADDR_OFFSET	(dl.dl_link_header_len + 12)
+#define	IPV4_DSTADDR_OFFSET	(dl.dl_link_header_len + 16)
+#define	IPV6_SRCADDR_OFFSET	(dl.dl_link_header_len + 8)
+#define	IPV6_DSTADDR_OFFSET	(dl.dl_link_header_len + 24)
+
+#define	IPNET_SRCZONE_OFFSET 8
+#define	IPNET_DSTZONE_OFFSET 16
 
 static int inBrace = 0, inBraceOR = 0;
 static int foundOR = 0;
@@ -116,6 +249,8 @@
 static void pf_clear_offset_register();
 static void pf_emit_load_offset(uint_t offset);
 static void pf_match_ethertype(uint_t ethertype);
+static void pf_match_ipnettype(uint_t type);
+static void pf_match_ibtype(uint_t type);
 static void pf_check_transport_protocol(uint_t transport_protocol);
 static void pf_compare_value_mask_generic(int offset, uint_t len,
     uint_t val, int mask, uint_t op);
@@ -441,6 +576,24 @@
 }
 
 /*
+ * Like pf_compare_value() but compare on a 64-bit zoneid value.
+ * The value is compared 16 bits at a time: each word of val is tested
+ * for equality with the packet word at the matching position, and every
+ * result after the first is AND'ed with the result so far.
+ * The argument val passed in is in network byte order.
+ */
+static void
+pf_compare_zoneid(int offset, uint64_t val)
+{
+	uint16_t *word = (uint16_t *)&val;
+	int idx = 0;
+
+	while (idx < sizeof (uint64_t) / 2) {
+		pf_emit(ENF_PUSHWORD + offset / 2 + idx);
+		pf_emit(ENF_PUSHLIT | ENF_EQ);
+		pf_emit(word[idx]);
+		/* The first word has no earlier result to combine with. */
+		if (idx > 0)
+			pf_emit(ENF_AND);
+		idx++;
+	}
+}
+
+/*
  * Generate pf code to match an IPv4 or IPv6 address.
  */
 static void
@@ -549,8 +702,16 @@
 	}
 
 	if (hp != NULL && hp->h_addrtype == AF_INET) {
-		pf_match_ethertype(ETHERTYPE_IP);
-		pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2);
+		for (; dl.dl_net_map_tbl->nmt_val != -1;
+		    dl.dl_net_map_tbl++) {
+			if (strcmp("ip",
+				dl.dl_net_map_tbl->nmt_name) == 0) {
+				dl.dl_match_fn(
+					dl.dl_net_map_tbl->nmt_val);
+			}
+		}
+		if (dl.dl_type == DL_ETHER)
+			pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2);
 		h_addr_index = 0;
 		addr4ptr = (uint_t *)hp->h_addr_list[h_addr_index];
 		while (addr4ptr != NULL) {
@@ -579,9 +740,21 @@
 		while (addr6ptr != NULL) {
 			if (IN6_IS_ADDR_V4MAPPED(addr6ptr)) {
 				if (first) {
-					pf_match_ethertype(ETHERTYPE_IP);
-					pf_check_vlan_tag(
-					    ENCAP_ETHERTYPE_OFF/2);
+					for (; dl.dl_net_map_tbl->nmt_val != -1;
+					    dl.dl_net_map_tbl++) {
+						if (strcmp("ip",
+							dl.dl_net_map_tbl->
+							nmt_name) == 0) {
+							dl.dl_match_fn(
+								dl.
+								dl_net_map_tbl->
+								nmt_val);
+						}
+					}
+					if (dl.dl_type == DL_ETHER) {
+						pf_check_vlan_tag(
+							ENCAP_ETHERTYPE_OFF/2);
+					}
 					pass++;
 				}
 				IN6_V4MAPPED_TO_INADDR(addr6ptr,
@@ -616,9 +789,21 @@
 		while (addr6ptr != NULL) {
 			if (!IN6_IS_ADDR_V4MAPPED(addr6ptr)) {
 				if (first) {
-					pf_match_ethertype(ETHERTYPE_IPV6);
-					pf_check_vlan_tag(
-					    ENCAP_ETHERTYPE_OFF/2);
+					for (; dl.dl_net_map_tbl->nmt_val != -1;
+					    dl.dl_net_map_tbl++) {
+						if (strcmp("ip6",
+							dl.dl_net_map_tbl->
+							nmt_name) == 0) {
+							dl.dl_match_fn(
+								dl.
+								dl_net_map_tbl->
+								nmt_val);
+						}
+					}
+					if (dl.dl_type == DL_ETHER) {
+						pf_check_vlan_tag(
+							ENCAP_ETHERTYPE_OFF/2);
+					}
 					pass++;
 				}
 				if (addr6offset == -1) {
@@ -716,17 +901,17 @@
 
 	switch (which) {
 	case TO:
-		pf_compare_address(link_dest_offset, link_addr_len,
+		pf_compare_address(dl.dl_link_dest_offset, dl.dl_link_addr_len,
 		    (uchar_t *)ep);
 		break;
 	case FROM:
-		pf_compare_address(link_src_offset, link_addr_len,
+		pf_compare_address(dl.dl_link_src_offset, dl.dl_link_addr_len,
 		    (uchar_t *)ep);
 		break;
 	case ANY:
-		pf_compare_address(link_dest_offset, link_addr_len,
+		pf_compare_address(dl.dl_link_dest_offset, dl.dl_link_addr_len,
 		    (uchar_t *)ep);
-		pf_compare_address(link_src_offset, link_addr_len,
+		pf_compare_address(dl.dl_link_src_offset, dl.dl_link_addr_len,
 		    (uchar_t *)ep);
 		pf_emit(ENF_OR);
 		break;
@@ -786,6 +971,31 @@
 }
 
 /*
+ * Emit code to match on the source zoneid, the destination zoneid, or
+ * either of the two, as selected by the direction argument.  pr_err() is
+ * called when the link type is not DL_IPNET.
+ * The zoneid passed in is in network byte order.
+ */
+static void
+pf_match_zone(enum direction which, uint64_t zoneid)
+{
+	if (dl.dl_type != DL_IPNET)
+		pr_err("zone filter option unsupported on media");
+
+	if (which == TO) {
+		pf_compare_zoneid(IPNET_DSTZONE_OFFSET, zoneid);
+	} else if (which == FROM) {
+		pf_compare_zoneid(IPNET_SRCZONE_OFFSET, zoneid);
+	} else if (which == ANY) {
+		/* Match when either the source or the destination matches. */
+		pf_compare_zoneid(IPNET_SRCZONE_OFFSET, zoneid);
+		pf_compare_zoneid(IPNET_DSTZONE_OFFSET, zoneid);
+		pf_emit(ENF_OR);
+	}
+}
+
+/*
  * A helper function to keep the code to emit instructions
  * to change the offset register in one place.
  *
@@ -863,7 +1073,8 @@
 		/*
 		 * Check the ethertype.
 		 */
-		pf_compare_value(link_type_offset, 2, htons(ETHERTYPE_VLAN));
+		pf_compare_value(dl.dl_link_type_offset, 2,
+		    htons(ETHERTYPE_VLAN));
 
 		/*
 		 * And if it's not VLAN, don't load offset to the offset
@@ -929,38 +1140,20 @@
 	else
 		pf_check_vlan_tag(2);
 
-	pf_compare_value(link_type_offset, 2, htons(ethertype));
+	pf_compare_value(dl.dl_link_type_offset, 2, htons(ethertype));
 }
 
-typedef struct {
-	int	transport_protocol;
-	int	network_protocol;
-	/*
-	 * offset is the offset in bytes from the beginning
-	 * of the network protocol header to where the transport
-	 * protocol type is.
-	 */
-	int	offset;
-} transport_protocol_table_t;
+/*
+ * Emit a comparison of the two bytes at dl.dl_link_type_offset against
+ * the given type value, converted with htons().
+ */
+static void
+pf_match_ipnettype(uint_t type)
+{
+	uint_t wire_type = htons(type);
+
+	pf_compare_value(dl.dl_link_type_offset, 2, wire_type);
+}
 
-static transport_protocol_table_t mapping_table[] = {
-	{IPPROTO_TCP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_TCP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_UDP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_UDP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_OSPF, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_OSPF, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_SCTP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_SCTP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_ICMP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_ICMPV6, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_ENCAP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_ESP, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_ESP, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_AH, ETHERTYPE_IP,   IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_AH, ETHERTYPE_IPV6, IPV6_TYPE_HEADER_OFFSET},
-	{-1, 0, 0}	/* must be the final entry */
-};
+/*
+ * Emit a comparison of the two bytes at dl.dl_link_type_offset against
+ * the given type value, converted with htons().
+ */
+static void
+pf_match_ibtype(uint_t type)
+{
+	uint_t wire_type = htons(type);
+
+	pf_compare_value(dl.dl_link_type_offset, 2, wire_type);
+}
 
 /*
  * This function uses the table above to generate a
@@ -979,14 +1172,17 @@
 	int i = 0;
 	uint_t number_of_matches = 0;
 
-	for (i = 0; mapping_table[i].transport_protocol != -1; i++) {
+	for (; dl.dl_transport_mapping_table->transport_protocol != -1;
+	    dl.dl_transport_mapping_table++) {
 		if (transport_protocol ==
-		    (uint_t)mapping_table[i].transport_protocol) {
+		    (uint_t)dl.dl_transport_mapping_table->transport_protocol) {
 			number_of_matches++;
-			pf_match_ethertype(mapping_table[i].network_protocol);
+			dl.dl_match_fn(dl.dl_transport_mapping_table->
+			    network_protocol);
 			pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2);
 			pf_compare_value(
-			    mapping_table[i].offset + link_header_len, 1,
+			    dl.dl_transport_mapping_table->offset +
+			    dl.dl_link_header_len, 1,
 			    transport_protocol);
 			pf_emit(ENF_AND);
 			if (number_of_matches > 1) {
@@ -1003,6 +1199,15 @@
 }
 
 static void
+pf_matchfn(char *proto)
+{
+	for (; dl.dl_net_map_tbl->nmt_val != -1; dl.dl_net_map_tbl++) {
+		if (strcmp(proto, dl.dl_net_map_tbl->nmt_name) == 0)
+			dl.dl_match_fn(dl.dl_net_map_tbl->nmt_val);
+	}
+}
+
+static void
 pf_primary()
 {
 	for (;;) {
@@ -1010,21 +1215,21 @@
 			break;
 
 		if (EQ("ip")) {
-			pf_match_ethertype(ETHERTYPE_IP);
+			pf_matchfn("ip");
 			opstack++;
 			next();
 			break;
 		}
 
 		if (EQ("ip6")) {
-			pf_match_ethertype(ETHERTYPE_IPV6);
+			pf_matchfn("ip6");
 			opstack++;
 			next();
 			break;
 		}
 
 		if (EQ("pppoe")) {
-			pf_match_ethertype(ETHERTYPE_PPPOED);
+			pf_matchfn("pppoe");
 			pf_match_ethertype(ETHERTYPE_PPPOES);
 			pf_emit(ENF_OR);
 			opstack++;
@@ -1033,28 +1238,28 @@
 		}
 
 		if (EQ("pppoed")) {
-			pf_match_ethertype(ETHERTYPE_PPPOED);
+			pf_matchfn("pppoed");
 			opstack++;
 			next();
 			break;
 		}
 
 		if (EQ("pppoes")) {
-			pf_match_ethertype(ETHERTYPE_PPPOES);
+			pf_matchfn("pppoes");
 			opstack++;
 			next();
 			break;
 		}
 
 		if (EQ("arp")) {
-			pf_match_ethertype(ETHERTYPE_ARP);
+			pf_matchfn("arp");
 			opstack++;
 			next();
 			break;
 		}
 
 		if (EQ("vlan")) {
-			pf_match_ethertype(ETHERTYPE_VLAN);
+			pf_matchfn("vlan");
 			pf_compare_value_mask_neq(VLAN_ID_OFFSET, 2,
 			    0, VLAN_ID_MASK);
 			pf_emit(ENF_AND);
@@ -1067,7 +1272,7 @@
 			next();
 			if (tokentype != NUMBER)
 				pr_err("VLAN ID expected");
-			pf_match_ethertype(ETHERTYPE_VLAN);
+			pf_matchfn("vlan-id");
 			pf_compare_value_mask(VLAN_ID_OFFSET, 2, tokenval,
 			    VLAN_ID_MASK);
 			pf_emit(ENF_AND);
@@ -1077,7 +1282,7 @@
 		}
 
 		if (EQ("rarp")) {
-			pf_match_ethertype(ETHERTYPE_REVARP);
+			pf_matchfn("rarp");
 			opstack++;
 			next();
 			break;
@@ -1208,7 +1413,7 @@
 				pr_err("IP proto type expected");
 			pf_check_vlan_tag(ENCAP_ETHERTYPE_OFF/2);
 			pf_compare_value(
-			    IPV4_TYPE_HEADER_OFFSET + link_header_len, 1,
+			    IPV4_TYPE_HEADER_OFFSET + dl.dl_link_header_len, 1,
 			    tokenval);
 			opstack++;
 			next();
@@ -1217,7 +1422,7 @@
 
 		if (EQ("broadcast")) {
 			pf_clear_offset_register();
-			pf_compare_value(link_dest_offset, 4, 0xffffffff);
+			pf_compare_value(dl.dl_link_dest_offset, 4, 0xffffffff);
 			opstack++;
 			next();
 			break;
@@ -1225,7 +1430,8 @@
 
 		if (EQ("multicast")) {
 			pf_clear_offset_register();
-			pf_compare_value_mask(link_dest_offset, 1, 0x01, 0x01);
+			pf_compare_value_mask(
+			    dl.dl_link_dest_offset, 1, 0x01, 0x01);
 			opstack++;
 			next();
 			break;
@@ -1254,6 +1460,16 @@
 			break;
 		}
 
+		if (EQ("zone")) {
+			next();
+			if (tokentype != NUMBER)
+				pr_err("zoneid expected after inet");
+			pf_match_zone(dir, BE_64((uint64_t)(tokenval)));
+			opstack++;
+			next();
+			break;
+		}
+
 		/*
 		 * Give up on anything that's obviously
 		 * not a primary.
@@ -1357,11 +1573,44 @@
 	/*
 	 * Set media specific packet offsets that this code uses.
 	 */
+	if (interface->mac_type == DL_ETHER) {
+		dl.dl_type = DL_ETHER;
+		dl.dl_match_fn = pf_match_ethertype;
+		dl.dl_transport_mapping_table =
+		    &ether_transport_mapping_table[0];
+		dl.dl_net_map_tbl =
+		    &ether_network_mapping_table[0];
+		dl.dl_link_header_len = 14;
+		dl.dl_link_type_offset = 12;
+		dl.dl_link_dest_offset = 0;
+		dl.dl_link_src_offset = 6;
+		dl.dl_link_addr_len = 6;
+	}
+
 	if (interface->mac_type == DL_IB) {
-		link_header_len = 4;
-		link_type_offset = 0;
-		link_dest_offset = link_src_offset = -1;
-		link_addr_len = 20;
+		dl.dl_type = DL_IB;
+		dl.dl_link_header_len = 4;
+		dl.dl_link_type_offset = 0;
+		dl.dl_link_dest_offset = dl.dl_link_src_offset = -1;
+		dl.dl_link_addr_len = 20;
+		dl.dl_match_fn = pf_match_ibtype;
+		dl.dl_transport_mapping_table =
+		    &ib_transport_mapping_table[0];
+		dl.dl_net_map_tbl =
+		    &ib_network_mapping_table[0];
+	}
+
+	if (interface->mac_type == DL_IPNET) {
+		dl.dl_type = DL_IPNET;
+		dl.dl_link_header_len = 24;
+		dl.dl_link_type_offset = 0;
+		dl.dl_link_dest_offset = dl.dl_link_src_offset = -1;
+		dl.dl_link_addr_len = -1;
+		dl.dl_match_fn = pf_match_ipnettype;
+		dl.dl_transport_mapping_table =
+		    &ipnet_transport_mapping_table[0];
+		dl.dl_net_map_tbl =
+		    &ipnet_network_mapping_table[0];
 	}
 
 	next();
--- a/usr/src/cmd/devfsadm/misc_link.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/cmd/devfsadm/misc_link.c	Thu Nov 06 06:47:54 2008 -0500
@@ -93,8 +93,8 @@
 	},
 	{ "pseudo", "ddi_pseudo",
 	    "(^lockstat$)|(^SUNW,rtvc$)|(^vol$)|(^log$)|(^sy$)|"
-	    "(^ksyms$)|(^clone$)|(^tl$)|(^tnf$)|(^kstat$)|(^mdesc$)|"
-	    "(^eeprom$)|(^ptsl$)|(^mm$)|(^wc$)|(^dump$)|(^cn$)|(^lo$)|(^ptm$)|"
+	    "(^ksyms$)|(^clone$)|(^tl$)|(^tnf$)|(^kstat$)|(^mdesc$)|(^eeprom$)|"
+	    "(^ptsl$)|(^mm$)|(^wc$)|(^dump$)|(^cn$)|(^svvslo$)|(^ptm$)|"
 	    "(^ptc$)|(^openeepr$)|(^poll$)|(^sysmsg$)|(^random$)|(^trapstat$)|"
 	    "(^cryptoadm$)|(^crypto$)|(^pool$)|(^poolctl$)|(^bl$)|(^kmdb$)|"
 	    "(^sysevent$)|(^kssl$)|(^physmem$)",
@@ -104,7 +104,7 @@
 	    "(^ip$)|(^tcp$)|(^udp$)|(^icmp$)|(^sctp$)|"
 	    "(^ip6$)|(^tcp6$)|(^udp6$)|(^icmp6$)|(^sctp6$)|"
 	    "(^rts$)|(^arp$)|(^ipsecah$)|(^ipsecesp$)|(^keysock$)|(^spdsock$)|"
-	    "(^nca$)|(^rds$)|(^sdp$)",
+	    "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)",
 	    TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name
 	},
 	{ "pseudo", "ddi_pseudo",
--- a/usr/src/cmd/truss/codes.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/cmd/truss/codes.c	Thu Nov 06 06:47:54 2008 -0500
@@ -298,6 +298,7 @@
 	 */
 	{ (uint_t)DLIOCRAW,	"DLIOCRAW",	NULL },
 	{ (uint_t)DLIOCNATIVE,	"DLIOCNATIVE",	NULL },
+	{ (uint_t)DLIOCIPNETINFO, "DLIOCIPNETINFO", NULL},
 
 	{ (uint_t)LDOPEN,	"LDOPEN",	NULL },
 	{ (uint_t)LDCLOSE,	"LDCLOSE",	NULL },
@@ -607,8 +608,8 @@
 	{ (uint_t)SIOCTMYADDR,		"SIOCTMYADDR",	"sioc_addrreq" },
 	{ (uint_t)SIOCTONLINK,		"SIOCTONLINK",	"sioc_addrreq" },
 	{ (uint_t)SIOCTMYSITE,		"SIOCTMYSITE",	"sioc_addrreq" },
-	{ (uint_t)SIOCGTUNPARAM,	"SIOCGTUNPARAM",	"iftun_req" },
-	{ (uint_t)SIOCSTUNPARAM,	"SIOCSTUNPARAM",	"iftun_req" },
+	{ (uint_t)SIOCGTUNPARAM,        "SIOCGTUNPARAM",        "iftun_req" },
+	{ (uint_t)SIOCSTUNPARAM,        "SIOCSTUNPARAM",        "iftun_req" },
 	{ (uint_t)SIOCFIPSECONFIG,	"SIOCFIPSECONFIG",	NULL },
 	{ (uint_t)SIOCSIPSECONFIG,	"SIOCSIPSECONFIG",	NULL },
 	{ (uint_t)SIOCDIPSECONFIG,	"SIOCDIPSECONFIG",	NULL },
--- a/usr/src/lib/brand/native/zone/config.xml	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/lib/brand/native/zone/config.xml	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  Use is subject to license terms.
 
- ident	"%Z%%M%	%I%	%E% SMI"
-
  DO NOT EDIT THIS FILE.
 -->
 
@@ -65,6 +63,7 @@
 	<privilege set="default" name="net_bindmlp" />
 	<privilege set="default" name="net_icmpaccess" />
 	<privilege set="default" name="net_mac_aware" />
+	<privilege set="default" name="net_observability" />
 	<privilege set="default" name="net_privaddr" />
 	<privilege set="default" name="net_rawaccess" ip-type="exclusive" />
 	<privilege set="default" name="proc_chroot" />
--- a/usr/src/lib/brand/native/zone/platform.xml	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/lib/brand/native/zone/platform.xml	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  Use is subject to license terms.
 
- ident	"%Z%%M%	%I%	%E% SMI"
-
  DO NOT EDIT THIS FILE.
 -->
 
@@ -54,11 +52,9 @@
 	<device match="dtrace/*" />
 	<device match="dtrace/provider/*" />
 	<device match="fd" />
+	<device match="ipnet" />
 	<device match="kstat" />
 	<device match="lo0" />
-	<device match="lo1" />
-	<device match="lo2" />
-	<device match="lo3" />
 	<device match="log" />
 	<device match="logindmux" />
 	<device match="nsmb" />
@@ -73,6 +69,10 @@
 	<device match="rdsk" />
 	<device match="rmt" />
 	<device match="sad/user" />
+	<device match="svvslo0" />
+	<device match="svvslo1" />
+	<device match="svvslo2" />
+	<device match="svvslo3" />
 	<device match="swap" />
 	<device match="sysevent" />
 	<device match="tcp" />
--- a/usr/src/lib/brand/sn1/zone/platform.xml	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/lib/brand/sn1/zone/platform.xml	Thu Nov 06 06:47:54 2008 -0500
@@ -57,11 +57,9 @@
 	<device match="dtrace/*" />
 	<device match="dtrace/provider/*" />
 	<device match="fd" />
+	<device match="ipnet" />
 	<device match="kstat" />
 	<device match="lo0" />
-	<device match="lo1" />
-	<device match="lo2" />
-	<device match="lo3" />
 	<device match="log" />
 	<device match="logindmux" />
 	<device match="net/*" />
@@ -75,6 +73,10 @@
 	<device match="rdsk" />
 	<device match="rmt" />
 	<device match="sad/user" />
+	<device match="svvslo0" />
+	<device match="svvslo1" />
+	<device match="svvslo2" />
+	<device match="svvslo3" />
 	<device match="swap" />
 	<device match="sysevent" />
 	<device match="tcp" />
--- a/usr/src/lib/libdladm/common/libdladm.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/lib/libdladm/common/libdladm.c	Thu Nov 06 06:47:54 2008 -0500
@@ -340,6 +340,9 @@
 	case DL_ASYNC:
 		s = "AsyncCharacter";
 		break;
+	case DL_IPNET:
+		s = "IPNET";
+		break;
 	default:
 		s = "--";
 		break;
--- a/usr/src/lib/libdlpi/common/libdlpi.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/lib/libdlpi/common/libdlpi.c	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * Data-Link Provider Interface (Version 2)
  */
@@ -49,6 +47,7 @@
 #include <libdlpi.h>
 #include <libintl.h>
 #include <libinetutil.h>
+#include <dirent.h>
 
 #include "libdlpi_impl.h"
 
@@ -95,18 +94,35 @@
 dlpi_walk(dlpi_walkfunc_t *fn, void *arg, uint_t flags)
 {
 	struct i_dlpi_walklink_arg warg;
+	struct dirent *d;
+	DIR *dp;
 
 	warg.fn = fn;
 	warg.arg = arg;
 
-	(void) dladm_walk(i_dlpi_walk_link, &warg, DATALINK_CLASS_ALL,
-	    DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE);
+	if (flags & DLPI_DEVIPNET) {
+		if ((dp = opendir("/dev/ipnet")) == NULL)
+			return;
+
+		while ((d = readdir(dp)) != NULL) {
+			if (d->d_name[0] == '.')
+				continue;
+
+			if (warg.fn(d->d_name, warg.arg))
+				break;
+		}
+
+		(void) closedir(dp);
+	} else {
+		(void) dladm_walk(i_dlpi_walk_link, &warg, DATALINK_CLASS_ALL,
+		    DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE);
+	}
 }
 
 int
 dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags)
 {
-	int		retval;
+	int		retval, on = 1;
 	int		cnt;
 	ifspec_t	ifsp;
 	dlpi_impl_t  	*dip;
@@ -119,6 +135,13 @@
 	    !ifparse_ifspec(linkname, &ifsp))
 		return (DLPI_ELINKNAMEINVAL);
 
+	/*
+	 * Ensure flags values are sane.
+	 */
+	if ((flags & (DLPI_DEVIPNET|DLPI_DEVONLY)) ==
+	    (DLPI_DEVIPNET|DLPI_DEVONLY))
+		return (DLPI_EINVAL);
+
 	/* Allocate a new dlpi_impl_t. */
 	if ((dip = calloc(1, sizeof (dlpi_impl_t))) == NULL)
 		return (DL_SYSERR);
@@ -126,16 +149,18 @@
 	/* Fill in known/default libdlpi handle values. */
 	dip->dli_timeout = DLPI_DEF_TIMEOUT;
 	dip->dli_ppa = ifsp.ifsp_ppa;
-	dip->dli_mod_cnt = ifsp.ifsp_modcnt;
 	dip->dli_oflags = flags;
 	dip->dli_notifylistp = NULL;
 	dip->dli_note_processing = B_FALSE;
 	if (getenv("DLPI_DEVONLY") != NULL)
 		dip->dli_oflags |= DLPI_DEVONLY;
 
-	for (cnt = 0; cnt != dip->dli_mod_cnt; cnt++) {
-		(void) strlcpy(dip->dli_modlist[cnt], ifsp.ifsp_mods[cnt],
-		    DLPI_LINKNAME_MAX);
+	if (!(flags & DLPI_DEVIPNET)) {
+		dip->dli_mod_cnt = ifsp.ifsp_modcnt;
+		for (cnt = 0; cnt != dip->dli_mod_cnt; cnt++) {
+			(void) strlcpy(dip->dli_modlist[cnt],
+			    ifsp.ifsp_mods[cnt], DLPI_LINKNAME_MAX);
+		}
 	}
 
 	/* Copy linkname provided to the function. */
@@ -173,7 +198,7 @@
 			 * of failure from errno.
 			 */
 			retval = DL_SYSERR;
-		} else {
+		} else if (!(dip->dli_oflags & DLPI_DEVIPNET)) {
 			retval = i_dlpi_style2_open(dip);
 		}
 		if (retval != DLPI_SUCCESS) {
@@ -191,6 +216,12 @@
 		return (DLPI_ERAWNOTSUP);
 	}
 
+	if ((dip->dli_oflags & DLPI_IPNETINFO) &&
+	    ioctl(dip->dli_fd, DLIOCIPNETINFO, &on) < 0) {
+		dlpi_close((dlpi_handle_t)dip);
+		return (DLPI_EIPNETINFONOTSUP);
+	}
+
 	/*
 	 * We intentionally do not care if this request fails, as this
 	 * indicates the underlying DLPI device does not support Native mode
@@ -978,12 +1009,13 @@
 
 /*
  * This function attempts to open a device under the following namespaces:
+ *	/dev/ipnet	- if DLPI_DEVIPNET is specified
  *      /dev/net	- if a data-link with the specified name exists
  *	/dev		- if DLPI_DEVONLY is specified, or if there is no
  *			  data-link with the specified name (could be /dev/ip)
  *
- * In particular, this function is used to open a data-link node, or some
- * special node, such as "/dev/ip" node. It is usually be called firstly
+ * In particular, if DLPI_DEVIPNET is not specified, this function is used to
+ * open a data-link node, or "/dev/ip" node. It is usually called first
  * with style1 being B_TRUE, and if that fails and the return value is not
  * DLPI_ENOTSTYLE2, the function will again be called with style1 being
  * B_FALSE (style-1 open attempt first, then style-2 open attempt).
@@ -1019,7 +1051,13 @@
 	if (flags & DLPI_EXCL)
 		oflags |= O_EXCL;
 
-	if (style1 && !(flags & DLPI_DEVONLY)) {
+	if (flags & DLPI_DEVIPNET) {
+		(void) snprintf(path, sizeof (path), "/dev/ipnet/%s", provider);
+		if ((*fd = open(path, oflags)) != -1)
+			return (DLPI_SUCCESS);
+		else
+			return (errno == ENOENT ? DLPI_ENOLINK : DL_SYSERR);
+	} else if (style1 && !(flags & DLPI_DEVONLY)) {
 		char		driver[DLPI_LINKNAME_MAX];
 		char		device[DLPI_LINKNAME_MAX];
 		datalink_id_t	linkid;
@@ -1600,7 +1638,8 @@
 	"DLPI notification not supported by link",
 						/* DLPI_ENOTENOTSUP */
 	"invalid DLPI notification type",	/* DLPI_ENOTEINVAL */
-	"invalid DLPI notification id"		/* DLPI_ENOTEIDINVAL */
+	"invalid DLPI notification id",		/* DLPI_ENOTEIDINVAL */
+	"DLPI_IPNETINFO not supported"		/* DLPI_EIPNETINFONOTSUP */
 };
 
 const char *
@@ -1648,7 +1687,8 @@
 	{ DL_IB,		"Infiniband"		},
 	{ DL_IPV4,		"IPv4 Tunnel"		},
 	{ DL_IPV6,		"IPv6 Tunnel"		},
-	{ DL_WIFI,		"IEEE 802.11"		}
+	{ DL_WIFI,		"IEEE 802.11"		},
+	{ DL_IPNET,		"IPNET"			}
 };
 
 const char *
--- a/usr/src/lib/libdlpi/common/libdlpi.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/lib/libdlpi/common/libdlpi.h	Thu Nov 06 06:47:54 2008 -0500
@@ -26,8 +26,6 @@
 #ifndef _LIBDLPI_H
 #define	_LIBDLPI_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/types.h>
 #include <sys/dlpi.h>
 
@@ -63,7 +61,8 @@
 #define	DLPI_NOATTACH	0x0010	/* Do not attach PPA */
 #define	DLPI_NATIVE	0x0020	/* Open DLPI link in native mode */
 #define	DLPI_DEVONLY	0x0040	/* Open DLPI link under /dev only */
-
+#define	DLPI_DEVIPNET	0x0080	/* Open IP DLPI link under /dev/ipnet */
+#define	DLPI_IPNETINFO	0x0100	/* Request ipnetinfo headers */
 /*
  * Timeout to be used in DLPI-related operations, in seconds.
  */
@@ -93,6 +92,7 @@
 	DLPI_ENOTEINVAL,	/* invalid DLPI notification type */
 	DLPI_ENOTENOTSUP,	/* DLPI notification not supported by link */
 	DLPI_ENOTEIDINVAL,	/* invalid DLPI notification id */
+	DLPI_EIPNETINFONOTSUP,	/* DLPI_IPNETINFO not supported */
 	DLPI_ERRMAX		/* Highest + 1 libdlpi error code */
 };
 
--- a/usr/src/lib/libsecdb/exec_attr.txt	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/lib/libsecdb/exec_attr.txt	Thu Nov 06 06:47:54 2008 -0500
@@ -204,7 +204,9 @@
 Network Management:suser:cmd:::/usr/sbin/rndc:privs=file_dac_read
 Network Management:suser:cmd:::/usr/sbin/route:uid=0
 Network Management:suser:cmd:::/usr/sbin/snoop:uid=0
+Network Management:solaris:cmd:::/usr/sbin/snoop:privs=net_observability
 Network Management:suser:cmd:::/usr/sbin/spray:euid=0
+Network Observability:solaris:cmd:::/usr/sbin/snoop:privs=net_observability
 Network Link Security:solaris:cmd:::/sbin/dladm:euid=dladm;egid=sys;\
 	privs=sys_dl_config,net_rawaccess,proc_audit
 Network IPsec Management:solaris:cmd:::/usr/lib/inet/certdb:euid=0;privs=none
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/libsecdb/help/profiles/RtNetObservability.html	Thu Nov 06 06:47:54 2008 -0500
@@ -0,0 +1,36 @@
+<HTML>
+<!--
+    CDDL HEADER START
+
+    The contents of this file are subject to the terms of the
+    Common Development and Distribution License (the "License").
+    You may not use this file except in compliance with the License.
+
+    You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+    or http://www.opensolaris.org/os/licensing.
+    See the License for the specific language governing permissions
+    and limitations under the License.
+
+    When distributing Covered Code, include this CDDL HEADER in each
+    file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+    If applicable, add the following below this CDDL HEADER, with the
+    fields enclosed by brackets "[]" replaced with your own identifying
+    information: Portions Copyright [yyyy] [name of copyright owner]
+
+    CDDL HEADER END
+
+-- Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+-- Use is subject to license terms.
+-->
+<HEAD>
+	<TITLE> </TITLE>
+	 
+	
+</HEAD>
+<BODY>
+When Network Observability is in the Rights Included column, it grants the right to open the IP observability devices (/dev/ipnet/* and /dev/lo0) and so observe network traffic at the IP layer.
+<p>
+If Network Observability is grayed, then you are not entitled to Add or Remove this right.
+<p>
+</BODY>
+</HTML>
--- a/usr/src/lib/libsecdb/prof_attr.txt	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/lib/libsecdb/prof_attr.txt	Thu Nov 06 06:47:54 2008 -0500
@@ -59,7 +59,8 @@
 MMS User:::MMS Tape User:auths=solaris.mms.io.*;help=RtMMSUser.html
 NDMP Management:::Manage the NDMP service:auths=solaris.smf.manage.ndmp,solaris.smf.value.ndmp,solaris.smf.read.ndmp;help=RtNdmpMngmnt.html
 Network Autoconf:::Manage network auto-magic configuration via nwamd:auths=solaris.network.autoconf;help=RtNetAutoconf.html
-Network Management:::Manage the host and network configuration:auths=solaris.smf.manage.name-service-cache,solaris.smf.manage.bind,solaris.smf.value.routing,solaris.smf.manage.routing,solaris.smf.value.nwam,solaris.smf.manage.nwam,solaris.smf.manage.tnd,solaris.smf.manage.tnctl,solaris.smf.manage.wpa,solaris.smf.value.mdns,solaris.smf.manage.mdns;profiles=Network Wifi Management,Inetd Management,Network Autoconf;help=RtNetMngmnt.html
+Network Management:::Manage the host and network configuration:auths=solaris.smf.manage.name-service-cache,solaris.smf.manage.bind,solaris.smf.value.routing,solaris.smf.manage.routing,solaris.smf.value.nwam,solaris.smf.manage.nwam,solaris.smf.manage.tnd,solaris.smf.manage.tnctl,solaris.smf.manage.wpa,solaris.smf.value.mdns,solaris.smf.manage.mdns;profiles=Network Wifi Management,Inetd Management,Network Autoconf,Network Observability;help=RtNetMngmnt.html
+Network Observability:::Allow access to observability devices:privs=net_observability;help=RtNetObservability.html
 Network Security:::Manage network and host security:auths=solaris.smf.manage.ssh,solaris.smf.value.tnd;profiles=Network Wifi Security,Network Link Security,Network IPsec Management;help=RtNetSecure.html
 Network Wifi Management:::Manage wifi network configuration:auths=solaris.network.wifi.config;help=RtNetWifiMngmnt.html
 Network Wifi Security:::Manage wifi network security:auths=solaris.network.wifi.wep;help=RtNetWifiSecure.html
--- a/usr/src/pkgdefs/SUNWckr/prototype_com	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/SUNWckr/prototype_com	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
 #
 # This required package information file contains a list of package contents.
@@ -87,6 +85,7 @@
 f none kernel/drv/icmp6.conf 644 root sys
 f none kernel/drv/ip.conf 644 root sys
 f none kernel/drv/ip6.conf 644 root sys
+f none kernel/drv/ipnet.conf 644 root sys
 f none kernel/drv/ippctl.conf 644 root sys
 f none kernel/drv/ipsecah.conf 644 root sys
 f none kernel/drv/ipsecesp.conf 644 root sys
--- a/usr/src/pkgdefs/SUNWckr/prototype_i386	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/SUNWckr/prototype_i386	Thu Nov 06 06:47:54 2008 -0500
@@ -93,6 +93,7 @@
 f none kernel/drv/intel_nb5000.conf 644 root sys
 f none kernel/drv/ip 755 root sys
 f none kernel/drv/ip6 755 root sys
+f none kernel/drv/ipnet 755 root sys
 f none kernel/drv/ippctl 755 root sys
 f none kernel/drv/ipsecah 755 root sys
 f none kernel/drv/ipsecesp 755 root sys
@@ -310,6 +311,7 @@
 f none kernel/drv/amd64/intel_nb5000 755 root sys
 f none kernel/drv/amd64/ip 755 root sys
 f none kernel/drv/amd64/ip6 755 root sys
+f none kernel/drv/amd64/ipnet 755 root sys
 f none kernel/drv/amd64/ippctl 755 root sys
 f none kernel/drv/amd64/ipsecah 755 root sys
 f none kernel/drv/amd64/ipsecesp 755 root sys
--- a/usr/src/pkgdefs/SUNWckr/prototype_sparc	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/SUNWckr/prototype_sparc	Thu Nov 06 06:47:54 2008 -0500
@@ -87,6 +87,7 @@
 f none kernel/drv/sparcv9/icmp 755 root sys
 f none kernel/drv/sparcv9/icmp6 755 root sys
 f none kernel/drv/sparcv9/ip 755 root sys
+f none kernel/drv/sparcv9/ipnet 755 root sys
 f none kernel/drv/sparcv9/ip6 755 root sys
 f none kernel/drv/sparcv9/ippctl 755 root sys
 f none kernel/drv/sparcv9/ipsecah 755 root sys
--- a/usr/src/pkgdefs/SUNWcsd/prototype_com	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/SUNWcsd/prototype_com	Thu Nov 06 06:47:54 2008 -0500
@@ -70,6 +70,7 @@
 d none dev 755 root sys
 d none dev/dsk 755 root sys
 d none dev/fd 555 root root
+d none dev/ipnet 755 root sys
 d none dev/net 755 root sys
 d none dev/pts 755 root sys
 d none dev/rdsk 755 root sys
--- a/usr/src/pkgdefs/SUNWhea/prototype_com	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com	Thu Nov 06 06:47:54 2008 -0500
@@ -249,6 +249,7 @@
 f none usr/include/inet/ip6_asp.h 644 root bin
 f none usr/include/inet/ipclassifier.h 644 root bin
 f none usr/include/inet/ipdrop.h 644 root bin
+f none usr/include/inet/ipnet.h 644 root bin
 f none usr/include/inet/ipp_common.h 644 root bin
 d none usr/include/inet/kssl 755 root bin
 f none usr/include/inet/kssl/ksslapi.h 644 root bin
--- a/usr/src/pkgdefs/SUNWsvvs/prototype_com	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/SUNWsvvs/prototype_com	Thu Nov 06 06:47:54 2008 -0500
@@ -1,9 +1,7 @@
 #
-# Copyright 1990-1993,1998,2003 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
 # This required package information file contains a list of package contents.
 # The 'pkgmk' command uses this file to identify the contents of a package
 # and their location on the development machine when building the package.
@@ -26,13 +24,13 @@
 d none usr 755 root sys
 d none usr/include 755 root bin
 d none usr/include/sys 755 root bin
-f none usr/include/sys/lo.h 644 root bin
+f none usr/include/sys/svvslo.h 644 root bin
 f none usr/include/sys/tidg.h 644 root bin
 f none usr/include/sys/tivc.h 644 root bin
 f none usr/include/sys/tmux.h 644 root bin
 d none usr/kernel 755 root sys
 d none usr/kernel/drv 755 root sys
-f none usr/kernel/drv/lo.conf 644 root sys
+f none usr/kernel/drv/svvslo.conf 644 root sys
 f none usr/kernel/drv/tidg.conf 644 root sys
 f none usr/kernel/drv/tivc.conf 644 root sys
 f none usr/kernel/drv/tmux.conf 644 root sys
--- a/usr/src/pkgdefs/SUNWsvvs/prototype_i386	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/SUNWsvvs/prototype_i386	Thu Nov 06 06:47:54 2008 -0500
@@ -1,9 +1,7 @@
 #
-# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 # This required package information file contains a list of package contents.
 # The 'pkgmk' command uses this file to identify the contents of a package
 # and their location on the development machine when building the package.
@@ -28,7 +26,7 @@
 #
 # SUNWsvvs
 #
-f none usr/kernel/drv/lo 755 root sys
+f none usr/kernel/drv/svvslo 755 root sys
 f none usr/kernel/drv/tidg 755 root sys
 f none usr/kernel/drv/tivc 755 root sys
 f none usr/kernel/drv/tmux 755 root sys
@@ -37,7 +35,7 @@
 f none usr/kernel/strmod/lmodr 755 root sys
 f none usr/kernel/strmod/lmodt 755 root sys
 d none usr/kernel/drv/amd64 755 root sys  
-f none usr/kernel/drv/amd64/lo 755 root sys  
+f none usr/kernel/drv/amd64/svvslo 755 root sys  
 f none usr/kernel/drv/amd64/tidg 755 root sys  
 f none usr/kernel/drv/amd64/tivc 755 root sys  
 f none usr/kernel/drv/amd64/tmux 755 root sys  
--- a/usr/src/pkgdefs/SUNWsvvs/prototype_sparc	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/SUNWsvvs/prototype_sparc	Thu Nov 06 06:47:54 2008 -0500
@@ -1,9 +1,7 @@
 #
-# Copyright 1990-1993,2003 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
 # This required package information file contains a list of package contents.
 # The 'pkgmk' command uses this file to identify the contents of a package
 # and their location on the development machine when building the package.
@@ -29,7 +27,7 @@
 # SUNWsvvs
 #
 d none usr/kernel/drv/sparcv9 755 root sys  
-f none usr/kernel/drv/sparcv9/lo 755 root sys  
+f none usr/kernel/drv/sparcv9/svvslo 755 root sys  
 f none usr/kernel/drv/sparcv9/tidg 755 root sys  
 f none usr/kernel/drv/sparcv9/tivc 755 root sys  
 f none usr/kernel/drv/sparcv9/tmux 755 root sys  
--- a/usr/src/pkgdefs/common_files/i.devpolicy	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/common_files/i.devpolicy	Thu Nov 06 06:47:54 2008 -0500
@@ -50,7 +50,7 @@
 	rm -f $dest.$$
 
 	# potential additions
-	additions="aggr bge dnet keysock ibd icmp icmp6 ipsecah ipsecesp openeepr random spdsock vni ipf pfil scsi_vhci"
+	additions="aggr bge dnet keysock ibd icmp icmp6 ipnet ipsecah ipsecesp openeepr random spdsock vni ipf pfil scsi_vhci"
 
 	for dev in $additions
 	do
--- a/usr/src/pkgdefs/common_files/i.minorperm_i386	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/common_files/i.minorperm_i386	Thu Nov 06 06:47:54 2008 -0500
@@ -221,6 +221,7 @@
 wc:*
 ip:ip
 ip6:ip6
+ipnet:lo0
 icmp:icmp
 icmp6:icmp6
 udp:udp
--- a/usr/src/pkgdefs/common_files/i.minorperm_sparc	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/pkgdefs/common_files/i.minorperm_sparc	Thu Nov 06 06:47:54 2008 -0500
@@ -246,6 +246,7 @@
 icmp6:icmp6
 ip:ip
 ip6:ip6
+ipnet:lo0
 tcp:tcp
 tcp6:tcp6
 udp:udp
--- a/usr/src/tools/scripts/bfu.sh	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/tools/scripts/bfu.sh	Thu Nov 06 06:47:54 2008 -0500
@@ -7596,6 +7596,16 @@
 	#
 	rm -f $root/usr/platform/i86pc/lib/fm/topo/maps/Sun-Fire-*-topology.xml
 
+	#
+	# Remove old SVVS lo driver and related files.  It was renamed to
+	# svvslo.  The renamed files will be extracted using their new names.
+	#
+	rm -f $usr/kernel/drv/lo.conf
+	rm -f $usr/kernel/drv/lo
+	rm -f $usr/kernel/drv/sparcv9/lo
+	rm -f $usr/kernel/drv/amd64/lo
+	rm -f $usr/include/sys/lo.h
+
 	# End of pre-archive extraction hacks.
 
 	if [ $diskless = no -a $zone = global ]; then
--- a/usr/src/uts/common/Makefile.files	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/Makefile.files	Thu Nov 06 06:47:54 2008 -0500
@@ -499,6 +499,8 @@
 
 KEYSOCK_OBJS +=	keysockddi.o keysock.o keysock_opt_data.o
 
+IPNET_OBJS += ipnet.o
+
 SPDSOCK_OBJS += spdsockddi.o spdsock.o spdsock_opt_data.o
 
 IPSECESP_OBJS += ipsecespddi.o ipsecesp.o
@@ -973,7 +975,8 @@
 
 DEV_OBJS  +=	sdev_subr.o	sdev_vfsops.o	sdev_vnops.o	\
 		sdev_ptsops.o	sdev_comm.o	sdev_profile.o	\
-		sdev_ncache.o	sdev_netops.o	sdev_vtops.o
+		sdev_ncache.o	sdev_netops.o	sdev_vtops.o	\
+		sdev_ipnetops.o
 
 CTFS_OBJS +=	ctfs_all.o ctfs_cdir.o ctfs_ctl.o ctfs_event.o \
 		ctfs_latest.o ctfs_root.o ctfs_sym.o ctfs_tdir.o ctfs_tmpl.o
--- a/usr/src/uts/common/Makefile.rules	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/Makefile.rules	Thu Nov 06 06:47:54 2008 -0500
@@ -439,6 +439,10 @@
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)
 
+$(OBJS_DIR)/%.o:                $(UTSBASE)/common/inet/ipnet/%.c
+	$(COMPILE.c) -o $@ $<
+	$(CTFCONVERT_O)
+
 $(OBJS_DIR)/%.o:		$(UTSBASE)/common/inet/kssl/%.c
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)
@@ -1539,6 +1543,9 @@
 $(LINTS_DIR)/%.ln:		$(UTSBASE)/common/inet/ip/%.c
 	@($(LHEAD) $(LINT.c) $< $(LTAIL))
 
+$(LINTS_DIR)/%.ln: 	     	$(UTSBASE)/common/inet/ipnet/%.c
+	@($(LHEAD) $(LINT.c) $< $(LTAIL))
+
 $(LINTS_DIR)/%.ln:		$(UTSBASE)/common/inet/ipf/%.c
 	@($(LHEAD) $(LINT.c) $(IPFFLAGS) $< $(LTAIL))
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/dev/sdev_ipnetops.c	Thu Nov 06 06:47:54 2008 -0500
@@ -0,0 +1,226 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * vnode ops for the /dev/ipnet directory
+ *	The lookup is based on the ipnetif nodes held
+ *	in the ipnet module. We also override readdir
+ *	in order to delete ipnet nodes no longer in use.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/sunndi.h>
+#include <fs/fs_subr.h>
+#include <sys/fs/dv_node.h>
+#include <sys/fs/sdev_impl.h>
+#include <sys/policy.h>
+#include <inet/ipnet.h>
+#include <sys/zone.h>
+
+struct vnodeops		*devipnet_vnodeops;
+
+static void
+devipnet_fill_vattr(struct vattr *vap, dev_t dev)
+{
+	timestruc_t now;
+
+	*vap = sdev_vattr_chr;
+	vap->va_rdev = dev;
+	vap->va_mode |= 0666;
+
+	gethrestime(&now);
+	vap->va_atime = now;
+	vap->va_mtime = now;
+	vap->va_ctime = now;
+}
+
+/*
+ * Check if an ipnet sdev_node is still valid.
+ */
+int
+devipnet_validate(struct sdev_node *dv)
+{
+	dev_t	dev;
+
+	dev = ipnet_if_getdev(dv->sdev_name, getzoneid());
+	if (dev == (dev_t)-1)
+		return (SDEV_VTOR_INVALID);
+	if (getminor(SDEVTOV(dv)->v_rdev) != getminor(dev))
+		return (SDEV_VTOR_STALE);
+	return (SDEV_VTOR_VALID);
+}
+
+/*
+ * This callback is invoked from devname_lookup_func() to create
+ * an ipnet entry when the node is not found in the cache.
+ */
+/*ARGSUSED*/
+static int
+devipnet_create_rvp(struct sdev_node *ddv, char *nm,
+    void **arg, cred_t *cred, void *whatever, char *whichever)
+{
+	dev_t		dev;
+	struct vattr	*vap = (struct vattr *)arg;
+	int		err = 0;
+
+	if ((dev = ipnet_if_getdev(nm, getzoneid())) == (dev_t)-1)
+		err = ENOENT;
+	else
+		devipnet_fill_vattr(vap, dev);
+
+	return (err);
+}
+
+/*
+ * Lookup for /dev/ipnet directory
+ *	If the entry does not exist, the devipnet_create_rvp() callback
+ *	is invoked to create it. Nodes do not persist across reboot.
+ */
+/*ARGSUSED3*/
+static int
+devipnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
+    struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
+    caller_context_t *ct, int *direntflags, pathname_t *realpnp)
+{
+	struct sdev_node *sdvp = VTOSDEV(dvp);
+	struct sdev_node *dv;
+	struct vnode *rvp = NULL;
+	int error;
+
+	error = devname_lookup_func(sdvp, nm, vpp, cred, devipnet_create_rvp,
+	    SDEV_VATTR);
+
+	if (error == 0) {
+		switch ((*vpp)->v_type) {
+		case VCHR:
+			dv = VTOSDEV(VTOS(*vpp)->s_realvp);
+			ASSERT(VOP_REALVP(SDEVTOV(dv), &rvp, NULL) == ENOSYS);
+			break;
+		case VDIR:
+			dv = VTOSDEV(*vpp);
+			break;
+		default:
+			cmn_err(CE_PANIC, "devipnet_lookup: Unsupported node "
+			    "type: %p: %d", (void *)(*vpp), (*vpp)->v_type);
+			break;
+		}
+		ASSERT(SDEV_HELD(dv));
+	}
+
+	return (error);
+}
+
+static void
+devipnet_filldir_entry(const char *name, void *arg, dev_t dev)
+{
+	struct sdev_node *ddv = arg;
+	struct vattr vattr;
+	struct sdev_node *dv;
+
+	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
+
+	if ((dv = sdev_cache_lookup(ddv, (char *)name)) == NULL) {
+		devipnet_fill_vattr(&vattr, dev);
+		if (sdev_mknode(ddv, (char *)name, &dv, &vattr, NULL, NULL,
+		    kcred, SDEV_READY) != 0)
+			return;
+	}
+	SDEV_SIMPLE_RELE(dv);
+}
+
+static void
+devipnet_filldir(struct sdev_node *ddv)
+{
+	sdev_node_t	*dv, *next;
+
+	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
+	if (rw_tryupgrade(&ddv->sdev_contents) == 0) {	/* upgrade failed */
+		rw_exit(&ddv->sdev_contents);
+		rw_enter(&ddv->sdev_contents, RW_WRITER);
+	}
+
+	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
+		next = SDEV_NEXT_ENTRY(ddv, dv);
+
+		/* validate and prune only ready nodes */
+		if (dv->sdev_state != SDEV_READY)
+			continue;
+		switch (devipnet_validate(dv)) {
+		case SDEV_VTOR_VALID:
+		case SDEV_VTOR_SKIP:
+			continue;
+		case SDEV_VTOR_INVALID:
+		case SDEV_VTOR_STALE:
+			sdcmn_err12(("devipnet_filldir: destroy invalid "
+			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
+			break;
+		}
+
+		if (SDEVTOV(dv)->v_count > 0)
+			continue;
+		SDEV_HOLD(dv);
+		/* remove the invalid or stale node from the directory cache */
+		(void) sdev_cache_update(ddv, &dv, dv->sdev_name,
+		    SDEV_CACHE_DELETE);
+	}
+
+	ipnet_walk_if(devipnet_filldir_entry, ddv, getzoneid());
+
+	rw_downgrade(&ddv->sdev_contents);
+}
+
+/*
+ * Display all instantiated ipnet device nodes.
+ */
+/* ARGSUSED */
+static int
+devipnet_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
+    int *eofp, caller_context_t *ct, int flags)
+{
+	struct sdev_node *sdvp = VTOSDEV(dvp);
+
+	if (uiop->uio_offset == 0)
+		devipnet_filldir(sdvp);
+
+	return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
+}
+
+/*
+ * We override lookup and readdir to build entries based on the
+ * in-kernel ipnet table.
+ */
+const fs_operation_def_t devipnet_vnodeops_tbl[] = {
+	VOPNAME_READDIR,	{ .vop_readdir = devipnet_readdir },
+	VOPNAME_LOOKUP,		{ .vop_lookup = devipnet_lookup },
+	VOPNAME_CREATE,		{ .error = fs_nosys },
+	VOPNAME_REMOVE,		{ .error = fs_nosys },
+	VOPNAME_MKDIR,		{ .error = fs_nosys },
+	VOPNAME_RMDIR,		{ .error = fs_nosys },
+	VOPNAME_SYMLINK,	{ .error = fs_nosys },
+	VOPNAME_SETSECATTR,	{ .error = fs_nosys },
+	NULL,			NULL
+};
--- a/usr/src/uts/common/fs/dev/sdev_netops.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/fs/dev/sdev_netops.c	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * vnode ops for the /dev/net directory
  *
@@ -282,6 +280,7 @@
 		case SDEV_VTOR_SKIP:
 			continue;
 		case SDEV_VTOR_INVALID:
+		case SDEV_VTOR_STALE:
 			sdcmn_err12(("devnet_filldir: destroy invalid "
 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
 			break;
--- a/usr/src/uts/common/fs/dev/sdev_ptsops.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/fs/dev/sdev_ptsops.c	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * vnode ops for the /dev/pts directory
  *	The lookup is based on the internal pty table. We also
@@ -248,6 +246,7 @@
 		case SDEV_VTOR_SKIP:
 			continue;
 		case SDEV_VTOR_INVALID:
+		case SDEV_VTOR_STALE:
 			sdcmn_err7(("prunedir: destroy invalid "
 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
 			break;
--- a/usr/src/uts/common/fs/dev/sdev_subr.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/fs/dev/sdev_subr.c	Thu Nov 06 06:47:54 2008 -0500
@@ -614,6 +614,9 @@
 	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
 	SDEV_DYNAMIC | SDEV_VTOR },
 
+	{ "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
+	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
+
 	{ NULL, NULL, NULL, NULL, NULL, 0}
 };
 
@@ -2328,7 +2331,7 @@
 		}
 	}
 
-
+lookup_create_node:
 	/* first thread that is doing the lookup on this node */
 	if (!dv) {
 		if (!rw_tryupgrade(&ddv->sdev_contents)) {
@@ -2451,6 +2454,24 @@
 		switch (vtor(dv)) {
 		case SDEV_VTOR_VALID:
 			break;
+		case SDEV_VTOR_STALE:
+			/*
+			 * The name exists, but the cache entry is
+			 * stale and needs to be re-created.
+			 */
+			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
+			if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
+				rw_exit(&ddv->sdev_contents);
+				rw_enter(&ddv->sdev_contents, RW_WRITER);
+			}
+			error = sdev_cache_update(ddv, &dv, nm,
+			    SDEV_CACHE_DELETE);
+			rw_downgrade(&ddv->sdev_contents);
+			if (error == 0) {
+				dv = NULL;
+				goto lookup_create_node;
+			}
+			/* FALLTHRU */
 		case SDEV_VTOR_INVALID:
 			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
 			sdcmn_err7(("lookup: destroy invalid "
--- a/usr/src/uts/common/fs/dev/sdev_vtops.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/fs/dev/sdev_vtops.c	Thu Nov 06 06:47:54 2008 -0500
@@ -285,6 +285,7 @@
 		case SDEV_VTOR_SKIP:
 			continue;
 		case SDEV_VTOR_INVALID:
+		case SDEV_VTOR_STALE:
 			sdcmn_err7(("destroy invalid "
 			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
 			break;
--- a/usr/src/uts/common/inet/Makefile	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/Makefile	Thu Nov 06 06:47:54 2008 -0500
@@ -1,4 +1,4 @@
-#
+#                                                                               
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
@@ -18,10 +18,9 @@
 #
 # CDDL HEADER END
 #
+
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 # uts/common/inet/Makefile
@@ -29,12 +28,12 @@
 # include global definitions
 include ../../../Makefile.master
 
-HDRS=	arp.h arp_impl.h common.h ipclassifier.h ip.h ip6.h ipdrop.h ipsecah.h \
-	ipsecesp.h ipsec_info.h ip6_asp.h ip_if.h ip_ire.h ip_multi.h \
-	ip_netinfo.h ip_ndp.h ip_rts.h ipsec_impl.h keysock.h led.h mi.h \
-	mib2.h nd.h optcom.h sadb.h sctp_itf.h snmpcom.h tcp.h tcp_sack.h \
-	tcp_stack.h tun.h udp_impl.h rawip_impl.h ipp_common.h ip_ftable.h \
-	ip_impl.h tcp_impl.h wifi_ioctl.h ip_stack.h
+HDRS=	arp.h arp_impl.h common.h ipclassifier.h ip.h ip6.h ipdrop.h ipnet.h \
+	ipsecah.h ipsecesp.h ipsec_info.h ip6_asp.h ip_if.h ip_ire.h \
+	ip_multi.h ip_netinfo.h ip_ndp.h ip_rts.h ipsec_impl.h keysock.h \
+	led.h mi.h mib2.h nd.h optcom.h sadb.h sctp_itf.h snmpcom.h tcp.h \
+	tcp_sack.h tcp_stack.h tun.h udp_impl.h rawip_impl.h ipp_common.h \
+	ip_ftable.h ip_impl.h ip_stack.h tcp_impl.h wifi_ioctl.h
 
 ROOTDIRS= $(ROOT)/usr/include/inet
 
--- a/usr/src/uts/common/inet/arp/arp_netinfo.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/arp/arp_netinfo.c	Thu Nov 06 06:47:54 2008 -0500
@@ -46,6 +46,8 @@
 static int 	arp_getpmtuenabled(net_handle_t);
 static int 	arp_getlifaddr(net_handle_t, phy_if_t, lif_if_t, size_t,
 		    net_ifaddr_t [], void *);
+static int	arp_getlifzone(net_handle_t, phy_if_t, lif_if_t, zoneid_t *);
+static int	arp_getlifflags(net_handle_t, phy_if_t, lif_if_t, uint64_t *);
 static phy_if_t arp_phygetnext(net_handle_t, phy_if_t);
 static phy_if_t arp_phylookup(net_handle_t, const char *);
 static lif_if_t arp_lifgetnext(net_handle_t, phy_if_t, lif_if_t);
@@ -61,6 +63,8 @@
 	arp_getmtu,
 	arp_getpmtuenabled,
 	arp_getlifaddr,
+	arp_getlifzone,
+	arp_getlifflags,
 	arp_phygetnext,
 	arp_phylookup,
 	arp_lifgetnext,
@@ -348,3 +352,25 @@
 {
 	return (-1);
 }
+
+/*
+ * Unsupported with ARP.
+ */
+/*ARGSUSED*/
+static int
+arp_getlifzone(net_handle_t net, phy_if_t phy_ifdata, lif_if_t ifdata,
+    zoneid_t *zoneid)
+{
+	return (-1);
+}
+
+/*
+ * Unsupported with ARP.
+ */
+/*ARGSUSED*/
+static int
+arp_getlifflags(net_handle_t net, phy_if_t phy_ifdata, lif_if_t ifdata,
+    uint64_t *flags)
+{
+	return (-1);
+}
--- a/usr/src/uts/common/inet/ip.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip.h	Thu Nov 06 06:47:54 2008 -0500
@@ -1841,6 +1841,7 @@
 	mblk_t	*ill_bcast_mp;		/* DLPI header for broadcasts. */
 	mblk_t	*ill_resolver_mp;	/* Resolver template. */
 	mblk_t	*ill_unbind_mp;		/* unbind mp from ill_dl_up() */
+	mblk_t	*ill_promiscoff_mp;	/* for ill_leave_allmulti() */
 	mblk_t	*ill_dlpi_deferred;	/* b_next chain of control messages */
 	mblk_t	*ill_phys_addr_mp;	/* mblk which holds ill_phys_addr */
 #define	ill_last_mp_to_free	ill_phys_addr_mp
@@ -1869,7 +1870,8 @@
 
 		ill_note_link : 1,	/* supports link-up notification */
 		ill_capab_reneg : 1, /* capability renegotiation to be done */
-		ill_pad_to_bit_31 : 18;
+		ill_need_recover_multicast : 1,
+		ill_pad_to_bit_31 : 17;
 
 	/* Following bit fields protected by ill_lock */
 	uint_t
@@ -1889,11 +1891,6 @@
 	int	ill_arp_muxid;		/* muxid returned from plink for arp */
 	int	ill_ip_muxid;		/* muxid returned from plink for ip */
 
-	/*
-	 * NIC event information attached, to be used by nic event hooks.
-	 */
-	hook_nic_event_int_t	*ill_nic_event_info;
-
 	/* Used for IP frag reassembly throttling on a per ILL basis.  */
 	uint_t	ill_ipf_gen;		/* Generation of next fragment queue */
 	uint_t	ill_frag_count;		/* Count of all reassembly mblk bytes */
@@ -1993,6 +1990,7 @@
 	ip_stack_t	*ill_ipst;	/* Corresponds to a netstack_hold */
 	uint32_t	ill_dhcpinit;	/* IP_DHCPINIT_IFs for ill */
 	uint_t		ill_ilm_cnt;    /* ilms referencing this ill */
+	uint_t		ill_ipallmulti_cnt; /* ip_join_allmulti() calls */
 } ill_t;
 
 /*
@@ -2510,7 +2508,7 @@
 	uint32_t	ire_ihandle;	/* Associate interface IREs to cache */
 	ipif_t		*ire_ipif;	/* the interface that this ire uses */
 	uint32_t	ire_flags;	/* flags related to route (RTF_*) */
-	uint_t	ire_ipsec_overhead;	/* IPSEC overhead */
+	uint_t ire_ipsec_overhead;	/* IPSEC overhead */
 	/*
 	 * Neighbor Cache Entry for IPv6; arp info for IPv4
 	 */
@@ -3124,8 +3122,9 @@
 extern ill_t	*ill_first(int, int, ill_walk_context_t *, ip_stack_t *);
 extern ill_t	*ill_next(ill_walk_context_t *, ill_t *);
 extern void	ill_frag_timer_start(ill_t *);
-extern void	ill_nic_info_dispatch(ill_t *);
-extern void	ill_nic_info_plumb(ill_t *, boolean_t);
+extern void	ill_nic_event_dispatch(ill_t *, lif_if_t, nic_event_t,
+    nic_event_data_t, size_t);
+extern void	ill_nic_event_plumb(ill_t *, boolean_t);
 extern mblk_t	*ip_carve_mp(mblk_t **, ssize_t);
 extern mblk_t	*ip_dlpi_alloc(size_t, t_uscalar_t);
 extern char	*ip_dot_addr(ipaddr_t, char *);
@@ -3366,6 +3365,53 @@
 #endif
 
 /*
+ * IP observability hook support
+ */
+
+/*
+ * ipobs_hooktype_t describes the hook types supported
+ * by the ip module. IPOBS_HOOK_LOCAL refers to packets
+ * which are looped back internally within the ip module.
+ */
+
+typedef enum ipobs_hook_type {
+	IPOBS_HOOK_LOCAL,
+	IPOBS_HOOK_OUTBOUND,
+	IPOBS_HOOK_INBOUND
+} ipobs_hook_type_t;
+
+typedef void ipobs_cbfunc_t(mblk_t *);
+
+typedef struct ipobs_cb {
+	ipobs_cbfunc_t	*ipobs_cbfunc;
+	list_node_t	ipobs_cbnext;
+} ipobs_cb_t;
+
+/*
+ * This structure holds the data passed back from the ip module to
+ * observability consumers.
+ *
+ * ihd_mp	  Pointer to the IP packet.
+ * ihd_zsrc	  Source zoneid; set to ALL_ZONES when unknown.
+ * ihd_zdst	  Destination zoneid; set to ALL_ZONES when unknown.
+ * ihd_htype	  IPobs hook type, see above for the defined types.
+ * ihd_ipver	  IP version of the packet.
+ * ihd_ifindex	  Interface index that the packet was received/sent over.
+ *		  For local packets, this is the index of the interface
+ *		  associated with the local destination address.
+ * ihd_stack	  Netstack the packet is from.
+ */
+typedef struct ipobs_hook_data {
+	mblk_t			*ihd_mp;
+	zoneid_t		ihd_zsrc;
+	zoneid_t		ihd_zdst;
+	ipobs_hook_type_t	ihd_htype;
+	uint16_t		ihd_ipver;
+	uint64_t		ihd_ifindex;
+	netstack_t		*ihd_stack;
+} ipobs_hook_data_t;
+
+/*
  * Per-ILL Multidata Transmit capabilities.
  */
 struct ill_mdt_capab_s {
@@ -3482,7 +3528,10 @@
 extern int	ip_fill_mtuinfo(struct in6_addr *, in_port_t,
 	struct ip6_mtuinfo *, netstack_t *);
 extern	ipif_t *conn_get_held_ipif(conn_t *, ipif_t **, int *);
-
+extern void ipobs_register_hook(netstack_t *, ipobs_cbfunc_t *);
+extern void ipobs_unregister_hook(netstack_t *, ipobs_cbfunc_t *);
+extern void ipobs_hook(mblk_t *, int, zoneid_t, zoneid_t, const ill_t *, int,
+    uint32_t, ip_stack_t *);
 typedef void    (*ipsq_func_t)(ipsq_t *, queue_t *, mblk_t *, void *);
 
 /*
--- a/usr/src/uts/common/inet/ip/ip.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip/ip.c	Thu Nov 06 06:47:54 2008 -0500
@@ -791,6 +791,9 @@
 
 static void	ip_rput_process_forward(queue_t *, mblk_t *, ire_t *,
     ipha_t *, ill_t *, boolean_t);
+
+static void ipobs_init(ip_stack_t *);
+static void ipobs_fini(ip_stack_t *);
 ipaddr_t	ip_g_all_ones = IP_HOST_MASK;
 
 /* How long, in seconds, we allow frags to hang around. */
@@ -1230,10 +1233,10 @@
 	/* 146 */ { SIOCTMYSITE, sizeof (struct sioc_addrreq), 0,
 			MISC_CMD, ip_sioctl_tmysite, NULL },
 	/* 147 */ { SIOCGTUNPARAM, sizeof (struct iftun_req), IPI_REPL,
-			TUN_CMD, ip_sioctl_tunparam, NULL },
+		    TUN_CMD, ip_sioctl_tunparam, NULL },
 	/* 148 */ { SIOCSTUNPARAM, sizeof (struct iftun_req),
-			IPI_PRIV | IPI_WR,
-			TUN_CMD, ip_sioctl_tunparam, NULL },
+		    IPI_PRIV | IPI_WR,
+		    TUN_CMD, ip_sioctl_tunparam, NULL },
 
 	/* IPSECioctls handled in ip_sioctl_copyin_setup itself */
 	/* 149 */ { SIOCFIPSECONFIG, 0, IPI_PRIV, MISC_CMD, NULL, NULL },
@@ -4156,9 +4159,7 @@
 			if ((ipif->ipif_flags & IPIF_UP) &&
 			    !ipif->ipif_addr_ready) {
 				ipif_mask_reply(ipif);
-				ip_rts_ifmsg(ipif);
-				ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
-				sctp_update_ipif(ipif, SCTP_IPIF_UP);
+				ipif_up_notify(ipif);
 			}
 			ipif->ipif_addr_ready = 1;
 			ipif_refrele(ipif);
@@ -5829,6 +5830,7 @@
 	mutex_destroy(&ipst->ips_ip_addr_avail_lock);
 	rw_destroy(&ipst->ips_ill_g_lock);
 
+	ipobs_fini(ipst);
 	ip_ire_fini(ipst);
 	ip6_asp_free(ipst);
 	conn_drain_fini(ipst);
@@ -6033,6 +6035,7 @@
 	ipst->ips_ip_src_id = 1;
 	rw_init(&ipst->ips_srcid_lock, NULL, RW_DEFAULT, NULL);
 
+	ipobs_init(ipst);
 	ip_net_init(ipst, ns);
 	ipv4_hook_init(ipst);
 	ipv6_hook_init(ipst);
@@ -8532,7 +8535,6 @@
 			return;
 		}
 		case IRE_IF_NORESOLVER: {
-
 			if (dst_ill->ill_phys_addr_length != IP_ADDR_LEN &&
 			    dst_ill->ill_resolver_mp == NULL) {
 				ip1dbg(("ip_newroute: dst_ill %p "
@@ -14054,7 +14056,7 @@
 		 * may be queued depending on the availability
 		 * of transmit resources at the media layer.
 		 */
-		IP_DLS_ILL_TX(stq_ill, ipha, mp, ipst);
+		IP_DLS_ILL_TX(stq_ill, ipha, mp, ipst, hlen);
 	} else {
 		DTRACE_PROBE4(ip4__physical__out__start,
 		    ill_t *, NULL, ill_t *, stq_ill,
@@ -15147,6 +15149,18 @@
 			continue;
 		}
 
+		if (ipst->ips_ipobs_enabled) {
+			zoneid_t dzone;
+
+			/*
+			 * On the inbound path the src zone will be unknown as
+			 * this packet has come from the wire.
+			 */
+			dzone = ip_get_zoneid_v4(dst, mp, ipst, ALL_ZONES);
+			ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone,
+			    ill, IPV4_VERSION, 0, ipst);
+		}
+
 		/*
 		 * Reuse the cached ire only if the ipha_dst of the previous
 		 * packet is the same as the current packet AND it is not
@@ -15157,6 +15171,7 @@
 			ire_refrele(ire);
 			ire = NULL;
 		}
+
 		opt_len = ipha->ipha_version_and_hdr_length -
 		    IP_SIMPLE_HDR_VERSION;
 
@@ -15848,7 +15863,7 @@
 
 		mutex_enter(&ill->ill_lock);
 		ill->ill_dl_up = 1;
-		(void) ill_hook_event_create(ill, 0, NE_UP, NULL, 0);
+		ill_nic_event_dispatch(ill, 0, NE_UP, NULL, 0);
 		mutex_exit(&ill->ill_lock);
 
 		/*
@@ -16411,7 +16426,7 @@
 		iocp = (struct iocblk *)mp->b_rptr;
 
 		switch (iocp->ioc_cmd) {
-		int mode;
+			int mode;
 
 		case DL_IOC_HDR_INFO:
 			/*
@@ -20132,7 +20147,7 @@
 mblk_t *
 ip_unbind(queue_t *q, mblk_t *mp)
 {
-	conn_t	*connp = Q_TO_CONN(q);
+	conn_t  *connp = Q_TO_CONN(q);
 
 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
 
@@ -22670,6 +22685,19 @@
 		if (mp == NULL)
 			goto release_ire_and_ill;
 
+		if (ipst->ips_ipobs_enabled) {
+			zoneid_t szone;
+
+			/*
+			 * On the outbound path the destination zone will be
+			 * unknown as we're sending this packet out on the
+			 * wire.
+			 */
+			szone = ip_get_zoneid_v4(ipha->ipha_src, mp, ipst,
+			    ALL_ZONES);
+			ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, ALL_ZONES,
+			    ire->ire_ipif->ipif_ill, IPV4_VERSION, 0, ipst);
+		}
 		mp->b_prev = SET_BPREV_FLAG(IPP_LOCAL_OUT);
 		DTRACE_PROBE2(ip__xmit__1, mblk_t *, mp, ire_t *, ire);
 		pktxmit_state = ip_xmit_v4(mp, ire, NULL, B_TRUE);
@@ -25118,6 +25146,24 @@
 	if (first_mp == NULL)
 		return;
 
+	if (ipst->ips_ipobs_enabled) {
+		zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES;
+		zoneid_t stackzoneid = netstackid_to_zoneid(
+		    ipst->ips_netstack->netstack_stackid);
+
+		dzone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid;
+		/*
+		 * 127.0.0.1 is special, as we cannot lookup its zoneid by
+		 * address.  Restrict the lookup below to the destination zone.
+		 */
+		if (ipha->ipha_src == ntohl(INADDR_LOOPBACK))
+			lookup_zoneid = zoneid;
+		szone = ip_get_zoneid_v4(ipha->ipha_src, mp, ipst,
+		    lookup_zoneid);
+		ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
+		    IPV4_VERSION, 0, ipst);
+	}
+
 	DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *,
 	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
 	    int, 1);
@@ -25864,9 +25910,10 @@
 
 		DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, ipsec_mp);
 
-		if (ipsec_mp != NULL)
+		if (ipsec_mp != NULL) {
 			ip_wput_local_v6(RD(q), out_ill,
-			    ip6h, ipsec_mp, ire, 0);
+			    ip6h, ipsec_mp, ire, 0, zoneid);
+		}
 		if (ire_need_rele)
 			ire_refrele(ire);
 		return;
@@ -29381,12 +29428,8 @@
 			else
 				ill = phyi->phyint_illv6;
 
-			if (ill != NULL) {
-				mutex_enter(&ill->ill_lock);
-				ill_nic_info_plumb(ill, B_TRUE);
-				ill_nic_info_dispatch(ill);
-				mutex_exit(&ill->ill_lock);
-			}
+			if (ill != NULL)
+				ill_nic_event_plumb(ill, B_TRUE);
 		}
 	}
 	rw_exit(&ipst->ips_ill_g_lock);
@@ -30202,3 +30245,169 @@
 	ipp->ipp_fields &= ~(IPPF_HOPOPTS | IPPF_RTDSTOPTS | IPPF_DSTOPTS |
 	    IPPF_RTHDR);
 }
+
+zoneid_t
+ip_get_zoneid_v4(ipaddr_t addr, mblk_t *mp, ip_stack_t *ipst,
+    zoneid_t lookup_zoneid)
+{
+	ire_t		*ire;
+	int		ire_flags = MATCH_IRE_TYPE;
+	zoneid_t	zoneid = ALL_ZONES;
+
+	if (is_system_labeled() && !tsol_can_accept_raw(mp, B_FALSE))
+		return (ALL_ZONES);
+
+	if (lookup_zoneid != ALL_ZONES)
+		ire_flags |= MATCH_IRE_ZONEONLY;
+	ire = ire_ctable_lookup(addr, NULL, IRE_LOCAL | IRE_LOOPBACK, NULL,
+	    lookup_zoneid, NULL, ire_flags, ipst);
+	if (ire != NULL) {
+		zoneid = IP_REAL_ZONEID(ire->ire_zoneid, ipst);
+		ire_refrele(ire);
+	}
+	return (zoneid);
+}
+
+zoneid_t
+ip_get_zoneid_v6(in6_addr_t *addr, mblk_t *mp, const ill_t *ill,
+    ip_stack_t *ipst, zoneid_t lookup_zoneid)
+{
+	ire_t		*ire;
+	int		ire_flags = MATCH_IRE_TYPE;
+	zoneid_t	zoneid = ALL_ZONES;
+	ipif_t		*ipif_arg = NULL;
+
+	if (is_system_labeled() && !tsol_can_accept_raw(mp, B_FALSE))
+		return (ALL_ZONES);
+
+	if (IN6_IS_ADDR_LINKLOCAL(addr)) {
+		ire_flags |= MATCH_IRE_ILL_GROUP;
+		ipif_arg = ill->ill_ipif;
+	}
+	if (lookup_zoneid != ALL_ZONES)
+		ire_flags |= MATCH_IRE_ZONEONLY;
+	ire = ire_ctable_lookup_v6(addr, NULL, IRE_LOCAL | IRE_LOOPBACK,
+	    ipif_arg, lookup_zoneid, NULL, ire_flags, ipst);
+	if (ire != NULL) {
+		zoneid = IP_REAL_ZONEID(ire->ire_zoneid, ipst);
+		ire_refrele(ire);
+	}
+	return (zoneid);
+}
+
+/*
+ * IP observability hook support functions.
+ */
+
+static void
+ipobs_init(ip_stack_t *ipst)
+{
+	ipst->ips_ipobs_enabled = B_FALSE;
+	list_create(&ipst->ips_ipobs_cb_list, sizeof (ipobs_cb_t),
+	    offsetof(ipobs_cb_t, ipobs_cbnext));
+	mutex_init(&ipst->ips_ipobs_cb_lock, NULL, MUTEX_DEFAULT, NULL);
+	ipst->ips_ipobs_cb_nwalkers = 0;
+	cv_init(&ipst->ips_ipobs_cb_cv, NULL, CV_DRIVER, NULL);
+}
+
+static void
+ipobs_fini(ip_stack_t *ipst)
+{
+	ipobs_cb_t *cb;
+
+	mutex_enter(&ipst->ips_ipobs_cb_lock);
+	while (ipst->ips_ipobs_cb_nwalkers != 0)
+		cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock);
+
+	while ((cb = list_head(&ipst->ips_ipobs_cb_list)) != NULL) {
+		list_remove(&ipst->ips_ipobs_cb_list, cb);
+		kmem_free(cb, sizeof (*cb));
+	}
+	list_destroy(&ipst->ips_ipobs_cb_list);
+	mutex_exit(&ipst->ips_ipobs_cb_lock);
+	mutex_destroy(&ipst->ips_ipobs_cb_lock);
+	cv_destroy(&ipst->ips_ipobs_cb_cv);
+}
+
+void
+ipobs_hook(mblk_t *mp, int htype, zoneid_t zsrc, zoneid_t zdst,
+    const ill_t *ill, int ipver, uint32_t hlen, ip_stack_t *ipst)
+{
+	ipobs_cb_t *ipobs_cb;
+
+	ASSERT(DB_TYPE(mp) == M_DATA);
+
+	mutex_enter(&ipst->ips_ipobs_cb_lock);
+	ipst->ips_ipobs_cb_nwalkers++;
+	mutex_exit(&ipst->ips_ipobs_cb_lock);
+	for (ipobs_cb = list_head(&ipst->ips_ipobs_cb_list); ipobs_cb != NULL;
+	    ipobs_cb = list_next(&ipst->ips_ipobs_cb_list, ipobs_cb)) {
+		mblk_t  *mp2 = allocb(sizeof (ipobs_hook_data_t),
+		    BPRI_HI);
+		if (mp2 != NULL) {
+			ipobs_hook_data_t *ihd =
+			    (ipobs_hook_data_t *)mp2->b_rptr;
+			if (((ihd->ihd_mp = dupmsg(mp)) == NULL) &&
+			    ((ihd->ihd_mp = copymsg(mp)) == NULL)) {
+				freemsg(mp2);
+				continue;
+			}
+			ihd->ihd_mp->b_rptr += hlen;
+			ihd->ihd_htype = htype;
+			ihd->ihd_ipver = ipver;
+			ihd->ihd_zsrc = zsrc;
+			ihd->ihd_zdst = zdst;
+			ihd->ihd_ifindex = ill->ill_phyint->phyint_ifindex;
+			ihd->ihd_stack = ipst->ips_netstack;
+			mp2->b_wptr += sizeof (*ihd);
+			ipobs_cb->ipobs_cbfunc(mp2);
+		}
+	}
+	mutex_enter(&ipst->ips_ipobs_cb_lock);
+	ipst->ips_ipobs_cb_nwalkers--;
+	if (ipst->ips_ipobs_cb_nwalkers == 0)
+		cv_broadcast(&ipst->ips_ipobs_cb_cv);
+	mutex_exit(&ipst->ips_ipobs_cb_lock);
+}
+
+void
+ipobs_register_hook(netstack_t *ns, ipobs_cbfunc_t *func)
+{
+	ipobs_cb_t   *cb;
+	ip_stack_t *ipst = ns->netstack_ip;
+
+	cb = kmem_alloc(sizeof (*cb), KM_SLEEP);	/* before taking lock */
+	/* Wait out ipobs_hook() walkers before modifying the list. */
+	mutex_enter(&ipst->ips_ipobs_cb_lock);
+	while (ipst->ips_ipobs_cb_nwalkers != 0)
+		cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock);
+	ASSERT(ipst->ips_ipobs_cb_nwalkers == 0);
+	/* Add the callback, then set the flag the data paths test. */
+	cb->ipobs_cbfunc = func;
+	list_insert_head(&ipst->ips_ipobs_cb_list, cb);
+	ipst->ips_ipobs_enabled = B_TRUE;
+	mutex_exit(&ipst->ips_ipobs_cb_lock);
+}
+
+void
+ipobs_unregister_hook(netstack_t *ns, ipobs_cbfunc_t *func)
+{
+	ipobs_cb_t	*curcb;
+	ip_stack_t	*ipst = ns->netstack_ip;
+
+	mutex_enter(&ipst->ips_ipobs_cb_lock);
+	while (ipst->ips_ipobs_cb_nwalkers != 0)
+		cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock);
+	/* No walkers remain; free the first entry registered for func. */
+	for (curcb = list_head(&ipst->ips_ipobs_cb_list); curcb != NULL;
+	    curcb = list_next(&ipst->ips_ipobs_cb_list, curcb)) {
+		if (func == curcb->ipobs_cbfunc) {
+			list_remove(&ipst->ips_ipobs_cb_list, curcb);
+			kmem_free(curcb, sizeof (*curcb));
+			break;
+		}
+	}
+	if (list_is_empty(&ipst->ips_ipobs_cb_list))
+		ipst->ips_ipobs_enabled = B_FALSE;	/* last hook gone */
+	mutex_exit(&ipst->ips_ipobs_cb_lock);
+}
--- a/usr/src/uts/common/inet/ip/ip6.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip/ip6.c	Thu Nov 06 06:47:54 2008 -0500
@@ -3259,9 +3259,8 @@
 			if (!IPCL_IS_IPTUN(connp) &&
 			    (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
 			    secure)) {
-				first_mp1 = ipsec_check_inbound_policy
-				    (first_mp1, connp, NULL, ip6h,
-				    mctl_present);
+				first_mp1 = ipsec_check_inbound_policy(
+				    first_mp1, connp, NULL, ip6h, mctl_present);
 			}
 			if (first_mp1 != NULL) {
 				if (mctl_present)
@@ -6855,6 +6854,26 @@
 	if (first_mp == NULL)
 		return;
 
+	/*
+	 * Attach any necessary label information to this packet.
+	 */
+	if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) {
+		if (ip6opt_ls != 0)
+			ip0dbg(("tsol_get_pkt_label v6 failed\n"));
+		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
+		goto discard;
+	}
+
+	/* IP observability hook. */
+	if (ipst->ips_ipobs_enabled) {
+		zoneid_t dzone;
+
+		dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst,
+		    ALL_ZONES);
+		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill,
+		    IPV6_VERSION, 0, ipst);
+	}
+
 	if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) ==
 	    IPV6_DEFAULT_VERS_AND_FLOW) {
 		/*
@@ -7285,18 +7304,6 @@
 	}
 
 	/*
-	 * Attach any necessary label information to this packet.
-	 */
-	if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) {
-		if (ip6opt_ls != 0)
-			ip0dbg(("tsol_get_pkt_label v6 failed\n"));
-		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
-		freemsg(hada_mp);
-		freemsg(first_mp);
-		return;
-	}
-
-	/*
 	 * On incoming v6 multicast packets we will bypass the ire table,
 	 * and assume that the read queue corresponds to the targetted
 	 * interface.
@@ -10519,7 +10526,7 @@
  */
 void
 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp,
-    ire_t *ire, int fanout_flags)
+    ire_t *ire, int fanout_flags, zoneid_t zoneid)
 {
 	uint32_t	ports;
 	mblk_t		*mp = first_mp, *first_mp1;
@@ -10569,6 +10576,25 @@
 	if (first_mp == NULL)
 		return;
 
+	if (ipst->ips_ipobs_enabled) {
+		zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES;
+		zoneid_t stackzoneid = netstackid_to_zoneid(
+		    ipst->ips_netstack->netstack_stackid);
+
+		szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid;
+		/*
+		 * ::1 is special, as we cannot lookup its zoneid by
+		 * address.  For this case, restrict the lookup to the
+		 * source zone.
+		 */
+		if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst))
+			lookup_zoneid = zoneid;
+		dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst,
+		    lookup_zoneid);
+		ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
+		    IPV6_VERSION, 0, ipst);
+	}
+
 	DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *,
 	    ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
 	    int, 1);
@@ -11007,8 +11033,8 @@
 						 * disabled.
 						 */
 						ip_wput_local_v6(RD(q), ill,
-						    nip6h, nmp,
-						    ire, fanout_flags);
+						    nip6h, nmp, ire,
+						    fanout_flags, zoneid);
 					}
 				} else {
 					BUMP_MIB(mibptr, ipIfStatsOutDiscards);
@@ -11437,8 +11463,10 @@
 		    ipst->ips_ipv6firewall_loopback_out,
 		    NULL, ill, ip6h, first_mp, mp, 0, ipst);
 		DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp);
-		if (first_mp != NULL)
-			ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0);
+		if (first_mp != NULL) {
+			ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0,
+			    zoneid);
+		}
 	}
 }
 
@@ -12000,6 +12028,8 @@
 	boolean_t	multirt_send = B_FALSE;
 	mblk_t		*next_mp = NULL;
 	ip_stack_t	*ipst = ire->ire_ipst;
+	boolean_t	fp_prepend = B_FALSE;
+	uint32_t	hlen;
 
 	ip6h = (ip6_t *)mp->b_rptr;
 	ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6));
@@ -12201,7 +12231,6 @@
 			ASSERT(nce->nce_ipversion != IPV4_VERSION);
 			mutex_enter(&nce->nce_lock);
 			if ((mp1 = nce->nce_fp_mp) != NULL) {
-				uint32_t hlen;
 				uchar_t	*rptr;
 
 				hlen = MBLKL(mp1);
@@ -12237,6 +12266,7 @@
 					 */
 					bcopy(mp1->b_rptr, rptr, hlen);
 					mutex_exit(&nce->nce_lock);
+					fp_prepend = B_TRUE;
 				}
 			} else {
 				/*
@@ -12316,6 +12346,16 @@
 				}
 			}
 
+			if (ipst->ips_ipobs_enabled) {
+				zoneid_t	szone;
+
+				szone = ip_get_zoneid_v6(&ip6h->ip6_src,
+				    mp_ip6h, out_ill, ipst, ALL_ZONES);
+				ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone,
+				    ALL_ZONES, out_ill, IPV6_VERSION,
+				    fp_prepend ? hlen : 0, ipst);
+			}
+
 			/*
 			 * Update ire and MIB counters; for save_ire, this has
 			 * been done by the caller.
--- a/usr/src/uts/common/inet/ip/ip6_if.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip/ip6_if.c	Thu Nov 06 06:47:54 2008 -0500
@@ -26,8 +26,6 @@
  * Copyright (c) 1990 Mentat Inc.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * This file contains the interface control functions for IPv6.
  */
@@ -3148,7 +3146,7 @@
 	ipif_saved_ire_cnt = ipif->ipif_saved_ire_cnt;
 	ipif_saved_irep = ipif_recover_ire_v6(ipif);
 
-	if (ipif->ipif_ipif_up_count == 1 && !loopback) {
+	if (ill->ill_need_recover_multicast) {
 		/*
 		 * Need to recover all multicast memberships in the driver.
 		 * This had to be deferred until we had attached.
@@ -3187,11 +3185,8 @@
 		}
 	}
 
-	if (ipif->ipif_addr_ready) {
-		ip_rts_ifmsg(ipif);
-		ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
-		sctp_update_ipif(ipif, SCTP_IPIF_UP);
-	}
+	if (ipif->ipif_addr_ready)
+		ipif_up_notify(ipif);
 
 	if (ipif_saved_irep != NULL) {
 		kmem_free(ipif_saved_irep,
@@ -3200,6 +3195,7 @@
 
 	if (src_ipif_held)
 		ipif_refrele(src_ipif);
+
 	return (0);
 
 bad:
--- a/usr/src/uts/common/inet/ip/ip_if.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip/ip_if.c	Thu Nov 06 06:47:54 2008 -0500
@@ -44,7 +44,6 @@
 #include <sys/sunldi.h>
 #include <sys/file.h>
 #include <sys/bitmap.h>
-
 #include <sys/kmem.h>
 #include <sys/systm.h>
 #include <sys/param.h>
@@ -252,7 +251,6 @@
 static ill_t	*ill_prev_usesrc(ill_t *);
 static int	ill_relink_usesrc_ills(ill_t *, ill_t *, uint_t);
 static void	ill_disband_usesrc_group(ill_t *);
-
 static void	conn_cleanup_stale_ire(conn_t *, caddr_t);
 
 #ifdef DEBUG
@@ -491,7 +489,7 @@
 
 static uchar_t	ip_six_byte_all_ones[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
 
-static ip_m_t	ip_m_tbl[] = {
+static ip_m_t   ip_m_tbl[] = {
 	{ DL_ETHER, IFT_ETHER, ip_ether_v4mapinfo, ip_ether_v6mapinfo,
 	    ip_ether_v6intfid },
 	{ DL_CSMACD, IFT_ISO88023, ip_ether_v4mapinfo, ip_ether_v6mapinfo,
@@ -751,6 +749,12 @@
 	reset_conn_ill(ill);
 
 	/*
+	 * Remove multicast references added as a result of calls to
+	 * ip_join_allmulti().
+	 */
+	ip_purge_allmulti(ill);
+
+	/*
 	 * ill_down will arrange to blow off any IRE's dependent on this
 	 * ILL, and shut down fragmentation reassembly.
 	 */
@@ -4343,28 +4347,11 @@
 		ill->ill_ppa = UINT_MAX;
 	}
 
-	/*
-	 * Run the unplumb hook after the NIC has disappeared from being
-	 * visible so that attempts to revalidate its existance will fail.
-	 *
-	 * This needs to be run inside the ill_g_lock perimeter to ensure
-	 * that the ordering of delivered events to listeners matches the
-	 * order of them in the kernel.
-	 */
-	mutex_enter(&ill->ill_lock);
-	ill_nic_info_dispatch(ill);
-	mutex_exit(&ill->ill_lock);
-
-	/* Generate NE_UNPLUMB event for ill_name. */
-	(void) ill_hook_event_create(ill, 0, NE_UNPLUMB, ill->ill_name,
+	/* Generate one last event for this ill. */
+	ill_nic_event_dispatch(ill, 0, NE_UNPLUMB, ill->ill_name,
 	    ill->ill_name_length);
 
 	ill_phyint_free(ill);
-
-	mutex_enter(&ill->ill_lock);
-	ill_nic_info_dispatch(ill);
-	mutex_exit(&ill->ill_lock);
-
 	rw_exit(&ipst->ips_ill_g_lock);
 }
 
@@ -5090,6 +5077,8 @@
 		*error = 0;
 	*did_alloc = B_TRUE;
 	rw_exit(&ipst->ips_ill_g_lock);
+	ill_nic_event_dispatch(ill, MAP_IPIF_ID(ill->ill_ipif->ipif_id),
+	    NE_PLUMB, ill->ill_name, ill->ill_name_length);
 	return (ill);
 done:
 	if (ill != NULL) {
@@ -6277,27 +6266,9 @@
 static boolean_t
 ipif_is_freeable(ipif_t *ipif)
 {
-
-	ill_t *ill;
-
 	ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock));
-
-	if (ipif->ipif_refcnt != 0 || !IPIF_FREE_OK(ipif)) {
-		return (B_FALSE);
-	}
-
-	ill = ipif->ipif_ill;
-	if (ill->ill_ipif_up_count != 0 || ill->ill_ipif_dup_count != 0 ||
-	    ill->ill_logical_down) {
-		return (B_TRUE);
-	}
-
-	/* This is the last ipif going down or being deleted on this ill */
-	if (!ILL_FREE_OK(ill) || ill->ill_refcnt != 0) {
-		return (B_FALSE);
-	}
-
-	return (B_TRUE);
+	ASSERT(ipif->ipif_id != 0);
+	return (ipif->ipif_refcnt == 0 && IPIF_FREE_OK(ipif));
 }
 
 /*
@@ -8039,8 +8010,6 @@
 		mutex_enter(&ill->ill_lock);
 		dlpi_pending = ill->ill_dlpi_pending;
 		ipif->ipif_state_flags &= ~IPIF_CHANGING;
-		/* Send any queued event */
-		ill_nic_info_dispatch(ill);
 		mutex_exit(&ill->ill_lock);
 	}
 
@@ -8193,13 +8162,13 @@
 {
 	boolean_t exists;
 	struct iftun_req *ta;
-	ipif_t	*ipif;
-	ill_t	*ill;
+	ipif_t  *ipif;
+	ill_t   *ill;
 	boolean_t isv6;
-	mblk_t	*mp1;
-	int	error;
-	conn_t	*connp;
-	ip_stack_t	*ipst;
+	mblk_t  *mp1;
+	int error;
+	conn_t  *connp;
+	ip_stack_t  *ipst;
 
 	/* Existence verified in ip_wput_nondata */
 	mp1 = mp->b_cont->b_cont;
@@ -9427,7 +9396,7 @@
 ip_sioctl_tunparam(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
     ip_ioctl_cmd_t *ipip, void *dummy_ifreq)
 {
-	ill_t  		*ill;
+	ill_t		*ill;
 	mblk_t		*mp1;
 	conn_t		*connp;
 	boolean_t	success;
@@ -11224,7 +11193,7 @@
 	 * Don't attach nic event message for SIOCLIFADDIF ioctl.
 	 */
 	if (iocp != NULL && iocp->ioc_cmd != SIOCLIFADDIF) {
-		(void) ill_hook_event_create(ill, MAP_IPIF_ID(ipif->ipif_id),
+		ill_nic_event_dispatch(ill, MAP_IPIF_ID(ipif->ipif_id),
 		    NE_ADDRESS_CHANGE, sin, sinlen);
 	}
 
@@ -13920,9 +13889,7 @@
 		 * DAD completion would have done, and continue.
 		 */
 		ipif_mask_reply(ipif);
-		ip_rts_ifmsg(ipif);
-		ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
-		sctp_update_ipif(ipif, SCTP_IPIF_UP);
+		ipif_up_notify(ipif);
 		ipif->ipif_addr_ready = 1;
 		return;
 	}
@@ -13949,9 +13916,7 @@
 		 * problem.  Just send out the routing socket notification that
 		 * DAD completion would have done, and continue.
 		 */
-		ip_rts_ifmsg(ipif);
-		ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
-		sctp_update_ipif(ipif, SCTP_IPIF_UP);
+		ipif_up_notify(ipif);
 		ipif->ipif_addr_ready = 1;
 	}
 	NCE_REFRELE(nce);
@@ -15661,7 +15626,7 @@
 	for (ill = illgrp->illgrp_ill; ill != NULL;
 	    ill = ill->ill_group_next) {
 		if (ill->ill_join_allmulti)
-			(void) ip_leave_allmulti(ill->ill_ipif);
+			ill_leave_allmulti(ill);
 	}
 
 	/*
@@ -15671,13 +15636,9 @@
 	 * one of them is failed and another is a good one and
 	 * the good one (not marked inactive) is leaving the group.
 	 */
-	ret = 0;
-	for (ill = illgrp->illgrp_ill; ill != NULL;
-	    ill = ill->ill_group_next) {
-		/* Never pick an offline interface */
+	for (ill = illgrp->illgrp_ill; ill != NULL; ill = ill->ill_group_next) {
 		if (ill->ill_phyint->phyint_flags & PHYI_OFFLINE)
 			continue;
-
 		if (ill->ill_phyint->phyint_flags & PHYI_FAILED) {
 			fallback_failed_ill = ill;
 			continue;
@@ -15688,11 +15649,11 @@
 		}
 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
 			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
-				ret = ip_join_allmulti(ill->ill_ipif);
+				ret = ill_join_allmulti(ill);
 				/*
-				 * ip_join_allmulti can fail because of memory
-				 * failures. So, make sure we join at least
-				 * on one ill.
+				 * ill_join_allmulti() can fail because of
+				 * memory failures so make sure we join at
+				 * least on one ill.
 				 */
 				if (ill->ill_join_allmulti)
 					return (0);
@@ -15709,17 +15670,13 @@
 	}
 	if ((ill = fallback_inactive_ill) != NULL) {
 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
-			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
-				ret = ip_join_allmulti(ill->ill_ipif);
-				return (ret);
-			}
+			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr))
+				return (ill_join_allmulti(ill));
 		}
 	} else if ((ill = fallback_failed_ill) != NULL) {
 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
-			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
-				ret = ip_join_allmulti(ill->ill_ipif);
-				return (ret);
-			}
+			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr))
+				return (ill_join_allmulti(ill));
 		}
 	}
 	return (0);
@@ -15816,7 +15773,7 @@
 	} else {
 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
 			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
-				(void) ip_join_allmulti(ill->ill_ipif);
+				(void) ill_join_allmulti(ill);
 				break;
 			}
 		}
@@ -16116,8 +16073,8 @@
 		/*
 		 * When ipif_up_done() calls this function, the multicast
 		 * groups have not been joined yet. So, there is no point in
-		 * nomination. ip_join_allmulti will handle groups when
-		 * ill_recover_multicast is called from ipif_up_done() later.
+		 * nomination. ill_join_allmulti() will handle groups when
+		 * ill_recover_multicast() is called from ipif_up_done() later.
 		 */
 		(void) ill_nominate_mcast_rcv(illgrp);
 		/*
@@ -16567,7 +16524,7 @@
 		if (ipst->ips_ipmp_hook_emulation &&
 		    phyi_tmp != NULL) {
 			/* First phyint in group - group PLUMB event */
-			ill_nic_info_plumb(ill, B_TRUE);
+			ill_nic_event_plumb(ill, B_TRUE);
 		}
 		mutex_exit(&phyi->phyint_lock);
 		RELEASE_ILL_LOCKS(ill_v4, ill_v6);
@@ -16799,12 +16756,12 @@
 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
 			/*
 			 * There is no need to hold ill locks as we are
-			 * writer on both ills and when ill_join_allmulti
-			 * is changed the thread is always a writer.
+			 * writer on both ills and when ill_join_allmulti()
+			 * is called the thread is always a writer.
 			 */
 			if (from_ill->ill_join_allmulti &&
 			    !to_ill->ill_join_allmulti) {
-				(void) ip_join_allmulti(to_ill->ill_ipif);
+				(void) ill_join_allmulti(to_ill);
 			}
 		} else if (ilm->ilm_notify_driver) {
 
@@ -16844,7 +16801,7 @@
 
 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
 			if (from_ill->ill_join_allmulti)
-				(void) ip_leave_allmulti(from_ill->ill_ipif);
+				ill_leave_allmulti(from_ill);
 		} else if (ilm_numentries_v6(from_ill, &ilm->ilm_v6addr) == 0) {
 			(void) ip_ll_send_disabmulti_req(from_ill,
 			    &ilm->ilm_v6addr);
@@ -18314,7 +18271,7 @@
 
 	mutex_enter(&ill->ill_lock);
 	ill->ill_dl_up = 0;
-	(void) ill_hook_event_create(ill, 0, NE_DOWN, NULL, 0);
+	ill_nic_event_dispatch(ill, 0, NE_DOWN, NULL, 0);
 	mutex_exit(&ill->ill_lock);
 }
 
@@ -18707,6 +18664,8 @@
 		ipif_was_up = B_TRUE;
 		/* Update status in SCTP's list */
 		sctp_update_ipif(ipif, SCTP_IPIF_DOWN);
+		ill_nic_event_dispatch(ipif->ipif_ill,
+		    MAP_IPIF_ID(ipif->ipif_id), NE_LIF_DOWN, NULL, 0);
 	}
 
 	/*
@@ -20480,11 +20439,18 @@
 
 	}
 
-	/* This is the first interface on this ill */
-	if (ipif->ipif_ipif_up_count == 1 && !loopback) {
+	if (ill->ill_need_recover_multicast) {
 		/*
 		 * Need to recover all multicast memberships in the driver.
-		 * This had to be deferred until we had attached.
+		 * This had to be deferred until we had attached.  The same
+		 * code exists in ipif_up_done_v6() to recover IPv6
+		 * memberships.
+		 *
+		 * Note that it would be preferable to unconditionally do the
+		 * ill_recover_multicast() in ill_dl_up(), but we cannot do
+		 * that since ill_join_allmulti() depends on ill_dl_up being
+		 * set, and it is not set until we receive a DL_BIND_ACK after
+		 * having called ill_dl_up().
 		 */
 		ill_recover_multicast(ill);
 	}
@@ -20537,12 +20503,8 @@
 	 * been validated.  Otherwise, if it isn't ready yet, wait for
 	 * duplicate address detection to do its thing.
 	 */
-	if (ipif->ipif_addr_ready) {
-		ip_rts_ifmsg(ipif);
-		ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
-		/* Let SCTP update the status for this ipif */
-		sctp_update_ipif(ipif, SCTP_IPIF_UP);
-	}
+	if (ipif->ipif_addr_ready)
+		ipif_up_notify(ipif);
 	return (0);
 
 bad:
@@ -22774,7 +22736,7 @@
 		 * for the ill_names should ipmp_hook_emulation be turned on
 		 * later.
 		 */
-		ill_nic_info_plumb(ill, B_FALSE);
+		ill_nic_event_plumb(ill, B_FALSE);
 	}
 	RELEASE_ILL_LOCKS(ill, ill_other);
 	mutex_exit(&phyi->phyint_lock);
@@ -22786,7 +22748,7 @@
  * It will be sent when we leave the ipsq.
  */
 void
-ill_nic_info_plumb(ill_t *ill, boolean_t group)
+ill_nic_event_plumb(ill_t *ill, boolean_t group)
 {
 	phyint_t	*phyi = ill->ill_phyint;
 	char		*name;
@@ -22803,33 +22765,7 @@
 		name = ill->ill_name;
 	}
 
-	(void) ill_hook_event_create(ill, 0, NE_PLUMB, name, namelen);
-}
-
-/*
- * Unhook the nic event message from the ill and enqueue it
- * into the nic event taskq.
- */
-void
-ill_nic_info_dispatch(ill_t *ill)
-{
-	hook_nic_event_int_t *info;
-
-	ASSERT(MUTEX_HELD(&ill->ill_lock));
-
-	if ((info = ill->ill_nic_event_info) != NULL) {
-		if (ddi_taskq_dispatch(eventq_queue_nic,
-		    ip_ne_queue_func, info, DDI_SLEEP) == DDI_FAILURE) {
-			ip2dbg(("ill_nic_info_dispatch: "
-			    "ddi_taskq_dispatch failed\n"));
-			if (info->hnei_event.hne_data != NULL) {
-				kmem_free(info->hnei_event.hne_data,
-				    info->hnei_event.hne_datalen);
-			}
-			kmem_free(info, sizeof (*info));
-		}
-		ill->ill_nic_event_info = NULL;
-	}
+	ill_nic_event_dispatch(ill, 0, NE_PLUMB, name, namelen);
 }
 
 /*
@@ -23089,14 +23025,21 @@
 	 */
 	if (ill->ill_sap == 0) {
 		if (ill->ill_isv6)
-			ill->ill_sap  = IP6_DL_SAP;
+			ill->ill_sap = IP6_DL_SAP;
 		else
-			ill->ill_sap  = IP_DL_SAP;
+			ill->ill_sap = IP_DL_SAP;
 	}
 
 	ill->ill_ifname_pending = 1;
 	ill->ill_ifname_pending_err = 0;
 
+	/*
+	 * When the first ipif comes up in ipif_up_done(), multicast groups
+	 * that were joined while this ill was not bound to the DLPI link need
+	 * to be recovered by ill_recover_multicast().
+	 */
+	ill->ill_need_recover_multicast = 1;
+
 	ill_refhold(ill);
 	rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
 	if ((error = ill_glist_insert(ill, interf_name,
@@ -24443,51 +24386,27 @@
 		return ("DOWN");
 	case NE_ADDRESS_CHANGE:
 		return ("ADDRESS_CHANGE");
+	case NE_LIF_UP:
+		return ("LIF_UP");
+	case NE_LIF_DOWN:
+		return ("LIF_DOWN");
 	default:
 		return ("UNKNOWN");
 	}
 }
 
-static void
-ill_hook_event_destroy(ill_t *ill)
-{
-	hook_nic_event_int_t	*info;
-
-	if ((info = ill->ill_nic_event_info) != NULL) {
-		if (info->hnei_event.hne_data != NULL) {
-			kmem_free(info->hnei_event.hne_data,
-			    info->hnei_event.hne_datalen);
-		}
-		kmem_free(info, sizeof (*info));
-
-		ill->ill_nic_event_info = NULL;
-	}
-
-}
-
-boolean_t
-ill_hook_event_create(ill_t *ill, lif_if_t lif, nic_event_t event,
+void
+ill_nic_event_dispatch(ill_t *ill, lif_if_t lif, nic_event_t event,
     nic_event_data_t data, size_t datalen)
 {
 	ip_stack_t		*ipst = ill->ill_ipst;
 	hook_nic_event_int_t	*info;
 	const char		*str = NULL;
 
-	/* destroy nic event info if it exists */
-	if ((info = ill->ill_nic_event_info) != NULL) {
-		str = ill_hook_event2str(info->hnei_event.hne_event);
-		ip2dbg(("ill_hook_event_create: unexpected nic event %s "
-		    "attached for %s\n", str, ill->ill_name));
-		ill_hook_event_destroy(ill);
-	}
-
 	/* create a new nic event info */
-	info = kmem_alloc(sizeof (*info), KM_NOSLEEP);
-	if (info == NULL)
+	if ((info = kmem_alloc(sizeof (*info), KM_NOSLEEP)) == NULL)
 		goto fail;
 
-	ill->ill_nic_event_info = info;
-
 	if (event == NE_UNPLUMB)
 		info->hnei_event.hne_nic = ill->ill_phyint->phyint_ifindex;
 	else
@@ -24502,19 +24421,35 @@
 
 	if (data != NULL && datalen != 0) {
 		info->hnei_event.hne_data = kmem_alloc(datalen, KM_NOSLEEP);
-		if (info->hnei_event.hne_data != NULL) {
-			bcopy(data, info->hnei_event.hne_data, datalen);
-			info->hnei_event.hne_datalen = datalen;
-		} else {
-			ill_hook_event_destroy(ill);
+		if (info->hnei_event.hne_data == NULL)
 			goto fail;
-		}
-	}
-
-	return (B_TRUE);
+		bcopy(data, info->hnei_event.hne_data, datalen);
+		info->hnei_event.hne_datalen = datalen;
+	}
+
+	if (ddi_taskq_dispatch(eventq_queue_nic, ip_ne_queue_func, info,
+	    DDI_NOSLEEP) == DDI_SUCCESS)
+		return;
+
 fail:
+	if (info != NULL) {
+		if (info->hnei_event.hne_data != NULL) {
+			kmem_free(info->hnei_event.hne_data,
+			    info->hnei_event.hne_datalen);
+		}
+		kmem_free(info, sizeof (*info));	/* match alloc size */
+	}
 	str = ill_hook_event2str(event);
-	ip2dbg(("ill_hook_event_create: could not attach %s nic event "
+	ip2dbg(("ill_nic_event_dispatch: could not dispatch %s nic event "
 	    "information for %s (ENOMEM)\n", str, ill->ill_name));
-	return (B_FALSE);
-}
+}
+
+void
+ipif_up_notify(ipif_t *ipif)
+{
+	ip_rts_ifmsg(ipif);
+	ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
+	sctp_update_ipif(ipif, SCTP_IPIF_UP);
+	ill_nic_event_dispatch(ipif->ipif_ill, MAP_IPIF_ID(ipif->ipif_id),
+	    NE_LIF_UP, NULL, 0);
+}
--- a/usr/src/uts/common/inet/ip/ip_multi.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip/ip_multi.c	Thu Nov 06 06:47:54 2008 -0500
@@ -24,8 +24,6 @@
  */
 /* Copyright (c) 1990 Mentat Inc. */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/types.h>
 #include <sys/stream.h>
 #include <sys/dlpi.h>
@@ -626,7 +624,7 @@
 		if (ilm_numentries_v6(ill, &v6group) > 1)
 			return (0);
 		if (ill->ill_group == NULL)
-			ret = ip_join_allmulti(ipif);
+			ret = ill_join_allmulti(ill);
 		else
 			ret = ill_nominate_mcast_rcv(ill->ill_group);
 		if (ret != 0)
@@ -718,7 +716,7 @@
 		if (ilm_numentries_v6(ill, v6group) > 1)
 			return (0);
 		if (ill->ill_group == NULL)
-			ret = ip_join_allmulti(ill->ill_ipif);
+			ret = ill_join_allmulti(ill);
 		else
 			ret = ill_nominate_mcast_rcv(ill->ill_group);
 
@@ -854,7 +852,6 @@
 	ill_t	*ill = ipif->ipif_ill;
 	ilm_t *ilm;
 	in6_addr_t v6group;
-	int	ret;
 
 	ASSERT(IAM_WRITER_IPIF(ipif));
 
@@ -899,20 +896,13 @@
 		if (ilm_numentries_v6(ill, &v6group) != 0)
 			return (0);
 
-		/*
-		 * If we never joined, then don't leave.  This can happen
-		 * if we're in an IPMP group, since only one ill per IPMP
-		 * group receives all multicast packets.
-		 */
-		if (!ill->ill_join_allmulti) {
-			ASSERT(ill->ill_group != NULL);
-			return (0);
+		/* If we never joined, then don't leave. */
+		if (ill->ill_join_allmulti) {
+			ill_leave_allmulti(ill);
+			if (ill->ill_group != NULL)
+				(void) ill_nominate_mcast_rcv(ill->ill_group);
 		}
-
-		ret = ip_leave_allmulti(ipif);
-		if (ill->ill_group != NULL)
-			(void) ill_nominate_mcast_rcv(ill->ill_group);
-		return (ret);
+		return (0);
 	}
 
 	if (!IS_LOOPBACK(ill))
@@ -939,7 +929,6 @@
 {
 	ipif_t	*ipif;
 	ilm_t *ilm;
-	int	ret;
 
 	ASSERT(IAM_WRITER_ILL(ill));
 
@@ -995,20 +984,13 @@
 		if (ilm_numentries_v6(ill, v6group) != 0)
 			return (0);
 
-		/*
-		 * If we never joined, then don't leave.  This can happen
-		 * if we're in an IPMP group, since only one ill per IPMP
-		 * group receives all multicast packets.
-		 */
-		if (!ill->ill_join_allmulti) {
-			ASSERT(ill->ill_group != NULL);
-			return (0);
+		/* If we never joined, then don't leave. */
+		if (ill->ill_join_allmulti) {
+			ill_leave_allmulti(ill);
+			if (ill->ill_group != NULL)
+				(void) ill_nominate_mcast_rcv(ill->ill_group);
 		}
-
-		ret = ip_leave_allmulti(ipif);
-		if (ill->ill_group != NULL)
-			(void) ill_nominate_mcast_rcv(ill->ill_group);
-		return (ret);
+		return (0);
 	}
 
 	if (!IS_LOOPBACK(ill))
@@ -1123,13 +1105,12 @@
  * one ill joining the allmulti group.
  */
 int
-ip_join_allmulti(ipif_t *ipif)
+ill_join_allmulti(ill_t *ill)
 {
-	ill_t	*ill = ipif->ipif_ill;
-	mblk_t	*mp;
+	mblk_t		*promiscon_mp, *promiscoff_mp;
 	uint32_t	addrlen, addroff;
 
-	ASSERT(IAM_WRITER_IPIF(ipif));
+	ASSERT(IAM_WRITER_ILL(ill));
 
 	if (!ill->ill_dl_up) {
 		/*
@@ -1142,18 +1123,25 @@
 	ASSERT(!ill->ill_join_allmulti);
 
 	/*
-	 * Create a DL_PROMISCON_REQ message and send it directly to
-	 * the DLPI provider.  We don't need to do this for certain
-	 * media types for which we never need to turn promiscuous
-	 * mode on.
+	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
+	 * provider.  We don't need to do this for certain media types for
+	 * which we never need to turn promiscuous mode on.  While we're here,
+	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
+	 * ill_leave_allmulti() will not fail due to low memory conditions.
 	 */
 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
-		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
+		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
-		if (mp == NULL)
+		promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
+		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
+		if (promiscon_mp == NULL || promiscoff_mp == NULL) {
+			freemsg(promiscon_mp);
+			freemsg(promiscoff_mp);
 			return (ENOMEM);
-		ill_dlpi_send(ill, mp);
+		}
+		ill->ill_promiscoff_mp = promiscoff_mp;
+		ill_dlpi_send(ill, promiscon_mp);
 	}
 
 	ill->ill_join_allmulti = B_TRUE;
@@ -1166,21 +1154,19 @@
  * With ill groups, we need to nominate some other ill as
  * this ipif->ipif_ill is leaving the group.
  */
-int
-ip_leave_allmulti(ipif_t *ipif)
+void
+ill_leave_allmulti(ill_t *ill)
 {
-	ill_t	*ill = ipif->ipif_ill;
-	mblk_t	*mp;
-	uint32_t	addrlen, addroff;
-
-	ASSERT(IAM_WRITER_IPIF(ipif));
+	mblk_t		*promiscoff_mp = ill->ill_promiscoff_mp;
+
+	ASSERT(IAM_WRITER_ILL(ill));
 
 	if (!ill->ill_dl_up) {
 		/*
 		 * Nobody there. All multicast addresses will be re-joined
 		 * when we get the DL_BIND_ACK bringing the interface up.
 		 */
-		return (0);
+		return;
 	}
 
 	ASSERT(ill->ill_join_allmulti);
@@ -1193,18 +1179,97 @@
 	 */
 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
-		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
-		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
-		if (mp == NULL)
-			return (ENOMEM);
-		ill_dlpi_send(ill, mp);
+		ASSERT(promiscoff_mp != NULL);
+		ill->ill_promiscoff_mp = NULL;
+		ill_dlpi_send(ill, promiscoff_mp);
 	}
 
 	ill->ill_join_allmulti = B_FALSE;
+}
+
+static ill_t *
+ipsq_enter_byifindex(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
+{
+	ill_t		*ill;
+	boolean_t	in_ipsq;
+
+	ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL,
+	    ipst);
+	if (ill != NULL) {
+		if (!ill_waiter_inc(ill)) {
+			ill_refrele(ill);
+			return (NULL);
+		}
+		ill_refrele(ill);
+		in_ipsq = ipsq_enter(ill, B_FALSE);
+		ill_waiter_dcr(ill);
+		if (!in_ipsq)
+			ill = NULL;
+	}
+	return (ill);
+}
+
+int
+ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
+{
+	ill_t		*ill;
+	int		ret;
+
+	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
+		return (ENODEV);
+	if (isv6) {
+		ret = ip_addmulti_v6(&ipv6_all_zeros, ill, ifindex,
+		    ill->ill_zoneid, ILGSTAT_NONE, MODE_IS_EXCLUDE, NULL);
+	} else {
+		ret = ip_addmulti(INADDR_ANY, ill->ill_ipif, ILGSTAT_NONE,
+		    MODE_IS_EXCLUDE, NULL);
+	}
+	ill->ill_ipallmulti_cnt++;
+	ipsq_exit(ill->ill_phyint->phyint_ipsq);
+	return (ret);
+}
+
+int
+ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
+{
+	ill_t		*ill;
+
+	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
+		return (ENODEV);
+	ASSERT(ill->ill_ipallmulti_cnt != 0);
+	if (isv6) {
+		(void) ip_delmulti_v6(&ipv6_all_zeros, ill, ifindex,
+		    ill->ill_zoneid, B_TRUE, B_TRUE);
+	} else {
+		(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, B_TRUE);
+	}
+	ill->ill_ipallmulti_cnt--;
+	ipsq_exit(ill->ill_phyint->phyint_ipsq);
 	return (0);
 }
 
 /*
+ * Delete the allmulti memberships that were added as part of
+ * ip_join_allmulti().
+ */
+void
+ip_purge_allmulti(ill_t *ill)
+{
+	ASSERT(IAM_WRITER_ILL(ill));
+
+	for (; ill->ill_ipallmulti_cnt > 0; ill->ill_ipallmulti_cnt--) {
+		if (ill->ill_isv6) {
+			(void) ip_delmulti_v6(&ipv6_all_zeros, ill,
+			    ill->ill_phyint->phyint_ifindex, ill->ill_zoneid,
+			    B_TRUE, B_TRUE);
+		} else {
+			(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE,
+			    B_TRUE);
+		}
+	}
+}
+
+/*
  * Copy mp_orig and pass it in as a local message.
  */
 void
@@ -1477,6 +1542,9 @@
 	char    addrbuf[INET6_ADDRSTRLEN];
 
 	ASSERT(IAM_WRITER_ILL(ill));
+
+	ill->ill_need_recover_multicast = 0;
+
 	ILM_WALKER_HOLD(ill);
 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
 		/*
@@ -1492,7 +1560,7 @@
 		    sizeof (addrbuf))));
 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
 			if (ill->ill_group == NULL) {
-				(void) ip_join_allmulti(ill->ill_ipif);
+				(void) ill_join_allmulti(ill);
 			} else {
 				/*
 				 * We don't want to join on this ill,
@@ -1522,6 +1590,9 @@
 	char    addrbuf[INET6_ADDRSTRLEN];
 
 	ASSERT(IAM_WRITER_ILL(ill));
+
+	ill->ill_need_recover_multicast = 1;
+
 	ILM_WALKER_HOLD(ill);
 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
 		/*
@@ -1536,7 +1607,7 @@
 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
 		    sizeof (addrbuf))));
 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
-			(void) ip_leave_allmulti(ill->ill_ipif);
+			ill_leave_allmulti(ill);
 			/*
 			 * If we were part of an IPMP group, then
 			 * ill_handoff_responsibility() has already
--- a/usr/src/uts/common/inet/ip/ip_ndp.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip/ip_ndp.c	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/types.h>
 #include <sys/stream.h>
 #include <sys/stropts.h>
@@ -2561,11 +2559,8 @@
 					    "%s on %s", sbuf, ibuf);
 				}
 				if ((ipif->ipif_flags & IPIF_UP) &&
-				    !ipif->ipif_addr_ready) {
-					ip_rts_ifmsg(ipif);
-					ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
-					sctp_update_ipif(ipif, SCTP_IPIF_UP);
-				}
+				    !ipif->ipif_addr_ready)
+					ipif_up_notify(ipif);
 				ipif->ipif_addr_ready = 1;
 				ipif_refrele(ipif);
 			}
--- a/usr/src/uts/common/inet/ip/ip_netinfo.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip/ip_netinfo.c	Thu Nov 06 06:47:54 2008 -0500
@@ -59,6 +59,10 @@
 static int 		ip_getpmtuenabled(net_handle_t);
 static int 		ip_getlifaddr(net_handle_t, phy_if_t, lif_if_t,
 			    size_t, net_ifaddr_t [], void *);
+static int		ip_getlifzone(net_handle_t, phy_if_t, lif_if_t,
+			    zoneid_t *);
+static int		ip_getlifflags(net_handle_t, phy_if_t, lif_if_t,
+			    uint64_t *);
 static phy_if_t		ip_phygetnext(net_handle_t, phy_if_t);
 static phy_if_t 	ip_phylookup(net_handle_t, const char *);
 static lif_if_t 	ip_lifgetnext(net_handle_t, phy_if_t, lif_if_t);
@@ -73,6 +77,10 @@
 static int 		ipv6_getmtu(net_handle_t, phy_if_t, lif_if_t);
 static int 		ipv6_getlifaddr(net_handle_t, phy_if_t, lif_if_t,
 			    size_t, net_ifaddr_t [], void *);
+static int		ipv6_getlifzone(net_handle_t, phy_if_t, lif_if_t,
+			    zoneid_t *);
+static int		ipv6_getlifflags(net_handle_t, phy_if_t, lif_if_t,
+			    uint64_t *);
 static phy_if_t 	ipv6_phygetnext(net_handle_t, phy_if_t);
 static phy_if_t 	ipv6_phylookup(net_handle_t, const char *);
 static lif_if_t 	ipv6_lifgetnext(net_handle_t, phy_if_t, lif_if_t);
@@ -111,6 +119,8 @@
 	ip_getmtu,
 	ip_getpmtuenabled,
 	ip_getlifaddr,
+	ip_getlifzone,
+	ip_getlifflags,
 	ip_phygetnext,
 	ip_phylookup,
 	ip_lifgetnext,
@@ -128,6 +138,8 @@
 	ipv6_getmtu,
 	ip_getpmtuenabled,
 	ipv6_getlifaddr,
+	ipv6_getlifzone,
+	ipv6_getlifflags,
 	ipv6_phygetnext,
 	ipv6_phylookup,
 	ipv6_lifgetnext,
@@ -891,6 +903,7 @@
 	ip6_t *ip6h;
 	ire_t *ire;
 	mblk_t *mp;
+	zoneid_t zoneid;
 
 	ASSERT(packet != NULL);
 	ASSERT(packet->ni_packet != NULL);
@@ -935,6 +948,8 @@
 		 * provide similar functionality for IPv6.
 		 */
 		mp = packet->ni_packet;
+		zoneid =
+		    netstackid_to_zoneid(ipst->ips_netstack->netstack_stackid);
 
 		if (!isv6) {
 			struct sockaddr *sock;
@@ -946,8 +961,7 @@
 			 * Currently this function only supports IPv4.
 			 */
 			switch (ipfil_sendpkt(sock, mp, packet->ni_physical,
-			    netstackid_to_zoneid(
-			    ipst->ips_netstack->netstack_stackid))) {
+			    zoneid)) {
 			case 0 :
 			case EINPROGRESS:
 				return (0);
@@ -989,7 +1003,7 @@
 			    ire->ire_ipif->ipif_ill, ipha_t *, NULL, ip6_t *,
 			    ip6h, int, 1);
 			ip_wput_local_v6(ire->ire_rfq,
-			    ire->ire_ipif->ipif_ill, ip6h, mp, ire, 0);
+			    ire->ire_ipif->ipif_ill, ip6h, mp, ire, 0, zoneid);
 			ire_refrele(ire);
 			return (0);
 		}
@@ -1409,6 +1423,78 @@
 }
 
 /*
+ * Shared implementation to determine the zoneid associated with an IPv4/IPv6
+ * address
+ */
+static int
+ip_getlifzone_impl(sa_family_t family, phy_if_t phy_ifdata, lif_if_t ifdata,
+    ip_stack_t *ipst, zoneid_t *zoneid)
+{
+	ipif_t  *ipif;
+
+	ipif = ipif_getby_indexes((uint_t)phy_ifdata,
+	    UNMAP_IPIF_ID((uint_t)ifdata), (family == AF_INET6), ipst);
+	if (ipif == NULL)
+		return (-1);
+	*zoneid = IP_REAL_ZONEID(ipif->ipif_zoneid, ipst);
+	ipif_refrele(ipif);
+	return (0);
+}
+
+/*
+ * Determine the zoneid associated with an IPv4 address
+ */
+static int
+ip_getlifzone(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata,
+    zoneid_t *zoneid)
+{
+	return (ip_getlifzone_impl(AF_INET, phy_ifdata, ifdata,
+	    neti->netd_stack->nts_netstack->netstack_ip, zoneid));
+}
+
+/*
+ * Determine the zoneid associated with an IPv6 address
+ */
+static int
+ipv6_getlifzone(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata,
+    zoneid_t *zoneid)
+{
+	return (ip_getlifzone_impl(AF_INET6, phy_ifdata, ifdata,
+	    neti->netd_stack->nts_netstack->netstack_ip, zoneid));
+}
+
+static int
+ip_getlifflags_impl(sa_family_t family, phy_if_t phy_ifdata, lif_if_t ifdata,
+    ip_stack_t *ipst, uint64_t *flags)
+{
+	ipif_t *ipif;
+
+	ipif = ipif_getby_indexes((uint_t)phy_ifdata,
+	    UNMAP_IPIF_ID((uint_t)ifdata), (family == AF_INET6), ipst);
+	if (ipif == NULL)
+		return (-1);
+	*flags = ipif->ipif_flags;
+	ipif_refrele(ipif);
+	return (0);
+}
+
+static int
+ip_getlifflags(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata,
+    uint64_t *flags)
+{
+	return (ip_getlifflags_impl(AF_INET, phy_ifdata, ifdata,
+	    neti->netd_stack->nts_netstack->netstack_ip, flags));
+}
+
+static int
+ipv6_getlifflags(net_handle_t neti, phy_if_t phy_ifdata, lif_if_t ifdata,
+    uint64_t *flags)
+{
+	return (ip_getlifflags_impl(AF_INET6, phy_ifdata, ifdata,
+	    neti->netd_stack->nts_netstack->netstack_ip, flags));
+}
+
+/*
  * Deliver packet up into the kernel, immitating its reception by a
  * network interface.
  */
--- a/usr/src/uts/common/inet/ip6.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip6.h	Thu Nov 06 06:47:54 2008 -0500
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_INET_IP6_H
 #define	_INET_IP6_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -366,7 +364,7 @@
 extern int	ipsec_ah_get_hdr_size_v6(mblk_t *, boolean_t);
 extern void	ip_wput_v6(queue_t *, mblk_t *);
 extern void	ip_wput_local_v6(queue_t *, ill_t *, ip6_t *, mblk_t *,
-    ire_t *, int);
+    ire_t *, int, zoneid_t);
 extern void	ip_output_v6(void *, mblk_t *, void *, int);
 extern void	ip_xmit_v6(mblk_t *, ire_t *, uint_t, conn_t *, int,
     struct ipsec_out_s *);
--- a/usr/src/uts/common/inet/ip_if.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip_if.h	Thu Nov 06 06:47:54 2008 -0500
@@ -27,8 +27,6 @@
 #ifndef	_INET_IP_IF_H
 #define	_INET_IP_IF_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <net/route.h>
 
 #ifdef	__cplusplus
@@ -260,6 +258,7 @@
 extern	int	ipif_ndp_setup_multicast(ipif_t *, struct nce_s **);
 extern	int	ipif_up_done(ipif_t *);
 extern	int	ipif_up_done_v6(ipif_t *);
+extern	void	ipif_up_notify(ipif_t *);
 extern	void	ipif_update_other_ipifs_v6(ipif_t *, ill_group_t *);
 extern	void	ipif_recreate_interface_routes_v6(ipif_t *, ipif_t *);
 extern	void	ill_update_source_selection(ill_t *);
--- a/usr/src/uts/common/inet/ip_impl.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip_impl.h	Thu Nov 06 06:47:54 2008 -0500
@@ -26,8 +26,6 @@
 #ifndef	_INET_IP_IMPL_H
 #define	_INET_IP_IMPL_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * IP implementation private declarations.  These interfaces are
  * used to build the IP module and are not meant to be accessed
@@ -507,7 +505,7 @@
  * Macro that hands off one or more messages directly to DLD
  * when the interface is marked with ILL_CAPAB_POLL.
  */
-#define	IP_DLS_ILL_TX(ill, ipha, mp, ipst) {				\
+#define	IP_DLS_ILL_TX(ill, ipha, mp, ipst, hlen) {			\
 	ill_dls_capab_t *ill_dls = ill->ill_dls_capab;			\
 	ASSERT(ILL_DLS_CAPABLE(ill));					\
 	ASSERT(ill_dls != NULL);					\
@@ -520,7 +518,15 @@
 	    ipst->ips_ipv4firewall_physical_out,			\
 	    NULL, ill, ipha, mp, mp, 0, ipst);				\
 	DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);		\
-	if (mp != NULL)	{						\
+	if (mp != NULL) {						\
+		if (ipst->ips_ipobs_enabled) {				\
+			zoneid_t szone;					\
+									\
+			szone = ip_get_zoneid_v4(ipha->ipha_src, mp,	\
+			    ipst, ALL_ZONES);				\
+			ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,	\
+			    ALL_ZONES, ill, IPV4_VERSION, hlen, ipst);	\
+		}							\
 		DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,		\
 		    void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill,	\
 		    ipha_t *, ipha, ip6_t *, NULL, int,	0);		\
@@ -528,9 +534,23 @@
 	}								\
 }
 
+/*
+ * In non-global zone exclusive IP stacks, data structures such as IRE
+ * entries pretend that they're in the global zone.  The following
+ * macro evaluates to the real zoneid instead of a pretend
+ * GLOBAL_ZONEID.
+ */
+#define	IP_REAL_ZONEID(zoneid, ipst)					\
+	(((zoneid) == GLOBAL_ZONEID) ?					\
+	    netstackid_to_zoneid((ipst)->ips_netstack->netstack_stackid) : \
+	    (zoneid))
+
 extern int	ip_wput_frag_mdt_min;
 extern boolean_t ip_can_frag_mdt(mblk_t *, ssize_t, ssize_t);
 extern mblk_t   *ip_prepend_zoneid(mblk_t *, zoneid_t, ip_stack_t *);
+extern zoneid_t	ip_get_zoneid_v4(ipaddr_t, mblk_t *, ip_stack_t *, zoneid_t);
+extern zoneid_t	ip_get_zoneid_v6(in6_addr_t *, mblk_t *, const ill_t *,
+    ip_stack_t *, zoneid_t);
 
 #endif	/* _KERNEL */
 
--- a/usr/src/uts/common/inet/ip_multi.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip_multi.h	Thu Nov 06 06:47:54 2008 -0500
@@ -27,8 +27,6 @@
 #ifndef	_INET_IP_MULTI_H
 #define	_INET_IP_MULTI_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -99,8 +97,11 @@
 extern	int		ip_delmulti(ipaddr_t, ipif_t *, boolean_t, boolean_t);
 extern	int		ip_delmulti_v6(const in6_addr_t *, ill_t *, int,
     zoneid_t, boolean_t, boolean_t);
-extern	int		ip_join_allmulti(ipif_t *);
-extern	int		ip_leave_allmulti(ipif_t *);
+extern	int		ill_join_allmulti(ill_t *);
+extern	void		ill_leave_allmulti(ill_t *);
+extern	int		ip_join_allmulti(uint_t, boolean_t, ip_stack_t *);
+extern	int		ip_leave_allmulti(uint_t, boolean_t, ip_stack_t *);
+extern	void		ip_purge_allmulti(ill_t *);
 extern	void		ip_multicast_loopback(queue_t *, ill_t *, mblk_t *,
     int, zoneid_t);
 extern	int		ip_mforward(ill_t *, ipha_t *, mblk_t *);
--- a/usr/src/uts/common/inet/ip_stack.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/ip_stack.h	Thu Nov 06 06:47:54 2008 -0500
@@ -412,6 +412,12 @@
 
 	net_handle_t		ips_ipv4_net_data;
 	net_handle_t		ips_ipv6_net_data;
+
+	boolean_t		ips_ipobs_enabled;
+	list_t			ips_ipobs_cb_list;
+	kmutex_t		ips_ipobs_cb_lock;
+	uint_t			ips_ipobs_cb_nwalkers;
+	kcondvar_t		ips_ipobs_cb_cv;
 };
 typedef struct ip_stack ip_stack_t;
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/inet/ipnet.h	Thu Nov 06 06:47:54 2008 -0500
@@ -0,0 +1,201 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _INET_IPNET_H
+#define	_INET_IPNET_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/netstack.h>
+#include <sys/list.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <sys/avl.h>
+#include <sys/neti.h>
+
+/*
+ * Structure used to hold information for both IPv4 and IPv6 addresses.
+ */
+typedef struct ipnetif_addr {
+	union {
+		ipaddr_t	ifau_ip4addr;
+		in6_addr_t	ifau_ip6addr;
+	} ifa_addr;
+	ipaddr_t	ifa_brdaddr;
+	zoneid_t	ifa_zone;
+	uint64_t	ifa_id;
+	list_node_t	ifa_link;
+} ipnetif_addr_t;
+#define	ifa_ip4addr	ifa_addr.ifau_ip4addr
+#define	ifa_ip6addr	ifa_addr.ifau_ip6addr
+
+/*
+ * Structure describes the ipnet module representation of an ip interface.
+ * The structure holds both IPv4 and IPv6 addresses, the address lists are
+ * protected by a mutex. The ipnetif structures are held per stack instance
+ * within avl trees indexed on name and ip index.
+ */
+typedef struct ipnetif {
+	char		if_name[LIFNAMSIZ];
+	uint_t		if_flags;
+	uint64_t	if_index;
+	kmutex_t	if_addr_lock;	/* protects both addr lists */
+	list_t		if_ip4addr_list;
+	list_t		if_ip6addr_list;
+	avl_node_t	if_avl_by_index;
+	avl_node_t	if_avl_by_name;
+	dev_t		if_dev;
+	uint_t		if_multicnt;	/* protected by ips_event_lock */
+	kmutex_t	if_reflock;	/* protects if_refcnt */
+	uint_t		if_refcnt;
+} ipnetif_t;
+
+/* if_flags */
+#define	IPNETIF_IPV4PLUMBED	0x01
+#define	IPNETIF_IPV6PLUMBED	0x02
+#define	IPNETIF_IPV4ALLMULTI	0x04
+#define	IPNETIF_IPV6ALLMULTI	0x08
+
+/*
+ * Structure used by the accept callback function.  This is simply an address
+ * pointer into a packet (either IPv4 or IPv6), along with an address family
+ * that denotes which pointer is valid.
+ */
+typedef struct ipnet_addrp {
+	sa_family_t	iap_family;
+	union {
+		ipaddr_t	*iapu_addr4;
+		in6_addr_t	*iapu_addr6;
+	} iap_addrp;
+} ipnet_addrp_t;
+#define	iap_addr4	iap_addrp.iapu_addr4
+#define	iap_addr6	iap_addrp.iapu_addr6
+
+struct ipnet;
+struct ipobs_hook_data;
+typedef boolean_t ipnet_acceptfn_t(struct ipnet *, struct ipobs_hook_data *,
+    ipnet_addrp_t *, ipnet_addrp_t *);
+
+/*
+ * Per-instance data for all open streams.  Instance data is held on a
+ * per-netstack list; see struct ipnet_stack below.
+ */
+typedef struct ipnet {
+	queue_t		*ipnet_rq;	/* read queue pointer */
+	minor_t		ipnet_minor;	/* minor number for this instance */
+	ipnetif_t	*ipnet_if;	/* ipnetif for this open instance */
+	zoneid_t	ipnet_zoneid;	/* zoneid the device was opened in */
+	uint16_t	ipnet_flags;	/* see below */
+	t_scalar_t	ipnet_sap;	/* sap this instance is bound to */
+	t_uscalar_t	ipnet_dlstate;	/* dlpi state */
+	list_node_t	ipnet_next;	/* list next member */
+	netstack_t	*ipnet_ns;	/* netstack of zone we were opened in */
+	ipnet_acceptfn_t *ipnet_acceptfn; /* accept callback function pointer */
+} ipnet_t;
+
+/* ipnet_flags */
+#define	IPNET_PROMISC_PHYS	0x01
+#define	IPNET_PROMISC_MULTI	0x02
+#define	IPNET_PROMISC_SAP	0x04
+#define	IPNET_INFO		0x08
+#define	IPNET_LOMODE		0x10
+
+/*
+ * Per-netstack data holding:
+ * - net_handle_t references for IPv4 and IPv6 for this netstack.
+ * - avl trees by name and index for ip interfaces associated with this
+ *   netstack. The trees are protected by ips_avl_lock.
+ * - ips_str_list is a list of open client streams.  ips_walkers_lock in
+ *   conjunction with ips_walkers_cv and ips_walkers_cnt synchronize access to
+ *   the list.  The count is incremented in ipnet_dispatch() at the start of a
+ *   walk and decremented when the walk is finished. If the walkers count is 0
+ *   then we cv_broadcast() to wake any threads waiting on the walkers count.
+ * - ips_event_lock synchronizes ipnet_if_init() and incoming NIC info events.
+ *   We cannot be processing any NIC info events while initializing interfaces
+ *   in ipnet_if_init().
+ *
+ * Note on lock ordering: If a thread needs to both hold the ips_event_lock
+ * and any other lock such as ips_walkers_lock, ips_avl_lock, or if_addr_lock,
+ * the ips_event_lock must be held first.  This lock ordering is mandated by
+ * ipnet_nicevent_cb() which must always grab ips_event_lock before continuing
+ * with processing NIC events.
+ */
+typedef struct ipnet_stack {
+	net_handle_t	ips_ndv4;
+	net_handle_t	ips_ndv6;
+	netstack_t	*ips_netstack;
+	hook_t		*ips_nicevents;
+	kmutex_t	ips_event_lock;
+	kmutex_t	ips_avl_lock;
+	avl_tree_t	ips_avl_by_index;
+	avl_tree_t	ips_avl_by_name;
+	kmutex_t	ips_walkers_lock;
+	kcondvar_t	ips_walkers_cv;
+	uint_t		ips_walkers_cnt;
+	list_t		ips_str_list;
+	uint64_t	ips_drops;
+} ipnet_stack_t;
+
+/*
+ * Template for dl_info_ack_t initialization.  We don't have an address, so we
+ * set the address length to just the SAP length (16 bits).  We don't really
+ * have a maximum SDU, but setting it to UINT_MAX proved problematic with
+ * applications that performed arithmetic on dl_max_sdu and wrapped around, so
+ * we sleaze out and use INT_MAX.
+ */
+#define	IPNET_INFO_ACK_INIT {						\
+	DL_INFO_ACK,			/* dl_primitive */		\
+	INT_MAX,			/* dl_max_sdu */		\
+	0,				/* dl_min_sdu */		\
+	sizeof (uint16_t),		/* dl_addr_length */ 		\
+	DL_IPNET,			/* dl_mac_type */		\
+	0,				/* dl_reserved */		\
+	0,				/* dl_current_state */		\
+	sizeof (uint16_t),		/* dl_sap_length */ 		\
+	DL_CLDLS,			/* dl_service_mode */		\
+	0,				/* dl_qos_length */		\
+	0,				/* dl_qos_offset */		\
+	0,				/* dl_range_length */		\
+	0,				/* dl_range_offset */		\
+	DL_STYLE1,			/* dl_provider_style */		\
+	0,				/* dl_addr_offset */		\
+	DL_VERSION_2,			/* dl_version */		\
+	0,				/* dl_brdcst_addr_length */	\
+	0				/* dl_brdcst_addr_offset */	\
+}
+
+typedef void ipnet_walkfunc_t(const char *, void *, dev_t);
+extern void ipnet_walk_if(ipnet_walkfunc_t *, void *, zoneid_t);
+extern dev_t ipnet_if_getdev(char *, zoneid_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_IPNET_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/inet/ipnet/ipnet.c	Thu Nov 06 06:47:54 2008 -0500
@@ -0,0 +1,1722 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * The ipnet device defined here provides access to packets at the IP layer. To
+ * provide access to packets at this layer it registers a callback function in
+ * the ip module and when there are open instances of the device ip will pass
+ * packets into the device. Packets from ip are passed on the input, output and
+ * loopback paths. Internally the module returns to ip as soon as possible by
+ * deferring processing using a taskq.
+ *
+ * Management of the devices in /dev/ipnet/ is handled by the devname
+ * filesystem and use of the neti interfaces.  This module registers for NIC
+ * events using the neti framework so that when IP interfaces are brought up,
+ * taken down etc. the ipnet module is notified and its view of the interfaces
+ * configured on the system adjusted.  On attach, the module gets an initial
+ * view of the system again using the neti framework but as it has already
+ * registered for IP interface events, it is still up-to-date with any changes.
+ */
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/cred.h>
+#include <sys/stat.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/modctl.h>
+#include <sys/dlpi.h>
+#include <sys/strsun.h>
+#include <sys/id_space.h>
+#include <sys/kmem.h>
+#include <sys/mkdev.h>
+#include <sys/neti.h>
+#include <net/if.h>
+#include <sys/errno.h>
+#include <sys/list.h>
+#include <sys/ksynch.h>
+#include <sys/hook_event.h>
+#include <sys/stropts.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <inet/ip.h>
+#include <inet/ip_multi.h>
+#include <inet/ip6.h>
+#include <inet/ipnet.h>
+
+/*
+ * STREAMS module information.  Both the read-side and write-side qinit
+ * structures in this file point at this single instance.
+ */
+static struct module_info ipnet_minfo = {
+	1,		/* mi_idnum */
+	"ipnet",	/* mi_idname */
+	0,		/* mi_minpsz */
+	INFPSZ,		/* mi_maxpsz */
+	2048,		/* mi_hiwat */
+	0		/* mi_lowat */
+};
+
+/*
+ * List to hold static view of ipnetif_t's on the system. This is needed to
+ * avoid holding the lock protecting the avl tree of ipnetif's over the
+ * callback into the dev filesystem.
+ *
+ * Each entry carries an interface name and a dev_t; presumably these are
+ * copies of the corresponding ipnetif_t fields taken while the lock is
+ * held (the code that fills them in is not in this part of the file).
+ */
+typedef struct ipnetif_cbdata {
+	char		ic_ifname[LIFNAMSIZ];
+	dev_t		ic_dev;
+	list_node_t	ic_next;
+} ipnetif_cbdata_t;
+
+/*
+ * Convenience enumerated type for ipnet_accept().  It describes the
+ * properties of a given ipnet_addrp_t relative to a single ipnet_t
+ * client stream.  The classification itself is computed by
+ * ipnet_get_addrtype().  The values represent whether the address is ...
+ */
+typedef enum {
+	IPNETADDR_MYADDR,	/* an address on my ipnetif_t. */
+	IPNETADDR_MBCAST,	/* a multicast or broadcast address. */
+	IPNETADDR_UNKNOWN	/* none of the above. */
+} ipnet_addrtype_t;
+
+/*
+ * Argument used for the ipnet_nicevent_taskq callback.
+ *
+ * NOTE(review): the fields look like a snapshot of one NIC event (event
+ * type, protocol handle, stack id, physical/logical interface index and
+ * interface name) taken so it can be processed later from the taskq;
+ * confirm against ipnet_nicevent_cb()/ipnet_nicevent_task().
+ */
+typedef struct ipnet_nicevent_s {
+	nic_event_t		ipne_event;
+	net_handle_t		ipne_protocol;
+	netstackid_t		ipne_stackid;
+	uint64_t		ipne_ifindex;
+	uint64_t		ipne_lifindex;
+	char			ipne_ifname[LIFNAMSIZ];
+} ipnet_nicevent_t;
+
+/* Our dev_info node; set in ipnet_attach() and cleared in ipnet_detach(). */
+static dev_info_t	*ipnet_dip;
+/* Major number of the "ipnet" driver, looked up once in _init(). */
+static major_t		ipnet_major;
+static ddi_taskq_t	*ipnet_taskq;		/* taskq for packets */
+static ddi_taskq_t	*ipnet_nicevent_taskq;	/* taskq for NIC events */
+/*
+ * Minor number allocator, created in _init() starting at IPNET_MINOR_MIN.
+ * Both open streams and ipnetif_t's draw their minors from this space.
+ */
+static id_space_t	*ipnet_minor_space;
+static const int	IPNET_MINOR_LO = 1; 	/* minor number for /dev/lo0 */
+static const int 	IPNET_MINOR_MIN = 2; 	/* start of dynamic minors */
+/* Canned DL_INFO_ACK contents copied into every reply by ipnet_inforeq(). */
+static dl_info_ack_t	ipnet_infoack = IPNET_INFO_ACK_INIT;
+/* Per-stream packet filters: regular interfaces and loopback (lo0) mode. */
+static ipnet_acceptfn_t	ipnet_accept, ipnet_loaccept;
+
+static void	ipnet_input(mblk_t *);
+static int	ipnet_wput(queue_t *, mblk_t *);
+static int	ipnet_rsrv(queue_t *);
+static int	ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
+static int	ipnet_close(queue_t *);
+static void	ipnet_ioctl(queue_t *, mblk_t *);
+static void	ipnet_iocdata(queue_t *, mblk_t *);
+static void 	ipnet_wputnondata(queue_t *, mblk_t *);
+static int	ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
+static int	ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
+static int	ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static void	ipnet_inforeq(queue_t *q, mblk_t *mp);
+static void	ipnet_bindreq(queue_t *q, mblk_t *mp);
+static void	ipnet_unbindreq(queue_t *q, mblk_t *mp);
+static void	ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
+static void	ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
+static int	ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
+static void	ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
+static int	ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
+static void	ipnet_nicevent_task(void *);
+static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *);
+static void	ipnet_remove_if(ipnetif_t *, ipnet_stack_t *);
+static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
+static ipnetif_t *ipnet_if_getby_index(uint64_t, ipnet_stack_t *);
+static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *);
+static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
+static void	ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *);
+static int	ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
+static int 	ipnet_if_compare_name(const void *, const void *);
+static int 	ipnet_if_compare_index(const void *, const void *);
+static void	ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
+static void	ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
+static void	ipnetif_refhold(ipnetif_t *);
+static void	ipnetif_refrele(ipnetif_t *);
+static void	ipnet_walkers_inc(ipnet_stack_t *);
+static void	ipnet_walkers_dec(ipnet_stack_t *);
+static void	ipnet_register_netihook(ipnet_stack_t *);
+static void	*ipnet_stack_init(netstackid_t, netstack_t *);
+static void	ipnet_stack_fini(netstackid_t, void *);
+
+/*
+ * Read-side queue: no put procedure, only a service routine that drains
+ * messages queued by ipnet_dispatch().  Open and close live here.
+ */
+static struct qinit ipnet_rinit = {
+	NULL,		/* qi_putp */
+	ipnet_rsrv,	/* qi_srvp */
+	ipnet_open,	/* qi_qopen */
+	ipnet_close,	/* qi_qclose */
+	NULL,		/* qi_qadmin */
+	&ipnet_minfo,	/* qi_minfo */
+};
+
+/* Write-side queue: everything is handled synchronously in ipnet_wput(). */
+static struct qinit ipnet_winit = {
+	ipnet_wput,	/* qi_putp */
+	NULL,		/* qi_srvp */
+	NULL,		/* qi_qopen */
+	NULL,		/* qi_qclose */
+	NULL,		/* qi_qadmin */
+	&ipnet_minfo,	/* qi_minfo */
+};
+
+/* Ties the read and write queue definitions together for the driver. */
+static struct streamtab ipnet_info = {
+	&ipnet_rinit, &ipnet_winit
+};
+
+/*
+ * Driver operations: attach/detach/getinfo entry points plus the STREAMS
+ * table above.  Quiesce is explicitly declared as not supported.
+ */
+DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
+    ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
+    ddi_quiesce_not_supported);
+
+/* Loadable-module linkage describing this module as a device driver. */
+static struct modldrv modldrv = {
+	&mod_driverops,
+	"STREAMS ipnet driver",
+	&ipnet_ops
+};
+
+/* Passed to mod_install(), mod_remove() and mod_info(). */
+static struct modlinkage modlinkage = {
+	MODREV_1, &modldrv, NULL
+};
+
+/*
+ * Walk the list of physical interfaces on the machine, for each
+ * interface create a new ipnetif_t and add any addresses to it. We
+ * need to do the walk twice, once for IPv4 and once for IPv6.
+ *
+ * The interfaces are destroyed as part of ipnet_stack_fini() for each
+ * stack.  Note that we cannot do this initialization in
+ * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
+ *
+ * Returns 0 on success.  On failure the walk stops at the first stack
+ * for which ipnet_populate_if() fails and that error is returned.
+ */
+static int
+ipnet_if_init(void)
+{
+	netstack_handle_t	nh;
+	netstack_t		*ns;
+	ipnet_stack_t		*ips;
+	int			ret = 0;
+
+	netstack_next_init(&nh);
+	while ((ns = netstack_next(&nh)) != NULL) {
+		ips = ns->netstack_ipnet;
+		if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0)
+			ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE);
+		/*
+		 * netstack_next() hands back a held netstack; drop that
+		 * hold on every iteration, including the failing one, so
+		 * the stack's reference count is not leaked.
+		 */
+		netstack_rele(ns);
+		if (ret != 0)
+			break;
+	}
+	netstack_next_fini(&nh);
+	return (ret);
+}
+
+/*
+ * Standard module entry points.
+ */
+/*
+ * Module load entry point.  Looks up our major number, creates the minor
+ * number space and the two taskqs, registers with the netstack framework,
+ * builds the initial interface view and finally installs the module.
+ * Returns 0 on success or an errno value, in which case everything set up
+ * here has been torn down again.
+ */
+int
+_init(void)
+{
+	int		ret;
+	boolean_t	netstack_registered = B_FALSE;
+
+	if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
+		return (ENODEV);
+	ipnet_minor_space = id_space_create("ipnet_minor_space",
+	    IPNET_MINOR_MIN, MAXMIN32);
+
+	/*
+	 * We call ddi_taskq_create() with nthread == 1 to ensure in-order
+	 * delivery of packets to clients.  The taskqs must exist before
+	 * netstack_register() is called: the per-stack init callback is
+	 * expected to register the NIC event hook (see
+	 * ipnet_register_netihook()), and events are handed to
+	 * ipnet_nicevent_taskq.
+	 */
+	ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0);
+	ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue",
+	    1, TASKQ_DEFAULTPRI, 0);
+	if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) {
+		ret = ENOMEM;
+		goto done;
+	}
+
+	netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
+	netstack_registered = B_TRUE;
+
+	if ((ret = ipnet_if_init()) == 0)
+		ret = mod_install(&modlinkage);
+done:
+	if (ret != 0) {
+		/*
+		 * Unregister from the netstack framework first so that no
+		 * per-stack callback can still reference a taskq while it
+		 * is being destroyed.
+		 */
+		if (netstack_registered)
+			netstack_unregister(NS_IPNET);
+		if (ipnet_taskq != NULL)
+			ddi_taskq_destroy(ipnet_taskq);
+		if (ipnet_nicevent_taskq != NULL)
+			ddi_taskq_destroy(ipnet_nicevent_taskq);
+		id_space_destroy(ipnet_minor_space);
+	}
+	return (ret);
+}
+
+/*
+ * Module unload entry point.  Fails with the error from mod_remove() if
+ * the module cannot be removed; otherwise tears down everything _init()
+ * created and returns 0.
+ */
+int
+_fini(void)
+{
+	int err;
+
+	if ((err = mod_remove(&modlinkage)) != 0)
+		return (err);
+
+	/*
+	 * Unregister from the netstack framework before destroying the
+	 * taskqs.  The NIC event hook registered per stack hands work to
+	 * ipnet_nicevent_taskq, so the taskq must outlive the per-stack
+	 * state that can still generate such work.
+	 */
+	netstack_unregister(NS_IPNET);
+	ddi_taskq_destroy(ipnet_nicevent_taskq);
+	ddi_taskq_destroy(ipnet_taskq);
+	id_space_destroy(ipnet_minor_space);
+	return (0);
+}
+
+/*
+ * Module information entry point; simply reports on our modlinkage.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int	rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
+
+/*
+ * Look up the IPv4 and IPv6 neti protocol handles for this stack and
+ * register ipnet_nicevent_cb() as a NIC event hook on each of them.
+ * Registration failures are logged but otherwise not reported.
+ */
+static void
+ipnet_register_netihook(ipnet_stack_t *ips)
+{
+	int		ret;
+	netstackid_t	stackid = ips->ips_netstack->netstack_stackid;
+
+	HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents",
+	    ips);
+
+	/*
+	 * The ipnet device depends on ip and is registered in the netstack
+	 * framework after ip so the calls to net_protocol_lookup() cannot
+	 * fail.
+	 */
+	ips->ips_ndv4 = net_protocol_lookup(stackid, NHF_INET);
+	ips->ips_ndv6 = net_protocol_lookup(stackid, NHF_INET6);
+
+	ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS,
+	    ips->ips_nicevents);
+	if (ret != 0) {
+		cmn_err(CE_WARN, "ipnet_register_netihook: net_hook_register() "
+		    "failed for v4 stack instance %d: %d", stackid, ret);
+	}
+	ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS,
+	    ips->ips_nicevents);
+	if (ret != 0) {
+		cmn_err(CE_WARN, "ipnet_register_netihook: net_hook_register() "
+		    "failed for v6 stack instance %d: %d", stackid, ret);
+	}
+}
+
+/*
+ * This function is called at module initialization (from ipnet_if_init())
+ * to build an initial view of the interfaces on the system. It will be
+ * called once for IPv4 and once for IPv6, although there is only one ipnet
+ * interface for both IPv4 and IPv6 there are separate address lists.
+ *
+ * nd is the neti handle to walk, ips the stack instance being populated
+ * and isv6 says which protocol nd refers to.  Returns 0 on success or
+ * ENOMEM if an ipnetif_t could not be created.
+ */
+static int
+ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
+{
+	phy_if_t		phyif;
+	lif_if_t		lif;
+	ipnetif_t		*ipnetif;
+	char			name[LIFNAMSIZ];
+	boolean_t		new_if;
+	uint64_t		ifflags;
+	int			ret = 0;
+
+	/*
+	 * Make sure we're not processing NIC events during the
+	 * population of our interfaces and address lists.
+	 */
+	mutex_enter(&ips->ips_event_lock);
+
+	for (phyif = net_phygetnext(nd, 0); phyif != 0;
+	    phyif = net_phygetnext(nd, phyif)) {
+		if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0)
+			continue;
+		/*
+		 * Track per interface whether we created it.  If this flag
+		 * were to stick from an earlier iteration, interfaces that
+		 * were merely looked up would never be released below.
+		 */
+		new_if = B_FALSE;
+		if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) {
+			ipnetif = ipnet_create_if(name, phyif, ips);
+			if (ipnetif == NULL) {
+				ret = ENOMEM;
+				goto done;
+			}
+			new_if = B_TRUE;
+		}
+		ipnetif->if_flags |=
+		    isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
+
+		for (lif = net_lifgetnext(nd, phyif, 0); lif != 0;
+		    lif = net_lifgetnext(nd, phyif, lif)) {
+			/*
+			 * Skip addresses that aren't up.  We'll add
+			 * them when we receive an NE_LIF_UP event.
+			 */
+			if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 ||
+			    !(ifflags & IFF_UP))
+				continue;
+			/* Don't add it if we already have it. */
+			if (ipnet_match_lif(ipnetif, lif, isv6) != NULL)
+				continue;
+			ipnet_add_ifaddr(lif, ipnetif, nd);
+		}
+		/*
+		 * Drop the reference taken by the lookup above (presumably
+		 * ipnet_if_getby_index() returns a held ipnetif_t).  A newly
+		 * created interface keeps its initial reference.
+		 */
+		if (!new_if)
+			ipnetif_refrele(ipnetif);
+	}
+
+done:
+	mutex_exit(&ips->ips_event_lock);
+	return (ret);
+}
+
+/*
+ * attach(9E): only DDI_ATTACH is supported.  Creates the fixed "lo0"
+ * minor node and remembers our dev_info pointer.
+ */
+static int
+ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int	rv;
+
+	switch (cmd) {
+	case DDI_ATTACH:
+		break;
+	default:
+		return (DDI_FAILURE);
+	}
+
+	rv = ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO,
+	    DDI_PSEUDO, 0);
+	if (rv == DDI_FAILURE)
+		return (DDI_FAILURE);
+
+	ipnet_dip = dip;
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach(9E): only DDI_DETACH is supported.  Removes all of our minor
+ * nodes and forgets the dev_info pointer.
+ */
+static int
+ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	switch (cmd) {
+	case DDI_DETACH:
+		break;
+	default:
+		return (DDI_FAILURE);
+	}
+
+	ASSERT(dip == ipnet_dip);
+	ddi_remove_minor_node(ipnet_dip, NULL);
+	ipnet_dip = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * getinfo(9E): there is a single instance (0); its dev_info pointer is
+ * only available while the driver is attached.
+ */
+/* ARGSUSED */
+static int
+ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
+		*result = (void *)0;
+		return (DDI_SUCCESS);
+	}
+
+	if (infocmd == DDI_INFO_DEVT2DEVINFO && ipnet_dip != NULL) {
+		*result = ipnet_dip;
+		return (DDI_SUCCESS);
+	}
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * STREAMS open routine.  Allocates per-stream state (ipnet_t), binds it to
+ * either the loopback pseudo-interface (minor IPNET_MINOR_LO) or to the
+ * ipnetif_t matching the opened dev_t, hands back a freshly cloned minor
+ * in *dev and, for the first stream in the stack, registers ipnet_input()
+ * as the IP observability callback.
+ *
+ * Returns 0 on success, or EACCES (labeled system, non-global zone),
+ * ENOTSUP (module open), EBUSY (re-open), ENOMEM, or ENODEV (no such
+ * interface, or interface not visible in the caller's zone).
+ */
+/* ARGSUSED */
+static int
+ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
+{
+	ipnet_t		*ipnet;
+	netstack_t	*ns = NULL;
+	ipnet_stack_t	*ips;
+	int		err = 0;
+	zoneid_t	zoneid = crgetzoneid(crp);
+
+	/*
+	 * If the system is labeled, only the global zone is allowed to open
+	 * IP observability nodes.
+	 */
+	if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
+		return (EACCES);
+
+	/* We don't support open as a module */
+	if (sflag & MODOPEN)
+		return (ENOTSUP);
+
+	/* This driver is self-cloning, we don't support re-open. */
+	if (rq->q_ptr != NULL)
+		return (EBUSY);
+
+	if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL)
+		return (ENOMEM);
+
+	/* The netstack reference obtained here is dropped in ipnet_close(). */
+	VERIFY((ns = netstack_find_by_cred(crp)) != NULL);
+	ips = ns->netstack_ipnet;
+
+	rq->q_ptr = WR(rq)->q_ptr = ipnet;
+	ipnet->ipnet_rq = rq;
+	ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
+	ipnet->ipnet_zoneid = zoneid;
+	ipnet->ipnet_dlstate = DL_UNBOUND;
+	ipnet->ipnet_sap = 0;
+	ipnet->ipnet_ns = ns;
+
+	/*
+	 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
+	 * to be processed after ipnet_if is set and the ipnet_t has been
+	 * inserted in the ips_str_list.
+	 */
+	mutex_enter(&ips->ips_event_lock);
+	if (getminor(*dev) == IPNET_MINOR_LO) {
+		ipnet->ipnet_flags |= IPNET_LOMODE;
+		ipnet->ipnet_acceptfn = ipnet_loaccept;
+	} else {
+		ipnet->ipnet_acceptfn = ipnet_accept;
+		ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips);
+		if (ipnet->ipnet_if == NULL ||
+		    !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) {
+			err = ENODEV;
+			goto done;
+		}
+	}
+
+	/* Wait until nobody is walking ips_str_list before modifying it. */
+	mutex_enter(&ips->ips_walkers_lock);
+	while (ips->ips_walkers_cnt != 0)
+		cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
+	list_insert_head(&ips->ips_str_list, ipnet);
+	/* Clone: return the per-stream minor rather than the one opened. */
+	*dev = makedevice(getmajor(*dev), ipnet->ipnet_minor);
+	qprocson(rq);
+
+	/*
+	 * Only register our callback if we're the first open client; we call
+	 * unregister in close() for the last open client.
+	 */
+	if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
+		ipobs_register_hook(ns, ipnet_input);
+	mutex_exit(&ips->ips_walkers_lock);
+
+done:
+	mutex_exit(&ips->ips_event_lock);
+	if (err != 0) {
+		/*
+		 * Undo everything acquired above.
+		 * NOTE(review): rq->q_ptr and WR(rq)->q_ptr still point at
+		 * the ipnet_t freed below on this path.
+		 */
+		netstack_rele(ns);
+		id_free(ipnet_minor_space, ipnet->ipnet_minor);
+		if (ipnet->ipnet_if != NULL)
+			ipnetif_refrele(ipnet->ipnet_if);
+		kmem_free(ipnet, sizeof (*ipnet));
+	}
+	return (err);
+}
+
+/*
+ * STREAMS close routine.  Drops any all-multicast references this stream
+ * took via DL_PROMISCON_REQ, removes the stream from ips_str_list once no
+ * walkers are active, frees the per-stream state and, if this was the
+ * last stream in the stack, unregisters the IP observability callback.
+ * Always returns 0.
+ */
+static int
+ipnet_close(queue_t *rq)
+{
+	ipnet_t		*ipnet = rq->q_ptr;
+	ipnet_stack_t	*ips = ipnet->ipnet_ns->netstack_ipnet;
+
+	/*
+	 * DL_PROMISC_PHYS and DL_PROMISC_MULTI each join all-multicast
+	 * separately in ipnet_dlpromisconreq(), so each is left separately.
+	 */
+	if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
+		ipnet_leave_allmulti(ipnet->ipnet_if, ips);
+	if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
+		ipnet_leave_allmulti(ipnet->ipnet_if, ips);
+
+	/* Wait until nobody is walking ips_str_list before modifying it. */
+	mutex_enter(&ips->ips_walkers_lock);
+	while (ips->ips_walkers_cnt != 0)
+		cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
+
+	qprocsoff(rq);
+
+	list_remove(&ips->ips_str_list, ipnet);
+	if (ipnet->ipnet_if != NULL)
+		ipnetif_refrele(ipnet->ipnet_if);
+	id_free(ipnet_minor_space, ipnet->ipnet_minor);
+	kmem_free(ipnet, sizeof (*ipnet));
+
+	/* Last client gone: stop receiving packets from ip. */
+	if (list_is_empty(&ips->ips_str_list))
+		ipobs_unregister_hook(ips->ips_netstack, ipnet_input);
+
+	mutex_exit(&ips->ips_walkers_lock);
+	/* Drop the netstack reference obtained in ipnet_open(). */
+	netstack_rele(ips->ips_netstack);
+	return (0);
+}
+
+/*
+ * Write-side put procedure.  DLPI primitives and ioctls are handed to
+ * their handlers, M_FLUSH gets the canonical driver treatment, and
+ * anything else (including data) is silently discarded.
+ */
+static int
+ipnet_wput(queue_t *q, mblk_t *mp)
+{
+	uchar_t	*flushp;
+
+	switch (DB_TYPE(mp)) {
+	case M_PROTO:
+	case M_PCPROTO:
+		ipnet_wputnondata(q, mp);
+		break;
+	case M_IOCTL:
+		ipnet_ioctl(q, mp);
+		break;
+	case M_IOCDATA:
+		ipnet_iocdata(q, mp);
+		break;
+	case M_FLUSH:
+		flushp = mp->b_rptr;
+		if (*flushp & FLUSHW) {
+			flushq(q, FLUSHDATA);
+			*flushp &= ~FLUSHW;
+		}
+		/* Turn the message around only if a read flush remains. */
+		if (*flushp & FLUSHR)
+			qreply(q, mp);
+		else
+			freemsg(mp);
+		break;
+	default:
+		freemsg(mp);
+		break;
+	}
+	return (0);
+}
+
+/*
+ * Read-side service routine.  Passes queued packets upstream for as long
+ * as the next queue can take them; the first one that cannot be sent is
+ * put back and the routine returns.
+ */
+static int
+ipnet_rsrv(queue_t *q)
+{
+	mblk_t *mp;
+
+	for (;;) {
+		mp = getq(q);
+		if (mp == NULL)
+			break;
+		ASSERT(DB_TYPE(mp) == M_DATA);
+		if (!canputnext(q)) {
+			(void) putbq(q, mp);
+			break;
+		}
+		putnext(q, mp);
+	}
+	return (0);
+}
+
+/*
+ * M_IOCTL handler.  DLIOCRAW is acknowledged as-is.  A transparent
+ * DLIOCIPNETINFO starts a copyin of the caller's uint_t argument (the
+ * rest is done in ipnet_iocdata()).  Everything else, including
+ * DLIOCIPNETINFO sent via I_STR, is rejected with EINVAL.
+ */
+static void
+ipnet_ioctl(queue_t *q, mblk_t *mp)
+{
+	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
+
+	if (iocp->ioc_cmd == DLIOCRAW) {
+		miocack(q, mp, 0, 0);
+		return;
+	}
+
+	if (iocp->ioc_cmd == DLIOCIPNETINFO &&
+	    iocp->ioc_count == TRANSPARENT) {
+		mcopyin(mp, NULL, sizeof (uint_t), NULL);
+		qreply(q, mp);
+		return;
+	}
+
+	miocnak(q, mp, 0, EINVAL);
+}
+
+/*
+ * M_IOCDATA handler: completes the transparent DLIOCIPNETINFO ioctl
+ * started in ipnet_ioctl().  The copied-in value must be 1 (prepend a
+ * dl_ipnetinfo_t header to each packet) or 0 (don't); on success the
+ * ioctl returns DL_IPNETINFO_VERSION.  Anything else is nak'ed with
+ * EINVAL.
+ */
+static void
+ipnet_iocdata(queue_t *q, mblk_t *mp)
+{
+	struct copyresp	*csp = (struct copyresp *)mp->b_rptr;
+	ipnet_t		*ipnet = q->q_ptr;
+	mblk_t		*datamp = mp->b_cont;
+	int		enable;
+
+	/*
+	 * A non-zero cp_rval means the stream head's copy failed and the
+	 * ioctl has already been terminated there; we must just free the
+	 * message without sending any reply.
+	 */
+	if (csp->cp_rval != 0) {
+		freemsg(mp);
+		return;
+	}
+
+	/* Never touch the copied-in data unless it is really there. */
+	if (csp->cp_cmd != DLIOCIPNETINFO || datamp == NULL ||
+	    MBLKL(datamp) < sizeof (int)) {
+		miocnak(q, mp, 0, EINVAL);
+		return;
+	}
+
+	enable = *(int *)datamp->b_rptr;
+	if (enable == 1) {
+		ipnet->ipnet_flags |= IPNET_INFO;
+	} else if (enable == 0) {
+		ipnet->ipnet_flags &= ~IPNET_INFO;
+	} else {
+		miocnak(q, mp, 0, EINVAL);
+		return;
+	}
+	miocack(q, mp, 0, DL_IPNETINFO_VERSION);
+}
+
+/*
+ * Dispatch an M_PROTO/M_PCPROTO message to the handler for its DLPI
+ * primitive.  Primitives we know but do not implement get DL_UNSUPPORTED,
+ * unknown ones get DL_BADPRIM.  Messages too short to even contain a
+ * primitive are dropped.
+ */
+static void
+ipnet_wputnondata(queue_t *q, mblk_t *mp)
+{
+	t_uscalar_t	prim;
+
+	/* Don't read dl_primitive from beyond the end of the message. */
+	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
+		freemsg(mp);
+		return;
+	}
+	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
+
+	switch (prim) {
+	case DL_INFO_REQ:
+		ipnet_inforeq(q, mp);
+		break;
+	case DL_UNBIND_REQ:
+		ipnet_unbindreq(q, mp);
+		break;
+	case DL_BIND_REQ:
+		ipnet_bindreq(q, mp);
+		break;
+	case DL_PROMISCON_REQ:
+		ipnet_dlpromisconreq(q, mp);
+		break;
+	case DL_PROMISCOFF_REQ:
+		ipnet_dlpromiscoffreq(q, mp);
+		break;
+	case DL_UNITDATA_REQ:
+	case DL_DETACH_REQ:
+	case DL_PHYS_ADDR_REQ:
+	case DL_SET_PHYS_ADDR_REQ:
+	case DL_ENABMULTI_REQ:
+	case DL_DISABMULTI_REQ:
+	case DL_ATTACH_REQ:
+		dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0);
+		break;
+	default:
+		dlerrorack(q, mp, prim, DL_BADPRIM, 0);
+		break;
+	}
+}
+
+/*
+ * DL_INFO_REQ handler: replies with a DL_INFO_ACK built from the
+ * ipnet_infoack template, followed by sizeof (ushort_t) bytes of
+ * trailing space.
+ */
+static void
+ipnet_inforeq(queue_t *q, mblk_t *mp)
+{
+	dl_info_ack_t	*dlip;
+	size_t		size = sizeof (dl_info_ack_t) + sizeof (ushort_t);
+
+	if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
+		dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0);
+		return;
+	}
+
+	if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL)
+		return;
+
+	/*
+	 * Only the dl_info_ack_t itself is filled in from the template, so
+	 * clear the whole reply first; otherwise the trailing bytes would
+	 * go upstream carrying whatever the buffer previously held.
+	 */
+	bzero(mp->b_rptr, size);
+	dlip = (dl_info_ack_t *)mp->b_rptr;
+	*dlip = ipnet_infoack;
+	qreply(q, mp);
+}
+
+/*
+ * DL_BIND_REQ handler.  The only SAPs accepted are IPV4_VERSION,
+ * IPV6_VERSION and 0; any other value is refused with DL_BADSAP.  On
+ * success the stream moves to DL_IDLE and the SAP is recorded.
+ */
+static void
+ipnet_bindreq(queue_t *q, mblk_t *mp)
+{
+	ipnet_t		*ipnet = q->q_ptr;
+	dl_bind_req_t	*breq = (dl_bind_req_t *)mp->b_rptr;
+	int32_t		sap;
+
+	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
+		dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0);
+		return;
+	}
+
+	sap = breq->dl_sap;
+	if (sap == 0 || sap == IPV4_VERSION || sap == IPV6_VERSION) {
+		ipnet->ipnet_sap = sap;
+		ipnet->ipnet_dlstate = DL_IDLE;
+		dlbindack(q, mp, sap, 0, 0, 0, 0);
+		return;
+	}
+
+	dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0);
+}
+
+/*
+ * DL_UNBIND_REQ handler.  Only valid in the DL_IDLE state; returns the
+ * stream to DL_UNBOUND and forgets the bound SAP.
+ */
+static void
+ipnet_unbindreq(queue_t *q, mblk_t *mp)
+{
+	ipnet_t	*ipnet = q->q_ptr;
+
+	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
+		dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
+		return;
+	}
+
+	if (ipnet->ipnet_dlstate == DL_IDLE) {
+		ipnet->ipnet_dlstate = DL_UNBOUND;
+		ipnet->ipnet_sap = 0;
+		dlokack(q, mp, DL_UNBIND_REQ);
+		return;
+	}
+
+	dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
+}
+
+/*
+ * DL_PROMISCON_REQ handler.  Loopback-mode streams simply get an ack.
+ * Otherwise the requested level is recorded in ipnet_flags; for
+ * DL_PROMISC_PHYS and DL_PROMISC_MULTI the underlying interface is also
+ * put into all-multicast mode.  Unknown levels get DL_BADPRIM, and a
+ * failure to join all-multicast is reported as DL_SYSERR.
+ */
+static void
+ipnet_dlpromisconreq(queue_t *q, mblk_t *mp)
+{
+	ipnet_t		*ipnet = q->q_ptr;
+	t_uscalar_t	level;
+	uint16_t	flag;
+	int		err;
+
+	if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
+		dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
+		return;
+	}
+
+	if (ipnet->ipnet_flags & IPNET_LOMODE) {
+		dlokack(q, mp, DL_PROMISCON_REQ);
+		return;
+	}
+
+	level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
+	switch (level) {
+	case DL_PROMISC_PHYS:
+		flag = IPNET_PROMISC_PHYS;
+		break;
+	case DL_PROMISC_SAP:
+		flag = IPNET_PROMISC_SAP;
+		break;
+	case DL_PROMISC_MULTI:
+		flag = IPNET_PROMISC_MULTI;
+		break;
+	default:
+		dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
+		return;
+	}
+
+	/*
+	 * Take an all-multicast reference only when the level is newly
+	 * enabled.  DL_PROMISCOFF_REQ and close each release exactly one
+	 * reference per enabled level, so joining again for a level that
+	 * is already on would leave a reference that is never dropped.
+	 */
+	if (level != DL_PROMISC_SAP && !(ipnet->ipnet_flags & flag)) {
+		if ((err = ipnet_join_allmulti(ipnet->ipnet_if,
+		    ipnet->ipnet_ns->netstack_ipnet)) != 0) {
+			dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err);
+			return;
+		}
+	}
+
+	ipnet->ipnet_flags |= flag;
+	dlokack(q, mp, DL_PROMISCON_REQ);
+}
+
+/*
+ * DL_PROMISCOFF_REQ handler.  Loopback-mode streams simply get an ack.
+ * Otherwise the requested level is cleared from ipnet_flags, and for
+ * DL_PROMISC_PHYS and DL_PROMISC_MULTI the all-multicast reference taken
+ * when the level was enabled is released.  Unknown levels get
+ * DL_BADPRIM; a level that is not currently enabled gets DL_NOTENAB.
+ */
+static void
+ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp)
+{
+	ipnet_t		*ipnet = q->q_ptr;
+	t_uscalar_t	level;
+	uint16_t	flag;
+
+	if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
+		dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
+		return;
+	}
+
+	if (ipnet->ipnet_flags & IPNET_LOMODE) {
+		dlokack(q, mp, DL_PROMISCOFF_REQ);
+		return;
+	}
+
+	/* Interpret the message as the primitive it actually is. */
+	level = ((dl_promiscoff_req_t *)mp->b_rptr)->dl_level;
+	switch (level) {
+	case DL_PROMISC_PHYS:
+		flag = IPNET_PROMISC_PHYS;
+		break;
+	case DL_PROMISC_SAP:
+		flag = IPNET_PROMISC_SAP;
+		break;
+	case DL_PROMISC_MULTI:
+		flag = IPNET_PROMISC_MULTI;
+		break;
+	default:
+		dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
+		return;
+	}
+
+	if (!(ipnet->ipnet_flags & flag)) {
+		dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
+		return;
+	}
+	ipnet->ipnet_flags &= ~flag;
+
+	if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
+		ipnet_leave_allmulti(ipnet->ipnet_if,
+		    ipnet->ipnet_ns->netstack_ipnet);
+	}
+
+	dlokack(q, mp, DL_PROMISCOFF_REQ);
+}
+
+/*
+ * Take one all-multicast reference on ipnetif.  The first reference
+ * actually joins all-multicast in ip for every IP version plumbed on the
+ * interface; later references only bump if_multicnt.
+ *
+ * Returns 0 on success.  On failure the error from ip_join_allmulti() is
+ * returned, no reference is taken and no all-multicast membership is
+ * left behind.
+ */
+static int
+ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
+{
+	int		err = 0;
+	ip_stack_t	*ipst = ips->ips_netstack->netstack_ip;
+	uint64_t	index = ipnetif->if_index;
+
+	mutex_enter(&ips->ips_event_lock);
+	if (ipnetif->if_multicnt == 0) {
+		ASSERT((ipnetif->if_flags &
+		    (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0);
+		if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) {
+			err = ip_join_allmulti(index, B_FALSE, ipst);
+			if (err != 0)
+				goto done;
+			ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI;
+		}
+		if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) {
+			err = ip_join_allmulti(index, B_TRUE, ipst);
+			if (err != 0) {
+				/*
+				 * Always bail out on failure, undoing the
+				 * IPv4 join if we did one.  Falling through
+				 * would mark IPv6 as joined and count a
+				 * reference while reporting an error.
+				 */
+				if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) {
+					(void) ip_leave_allmulti(index,
+					    B_FALSE, ipst);
+					ipnetif->if_flags &=
+					    ~IPNETIF_IPV4ALLMULTI;
+				}
+				goto done;
+			}
+			ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI;
+		}
+	}
+	ipnetif->if_multicnt++;
+
+done:
+	mutex_exit(&ips->ips_event_lock);
+	return (err);
+}
+
+/*
+ * Drop one all-multicast reference on ipnetif.  When the last reference
+ * goes away, leave all-multicast in ip for each IP version that was
+ * joined and clear the corresponding flag.
+ */
+static void
+ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
+{
+	int		err;
+	ip_stack_t	*ipst = ips->ips_netstack->netstack_ip;
+	uint64_t	index = ipnetif->if_index;
+
+	mutex_enter(&ips->ips_event_lock);
+	ASSERT(ipnetif->if_multicnt != 0);
+	ipnetif->if_multicnt--;
+	if (ipnetif->if_multicnt != 0) {
+		/* Other references remain; nothing more to do. */
+		mutex_exit(&ips->ips_event_lock);
+		return;
+	}
+
+	if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) {
+		err = ip_leave_allmulti(index, B_FALSE, ipst);
+		ASSERT(err == 0 || err == ENODEV);
+		ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
+	}
+	if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) {
+		err = ip_leave_allmulti(index, B_TRUE, ipst);
+		ASSERT(err == 0 || err == ENODEV);
+		ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI;
+	}
+	mutex_exit(&ips->ips_event_lock);
+}
+
+/*
+ * Prepend a dl_ipnetinfo_t header describing the packet (IP version and
+ * source/destination zone) to mp.  Returns the new head of the message,
+ * or NULL if the header could not be allocated, in which case mp has
+ * been freed.
+ */
+static mblk_t *
+ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp)
+{
+	mblk_t		*dlhdr;
+	dl_ipnetinfo_t	*dl;
+
+	if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) {
+		freemsg(mp);
+		return (NULL);
+	}
+	dl = (dl_ipnetinfo_t *)dlhdr->b_rptr;
+	/*
+	 * The whole structure is sent to the consumer, but only some of
+	 * its members are assigned below.  Clear it first so that padding
+	 * and any unassigned members never carry stale buffer contents.
+	 */
+	bzero(dl, sizeof (*dl));
+	dl->dli_version = DL_IPNETINFO_VERSION;
+	dl->dli_len = htons(sizeof (*dl));
+	dl->dli_ipver = ihd->ihd_ipver;
+	dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc);
+	dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst);
+	dlhdr->b_wptr += sizeof (*dl);
+	dlhdr->b_cont = mp;
+
+	return (dlhdr);
+}
+
+/*
+ * Classify addr relative to the interface observed by ipnet: one of the
+ * interface's own addresses (IPNETADDR_MYADDR), a multicast or broadcast
+ * address (IPNETADDR_MBCAST), or neither (IPNETADDR_UNKNOWN).  Streams
+ * outside the global zone only consider addresses in their own zone.
+ */
+static ipnet_addrtype_t
+ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr)
+{
+	ipnetif_t		*ipnetif = ipnet->ipnet_if;
+	boolean_t		isv4 = (addr->iap_family == AF_INET);
+	boolean_t		isv6 = (addr->iap_family == AF_INET6);
+	ipnet_addrtype_t	result = IPNETADDR_UNKNOWN;
+	ipnetif_addr_t		*ifa;
+	list_t			*addrs;
+
+	/* Multicast and limited broadcast need no address list lookup. */
+	if (isv4) {
+		if (CLASSD(*(addr->iap_addr4)) ||
+		    *(addr->iap_addr4) == INADDR_BROADCAST)
+			return (IPNETADDR_MBCAST);
+	} else if (isv6) {
+		if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6))
+			return (IPNETADDR_MBCAST);
+	}
+
+	/*
+	 * Otherwise look for the first address on our interface that
+	 * matches, either as a local address or (IPv4 only) as a subnet
+	 * broadcast address.
+	 */
+	addrs = isv4 ? &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list;
+	mutex_enter(&ipnetif->if_addr_lock);
+	for (ifa = list_head(addrs); ifa != NULL;
+	    ifa = list_next(addrs, ifa)) {
+		/* Non-global zones may only match their own addresses. */
+		if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
+		    ipnet->ipnet_zoneid != ifa->ifa_zone)
+			continue;
+
+		if (isv4) {
+			if (ifa->ifa_ip4addr != INADDR_ANY &&
+			    *(addr->iap_addr4) == ifa->ifa_ip4addr) {
+				result = IPNETADDR_MYADDR;
+				break;
+			}
+			if (ifa->ifa_brdaddr != INADDR_ANY &&
+			    *(addr->iap_addr4) == ifa->ifa_brdaddr) {
+				result = IPNETADDR_MBCAST;
+				break;
+			}
+		} else if (isv6) {
+			if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6,
+			    &ifa->ifa_ip6addr)) {
+				result = IPNETADDR_MYADDR;
+				break;
+			}
+		}
+	}
+	mutex_exit(&ipnetif->if_addr_lock);
+
+	return (result);
+}
+
+/*
+ * Decide whether the packet described by ihd should be delivered to the
+ * (non-loopback) client stream ipnet.  src and dst are the packet's
+ * source and destination addresses.  Returns B_TRUE to deliver.
+ */
+static boolean_t
+ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
+    ipnet_addrp_t *dst)
+{
+	uint64_t		ifindex = ipnet->ipnet_if->if_index;
+	ipnet_addrtype_t	srctype, dsttype;
+	boolean_t		on_our_if;
+
+	srctype = ipnet_get_addrtype(ipnet, src);
+	dsttype = ipnet_get_addrtype(ipnet, dst);
+	on_our_if = (ihd->ihd_ifindex == ifindex);
+
+	/*
+	 * Zone isolation: a stream outside the global zone only sees
+	 * packets whose source or destination zone is its own.  Global
+	 * zone streams see other shared-stack zones, and broadcast and
+	 * multicast traffic is visible to every zone in the stack.
+	 */
+	if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
+	    dsttype != IPNETADDR_MBCAST &&
+	    ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
+	    ipnet->ipnet_zoneid != ihd->ihd_zdst)
+		return (B_FALSE);
+
+	/*
+	 * Without DL_PROMISC_SAP the packet's IP version has to match the
+	 * bound SAP.
+	 */
+	if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) &&
+	    ipnet->ipnet_sap != ihd->ihd_ipver)
+		return (B_FALSE);
+
+	/* Packets addressed to us are always accepted. */
+	if (dsttype == IPNETADDR_MYADDR)
+		return (B_TRUE);
+
+	/*
+	 * With DL_PROMISC_PHYS we also accept anything sent or received on
+	 * the observed interface, as well as anything carrying one of our
+	 * addresses as its source (so we see what we send).
+	 */
+	if ((ipnet->ipnet_flags & IPNET_PROMISC_PHYS) &&
+	    (on_our_if || srctype == IPNETADDR_MYADDR))
+		return (B_TRUE);
+
+	/*
+	 * Finally, multicast and broadcast packets are accepted when they
+	 * were transmitted or received on the observed interface.
+	 */
+	if (dsttype == IPNETADDR_MBCAST && on_our_if)
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/*
+ * Decide whether the packet described by ihd should be delivered to a
+ * client stream that is in IPNET_LOMODE.  Only IPOBS_HOOK_LOCAL packets
+ * qualify; src and dst are not consulted.
+ */
+/* ARGSUSED */
+static boolean_t
+ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
+    ipnet_addrp_t *dst)
+{
+	if (ihd->ihd_htype != IPOBS_HOOK_LOCAL)
+		return (B_FALSE);
+
+	/*
+	 * Outside the global zone, the packet has to come from or go to
+	 * the stream's own zone.
+	 */
+	if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
+	    ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
+	    ipnet->ipnet_zoneid != ihd->ihd_zdst)
+		return (B_FALSE);
+
+	/* An unbound SAP (0) matches either IP version. */
+	if (ipnet->ipnet_sap == 0)
+		return (B_TRUE);
+	return (ipnet->ipnet_sap == ihd->ihd_ipver);
+}
+
+/*
+ * Taskq callback that delivers one observed packet to every interested
+ * client stream in the packet's stack.  arg is the mblk whose data is the
+ * ipobs_hook_data_t describing the packet; the packet itself hangs off
+ * ihd_mp.  Both are consumed here.
+ */
+static void
+ipnet_dispatch(void *arg)
+{
+	mblk_t			*mp = arg;
+	ipobs_hook_data_t	*ihd = (ipobs_hook_data_t *)mp->b_rptr;
+	ipnet_t			*ipnet;
+	mblk_t			*netmp;
+	list_t			*list;
+	ipnet_stack_t		*ips = ihd->ihd_stack->netstack_ipnet;
+	ipnet_addrp_t		src, dst;
+
+	/* Point src/dst at the addresses inside the packet's IP header. */
+	if (ihd->ihd_ipver == IPV4_VERSION) {
+		src.iap_family = dst.iap_family = AF_INET;
+		src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src;
+		dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst;
+	} else {
+		src.iap_family = dst.iap_family = AF_INET6;
+		src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src;
+		dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst;
+	}
+
+	/* Keep open/close from changing ips_str_list while we walk it. */
+	ipnet_walkers_inc(ips);
+
+	list = &ips->ips_str_list;
+	for (ipnet = list_head(list); ipnet != NULL;
+	    ipnet = list_next(list, ipnet)) {
+		if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst))
+			continue;
+
+		/*
+		 * The last stream on the list gets the original message;
+		 * everyone before it gets a duplicate (or, failing that,
+		 * a copy).
+		 */
+		if (list_next(list, ipnet) == NULL) {
+			netmp = ihd->ihd_mp;
+			ihd->ihd_mp = NULL;
+		} else {
+			if ((netmp = dupmsg(ihd->ihd_mp)) == NULL &&
+			    (netmp = copymsg(ihd->ihd_mp)) == NULL) {
+				atomic_inc_64(&ips->ips_drops);
+				continue;
+			}
+		}
+
+		/* ipnet_addheader() frees netmp itself if it fails. */
+		if (ipnet->ipnet_flags & IPNET_INFO) {
+			if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) {
+				atomic_inc_64(&ips->ips_drops);
+				continue;
+			}
+		}
+
+		/*
+		 * Send directly upstream only if nothing is already queued
+		 * (to preserve ordering); otherwise queue for ipnet_rsrv(),
+		 * or drop if the queue is full.
+		 */
+		if (ipnet->ipnet_rq->q_first == NULL &&
+		    canputnext(ipnet->ipnet_rq)) {
+			putnext(ipnet->ipnet_rq, netmp);
+		} else if (canput(ipnet->ipnet_rq)) {
+			(void) putq(ipnet->ipnet_rq, netmp);
+		} else {
+			freemsg(netmp);
+			atomic_inc_64(&ips->ips_drops);
+		}
+	}
+
+	ipnet_walkers_dec(ips);
+
+	/* ihd_mp is NULL here if the original was handed to a stream. */
+	freemsg(ihd->ihd_mp);
+	freemsg(mp);
+}
+
+/*
+ * Callback registered with ip (see ipnet_open()).  Hands the packet off
+ * to the packet taskq without sleeping so that we return quickly; if the
+ * dispatch fails the packet is counted as dropped and freed.
+ */
+static void
+ipnet_input(mblk_t *mp)
+{
+	ipobs_hook_data_t  *ihd = (ipobs_hook_data_t *)mp->b_rptr;
+	int		   rv;
+
+	rv = ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP);
+	if (rv == DDI_SUCCESS)
+		return;
+
+	/* Dispatch failed, so the message is still ours to clean up. */
+	atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops);
+	freemsg(ihd->ihd_mp);
+	freemsg(mp);
+}
+
+/*
+ * Allocate and initialize an ipnetif_t named name with interface index
+ * index, give it a minor number of its own, and insert it into both AVL
+ * trees of the stack instance ips.  The new ipnetif_t starts out with a
+ * reference count of 1.  Returns NULL if a minor number or memory could
+ * not be obtained without blocking.
+ */
+static ipnetif_t *
+ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips)
+{
+	ipnetif_t	*newif;
+	avl_index_t	where = 0;
+	minor_t		minor;
+
+	/*
+	 * We may be running in a NIC event callback, so neither of the
+	 * allocations below is allowed to sleep.
+	 */
+	minor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
+	if (minor == (minor_t)-1)
+		return (NULL);
+
+	newif = kmem_zalloc(sizeof (*newif), KM_NOSLEEP);
+	if (newif == NULL) {
+		id_free(ipnet_minor_space, minor);
+		return (NULL);
+	}
+
+	/* Identity. */
+	newif->if_index = index;
+	newif->if_dev = makedevice(ipnet_major, minor);
+	(void) strlcpy(newif->if_name, name, LIFNAMSIZ);
+
+	/* Address lists and the lock that protects them. */
+	mutex_init(&newif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
+	list_create(&newif->if_ip4addr_list, sizeof (ipnetif_addr_t),
+	    offsetof(ipnetif_addr_t, ifa_link));
+	list_create(&newif->if_ip6addr_list, sizeof (ipnetif_addr_t),
+	    offsetof(ipnetif_addr_t, ifa_link));
+
+	/* Reference counting; the creator holds the first reference. */
+	mutex_init(&newif->if_reflock, NULL, MUTEX_DEFAULT, 0);
+	newif->if_refcnt = 1;
+
+	/* Publish the interface; it must not already be in either tree. */
+	mutex_enter(&ips->ips_avl_lock);
+	VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
+	avl_insert(&ips->ips_avl_by_index, newif, where);
+	VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
+	avl_insert(&ips->ips_avl_by_name, newif, where);
+	mutex_exit(&ips->ips_avl_lock);
+
+	return (newif);
+}
+
+/*
+ * Tear down an ipnetif: hang up every open stream that is attached to it,
+ * unlink it from the stack's two AVL trees, and drop the hold taken when it
+ * was created.  The ipnetif is freed by ipnetif_refrele() once the last
+ * hold goes away.
+ */
+static void
+ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips)
+{
+	list_t	*strlist = &ips->ips_str_list;
+	ipnet_t	*str;
+
+	/* Send an M_HANGUP up each open stream bound to this ipnetif. */
+	ipnet_walkers_inc(ips);
+	str = list_head(strlist);
+	while (str != NULL) {
+		if (str->ipnet_if == ipnetif)
+			(void) putnextctl(str->ipnet_rq, M_HANGUP);
+		str = list_next(strlist, str);
+	}
+	ipnet_walkers_dec(ips);
+
+	mutex_enter(&ips->ips_avl_lock);
+	avl_remove(&ips->ips_avl_by_index, ipnetif);
+	avl_remove(&ips->ips_avl_by_name, ipnetif);
+	mutex_exit(&ips->ips_avl_lock);
+
+	/* Release the reference we implicitly held in ipnet_create_if(). */
+	ipnetif_refrele(ipnetif);
+}
+
+/*
+ * Empty the supplied address list, freeing every ipnetif_addr_t on it.  The
+ * list itself is left initialized; destroying it is up to the caller.
+ */
+static void
+ipnet_purge_addrlist(list_t *addrlist)
+{
+	ipnetif_addr_t *addr;
+
+	for (addr = list_head(addrlist); addr != NULL;
+	    addr = list_head(addrlist)) {
+		list_remove(addrlist, addr);
+		kmem_free(addr, sizeof (*addr));
+	}
+}
+
+/*
+ * Final destructor for an ipnetif_t whose reference count has reached zero.
+ * Frees any remaining IPv4/IPv6 addresses, destroys the address lists and
+ * both locks, returns the minor number to ipnet_minor_space and frees the
+ * structure itself.
+ */
+static void
+ipnet_free_if(ipnetif_t *ipnetif)
+{
+	list_t	*v4list = &ipnetif->if_ip4addr_list;
+	list_t	*v6list = &ipnetif->if_ip6addr_list;
+	minor_t	minor = getminor(ipnetif->if_dev);
+
+	ASSERT(ipnetif->if_refcnt == 0);
+
+	/* Empty each address list, then destroy it. */
+	ipnet_purge_addrlist(v4list);
+	list_destroy(v4list);
+	ipnet_purge_addrlist(v6list);
+	list_destroy(v6list);
+
+	mutex_destroy(&ipnetif->if_addr_lock);
+	mutex_destroy(&ipnetif->if_reflock);
+
+	id_free(ipnet_minor_space, minor);
+	kmem_free(ipnetif, sizeof (*ipnetif));
+}
+
+/*
+ * Create an ipnetif_addr_t with the given logical interface id (lif)
+ * and add it to the supplied ipnetif.  The lif is the netinfo
+ * representation of logical interface id, and we use this id to match
+ * incoming netinfo events against our lists of addresses.
+ *
+ * nd is the netinfo handle used to look up the address and zone of the
+ * lif.  Nothing is added if either lookup fails, if memory cannot be
+ * allocated without sleeping, or if the address is neither IPv4 nor IPv6.
+ */
+static void
+ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd)
+{
+	ipnetif_addr_t		*ifaddr;
+	zoneid_t		zoneid;
+	struct sockaddr_in	bcast;
+	struct sockaddr_storage	addr;
+	net_ifaddr_t		type = NA_ADDRESS;
+	uint64_t		phyif = ipnetif->if_index;
+	list_t			*list;
+
+	if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 ||
+	    net_getlifzone(nd, phyif, lif, &zoneid) != 0)
+		return;
+	/*
+	 * Zero the entry so that fields which are not set below (notably
+	 * ifa_brdaddr when the lif has no broadcast address) never hold
+	 * uninitialized memory.
+	 */
+	if ((ifaddr = kmem_zalloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL)
+		return;
+
+	ifaddr->ifa_zone = zoneid;
+	ifaddr->ifa_id = lif;
+
+	switch (addr.ss_family) {
+	case AF_INET:
+		ifaddr->ifa_ip4addr =
+		    ((struct sockaddr_in *)&addr)->sin_addr.s_addr;
+		/*
+		 * Try and get the broadcast address.  Note that it's okay for
+		 * an interface to not have a broadcast address, so we don't
+		 * fail the entire operation if net_getlifaddr() fails here.
+		 */
+		type = NA_BROADCAST;
+		if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0)
+			ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr;
+		list = &ipnetif->if_ip4addr_list;
+		break;
+	case AF_INET6:
+		ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr;
+		list = &ipnetif->if_ip6addr_list;
+		break;
+	default:
+		/*
+		 * Not a family we keep a list for.  Don't file an entry with
+		 * no valid address on the IPv6 list; just discard it.
+		 */
+		kmem_free(ifaddr, sizeof (*ifaddr));
+		return;
+	}
+
+	mutex_enter(&ipnetif->if_addr_lock);
+	list_insert_tail(list, ifaddr);
+	mutex_exit(&ipnetif->if_addr_lock);
+}
+
+/*
+ * Unlink ifaddr from the ipnetif's IPv6 (isv6) or IPv4 address list while
+ * holding if_addr_lock, then free it.
+ */
+static void
+ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
+{
+	list_t	*list;
+
+	if (isv6)
+		list = &ipnetif->if_ip6addr_list;
+	else
+		list = &ipnetif->if_ip4addr_list;
+
+	mutex_enter(&ipnetif->if_addr_lock);
+	list_remove(list, ifaddr);
+	mutex_exit(&ipnetif->if_addr_lock);
+
+	kmem_free(ifaddr, sizeof (*ifaddr));
+}
+
+/*
+ * Handle an NE_PLUMB event for the interface with the given index and name.
+ * One ipnetif is shared by IPv4 and IPv6, so it is only created if a lookup
+ * by index finds nothing; either way the plumbed flag for this protocol
+ * (isv6 selects IPv6) is set.  If if_multicnt is non-zero, all-multicast is
+ * also joined for this protocol, and the matching ALLMULTI flag is recorded
+ * when the join succeeds.  If the ipnetif cannot be created the event is
+ * ignored.
+ */
+static void
+ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips,
+    boolean_t isv6)
+{
+	ipnetif_t	*ipnetif;
+	boolean_t	refrele_needed = B_TRUE;
+
+	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) {
+		/*
+		 * The reference taken by ipnet_create_if() stays with the
+		 * ipnetif until ipnet_remove_if(), so it must not be
+		 * released here.
+		 */
+		ipnetif = ipnet_create_if(ifname, ifindex, ips);
+		refrele_needed = B_FALSE;
+	}
+	/*
+	 * ipnet_create_if() allocates without sleeping and can fail, in
+	 * which case there is no ipnetif to update.
+	 */
+	if (ipnetif == NULL)
+		return;
+
+	ipnetif->if_flags |=
+	    isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
+
+	if (ipnetif->if_multicnt != 0) {
+		if (ip_join_allmulti(ifindex, isv6,
+		    ips->ips_netstack->netstack_ip) == 0) {
+			ipnetif->if_flags |=
+			    isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI;
+		}
+	}
+
+	if (refrele_needed)
+		ipnetif_refrele(ipnetif);
+}
+
+/*
+ * Handle an NE_UNPLUMB event: discard all addresses of the unplumbed
+ * protocol (isv6 selects IPv6), clear that protocol's plumbed flag, and
+ * remove the ipnetif altogether once neither protocol is plumbed.
+ */
+static void
+ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6)
+{
+	ipnetif_t	*ipnetif = ipnet_if_getby_index(ifindex, ips);
+	list_t		*addrs;
+
+	if (ipnetif == NULL)
+		return;
+
+	addrs = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list;
+	mutex_enter(&ipnetif->if_addr_lock);
+	ipnet_purge_addrlist(addrs);
+	mutex_exit(&ipnetif->if_addr_lock);
+
+	/*
+	 * There is a single ipnetif for both IPv4 and IPv6, but NE_UNPLUMB
+	 * arrives separately for each protocol.  The ipnetif only goes away
+	 * when both the IPv4 and the IPv6 interface have been unplumbed.
+	 */
+	if (isv6)
+		ipnetif->if_flags &= ~IPNETIF_IPV6PLUMBED;
+	else
+		ipnetif->if_flags &= ~IPNETIF_IPV4PLUMBED;
+	if ((ipnetif->if_flags &
+	    (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED)) == 0)
+		ipnet_remove_if(ipnetif, ips);
+
+	/* Drop the hold obtained by ipnet_if_getby_index(). */
+	ipnetif_refrele(ipnetif);
+}
+
+/*
+ * Handle an NE_LIF_UP event by (re)creating the address entry for logical
+ * interface lifindex on the ipnetif with index ifindex.  nd is the netinfo
+ * handle passed on to ipnet_add_ifaddr() for the address lookup.
+ */
+static void
+ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd,
+    ipnet_stack_t *ips, boolean_t isv6)
+{
+	ipnetif_t	*ipnetif = ipnet_if_getby_index(ifindex, ips);
+	ipnetif_addr_t	*stale;
+
+	if (ipnetif == NULL)
+		return;
+
+	/*
+	 * If we already know this lif we must have missed a NE_LIF_DOWN
+	 * event, so throw the old entry away before adding the new one.
+	 */
+	stale = ipnet_match_lif(ipnetif, lifindex, isv6);
+	if (stale != NULL)
+		ipnet_delete_ifaddr(stale, ipnetif, isv6);
+
+	ipnet_add_ifaddr(lifindex, ipnetif, nd);
+	ipnetif_refrele(ipnetif);
+}
+
+/*
+ * Handle an NE_LIF_DOWN event: delete the address entry for logical
+ * interface lifindex (if we have one) from the ipnetif with index ifindex,
+ * then hang up any stream that is no longer entitled to have the ipnetif
+ * open now that the address is gone.
+ */
+static void
+ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips,
+    boolean_t isv6)
+{
+	ipnetif_t	*ipnetif;
+	ipnetif_addr_t	*ifaddr;
+
+	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
+		return;
+	if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL)
+		ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
+	/*
+	 * Make sure that open streams on this ipnetif are still allowed to
+	 * have it open.  This must happen before our hold is released so
+	 * that the ipnetif cannot be freed while it is being examined.
+	 */
+	ipnet_if_zonecheck(ipnetif, ips);
+	ipnetif_refrele(ipnetif);
+}
+
+/*
+ * This callback from the NIC event framework dispatches a taskq as the event
+ * handlers may block.  The parts of the event that the handlers need are
+ * copied into an ipnet_nicevent_t, which ipnet_nicevent_task() frees when it
+ * is done with it.  The event is dropped if memory cannot be allocated
+ * without sleeping or if the dispatch fails.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg)
+{
+	ipnet_stack_t		*ips = arg;
+	hook_nic_event_t	*hn = (hook_nic_event_t *)info;
+	ipnet_nicevent_t	*ipne;
+
+	if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL)
+		return (0);
+	ipne->ipne_event = hn->hne_event;
+	ipne->ipne_protocol = hn->hne_protocol;
+	ipne->ipne_stackid = ips->ips_netstack->netstack_stackid;
+	ipne->ipne_ifindex = hn->hne_nic;
+	ipne->ipne_lifindex = hn->hne_lif;
+	if (hn->hne_datalen != 0) {
+		(void) strlcpy(ipne->ipne_ifname, hn->hne_data,
+		    sizeof (ipne->ipne_ifname));
+	} else {
+		/* kmem_alloc() does not zero; don't leave the name as junk. */
+		ipne->ipne_ifname[0] = '\0';
+	}
+	if (ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task,
+	    ipne, DDI_NOSLEEP) != DDI_SUCCESS) {
+		/* The task will never run, so freeing ipne is up to us. */
+		kmem_free(ipne, sizeof (ipnet_nicevent_t));
+	}
+	return (0);
+}
+
+/*
+ * Taskq handler for a NIC event queued by ipnet_nicevent_cb().  Looks up
+ * the netstack recorded in the event (the event is discarded if it can no
+ * longer be found), runs the handler for the event type while holding
+ * ips_event_lock, and finally frees the ipnet_nicevent_t.
+ */
+static void
+ipnet_nicevent_task(void *arg)
+{
+	ipnet_nicevent_t	*ev = arg;
+	netstack_t		*ns;
+	ipnet_stack_t		*ips;
+	boolean_t		isv6;
+
+	ns = netstack_find_by_stackid(ev->ipne_stackid);
+	if (ns == NULL) {
+		kmem_free(ev, sizeof (ipnet_nicevent_t));
+		return;
+	}
+	ips = ns->netstack_ipnet;
+	isv6 = (ev->ipne_protocol == ips->ips_ndv6);
+
+	mutex_enter(&ips->ips_event_lock);
+	switch (ev->ipne_event) {
+	case NE_PLUMB:
+		ipnet_plumb_ev(ev->ipne_ifindex, ev->ipne_ifname, ips, isv6);
+		break;
+	case NE_UNPLUMB:
+		ipnet_unplumb_ev(ev->ipne_ifindex, ips, isv6);
+		break;
+	case NE_LIF_UP:
+		ipnet_lifup_ev(ev->ipne_ifindex, ev->ipne_lifindex,
+		    ev->ipne_protocol, ips, isv6);
+		break;
+	case NE_LIF_DOWN:
+		ipnet_lifdown_ev(ev->ipne_ifindex, ev->ipne_lifindex, ips,
+		    isv6);
+		break;
+	default:
+		/* Any other event type is ignored. */
+		break;
+	}
+	mutex_exit(&ips->ips_event_lock);
+
+	netstack_rele(ns);
+	kmem_free(ev, sizeof (ipnet_nicevent_t));
+}
+
+/*
+ * Return the dev_t of the ipnetif called `name' in the stack of the given
+ * zone.  (dev_t)-1 is returned if the system is labeled and the zone is not
+ * the global zone, if the zone's netstack cannot be found, if there is no
+ * ipnetif of that name, or if the ipnetif is not visible from the zone.
+ */
+dev_t
+ipnet_if_getdev(char *name, zoneid_t zoneid)
+{
+	netstack_t	*ns;
+	ipnet_stack_t	*ips;
+	ipnetif_t	*match;
+	dev_t		result = (dev_t)-1;
+
+	if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
+		return (result);
+	ns = netstack_find_by_zoneid(zoneid);
+	if (ns == NULL)
+		return (result);
+
+	ips = ns->netstack_ipnet;
+	mutex_enter(&ips->ips_avl_lock);
+	match = avl_find(&ips->ips_avl_by_name, name, NULL);
+	if (match != NULL && ipnet_if_in_zone(match, zoneid, ips))
+		result = match->if_dev;
+	mutex_exit(&ips->ips_avl_lock);
+	netstack_rele(ns);
+
+	return (result);
+}
+
+/*
+ * Find the ipnetif with interface index `id' in the given stack.  On
+ * success the ipnetif is returned with an extra hold that the caller must
+ * drop with ipnetif_refrele(); NULL is returned if there is no match.
+ */
+static ipnetif_t *
+ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips)
+{
+	ipnetif_t	*found;
+
+	mutex_enter(&ips->ips_avl_lock);
+	found = avl_find(&ips->ips_avl_by_index, &id, NULL);
+	if (found != NULL)
+		ipnetif_refhold(found);
+	mutex_exit(&ips->ips_avl_lock);
+
+	return (found);
+}
+
+/*
+ * Find the ipnetif whose if_dev equals dev by walking the stack's by-index
+ * tree.  On success the ipnetif is returned with an extra hold that the
+ * caller must drop with ipnetif_refrele(); NULL is returned if there is no
+ * match.
+ */
+static ipnetif_t *
+ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips)
+{
+	avl_tree_t	*tree = &ips->ips_avl_by_index;
+	ipnetif_t	*cur;
+
+	mutex_enter(&ips->ips_avl_lock);
+	cur = avl_first(tree);
+	while (cur != NULL && cur->if_dev != dev)
+		cur = avl_walk(tree, cur, AVL_AFTER);
+	if (cur != NULL)
+		ipnetif_refhold(cur);
+	mutex_exit(&ips->ips_avl_lock);
+
+	return (cur);
+}
+
+/*
+ * Search the ipnetif's IPv6 (isv6) or IPv4 address list for the entry whose
+ * ifa_id equals lid.  Returns the entry, or NULL if there is none.  Note
+ * that if_addr_lock is only held for the duration of the search.
+ */
+static ipnetif_addr_t *
+ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6)
+{
+	list_t		*addrs;
+	ipnetif_addr_t	*cur;
+
+	if (isv6)
+		addrs = &ipnetif->if_ip6addr_list;
+	else
+		addrs = &ipnetif->if_ip4addr_list;
+
+	mutex_enter(&ipnetif->if_addr_lock);
+	cur = list_head(addrs);
+	while (cur != NULL && cur->ifa_id != lid)
+		cur = list_next(addrs, cur);
+	mutex_exit(&ipnetif->if_addr_lock);
+
+	return (cur);
+}
+
+/*
+ * Allocate and initialize the ipnet state for netstack ns: the AVL trees of
+ * ipnetifs (by index and by name) with their lock, the walker count lock
+ * and condition variable, and the list of open streams.  The netinfo hooks
+ * are registered last, once everything they may touch is set up.  Returns
+ * the new ipnet_stack_t.
+ *
+ * NOTE(review): ips_event_lock, which ipnet_nicevent_task() takes, is not
+ * initialized here; unless that is done elsewhere it relies on the zeroed
+ * allocation -- confirm.
+ */
+/* ARGSUSED */
+static void *
+ipnet_stack_init(netstackid_t stackid, netstack_t *ns)
+{
+	ipnet_stack_t	*ips = kmem_zalloc(sizeof (*ips), KM_SLEEP);
+
+	ips->ips_netstack = ns;
+
+	/* Locks and the walkers condition variable. */
+	mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0);
+	mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL);
+
+	/* Containers: two views of the ipnetifs, and the open streams. */
+	avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index,
+	    sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index));
+	avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name,
+	    sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name));
+	list_create(&ips->ips_str_list, sizeof (ipnet_t),
+	    offsetof(ipnet_t, ipnet_next));
+
+	ipnet_register_netihook(ips);
+	return (ips);
+}
+
+/*
+ * Tear down the ipnet state of a netstack (arg is its ipnet_stack_t).  The
+ * NIC event hook is unregistered from, and the handle released for, each of
+ * IPv4 and IPv6 that has a handle; the hook is then freed.  After that,
+ * every remaining ipnetif is removed and the trees, locks, condition
+ * variable, stream list and the ipnet_stack_t itself are destroyed.
+ */
+/* ARGSUSED */
+static void
+ipnet_stack_fini(netstackid_t stackid, void *arg)
+{
+	ipnet_stack_t	*ips = arg;
+	ipnetif_t	*cur, *next;
+
+	if (ips->ips_ndv4 != NULL) {
+		VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS,
+		    ips->ips_nicevents) == 0);
+		VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
+	}
+	if (ips->ips_ndv6 != NULL) {
+		VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS,
+		    ips->ips_nicevents) == 0);
+		VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
+	}
+	hook_free(ips->ips_nicevents);
+
+	cur = avl_first(&ips->ips_avl_by_index);
+	while (cur != NULL) {
+		/*
+		 * ipnet_remove_if() unlinks cur from the tree and may free
+		 * it, so the successor has to be fetched beforehand.
+		 */
+		next = AVL_NEXT(&ips->ips_avl_by_index, cur);
+		ipnet_remove_if(cur, ips);
+		cur = next;
+	}
+
+	avl_destroy(&ips->ips_avl_by_index);
+	avl_destroy(&ips->ips_avl_by_name);
+	mutex_destroy(&ips->ips_avl_lock);
+	mutex_destroy(&ips->ips_walkers_lock);
+	cv_destroy(&ips->ips_walkers_cv);
+	list_destroy(&ips->ips_str_list);
+	kmem_free(ips, sizeof (*ips));
+}
+
+/*
+ * Do any of the addresses in addrlist belong to the supplied zoneid?
+ * Returns B_TRUE as soon as one entry with a matching ifa_zone is found and
+ * B_FALSE if the list holds none.
+ */
+static boolean_t
+ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
+{
+	ipnetif_addr_t *cur = list_head(addrlist);
+
+	while (cur != NULL && cur->ifa_zone != zoneid)
+		cur = list_next(addrlist, cur);
+
+	return (cur != NULL ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Should the supplied ipnetif be visible from the supplied zoneid?
+ * ips is the ipnet stack that the ipnetif belongs to.
+ */
+static boolean_t
+ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
+{
+	boolean_t	visible;
+
+	/*
+	 * The global zone sees every interface in the global stack, and an
+	 * exclusive-stack zone sees every interface in its own stack.
+	 */
+	if (zoneid == GLOBAL_ZONEID)
+		return (B_TRUE);
+	if (ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
+		return (B_TRUE);
+
+	/*
+	 * What remains are shared-stack zones, which only see interfaces
+	 * that have at least one address (IPv4 or IPv6) in their zone.
+	 */
+	mutex_enter(&ipnetif->if_addr_lock);
+	visible = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid);
+	if (!visible) {
+		visible = ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list,
+		    zoneid);
+	}
+	mutex_exit(&ipnetif->if_addr_lock);
+
+	return (visible);
+}
+
+/*
+ * Re-validate every ipnet_t (open stream) that refers to the supplied
+ * ipnetif.  A stream may lose the right to have the ipnetif open when the
+ * ipnetif no longer has any address in the stream's non-global shared-stack
+ * zone; each such stream is sent an M_HANGUP.
+ */
+static void
+ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
+{
+	list_t	*strlist = &ips->ips_str_list;
+	ipnet_t	*str;
+
+	ipnet_walkers_inc(ips);
+	str = list_head(strlist);
+	while (str != NULL) {
+		if (str->ipnet_if == ipnetif &&
+		    !ipnet_if_in_zone(ipnetif, str->ipnet_zoneid, ips))
+			(void) putnextctl(str->ipnet_rq, M_HANGUP);
+		str = list_next(strlist, str);
+	}
+	ipnet_walkers_dec(ips);
+}
+
+/*
+ * Call cb(name, arg, dev) once for every ipnetif that is visible from
+ * zoneid.  The names and dev_t's of the visible ipnetifs are first copied
+ * onto a private list while ips_avl_lock is held; the callbacks are made
+ * afterwards, with the lock dropped, from those copies.
+ */
+void
+ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
+{
+	ipnetif_t		*ipnetif;
+	list_t			snapshot;
+	ipnetif_cbdata_t	*ent;
+	netstack_t		*ns;
+	ipnet_stack_t		*ips;
+	avl_tree_t		*tree;
+
+	/*
+	 * On labeled systems, non-global zones shouldn't see anything
+	 * in /dev/ipnet.
+	 */
+	if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
+		return;
+
+	ns = netstack_find_by_zoneid(zoneid);
+	if (ns == NULL)
+		return;
+
+	ips = ns->netstack_ipnet;
+	tree = &ips->ips_avl_by_index;
+	list_create(&snapshot, sizeof (ipnetif_cbdata_t),
+	    offsetof(ipnetif_cbdata_t, ic_next));
+
+	/* Pass 1: record what the zone is allowed to see. */
+	mutex_enter(&ips->ips_avl_lock);
+	ipnetif = avl_first(tree);
+	while (ipnetif != NULL) {
+		if (ipnet_if_in_zone(ipnetif, zoneid, ips)) {
+			ent = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP);
+			(void) strlcpy(ent->ic_ifname, ipnetif->if_name,
+			    LIFNAMSIZ);
+			ent->ic_dev = ipnetif->if_dev;
+			list_insert_head(&snapshot, ent);
+		}
+		ipnetif = avl_walk(tree, ipnetif, AVL_AFTER);
+	}
+	mutex_exit(&ips->ips_avl_lock);
+
+	/* Pass 2: make the callbacks, consuming the list as we go. */
+	for (ent = list_head(&snapshot); ent != NULL;
+	    ent = list_head(&snapshot)) {
+		cb(ent->ic_ifname, arg, ent->ic_dev);
+		list_remove(&snapshot, ent);
+		kmem_free(ent, sizeof (ipnetif_cbdata_t));
+	}
+	list_destroy(&snapshot);
+	netstack_rele(ns);
+}
+
+/*
+ * Comparison function for the by-index AVL tree.  index_ptr points at the
+ * index being looked for and ipnetifp at an ipnetif_t; the result is the
+ * sign of (ipnetif's if_index - index looked for).
+ */
+static int
+ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp)
+{
+	const ipnetif_t	*node = ipnetifp;
+	int64_t		wanted = *((int64_t *)index_ptr);
+	int64_t		have = (int64_t)node->if_index;
+
+	return (SIGNOF(have - wanted));
+}
+
+/*
+ * Comparison function for the by-name AVL tree.  name_ptr is the name being
+ * looked for and ipnetifp an ipnetif_t; the result is the sign of
+ * strcmp(ipnetif's if_name, name looked for).
+ */
+static int
+ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp)
+{
+	const ipnetif_t	*node = ipnetifp;
+	int		order = strcmp(node->if_name, name_ptr);
+
+	return (SIGNOF(order));
+}
+
+/*
+ * Take a hold on the ipnetif by incrementing if_refcnt under if_reflock.
+ * Each hold is dropped with ipnetif_refrele().
+ */
+static void
+ipnetif_refhold(ipnetif_t *ipnetif)
+{
+	kmutex_t	*lock = &ipnetif->if_reflock;
+
+	mutex_enter(lock);
+	ipnetif->if_refcnt += 1;
+	mutex_exit(lock);
+}
+
+/*
+ * Drop a hold on the ipnetif.  When the last hold goes away the ipnetif is
+ * handed to ipnet_free_if(); if_reflock is deliberately not exited on that
+ * path, since ipnet_free_if() destroys the lock along with the ipnetif.
+ */
+static void
+ipnetif_refrele(ipnetif_t *ipnetif)
+{
+	mutex_enter(&ipnetif->if_reflock);
+	ASSERT(ipnetif->if_refcnt != 0);
+	ipnetif->if_refcnt--;
+	if (ipnetif->if_refcnt != 0) {
+		mutex_exit(&ipnetif->if_reflock);
+		return;
+	}
+	ipnet_free_if(ipnetif);
+}
+
+/*
+ * Note that one more walker is active on this stack by incrementing
+ * ips_walkers_cnt under ips_walkers_lock.  Paired with ipnet_walkers_dec().
+ */
+static void
+ipnet_walkers_inc(ipnet_stack_t *ips)
+{
+	kmutex_t	*lock = &ips->ips_walkers_lock;
+
+	mutex_enter(lock);
+	ips->ips_walkers_cnt += 1;
+	mutex_exit(lock);
+}
+
+/*
+ * Note that a walker has finished by decrementing ips_walkers_cnt under
+ * ips_walkers_lock.  When the count reaches zero, every thread waiting on
+ * ips_walkers_cv is woken.
+ */
+static void
+ipnet_walkers_dec(ipnet_stack_t *ips)
+{
+	mutex_enter(&ips->ips_walkers_lock);
+	ASSERT(ips->ips_walkers_cnt != 0);
+	ips->ips_walkers_cnt--;
+	if (ips->ips_walkers_cnt == 0)
+		cv_broadcast(&ips->ips_walkers_cv);
+	mutex_exit(&ips->ips_walkers_lock);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/inet/ipnet/ipnet.conf	Thu Nov 06 06:47:54 2008 -0500
@@ -0,0 +1,26 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+name="ipnet" parent="pseudo" instance=0;
--- a/usr/src/uts/common/inet/tcp.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/tcp.h	Thu Nov 06 06:47:54 2008 -0500
@@ -292,7 +292,8 @@
 		tcp_cork : 1,		/* tcp_cork option */
 		tcp_tconnind_started : 1, /* conn_ind message is being sent */
 		tcp_lso :1,		/* Lower layer is capable of LSO */
-		tcp_pad_to_bit_31 : 17;
+		tcp_refuse :1,		/* Connection needs refusing */
+		tcp_pad_to_bit_31 : 16;
 
 	uint32_t	tcp_if_mtu;	/* Outgoing interface MTU. */
 
@@ -393,6 +394,10 @@
 	int	tcp_ip_hdr_len;		/* Byte len of our current IPvx hdr */
 	tcph_t	*tcp_tcph;		/* tcp header within combined hdr */
 	int32_t	tcp_tcp_hdr_len;	/* tcp header len within combined */
+	/* Saved peer headers in the case of re-fusion */
+	ipha_t	tcp_saved_ipha;
+	ip6_t	tcp_saved_ip6h;
+	tcph_t	tcp_saved_tcph;
 
 	uint32_t tcp_sum;		/* checksum to compensate for source */
 					/* routed packets. Host byte order */
--- a/usr/src/uts/common/inet/tcp/tcp.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/tcp/tcp.c	Thu Nov 06 06:47:54 2008 -0500
@@ -8019,6 +8019,7 @@
 	tcp->tcp_zero_win_probe = 0;
 
 	tcp->tcp_loopback = 0;
+	tcp->tcp_refuse = 0;
 	tcp->tcp_localnet = 0;
 	tcp->tcp_syn_defense = 0;
 	tcp->tcp_set_timer = 0;
@@ -17866,6 +17867,7 @@
 	tcp_t		*tcp = connp->conn_tcp;
 	uint32_t	msize;
 	tcp_stack_t	*tcps = tcp->tcp_tcps;
+	ip_stack_t	*ipst = tcps->tcps_netstack->netstack_ip;
 
 	/*
 	 * Try and ASSERT the minimum possible references on the
@@ -17886,6 +17888,16 @@
 	tcp->tcp_squeue_bytes -= msize;
 	mutex_exit(&tcp->tcp_non_sq_lock);
 
+	/* Check to see if this connection wants to be re-fused. */
+	if (tcp->tcp_refuse && !ipst->ips_ipobs_enabled) {
+		if (tcp->tcp_ipversion == IPV4_VERSION) {
+			tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ipha,
+			    &tcp->tcp_saved_tcph);
+		} else {
+			tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ip6h,
+			    &tcp->tcp_saved_tcph);
+		}
+	}
 	/* Bypass tcp protocol for fused tcp loopback */
 	if (tcp->tcp_fused && tcp_fuse_output(tcp, mp, msize))
 		return;
@@ -19462,7 +19474,7 @@
 		 * depending on the availability of transmit resources at
 		 * the media layer.
 		 */
-		IP_DLS_ILL_TX(ill, ipha, mp, ipst);
+		IP_DLS_ILL_TX(ill, ipha, mp, ipst, ire_fp_mp_len);
 	} else {
 		ill_t *out_ill = (ill_t *)ire->ire_stq->q_ptr;
 		DTRACE_PROBE4(ip4__physical__out__start,
@@ -19474,6 +19486,12 @@
 		DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
 
 		if (mp != NULL) {
+			if (ipst->ips_ipobs_enabled) {
+				ipobs_hook(mp, IPOBS_HOOK_OUTBOUND,
+				    IP_REAL_ZONEID(connp->conn_zoneid, ipst),
+				    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len,
+				    ipst);
+			}
 			DTRACE_IP_FASTPATH(mp, ipha, out_ill, ipha, NULL);
 			putnext(ire->ire_stq, mp);
 		}
@@ -21258,6 +21276,24 @@
 		atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, obsegs);
 	ire->ire_last_used_time = lbolt;
 
+	if (ipst->ips_ipobs_enabled) {
+		multidata_t *dlmdp = mmd_getmultidata(md_mp_head);
+		pdesc_t *dl_pkt;
+		pdescinfo_t pinfo;
+		mblk_t *nmp;
+		zoneid_t szone = tcp->tcp_connp->conn_zoneid;
+
+		for (dl_pkt = mmd_getfirstpdesc(dlmdp, &pinfo);
+		    (dl_pkt != NULL);
+		    dl_pkt = mmd_getnextpdesc(dl_pkt, &pinfo)) {
+			if ((nmp = mmd_transform_link(dl_pkt)) == NULL)
+				continue;
+			ipobs_hook(nmp, IPOBS_HOOK_OUTBOUND, szone,
+			    ALL_ZONES, ill, tcp->tcp_ipversion, 0, ipst);
+			freemsg(nmp);
+		}
+	}
+
 	/* send it down */
 	if (ILL_DLS_CAPABLE(ill)) {
 		ill_dls_capab_t *ill_dls = ill->ill_dls_capab;
@@ -21437,7 +21473,7 @@
 		 * depending on the availability of transmit resources at
 		 * the media layer.
 		 */
-		IP_DLS_ILL_TX(ill, ipha, mp, ipst);
+		IP_DLS_ILL_TX(ill, ipha, mp, ipst, ire_fp_mp_len);
 	} else {
 		ill_t *out_ill = (ill_t *)ire->ire_stq->q_ptr;
 		DTRACE_PROBE4(ip4__physical__out__start,
@@ -21449,6 +21485,13 @@
 		DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
 
 		if (mp != NULL) {
+			if (ipst->ips_ipobs_enabled) {
+				zoneid_t szone = tcp->tcp_connp->conn_zoneid;
+
+				ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
+				    ALL_ZONES, ill, tcp->tcp_ipversion,
+				    ire_fp_mp_len, ipst);
+			}
 			DTRACE_IP_FASTPATH(mp, ipha, out_ill, ipha, NULL);
 			putnext(ire->ire_stq, mp);
 		}
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c	Thu Nov 06 06:47:54 2008 -0500
@@ -202,9 +202,12 @@
 	 * around until tcp_accept_finish() is called on this eager --
 	 * this won't happen until we're done since we're inside the
 	 * eager's perimeter now.
+	 *
+	 * We can also get called in the case where a connection needs
+	 * to be re-fused. In this case tcp_saved_listener will be
+	 * NULL but tcp_refuse will be true.
 	 */
-	ASSERT(tcp->tcp_saved_listener != NULL);
-
+	ASSERT(tcp->tcp_saved_listener != NULL || tcp->tcp_refuse);
 	/*
 	 * Lookup peer endpoint; search for the remote endpoint having
 	 * the reversed address-port quadruplet in ESTABLISHED state,
@@ -329,36 +332,43 @@
 		 * inherit the listener's q_hiwat value; this is temporary
 		 * since we'll repeat the process in tcp_accept_finish().
 		 */
-		(void) tcp_fuse_set_rcv_hiwat(tcp,
-		    tcp->tcp_saved_listener->tcp_rq->q_hiwat);
+		if (!tcp->tcp_refuse) {
+			(void) tcp_fuse_set_rcv_hiwat(tcp,
+			    tcp->tcp_saved_listener->tcp_rq->q_hiwat);
 
-		/*
-		 * Set the stream head's write offset value to zero since we
-		 * won't be needing any room for TCP/IP headers; tell it to
-		 * not break up the writes (this would reduce the amount of
-		 * work done by kmem); and configure our receive buffer.
-		 * Note that we can only do this for the active connect tcp
-		 * since our eager is still detached; it will be dealt with
-		 * later in tcp_accept_finish().
-		 */
-		DB_TYPE(mp) = M_SETOPTS;
-		mp->b_wptr += sizeof (*stropt);
+			/*
+			 * Set the stream head's write offset value to zero
+			 * since we won't be needing any room for TCP/IP
+			 * headers; tell it to not break up the writes (this
+			 * would reduce the amount of work done by kmem); and
+			 * configure our receive buffer. Note that we can only
+			 * do this for the active connect tcp since our eager
+			 * is still detached; it will be dealt with later in
+			 * tcp_accept_finish().
+			 */
+			DB_TYPE(mp) = M_SETOPTS;
+			mp->b_wptr += sizeof (*stropt);
 
-		stropt = (struct stroptions *)mp->b_rptr;
-		stropt->so_flags = SO_MAXBLK | SO_WROFF | SO_HIWAT;
-		stropt->so_maxblk = tcp_maxpsz_set(peer_tcp, B_FALSE);
-		stropt->so_wroff = 0;
+			stropt = (struct stroptions *)mp->b_rptr;
+			stropt->so_flags = SO_MAXBLK | SO_WROFF | SO_HIWAT;
+			stropt->so_maxblk = tcp_maxpsz_set(peer_tcp, B_FALSE);
+			stropt->so_wroff = 0;
 
-		/*
-		 * Record the stream head's high water mark for
-		 * peer endpoint; this is used for flow-control
-		 * purposes in tcp_fuse_output().
-		 */
-		stropt->so_hiwat = tcp_fuse_set_rcv_hiwat(peer_tcp,
-		    peer_rq->q_hiwat);
+			/*
+			 * Record the stream head's high water mark for
+			 * peer endpoint; this is used for flow-control
+			 * purposes in tcp_fuse_output().
+			 */
+			stropt->so_hiwat = tcp_fuse_set_rcv_hiwat(peer_tcp,
+			    peer_rq->q_hiwat);
 
-		/* Send the options up */
-		putnext(peer_rq, mp);
+			tcp->tcp_refuse = B_FALSE;
+			peer_tcp->tcp_refuse = B_FALSE;
+			/* Send the options up */
+			putnext(peer_rq, mp);
+		}
+		tcp->tcp_refuse = B_FALSE;
+		peer_tcp->tcp_refuse = B_FALSE;
 	} else {
 		TCP_STAT(tcps, tcp_fusion_unqualified);
 	}
@@ -410,6 +420,10 @@
 	/* Unfuse the endpoints */
 	peer_tcp->tcp_fused = tcp->tcp_fused = B_FALSE;
 	peer_tcp->tcp_loopback_peer = tcp->tcp_loopback_peer = NULL;
+	freeb(peer_tcp->tcp_fused_sigurg_mp);
+	freeb(tcp->tcp_fused_sigurg_mp);
+	peer_tcp->tcp_fused_sigurg_mp = NULL;
+	tcp->tcp_fused_sigurg_mp = NULL;
 }
 
 /*
@@ -536,8 +550,27 @@
 	/* If this connection requires IP, unfuse and use regular path */
 	if (tcp_loopback_needs_ip(tcp, ns) ||
 	    tcp_loopback_needs_ip(peer_tcp, ns) ||
-	    IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN, ipst)) {
+	    IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN, ipst) ||
+	    list_head(&ipst->ips_ipobs_cb_list) != NULL) {
 		TCP_STAT(tcps, tcp_fusion_aborted);
+		tcp->tcp_refuse = B_TRUE;
+		peer_tcp->tcp_refuse = B_TRUE;
+
+		bcopy(peer_tcp->tcp_tcph, &tcp->tcp_saved_tcph,
+		    sizeof (tcph_t));
+		bcopy(tcp->tcp_tcph, &peer_tcp->tcp_saved_tcph,
+		    sizeof (tcph_t));
+		if (tcp->tcp_ipversion == IPV4_VERSION) {
+			bcopy(peer_tcp->tcp_ipha, &tcp->tcp_saved_ipha,
+			    sizeof (ipha_t));
+			bcopy(tcp->tcp_ipha, &peer_tcp->tcp_saved_ipha,
+			    sizeof (ipha_t));
+		} else {
+			bcopy(peer_tcp->tcp_ip6h, &tcp->tcp_saved_ip6h,
+			    sizeof (ip6_t));
+			bcopy(tcp->tcp_ip6h, &peer_tcp->tcp_saved_ip6h,
+			    sizeof (ip6_t));
+		}
 		goto unfuse;
 	}
 
--- a/usr/src/uts/common/inet/udp/udp.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/inet/udp/udp.c	Thu Nov 06 06:47:54 2008 -0500
@@ -24,8 +24,6 @@
  */
 /* Copyright (c) 1990 Mentat Inc. */
 
-const char udp_version[] = "@(#)udp.c	1.206	08/10/17 SMI";
-
 #include <sys/types.h>
 #include <sys/stream.h>
 #include <sys/dlpi.h>
@@ -80,6 +78,7 @@
 #include <inet/ipclassifier.h>
 #include <inet/ipsec_impl.h>
 #include <inet/ipp_common.h>
+#include <inet/ipnet.h>
 
 /*
  * The ipsec_info.h header file is here since it has the definition for the
@@ -6341,7 +6340,7 @@
 		 * depending on the availability of transmit resources at
 		 * the media layer.
 		 */
-		IP_DLS_ILL_TX(ill, ipha, mp, ipst);
+		IP_DLS_ILL_TX(ill, ipha, mp, ipst, ire_fp_mp_len);
 	} else {
 		DTRACE_PROBE4(ip4__physical__out__start,
 		    ill_t *, NULL, ill_t *, ill,
@@ -6351,13 +6350,18 @@
 		    NULL, ill, ipha, mp, mp, ll_multicast, ipst);
 		DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
 		if (mp != NULL) {
+			if (ipst->ips_ipobs_enabled) {
+				ipobs_hook(mp, IPOBS_HOOK_OUTBOUND,
+				    IP_REAL_ZONEID(connp->conn_zoneid, ipst),
+				    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len,
+				    ipst);
+			}
 			DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
 			    void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill,
 			    ipha_t *, ipha, ip6_t *, NULL, int, 0);
 			putnext(ire->ire_stq, mp);
 		}
 	}
-
 	IRE_REFRELE(ire);
 }
 
--- a/usr/src/uts/common/io/neti_impl.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/io/neti_impl.c	Thu Nov 06 06:47:54 2008 -0500
@@ -262,6 +262,32 @@
 	    nelem, type, storage));
 }
 
+/*
+ * Forward to the protocol's neti_getlifzone callback for the given physical
+ * and logical interface; the callback is handed the zoneid pointer to
+ * report its result through.  Returns -1 without calling it if the handle
+ * is condemned or has no stack, and the callback's return value otherwise.
+ */
+int
+net_getlifzone(net_handle_t info, phy_if_t phy_ifdata, lif_if_t ifdata,
+    zoneid_t *zoneid)
+{
+	ASSERT(info != NULL);
+
+	if (info->netd_condemned != 0)
+		return (-1);
+	if (info->netd_stack == NULL)
+		return (-1);
+
+	return (info->netd_info.neti_getlifzone(info, phy_ifdata, ifdata,
+	    zoneid));
+}
+
+/*
+ * Forward to the protocol's neti_getlifflags callback for the given
+ * physical and logical interface; the callback is handed the flags pointer
+ * to report its result through.  Returns -1 without calling it if the
+ * handle is condemned or has no stack, and the callback's return value
+ * otherwise.
+ */
+int
+net_getlifflags(net_handle_t info, phy_if_t phy_ifdata, lif_if_t ifdata,
+    uint64_t *flags)
+{
+	ASSERT(info != NULL);
+
+	if (info->netd_condemned != 0)
+		return (-1);
+	if (info->netd_stack == NULL)
+		return (-1);
+
+	return (info->netd_info.neti_getlifflags(info, phy_ifdata, ifdata,
+	    flags));
+}
+
 phy_if_t
 net_phygetnext(net_handle_t info, phy_if_t nic)
 {
--- a/usr/src/uts/common/io/sundlpi.c	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/io/sundlpi.c	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  *  Common Sun DLPI routines.
  */
@@ -583,6 +581,7 @@
 	case DL_IPV4:		return ("IPv4 Tunnel");
 	case DL_IPV6:		return ("IPv6 Tunnel");
 	case DL_WIFI:		return ("IEEE 802.11");
+	case DL_IPNET:		return ("IPNET");
 	default:		return ("<unknown mactype>");
 	}
 }
--- a/usr/src/uts/common/os/priv_defs	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/os/priv_defs	Thu Nov 06 06:47:54 2008 -0500
@@ -223,6 +223,11 @@
 	This privilege is interpreted only if the system is configured
 	with Trusted Extensions.
 
+privilege PRIV_NET_OBSERVABILITY
+
+	Allows a process to access /dev/lo0 and the devices in /dev/ipnet/
+	without requiring PRIV_NET_RAWACCESS.
+
 privilege PRIV_NET_PRIVADDR
 
 	Allows a process to bind to a privileged port
--- a/usr/src/uts/common/sys/dlpi.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/sys/dlpi.h	Thu Nov 06 06:47:54 2008 -0500
@@ -35,11 +35,8 @@
 #ifndef	_SYS_DLPI_H
 #define	_SYS_DLPI_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 1.2	*/
-
 #include <sys/types.h>
 #include <sys/stream.h>
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -51,9 +48,20 @@
 #define	DLIOCRAW	(DLIOC|1)	/* M_DATA "raw" mode */
 #define	DLIOCNATIVE	(DLIOC|2)	/* Native traffic mode */
 #define	DLIOCMARGININFO	(DLIOC|3)	/* margin size info */
+#define	DLIOCIPNETINFO	(DLIOC|4)	/* ipnet header */
 #define	DLIOCHDRINFO	(DLIOC|10)	/* IP fast-path */
+#define	DL_IOC_HDR_INFO	DLIOCHDRINFO
+
+#define	DL_IPNETINFO_VERSION	0x1
 
-#define	DL_IOC_HDR_INFO	DLIOCHDRINFO
+/*
+ * Versioned header describing one observed IP packet.
+ * NOTE(review): presumably the "ipnet header" that DLIOCIPNETINFO refers
+ * to -- confirm against the ipnet driver.
+ */
+typedef struct dl_ipnetinfo {
+	uint8_t		dli_version;	/* DL_IPNETINFO_* version */
+	uint8_t		dli_ipver;	/* packet IP header version */
+	uint16_t	dli_len;	/* length of dl_ipnetinfo_t */
+	uint32_t	dli_pad;	/* pads the fields above to 8 bytes */
+	uint64_t	dli_srczone; 	/* packet source zone ID (if any) */
+	uint64_t	dli_dstzone;	/* packet dest zone ID (if any) */
+} dl_ipnetinfo_t;
 
 /*
  * DLPI revision definition history
@@ -264,7 +272,7 @@
 #define	DL_IPV6		0x80000002ul	/* IPv6 Tunnel Link */
 #define	SUNW_DL_VNI	0x80000003ul	/* Virtual network interface */
 #define	DL_WIFI		0x80000004ul	/* IEEE 802.11 */
-
+#define	DL_IPNET	0x80000005ul	/* ipnet(7D) link */
 /*
  * DLPI provider service supported.
  * These must be allowed to be bitwise-OR for dl_service_mode in
--- a/usr/src/uts/common/sys/fs/sdev_impl.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/sys/fs/sdev_impl.h	Thu Nov 06 06:47:54 2008 -0500
@@ -271,6 +271,7 @@
 #define	SDEV_VTOR_INVALID	-1
 #define	SDEV_VTOR_SKIP		0
 #define	SDEV_VTOR_VALID		1
+#define	SDEV_VTOR_STALE		2
 
 /* convenient macros */
 #define	SDEV_IS_GLOBAL(dv)	\
@@ -626,6 +627,7 @@
 extern void prof_filldir(struct sdev_node *);
 extern int devpts_validate(struct sdev_node *dv);
 extern int devnet_validate(struct sdev_node *dv);
+extern int devipnet_validate(struct sdev_node *dv);
 extern int devvt_validate(struct sdev_node *dv);
 extern void *sdev_get_vtor(struct sdev_node *dv);
 
@@ -657,11 +659,13 @@
 extern struct vnodeops		*sdev_vnodeops;
 extern struct vnodeops		*devpts_vnodeops;
 extern struct vnodeops		*devnet_vnodeops;
+extern struct vnodeops		*devipnet_vnodeops;
 extern struct vnodeops		*devvt_vnodeops;
 extern struct sdev_data *sdev_origins; /* mount info for global /dev instance */
 extern const fs_operation_def_t	sdev_vnodeops_tbl[];
 extern const fs_operation_def_t	devpts_vnodeops_tbl[];
 extern const fs_operation_def_t	devnet_vnodeops_tbl[];
+extern const fs_operation_def_t	devipnet_vnodeops_tbl[];
 extern const fs_operation_def_t	devvt_vnodeops_tbl[];
 extern const fs_operation_def_t	devsys_vnodeops_tbl[];
 extern const fs_operation_def_t	devpseudo_vnodeops_tbl[];
--- a/usr/src/uts/common/sys/hook_event.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/sys/hook_event.h	Thu Nov 06 06:47:54 2008 -0500
@@ -74,7 +74,9 @@
 	NE_UNPLUMB,
 	NE_UP,
 	NE_DOWN,
-	NE_ADDRESS_CHANGE
+	NE_ADDRESS_CHANGE,
+	NE_LIF_UP,
+	NE_LIF_DOWN
 } nic_event_t;
 
 typedef void *nic_event_data_t;
--- a/usr/src/uts/common/sys/neti.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/sys/neti.h	Thu Nov 06 06:47:54 2008 -0500
@@ -127,6 +127,10 @@
 	int		(*netp_getpmtuenabled)(net_handle_t);
 	int		(*netp_getlifaddr)(net_handle_t, phy_if_t, lif_if_t,
 			    size_t, net_ifaddr_t [], void *);
+	int		(*neti_getlifzone)(net_handle_t, phy_if_t, lif_if_t,
+			    zoneid_t *);
+	int		(*neti_getlifflags)(net_handle_t, phy_if_t, lif_if_t,
+			    uint64_t *);
 	phy_if_t	(*netp_phygetnext)(net_handle_t, phy_if_t);
 	phy_if_t	(*netp_phylookup)(net_handle_t, const char *);
 	lif_if_t	(*netp_lifgetnext)(net_handle_t, phy_if_t, lif_if_t);
@@ -274,6 +278,8 @@
 extern int net_getpmtuenabled(net_handle_t);
 extern int net_getlifaddr(net_handle_t, phy_if_t, lif_if_t,
     int, net_ifaddr_t [], void *);
+extern zoneid_t net_getlifzone(net_handle_t, phy_if_t, lif_if_t, zoneid_t *);
+extern int net_getlifflags(net_handle_t, phy_if_t, lif_if_t, uint64_t *);
 extern phy_if_t net_phygetnext(net_handle_t, phy_if_t);
 extern phy_if_t net_phylookup(net_handle_t, const char *);
 extern lif_if_t net_lifgetnext(net_handle_t, phy_if_t, lif_if_t);
--- a/usr/src/uts/common/sys/netstack.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/sys/netstack.h	Thu Nov 06 06:47:54 2008 -0500
@@ -78,7 +78,8 @@
 #define	NS_IPSECAH	13
 #define	NS_IPSECESP	14
 #define	NS_TUN		15
-#define	NS_MAX		(NS_TUN+1)
+#define	NS_IPNET	16
+#define	NS_MAX		(NS_IPNET+1)
 
 /*
  * State maintained for each module which tracks the state of
@@ -151,6 +152,7 @@
 			struct ipsecah_stack	*nu_ipsecah;
 			struct ipsecesp_stack	*nu_ipsecesp;
 			struct tun_stack	*nu_tun;
+			struct ipnet_stack	*nu_ipnet;
 		} nu_s;
 	} netstack_u;
 #define	netstack_modules	netstack_u.nu_modules
@@ -170,6 +172,7 @@
 #define	netstack_ipsecah	netstack_u.nu_s.nu_ipsecah
 #define	netstack_ipsecesp	netstack_u.nu_s.nu_ipsecesp
 #define	netstack_tun		netstack_u.nu_s.nu_tun
+#define	netstack_ipnet		netstack_u.nu_s.nu_ipnet
 
 	nm_state_t	netstack_m_state[NS_MAX]; /* module state */
 
--- a/usr/src/uts/common/sys/sysmacros.h	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/common/sys/sysmacros.h	Thu Nov 06 06:47:54 2008 -0500
@@ -57,6 +57,9 @@
 #ifndef ABS
 #define	ABS(a)		((a) < 0 ? -(a) : (a))
 #endif
+#ifndef	SIGNOF
+#define	SIGNOF(a)	((a) < 0 ? -1 : (a) > 0)
+#endif
 
 #ifdef _KERNEL
 
--- a/usr/src/uts/intel/Makefile.intel.shared	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/intel/Makefile.intel.shared	Thu Nov 06 06:47:54 2008 -0500
@@ -234,6 +234,7 @@
 DRV_KMODS	+= ip
 DRV_KMODS	+= ip6
 DRV_KMODS	+= ipf
+DRV_KMODS	+= ipnet
 DRV_KMODS	+= ippctl
 DRV_KMODS	+= ipsecah
 DRV_KMODS	+= ipsecesp
@@ -643,7 +644,7 @@
 #	delivered with a released system. However, during development
 #	it is convenient to build and install the SVVS kernel modules.
 #
-SVVS_KMODS	+= lmodb lmode lmodr lmodt lo tidg tivc tmux
+SVVS_KMODS	+= lmodb lmode lmodr lmodt svvslo tidg tivc tmux
 
 SVVS		+= svvs
 
--- a/usr/src/uts/intel/dev/Makefile	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/intel/dev/Makefile	Thu Nov 06 06:47:54 2008 -0500
@@ -23,8 +23,6 @@
 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
 #	This makefile drives the production of the dev file system
 #	kernel module.
 #
--- a/usr/src/uts/intel/ia32/ml/modstubs.s	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/intel/ia32/ml/modstubs.s	Thu Nov 06 06:47:54 2008 -0500
@@ -1330,6 +1330,13 @@
 	END_MODULE(dcopy);
 #endif
 
+#ifndef IPNET_MODULE
+	MODULE(ipnet,drv);
+	STUB(ipnet, ipnet_if_getdev, nomod_zero);
+	STUB(ipnet, ipnet_walk_if, nomod_zero);
+	END_MODULE(ipnet);
+#endif
+
 / this is just a marker for the area of text that contains stubs 
 
 	ENTRY_NP(stubs_end)
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/intel/ip/ip.global-objs.debug64	Thu Nov 06 06:47:54 2008 -0500
@@ -337,7 +337,6 @@
 udp_rinitv4
 udp_rinitv6
 udp_valid_levels_arr
-udp_version
 udp_winit
 udpinfov4
 udpinfov6
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/intel/ip/ip.global-objs.obj64	Thu Nov 06 06:47:54 2008 -0500
@@ -326,7 +326,6 @@
 udp_rinitv4
 udp_rinitv6
 udp_valid_levels_arr
-udp_version
 udp_winit
 udpinfov4
 udpinfov6
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/intel/ipnet/Makefile	Thu Nov 06 06:47:54 2008 -0500
@@ -0,0 +1,105 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#   This makefile drives the production of the ipnet driver
+#   kernel module.
+#
+#   intel architecture dependent
+#
+
+#
+#   Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+
+UTSBASE = ../..
+
+#
+#   Define the module and object file sets.
+#
+
+MODULE          = ipnet
+OBJECTS         = $(IPNET_OBJS:%=$(OBJS_DIR)/%)
+LINTS           = $(IPNET_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE      = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR     = $(UTSBASE)/common/inet/ipnet
+
+#
+#   Include common rules.
+#
+
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+#   Define targets
+#
+
+ALL_TARGET      = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET     = $(MODULE).lint
+INSTALL_TARGET  = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# lint pass one enforcement
+#
+
+CFLAGS          += $(CCVERBOSE)
+
+#
+# STREAMS API limitations force us to turn off these lint checks.
+#
+LINTTAGS        += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS        += -erroff=E_PTRDIFF_OVERFLOW
+
+#
+# Depends on ip, neti, and hook.
+#
+LDFLAGS		+= -dy -Ndrv/ip -Nmisc/neti -Nmisc/hook
+
+#
+#   Default build targets.
+#
+
+.KEEP_STATE:
+
+def:            $(DEF_DEPS)
+
+all:            $(ALL_DEPS)
+
+clean:          $(CLEAN_DEPS)
+
+clobber:        $(CLOBBER_DEPS)
+
+lint:           $(LINT_DEPS)
+
+modlintlib:     $(MODLINTLIB_DEPS)
+
+clean.lint:     $(CLEAN_LINT_DEPS)
+
+install:        $(INSTALL_DEPS)
+
+#
+#   Include common targets.
+#
+
+include $(UTSBASE)/intel/Makefile.targ
--- a/usr/src/uts/intel/os/device_policy	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/intel/os/device_policy	Thu Nov 06 06:47:54 2008 -0500
@@ -65,6 +65,12 @@
 # Virtual network interface access permission
 #
 vni		read_priv_set=net_rawaccess	write_priv_set=net_rawaccess
+
+#
+# IP observability device access permission
+#
+ipnet		read_priv_set=net_observability write_priv_set=net_observability
+
 #
 # Disk devices.
 #
--- a/usr/src/uts/intel/os/minor_perm	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/intel/os/minor_perm	Thu Nov 06 06:47:54 2008 -0500
@@ -43,9 +43,9 @@
 fbt:fbt 0644 root sys
 fd:* 0666 root sys
 id:* 0640 root sys
+ipnet:lo0 0666 root sys
 kstat:* 0666 root sys
 ksyms:* 0666 root sys
-lo:* 0666 root sys
 lockstat:* 0644 root sys
 lofi:* 0600 root sys
 lofi:ctl 0644 root sys
@@ -73,6 +73,7 @@
 cmdk:* 0640 root sys
 st:* 0666 root sys
 sdt:sdt 0644 root sys
+svvslo:* 0666 root sys
 sy:tty 0666 root tty
 sysmsg:msglog 0600 root sys
 sysmsg:sysmsg 0600 root sys
--- a/usr/src/uts/intel/os/name_to_major	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/intel/os/name_to_major	Thu Nov 06 06:47:54 2008 -0500
@@ -23,7 +23,7 @@
 tcp 42
 rts 43
 arp 44
-lo 46
+svvslo 46
 tivc 47
 tidg 48
 tmux 49
@@ -153,3 +153,4 @@
 amd_iommu 258
 xpvtap 259
 nulldriver 260
+ipnet 261
--- a/usr/src/uts/sparc/Makefile.sparc.shared	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/sparc/Makefile.sparc.shared	Thu Nov 06 06:47:54 2008 -0500
@@ -204,7 +204,7 @@
 DRV_KMODS	+= aggr arp bl bofi clone cn conskbd consms cpuid
 DRV_KMODS	+= crypto cryptoadm devinfo dump
 DRV_KMODS	+= dtrace fasttrap fbt lockstat profile sdt systrace
-DRV_KMODS	+= fssnap icmp icmp6 ip ip6 ipsecah
+DRV_KMODS	+= fssnap icmp icmp6 ip ip6 ipnet ipsecah
 DRV_KMODS	+= ipsecesp iwscn keysock kmdb kstat ksyms llc1
 DRV_KMODS	+= lofi
 DRV_KMODS	+= log logindmux kssl mm nca physmem pm poll pool
@@ -453,7 +453,7 @@
 #	delivered with a released system. However, during development
 #	it is convenient to build and install the SVVS kernel modules.
 #
-SVVS_KMODS	+= lmodb lmode lmodr lmodt lo tidg tivc tmux
+SVVS_KMODS	+= lmodb lmode lmodr lmodt svvslo tidg tivc tmux
 
 SVVS		+= svvs
 
--- a/usr/src/uts/sparc/dev/Makefile	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/sparc/dev/Makefile	Thu Nov 06 06:47:54 2008 -0500
@@ -21,8 +21,6 @@
 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
 # uts/sparc/dev/Makefile
 #	This makefile drives the production of the /dev file system
 #	kernel module.
--- a/usr/src/uts/sparc/ip/ip.global-objs.debug64	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64	Thu Nov 06 06:47:54 2008 -0500
@@ -337,7 +337,6 @@
 udp_rinitv4
 udp_rinitv6
 udp_valid_levels_arr
-udp_version
 udp_winit
 udpinfov4
 udpinfov6
--- a/usr/src/uts/sparc/ip/ip.global-objs.obj64	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64	Thu Nov 06 06:47:54 2008 -0500
@@ -326,7 +326,6 @@
 udp_rinitv4
 udp_rinitv6
 udp_valid_levels_arr
-udp_version
 udp_winit
 udpinfov4
 udpinfov6
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/sparc/ipnet/Makefile	Thu Nov 06 06:47:54 2008 -0500
@@ -0,0 +1,105 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#   This makefile drives the production of the ipnet driver
+#   kernel module.
+#
+#   sparc architecture dependent
+#
+
+#
+#   Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+
+UTSBASE = ../..
+
+#
+#   Define the module and object file sets.
+#
+
+MODULE          = ipnet
+OBJECTS         = $(IPNET_OBJS:%=$(OBJS_DIR)/%)
+LINTS           = $(IPNET_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE      = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR     = $(UTSBASE)/common/inet/ipnet
+
+#
+#   Include common rules.
+#
+
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+#   Define targets
+#
+
+ALL_TARGET      = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET     = $(MODULE).lint
+INSTALL_TARGET  = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# lint pass one enforcement
+#
+
+CFLAGS          += $(CCVERBOSE)
+
+#
+# STREAMS API limitations force us to turn off these lint checks.
+#
+LINTTAGS        += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS        += -erroff=E_PTRDIFF_OVERFLOW
+
+#
+# Depends on ip, neti, and hook.
+#
+LDFLAGS		+= -dy -Ndrv/ip -Nmisc/neti -Nmisc/hook
+
+#
+#   Default build targets.
+#
+
+.KEEP_STATE:
+
+def:            $(DEF_DEPS)
+
+all:            $(ALL_DEPS)
+
+clean:          $(CLEAN_DEPS)
+
+clobber:        $(CLOBBER_DEPS)
+
+lint:           $(LINT_DEPS)
+
+modlintlib:     $(MODLINTLIB_DEPS)
+
+clean.lint:     $(CLEAN_LINT_DEPS)
+
+install:        $(INSTALL_DEPS)
+
+#
+#   Include common targets.
+#
+
+include $(UTSBASE)/sparc/Makefile.targ
--- a/usr/src/uts/sparc/ml/modstubs.s	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/sparc/ml/modstubs.s	Thu Nov 06 06:47:54 2008 -0500
@@ -1282,6 +1282,13 @@
 	END_MODULE(dcopy);
 #endif
 
+#ifndef IPNET_MODULE
+	MODULE(ipnet,drv);
+	STUB(ipnet, ipnet_if_getdev, nomod_zero);
+	STUB(ipnet, ipnet_walk_if, nomod_zero);
+	END_MODULE(ipnet);
+#endif
+
 ! this is just a marker for the area of text that contains stubs
 	.seg ".text"
 	.global stubs_end
--- a/usr/src/uts/sparc/os/device_policy	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/sparc/os/device_policy	Thu Nov 06 06:47:54 2008 -0500
@@ -68,6 +68,12 @@
 # Virtual network interface access permission
 #
 vni		read_priv_set=net_rawaccess	write_priv_set=net_rawaccess
+
+#
+# IP observability device access permission
+#
+ipnet		read_priv_set=net_observability write_priv_set=net_observability
+
 #
 # Disk devices.
 #
--- a/usr/src/uts/sparc/os/minor_perm	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/sparc/os/minor_perm	Thu Nov 06 06:47:54 2008 -0500
@@ -44,9 +44,9 @@
 fasttrap:fasttrap 0666 root sys
 fssnap:* 0640 root sys
 fssnap:ctl 0666 root sys
+ipnet:lo0 0666 root sys
 kstat:* 0666 root sys
 ksyms:* 0666 root sys
-lo:* 0666 root sys
 lockstat:* 0644 root sys
 lofi:* 0600 root sys
 lofi:ctl 0644 root sys
@@ -82,6 +82,7 @@
 su:ssp 0600 root sys
 su:sspctl 0600 root sys
 SUNW,fdtwo:* 0666 root sys
+svvslo:* 0666 root sys
 sy:tty 0666 root tty
 sysmsg:msglog 0600 root sys
 sysmsg:sysmsg 0600 root sys
--- a/usr/src/uts/sparc/os/name_to_major	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/uts/sparc/os/name_to_major	Thu Nov 06 06:47:54 2008 -0500
@@ -39,7 +39,7 @@
 tcp 42
 rts 43
 arp 44
-lo 46
+svvslo 46
 tivc 47
 tidg 48
 tmux 49
@@ -226,3 +226,4 @@
 bmc 278
 fm 279
 nulldriver 280
+ipnet 281
--- a/usr/src/xmod/xmod_files	Thu Nov 06 11:42:52 2008 +0100
+++ b/usr/src/xmod/xmod_files	Thu Nov 06 06:47:54 2008 -0500
@@ -4,9 +4,9 @@
 ../closed/uts/common/io/lmode.c
 ../closed/uts/common/io/lmodr.c
 ../closed/uts/common/io/lmodt.c
-../closed/uts/common/io/lo.c
-../closed/uts/common/io/lo.conf
-../closed/uts/common/sys/lo.h
+../closed/uts/common/io/svvslo.c
+../closed/uts/common/io/svvslo.conf
+../closed/uts/common/sys/svvslo.h
 ../closed/uts/common/io/tidg.c
 ../closed/uts/common/sys/tidg.h
 ../closed/uts/common/io/tidg.conf