6719086 dynamic update of net-dev in vsw makes guests inaccessible for a short time
--- a/usr/src/uts/sun4v/io/vsw.c Thu Jul 03 12:07:41 2008 -0700
+++ b/usr/src/uts/sun4v/io/vsw.c Thu Jul 03 13:18:44 2008 -0700
@@ -211,6 +211,13 @@
*/
uint32_t vsw_pri_tx_nmblks = 64;
+/*
+ * Number of RARP packets sent to announce macaddr to the physical switch,
+ * after vsw's physical device is changed dynamically or after a guest (client
+ * vnet) is live migrated in.
+ */
+uint32_t vsw_publish_macaddr_count = 3;
+
boolean_t vsw_hio_enabled = B_TRUE; /* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10; /* Max retries for HybridIO cleanp */
int vsw_hio_cleanup_delay = 10000; /* 10ms */
--- a/usr/src/uts/sun4v/io/vsw_ldc.c Thu Jul 03 12:07:41 2008 -0700
+++ b/usr/src/uts/sun4v/io/vsw_ldc.c Thu Jul 03 13:18:44 2008 -0700
@@ -205,6 +205,7 @@
extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
extern void vsw_hio_stop_port(vsw_port_t *portp);
+extern void vsw_publish_macaddr(vsw_t *vswp, uint8_t *addr);
#define VSW_NUM_VMPOOLS 3 /* number of vio mblk pools */
@@ -229,6 +230,7 @@
extern uint32_t vsw_num_mblks2;
extern uint32_t vsw_num_mblks3;
extern boolean_t vsw_obp_ver_proto_workaround;
+extern uint32_t vsw_publish_macaddr_count;
#define LDC_ENTER_LOCK(ldcp) \
mutex_enter(&((ldcp)->ldc_cblock));\
@@ -379,6 +381,11 @@
*/
(void) vsw_init_ldcs(port);
+ /* announce macaddr of vnet to the physical switch */
+ if (vsw_publish_macaddr_count != 0) { /* enabled */
+ vsw_publish_macaddr(vswp, (uint8_t *)&(port->p_macaddr));
+ }
+
D1(vswp, "%s: exit", __func__);
return (0);
}
--- a/usr/src/uts/sun4v/io/vsw_phys.c Thu Jul 03 12:07:41 2008 -0700
+++ b/usr/src/uts/sun4v/io/vsw_phys.c Thu Jul 03 13:18:44 2008 -0700
@@ -51,6 +51,8 @@
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
+#include <netinet/arp.h>
+#include <inet/arp.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
@@ -96,6 +98,7 @@
void vsw_set_addrs(vsw_t *vswp);
int vsw_get_hw_maddr(vsw_t *);
mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
+void vsw_publish_macaddr(vsw_t *vswp, uint8_t *addr);
/*
* Tunables used in this file.
@@ -103,6 +106,7 @@
extern int vsw_mac_open_retries;
extern boolean_t vsw_multi_ring_enable;
extern int vsw_mac_rx_rings;
+extern uint32_t vsw_publish_macaddr_count;
/*
* Check to see if the card supports the setting of multiple unicst
@@ -226,6 +230,13 @@
mutex_exit(&port->mca_lock);
}
+ /* announce macaddr of vnets to the physical switch */
+ if (vsw_publish_macaddr_count != 0) { /* enabled */
+ for (port = plist->head; port != NULL; port = port->p_next) {
+ vsw_publish_macaddr(vswp, (uint8_t *)&port->p_macaddr);
+ }
+ }
+
RW_EXIT(&plist->lockrw);
}
@@ -1352,3 +1363,83 @@
return (mp);
}
+
+#define ARH_FIXED_LEN 8 /* Length of fixed part of ARP header(see arp.h) */
+
+/*
+ * Send a gratuitous RARP packet to notify the physical switch to update its
+ * Layer2 forwarding table for the given mac address. This is done to allow the
+ * switch to quickly learn the macaddr-port association when a guest is live
+ * migrated or when vsw's physical device is changed dynamically. Any protocol
+ * packet would serve this purpose, but we choose RARP, as it allows us to
+ * accomplish this within L2 (ie, no need to specify IP addr etc in the packet)
+ * The macaddr of vnet is retained across migration. Hence, we don't need to
+ * update the arp cache of other hosts within the broadcast domain. Note that
+ * it is harmless to send these RARP packets during normal port attach of a
+ * client vnet. This can be turned off if needed, by setting
+ * vsw_publish_macaddr_count to zero in /etc/system.
+ */
+void
+vsw_publish_macaddr(vsw_t *vswp, uint8_t *addr)
+{
+	mblk_t *mp;	/* template frame; duplicated for each send */
+	mblk_t *bp;	/* per-transmit duplicate of mp */
+	struct arphdr *arh;
+	struct ether_header *ehp;
+	uint32_t count;	/* same type as vsw_publish_macaddr_count */
+	int plen = 4;	/* length of the protocol address field */
+	uint8_t *cp;	/* write cursor within the ARP payload */
+
+	mp = allocb(ETHERMIN, BPRI_MED);
+	if (mp == NULL) {
+		return;	/* best effort: nothing is announced */
+	}
+	/* allocb() does not clear the buffer; zero it so pad bytes are 0 */
+	bzero(mp->b_rptr, ETHERMIN);
+
+	/* Initialize eth header */
+	ehp = (struct ether_header *)mp->b_rptr;
+	bcopy(&etherbroadcastaddr, &ehp->ether_dhost, ETHERADDRL);
+	bcopy(addr, &ehp->ether_shost, ETHERADDRL);
+	ehp->ether_type = htons(ETHERTYPE_REVARP);
+
+	/* Initialize arp packet */
+	arh = (struct arphdr *)(mp->b_rptr + sizeof (struct ether_header));
+	cp = (uint8_t *)arh;
+	arh->ar_hrd = htons(ARPHRD_ETHER); /* Hardware type: ethernet */
+	arh->ar_pro = htons(ETHERTYPE_IP); /* Protocol type: IP */
+	arh->ar_hln = ETHERADDRL; /* Length of hardware address: 6 */
+	arh->ar_pln = plen; /* Length of protocol address: 4 */
+	arh->ar_op = htons(REVARP_REQUEST); /* Opcode: REVARP Request */
+	cp += ARH_FIXED_LEN;	/* step past the fixed ARP header */
+
+	/* Sender's hardware address and protocol address */
+	bcopy(addr, cp, ETHERADDRL);
+	cp += ETHERADDRL;
+	bzero(cp, plen); /* INADDR_ANY */
+	cp += plen;
+
+	/* Target hardware address and protocol address */
+	bcopy(addr, cp, ETHERADDRL);
+	cp += ETHERADDRL;
+	bzero(cp, plen); /* INADDR_ANY */
+	cp += plen;
+
+	mp->b_wptr += ETHERMIN; /* total size is 42; round up to ETHERMIN */
+
+	for (count = 0; count < vsw_publish_macaddr_count; count++) {
+		/* send a duplicate so mp can be reused on the next pass */
+		bp = dupmsg(mp);
+		if (bp == NULL) {
+			continue;
+		}
+
+		/* transmit the packet */
+		bp = vsw_tx_msg(vswp, bp);
+		if (bp != NULL) {
+			freemsg(bp);
+		}
+	}
+
+	freemsg(mp);
+}