6896190 "Failover completed successfully" is printed a few times while failover is actually failed on a TPG
authorVictor Li <Victor.Li@Sun.COM>
Wed, 05 May 2010 11:09:47 +0800
changeset 12310 f87d8e83d301
parent 12309 e1038eb75bcf
child 12311 650b48a2bf75
6896190 "Failover completed successfully" is printed a few times while failover is actually failed on a TPG 6940470 mdi_failover() with async flag won't return the failover status to target driver 6941020 mpxio failover on 7410 may take more than 3 minutes
usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs.c
usr/src/uts/common/io/scsi/adapters/scsi_vhci/scsi_vhci.c
usr/src/uts/common/sys/scsi/adapters/scsi_vhci.h
--- a/usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs.c	Tue May 04 17:11:30 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs.c	Wed May 05 11:09:47 2010 +0800
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -135,60 +134,65 @@
  */
 static int
 std_process_cmplt_pkt(struct scsi_device *sd, struct scsi_pkt *pkt,
-    int *retry_cnt)
+    int *retry_cnt, int *retval)
 {
-	uint8_t *sns, skey, asc, ascq;
+	*retval = 1; /* fail */
 
-	/*
-	 * Re-initialize retry_cmd_cnt. Allow transport and
-	 * cmd errors to go through a full retry count when
-	 * these are encountered.  This way TRAN/CMD errors
-	 * retry count is not exhausted due to CMD_CMPLTs
-	 * delay. This allows the system
-	 * to brave a hick-up on the link at any given time,
-	 * while waiting for the fo to complete.
-	 */
-	if (pkt->pkt_state & STATE_ARQ_DONE) {
-		sns = (uint8_t *)
-		    &(((struct scsi_arq_status *)(uintptr_t)
-		    (pkt->pkt_scbp))->sts_sensedata);
-		skey = scsi_sense_key(sns);
-		asc = scsi_sense_asc(sns);
-		ascq = scsi_sense_ascq(sns);
-		if (skey == KEY_UNIT_ATTENTION) {
-			/*
-			 * tpgs access state changed
-			 */
-			if (asc == STD_SCSI_ASC_STATE_CHG &&
-			    ascq == STD_SCSI_ASCQ_STATE_CHG_SUCC) {
-				/* XXX: update path info? */
-				cmn_err(CE_WARN, "!Device failover"
-				    " state change");
-			}
-			return (1);
-		} else if (skey == KEY_NOT_READY) {
-			if ((*retry_cnt)++ >=
-			    STD_FO_MAX_RETRIES) {
-				cmn_err(CE_WARN, "!Device failover"
-				    " failed: timed out waiting "
-				    "for path to become active");
+	switch (SCBP_C(pkt)) {
+		case STATUS_GOOD:
+			*retval = 0;
+			break;
+		case STATUS_CHECK:
+			if (pkt->pkt_state & STATE_ARQ_DONE) {
+				uint8_t *sns, skey, asc, ascq;
+				sns = (uint8_t *)
+				    &(((struct scsi_arq_status *)(uintptr_t)
+				    (pkt->pkt_scbp))->sts_sensedata);
+				skey = scsi_sense_key(sns);
+				asc = scsi_sense_asc(sns);
+				ascq = scsi_sense_ascq(sns);
+				if (skey == KEY_UNIT_ATTENTION) {
+					/*
+					 * tpgs access state changed
+					 */
+					if (asc == STD_SCSI_ASC_STATE_CHG &&
+					    ascq ==
+					    STD_SCSI_ASCQ_STATE_CHG_SUCC) {
+						/* XXX: update path info? */
+						cmn_err(CE_WARN,
+						    "!Device failover"
+						    " state change");
+					}
+					return (1);
+				} else if (skey == KEY_NOT_READY) {
+					if (asc ==
+					    STD_LOGICAL_UNIT_NOT_ACCESSIBLE &&
+					    ascq == STD_TGT_PORT_STANDBY) {
+						/*
+						 * Don't retry on the path
+						 * which is indicated as
+						 * standby, return failure.
+						 */
+						return (0);
+					} else if ((*retry_cnt)++ >=
+					    STD_FO_MAX_RETRIES) {
+						cmn_err(CE_WARN,
+						    "!Device failover failed: "
+						    "timed out waiting for "
+						    "path to become active");
+						return (0);
+					}
+					VHCI_DEBUG(6, (CE_NOTE, NULL,
+					    "!(sd:%p)lun becoming active...\n",
+					    (void *)sd));
+					drv_usecwait(STD_FO_RETRY_DELAY);
+					return (1);
+				}
+				cmn_err(CE_NOTE, "!Failover failed;"
+				    " sense key:%x, ASC: %x, "
+				    "ASCQ:%x", skey, asc, ascq);
 				return (0);
 			}
-			VHCI_DEBUG(6, (CE_NOTE, NULL,
-			    "!(sd:%p)lun "
-			    "becoming active...\n", (void *)sd));
-			drv_usecwait(STD_FO_RETRY_DELAY);
-			return (1);
-		}
-		cmn_err(CE_NOTE, "!Failover failed;"
-		    " sense key:%x, ASC: %x, "
-		    "ASCQ:%x", skey, asc, ascq);
-		return (0);
-	}
-	switch (SCBP_C(pkt)) {
-		case STATUS_GOOD:
-			break;
-		case STATUS_CHECK:
 			VHCI_DEBUG(4, (CE_WARN, NULL,
 			    "!(sd:%p):"
 			    " status returned CHECK during std"
@@ -310,9 +314,18 @@
 	}
 	switch (pkt->pkt_reason) {
 		case CMD_CMPLT:
+			/*
+			 * Re-initialize retry_cmd_cnt. Allow transport and
+			 * cmd errors to go through a full retry count when
+			 * these are encountered.  This way TRAN/CMD errors
+			 * retry count is not exhausted due to CMD_CMPLTs
+			 * delay. This allows the system
+			 * to brave a hiccup on the link at any given time,
+			 * while waiting for the fo to complete.
+			 */
 			retry_cmd_cnt = 0;
-			retval = std_process_cmplt_pkt(sd, pkt, &retry_cnt);
-			if (retval != 0) {
+			if (std_process_cmplt_pkt(sd, pkt, &retry_cnt,
+			    &retval) != 0) {
 				goto retry;
 			}
 			break;
@@ -346,8 +359,6 @@
 			break;
 	}
 
-
-	VHCI_DEBUG(4, (CE_NOTE, NULL, "!Path activation success\n"));
 	scsi_destroy_pkt(pkt);
 	scsi_free_consistent_buf(bp);
 	return (retval);
--- a/usr/src/uts/common/io/scsi/adapters/scsi_vhci/scsi_vhci.c	Tue May 04 17:11:30 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/scsi_vhci/scsi_vhci.c	Wed May 05 11:09:47 2010 +0800
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -2361,10 +2360,14 @@
 				return (TRAN_FATAL_ERROR);
 			}
 			mdi_rele_path(npip);
-			VHCI_DEBUG(1, (CE_NOTE, NULL, "!invoking "
-			    "mdi_failover\n"));
-			rval = mdi_failover(vhci->vhci_dip, cdip,
-			    MDI_FAILOVER_ASYNC);
+			if (!(vpkt->vpkt_state & VHCI_PKT_IN_FAILOVER)) {
+				VHCI_DEBUG(1, (CE_NOTE, NULL, "!invoking "
+				    "mdi_failover\n"));
+				rval = mdi_failover(vhci->vhci_dip, cdip,
+				    MDI_FAILOVER_ASYNC);
+			} else {
+				rval = vlun->svl_failover_status;
+			}
 			if (rval == MDI_FAILURE) {
 				VHCI_RELEASE_LUN(vlun);
 				if (pgr_sema_held) {
@@ -2381,6 +2384,7 @@
 				if (pgr_sema_held) {
 					sema_v(&vlun->svl_pgr_sema);
 				}
+				vpkt->vpkt_state |= VHCI_PKT_IN_FAILOVER;
 				return (TRAN_BUSY);
 			}
 		}
@@ -7010,6 +7014,7 @@
 	retval = MDI_SUCCESS;
 
 done:
+	vlun->svl_failover_status = retval;
 	if (flags == MDI_FAILOVER_ASYNC) {
 		VHCI_RELEASE_LUN(vlun);
 		VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_failover(12): DONE! "
--- a/usr/src/uts/common/sys/scsi/adapters/scsi_vhci.h	Tue May 04 17:11:30 2010 -0700
+++ b/usr/src/uts/common/sys/scsi/adapters/scsi_vhci.h	Wed May 05 11:09:47 2010 +0800
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_SCSI_ADAPTERS_SCSI_VHCI_H
@@ -184,6 +183,12 @@
  * is being issued from the taskq and not target driver.
  */
 #define	VHCI_PKT_THRU_TASKQ		0x20
+/*
+ * Set the first time failover is triggered, to ensure that
+ * failover won't be triggered again when the packet is
+ * retried by the target driver.
+ */
+#define	VHCI_PKT_IN_FAILOVER		0x40
 
 #define	VHCI_PKT_TIMEOUT		30		/* seconds */
 #define	VHCI_PKT_RETRY_CNT		2
@@ -310,6 +315,14 @@
 	time_t			svl_wfa_time;
 
 	/*
+	 * to keep the failover status in order to return the
+	 * failure status to target driver when target driver
+	 * retries the command which originally triggered the
+	 * failover.
+	 */
+	int			svl_failover_status;
+
+	/*
 	 * for RESERVE/RELEASE support
 	 */
 	client_lb_t		svl_lb_policy_save;