6896190 "Failover completed successfully" is printed a few times even though failover actually failed on a TPG
6940470 mdi_failover() with async flag won't return the failover status to target driver
6941020 mpxio failover on 7410 may take more than 3 minutes
--- a/usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs.c Tue May 04 17:11:30 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs.c Wed May 05 11:09:47 2010 +0800
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -135,60 +134,65 @@
*/
static int
std_process_cmplt_pkt(struct scsi_device *sd, struct scsi_pkt *pkt,
- int *retry_cnt)
+ int *retry_cnt, int *retval)
{
- uint8_t *sns, skey, asc, ascq;
+ *retval = 1; /* fail */
- /*
- * Re-initialize retry_cmd_cnt. Allow transport and
- * cmd errors to go through a full retry count when
- * these are encountered. This way TRAN/CMD errors
- * retry count is not exhausted due to CMD_CMPLTs
- * delay. This allows the system
- * to brave a hick-up on the link at any given time,
- * while waiting for the fo to complete.
- */
- if (pkt->pkt_state & STATE_ARQ_DONE) {
- sns = (uint8_t *)
- &(((struct scsi_arq_status *)(uintptr_t)
- (pkt->pkt_scbp))->sts_sensedata);
- skey = scsi_sense_key(sns);
- asc = scsi_sense_asc(sns);
- ascq = scsi_sense_ascq(sns);
- if (skey == KEY_UNIT_ATTENTION) {
- /*
- * tpgs access state changed
- */
- if (asc == STD_SCSI_ASC_STATE_CHG &&
- ascq == STD_SCSI_ASCQ_STATE_CHG_SUCC) {
- /* XXX: update path info? */
- cmn_err(CE_WARN, "!Device failover"
- " state change");
- }
- return (1);
- } else if (skey == KEY_NOT_READY) {
- if ((*retry_cnt)++ >=
- STD_FO_MAX_RETRIES) {
- cmn_err(CE_WARN, "!Device failover"
- " failed: timed out waiting "
- "for path to become active");
+ switch (SCBP_C(pkt)) {
+ case STATUS_GOOD:
+ *retval = 0;
+ break;
+ case STATUS_CHECK:
+ if (pkt->pkt_state & STATE_ARQ_DONE) {
+ uint8_t *sns, skey, asc, ascq;
+ sns = (uint8_t *)
+ &(((struct scsi_arq_status *)(uintptr_t)
+ (pkt->pkt_scbp))->sts_sensedata);
+ skey = scsi_sense_key(sns);
+ asc = scsi_sense_asc(sns);
+ ascq = scsi_sense_ascq(sns);
+ if (skey == KEY_UNIT_ATTENTION) {
+ /*
+ * tpgs access state changed
+ */
+ if (asc == STD_SCSI_ASC_STATE_CHG &&
+ ascq ==
+ STD_SCSI_ASCQ_STATE_CHG_SUCC) {
+ /* XXX: update path info? */
+ cmn_err(CE_WARN,
+ "!Device failover"
+ " state change");
+ }
+ return (1);
+ } else if (skey == KEY_NOT_READY) {
+ if (asc ==
+ STD_LOGICAL_UNIT_NOT_ACCESSIBLE &&
+ ascq == STD_TGT_PORT_STANDBY) {
+ /*
+ * Don't retry on a path that
+ * reports standby: return 0 (no
+ * retry), *retval stays failed.
+ */
+ return (0);
+ } else if ((*retry_cnt)++ >=
+ STD_FO_MAX_RETRIES) {
+ cmn_err(CE_WARN,
+ "!Device failover failed: "
+ "timed out waiting for "
+ "path to become active");
+ return (0);
+ }
+ VHCI_DEBUG(6, (CE_NOTE, NULL,
+ "!(sd:%p)lun becoming active...\n",
+ (void *)sd));
+ drv_usecwait(STD_FO_RETRY_DELAY);
+ return (1);
+ }
+ cmn_err(CE_NOTE, "!Failover failed;"
+ " sense key:%x, ASC: %x, "
+ "ASCQ:%x", skey, asc, ascq);
return (0);
}
- VHCI_DEBUG(6, (CE_NOTE, NULL,
- "!(sd:%p)lun "
- "becoming active...\n", (void *)sd));
- drv_usecwait(STD_FO_RETRY_DELAY);
- return (1);
- }
- cmn_err(CE_NOTE, "!Failover failed;"
- " sense key:%x, ASC: %x, "
- "ASCQ:%x", skey, asc, ascq);
- return (0);
- }
- switch (SCBP_C(pkt)) {
- case STATUS_GOOD:
- break;
- case STATUS_CHECK:
VHCI_DEBUG(4, (CE_WARN, NULL,
"!(sd:%p):"
" status returned CHECK during std"
@@ -310,9 +314,18 @@
}
switch (pkt->pkt_reason) {
case CMD_CMPLT:
+ /*
+ * Re-initialize retry_cmd_cnt. Allow transport and
+ * cmd errors to go through a full retry count when
+ * these are encountered. This way TRAN/CMD errors
+ * retry count is not exhausted due to CMD_CMPLTs
+ * delay. This allows the system
+ * to brave a hiccup on the link at any given time,
+ * while waiting for the failover to complete.
+ */
retry_cmd_cnt = 0;
- retval = std_process_cmplt_pkt(sd, pkt, &retry_cnt);
- if (retval != 0) {
+ if (std_process_cmplt_pkt(sd, pkt, &retry_cnt,
+ &retval) != 0) {
goto retry;
}
break;
@@ -346,8 +359,6 @@
break;
}
-
- VHCI_DEBUG(4, (CE_NOTE, NULL, "!Path activation success\n"));
scsi_destroy_pkt(pkt);
scsi_free_consistent_buf(bp);
return (retval);
--- a/usr/src/uts/common/io/scsi/adapters/scsi_vhci/scsi_vhci.c Tue May 04 17:11:30 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/scsi_vhci/scsi_vhci.c Wed May 05 11:09:47 2010 +0800
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -2361,10 +2360,14 @@
return (TRAN_FATAL_ERROR);
}
mdi_rele_path(npip);
- VHCI_DEBUG(1, (CE_NOTE, NULL, "!invoking "
- "mdi_failover\n"));
- rval = mdi_failover(vhci->vhci_dip, cdip,
- MDI_FAILOVER_ASYNC);
+ if (!(vpkt->vpkt_state & VHCI_PKT_IN_FAILOVER)) {
+ VHCI_DEBUG(1, (CE_NOTE, NULL, "!invoking "
+ "mdi_failover\n"));
+ rval = mdi_failover(vhci->vhci_dip, cdip,
+ MDI_FAILOVER_ASYNC);
+ } else {
+ rval = vlun->svl_failover_status;
+ }
if (rval == MDI_FAILURE) {
VHCI_RELEASE_LUN(vlun);
if (pgr_sema_held) {
@@ -2381,6 +2384,7 @@
if (pgr_sema_held) {
sema_v(&vlun->svl_pgr_sema);
}
+ vpkt->vpkt_state |= VHCI_PKT_IN_FAILOVER;
return (TRAN_BUSY);
}
}
@@ -7010,6 +7014,7 @@
retval = MDI_SUCCESS;
done:
+ vlun->svl_failover_status = retval;
if (flags == MDI_FAILOVER_ASYNC) {
VHCI_RELEASE_LUN(vlun);
VHCI_DEBUG(6, (CE_NOTE, NULL, "!vhci_failover(12): DONE! "
--- a/usr/src/uts/common/sys/scsi/adapters/scsi_vhci.h Tue May 04 17:11:30 2010 -0700
+++ b/usr/src/uts/common/sys/scsi/adapters/scsi_vhci.h Wed May 05 11:09:47 2010 +0800
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_SCSI_ADAPTERS_SCSI_VHCI_H
@@ -184,6 +183,12 @@
* is being issued from the taskq and not target driver.
*/
#define VHCI_PKT_THRU_TASKQ 0x20
+/*
+ * Set the first time failover is triggered, to ensure that
+ * failover is not triggered again when the packet is retried
+ * by the target driver.
+ */
+#define VHCI_PKT_IN_FAILOVER 0x40
#define VHCI_PKT_TIMEOUT 30 /* seconds */
#define VHCI_PKT_RETRY_CNT 2
@@ -310,6 +315,14 @@
time_t svl_wfa_time;
/*
+ * Keeps the failover status so that a failure can be
+ * returned to the target driver when the target driver
+ * retries the command which originally triggered the
+ * failover.
+ */
+ int svl_failover_status;
+
+ /*
* for RESERVE/RELEASE support
*/
client_lb_t svl_lb_policy_save;