6973638 NULL pointer panic in pmcs_tgt_event_recovery()
authorSrikanth Suravajhala <srikanth.suravajhala@oracle.com>
Fri, 13 Aug 2010 11:23:20 -0700
changeset 13103 17b823d436a7
parent 13102 91026ef504cf
child 13104 6fa1bbe65855
6973638 NULL pointer panic in pmcs_tgt_event_recovery()
usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_ds.c
usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_ds.c	Fri Aug 13 11:07:16 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_ds.c	Fri Aug 13 11:23:20 2010 -0700
@@ -740,10 +740,9 @@
 
 /*
  * SSP target event recovery
- * Entered with a phy lock held
- * Pwrk lock is not needed - pwrk is on the target aq and no other thread
- * will do anything with it until this thread starts the chain of recovery.
- * Statlock may be acquired and released.
+ * phy->lock should be held upon entry.
+ * pwrk->lock should be held upon entry and gets released by this routine.
+ * tgt->statlock should not be held.
  */
 void
 pmcs_tgt_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk)
@@ -755,7 +754,6 @@
 	uint32_t event;
 	uint32_t htag;
 	uint32_t status;
-	uint8_t dstate;
 	int rv;
 
 	ASSERT(pwrk->arg != NULL);
@@ -766,6 +764,8 @@
 	event = pwrk->ssp_event;
 	pwrk->ssp_event = 0xffffffff;
 
+	mutex_exit(&pwrk->lock);
+
 	if (event == PMCOUT_STATUS_XFER_ERR_BREAK ||
 	    event == PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY ||
 	    event == PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT) {
@@ -793,7 +793,7 @@
 		    lun->lun_num, &status))
 			goto out;
 	}
-	(void) pmcs_abort(pwp, pptr, pwrk->htag, 0, 1);
+	(void) pmcs_abort(pwp, pptr, htag, 0, 1);
 	/*
 	 * Abort either took care of work completion, or put device in
 	 * a recovery state
@@ -801,20 +801,14 @@
 	return;
 out:
 	/* Abort failed, do full device recovery */
-	ASSERT(tgt != NULL);
-	mutex_enter(&tgt->statlock);
-	if (!pmcs_get_dev_state(pwp, pptr, tgt, &dstate))
-		tgt->dev_state = dstate;
-
-	if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) &&
-	    (tgt->dev_state != PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
-		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
-		    "%s: Setting IN_RECOVERY for tgt 0x%p",
-		    __func__, (void *)tgt);
-		(void) pmcs_send_err_recovery_cmd(pwp,
-		    PMCS_DEVICE_STATE_IN_RECOVERY, pptr, tgt);
+	mutex_enter(&pwrk->lock);
+	tgt = pwrk->xp;
+	mutex_exit(&pwrk->lock);
+	if (tgt != NULL) {
+		mutex_enter(&tgt->statlock);
+		pmcs_start_dev_state_recovery(tgt, pptr);
+		mutex_exit(&tgt->statlock);
 	}
-	mutex_exit(&tgt->statlock);
 }
 
 /*
@@ -854,43 +848,43 @@
 		}
 
 		pmcs_lock_phy(pphy);
-		mutex_enter(&tgt->statlock);
 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
 		    "%s: found target(0x%p)", __func__, (void *) tgt);
 
 		/* Check what cmd expects recovery */
 		mutex_enter(&tgt->aqlock);
 		STAILQ_FOREACH(cp, &tgt->aq, cmd_next) {
-			/*
-			 * Since work structure is on this target aq, and only
-			 * this thread is accessing it now, we do not need
-			 * to lock it
-			 */
 			idxpwrk = PMCS_TAG_INDEX(cp->cmd_tag);
 			pwrk = &pwp->work[idxpwrk];
+			mutex_enter(&pwrk->lock);
 			if (pwrk->htag != cp->cmd_tag) {
 				/*
 				 * aq may contain TMF commands, so we
 				 * may not find work structure with htag
 				 */
-				break;
+				mutex_exit(&pwrk->lock);
+				continue;
 			}
-			if ((pwrk->ssp_event != 0) &&
+			if (!PMCS_COMMAND_DONE(pwrk) &&
+			    (pwrk->ssp_event != 0) &&
 			    (pwrk->ssp_event != PMCS_REC_EVENT)) {
 				pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
 				    "%s: pwrk(%p) htag(0x%x)",
 				    __func__, (void *) pwrk, cp->cmd_tag);
 				mutex_exit(&tgt->aqlock);
-				mutex_exit(&tgt->statlock);
+				/*
+				 * pwrk->lock gets dropped in
+				 * pmcs_tgt_event_recovery()
+				 */
 				pmcs_tgt_event_recovery(pwp, pwrk);
-				/*
-				 * We dropped statlock, so restart the scan
-				 */
 				pmcs_unlock_phy(pphy);
+				/* All bets are off on tgt/aq now, restart */
 				goto restart;
 			}
+			mutex_exit(&pwrk->lock);
 		}
 		mutex_exit(&tgt->aqlock);
+		mutex_enter(&tgt->statlock);
 		tgt->event_recovery = 0;
 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt,
 		    "%s: end of SSP event recovery for target(0x%p)",
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c	Fri Aug 13 11:07:16 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c	Fri Aug 13 11:23:20 2010 -0700
@@ -4797,6 +4797,7 @@
 	p->abt_htag = 0;
 	p->timer = 0;
 	p->onwire = 0;
+	p->ssp_event = 0;
 	mutex_exit(&p->lock);
 
 	if (mutex_tryenter(&pwp->wfree_lock) == 0) {