6931570 Add flash devices' VID/PID to disk table to advertising 4K physical sector size.
author bo zhou - Sun Microsystems - Beijing China <Bo.Zhou@Sun.COM>
Thu Apr 22 11:12:41 2010 +0800 (2010-04-22)
changeset 12208 008d9e6b6072
parent 12207 877b2956883c
child 12209 2515275cde2e
6931570 Add flash devices' VID/PID to disk table to advertising 4K physical sector size.
6930150 sd should support zfs to send physical block size aligned I/O to FMODs in emulation mode
6927876 For 4k sector support, ZFS needs to use DKIOCGMEDIAINFOEXT
usr/src/uts/common/fs/zfs/vdev_disk.c
usr/src/uts/common/io/scsi/targets/sd.c
usr/src/uts/common/sys/scsi/targets/sddef.h
     1.1 --- a/usr/src/uts/common/fs/zfs/vdev_disk.c	Wed Apr 21 20:49:07 2010 -0600
     1.2 +++ b/usr/src/uts/common/fs/zfs/vdev_disk.c	Thu Apr 22 11:12:41 2010 +0800
     1.3 @@ -19,8 +19,7 @@
     1.4   * CDDL HEADER END
     1.5   */
     1.6  /*
     1.7 - * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     1.8 - * Use is subject to license terms.
     1.9 + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
    1.10   */
    1.11  
    1.12  #include <sys/zfs_context.h>
    1.13 @@ -108,7 +107,7 @@
    1.14  {
    1.15  	spa_t *spa = vd->vdev_spa;
    1.16  	vdev_disk_t *dvd;
    1.17 -	struct dk_minfo dkm;
    1.18 +	struct dk_minfo_ext dkmext;
    1.19  	int error;
    1.20  	dev_t dev;
    1.21  	int otyp;
    1.22 @@ -288,11 +287,11 @@
    1.23  	 * Determine the device's minimum transfer size.
    1.24  	 * If the ioctl isn't supported, assume DEV_BSIZE.
    1.25  	 */
    1.26 -	if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)&dkm,
    1.27 +	if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)&dkmext,
    1.28  	    FKIOCTL, kcred, NULL) != 0)
    1.29 -		dkm.dki_lbsize = DEV_BSIZE;
    1.30 +		dkmext.dki_pbsize = DEV_BSIZE;
    1.31  
    1.32 -	*ashift = highbit(MAX(dkm.dki_lbsize, SPA_MINBLOCKSIZE)) - 1;
    1.33 +	*ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1;
    1.34  
    1.35  	/*
    1.36  	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
     2.1 --- a/usr/src/uts/common/io/scsi/targets/sd.c	Wed Apr 21 20:49:07 2010 -0600
     2.2 +++ b/usr/src/uts/common/io/scsi/targets/sd.c	Thu Apr 22 11:12:41 2010 +0800
     2.3 @@ -795,7 +795,17 @@
     2.4  static const int sd_disk_table_size =
     2.5  	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
     2.6  
     2.7 -
     2.8 +/*
     2.9 + * Emulation mode VID/PID table: matches get a 4K physical sector size
    2.10 + */
    2.11 +static char sd_flash_dev_table[][25] = {
    2.12 +	"ATA     MARVELL SD88SA02",
    2.13 +	"MARVELL SD88SA02",
    2.14 +	"TOSHIBA THNSNV05",
    2.15 +};
    2.16 +
    2.17 +static const int sd_flash_dev_table_size =
    2.18 +	sizeof (sd_flash_dev_table) / sizeof (sd_flash_dev_table[0]);
    2.19  
    2.20  #define	SD_INTERCONNECT_PARALLEL	0
    2.21  #define	SD_INTERCONNECT_FABRIC		1
    2.22 @@ -879,6 +889,7 @@
    2.23  #define	sd_chk_vers1_data		ssd_chk_vers1_data
    2.24  #define	sd_set_vers1_properties		ssd_set_vers1_properties
    2.25  #define	sd_check_solid_state		ssd_check_solid_state
    2.26 +#define	sd_check_emulation_mode		ssd_check_emulation_mode
    2.27  
    2.28  #define	sd_get_physical_geometry	ssd_get_physical_geometry
    2.29  #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
    2.30 @@ -1278,7 +1289,7 @@
    2.31  static void  sd_get_nv_sup(sd_ssc_t *ssc);
    2.32  static dev_t sd_make_device(dev_info_t *devi);
    2.33  static void  sd_check_solid_state(sd_ssc_t *ssc);
    2.34 -
    2.35 +static void  sd_check_emulation_mode(sd_ssc_t *ssc);
    2.36  static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
    2.37  	uint64_t capacity);
    2.38  
    2.39 @@ -7581,6 +7592,7 @@
    2.40  	 */
    2.41  	un->un_f_disksort_disabled = FALSE;
    2.42  	un->un_f_rmw_type = SD_RMW_TYPE_DEFAULT;
    2.43 +	un->un_f_enable_rmw = FALSE;
    2.44  
    2.45  	/*
    2.46  	 * GET EVENT STATUS NOTIFICATION media polling enabled by default, but
    2.47 @@ -7633,6 +7645,11 @@
    2.48  	un->un_blockcount = 0;
    2.49  
    2.50  	/*
    2.51 +	 * physical sector size default to DEV_BSIZE currently.
    2.52 +	 */
    2.53 +	un->un_phy_blocksize = DEV_BSIZE;
    2.54 +
    2.55 +	/*
    2.56  	 * Set up the per-instance info needed to determine the correct
    2.57  	 * CDBs and other info for issuing commands to the target.
    2.58  	 */
    2.59 @@ -8151,6 +8168,11 @@
    2.60  	 */
    2.61  	sd_check_solid_state(ssc);
    2.62  
    2.63 +	/*
    2.64 +	 * Check whether the drive is in emulation mode.
    2.65 +	 */
    2.66 +	sd_check_emulation_mode(ssc);
    2.67 +
    2.68  	cmlb_alloc_handle(&un->un_cmlbhandle);
    2.69  
    2.70  #if defined(__i386) || defined(__amd64)
    2.71 @@ -8230,8 +8252,9 @@
    2.72  	un->un_f_write_cache_enabled = (wc_enabled != 0);
    2.73  	mutex_exit(SD_MUTEX(un));
    2.74  
    2.75 -	if (un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR &&
    2.76 -	    un->un_tgt_blocksize != DEV_BSIZE) {
    2.77 +	if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR &&
    2.78 +	    un->un_tgt_blocksize != DEV_BSIZE) ||
    2.79 +	    un->un_f_enable_rmw) {
    2.80  		if (!(un->un_wm_cache)) {
    2.81  			(void) snprintf(name_str, sizeof (name_str),
    2.82  			    "%s%d_cache",
    2.83 @@ -10646,9 +10669,10 @@
    2.84  	 * a media is changed this routine will be called and the
    2.85  	 * block size is a function of media rather than device.
    2.86  	 */
    2.87 -	if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR ||
    2.88 +	if (((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR ||
    2.89  	    un->un_f_non_devbsize_supported) &&
    2.90 -	    un->un_tgt_blocksize != DEV_BSIZE) {
    2.91 +	    un->un_tgt_blocksize != DEV_BSIZE) ||
    2.92 +	    un->un_f_enable_rmw) {
    2.93  		if (!(un->un_wm_cache)) {
    2.94  			(void) snprintf(name_str, sizeof (name_str),
    2.95  			    "%s%d_cache",
    2.96 @@ -10877,7 +10901,8 @@
    2.97  	/*
    2.98  	 * Read requests are restricted to multiples of the system block size.
    2.99  	 */
   2.100 -	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
   2.101 +	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
   2.102 +	    !un->un_f_enable_rmw)
   2.103  		secmask = un->un_tgt_blocksize - 1;
   2.104  	else
   2.105  		secmask = DEV_BSIZE - 1;
   2.106 @@ -10966,7 +10991,8 @@
   2.107  	/*
   2.108  	 * Write requests are restricted to multiples of the system block size.
   2.109  	 */
   2.110 -	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
   2.111 +	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
   2.112 +	    !un->un_f_enable_rmw)
   2.113  		secmask = un->un_tgt_blocksize - 1;
   2.114  	else
   2.115  		secmask = DEV_BSIZE - 1;
   2.116 @@ -11055,7 +11081,8 @@
   2.117  	/*
   2.118  	 * Read requests are restricted to multiples of the system block size.
   2.119  	 */
   2.120 -	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
   2.121 +	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
   2.122 +	    !un->un_f_enable_rmw)
   2.123  		secmask = un->un_tgt_blocksize - 1;
   2.124  	else
   2.125  		secmask = DEV_BSIZE - 1;
   2.126 @@ -11144,7 +11171,8 @@
   2.127  	/*
   2.128  	 * Write requests are restricted to multiples of the system block size.
   2.129  	 */
   2.130 -	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
   2.131 +	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
   2.132 +	    !un->un_f_enable_rmw)
   2.133  		secmask = un->un_tgt_blocksize - 1;
   2.134  	else
   2.135  		secmask = DEV_BSIZE - 1;
   2.136 @@ -11524,16 +11552,24 @@
   2.137  		index = un->un_buf_chain_type;
   2.138  		if ((!un->un_f_has_removable_media) &&
   2.139  		    (un->un_tgt_blocksize != 0) &&
   2.140 -		    (un->un_tgt_blocksize != DEV_BSIZE)) {
   2.141 +		    (un->un_tgt_blocksize != DEV_BSIZE ||
   2.142 +		    un->un_f_enable_rmw)) {
   2.143  			int secmask = 0, blknomask = 0;
   2.144 -			blknomask =
   2.145 -			    (un->un_tgt_blocksize / DEV_BSIZE) - 1;
   2.146 -			secmask = un->un_tgt_blocksize - 1;
   2.147 +			if (un->un_f_enable_rmw) {
   2.148 +				blknomask =
   2.149 +				    (un->un_phy_blocksize / DEV_BSIZE) - 1;
   2.150 +				secmask = un->un_phy_blocksize - 1;
   2.151 +			} else {
   2.152 +				blknomask =
   2.153 +				    (un->un_tgt_blocksize / DEV_BSIZE) - 1;
   2.154 +				secmask = un->un_tgt_blocksize - 1;
   2.155 +			}
   2.156  
   2.157  			if ((bp->b_lblkno & (blknomask)) ||
   2.158  			    (bp->b_bcount & (secmask))) {
   2.159 -				if (un->un_f_rmw_type !=
   2.160 -				    SD_RMW_TYPE_RETURN_ERROR) {
   2.161 +				if ((un->un_f_rmw_type !=
   2.162 +				    SD_RMW_TYPE_RETURN_ERROR) ||
   2.163 +				    un->un_f_enable_rmw) {
   2.164  					if (un->un_f_pm_is_enabled == FALSE)
   2.165  						index =
   2.166  						    SD_CHAIN_INFO_MSS_DSK_NO_PM;
   2.167 @@ -12543,14 +12579,19 @@
   2.168  	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
   2.169  	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
   2.170  
   2.171 -	blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
   2.172 -	secmask = un->un_tgt_blocksize - 1;
   2.173 +	if (un->un_f_enable_rmw) {
   2.174 +		blknomask = (un->un_phy_blocksize / DEV_BSIZE) - 1;
   2.175 +		secmask = un->un_phy_blocksize - 1;
   2.176 +	} else {
   2.177 +		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
   2.178 +		secmask = un->un_tgt_blocksize - 1;
   2.179 +	}
   2.180  
   2.181  	if ((bp->b_lblkno & (blknomask)) || (bp->b_bcount & (secmask))) {
   2.182  		is_aligned = FALSE;
   2.183  	}
   2.184  
   2.185 -	if (!(NOT_DEVBSIZE(un))) {
   2.186 +	if (!(NOT_DEVBSIZE(un)) || un->un_f_enable_rmw) {
   2.187  		/*
   2.188  		 * If I/O is aligned, no need to involve RMW(Read Modify Write)
   2.189  		 * Convert the logical block number to target's physical sector
   2.190 @@ -12561,12 +12602,17 @@
   2.191  		} else {
   2.192  			switch (un->un_f_rmw_type) {
   2.193  			case SD_RMW_TYPE_RETURN_ERROR:
   2.194 -				bp->b_flags |= B_ERROR;
   2.195 -				goto error_exit;
   2.196 +				if (un->un_f_enable_rmw)
   2.197 +					break;
   2.198 +				else {
   2.199 +					bp->b_flags |= B_ERROR;
   2.200 +					goto error_exit;
   2.201 +				}
   2.202  
   2.203  			case SD_RMW_TYPE_DEFAULT:
   2.204  				mutex_enter(SD_MUTEX(un));
   2.205 -				if (un->un_rmw_msg_timeid == NULL) {
   2.206 +				if (!un->un_f_enable_rmw &&
   2.207 +				    un->un_rmw_msg_timeid == NULL) {
   2.208  					scsi_log(SD_DEVINFO(un), sd_label,
   2.209  					    CE_WARN, "I/O request is not "
   2.210  					    "aligned with %d disk sector size. "
   2.211 @@ -12809,7 +12855,7 @@
   2.212  	 * un->un_sys_blocksize as its block size or if bcount == 0.
   2.213  	 * In this case there is no layer-private data block allocated.
   2.214  	 */
   2.215 -	if ((un->un_tgt_blocksize == DEV_BSIZE) ||
   2.216 +	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
   2.217  	    (bp->b_bcount == 0)) {
   2.218  		goto done;
   2.219  	}
   2.220 @@ -12868,9 +12914,18 @@
   2.221  	 * block of the request, but that's what is needed for the computation.
   2.222  	 */
   2.223  	first_byte  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
   2.224 -	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
   2.225 -	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
   2.226 -	    un->un_tgt_blocksize;
   2.227 +	if (un->un_f_enable_rmw) {
   2.228 +		start_block = xp->xb_blkno =
   2.229 +		    (first_byte / un->un_phy_blocksize) *
   2.230 +		    (un->un_phy_blocksize / DEV_BSIZE);
   2.231 +		end_block   = ((first_byte + bp->b_bcount +
   2.232 +		    un->un_phy_blocksize - 1) / un->un_phy_blocksize) *
   2.233 +		    (un->un_phy_blocksize / DEV_BSIZE);
   2.234 +	} else {
   2.235 +		start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
   2.236 +		end_block   = (first_byte + bp->b_bcount +
   2.237 +		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
   2.238 +	}
   2.239  
   2.240  	/* request_bytes is rounded up to a multiple of the target block size */
   2.241  	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
   2.242 @@ -12880,9 +12935,16 @@
   2.243  	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
   2.244  	 * then we do not need to allocate a shadow buf to handle the request.
   2.245  	 */
   2.246 -	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
   2.247 -	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
   2.248 -		is_aligned = TRUE;
   2.249 +	if (un->un_f_enable_rmw) {
   2.250 +		if (((first_byte % un->un_phy_blocksize) == 0) &&
   2.251 +		    ((bp->b_bcount % un->un_phy_blocksize) == 0)) {
   2.252 +			is_aligned = TRUE;
   2.253 +		}
   2.254 +	} else {
   2.255 +		if (((first_byte % un->un_tgt_blocksize) == 0) &&
   2.256 +		    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
   2.257 +			is_aligned = TRUE;
   2.258 +		}
   2.259  	}
   2.260  
   2.261  	if ((bp->b_flags & B_READ) == 0) {
   2.262 @@ -12939,10 +13001,17 @@
   2.263  		 * command (which will be based upon the target blocksize). Note
   2.264  		 * that this is only really used if the request is unaligned.
   2.265  		 */
   2.266 -		bsp->mbs_copy_offset = (ssize_t)(first_byte -
   2.267 -		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
   2.268 -		ASSERT((bsp->mbs_copy_offset >= 0) &&
   2.269 -		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
   2.270 +		if (un->un_f_enable_rmw) {
   2.271 +			bsp->mbs_copy_offset = (ssize_t)(first_byte -
   2.272 +			    ((offset_t)xp->xb_blkno * un->un_sys_blocksize));
   2.273 +			ASSERT((bsp->mbs_copy_offset >= 0) &&
   2.274 +			    (bsp->mbs_copy_offset < un->un_phy_blocksize));
   2.275 +		} else {
   2.276 +			bsp->mbs_copy_offset = (ssize_t)(first_byte -
   2.277 +			    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
   2.278 +			ASSERT((bsp->mbs_copy_offset >= 0) &&
   2.279 +			    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
   2.280 +		}
   2.281  
   2.282  		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
   2.283  
   2.284 @@ -13010,7 +13079,7 @@
   2.285  	 * There is no shadow buf or layer-private data if the target is
   2.286  	 * using un->un_sys_blocksize as its block size or if bcount == 0.
   2.287  	 */
   2.288 -	if ((un->un_tgt_blocksize == DEV_BSIZE) ||
   2.289 +	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
   2.290  	    (bp->b_bcount == 0)) {
   2.291  		goto exit;
   2.292  	}
   2.293 @@ -13077,7 +13146,11 @@
   2.294  	 * shadow_start and shadow_len indicate the location and size of
   2.295  	 * the data returned with the shadow IO request.
   2.296  	 */
   2.297 -	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
   2.298 +	if (un->un_f_enable_rmw) {
   2.299 +		shadow_start  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
   2.300 +	} else {
   2.301 +		shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
   2.302 +	}
   2.303  	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
   2.304  
   2.305  	/*
   2.306 @@ -13088,7 +13161,14 @@
   2.307  	 * data to be copied (in bytes).
   2.308  	 */
   2.309  	copy_offset  = bsp->mbs_copy_offset;
   2.310 -	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
   2.311 +	if (un->un_f_enable_rmw) {
   2.312 +		ASSERT((copy_offset >= 0) &&
   2.313 +		    (copy_offset < un->un_phy_blocksize));
   2.314 +	} else {
   2.315 +		ASSERT((copy_offset >= 0) &&
   2.316 +		    (copy_offset < un->un_tgt_blocksize));
   2.317 +	}
   2.318 +
   2.319  	copy_length  = orig_bp->b_bcount;
   2.320  	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
   2.321  
   2.322 @@ -14532,7 +14612,7 @@
   2.323  	 * If sorting is disabled, just add the buf to the tail end of
   2.324  	 * the wait queue and return.
   2.325  	 */
   2.326 -	if (un->un_f_disksort_disabled) {
   2.327 +	if (un->un_f_disksort_disabled || un->un_f_enable_rmw) {
   2.328  		un->un_waitq_tailp->av_forw = bp;
   2.329  		un->un_waitq_tailp = bp;
   2.330  		bp->av_forw = NULL;
   2.331 @@ -19996,6 +20076,8 @@
   2.332  			    &lbasize, &pbsize, path_flag);
   2.333  			if (status != 0) {
   2.334  				return (status);
   2.335 +			} else {
   2.336 +				goto rc16_done;
   2.337  			}
   2.338  		}
   2.339  		break;	/* Success! */
   2.340 @@ -20051,6 +20133,8 @@
   2.341  	if (un->un_f_has_removable_media)
   2.342  		capacity *= (lbasize / un->un_sys_blocksize);
   2.343  
   2.344 +rc16_done:
   2.345 +
   2.346  	/*
   2.347  	 * Copy the values from the READ CAPACITY command into the space
   2.348  	 * provided by the caller.
   2.349 @@ -20253,6 +20337,32 @@
   2.350  		return (status);
   2.351  	}
   2.352  
   2.353 +	/*
   2.354 +	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
   2.355 +	 * (2352 and 0 are common) so for these devices always force the value
   2.356 +	 * to 2048 as required by the ATAPI specs.
   2.357 +	 */
   2.358 +	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
   2.359 +		lbasize = 2048;
   2.360 +	}
   2.361 +
   2.362 +	/*
   2.363 +	 * Get the maximum LBA value from the READ CAPACITY 16 data.
   2.364 +	 * Here we assume that the Partial Medium Indicator (PMI) bit
   2.365 +	 * was cleared when issuing the command. This means that the LBA
   2.366 +	 * returned from the device is the LBA of the last logical block
   2.367 +	 * on the logical unit.  The actual logical block count will be
   2.368 +	 * this value plus one.
   2.369 +	 */
   2.370 +	capacity += 1;
   2.371 +
   2.372 +	/*
   2.373 +	 * Currently, for removable media, the capacity is saved in terms
   2.374 +	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
   2.375 +	 */
   2.376 +	if (un->un_f_has_removable_media)
   2.377 +		capacity *= (lbasize / un->un_sys_blocksize);
   2.378 +
   2.379  	*capp = capacity;
   2.380  	*lbap = lbasize;
   2.381  	*psp = pbsize;
   2.382 @@ -20261,6 +20371,14 @@
   2.383  	    "capacity:0x%llx  lbasize:0x%x, pbsize: 0x%x\n",
   2.384  	    capacity, lbasize, pbsize);
   2.385  
   2.386 +	if ((capacity == 0) || (lbasize == 0) || (pbsize == 0)) {
   2.387 +		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
   2.388 +		    "sd_send_scsi_READ_CAPACITY_16 received invalid value "
   2.389 +		    "capacity %llu lbasize %d pbsize %d", capacity, lbasize, pbsize);
   2.390 +		return (EIO);	/* reject a zero capacity or block size */
   2.391 +	}
   2.392 +
   2.393 +	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
   2.394  	return (0);
   2.395  }
   2.396  
   2.397 @@ -23486,17 +23604,24 @@
   2.398  	 * Now read the capacity so we can provide the lbasize,
   2.399  	 * pbsize and capacity.
   2.400  	 */
   2.401 -	rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize, &pbsize,
   2.402 -	    SD_PATH_DIRECT);
   2.403 -
   2.404 -	if (rval != 0) {
   2.405 +	if (un->un_f_descr_format_supported)
   2.406 +		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
   2.407 +		    &pbsize, SD_PATH_DIRECT);
   2.408 +
   2.409 +	if (rval != 0 || !un->un_f_descr_format_supported) {
   2.410  		rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
   2.411  		    SD_PATH_DIRECT);
   2.412  
   2.413  		switch (rval) {
   2.414  		case 0:
   2.415 -			pbsize = lbasize;
   2.416 +			if (un->un_f_enable_rmw &&
   2.417 +			    un->un_phy_blocksize != 0) {
   2.418 +				pbsize = un->un_phy_blocksize;
   2.419 +			} else {
   2.420 +				pbsize = lbasize;
   2.421 +			}
   2.422  			media_capacity = capacity;
   2.423 +
   2.424  			/*
   2.425  			 * sd_send_scsi_READ_CAPACITY() reports capacity in
   2.426  			 * un->un_sys_blocksize chunks. So we need to convert
   2.427 @@ -23515,6 +23640,13 @@
   2.428  			goto done;
   2.429  		}
   2.430  	} else {
   2.431 +		if (un->un_f_enable_rmw &&
   2.432 +		    !ISP2(pbsize % DEV_BSIZE)) {
   2.433 +			pbsize = SSD_SECSIZE;
   2.434 +		} else if (!ISP2(lbasize % DEV_BSIZE) ||
   2.435 +		    !ISP2(pbsize % DEV_BSIZE)) {
   2.436 +			pbsize = lbasize = DEV_BSIZE;
   2.437 +		}
   2.438  		media_capacity = capacity;
   2.439  	}
   2.440  
   2.441 @@ -23530,8 +23662,8 @@
   2.442  	mutex_exit(SD_MUTEX(un));
   2.443  
   2.444  	media_info_ext.dki_lbsize = lbasize;
   2.445 +	media_info_ext.dki_pbsize = pbsize;
   2.446  	media_info_ext.dki_capacity = media_capacity;
   2.447 -	media_info_ext.dki_pbsize = pbsize;
   2.448  
   2.449  	if (ddi_copyout(&media_info_ext, arg, sizeof (struct dk_minfo_ext),
   2.450  	    flag)) {
   2.451 @@ -31607,3 +31739,66 @@
   2.452  		mutex_exit(SD_MUTEX(un));
   2.453  	}
   2.454  }
   2.455 +
   2.456 +/*
   2.457 + *	Function: sd_check_emulation_mode
   2.458 + *
   2.459 + *   Description: Record the drive's physical block size (from READ
   2.460 + *		  CAPACITY 16, or 4K for sd_flash_dev_table matches) and
   2.461 + *		  force RMW for matching SSDs whose logical size differs.
   2.462 + *
   2.463 + *	 Context: Kernel thread (NOTE(review): sends a SCSI command; confirm)
   2.464 + */
   2.465 +
   2.466 +static void
   2.467 +sd_check_emulation_mode(sd_ssc_t *ssc)
   2.468 +{
   2.469 +	int		rval = 0;
   2.470 +	uint64_t	capacity;
   2.471 +	uint_t		lbasize;
   2.472 +	uint_t		pbsize;
   2.473 +	int		i;
   2.474 +	int		devid_len;
   2.475 +	struct sd_lun	*un;
   2.476 +
   2.477 +	ASSERT(ssc != NULL);
   2.478 +	un = ssc->ssc_un;
   2.479 +	ASSERT(un != NULL);
   2.480 +	ASSERT(!mutex_owned(SD_MUTEX(un)));
   2.481 +
   2.482 +	mutex_enter(SD_MUTEX(un));
   2.483 +	if (ISCD(un)) {
   2.484 +		mutex_exit(SD_MUTEX(un));
   2.485 +		return;
   2.486 +	}
   2.487 +
   2.488 +	if (un->un_f_descr_format_supported) {
   2.489 +		mutex_exit(SD_MUTEX(un));	/* drop lock to send command */
   2.490 +		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
   2.491 +		    &pbsize, SD_PATH_DIRECT);
   2.492 +		mutex_enter(SD_MUTEX(un));
   2.493 +
   2.494 +		/*
   2.495 +		 * Accept only a power-of-two size of at least DEV_BSIZE;
   2.496 +		 * anything else cannot be used to build alignment masks.
   2.497 +		 */
   2.498 +		if (rval != 0 || pbsize < DEV_BSIZE || !ISP2(pbsize)) {
   2.499 +			un->un_phy_blocksize = DEV_BSIZE;
   2.500 +		} else {
   2.501 +			un->un_phy_blocksize = pbsize;
   2.502 +		}
   2.503 +	}
   2.504 +
   2.505 +	for (i = 0; i < sd_flash_dev_table_size; i++) {
   2.506 +		devid_len = (int)strlen(sd_flash_dev_table[i]);
   2.507 +		if (sd_sdconf_id_match(un, sd_flash_dev_table[i], devid_len)
   2.508 +		    == SD_SUCCESS) {
   2.509 +			un->un_phy_blocksize = SSD_SECSIZE;	/* 4K drive */
   2.510 +			if (un->un_f_is_solid_state &&
   2.511 +			    un->un_phy_blocksize != un->un_tgt_blocksize)
   2.512 +				un->un_f_enable_rmw = TRUE;
   2.513 +		}
   2.514 +	}
   2.515 +
   2.516 +	mutex_exit(SD_MUTEX(un));
   2.517 +}
     3.1 --- a/usr/src/uts/common/sys/scsi/targets/sddef.h	Wed Apr 21 20:49:07 2010 -0600
     3.2 +++ b/usr/src/uts/common/sys/scsi/targets/sddef.h	Thu Apr 22 11:12:41 2010 +0800
     3.3 @@ -19,8 +19,7 @@
     3.4   * CDDL HEADER END
     3.5   */
     3.6  /*
     3.7 - * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     3.8 - * Use is subject to license terms.
     3.9 + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
    3.10   */
    3.11  
    3.12  #ifndef	_SYS_SCSI_TARGETS_SDDEF_H
    3.13 @@ -258,6 +257,9 @@
    3.14  	/* The size of a logical block on the target, in bytes. */
    3.15  	uint32_t	un_tgt_blocksize;
    3.16  
    3.17 +	/* The size of a physical block on the target, in bytes. */
    3.18 +	uint32_t	un_phy_blocksize;
    3.19 +
    3.20  	/*
    3.21  	 * The number of logical blocks on the target. This is adjusted
    3.22  	 * to be in terms of the block size specified by un_sys_blocksize
    3.23 @@ -456,7 +458,8 @@
    3.24  	    un_f_is_solid_state		:1,	/* has solid state media */
    3.25  	    un_f_mmc_gesn_polling	:1,	/* use GET EVENT STATUS */
    3.26  						/* NOTIFICATION for polling */
    3.27 -	    un_f_reserved		:5;
    3.28 +	    un_f_enable_rmw		:1,	/* Force RMW in sd driver */
    3.29 +	    un_f_reserved		:4;
    3.30  
    3.31  	/* Ptr to table of strings for ASC/ASCQ error message printing */
    3.32  	struct scsi_asq_key_strings	*un_additional_codes;
    3.33 @@ -575,6 +578,10 @@
    3.34  #define	SD_BYTES2TGTBLOCKS(un, bytecount)				\
    3.35  	((bytecount + (un->un_tgt_blocksize - 1))/un->un_tgt_blocksize)
    3.36  
    3.37 +/* Bytes to physical blocks, rounded up. NOTE: arguments are unparenthesized */
    3.38 +#define	SD_BYTES2PHYBLOCKS(un, bytecount)				\
    3.39 +	((bytecount + (un->un_phy_blocksize - 1))/un->un_phy_blocksize)
    3.40 +
    3.41  /* Convert a target block count to a number of bytes */
    3.42  #define	SD_TGTBLOCKS2BYTES(un, blockcount)				\
    3.43  	(blockcount * (un)->un_tgt_blocksize)
    3.44 @@ -918,6 +925,11 @@
    3.45  #define	SD_MODE2_BLKSIZE		2336	/* bytes */
    3.46  
    3.47  /*
    3.48 + * Solid State Drive default sector size
    3.49 + */
    3.50 +#define	SSD_SECSIZE			4096
    3.51 +
    3.52 +/*
    3.53   * Resource type definitions for multi host control operations. Specifically,
    3.54   * queue and request definitions for reservation request handling between the
    3.55   * scsi facility callback function (sd_mhd_watch_cb) and the reservation