7102535 libz.so performance on Solaris needs significant improvement
author Ferenc Rakoczi <Ferenc.Rakoczi@Sun.COM>
Wed, 23 May 2012 01:05:52 -0700
changeset 825 78809aba88fe
parent 824 3c0f8247e0cd
child 826 c6aad84d2493
7102535 libz.so performance on Solaris needs significant improvement
components/zlib/Makefile
components/zlib/capabilities/Makefile
components/zlib/capabilities/Makefile.com
components/zlib/capabilities/Makefile.targ
components/zlib/capabilities/sun4v/Makefile
components/zlib/capabilities/sun4v/Makefile.com
components/zlib/capabilities/sun4v/Makefile.targ
components/zlib/capabilities/sun4v/mapfile-cap
components/zlib/capabilities/sun4v/sparcv7/Makefile
components/zlib/capabilities/sun4v/sparcv7/longest_match_t4.s
components/zlib/capabilities/sun4v/sparcv9/Makefile
components/zlib/capabilities/sun4v/sparcv9/longest_match_t4.s
components/zlib/mapfile
components/zlib/patches/perf.patch
--- a/components/zlib/Makefile	Mon May 21 01:44:27 2012 -0700
+++ b/components/zlib/Makefile	Wed May 23 01:05:52 2012 -0700
@@ -39,15 +39,44 @@
 include ../../make-rules/ips.mk
 include ../../make-rules/lint-libraries.mk
 
+#
+# We want to build hardware specific versions of the longest_match()
+# function into our shared library that have been hand optimised to use
+# some machine architecture specific instructions. Currently, we are doing
+# it for the T4 architecture, but later other architectures may be added.
+# This is done by taking advantage of the Solaris 11 link-editor
+# "Symbol Capabilities" feature.  Refer to the section "Creating a Family
+# of Symbol Capabilities Functions", under "Identifying Capability
+# Requirements" in the "Linker and Libraries Guide"
+# (http://docs.oracle.com/cd/E19963-01/html/819-0690/chapter2-13.html#giskh).
+CAP_OBJS_sparcv7 += ../../capabilities/sun4v/sparcv7/symcap.o 
+CAP_OBJS_sparcv9 += ../../capabilities/sun4v/sparcv9/symcap.o 
+$(BUILD_DIR)/%/.built: CAP_OBJS=$(CAP_OBJS_$*)
+
 # Zlib won't build without cloning. We need also to get rid of default
-# Makefile and get our own version of zlib.h to avoid interactions
+# Makefile and get our own version of zconf.h to avoid interactions
 # between 32 and 64 bit builds.
+# Also, the x86 architecture does not require alignment for multi-byte
+# loads, so we can define UNALIGNED_OK for x86
+ifeq ($(MACH), i386)
 COMPONENT_PRE_CONFIGURE_ACTION = ( \
 	$(CLONEY) $(SOURCE_DIR) $(@D); \
 	$(RM) $(@D)/Makefile $(@D)/zconf.h; \
 	$(CP) $(SOURCE_DIR)/zconf.h $(@D) )
+CFLAGS_EXTRA = -DUNALIGNED_OK -DORIG_LONGEST_MATCH_GLOBAL
+PIC_OBJS =
+else
+COMPONENT_PRE_CONFIGURE_ACTION = ( \
+	$(CLONEY) $(SOURCE_DIR) $(@D); \
+	$(RM) $(@D)/Makefile $(@D)/zconf.h; \
+	$(CP) $(SOURCE_DIR)/zconf.h $(@D) )
+CFLAGS_EXTRA = -DORIG_LONGEST_MATCH_GLOBAL -xinline=%auto,no%longest_match
+PIC_OBJS=$(CAP_OBJS)
+endif
 
-CFLAGS += $(CC_PIC) 
+CFLAGS += $(CC_PIC)
+
+CFLAGS += $(CFLAGS_EXTRA)
 
 # We need to reset configure options here because zlib is confused with
 # CC and CFLAGS definitions as configure parameters.
@@ -57,13 +86,17 @@
 CONFIGURE_OPTIONS.64	+= --libdir=/usr/lib/$(MACH64)
 
 CONFIGURE_ENV += CC="$(CC)"
-CONFIGURE_ENV += CFLAGS="$(CFLAGS)"
+CONFIGURE_ENV += CFLAGS="$(CFLAGS) -xalias_level=basic -xdepend"
 CONFIGURE_ENV += LDSHARED="$(CC) $(CFLAGS) -G"
 
 # This LDSHARED definitions is forced to get all required options plus
 # mapfile for result linking. While the one used with configure is just
 # to allow Zlib detect capability of creating shared libraries.
-COMPONENT_BUILD_ARGS = LDSHARED="$(CC) $(CFLAGS) -G -h libz.so.1 $(LD_OPTIONS_SO) -M ../../mapfile -L."
+COMPONENT_BUILD_ARGS = LDSHARED="$(CC) $(CFLAGS) -G -h libz.so.1 $(LD_OPTIONS_SO) -M ../../mapfile -L." PIC_OBJS="$(PIC_OBJS)"
+# Build the sun4v capability objects first; && stops if the cd fails.
+$(BUILD_DIR)/sparc%/.built: COMPONENT_PRE_BUILD_ACTION = ( \
+	cd capabilities && \
+	$(ENV) SUBDIRS="sun4v" BUILD_ARCH=$* $(GMAKE) build )
 
 COMPONENT_TEST_TARGETS = test
 
@@ -73,6 +106,9 @@
 
 test:		$(TEST_32_and_64)
 
+clean::
+	$(RM) -r $(BUILD_DIR) $(PROTO_DIR) capabilities/*/*/*.o
+
 BUILD_PKG_DEPENDENCIES =	$(BUILD_TOOLS)
 
 include ../../make-rules/depend.mk
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/Makefile	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,36 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+
+all:		TARGET= all
+clean:		TARGET= clean
+clobber:	TARGET= clobber
+build:		TARGET= build
+
+all clean clobber build:	$(SUBDIRS)
+# Run $(TARGET) in each subdirectory; stop if the cd fails.
+$(SUBDIRS):	FRC
+		@cd $@ && pwd && $(MAKE) $(TARGET)
+
+FRC:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/Makefile.com	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,35 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+
+MAPFILE-CAP =	../mapfile-cap
+MAPOPT-CAP =	$(MAPFILE-CAP:%=-M%)
+
+OBJCAP =	objcap.o
+SYMCAP =	symcap.o
+
+CLOBBERFILES +=	$(OBJCAP) $(SYMCAP)
+
+C99MODE =		-xc99=%all
+C99LMODE =		-Xc99=%all
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/Makefile.targ	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,37 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+
+# Combine all HW specific objects into one relocatable object.
+# Assign any capabilities to this object, and define the interface.
+
+objcap.o:	$(HW_SPEC_OBJECTS) $(MAPFILE-CAP)
+		$(LD) -r -o $@ $(MAPOPT-CAP) -Breduce $(HW_SPEC_OBJECTS)
+
+# Convert the combined object capabilities object into a symbol capabilities
+# object.
+
+symcap.o:	$(OBJCAP)
+		$(LD) -r -o $@ -z symbolcap $(OBJCAP)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/sun4v/Makefile	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+
+all:		TARGET= all
+clean:		TARGET= clean
+clobber:	TARGET= clobber
+build:		TARGET= build
+
+all clean clobber build:	$(BUILD_ARCH)
+
+lint:
+# Run $(TARGET) in the selected architecture directory; stop if the cd fails.
+$(BUILD_ARCH):	FRC
+		@cd $@ && pwd && $(MAKE) $(TARGET)
+
+FRC:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/sun4v/Makefile.com	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+
+PLATFORM =	sun4v
+
+# Redefine the objects required for this capabilities group.
+HW_SPEC_OBJECTS =	longest_match_t4.o
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/sun4v/Makefile.targ	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+
+AS = /usr/bin/as
+# Assemble the pre-generated T4 source with the flags set by the includer.
+longest_match_t4.o: longest_match_t4.s
+	$(AS) $(ASFLAGS) -o $@ $<
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/sun4v/mapfile-cap	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,37 @@
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+
+$mapfile_version 2
+
+CAPABILITY sun4v {
+	MACHINE = sun4v;
+	HW += CBCOND;
+};
+
+SYMBOL_SCOPE {
+	global:
+		longest_match;
+	local:
+		*;
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/sun4v/sparcv7/Makefile	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,39 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+
+include		../Makefile.com
+include		../../Makefile.com
+
+CFLAGS += -xarch=sparc
+CPPFLAGS += -D__sparc
+ASFLAGS = -m32 -K PIC -xarch=sparc4
+
+include		../Makefile.targ
+include		../../Makefile.targ
+
+all build:	$(SYMCAP)
+
+clean:
+	$(RM) *.o
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/sun4v/sparcv7/longest_match_t4.s	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,689 @@
+!
+! This file was generated by a compiler that is currently not part of the CBE
+! as the CBE compiler does not generate code for the T4 architecture. If
+! such a compiler (e.g. the Oracle Studio 12.3) becomes part of the CBE
+! the longest_match.o file can simply be compiled from longest_match.c
+! as for the 32-bit version, no modifications were made.
+!
+
+	.section	".text",#alloc,#execinstr,#progbits
+	.file	"longest_match.c"
+
+	.section	".bss",#alloc,#write,#nobits
+
+Bbss.bss:
+
+	.section	".data",#alloc,#write,#progbits
+
+Ddata.data:
+
+	.section	".rodata",#alloc,#progbits
+!
+! CONSTANT POOL
+!
+
+Drodata.rodata:
+	.global	deflate_lm_copyright
+!
+! CONSTANT POOL
+!
+	.global deflate_lm_copyright
+
+deflate_lm_copyright:
+	.byte	32
+	.byte	100
+	.byte	101
+	.byte	102
+	.byte	108
+	.byte	97
+	.byte	116
+	.byte	101
+	.byte	47
+	.byte	108
+	.byte	111
+	.byte	110
+	.byte	103
+	.byte	101
+	.byte	115
+	.byte	116
+	.byte	95
+	.byte	109
+	.byte	97
+	.byte	116
+	.byte	99
+	.byte	104
+	.byte	32
+	.byte	49
+	.byte	46
+	.byte	50
+	.byte	46
+	.byte	51
+	.byte	32
+	.byte	67
+	.byte	111
+	.byte	112
+	.byte	121
+	.byte	114
+	.byte	105
+	.byte	103
+	.byte	104
+	.byte	116
+	.byte	32
+	.byte	49
+	.byte	57
+	.byte	57
+	.byte	53
+	.byte	45
+	.byte	50
+	.byte	48
+	.byte	48
+	.byte	53
+	.byte	32
+	.byte	74
+	.byte	101
+	.byte	97
+	.byte	110
+	.byte	45
+	.byte	108
+	.byte	111
+	.byte	117
+	.byte	112
+	.byte	32
+	.byte	71
+	.byte	97
+	.byte	105
+	.byte	108
+	.byte	108
+	.byte	121
+	.byte	32
+	.skip	1
+	.type	deflate_lm_copyright,#object
+	.size	deflate_lm_copyright,67
+
+	.section	".tbss",#alloc,#write,#tls,#nobits
+
+Ttbss.bss:
+
+	.section	".tdata",#alloc,#write,#tls,#progbits
+
+Ttdata.data:
+
+	.section	".text",#alloc,#execinstr,#progbits
+/* 000000	   0 */		.align	4
+! FILE longest_match.c
+
+!    1		      !/* deflate.c -- compress data using the deflation algorithm
+!    2		      ! * Copyright (C) 1995-2005 Jean-loup Gailly.
+!    3		      ! * For conditions of distribution and use, see copyright notice in zlib.h
+!    4		      ! */
+!    6		      !/*
+!    7		      ! * This file contains the longest_match() function cut out from the original
+!    8		      ! * deflate.c file - this was necessary so that the compiler do not inline
+!    9		      ! * this function and so architecture-specific versions of it may be built
+!   10		      ! * that can use the linker's capabilities-based linking feature to produce
+!   11		      ! * the best executable for all platforms.
+!   12		      ! */
+!   14		      !/* @(#) $Id$ */
+!   16		      !#include "deflate.h"
+!   18		      !const char deflate_lm_copyright[] =
+!   19		      !   " deflate/longest_match 1.2.3 Copyright 1995-2005 Jean-loup Gailly ";
+!   20		      !/*
+!   21		      !  If you use the zlib library in a product, an acknowledgment is welcome
+!   22		      !  in the documentation of your product. If for some reason you cannot
+!   23		      !  include such an acknowledgment, I would appreciate that you keep this
+!   24		      !  copyright string in the executable of your product.
+!   25		      ! */
+!   27		      !#define NIL 0
+!   29		      !#ifndef FASTEST
+!   30		      !#ifdef ASMV
+!   31		      !      void match_init OF((void)); /* asm code initialization */
+!   32		      !      uInt longest_match  OF((deflate_state *s, IPos cur_match));
+!   33		      !#else
+!   34		      !uInt longest_match  OF((deflate_state *s, IPos cur_match));
+!   35		      !#endif
+!   37		      !/* ===========================================================================
+!   38		      ! * Set match_start to the longest match starting at the given string and
+!   39		      ! * return its length. Matches shorter or equal to prev_length are discarded,
+!   40		      ! * in which case the result is equal to prev_length and match_start is
+!   41		      ! * garbage.
+!   42		      ! * IN assertions: cur_match is the head of the hash chain for the current
+!   43		      ! *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+!   44		      ! * OUT assertion: the match length is not greater than s->lookahead.
+!   45		      ! */
+!   46		      !#ifndef ASMV
+!   47		      !/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+!   48		      ! * match.S. The code will be functionally equivalent.
+!   49		      ! */
+!   50		      !uInt longest_match(s, cur_match)
+!   51		      !    deflate_state *s;
+!   52		      !    IPos cur_match;                             /* current match */
+!   53		      !{
+
+!
+! SUBROUTINE longest_match
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global longest_match
+                       
+
+			longest_match:
+
+! Registers live out of longest_match: 
+! g2 o1 sp l1 l2 l4 l5 l6 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+
+			.L900000109:
+/* 000000	  53 */		save	%sp,-96,%sp
+
+!   54		      !    unsigned chain_length = s->max_chain_length;/* max hash chain length */
+!   55		      !    register Bytef *scan = s->window + s->strstart; /* current string */
+
+/* 0x0004	  55 */		ld	[%i0+108],%l4
+/* 0x0008	     */		ld	[%i0+56],%l5
+
+!   56		      !    register Bytef *match;                       /* matched string */
+!   57		      !    register int len;                           /* length of current match */
+!   58		      !    int best_len = s->prev_length;              /* best match length so far */
+!   59		      !    int nice_match = s->nice_match;             /* stop if match long enough */
+!   60		      !    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+!   61		      !        s->strstart - (IPos)MAX_DIST(s) : NIL;
+
+/* 0x000c	  61 */		mov	0,%l6
+/* 0x0010	     */		ld	[%i0+44],%i5
+/* 0x0014	  58 */		ld	[%i0+120],%i2
+/* 0x0018	  54 */		ld	[%i0+124],%o1
+/* 0x001c	  59 */		ld	[%i0+144],%g2
+/* 0x0020	  93 */		ld	[%i0+116],%i3
+/* 0x0024	  55 */		add	%l5,%l4,%i4
+/* 0x0028	  87 */		ld	[%i0+140],%l1
+/* 0x002c	  61 */		add	%i5,-262,%l3
+/* 0x0030	  77 */		add	%i4,%i2,%l2
+/* 0x0034	  61 */		cwbleu	%l4,%l3,.L77000163
+                       
+! predecessor blocks: .L900000109
+
+			.L77000159:
+/* 0x0038	  61 */		sub	%l4,%l3,%l6
+
+! Registers live out of .L77000159: 
+! g2 o1 sp l1 l2 l5 l6 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!   62		      !    /* Stop when cur_match becomes <= limit. To simplify the code,
+!   63		      !     * we prevent matches with the string of window index 0.
+!   64		      !     */
+!   65		      !    Posf *prev = s->prev;
+!   66		      !    uInt wmask = s->w_mask;
+!   68		      !#ifdef UNALIGNED_OK
+!   69		      !    /* Compare two bytes at a time. Note: this is not always beneficial.
+!   70		      !     * Try with and without -DUNALIGNED_OK to check.
+!   71		      !     */
+!   72		      !    register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
+!   73		      !    register ush scan_start = *(ushf*)scan;
+!   74		      !    register ush scan_end   = *(ushf*)(scan+best_len-1);
+!   75		      !#else
+!   76		      !    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+!   77		      !    register Byte scan_end1  = scan[best_len-1];
+
+                       
+! predecessor blocks: .L77000159 .L900000109
+
+			.L77000163:
+/* 0x003c	  77 */		ldub	[%l2-1],%l4
+
+!   78		      !    register Byte scan_end   = scan[best_len];
+!   79		      !#endif
+!   81		      !    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+!   82		      !     * It is easy to get rid of this optimization if necessary.
+!   83		      !     */
+!   84		      !    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+!   86		      !    /* Do not waste too much time if we already have a good match: */
+!   87		      !    if (s->prev_length >= s->good_match) {
+
+/* 0x0040	  87 */		cmp	%i2,%l1
+/* 0x0044	  78 */		ldub	[%i4+%i2],%l3
+/* 0x0048	  76 */		add	%i4,258,%l7
+/* 0x004c	  66 */		ld	[%i0+52],%l1
+/* 0x0050	  65 */		ld	[%i0+64],%l2
+/* 0x0054	  87 */		bcs,pn	%icc,.L77000167
+/* 0x0058	  93 */		cmp	%g2,%i3
+
+! Registers live out of .L77000163: 
+! g2 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!   88		      !        chain_length >>= 2;
+
+                       
+! predecessor blocks: .L77000163
+
+			.L77000165:
+/* 0x005c	  88 */		srl	%o1,2,%o1
+
+! Registers live out of .L77000165: 
+! g2 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!   89		      !    }
+!   90		      !    /* Do not look for matches beyond the end of the input. This is necessary
+!   91		      !     * to make deflate deterministic.
+!   92		      !     */
+!   93		      !    if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+                       
+! predecessor blocks: .L77000163 .L77000165
+
+			.L77000167:
+/* 0x0060	  93 */		movgu	%icc,%i3,%g2
+
+!   95		      !    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+!   97		      !    do {
+!   98		      !        Assert(cur_match < s->strstart, "no future");
+!   99		      !        match = s->window + cur_match;
+
+/* 0x0064	  99 */		add	%i1,%l5,%g3
+
+! Registers live out of .L77000167: 
+! g2 g3 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!  101		      !        /* Skip to next match if the match length cannot increase
+!  102		      !         * or if the match length is less than 2.  Note that the checks below
+!  103		      !         * for insufficient lookahead only occur occasionally for performance
+!  104		      !         * reasons.  Therefore uninitialized memory will be accessed, and
+!  105		      !         * conditional jumps will be made that depend on those values.
+!  106		      !         * However the length of the match is limited to the lookahead, so
+!  107		      !         * the output of deflate is not affected by the uninitialized values.
+!  108		      !         */
+!  109		      !#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+!  110		      !        /* This code assumes sizeof(unsigned short) == 2. Do not use
+!  111		      !         * UNALIGNED_OK if your compiler uses a different size.
+!  112		      !         */
+!  113		      !        if (*(ushf*)(match+best_len-1) != scan_end ||
+!  114		      !            *(ushf*)match != scan_start) continue;
+!  116		      !        /* It is not necessary to compare scan[2] and match[2] since they are
+!  117		      !         * always equal when the other bytes match, given that the hash keys
+!  118		      !         * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+!  119		      !         * strstart+3, +5, ... up to strstart+257. We check for insufficient
+!  120		      !         * lookahead only every 4th comparison; the 128th check will be made
+!  121		      !         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+!  122		      !         * necessary to put more guard bytes at the end of the window, or
+!  123		      !         * to check more often for insufficient lookahead.
+!  124		      !         */
+!  125		      !        Assert(scan[2] == match[2], "scan[2]?");
+!  126		      !        scan++, match++;
+!  127		      !        do {
+!  128		      !        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+!  129		      !                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+!  130		      !                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+!  131		      !                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+!  132		      !                 scan < strend);
+!  133		      !        /* The funny "do {}" generates better code on most compilers */
+!  135		      !        /* Here, scan <= window+strstart+257 */
+!  136		      !        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+!  137		      !        if (*scan == *match) scan++;
+!  139		      !        len = (MAX_MATCH - 1) - (int)(strend-scan);
+!  140		      !        scan = strend - (MAX_MATCH-1);
+!  142		      !#else /* UNALIGNED_OK */
+!  144		      !        if (match[best_len]   != scan_end  ||
+!  145		      !            match[best_len-1] != scan_end1 ||
+!  146		      !            *match            != *scan     ||
+!  147		      !            *++match          != scan[1])      continue;
+
+                       
+! predecessor blocks: .L77000167 .L77000219
+
+			.L900000107:
+/* 0x0068	 147 */		ldub	[%g3+%i2],%o3
+/* 0x006c	     */		cwbne	%o3,%l3,.L77000217
+
+! Registers live out of .L900000107: 
+! g2 g3 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L900000107
+
+			.L77000175:
+/* 0x0070	 147 */		add	%g3,%i2,%g4
+/* 0x0074	     */		ldub	[%g4-1],%o4
+/* 0x0078	     */		cwbne	%o4,%l4,.L77000217
+
+! Registers live out of .L77000175: 
+! g2 g3 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000175
+
+			.L77000177:
+/* 0x007c	 147 */		ldub	[%l5+%i1],%o5
+/* 0x0080	     */		ldub	[%i4],%o7
+/* 0x0084	     */		cwbne	%o5,%o7,.L77000217
+
+! Registers live out of .L77000177: 
+! g2 g3 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000177
+
+			.L77000179:
+/* 0x0088	 147 */		ldub	[%i4+1],%i5
+/* 0x008c	     */		ldub	[%g3+1],%l0
+/* 0x0090	     */		cwbne	%l0,%i5,.L77000217
+
+! Registers live out of .L77000179: 
+! g2 g3 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!  149		      !        /* The check at best_len-1 can be removed because it will be made
+!  150		      !         * again later. (This heuristic is not always a win.)
+!  151		      !         * It is not necessary to compare scan[2] and match[2] since they
+!  152		      !         * are always equal when the other bytes match, given that
+!  153		      !         * the hash keys are equal and that HASH_BITS >= 8.
+!  154		      !         */
+!  155		      !        scan += 2, match++;
+
+                       
+! predecessor blocks: .L77000179
+
+			.L77000185:
+/* 0x0094	 155 */		add	%i4,2,%g4
+/* 0x0098	     */		add	%g3,2,%g3
+
+! Registers live out of .L77000185: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!  156		      !        Assert(*scan == *match, "match[2]?");
+!  158		      !        /* We check for insufficient lookahead only every 8th comparison;
+!  159		      !         * the 256th check will be made at strstart+258.
+!  160		      !         */
+!  161		      !        do {
+!  162		      !        } while (*++scan == *++match && *++scan == *++match &&
+!  163		      !                 *++scan == *++match && *++scan == *++match &&
+!  164		      !                 *++scan == *++match && *++scan == *++match &&
+!  165		      !                 *++scan == *++match && *++scan == *++match &&
+!  166		      !                 scan < strend);
+
+                       
+! predecessor blocks: .L77000185 .L77000203
+
+			.L77000187:
+/* 0x009c	 166 */		ldub	[%g4+1],%g1
+/* 0x00a0	     */		add	%g4,1,%g4
+/* 0x00a4	     */		ldub	[%g3+1],%g5
+/* 0x00a8	     */		cwbne	%g1,%g5,.L77000207
+
+! Registers live out of .L77000187: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000187
+
+			.L77000189:
+/* 0x00ac	 166 */		ldub	[%g4+1],%o0
+/* 0x00b0	     */		add	%g4,1,%g4
+/* 0x00b4	     */		ldub	[%g3+2],%o2
+/* 0x00b8	     */		cwbne	%o0,%o2,.L77000207
+
+! Registers live out of .L77000189: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000189
+
+			.L77000191:
+/* 0x00bc	 166 */		ldub	[%g4+1],%o3
+/* 0x00c0	     */		add	%g4,1,%g4
+/* 0x00c4	     */		ldub	[%g3+3],%o4
+/* 0x00c8	     */		cwbne	%o3,%o4,.L77000207
+
+! Registers live out of .L77000191: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000191
+
+			.L77000193:
+/* 0x00cc	 166 */		ldub	[%g4+1],%o5
+/* 0x00d0	     */		add	%g4,1,%g4
+/* 0x00d4	     */		ldub	[%g3+4],%o7
+/* 0x00d8	     */		cwbne	%o5,%o7,.L77000207
+
+! Registers live out of .L77000193: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000193
+
+			.L77000195:
+/* 0x00dc	 166 */		ldub	[%g4+1],%l0
+/* 0x00e0	     */		add	%g4,1,%g4
+/* 0x00e4	     */		ldub	[%g3+5],%i5
+/* 0x00e8	     */		cwbne	%l0,%i5,.L77000207
+
+! Registers live out of .L77000195: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000195
+
+			.L77000197:
+/* 0x00ec	 166 */		ldub	[%g4+1],%g1
+/* 0x00f0	     */		add	%g4,1,%g4
+/* 0x00f4	     */		ldub	[%g3+6],%g5
+/* 0x00f8	     */		cwbne	%g1,%g5,.L77000207
+
+! Registers live out of .L77000197: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000197
+
+			.L77000199:
+/* 0x00fc	 166 */		ldub	[%g4+1],%o0
+/* 0x0100	     */		add	%g4,1,%g4
+/* 0x0104	     */		ldub	[%g3+7],%o2
+/* 0x0108	     */		cwbne	%o0,%o2,.L77000207
+
+! Registers live out of .L77000199: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000199
+
+			.L77000201:
+/* 0x010c	 166 */		ldub	[%g4+1],%o3
+/* 0x0110	     */		add	%g4,1,%g4
+/* 0x0114	     */		ldub	[%g3+8],%o4
+/* 0x0118	     */		add	%g3,8,%g3
+/* 0x011c	     */		cwbne	%o3,%o4,.L77000207
+/* 0x0120	     */		nop
+
+! Registers live out of .L77000201: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000201
+
+			.L77000203:
+/* 0x0124	 166 */		cwbcs	%g4,%l7,.L77000187
+
+! Registers live out of .L77000203: 
+! g2 g3 g4 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!  168		      !        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+!  170		      !        len = MAX_MATCH - (int)(strend - scan);
+
+                       
+! predecessor blocks: .L77000187 .L77000189 .L77000191 .L77000193 .L77000195 .L77000197 .L77000199 .L77000201 .L77000203
+
+			.L77000207:
+/* 0x0128	 170 */		sub	%g4,%l7,%o7
+/* 0x012c	     */		add	%o7,258,%o5
+
+!  171		      !        scan = strend - MAX_MATCH;
+!  173		      !#endif /* UNALIGNED_OK */
+!  175		      !        if (len > best_len) {
+
+/* 0x0130	 175 */		cwble	%o5,%i2,.L77000217
+
+! Registers live out of .L77000207: 
+! g2 o1 o5 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!  176		      !            s->match_start = cur_match;
+
+                       
+! predecessor blocks: .L77000207
+
+			.L77000209:
+/* 0x0134	 176 */		st	%i1,[%i0+112]
+
+!  177		      !            best_len = len;
+
+/* 0x0138	 177 */		mov	%o5,%i2
+
+!  178		      !            if (len >= nice_match) break;
+
+/* 0x013c	 178 */		cwbge	%o5,%g2,.L77000225
+
+! Registers live out of .L77000209: 
+! g2 o1 o5 sp l1 l2 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!  179		      !#ifdef UNALIGNED_OK
+!  180		      !            scan_end = *(ushf*)(scan+best_len-1);
+!  181		      !#else
+!  182		      !            scan_end1  = scan[best_len-1];
+
+                       
+! predecessor blocks: .L77000209
+
+			.L77000213:
+/* 0x0140	 182 */		add	%i4,%o5,%l4
+
+!  183		      !            scan_end   = scan[best_len];
+
+/* 0x0144	 183 */		ldub	[%i4+%o5],%l3
+/* 0x0148	 182 */		ldub	[%l4-1],%l4
+
+! Registers live out of .L77000213: 
+! g2 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!  184		      !#endif
+!  185		      !        }
+!  186		      !    } while ((cur_match = prev[cur_match & wmask]) > limit
+!  187		      !             && --chain_length != 0);
+
+                       
+! predecessor blocks: .L77000175 .L77000177 .L77000179 .L77000207 .L77000213 .L900000107
+
+			.L77000217:
+/* 0x014c	 187 */		and	%i1,%l1,%i1
+/* 0x0150	     */		sll	%i1,1,%l0
+/* 0x0154	     */		lduh	[%l0+%l2],%i1
+/* 0x0158	     */		cwbleu	%i1,%l6,.L77000225
+
+! Registers live out of .L77000217: 
+! g2 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		                       
+! predecessor blocks: .L77000217
+
+			.L77000219:
+/* 0x015c	 187 */		addcc	%o1,-1,%o1
+/* 0x0160	     */		bne,pt	%icc,.L900000107
+/* 0x0164	  99 */		add	%i1,%l5,%g3
+
+! Registers live out of .L77000219: 
+! g2 g3 o1 sp l1 l2 l4 l5 l6 l7 i0 i1 i2 i3 i4 fp gsr 
+! 
+		
+!  189		      !    if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+
+                       
+! predecessor blocks: .L77000209 .L77000217 .L77000219
+
+			.L77000225:
+/* 0x0168	 189 */		mov	%i3,%i0
+/* 0x016c	     */		cmp	%i2,%i3
+/* 0x0170	     */		movleu	%icc,%i2,%i0
+
+!  190		      !    return s->lookahead;
+
+/* 0x0174	 190 */		return	%i7+8	! Result =  %i0
+/* 0x0178	     */		nop
+/* 0x017c	   0 */		.type	longest_match,#function
+/* 0x017c	   0 */		.size	longest_match,(.-longest_match)
+                       
+
+			.L900000110:
+
+	.section	".text",#alloc,#execinstr,#progbits
+                       
+
+			.L900000112:
+
+	.section	".annotate",#progbits
+/* 000000	   0 */		.asciz	"anotate"
+/* 0x0008	   0 */		.half	6,0
+/* 0x000c	   0 */		.word	28
+/* 0x0010	   0 */		.half	0,8
+/* 0x0014	   0 */		.word	(.L900000112-0x17c)
+/* 0x0018	   0 */		.word	380
+/* 0x001c	   0 */		.half	1,12
+/* 0x0020	   0 */		.word	.L900000109
+/* 0x0024	   0 */		.word	(.L900000110-.L900000109)
+/* 0x0028	   0 */		.word	1577088
+
+! Begin Disassembling Debug Info
+!  Begin sdCreateSection : .debug_info
+!  Section Info: link_name/strtab=, entsize=0x1, adralign=0x1, flags=0x0
+!  Section Data Blocks:
+!   reloc[0]: knd=2, off=6, siz=4, lab1=.debug_abbrev, lab2=, loff=0
+!   reloc[1]: knd=2, off=286, siz=4, lab1=.debug_line, lab2=, loff=0
+	.section ".debug_info"
+	.byte 0x00,0x00,0x01,0x1f,0x00,0x02
+	.uaword %section_symbol(".debug_abbrev")
+	.byte 0x04,0x01
+	.ascii "longest_match.c\0"
+	.byte 0x0c
+	.ascii "/builds/frakoczi/userland_s11u1/zlib-t4/components/zlib/build/sparcv7\0"
+	.ascii " /ws/onnv-tools/SUNWspro/solarisstudio12.3/prod/bin/cc -xtarget=T4 -xarch=sparc4 -xchip=T4 -xO5 -S  longest_match.c\0"
+	.ascii "Xa;O;R=Sun C 5.12 SunOS_sparc 2011/11/16;backend;raw;cd;\0"
+	.ascii "DBG_GEN 5.3.3\0"
+	.uaword %section_symbol(".debug_line")
+	.byte 0x00
+!  End sdCreateSection
+!  Begin sdCreateSection : .debug_line
+!  Section Info: link_name/strtab=, entsize=0x1, adralign=0x1, flags=0x0
+!  Section Data Blocks:
+	.section ".debug_line"
+	.byte 0x00,0x00,0x00,0x29,0x00,0x02,0x00,0x00
+	.byte 0x00,0x23,0x04,0x00,0xff,0x04,0x0a,0x00
+	.byte 0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x01
+	.byte 0x00,0x6c,0x6f,0x6e,0x67,0x65,0x73,0x74
+	.byte 0x5f,0x6d,0x61,0x74,0x63,0x68,0x2e,0x63
+	.byte 0x00,0x00,0x00,0x00,0x00
+!  End sdCreateSection
+!  Begin sdCreateSection : .debug_abbrev
+!  Section Info: link_name/strtab=, entsize=0x1, adralign=0x1, flags=0x0
+!  Section Data Blocks:
+	.section ".debug_abbrev"
+	.byte 0x01,0x11,0x00,0x03,0x08,0x13,0x0b,0x1b
+	.byte 0x08,0x85,0x44,0x08,0x87,0x44,0x08,0x25
+	.byte 0x08,0x10,0x06,0x00,0x00,0x00
+!  End sdCreateSection
+
+! End Disassembling Debug Info
+
+! Begin Disassembling Ident
+	.ident	"cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"	! (NO SOURCE LINE)
+	.ident	"acomp: Sun C 5.12 SunOS_sparc 2011/11/16"	! (/tmp/acomp.1337176051.3981.02.sd:84)
+	.ident	"iropt: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"	! (/tmp/acomp.1337176051.3981.02.sd:85)
+	.ident	"cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"	! (NO SOURCE LINE)
+! End Disassembling Ident
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/sun4v/sparcv9/Makefile	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,39 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+
+include		../Makefile.com
+include		../../Makefile.com
+# 64-bit build: C code targets generic sparc; the assembler gets -xarch=sparc4.
+CFLAGS +=  -m64 -xarch=sparc
+CPPFLAGS += -D__sparc
+ASFLAGS = -m64 -K PIC -xarch=sparc4
+# Shared rules; presumably they consume the flags set above -- confirm.
+include		../Makefile.targ
+include		../../Makefile.targ
+# 'all' and 'build' both depend on $(SYMCAP), which is defined outside this file.
+all build:	$(SYMCAP)
+# Remove every object file in this directory.
+clean:
+	$(RM) *.o
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/capabilities/sun4v/sparcv9/longest_match_t4.s	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,279 @@
+!
+! This file was generated by a compiler that is currently not part of the CBE
+! (as the CBE compiler does not generate code for the T4 architecture), and
+! then it was modified by hand to remove some unnecessary instructions that
+! the compiler generated, and the main loop's branches were rearranged for
+! fewer taken branches on the most frequent code path. These modifications
+! were made in 7 steps. In each step, a few lines were removed from and added
+! to the compiler-generated code to produce an equivalent binary. The lines
+! that were removed in step <i> are marked by "!<i>" at the beginning of the
+! line; the lines added in that step are marked by "!<i>" at the end of the
+! line. In other words, let C_i denote the code after step <i> (C_0 is the
+! original, compiler-generated code; C_7 is the code in this file).
+! To reproduce C_i (0 <= i < 7), first take C_<i+1>, remove the lines that
+! end in !<i+1>, and then remove the !<i+1> string from the beginning of those
+! lines that start with it. Comparing C_i and C_<i+1> is a simple task, as
+! only a few lines have changed.
+!  If a compiler (e.g. the Oracle Studio 12.3) becomes part of the CBE and
+! is able to generate code as efficient as the code in this file, the
+! longest_match.o file can simply be compiled from longest_match.c.
+!
+
+	.section	".text",#alloc,#execinstr,#progbits
+	.file	"deflate-t4.c"
+
+	.section	".bss",#alloc,#write,#nobits
+
+Bbss.bss:
+
+	.section	".data",#alloc,#write,#progbits
+
+Ddata.data:
+
+	.section	".rodata",#alloc,#progbits
+!
+! CONSTANT POOL
+!
+
+Drodata.rodata:
+
+	.section	".picdata",#alloc,#write
+
+Dpicdata.picdata:
+
+	.section	".tbss",#alloc,#write,#tls,#nobits
+
+Ttbss.bss:
+
+	.section	".tdata",#alloc,#write,#tls,#progbits
+
+Ttdata.data:
+
+	.section	".rodata1",#alloc,#progbits
+	.align	8
+!
+! CONSTANT POOL
+!
+
+.L95:
+	.ascii	"invalid distance too far back\000"
+	.align	8
+!
+! CONSTANT POOL
+!
+
+.L147:
+	.ascii	"invalid distance code\000"
+	.align	8
+!
+! CONSTANT POOL
+!
+
+.L153:
+	.ascii	"invalid literal/length code\000"
+
+	.section	".text",#alloc,#execinstr,#progbits
+/* 000000	   0 */		.align	4
+! FILE deflate-t4.c
+
+!    1		      !#include <sun_prefetch.h>
+!    2		      !#include "deflate.h"
+!    3		      !#define NIL 0
+!    5		      !uInt longest_match(s, cur_match)
+!    6		      !    deflate_state *s;
+!    7		      !    IPos cur_match;                             /* current match */
+!    8		      !{
+
+!
+! SUBROUTINE longest_match
+!
+! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
+
+                       	.global longest_match
+                       
+
+			longest_match:
+! Presumed roles (cf. C-annotated sparcv7 listing; confirm): %i1 cur_match, %i2 best_len, %g5 chain_length, %g4 limit, %i4 scan.
+			.L900000112:
+	save      %sp, -0xb0, %sp	! new register window, 0xb0-byte frame
+	 ld        [%i0 + 0x4c], %l4	!7
+	 ldn       [%i0 + 0x60], %l5	!7
+	 and       %i1, %l4, %l2	!7
+	 prefetch [%i5 + %l2], #n_reads	! NOTE(review): %i5 is not written before this point; was %l5 intended?	!7
+!7	ld        [%i0 + 0x9c], %l4
+	 ld        [%i0 + 0x9c], %l1	!7
+	ld        [%i0 + 0x44], %l6
+	clr       %g4
+	ldn       [%i0 + 0x50], %g1
+	ld        [%i0 + 0xa8], %i2
+	ld        [%i0 + 0xac], %g5
+	ld        [%i0 + 0xc0], %o0
+!5	srl       %l4, 0x0, %l5
+	ld        [%i0 + 0xbc], %l7
+	add       %l6, -0x106, %i3
+!5	add       %g1, %l5, %i4
+!7	 add       %g1, %l4, %i4	!5
+	 add       %g1, %l1, %i4	!7
+!7	cwbleu    %l4, %i3, lm_0x38
+	 cwbleu    %l1, %i3, lm_0x38	!7
+!7	sub       %l4, %i3, %g4
+	 sub       %l1, %i3, %g4	!7
+! The sub above is skipped when %l1 <= %i3 (unsigned), leaving %g4 = 0.
+lm_0x38:
+!7	ld        [%i0 + 0x4c], %l4
+!4	add       %i2, -0x1, %l3
+!7	ldn       [%i0 + 0x60], %l5
+!4	sra       %l3, 0x0, %o2
+	 add       %i2, -0x1, %o2	!4
+	ldub      [%i4 + %o2], %o2
+!3	sra       %i2, 0x0, %l6
+!3	ldub      [%i4 + %l6], %o1
+	 ldub      [%i4 + %i2], %o1	!3
+	cmp       %i2, %l7
+	add       %i4, 0x102, %l7
+	ld        [%i0 + 0xa4], %i3
+	bcs,pn    %icc, lm_0x6c
+	mov       0x102, %l3	! delay slot: executed on both paths
+! Reached only when the cmp above found %i2 >= old %l7 (unsigned): %g5 /= 4.
+	srl       %g5, 0x2, %g5
+		
+lm_0x6c:
+	cmp       %o0, %i3
+!6	srl       %i1, 0x0, %l0
+!7	 and       %i1, %l4, %l2	!6
+	movgu     %icc, %i3, %o0	! %o0 = min(%o0, %i3), unsigned
+! Loop head: on entry %l2 holds %i1 & %l4 (set before the first pass and in the bne delay slot).
+lm_0x78:
+!6	and       %i1, %l4, %l2
+!6	add       %l0, %g1, %o3
+	 add       %i1, %g1, %o3	!6
+!3	ldub      [%o3 + %l6], %o5
+	 ldub      [%o3 + %i2], %o5		!3
+!1	srl       %l2, 0x0, %o4
+!1	sllx      %o4, 0x1, %l2
+	 sllx	  %l2, 0x1, %l2			!1
+	 add       %l2, %l5, %l1		!1
+	 prefetch  [%l1 - 0x40], #n_reads	!1
+	cwbe     %o5, %o1, lm_0x17c_neg	! bytes equal: continue comparing out of line
+! Advance: %i1 = halfword at [%l5 + %l2]; leave if %i1 <= %g4 (unsigned) or %g5 reaches 0.
+lm_0x17c:
+	lduh      [%l5 + %l2], %i1
+	cwbleu    %i1, %g4, lm_0x190
+
+	addcc     %g5, -0x1, %g5
+	bne,pt    %icc, lm_0x78
+!6	srl       %i1, 0x0, %l0
+	 and       %i1, %l4, %l2	!6
+! Exit: the return value is min(%i2, %i3), unsigned.
+lm_0x190:
+	cmp       %i2, %i3
+	movgu     %icc, %i3, %i2
+	return    %i7 + 0x8
+	srl       %o2, 0x0, %o0	! delay slot runs after the window restore, so %o2 here is %i2 above
+! The byte at [%o3 + %i2] matched %o1; test three more byte pairs, any mismatch resumes at lm_0x17c.
+lm_0x17c_neg:
+!3	add       %o3, %l6, %o7
+	 add       %o3, %i2, %o7		!3
+	ldub      [%o7 - 0x1], %l1
+	cwbne     %l1, %o2, lm_0x17c
+
+!6	ldub      [%g1 + %l0], %i5
+	 ldub      [%g1 + %i1], %i5	!6
+	ldub      [%i4], %o5
+	cwbne     %i5, %o5, lm_0x17c
+
+	ldub      [%i4 + 0x1], %l1
+	ldub      [%o3 + 0x1], %o4
+	cwbne     %o4, %l1, lm_0x17c
+
+	add       %o3, 0x2, %o3
+!1	add       %l2, %l5, %l1
+	add       %i4, 0x2, %o4
+! Unrolled x8 byte compare: %o4 advances by 1 per test, %o3 by 8 per pass; repeats while %o4 < %l7.
+lm_0xc0:
+	ldub      [%o4 + 0x1], %l0
+	add       %o4, 0x1, %o4
+	ldub      [%o3 + 0x1], %o7
+	cwbne     %l0, %o7, lm_0x14c
+
+	ldub      [%o4 + 0x1], %i5
+	add       %o4, 0x1, %o4
+	ldub      [%o3 + 0x2], %o5
+	cwbne     %i5, %o5, lm_0x14c
+
+	ldub      [%o4 + 0x1], %l0
+	add       %o4, 0x1, %o4
+	ldub      [%o3 + 0x3], %o7
+	cwbne     %l0, %o7, lm_0x14c
+
+	ldub      [%o4 + 0x1], %i5
+	add       %o4, 0x1, %o4
+	ldub      [%o3 + 0x4], %o5
+	cwbne     %i5, %o5, lm_0x14c
+
+	ldub      [%o4 + 0x1], %l0
+	add       %o4, 0x1, %o4
+	ldub      [%o3 + 0x5], %o7
+	cwbne     %l0, %o7, lm_0x14c
+
+	ldub      [%o4 + 0x1], %i5
+	add       %o4, 0x1, %o4
+	ldub      [%o3 + 0x6], %o5
+	cwbne     %i5, %o5, lm_0x14c
+
+	ldub      [%o4 + 0x1], %l0
+	add       %o4, 0x1, %o4
+	ldub      [%o3 + 0x7], %o7
+	cwbne     %l0, %o7, lm_0x14c
+
+	ldub      [%o4 + 0x1], %i5
+	add       %o4, 0x1, %o4
+	ldub      [%o3 + 0x8], %o5
+	add       %o3, 0x8, %o3
+	cwbne     %i5, %o5, lm_0x14c
+
+	nop
+	cxbcs     %o4, %l7, lm_0xc0
+! %o7 = 0x102 - (%l7 - %o4); %l7 was set to %i4 + 0x102 and %l3 to 0x102 earlier.
+lm_0x14c:
+!1	prefetch  [%l1 - 0x40], #n_reads
+	sub       %l7, %o4, %l0
+	sub       %l3, %l0, %o7
+	cwble     %o7, %i2, lm_0x17c
+! %o7 > %i2 here: store %i1 to [%i0 + 0xa0], make %o7 the new %i2, leave if %o7 >= %o0.
+	st        %i1, [%i0 + 0xa0]
+	mov       %o7, %i2
+	cwbge     %o7, %o0, lm_0x190
+! Reload %o1 from [%i4 + %o7] and %o2 from [%i4 + %o7 - 1], then resume at lm_0x17c.
+!2	sra       %o7, 0x0, %i1
+!3	sra       %o7, 0x0, %l6
+!2	add       %i4, %i1, %l1
+	 add       %i4, %o7, %l1	!2
+!2	ldub      [%i4 + %i1], %o1
+	 ldub      [%i4 + %o7], %o1	!2
+	 ba	   lm_0x17c
+	ldub      [%l1 - 0x1], %o2	! delay slot of the ba above
+	
+
+/* 0x0220	   0 */		.type	longest_match,#function
+/* 0x0220	   0 */		.size	longest_match,(.-longest_match)
+                       
+
+			.L900000113:
+
+	.section	".text",#alloc,#execinstr,#progbits
+/* 000000	   0 */		.align	8
+/* 000000	     */		.skip	24
+/* 0x0018	     */		.align	4
+
+
+			.L900000286:
+
+	.section	".text",#alloc,#execinstr,#progbits
+
+! Begin Disassembling Ident
+	.ident	"cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"	! (NO SOURCE LINE)
+	.ident	"acomp: Sun C 5.12 SunOS_sparc 2011/11/16"	! (/tmp/acomp.1329237379.172468.02.sd:24)
+	.ident	"iropt: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"	! (/tmp/acomp.1329237379.172468.02.sd:25)
+	.ident	"cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"	! (NO SOURCE LINE)
+! End Disassembling Ident
--- a/components/zlib/mapfile	Mon May 21 01:44:27 2012 -0700
+++ b/components/zlib/mapfile	Wed May 23 01:05:52 2012 -0700
@@ -32,7 +32,7 @@
 #
 # Note that the source above actually lives in the ON tree.
 #
-# Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
 #
 # public interfaces in libz
 #
@@ -106,6 +106,7 @@
 SYMBOL_VERSION SUNWprivate {
     global:
 	inflateBackInit_ ;
+	longest_match ;
     local: *;
 };
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/zlib/patches/perf.patch	Wed May 23 01:05:52 2012 -0700
@@ -0,0 +1,149 @@
+--- zlib-1.2.3/zlib.h.orig   Sun Jul 17 19:26:49 2005
++++ zlib-1.2.3/zlib.h        Tue May 22 10:17:54 2012
+@@ -37,8 +37,8 @@
+ extern "C" {
+ #endif
+ 
+-#define ZLIB_VERSION "1.2.3"
+-#define ZLIB_VERNUM 0x1230
++#define ZLIB_VERSION "1.2.3-T4mods"
++#define ZLIB_VERNUM 0x123f
+ 
+ /*
+      The 'zlib' compression library provides in-memory compression and
+--- zlib-1.2.3/Makefile.in.orig	Mon May 14 14:29:35 2012
++++ zlib-1.2.3/Makefile.in	Mon May 14 14:30:29 2012
+@@ -77,8 +77,8 @@
+ 	mv _match.o match.o
+ 	rm -f _match.s
+ 
+-$(SHAREDLIBV): $(OBJS)
+-	$(LDSHARED) -o $@ $(OBJS)
++$(SHAREDLIBV): $(OBJS) $(PIC_OBJS)
++	$(LDSHARED) -o $@ $(OBJS) $(PIC_OBJS)
+ 	rm -f $(SHAREDLIB) $(SHAREDLIBM)
+ 	ln -s $@ $(SHAREDLIB)
+ 	ln -s $@ $(SHAREDLIBM)
+
+--- zlib-1.2.3/inffast.c.orig	Fri Nov 12 22:05:29 2004
++++ zlib-1.2.3/inffast.c	Tue Mar 27 08:05:36 2012
+@@ -87,7 +87,7 @@
+     code const FAR *dcode;      /* local strm->distcode */
+     unsigned lmask;             /* mask for first level of length codes */
+     unsigned dmask;             /* mask for first level of distance codes */
+-    code this;                  /* retrieved table entry */
++    code *this;                 /* retrieved table entry */
+     unsigned op;                /* code bits, operation, extra bits, or */
+                                 /*  window position, window bytes to copy */
+     unsigned len;               /* match length, unused bytes */
+@@ -124,20 +124,20 @@
+             hold += (unsigned long)(PUP(in)) << bits;
+             bits += 8;
+         }
+-        this = lcode[hold & lmask];
++        this = (code *)(&(lcode[hold & lmask]));
+       dolen:
+-        op = (unsigned)(this.bits);
++        op = (unsigned)(this->bits);
+         hold >>= op;
+         bits -= op;
+-        op = (unsigned)(this.op);
++        op = (unsigned)(this->op);
+         if (op == 0) {                          /* literal */
+-            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
++            Tracevv((stderr, this->val >= 0x20 && this->val < 0x7f ?
+                     "inflate:         literal '%c'\n" :
+-                    "inflate:         literal 0x%02x\n", this.val));
+-            PUP(out) = (unsigned char)(this.val);
++                    "inflate:         literal 0x%02x\n", this->val));
++            PUP(out) = (unsigned char)(this->val);
+         }
+         else if (op & 16) {                     /* length base */
+-            len = (unsigned)(this.val);
++            len = (unsigned)(this->val);
+             op &= 15;                           /* number of extra bits */
+             if (op) {
+                 if (bits < op) {
+@@ -155,14 +155,14 @@
+                 hold += (unsigned long)(PUP(in)) << bits;
+                 bits += 8;
+             }
+-            this = dcode[hold & dmask];
++            this = (code *)(&(dcode[hold & dmask]));
+           dodist:
+-            op = (unsigned)(this.bits);
++            op = (unsigned)(this->bits);
+             hold >>= op;
+             bits -= op;
+-            op = (unsigned)(this.op);
++            op = (unsigned)(this->op);
+             if (op & 16) {                      /* distance base */
+-                dist = (unsigned)(this.val);
++                dist = (unsigned)(this->val);
+                 op &= 15;                       /* number of extra bits */
+                 if (bits < op) {
+                     hold += (unsigned long)(PUP(in)) << bits;
+@@ -259,7 +259,8 @@
+                 }
+             }
+             else if ((op & 64) == 0) {          /* 2nd level distance code */
+-                this = dcode[this.val + (hold & ((1U << op) - 1))];
++                this = (code *)
++		    (&(dcode[this->val + (hold & ((1U << op) - 1))]));
+                 goto dodist;
+             }
+             else {
+@@ -269,7 +270,7 @@
+             }
+         }
+         else if ((op & 64) == 0) {              /* 2nd level length code */
+-            this = lcode[this.val + (hold & ((1U << op) - 1))];
++            this = (code *)(&(lcode[this->val + (hold & ((1U << op) - 1))]));
+             goto dolen;
+         }
+         else if (op & 32) {                     /* end-of-block */
+
+--- zlib-1.2.3/deflate.c.orig   Tue Mar 27 10:02:52 2012
++++ zlib-1.2.3/deflate.c        Sun Jul 17 19:27:31 2005
+@@ -88,9 +88,13 @@
+       void match_init OF((void)); /* asm code initialization */
+       uInt longest_match  OF((deflate_state *s, IPos cur_match));
+ #else
++#ifdef ORIG_LONGEST_MATCH
+ local uInt longest_match  OF((deflate_state *s, IPos cur_match));
++#else
++uInt longest_match  OF((deflate_state *s, IPos cur_match));
+ #endif
+ #endif
++#endif
+ local uInt longest_match_fast OF((deflate_state *s, IPos cur_match));
+ 
+ #ifdef DEBUG
+@@ -1010,6 +1014,7 @@
+ #endif
+ }
+ 
++#if defined(ORIG_LONGEST_MATCH) || defined(ORIG_LONGEST_MATCH_GLOBAL)
+ #ifndef FASTEST
+ /* ===========================================================================
+  * Set match_start to the longest match starting at the given string and
+@@ -1024,7 +1029,11 @@
+ /* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+  * match.S. The code will be functionally equivalent.
+  */
++#ifdef ORIG_LONGEST_MATCH_GLOBAL
++uInt longest_match(s, cur_match)
++#else
+ local uInt longest_match(s, cur_match)
++#endif
+     deflate_state *s;
+     IPos cur_match;                             /* current match */
+ {
+@@ -1168,6 +1177,7 @@
+ }
+ #endif /* ASMV */
+ #endif /* FASTEST */
++#endif /* ORIG_LONGEST_MATCH || ORIG_LONGEST_MATCH_GLOBAL */
+ 
+ /* ---------------------------------------------------------------------------
+  * Optimized version for level == 1 or strategy == Z_RLE only