# HG changeset patch # User Misaki Miyashita # Date 1373331018 25200 # Node ID 3515c1afdfc8f59e94aa9898d8c046bb1b4afa8d # Parent 15aec33b84fa0211d939de5b91da8f7ddef7ce45 PSARC 2013/034 OpenSSL 1.0.1 15824600 SUNBT7206152 T4 montmul should be embedded in the OpenSSL upstream src diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/README --- a/components/openssl/README Mon Jul 08 16:18:46 2013 -0700 +++ b/components/openssl/README Mon Jul 08 17:50:18 2013 -0700 @@ -43,8 +43,14 @@ we have decided to patch the code. The following files/code are copied in from 1.0.2. added: + components/openssl/openssl-1.0.1/inline-t4/aest4-sparcv9.pl + components/openssl/openssl-1.0.1/inline-t4/dest4-sparcv9.pl components/openssl/openssl-1.0.1/inline-t4/md5-sparcv9.pl components/openssl/openssl-1.0.1/inline-t4/sparc_arch.h + components/openssl/openssl-1.0.1/inline-t4/sparct4-mont.pl + components/openssl/openssl-1.0.1/inline-t4/sparcv9_modes.pl + components/openssl/openssl-1.0.1/inline-t4/sparcv9-gf2m.pl + components/openssl/openssl-1.0.1/inline-t4/vis3-mont.pl components/openssl/openssl-1.0.1/patches/openssl-t4-inline.sparc-patch TPNO for OpenSSL 1.0.1e is 13003. @@ -120,11 +126,6 @@ 31_dtls_version.patch Fix DTLS_BAD_VER bug reported after OpenSSL 1.0.1e is released. -openssl-1.0.0d-t4-engine.sparc-patch -SPARC-only patch. -Add a built-in engine, t4, to support SPARC T4 crypto instructions. -along with files in directory engines/t4. - openssl-t4-inline.sparc-patch SPARC-only patch. 
Add patch to support inline T4 instruction in OpenSSL upstream code until diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/Makefile --- a/components/openssl/openssl-1.0.1/Makefile Mon Jul 08 16:18:46 2013 -0700 +++ b/components/openssl/openssl-1.0.1/Makefile Mon Jul 08 17:50:18 2013 -0700 @@ -42,7 +42,6 @@ # Architecture-specific patches EXTRA_PATCHES.sparc = $(PATCH_DIR)/openssl-t4-inline.sparc-patch -EXTRA_PATCHES.sparc += $(PATCH_DIR)/openssl-1.0.1e-t4-engine.sparc-patch EXTRA_PATCHES = $(EXTRA_PATCHES.$(MACH)) include $(WS_TOP)/make-rules/prep.mk @@ -175,16 +174,15 @@ ( echo "Cloning engines..."; \ $(LN) -fs $(COMPONENT_DIR)/engines/devcrypto/*.[ch] $(@D)/engines; \ $(LN) -fs $(COMPONENT_DIR)/engines/pkcs11/*.[ch] $(@D)/crypto/engine; \ - $(LN) -fs $(COMPONENT_DIR)/engines/t4/eng_t4*.[ch] $(@D)/crypto/engine; \ - $(LN) -fs $(COMPONENT_DIR)/engines/t4/t4_aes.S $(@D)/crypto/aes/asm; \ - $(LN) -fs $(COMPONENT_DIR)/engines/t4/t4_md5.S $(@D)/crypto/md5/asm; \ - $(LN) -fs $(COMPONENT_DIR)/engines/t4/t4_sha?.S $(@D)/crypto/sha/asm; \ $(LN) -fs $(COMPONENT_DIR)/wanboot-openssl/wanboot-stubs.c $(@D)/crypto; \ $(LN) -fs $(COMPONENT_DIR)/inline-t4/sparc_arch.h $(@D)/crypto/; \ $(LN) -fs $(COMPONENT_DIR)/inline-t4/md5-sparcv9.pl $(@D)/crypto/md5/asm; \ $(LN) -fs $(COMPONENT_DIR)/inline-t4/aest4-sparcv9.pl $(@D)/crypto/aes/asm; \ $(LN) -fs $(COMPONENT_DIR)/inline-t4/dest4-sparcv9.pl $(@D)/crypto/des/asm; \ - $(LN) -fs $(COMPONENT_DIR)/inline-t4/sparcv9_modes.pl $(@D)/crypto/perlasm; ) + $(LN) -fs $(COMPONENT_DIR)/inline-t4/sparcv9_modes.pl $(@D)/crypto/perlasm; \ + $(LN) -fs $(COMPONENT_DIR)/inline-t4/vis3-mont.pl $(@D)/crypto/bn/asm; \ + $(LN) -fs $(COMPONENT_DIR)/inline-t4/sparcv9-gf2m.pl $(@D)/crypto/bn/asm; \ + $(LN) -fs $(COMPONENT_DIR)/inline-t4/sparct4-mont.pl $(@D)/crypto/bn/asm; ) # OpenSSL for wanboot is built on sparc only. 
ifeq ($(MACH), sparc) @@ -232,6 +230,7 @@ crypto/bn/bn_prime.o crypto/bn/bn_print.o crypto/bn/bn_rand.o \ crypto/bn/bn_recp.o crypto/bn/bn_shift.o crypto/bn/bn_sqr.o \ crypto/bn/bn_word.o crypto/bn/sparcv9-mont.o crypto/bn/sparcv9a-mont.o \ + crypto/bn/sparct4-mont.o crypto/bn/vis3-mont.o \ crypto/buffer/buf_err.o crypto/buffer/buf_str.o crypto/buffer/buffer.o \ crypto/camellia/camellia.o crypto/camellia/cmll_cbc.o \ crypto/camellia/cmll_cfb.o crypto/camellia/cmll_ecb.o \ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4.c --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4.c Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,230 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). - */ - -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. 
For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved. - */ - -/* - * This engine supports SPARC microprocessors that provide T4 MONTMUL - * instructions, such as the T4 microprocessor. 
- */ - -#include - -#if !defined(OPENSSL_NO_HW) -#include -#include /* getisax() */ -#include -#include -#include -#include -#include - -#define T4_LIB_NAME "SPARC T4 engine" -#include "eng_t4_err.c" - -/* Constants used when creating the ENGINE */ -static const char *ENGINE_T4_ID = "t4"; -static const char *ENGINE_T4_NAME = "SPARC T4 engine support"; -static const char *ENGINE_NO_T4_NAME = "SPARC T4 engine support (no T4)"; - - -#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \ - defined(__sparcv8)) && !defined(OPENSSL_NO_ASM) -#define COMPILE_HW_T4 -static int t4_bind_helper(ENGINE *e, const char *id); -#pragma inline(t4_bind_helper) -#endif - -/* - * This makes the engine "built-in" with OpenSSL. - * On non-T4 CPUs this just returns. - * Called by ENGINE_load_builtin_engines(). - */ -void -ENGINE_load_t4(void) -{ -#ifdef COMPILE_HW_T4 - ENGINE *toadd = ENGINE_new(); - if (toadd != NULL) { - if (t4_bind_helper(toadd, ENGINE_T4_ID) != 0) { - (void) ENGINE_add(toadd); - (void) ENGINE_free(toadd); - ERR_clear_error(); - } else { - (void) ENGINE_free(toadd); - } - } -#endif -} - - -#ifdef COMPILE_HW_T4 -static int t4_bind(ENGINE *e); -#ifndef DYNAMIC_ENGINE -#pragma inline(t4_bind) -#endif -static void t4_instructions_present(_Bool *montmul_present); -#pragma inline(t4_instructions_present) - -/* RSA_METHOD structure used by ENGINE_set_RSA() */ -extern RSA_METHOD *t4_RSA(void); - -/* DH_METHOD structure used by ENGINE_set_DH() */ -extern DH_METHOD *t4_DH(void); - -/* DSA_METHOD structure used by ENGINE_set_DSA() */ -extern DSA_METHOD *t4_DSA(void); - -/* - * Utility Functions - */ - -/* - * Set montmul_present to B_FALSE or B_TRUE depending on whether the - * current SPARC processor supports MONTMUL. - */ -static void -t4_instructions_present(_Bool *montmul_present) -{ - uint_t ui; - - (void) getisax(&ui, 1); - *montmul_present = ((ui & AV_SPARC_MONT) != 0); -} - - - -/* - * Is the t4 engine available? - * Passed to ENGINE_set_init_function(). 
- */ -/* ARGSUSED */ -static int -t4_init(ENGINE *e) -{ - return (1); -} - -/* Passed to ENGINE_set_destroy_function(). */ -/* ARGSUSED */ -static int -t4_destroy(ENGINE *e) -{ - ERR_unload_t4_strings(); - return (1); -} - - -/* - * Called by t4_bind_helper(). - * Note: too early to use T4err() functions on errors. - */ -/* ARGSUSED */ -static int -t4_bind(ENGINE *e) -{ - _Bool montmul_engage; - - /* Register T4 engine ID, name, and functions */ - if (!ENGINE_set_id(e, ENGINE_T4_ID) || - !ENGINE_set_name(e, - montmul_engage ? ENGINE_T4_NAME : ENGINE_NO_T4_NAME) || - !ENGINE_set_init_function(e, t4_init) || -#ifndef OPENSSL_NO_RSA - (montmul_engage && !ENGINE_set_RSA(e, t4_RSA())) || -#endif /* OPENSSL_NO_RSA */ -#ifndef OPENSSL_NO_DH - (montmul_engage && !ENGINE_set_DH(e, t4_DH())) || -#endif /* OPENSSL_NO_DH */ -#ifndef OPENSSL_NO_DSA - (montmul_engage && !ENGINE_set_DSA(e, t4_DSA())) || -#endif /* OPENSSL_NO_DSA */ - !ENGINE_set_destroy_function(e, t4_destroy)) { - return (0); - } - - return (1); -} - - -/* - * Called by ENGINE_load_t4(). - * Note: too early to use T4err() functions on errors. 
- */ -static int -t4_bind_helper(ENGINE *e, const char *id) -{ - if (id != NULL && (strcmp(id, ENGINE_T4_ID) != 0)) { - (void) fprintf(stderr, "T4: bad t4 engine ID\n"); - return (0); - } - if (!t4_bind(e)) { - (void) fprintf(stderr, - "T4: failed to bind t4 engine\n"); - return (0); - } - - return (1); -} - - -#ifdef DYNAMIC_ENGINE -IMPLEMENT_DYNAMIC_CHECK_FN() -IMPLEMENT_DYNAMIC_BIND_FN(t4_bind_helper) -#endif /* DYNAMIC_ENGINE */ -#endif /* COMPILE_HW_T4 */ -#endif /* !OPENSSL_NO_HW */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4_aes_asm.h --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4_aes_asm.h Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,147 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). - */ - -/* - * ==================================================================== - * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. 
The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. - */ - -#ifndef ENG_T4_AES_ASM_H -#define ENG_T4_AES_ASM_H - -/* - * SPARC AES assembly language functions. - * - * Based on Solaris file aes_impl.h. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \ - defined(__sparcv8)) && ! 
defined(OPENSSL_NO_ASM) - -extern void t4_aes_expand128(uint64_t *rk, const uint32_t *key); -extern void t4_aes_expand192(uint64_t *rk, const uint32_t *key); -extern void t4_aes_expand256(uint64_t *rk, const uint32_t *key); -extern void t4_aes_encrypt128(const uint64_t *rk, const uint32_t *pt, - uint32_t *ct); -extern void t4_aes_encrypt192(const uint64_t *rk, const uint32_t *pt, - uint32_t *ct); -extern void t4_aes_encrypt256(const uint64_t *rk, const uint32_t *pt, - uint32_t *ct); -extern void t4_aes_decrypt128(const uint64_t *rk, const uint32_t *ct, - uint32_t *pt); -extern void t4_aes_decrypt192(const uint64_t *rk, const uint32_t *ct, - uint32_t *pt); -extern void t4_aes_decrypt256(const uint64_t *rk, const uint32_t *ct, - uint32_t *pt); -extern void t4_aes128_load_keys_for_encrypt(uint64_t *ks); -extern void t4_aes192_load_keys_for_encrypt(uint64_t *ks); -extern void t4_aes256_load_keys_for_encrypt(uint64_t *ks); -extern void t4_aes128_ecb_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *dummy); -extern void t4_aes192_ecb_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *dummy); -extern void t4_aes256_ecb_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *dummy); -extern void t4_aes128_cbc_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv); -extern void t4_aes192_cbc_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv); -extern void t4_aes256_cbc_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv); -extern void t4_aes128_ctr_crypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv); -extern void t4_aes192_ctr_crypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv); -extern void 
t4_aes256_ctr_crypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv); -extern void t4_aes128_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv); -extern void t4_aes192_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv); -extern void t4_aes256_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv); -extern void t4_aes128_load_keys_for_decrypt(uint64_t *ks); -extern void t4_aes192_load_keys_for_decrypt(uint64_t *ks); -extern void t4_aes256_load_keys_for_decrypt(uint64_t *ks); -extern void t4_aes128_ecb_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *dummy); -extern void t4_aes192_ecb_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *dummy); -extern void t4_aes256_ecb_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *dummy); -extern void t4_aes128_cbc_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv); -extern void t4_aes192_cbc_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv); -extern void t4_aes256_cbc_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv); -extern void t4_aes128_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv); -extern void t4_aes192_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv); -extern void t4_aes256_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv); - -#endif /* (sun4v||__sparv9||__sparcv8plus||__sparvc8) && !OPENSSL_NO_ASM */ - -#ifdef __cplusplus -} -#endif -#endif /* 
ENG_T4_AES_ASM_H */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4_bignum.h --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4_bignum.h Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,287 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). - */ - -/* - * ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. 
Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. 
- */ - -/* - * This file is a copy of the ON gate's usr/src/common/bignum/bignum.h file - */ - -#ifndef _BIGNUM_H -#define _BIGNUM_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -#if defined(__sparcv9) || defined(__amd64) || defined(__sparc) - /* 64-bit chunk size */ -#ifndef UMUL64 -#define UMUL64 /* 64-bit multiplication results are supported */ -#endif -#else -#define BIGNUM_CHUNK_32 -#endif - - -#define BITSINBYTE 8 - -/* Bignum "digits" (aka "chunks" or "words") are either 32- or 64-bits */ -#ifdef BIGNUM_CHUNK_32 -#define BIG_CHUNK_SIZE 32 -#define BIG_CHUNK_TYPE uint32_t -#define BIG_CHUNK_TYPE_SIGNED int32_t -#define BIG_CHUNK_HIGHBIT 0x80000000 -#define BIG_CHUNK_ALLBITS 0xffffffff -#define BIG_CHUNK_LOWHALFBITS 0xffff -#define BIG_CHUNK_HALF_HIGHBIT 0x8000 - -#else -#define BIG_CHUNK_SIZE 64 -#define BIG_CHUNK_TYPE uint64_t -#define BIG_CHUNK_TYPE_SIGNED int64_t -#define BIG_CHUNK_HIGHBIT 0x8000000000000000ULL -#define BIG_CHUNK_ALLBITS 0xffffffffffffffffULL -#define BIG_CHUNK_LOWHALFBITS 0xffffffffULL -#define BIG_CHUNK_HALF_HIGHBIT 0x80000000ULL -#endif - -#define BITLEN2BIGNUMLEN(x) ((x) > 0 ? \ - ((((x) - 1) / BIG_CHUNK_SIZE) + 1) : 0) -#define CHARLEN2BIGNUMLEN(x) ((x) > 0 ? 
\ - ((((x) - 1) / sizeof (BIG_CHUNK_TYPE)) + 1) : 0) - -#define BIGNUM_WORDSIZE (BIG_CHUNK_SIZE / BITSINBYTE) /* word size in bytes */ -#define BIG_CHUNKS_FOR_160BITS BITLEN2BIGNUMLEN(160) - - -/* - * leading 0's are permitted - * 0 should be represented by size>=1, size>=len>=1, sign=1, - * value[i]=0 for 0 -#include "eng_t4_err.h" - -/* BEGIN ERROR CODES */ - -#ifndef OPENSSL_NO_ERR - -#define ERR_FUNC(func) ERR_PACK(0, func, 0) -#define ERR_REASON(reason) ERR_PACK(0, 0, reason) - -static ERR_STRING_DATA t4_str_functs[] = { - {ERR_FUNC(T4_F_INIT), "T4_INIT"}, - {ERR_FUNC(T4_F_DESTROY), "T4_DESTROY"}, - {ERR_FUNC(T4_F_FINISH), "T4_FINISH"}, - {ERR_FUNC(T4_F_CIPHER_INIT_AES), "T4_CIPHER_INIT_AES"}, - {ERR_FUNC(T4_F_ADD_NID), "T4_ADD_NID"}, - {ERR_FUNC(T4_F_GET_ALL_CIPHERS), "T4_GET_ALL_CIPHERS"}, - {ERR_FUNC(T4_F_CIPHER_DO_AES), "T4_CIPHER_DO_AES"}, - {ERR_FUNC(T4_F_CIPHER_CLEANUP), "T4_CIPHER_CLEANUP"}, - {ERR_FUNC(T4_F_CIPHER_INIT_DES), "T4_CIPHER_INIT_DES"}, - {ERR_FUNC(T4_F_CIPHER_DO_DES), "T4_CIPHER_DO_DES"}, - {0, NULL} -}; - -static ERR_STRING_DATA t4_str_reasons[] = { - {ERR_REASON(T4_R_CIPHER_KEY), "invalid cipher key"}, - {ERR_REASON(T4_R_CIPHER_NID), "invalid cipher NID"}, - {ERR_REASON(T4_R_IV_LEN_INCORRECT), "IV length incorrect"}, - {ERR_REASON(T4_R_KEY_LEN_INCORRECT), "key length incorrect"}, - {ERR_REASON(T4_R_ASN1_OBJECT_CREATE), "ASN1_OBJECT_create failed"}, - {ERR_REASON(T4_R_NOT_BLOCKSIZE_LENGTH), "blocksize length not even"}, - {0, NULL} -}; -#endif /* OPENSSL_NO_ERR */ - - -#ifdef T4_LIB_NAME -static ERR_STRING_DATA T4_lib_name[] = { - {0, T4_LIB_NAME}, - {0, NULL} -}; -#endif - -static int t4_error_code = 0; -static int t4_error_init = 1; - - -static void -ERR_load_t4_strings(void) -{ - if (t4_error_code == 0) - t4_error_code = ERR_get_next_error_library(); - - if (t4_error_init != 0) { - t4_error_init = 0; - -#ifndef OPENSSL_NO_ERR - ERR_load_strings(t4_error_code, t4_str_functs); - ERR_load_strings(t4_error_code, t4_str_reasons); -#endif - 
-#ifdef T4_LIB_NAME - T4_lib_name->error = - ERR_PACK(t4_error_code, 0, 0); - ERR_load_strings(0, T4_lib_name); -#endif - } -} - - -static void -ERR_unload_t4_strings(void) -{ - if (t4_error_init == 0) { -#ifndef OPENSSL_NO_ERR - ERR_unload_strings(t4_error_code, t4_str_functs); - ERR_unload_strings(t4_error_code, t4_str_reasons); -#endif - -#ifdef T4_LIB_NAME - ERR_unload_strings(0, T4_lib_name); -#endif - t4_error_init = 1; - } -} - - -static void -ERR_t4_error(int function, int reason, char *file, int line) -{ - if (t4_error_init != 0) { - ERR_load_t4_strings(); - } - - if (t4_error_code == 0) - t4_error_code = ERR_get_next_error_library(); - ERR_PUT_error(t4_error_code, function, reason, file, line); -} diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4_err.h --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4_err.h Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). - */ - -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. 
All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
- */ - -#ifndef ENG_T4_ERR_H -#define ENG_T4_ERR_H - -#ifdef __cplusplus -extern "C" { -#endif - -static void ERR_unload_t4_strings(void); -#pragma inline(ERR_unload_t4_strings) -static void ERR_t4_error(int function, int reason, char *file, int line); - -#define T4err(f, r) ERR_t4_error((f), (r), __FILE__, __LINE__) - -/* Function codes */ -#define T4_F_INIT 100 -#define T4_F_DESTROY 101 -#define T4_F_FINISH 102 -#define T4_F_CIPHER_INIT_AES 103 -#define T4_F_ADD_NID 104 -#define T4_F_GET_ALL_CIPHERS 105 -#define T4_F_CIPHER_DO_AES 106 -#define T4_F_CIPHER_CLEANUP 107 -#define T4_F_CIPHER_INIT_DES 108 -#define T4_F_CIPHER_DO_DES 109 - -/* Reason codes */ -#define T4_R_CIPHER_KEY 100 -#define T4_R_CIPHER_NID 101 -#define T4_R_IV_LEN_INCORRECT 102 -#define T4_R_KEY_LEN_INCORRECT 103 -#define T4_R_ASN1_OBJECT_CREATE 104 -#define T4_R_NOT_BLOCKSIZE_LENGTH 105 - -#ifdef __cplusplus -} -#endif - -#endif /* ENG_T4_ERR_H */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4_md5.c --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4_md5.c Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,200 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). This - * product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). - */ - -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * ==================================================================== - */ - -/* - * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. - */ - -/* - * This engine supports SPARC microprocessors that provide AES and other - * cipher and hash instructions, such as the T4 microprocessor. - * - * This file implements the MD5 message digest operations. - */ - -#include - -#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_MD_T4) -#ifndef OPENSSL_NO_MD5 - -#include -#include -#include -#include - -#include -#include -/* - * Solaris sys/md5.h and OpenSSL openssl/md5.h both define MD5_CTX. - * The OpenSSL MD5_CTX has an extra "num" field at the end. - */ -#include - -#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \ - defined(__sparcv8)) && !defined(OPENSSL_NO_ASM) -#define COMPILE_HW_T4 -#endif - -#ifdef COMPILE_HW_T4 - -/* Assembly language function; replaces C function md5_block_data_order(): */ -extern void t4_md5_multiblock(MD5_CTX *ctx, const uint8_t *input, - unsigned int input_length_in_blocks); - -/* Formal declaration for functions in EVP_MD structure */ -int t4_digest_init_md5(EVP_MD_CTX *ctx); -int t4_digest_update_md5(EVP_MD_CTX *ctx, const void *data, size_t count); -int t4_digest_final_md5(EVP_MD_CTX *ctx, unsigned char *md); -int t4_digest_copy_md5(EVP_MD_CTX *to, const EVP_MD_CTX *from); - - -/* - * MD5 Message Digests - * - * OpenSSL's libcrypto EVP stuff. This is how this engine gets wired to EVP. - * EVP_MD is defined in evp.h. To maintain binary compatibility the - * definition cannot be modified. - * Stuff specific to the t4 engine is kept in t4_cipher_ctx_t, which is - * pointed to by the last field, app_data. 
- * - * Fields: type, pkey_type, md_size, flags, - * init(), update(), final(), - * copy(), cleanup(), sign(), verify(), - * required_pkey_type, block_size, ctx_size, md5_ctrl() - */ -const EVP_MD t4_md5 = { - NID_md5, NID_md5WithRSAEncryption, MD5_DIGEST_LENGTH, - 0, - t4_digest_init_md5, t4_digest_update_md5, t4_digest_final_md5, - t4_digest_copy_md5, NULL, - EVP_PKEY_RSA_method, MD5_CBLOCK, - sizeof (MD5_CTX), NULL - }; - -/* These functions are implemented in md32_common.h: */ -static int t4_md5_update(MD5_CTX *c, const void *data_, size_t len); -static void t4_md5_transform(MD5_CTX *c, const unsigned char *data); -static int t4_md5_final(unsigned char *md, MD5_CTX *c); -#pragma inline(t4_md5_update, t4_md5_transform, t4_md5_final) - -#define DATA_ORDER_IS_LITTLE_ENDIAN -/* HASH_LONG/MD5_LONG is a 32-bit unsigned: */ -#define HASH_LONG MD5_LONG -#define HASH_CTX MD5_CTX -#define HASH_CBLOCK MD5_CBLOCK -#define HASH_UPDATE t4_md5_update -#define HASH_TRANSFORM t4_md5_transform -#define HASH_FINAL t4_md5_final -#define HASH_BLOCK_DATA_ORDER t4_md5_multiblock -/* Hash is already byte-swapped as Little Endian for SPARC T4: */ -#define HASH_MAKE_STRING(c, s) memcpy((s), (c), MD5_DIGEST_LENGTH); - -/* This defines HASH_UPDATE, HASH_TRANSFORM, HASH_FINAL functions: */ -#include "md32_common.h" - - -/* - * MD5 functions (RFC 1321 The MD5 Message-Digest Algorithm) - */ - -int -t4_digest_init_md5(EVP_MD_CTX *ctx) -{ - MD5_CTX *c = (MD5_CTX *)ctx->md_data; - - /* Optimization: don't call memset(c, 0,...) or initialize c->data[] */ - c->Nl = c->Nh = c->num = 0; - - /* Big Endian for T4 */ - c->A = 0x01234567U; - c->B = 0x89abcdefU; - c->C = 0xfedcba98U; - c->D = 0x76543210U; - - return (1); -} - -/* - * Continue MD5 digest operation, using the message block to update context. - * MD5 crunches in 64-byte blocks. 
- */ -int -t4_digest_update_md5(EVP_MD_CTX *ctx, const void *data, size_t len) -{ - return (t4_md5_update((MD5_CTX *)ctx->md_data, data, len)); -} - -/* End MD5 digest operation, finalizing message digest and zeroing context. */ -int -t4_digest_final_md5(EVP_MD_CTX *ctx, unsigned char *md) -{ - return (t4_md5_final(md, (MD5_CTX *)ctx->md_data)); -} - -/* Required for Engine API */ -int -t4_digest_copy_md5(EVP_MD_CTX *to, const EVP_MD_CTX *from) -{ - if ((to->md_data != NULL) && (from->md_data != NULL)) { - (void) memcpy(to->md_data, from->md_data, sizeof (MD5_CTX)); - } - return (1); -} - -#endif /* COMPILE_HW_T4 */ -#endif /* !OPENSSL_NO_MD5 */ -#endif /* !OPENSSL_NO_HW && !OPENSSL_NO_HW_MD_T4 */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4_montmul.c --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4_montmul.c Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,459 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). - */ - -/* - * ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. 
All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
- */ - -/* - * This engine supports SPARC microprocessors that provide AES and other - * cipher and hash instructions, such as the T4 microprocessor. - * - * This file implements the RSA, DSA, and DH operations. - */ - -#include - -#define BIGNUM SOLARIS_BIGNUM -#include "eng_t4_bignum.h" -#undef BIGNUM - - -#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_MONTMUL_T4) -#include -#include /* getisax() */ -#include /* IS_P2ALIGNED() */ -#include /* htonl() and friends */ -#include -#include -#include -#include - -#ifndef OPENSSL_NO_RSA -#include -#endif /* !OPENSSL_NO_RSA */ - -#ifndef OPENSSL_NO_DSA -#include -#endif /* !OPENSSL_NO_DSA */ - -#ifndef OPENSSL_NO_DH -#include -#endif /* !OPENSSL_NO_DH */ - -#include -#include -#include - -#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \ - defined(__sparcv8)) && !defined(OPENSSL_NO_ASM) -#define COMPILE_HW_T4 -#endif - -#ifdef COMPILE_HW_T4 - -#if !(defined(OPENSSL_NO_RSA) && defined(OPENSSL_NO_DSA) && \ - defined(OPENSSL_NO_DH)) - - -/* - * Convert OpenSSL's BIGNUM to Solaris's BIGNUM.... - * It assumes that the Solaris BIGNUM has enough space - */ -static void -bn2solbn(const BIGNUM *src, SOLARIS_BIGNUM *dst) -{ - int i, j; - - if (BN_BITS2 < BIG_CHUNK_SIZE) { - for (i = 0, j = 0; i < src->top; i++) { - if ((i & 1) == 0) { - dst->value[j] = src->d[i]; - } else { - dst->value[j] += ((uint64_t)(src->d[i])) << 32; - j++; - } - } - dst->len = (src->top + 1) / 2; - dst->sign = (src->neg == 1) ? -1 : 1; - } else if (BN_BITS2 == BIG_CHUNK_SIZE) { - (void) memcpy(dst->value, src->d, src->top); - dst->len = src->top; - dst->sign = (src->neg == 1) ? -1 : 1; - } else { /* BN_BITS2 > BIG_CHUNK_SIZE */ - for (i = 0, j = 0; i < src->top; i++) { - dst->value[j++] = src->d[i] & 0xffffffffULL; - dst->value[j++] = ((uint64_t)(src->d[i])) >> 32; - } - dst->len = src->top * 2; - if (dst->value[dst->len - 1] == 0) { - dst->len--; - } - dst->sign = (src->neg == 1) ? 
-1 : 1; - } -} - -/* - * It assumes that OpenSSL's BIGNUM has enough space. - */ -static void -solbn2bn(const SOLARIS_BIGNUM *src, BIGNUM *dst) -{ - int i, j; - - if (BN_BITS2 < BIG_CHUNK_SIZE) { - for (i = 0, j = 0; i < src->len; i++) { - dst->d[j++] = src->value[i] & 0xffffffffULL; - dst->d[j++] = ((uint64_t)(src->value[i])) >> 32; - } - dst->top = src->len * 2; - if (dst->d[dst->top - 1] == 0) { - dst->top--; - } - dst->neg = (src->sign == -1) ? 1 : 0; - } else if (BN_BITS2 == BIG_CHUNK_SIZE) { - (void) memcpy(src->value, dst->d, src->len); - dst->top = src->len; - dst->neg = (src->sign == -1) ? 1 : 0; - } else { /* BN_BITS2 > BIG_CHUNK_SIZE */ - for (i = 0, j = 0; i < src->len; i++) { - if ((i & 1) == 0) { - dst->d[j] = src->value[i]; - } else { - dst->d[j] += ((uint64_t)(src->value[i])) << 32; - j++; - } - } - dst->top = (src->len + 1) / 2; - dst->neg = (src->sign == -1) ? 1 : 0; - } -} - - - -static int -t4_bn_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, - BN_CTX *ctx, BN_MONT_CTX *m_ctx) -{ - int rv = 0; - SOLARIS_BIGNUM sol_r = {0}; - SOLARIS_BIGNUM sol_a = {0}; - SOLARIS_BIGNUM sol_p = {0}; - SOLARIS_BIGNUM sol_m = {0}; - - if (big_init(&sol_r, (m->top + 3) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - if (big_init(&sol_a, (a->top + 1) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - if (big_init(&sol_p, (p->top + 1) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - if (big_init(&sol_m, (m->top + 1) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - - bn2solbn(a, &sol_a); - bn2solbn(p, &sol_p); - bn2solbn(m, &sol_m); - - /* calls libsoftcrypto's big_modexp() routine */ - if (big_modexp(&sol_r, &sol_a, &sol_p, &sol_m, NULL) != BIG_OK) { - goto cleanup; - } - - if (bn_wexpand(r, m->top + 2) == NULL) { - goto cleanup; - } - solbn2bn(&sol_r, r); - - rv = 1; - -cleanup: - - big_finish(&sol_m); - big_finish(&sol_p); - big_finish(&sol_a); - big_finish(&sol_r); - - return 
(rv); -} - -#endif /* !(OPENSSL_NO_RSA && OPENSSL_NO_DSA) */ - -#ifndef OPENSSL_NO_RSA - -/* Our internal RSA_METHOD that we provide pointers to */ -static RSA_METHOD t4_rsa = -{ - "Oracle T4 RSA method", - NULL, /* rsa_pub_encrypt */ - NULL, /* rsa_pub_decrypt */ - NULL, /* rsa_priv_encrypt */ - NULL, /* rsa_priv_decrypt */ - NULL, /* rsa_mod_exp */ - t4_bn_mod_exp, /* bn_mod_exp */ - NULL, /* init */ - NULL, /* finish */ - RSA_FLAG_CACHE_PUBLIC | RSA_FLAG_CACHE_PRIVATE | RSA_FLAG_NO_BLINDING, - /* flags */ - NULL, /* app_data */ - NULL, /* rsa_sign */ - NULL, /* rsa_verify */ - /* Internal rsa_keygen will be used if this is NULL. */ - NULL /* rsa_keygen */ -}; - -RSA_METHOD * -t4_RSA(void) -{ - const RSA_METHOD *meth1; - - meth1 = RSA_PKCS1_SSLeay(); - t4_rsa.rsa_pub_enc = meth1->rsa_pub_enc; - t4_rsa.rsa_pub_dec = meth1->rsa_pub_dec; - t4_rsa.rsa_priv_enc = meth1->rsa_priv_enc; - t4_rsa.rsa_priv_dec = meth1->rsa_priv_dec; - t4_rsa.rsa_mod_exp = meth1->rsa_mod_exp; - t4_rsa.finish = meth1->finish; - - return (&t4_rsa); -} - -#endif /* !OPENSSL_NO_RSA */ - - -#ifndef OPENSSL_NO_DSA - -static int -t4_dsa_bn_mod_exp(DSA *dsa, BIGNUM *r, BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx) -{ - return (t4_bn_mod_exp(r, a, p, m, ctx, m_ctx)); -} - - -static int -t4_dsa_mod_exp(DSA *dsa, BIGNUM *rr, BIGNUM *a1, BIGNUM *p1, - BIGNUM *a2, BIGNUM *p2, BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) -{ - int rv = 0; - SOLARIS_BIGNUM sol_rr = {0}; - SOLARIS_BIGNUM sol_a1 = {0}; - SOLARIS_BIGNUM sol_p1 = {0}; - SOLARIS_BIGNUM sol_a2 = {0}; - SOLARIS_BIGNUM sol_p2 = {0}; - SOLARIS_BIGNUM sol_m = {0}; - SOLARIS_BIGNUM sol_tmp = {0}; - - if (big_init(&sol_rr, (m->top + 3) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - if (big_init(&sol_a1, (a1->top + 1) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - if (big_init(&sol_p1, (p1->top + 1) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - if 
(big_init(&sol_a2, (a2->top + 1) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - if (big_init(&sol_p2, (p2->top + 1) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - if (big_init(&sol_m, (m->top + 1) * BN_BITS2 / BIG_CHUNK_SIZE) != - BIG_OK) { - goto cleanup; - } - if (big_init(&sol_tmp, 2 * sol_m.len + 1) != BIG_OK) { - goto cleanup; - } - - if (big_init(&sol_tmp, 2 * sol_m.len + 1) != BIG_OK) { - goto cleanup; - } - - bn2solbn(a1, &sol_a1); - bn2solbn(p1, &sol_p1); - bn2solbn(a2, &sol_a2); - bn2solbn(p2, &sol_p2); - bn2solbn(m, &sol_m); - - - /* calls libsoftcrypto's big_modexp() routine */ - if (big_modexp(&sol_rr, &sol_a1, &sol_p1, &sol_m, NULL) != - BIG_OK) { - goto cleanup; - } - - if (big_modexp(&sol_tmp, &sol_a2, &sol_p2, &sol_m, NULL) != - BIG_OK) { - goto cleanup; - } - - if (big_mul(&sol_tmp, &sol_rr, &sol_tmp) != BIG_OK) { - goto cleanup; - } - - if (big_div_pos(NULL, &sol_rr, &sol_tmp, &sol_m) != BIG_OK) { - goto cleanup; - } - - if (bn_wexpand(rr, m->top + 2) == NULL) { - goto cleanup; - } - solbn2bn(&sol_rr, rr); - - rv = 1; - -cleanup: - - big_finish(&sol_tmp); - big_finish(&sol_m); - big_finish(&sol_p2); - big_finish(&sol_a2); - big_finish(&sol_p1); - big_finish(&sol_a1); - big_finish(&sol_rr); - - return (rv); -} - -/* Our internal DSA_METHOD that we provide pointers to */ -static DSA_METHOD t4_dsa = -{ - "Oracle T4 DSA method", /* name */ - NULL, /* dsa_do_sign */ - NULL, /* dsa_sign_setup */ - NULL, /* dsa_do_verify */ - t4_dsa_mod_exp, /* dsa_mod_exp, */ - t4_dsa_bn_mod_exp, /* bn_mod_exp, */ - NULL, /* init */ - NULL, /* finish */ - NULL, /* flags */ - NULL, /* app_data */ - NULL, /* dsa_paramgen */ - NULL /* dsa_keygen */ -}; - -DSA_METHOD * -t4_DSA(void) -{ - const DSA_METHOD *meth1; - - meth1 = DSA_OpenSSL(); - t4_dsa.dsa_do_sign = meth1->dsa_do_sign; - t4_dsa.dsa_sign_setup = meth1->dsa_sign_setup; - t4_dsa.dsa_do_verify = meth1->dsa_do_verify; - t4_dsa.finish = meth1->finish; - - return (&t4_dsa); -} - 
-#endif /* !OPENSSL_NO_DSA */ - - -#ifndef OPENSSL_NO_DH - -static int -t4_dh_bn_mod_exp(const DH *dh, BIGNUM *r, const BIGNUM *a, const BIGNUM *p, - const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx) -{ - return (t4_bn_mod_exp(r, a, p, m, ctx, m_ctx)); -} - - - -/* Our internal DH_METHOD that we provide pointers to */ -static DH_METHOD t4_dh = -{ - "Oracle T4 DH method", /* name */ - NULL, /* generate_key */ - NULL, /* compute_key */ - t4_dh_bn_mod_exp, /* bn_mod_exp, */ - NULL, /* init */ - NULL, /* finish */ - NULL, /* flags */ - NULL /* app_data */ -}; - -DH_METHOD * -t4_DH(void) -{ - const DH_METHOD *meth1; - - meth1 = DH_OpenSSL(); - t4_dh.generate_key = meth1->generate_key; - t4_dh.compute_key = meth1->compute_key; - t4_dh.finish = meth1->finish; - - return (&t4_dh); -} - -#endif /* !OPENSSL_NO_DH */ - -#endif /* COMPILE_HW_T4 */ - -#endif /* !OPENSSL_NO_HW && !OPENSSL_NO_HW_MONTMUL_T4 */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4_sha1.c --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4_sha1.c Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,197 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). This - * product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). - */ - -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. - */ - -/* - * This engine supports SPARC microprocessors that provide AES and other - * cipher and hash instructions, such as the T4 microprocessor. - * - * This file implements the SHA-1 message digest operations. - */ - -#include - -#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_MD_T4) -#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) -#include -#include -#include -#include - -/* - * For SHA1, OpenSSL SHA_CTX has an extra num field at the end, - * while the Solaris SHA1_CTX does not have this field. - */ -#include -#include - -#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \ - defined(__sparcv8)) && !defined(OPENSSL_NO_ASM) -#define COMPILE_HW_T4 -#endif - -#ifdef COMPILE_HW_T4 - -/* Assembly language function; replaces C function sha1_block_data_order(): */ -extern void t4_sha1_multiblock(SHA_CTX *ctx, const void *input, size_t num); - -/* Formal declaration for functions in EVP_MD structure */ -static int t4_digest_init_sha1(EVP_MD_CTX *ctx); -static int t4_digest_update_sha1(EVP_MD_CTX *ctx, const void *data, - size_t len); -static int t4_digest_final_sha1(EVP_MD_CTX *ctx, unsigned char *md); -static int t4_digest_copy_sha1(EVP_MD_CTX *to, const EVP_MD_CTX *from); - -/* - * OpenSSL's libcrypto EVP stuff. This is how this engine gets wired to EVP. - * EVP_MD is defined in evp.h. To maintain binary compatibility the - * definition cannot be modified. - * Stuff specific to the t4 engine is kept in t4_cipher_ctx_t, which is - * pointed to by the last field, app_data. 
- * - * Fields: type, pkey_type, md_size, flags, - * init(), update(), final(), - * copy(), cleanup(), sign(), verify(), - * required_pkey_type, block_size, ctx_size, md5_ctrl() - */ -const EVP_MD t4_sha1 = { - NID_sha1, NID_sha1WithRSAEncryption, SHA_DIGEST_LENGTH, - EVP_MD_FLAG_PKEY_METHOD_SIGNATURE | EVP_MD_FLAG_DIGALGID_ABSENT, - t4_digest_init_sha1, t4_digest_update_sha1, t4_digest_final_sha1, - t4_digest_copy_sha1, NULL, - EVP_PKEY_RSA_method, SHA_CBLOCK, - sizeof (SHA_CTX), NULL - }; - -/* These functions are defined in md32_common.h: */ -static int t4_sha1_update(SHA_CTX *c, const void *data_, size_t len); -static void t4_sha1_transform(SHA_CTX *c, const unsigned char *data); -static int t4_sha1_final(unsigned char *md, SHA_CTX *c); -#pragma inline(t4_sha1_update, t4_sha1_transform, t4_sha1_final) - -#define DATA_ORDER_IS_BIG_ENDIAN -/* HASH_LONG/SHA_LONG is unsigned int (32 bits): */ -#define HASH_LONG SHA_LONG -#define HASH_CTX SHA_CTX -#define HASH_CBLOCK SHA_CBLOCK -#define HASH_UPDATE t4_sha1_update -#define HASH_TRANSFORM t4_sha1_transform -#define HASH_FINAL t4_sha1_final -#define HASH_BLOCK_DATA_ORDER t4_sha1_multiblock -#define HASH_MAKE_STRING(c, s) do { \ - unsigned int ll; \ - ll = (c)->h0; HOST_l2c(ll, (s)); \ - ll = (c)->h1; HOST_l2c(ll, (s)); \ - ll = (c)->h2; HOST_l2c(ll, (s)); \ - ll = (c)->h3; HOST_l2c(ll, (s)); \ - ll = (c)->h4; HOST_l2c(ll, (s)); \ - } while (0) - -/* This defines HASH_UPDATE, HASH_TRANSFORM, HASH_FINAL functions: */ -#include "md32_common.h" - - -/* - * SHA-1 functions (FIPS 180-1 Secure Hash Standard) - */ - -static int -t4_digest_init_sha1(EVP_MD_CTX *ctx) -{ - SHA_CTX *c = (SHA_CTX *)ctx->md_data; - - /* Optimization: don't call memset(c, 0,...) or initialize c->data[] */ - c->Nl = c->Nh = c->num = 0; - c->h0 = 0x67452301U; - c->h1 = 0xefcdab89U; - c->h2 = 0x98badcfeU; - c->h3 = 0x10325476U; - c->h4 = 0xc3d2e1f0U; - - return (1); -} - -/* Continue SHA1 digest operation, using the message block to update context. 
*/ -static int -t4_digest_update_sha1(EVP_MD_CTX *ctx, const void *data, size_t len) -{ - return (t4_sha1_update((SHA_CTX *)ctx->md_data, data, len)); -} - -/* End SHA1 digest operation, finalizing message digest and zeroing context. */ -static int -t4_digest_final_sha1(EVP_MD_CTX *ctx, unsigned char *md) -{ - return (t4_sha1_final(md, (SHA_CTX *)ctx->md_data)); -} - -/* Required for Engine API */ -static int -t4_digest_copy_sha1(EVP_MD_CTX *to, const EVP_MD_CTX *from) -{ - if ((to->md_data != NULL) && (from->md_data != NULL)) { - (void) memcpy(to->md_data, from->md_data, sizeof (SHA_CTX)); - } - return (1); -} - -#endif /* COMPILE_HW_T4 */ -#endif /* !OPENSSL_NO_SHA && !OPENSSL_NO_SHA1 */ -#endif /* !OPENSSL_NO_HW && !OPENSSL_NO_HW_MD_T4 */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4_sha256.c --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4_sha256.c Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,246 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). This - * product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). - */ - -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. 
All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
- */ - -/* - * This engine supports SPARC microprocessors that provide AES and other - * cipher and hash instructions, such as the T4 microprocessor. - * - * This file implements the SHA-256 message digest operations. - */ - -#include - -#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_MD_T4) -#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256) -#include -#include -#include -#include -#include -/* - * Solaris sys/sha2.h and OpenSSL openssl/sha.h both define - * SHA256_CTX, SHA512_CTX, SHA256, SHA384, and SHA512. - * For SHA2, OpenSSL SHA256_CTX has extra num and md_len fields at - * the end and Solaris SHA2_CTX has an extra algotype field at the beginning. - */ -#include "eng_t4_sha2_asm.h" - -#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \ - defined(__sparcv8)) && !defined(OPENSSL_NO_ASM) -#define COMPILE_HW_T4 -#endif - -#ifdef COMPILE_HW_T4 - -/* Formal declaration for functions in EVP_MD structure */ -static int t4_digest_init_sha256(EVP_MD_CTX *ctx); -static int t4_digest_init_sha224(EVP_MD_CTX *ctx); -static int t4_digest_update_sha256(EVP_MD_CTX *ctx, const void *data, - size_t len); -static int t4_digest_final_sha256(EVP_MD_CTX *ctx, unsigned char *md); -static int t4_digest_copy_sha256(EVP_MD_CTX *to, const EVP_MD_CTX *from); - - -/* - * OpenSSL's libcrypto EVP stuff. This is how this engine gets wired to EVP. - * EVP_MD is defined in evp.h. To maintain binary compatibility the - * definition cannot be modified. - * Stuff specific to the t4 engine is kept in t4_cipher_ctx_t, which is - * pointed to by the last field, app_data. 
- * - * Fields: type, pkey_type, md_size, flags, - * init(), update(), final(), - * copy(), cleanup(), sign(), verify(), - * required_pkey_type, block_size, ctx_size, md5_ctrl() - */ -const EVP_MD t4_sha256 = { - NID_sha256, NID_sha256WithRSAEncryption, SHA256_DIGEST_LENGTH, - EVP_MD_FLAG_PKEY_METHOD_SIGNATURE | EVP_MD_FLAG_DIGALGID_ABSENT, - t4_digest_init_sha256, t4_digest_update_sha256, t4_digest_final_sha256, - t4_digest_copy_sha256, NULL, - EVP_PKEY_RSA_method, SHA256_CBLOCK, - sizeof (T4_SHA256_CTX), NULL - }; -/* SHA-224 uses the same context, cblock size, & update function as SHA-256: */ -const EVP_MD t4_sha224 = { - NID_sha224, NID_sha224WithRSAEncryption, SHA224_DIGEST_LENGTH, - EVP_MD_FLAG_PKEY_METHOD_SIGNATURE | EVP_MD_FLAG_DIGALGID_ABSENT, - t4_digest_init_sha224, t4_digest_update_sha256, t4_digest_final_sha256, - t4_digest_copy_sha256, NULL, - EVP_PKEY_RSA_method, SHA256_CBLOCK, - sizeof (T4_SHA256_CTX), NULL - }; - -/* These functions are defined in md32_common.h: */ -static int t4_sha256_update(T4_SHA256_CTX *c, const void *data_, size_t len); -static void t4_sha256_transform(T4_SHA256_CTX *c, const unsigned char *data); -static int t4_sha256_final(unsigned char *md, T4_SHA256_CTX *c); -#pragma inline(t4_sha256_update, t4_sha256_transform, t4_sha256_final) - -#define DATA_ORDER_IS_BIG_ENDIAN -/* HASH_LONG/SHA_LONG is unsigned int (32 bits): */ -#define HASH_LONG SHA_LONG -#define HASH_CTX T4_SHA256_CTX -#define HASH_CBLOCK SHA_CBLOCK -#define HASH_UPDATE t4_sha256_update -#define HASH_TRANSFORM t4_sha256_transform -#define HASH_FINAL t4_sha256_final -#define HASH_BLOCK_DATA_ORDER t4_sha256_multiblock -#define HASH_MAKE_STRING(c, s) \ - do { \ - unsigned int ll, nn; \ - switch ((c)->md_len) { \ - case SHA256_DIGEST_LENGTH: \ - for (nn = 0; nn < SHA256_DIGEST_LENGTH / 4; nn++) { \ - ll = (c)->h[nn]; HOST_l2c(ll, (s)); } \ - break; \ - case SHA224_DIGEST_LENGTH: \ - for (nn = 0; nn < SHA224_DIGEST_LENGTH / 4; nn++) { \ - ll = (c)->h[nn]; HOST_l2c(ll, 
(s)); } \ - break; \ - default: \ - if ((c)->md_len > SHA256_DIGEST_LENGTH) \ - return (0); \ - for (nn = 0; nn < (c)->md_len / 4; nn++) { \ - ll = (c)->h[nn]; HOST_l2c(ll, (s)); } \ - break; \ - } \ - } while (0) - -/* This defines HASH_UPDATE, HASH_TRANSFORM, HASH_FINAL functions: */ -#include "md32_common.h" - - -/* - * SHA256 functions (part of FIPS 180-2 Secure Hash Standard) - */ - -static int -t4_digest_init_sha256(EVP_MD_CTX *ctx) -{ - T4_SHA256_CTX *c = (T4_SHA256_CTX *)ctx->md_data; - - /* Optimization: don't call memset(c, 0,...) or initialize c->data[] */ - c->Nl = c->Nh = c->num = 0; - c->h[0] = 0x6a09e667U; - c->h[1] = 0xbb67ae85U; - c->h[2] = 0x3c6ef372U; - c->h[3] = 0xa54ff53aU; - c->h[4] = 0x510e527fU; - c->h[5] = 0x9b05688cU; - c->h[6] = 0x1f83d9abU; - c->h[7] = 0x5be0cd19U; - c->md_len = SHA256_DIGEST_LENGTH; - return (1); -} - - -static int -t4_digest_init_sha224(EVP_MD_CTX *ctx) -{ - T4_SHA256_CTX *c = (T4_SHA256_CTX *)ctx->md_data; - - /* Optimization: don't call memset(c, 0,...) or initialize c->data[] */ - c->Nl = c->Nh = c->num = 0; - c->h[0] = 0xc1059ed8UL; - c->h[1] = 0x367cd507UL; - c->h[2] = 0x3070dd17UL; - c->h[3] = 0xf70e5939UL; - c->h[4] = 0xffc00b31UL; - c->h[5] = 0x68581511UL; - c->h[6] = 0x64f98fa7UL; - c->h[7] = 0xbefa4fa4UL; - c->md_len = SHA224_DIGEST_LENGTH; - return (1); -} - -/* Continue SHA256 digest operation, using message block to update context. 
*/ -static int -t4_digest_update_sha256(EVP_MD_CTX *ctx, const void *data, size_t len) -{ - T4_SHA256_CTX *sha256_ctx = (T4_SHA256_CTX *)ctx->md_data; - - return (t4_sha256_update((T4_SHA256_CTX *)ctx->md_data, data, len)); -} - -/* End SHA256 digest operation, finalizing message digest and zeroing context */ -static int -t4_digest_final_sha256(EVP_MD_CTX *ctx, unsigned char *md) -{ - T4_SHA256_CTX *sha256_ctx = (T4_SHA256_CTX *)ctx->md_data; - - return (t4_sha256_final(md, (T4_SHA256_CTX *)ctx->md_data)); -} - -/* Required for Engine API */ -static int -t4_digest_copy_sha256(EVP_MD_CTX *to, const EVP_MD_CTX *from) -{ - if ((to->md_data != NULL) && (from->md_data != NULL)) { - (void) memcpy(to->md_data, from->md_data, - sizeof (T4_SHA256_CTX)); - } - return (1); -} - -#endif /* COMPILE_HW_T4 */ -#endif /* !OPENSSL_NO_SHA && !OPENSSL_NO_SHA256 */ -#endif /* !OPENSSL_NO_HW && !OPENSSL_NO_HW_MD_T4 */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4_sha2_asm.h --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4_sha2_asm.h Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,135 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). This - * product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). - */ - -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. - */ - -#ifndef ENG_T4_SHA2_ASM_H -#define ENG_T4_SHA2_ASM_H - -/* - * SPARC T4 SHA2 (SHA256/SHA512) assembly language functions and context. - * The context must match that used by the Solaris SPARC T4 assembly - * (except for OpenSSL-specific fields num and md_len that aren't in Solaris). - * - * Based on OpenSSL file openssl/sha.h and Solaris file sys/sha2.h. - */ - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef OPENSSL_NO_SHA256 -/* - * The contents of this structure are a private interface between the - * Init/Update/Multiblock/Final functions. - * Callers must never attempt to read or write any of the fields - * in this structure directly. - */ -typedef struct t4_SHA256state_st { - uint32_t algotype; /* Solaris-only field; unused here */ - uint32_t algotype_pad; /* Pad to align next field 0 mod 8 */ - uint32_t h[8]; /* State (ABCDEFGH) */ - uint32_t h_pad[8]; /* Pad fields to match T4_SHA512_CTX */ - uint32_t Nl, Nh; /* Number of bits, module 2^64 */ - uint32_t Nl_pad, Nh_pad; /* Pad fields to match T4_SHA512_CTX */ - uint32_t data[SHA_LBLOCK]; /* Input */ - unsigned int num, md_len; /* Fields unused by Solaris assembly */ -} T4_SHA256_CTX; -#endif /* !OPENSSL_NO_SHA256 */ - - -#ifndef OPENSSL_NO_SHA512 -/* - * The contents of this structure are a private interface between the - * Init/Update/Multiblock/Final functions. - * Callers must never attempt to read or write any of the fields - * in this structure directly. 
- */ -typedef struct t4_SHA512state_st { - uint32_t algotype; /* Solaris-only field; unused here */ - uint64_t h[8]; /* State (ABCDEFGH) */ - uint64_t Nl, Nh; /* Number of bits, module 2^128 */ - union { - uint64_t d[SHA_LBLOCK]; - unsigned char p[SHA512_CBLOCK]; - } u; /* Input */ - unsigned int num, md_len; /* Fields unused by Solaris assembly */ -} T4_SHA512_CTX; -#endif /* !OPENSSL_NO_SHA512 */ - -/* - * SPARC T4 assembly language functions - */ -#ifndef OPENSSL_NO_SHA256 -extern void t4_sha256_multiblock(T4_SHA256_CTX *c, const void *input, - size_t num); -#endif -#ifndef OPENSSL_NO_SHA512 -extern void t4_sha512_multiblock(T4_SHA512_CTX *c, const void *input, - size_t num); -#endif - -#ifdef __cplusplus -} -#endif -#endif /* ENG_T4_SHA2_ASM_H */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/eng_t4_sha512.c --- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4_sha512.c Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,325 +0,0 @@ -/* - * This product includes cryptographic software developed by the OpenSSL - * Project for use in the OpenSSL Toolkit (http://www.openssl.org/). This - * product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). - */ - -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. 
All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
- */ - -/* - * This engine supports SPARC microprocessors that provide AES and other - * cipher and hash instructions, such as the T4 microprocessor. - * - * This file implements the SHA-512 message digest operations. - */ - -#include - -#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_MD_T4) -#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512) -#include -#include -#include -#include -#include -/* - * Solaris sys/sha2.h and OpenSSL openssl/sha.h both define - * SHA512_CTX, SHA512_CTX, SHA512, SHA384, and SHA512. - * For SHA2, OpenSSL SHA512_CTX has extra num and md_len fields at - * the end and Solaris SHA2_CTX has an extra algotype field at the beginning. - */ -#include "eng_t4_sha2_asm.h" - -#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \ - defined(__sparcv8)) && !defined(OPENSSL_NO_ASM) -#define COMPILE_HW_T4 -#endif - -#ifdef COMPILE_HW_T4 - -/* Formal declaration for functions in EVP_MD structure */ -static int t4_digest_init_sha384(EVP_MD_CTX *ctx); -static int t4_digest_init_sha512(EVP_MD_CTX *ctx); -static int t4_digest_update_sha512(EVP_MD_CTX *ctx, const void *data, - size_t len); -static int t4_digest_final_sha512(EVP_MD_CTX *ctx, unsigned char *md); -static int t4_digest_copy_sha512(EVP_MD_CTX *to, const EVP_MD_CTX *from); - - -/* - * OpenSSL's libcrypto EVP stuff. This is how this engine gets wired to EVP. - * EVP_MD is defined in evp.h. To maintain binary compatibility the - * definition cannot be modified. - * Stuff specific to the t4 engine is kept in t4_cipher_ctx_t, which is - * pointed to by the last field, app_data. 
- * - * Fields: type, pkey_type, md_size, flags, - * init(), update(), final(), - * copy(), cleanup(), sign(), verify(), - * required_pkey_type, block_size, ctx_size, md5_ctrl() - */ - -const EVP_MD t4_sha512 = { - NID_sha512, NID_sha512WithRSAEncryption, SHA512_DIGEST_LENGTH, - EVP_MD_FLAG_PKEY_METHOD_SIGNATURE | EVP_MD_FLAG_DIGALGID_ABSENT, - t4_digest_init_sha512, t4_digest_update_sha512, t4_digest_final_sha512, - t4_digest_copy_sha512, NULL, - EVP_PKEY_RSA_method, SHA512_CBLOCK, - sizeof (T4_SHA512_CTX), NULL - }; -/* SHA-384 uses the same context, cblock size, & update function as SHA-512: */ -const EVP_MD t4_sha384 = { - NID_sha384, NID_sha384WithRSAEncryption, SHA384_DIGEST_LENGTH, - EVP_MD_FLAG_PKEY_METHOD_SIGNATURE | EVP_MD_FLAG_DIGALGID_ABSENT, - t4_digest_init_sha384, t4_digest_update_sha512, t4_digest_final_sha512, - t4_digest_copy_sha512, NULL, - EVP_PKEY_RSA_method, SHA512_CBLOCK, - sizeof (T4_SHA512_CTX), NULL - }; - - -/* - * SHA512 functions (part of FIPS 180-2 Secure Hash Standard) - */ - -static int -t4_digest_init_sha512(EVP_MD_CTX *ctx) -{ - T4_SHA512_CTX *c = (T4_SHA512_CTX *)ctx->md_data; - - c->Nl = c->Nh = c->num = 0; - c->h[0] = 0x6a09e667f3bcc908ULL; - c->h[1] = 0xbb67ae8584caa73bULL; - c->h[2] = 0x3c6ef372fe94f82bULL; - c->h[3] = 0xa54ff53a5f1d36f1ULL; - c->h[4] = 0x510e527fade682d1ULL; - c->h[5] = 0x9b05688c2b3e6c1fULL; - c->h[6] = 0x1f83d9abfb41bd6bULL; - c->h[7] = 0x5be0cd19137e2179ULL; - c->md_len = SHA512_DIGEST_LENGTH; - - return (1); -} - - -static int -t4_digest_init_sha384(EVP_MD_CTX *ctx) -{ - T4_SHA512_CTX *c = (T4_SHA512_CTX *)ctx->md_data; - - c->Nl = c->Nh = c->num = 0; - c->h[0] = 0xcbbb9d5dc1059ed8ULL; - c->h[1] = 0x629a292a367cd507ULL; - c->h[2] = 0x9159015a3070dd17ULL; - c->h[3] = 0x152fecd8f70e5939ULL; - c->h[4] = 0x67332667ffc00b31ULL; - c->h[5] = 0x8eb44a8768581511ULL; - c->h[6] = 0xdb0c2e0d64f98fa7ULL; - c->h[7] = 0x47b5481dbefa4fa4ULL; - c->md_len = SHA384_DIGEST_LENGTH; - - return (1); -} - - -/* - * Continue 
SHA512 or SHA384 digest operation, using the message block to - * update context. - * Modified from SHA512_Update() in OpenSSL crypto/sha/sha512.c. - */ -static int -t4_digest_update_sha512(EVP_MD_CTX *ctx, const void *_data, size_t len) -{ - T4_SHA512_CTX *c = (T4_SHA512_CTX *)ctx->md_data; - SHA_LONG64 l; - unsigned char *p = c->u.p; - const unsigned char *data = (const unsigned char *)_data; - - if (len == 0) - return (1); - - l = (c->Nl + (((SHA_LONG64)len) << 3)) & 0xffffffffffffffffULL; - if (l < c->Nl) - c->Nh++; - if (sizeof (len) >= 8) { - c->Nh += (((SHA_LONG64)len) >> 61); - } - c->Nl = l; - - if (c->num != 0) { - size_t n = sizeof (c->u) - c->num; - - if (len < n) { - memcpy(p + c->num, data, len); - c->num += (unsigned int)len; - return (1); - } else { - memcpy(p + c->num, data, n); - c->num = 0; - len -= n; - data += n; - t4_sha512_multiblock(c, p, 1); - } - } - - if (len >= sizeof (c->u)) { - if ((size_t)data % sizeof (c->u.d[0]) != 0) { - /* Align unaligned data one block-at-a-time */ - while (len >= sizeof (c->u)) { - memcpy(p, data, sizeof (c->u)); - t4_sha512_multiblock(c, p, 1); - len -= sizeof (c->u); - data += sizeof (c->u); - } - } else { - t4_sha512_multiblock(c, data, len / sizeof (c->u)); - data += len; - len %= sizeof (c->u); - data -= len; - } - } - - if (len != 0) { - memcpy(p, data, len); - c->num = (int)len; - } - - return (1); -} - - -/* - * End SHA-512 or SHA-384 digest operation, finalizing message digest and - * zeroing context. - * Modified from SHA512_Final() in OpenSSL crypto/sha/sha512.c. 
- */ -static int -t4_digest_final_sha512(EVP_MD_CTX *ctx, unsigned char *md) -{ - T4_SHA512_CTX *c = (T4_SHA512_CTX *)ctx->md_data; - unsigned char *p = (unsigned char *)c->u.p; - size_t n = c->num; - - p[n] = 0x80; /* There always is a room for one */ - n++; - if (n > (sizeof (c->u) - 16)) { - memset(p + n, 0, sizeof (c->u) - n); - n = 0; - t4_sha512_multiblock(c, p, 1); - } - - memset(p+n, 0, sizeof (c->u) - 16 - n); - c->u.d[SHA_LBLOCK - 2] = c->Nh; - c->u.d[SHA_LBLOCK - 1] = c->Nl; - - t4_sha512_multiblock(c, p, 1); - - if (md == 0) - return (0); - - switch (c->md_len) { - /* Let compiler decide if it's appropriate to unroll... */ - case SHA384_DIGEST_LENGTH: - for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) { - SHA_LONG64 t = c->h[n]; - - *(md++) = (unsigned char)(t >> 56); - *(md++) = (unsigned char)(t >> 48); - *(md++) = (unsigned char)(t >> 40); - *(md++) = (unsigned char)(t >> 32); - *(md++) = (unsigned char)(t >> 24); - *(md++) = (unsigned char)(t >> 16); - *(md++) = (unsigned char)(t >> 8); - *(md++) = (unsigned char)(t); - } - break; - case SHA512_DIGEST_LENGTH: - for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) { - SHA_LONG64 t = c->h[n]; - - *(md++) = (unsigned char)(t >> 56); - *(md++) = (unsigned char)(t >> 48); - *(md++) = (unsigned char)(t >> 40); - *(md++) = (unsigned char)(t >> 32); - *(md++) = (unsigned char)(t >> 24); - *(md++) = (unsigned char)(t >> 16); - *(md++) = (unsigned char)(t >> 8); - *(md++) = (unsigned char)(t); - } - break; - /* ... as well as make sure md_len is not abused. 
*/ - default: - return (0); - } - - return (1); -} - - -/* Required for Engine API */ -static int -t4_digest_copy_sha512(EVP_MD_CTX *to, const EVP_MD_CTX *from) -{ - if ((to->md_data != NULL) && (from->md_data != NULL)) { - (void) memcpy(to->md_data, from->md_data, - sizeof (T4_SHA512_CTX)); - } - return (1); -} - -#endif /* COMPILE_HW_T4 */ -#endif /* !OPENSSL_NO_SHA && !OPENSSL_NO_SHA512 */ -#endif /* !OPENSSL_NO_HW && !OPENSSL_NO_HW_MD_T4 */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/t4_aes.S --- a/components/openssl/openssl-1.0.1/engines/t4/t4_aes.S Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3052 +0,0 @@ -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. 
Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
- */ - -/*LINTLIBRARY*/ - -#if defined(lint) || defined(__lint) - - -#include - -/*ARGSUSED*/ -void t4_aes_expand128(uint64_t *rk, const uint32_t *key) -{ return; } - -/*ARGSUSED*/ -void t4_aes_expand192(uint64_t *rk, const uint32_t *key) -{ return; } - -/*ARGSUSED*/ -void t4_aes_expand256(uint64_t *rk, const uint32_t *key) -{ return; } - -void t4_aes128_load_keys_for_encrypt(uint64_t *ks) -{ return; } - -/*ARGSUSED*/ -void t4_aes192_load_keys_for_encrypt(uint64_t *ks) -{ return; } - -/*ARGSUSED*/ -void t4_aes256_load_keys_for_encrypt(uint64_t *ks) -{ return; } - -/*ARGSUSED*/ -void t4_aes128_ecb_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes192_ecb_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes256_ecb_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes128_cbc_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes192_cbc_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes256_cbc_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes128_ctr_crypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes192_ctr_crypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes256_ctr_crypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes128_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in, 
- uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes192_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes256_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -void t4_aes128_load_keys_for_decrypt(uint64_t *ks) -{ return; } - -/*ARGSUSED*/ -void t4_aes192_load_keys_for_decrypt(uint64_t *ks) -{ return; } - -/*ARGSUSED*/ -void t4_aes256_load_keys_for_decrypt(uint64_t *ks) -{ return; } - -/*ARGSUSED*/ -void t4_aes128_ecb_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes192_ecb_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes256_ecb_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes128_cbc_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes192_cbc_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes256_cbc_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes128_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes192_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -/*ARGSUSED*/ -void t4_aes256_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in, - uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) -{ return; } - -#else /* lint || 
__lint */ - -#include - - - ENTRY(t4_aes_expand128) - -!load key - ld [%o1], %f0 - ld [%o1 + 0x4], %f1 - ld [%o1 + 0x8], %f2 - ld [%o1 + 0xc], %f3 - -!expand the key - !aes_kexpand1 %f0, %f2, 0x0, %f4 - !aes_kexpand2 %f2, %f4, %f6 - !aes_kexpand1 %f4, %f6, 0x1, %f8 - !aes_kexpand2 %f6, %f8, %f10 - !aes_kexpand1 %f8, %f10, 0x2, %f12 - !aes_kexpand2 %f10, %f12, %f14 - !aes_kexpand1 %f12, %f14, 0x3, %f16 - !aes_kexpand2 %f14, %f16, %f18 - !aes_kexpand1 %f16, %f18, 0x4, %f20 - !aes_kexpand2 %f18, %f20, %f22 - !aes_kexpand1 %f20, %f22, 0x5, %f24 - !aes_kexpand2 %f22, %f24, %f26 - !aes_kexpand1 %f24, %f26, 0x6, %f28 - !aes_kexpand2 %f26, %f28, %f30 - !aes_kexpand1 %f28, %f30, 0x7, %f32 - !aes_kexpand2 %f30, %f32, %f34 - !aes_kexpand1 %f32, %f34, 0x8, %f36 - !aes_kexpand2 %f34, %f36, %f38 - !aes_kexpand1 %f36, %f38, 0x9, %f40 - !aes_kexpand2 %f38, %f40, %f42 - .byte 0x88, 0xc8, 0x01, 0x02 - .byte 0x8d, 0xb0, 0xa6, 0x24 - .byte 0x90, 0xc9, 0x03, 0x06 - .byte 0x95, 0xb1, 0xa6, 0x28 - .byte 0x98, 0xca, 0x05, 0x0a - .byte 0x9d, 0xb2, 0xa6, 0x2c - .byte 0xa0, 0xcb, 0x07, 0x0e - .byte 0xa5, 0xb3, 0xa6, 0x30 - .byte 0xa8, 0xcc, 0x09, 0x12 - .byte 0xad, 0xb4, 0xa6, 0x34 - .byte 0xb0, 0xcd, 0x0b, 0x16 - .byte 0xb5, 0xb5, 0xa6, 0x38 - .byte 0xb8, 0xce, 0x0d, 0x1a - .byte 0xbd, 0xb6, 0xa6, 0x3c - .byte 0x82, 0xcf, 0x0f, 0x1e - .byte 0x87, 0xb7, 0xa6, 0x21 - .byte 0x8a, 0xc8, 0x51, 0x03 - .byte 0x8f, 0xb0, 0xe6, 0x25 - .byte 0x92, 0xc9, 0x53, 0x07 - .byte 0x97, 0xb1, 0xe6, 0x29 - -!copy expanded key back into array - std %f4, [%o0] - std %f6, [%o0 + 0x8] - std %f8, [%o0 + 0x10] - std %f10, [%o0 + 0x18] - std %f12, [%o0 + 0x20] - std %f14, [%o0 + 0x28] - std %f16, [%o0 + 0x30] - std %f18, [%o0 + 0x38] - std %f20, [%o0 + 0x40] - std %f22, [%o0 + 0x48] - std %f24, [%o0 + 0x50] - std %f26, [%o0 + 0x58] - std %f28, [%o0 + 0x60] - std %f30, [%o0 + 0x68] - std %f32, [%o0 + 0x70] - std %f34, [%o0 + 0x78] - std %f36, [%o0 + 0x80] - std %f38, [%o0 + 0x88] - std %f40, [%o0 + 0x90] - retl - std 
%f42, [%o0 + 0x98] - - SET_SIZE(t4_aes_expand128) - - - ENTRY(t4_aes_expand192) - -!load key - ld [%o1], %f0 - ld [%o1 + 0x4], %f1 - ld [%o1 + 0x8], %f2 - ld [%o1 + 0xc], %f3 - ld [%o1 + 0x10], %f4 - ld [%o1 + 0x14], %f5 - -!expand the key - !aes_kexpand1 %f0, %f4, 0x0, %f6 - !aes_kexpand2 %f2, %f6, %f8 - !aes_kexpand2 %f4, %f8, %f10 - - !aes_kexpand1 %f6, %f10, 0x1, %f12 - !aes_kexpand2 %f8, %f12, %f14 - !aes_kexpand2 %f10, %f14, %f16 - - !aes_kexpand1 %f12, %f16, 0x2, %f18 - !aes_kexpand2 %f14, %f18, %f20 - !aes_kexpand2 %f16, %f20, %f22 - - !aes_kexpand1 %f18, %f22, 0x3, %f24 - !aes_kexpand2 %f20, %f24, %f26 - !aes_kexpand2 %f22, %f26, %f28 - - !aes_kexpand1 %f24, %f28, 0x4, %f30 - !aes_kexpand2 %f26, %f30, %f32 - !aes_kexpand2 %f28, %f32, %f34 - - !aes_kexpand1 %f30, %f34, 0x5, %f36 - !aes_kexpand2 %f32, %f36, %f38 - !aes_kexpand2 %f34, %f38, %f40 - - !aes_kexpand1 %f36, %f40, 0x6, %f42 - !aes_kexpand2 %f38, %f42, %f44 - !aes_kexpand2 %f40, %f44, %f46 - - !aes_kexpand1 %f42, %f46, 0x7, %f48 - !aes_kexpand2 %f44, %f48, %f50 - .byte 0x8c, 0xc8, 0x01, 0x04 - .byte 0x91, 0xb0, 0xa6, 0x26 - .byte 0x95, 0xb1, 0x26, 0x28 - .byte 0x98, 0xc9, 0x83, 0x0a - .byte 0x9d, 0xb2, 0x26, 0x2c - .byte 0xa1, 0xb2, 0xa6, 0x2e - .byte 0xa4, 0xcb, 0x05, 0x10 - .byte 0xa9, 0xb3, 0xa6, 0x32 - .byte 0xad, 0xb4, 0x26, 0x34 - .byte 0xb0, 0xcc, 0x87, 0x16 - .byte 0xb5, 0xb5, 0x26, 0x38 - .byte 0xb9, 0xb5, 0xa6, 0x3a - .byte 0xbc, 0xce, 0x09, 0x1c - .byte 0x83, 0xb6, 0xa6, 0x3e - .byte 0x87, 0xb7, 0x26, 0x21 - .byte 0x8a, 0xcf, 0x8b, 0x03 - .byte 0x8f, 0xb0, 0x66, 0x25 - .byte 0x93, 0xb0, 0xe6, 0x27 - .byte 0x96, 0xc9, 0x4d, 0x09 - .byte 0x9b, 0xb1, 0xe6, 0x2b - .byte 0x9f, 0xb2, 0x66, 0x2d - .byte 0xa2, 0xca, 0xcf, 0x0f - .byte 0xa7, 0xb3, 0x66, 0x31 - -!copy expanded key back into array - std %f6, [%o0] - std %f8, [%o0 + 0x8] - std %f10, [%o0 + 0x10] - std %f12, [%o0 + 0x18] - std %f14, [%o0 + 0x20] - std %f16, [%o0 + 0x28] - std %f18, [%o0 + 0x30] - std %f20, [%o0 + 0x38] - std %f22, 
[%o0 + 0x40] - std %f24, [%o0 + 0x48] - std %f26, [%o0 + 0x50] - std %f28, [%o0 + 0x58] - std %f30, [%o0 + 0x60] - std %f32, [%o0 + 0x68] - std %f34, [%o0 + 0x70] - std %f36, [%o0 + 0x78] - std %f38, [%o0 + 0x80] - std %f40, [%o0 + 0x88] - std %f42, [%o0 + 0x90] - std %f44, [%o0 + 0x98] - std %f46, [%o0 + 0xa0] - std %f48, [%o0 + 0xa8] - retl - std %f50, [%o0 + 0xb0] - - SET_SIZE(t4_aes_expand192) - - - ENTRY(t4_aes_expand256) - -!load key - ld [%o1], %f0 - ld [%o1 + 0x4], %f1 - ld [%o1 + 0x8], %f2 - ld [%o1 + 0xc], %f3 - ld [%o1 + 0x10], %f4 - ld [%o1 + 0x14], %f5 - ld [%o1 + 0x18], %f6 - ld [%o1 + 0x1c], %f7 - -!expand the key - !aes_kexpand1 %f0, %f6, 0x0, %f8 - !aes_kexpand2 %f2, %f8, %f10 - !aes_kexpand0 %f4, %f10, %f12 - !aes_kexpand2 %f6, %f12, %f14 - - !aes_kexpand1 %f8, %f14, 0x1, %f16 - !aes_kexpand2 %f10, %f16, %f18 - !aes_kexpand0 %f12, %f18, %f20 - !aes_kexpand2 %f14, %f20, %f22 - - !aes_kexpand1 %f16, %f22, 0x2, %f24 - !aes_kexpand2 %f18, %f24, %f26 - !aes_kexpand0 %f20, %f26, %f28 - !aes_kexpand2 %f22, %f28, %f30 - - !aes_kexpand1 %f24, %f30, 0x3, %f32 - !aes_kexpand2 %f26, %f32, %f34 - !aes_kexpand0 %f28, %f34, %f36 - !aes_kexpand2 %f30, %f36, %f38 - - !aes_kexpand1 %f32, %f38, 0x4, %f40 - !aes_kexpand2 %f34, %f40, %f42 - !aes_kexpand0 %f36, %f42, %f44 - !aes_kexpand2 %f38, %f44, %f46 - - !aes_kexpand1 %f40, %f46, 0x5, %f48 - !aes_kexpand2 %f42, %f48, %f50 - !aes_kexpand0 %f44, %f50, %f52 - !aes_kexpand2 %f46, %f52, %f54 - - !aes_kexpand1 %f48, %f54, 0x6, %f56 - !aes_kexpand2 %f50, %f56, %f58 - .byte 0x90, 0xc8, 0x01, 0x06 - .byte 0x95, 0xb0, 0xa6, 0x28 - .byte 0x99, 0xb1, 0x26, 0x0a - .byte 0x9d, 0xb1, 0xa6, 0x2c - .byte 0xa0, 0xca, 0x03, 0x0e - .byte 0xa5, 0xb2, 0xa6, 0x30 - .byte 0xa9, 0xb3, 0x26, 0x12 - .byte 0xad, 0xb3, 0xa6, 0x34 - .byte 0xb0, 0xcc, 0x05, 0x16 - .byte 0xb5, 0xb4, 0xa6, 0x38 - .byte 0xb9, 0xb5, 0x26, 0x1a - .byte 0xbd, 0xb5, 0xa6, 0x3c - .byte 0x82, 0xce, 0x07, 0x1e - .byte 0x87, 0xb6, 0xa6, 0x21 - .byte 0x8b, 0xb7, 0x26, 0x03 
- .byte 0x8f, 0xb7, 0xa6, 0x25 - .byte 0x92, 0xc8, 0x49, 0x07 - .byte 0x97, 0xb0, 0xe6, 0x29 - .byte 0x9b, 0xb1, 0x66, 0x0b - .byte 0x9f, 0xb1, 0xe6, 0x2d - .byte 0xa2, 0xca, 0x4b, 0x0f - .byte 0xa7, 0xb2, 0xe6, 0x31 - .byte 0xab, 0xb3, 0x66, 0x13 - .byte 0xaf, 0xb3, 0xe6, 0x35 - .byte 0xb2, 0xcc, 0x4d, 0x17 - .byte 0xb7, 0xb4, 0xe6, 0x39 - -!copy expanded key back into array - std %f8, [%o0] - std %f10, [%o0 + 0x8] - std %f12, [%o0 + 0x10] - std %f14, [%o0 + 0x18] - std %f16, [%o0 + 0x20] - std %f18, [%o0 + 0x28] - std %f20, [%o0 + 0x30] - std %f22, [%o0 + 0x38] - std %f24, [%o0 + 0x40] - std %f26, [%o0 + 0x48] - std %f28, [%o0 + 0x50] - std %f30, [%o0 + 0x58] - std %f32, [%o0 + 0x60] - std %f34, [%o0 + 0x68] - std %f36, [%o0 + 0x70] - std %f38, [%o0 + 0x78] - std %f40, [%o0 + 0x80] - std %f42, [%o0 + 0x88] - std %f44, [%o0 + 0x90] - std %f46, [%o0 + 0x98] - std %f48, [%o0 + 0xa0] - std %f50, [%o0 + 0xa8] - std %f52, [%o0 + 0xb0] - std %f54, [%o0 + 0xb8] - std %f56, [%o0 + 0xc0] - retl - std %f58, [%o0 + 0xc8] - - SET_SIZE(t4_aes_expand256) - - -#define FIRST_TWO_EROUNDS \ - .byte 0xb2, 0xc8, 0x3e, 0x1d ; \ - .byte 0xb6, 0xc8, 0xbe, 0x3d ; \ - .byte 0xba, 0xc9, 0x36, 0x19 ; \ - .byte 0xbe, 0xc9, 0xb6, 0x39 - !aes_eround01 %f0, %f60, %f62, %f56 ; \ - !aes_eround23 %f2, %f60, %f62, %f58 ; \ - !aes_eround01 %f4, %f56, %f58, %f60 ; \ - !aes_eround23 %f6, %f56, %f58, %f62 - -#define MID_TWO_EROUNDS \ - .byte 0xb2, 0xca, 0x3e, 0x1d ; \ - .byte 0xb6, 0xca, 0xbe, 0x3d ; \ - .byte 0xba, 0xcb, 0x36, 0x19 ; \ - .byte 0xbe, 0xcb, 0xb6, 0x39 - !aes_eround01 %f8, %f60, %f62, %f56 ; \ - !aes_eround23 %f10, %f60, %f62, %f58 ; \ - !aes_eround01 %f12, %f56, %f58, %f60 ; \ - !aes_eround23 %f14, %f56, %f58, %f62 - -#define MID_TWO_EROUNDS_2 \ - .byte 0x8c, 0xca, 0x04, 0x00 ; \ - .byte 0x88, 0xca, 0x84, 0x20 ; \ - .byte 0xb2, 0xca, 0x3e, 0x1d ; \ - .byte 0xb6, 0xca, 0xbe, 0x3d ; \ - .byte 0x80, 0xcb, 0x08, 0x06 ; \ - .byte 0x84, 0xcb, 0x88, 0x26 ; \ - .byte 0xba, 0xcb, 0x36, 0x19 ; \ 
- .byte 0xbe, 0xcb, 0xb6, 0x39 - !aes_eround01 %f8, %f0, %f2, %f6 ; \ - !aes_eround23 %f10, %f0, %f2, %f4 ; \ - !aes_eround01 %f8, %f60, %f62, %f56 ; \ - !aes_eround23 %f10, %f60, %f62, %f58 ; \ - !aes_eround01 %f12, %f6, %f4, %f0 ; \ - !aes_eround23 %f14, %f6, %f4, %f2 ; \ - !aes_eround01 %f12, %f56, %f58, %f60 ; \ - !aes_eround23 %f14, %f56, %f58, %f62 - -#define TEN_EROUNDS \ - .byte 0xb2, 0xcc, 0x3e, 0x1d ; \ - .byte 0xb6, 0xcc, 0xbe, 0x3d ; \ - .byte 0xba, 0xcd, 0x36, 0x19 ; \ - .byte 0xbe, 0xcd, 0xb6, 0x39 ; \ - .byte 0xb2, 0xce, 0x3e, 0x1d ; \ - .byte 0xb6, 0xce, 0xbe, 0x3d ; \ - .byte 0xba, 0xcf, 0x36, 0x19 ; \ - .byte 0xbe, 0xcf, 0xb6, 0x39 ; \ - .byte 0xb2, 0xc8, 0x7e, 0x1d ; \ - .byte 0xb6, 0xc8, 0xfe, 0x3d ; \ - .byte 0xba, 0xc9, 0x76, 0x19 ; \ - .byte 0xbe, 0xc9, 0xf6, 0x39 ; \ - .byte 0xb2, 0xca, 0x7e, 0x1d ; \ - .byte 0xb6, 0xca, 0xfe, 0x3d ; \ - .byte 0xba, 0xcb, 0x76, 0x19 ; \ - .byte 0xbe, 0xcb, 0xf6, 0x39 ; \ - .byte 0xb2, 0xcc, 0x7e, 0x1d ; \ - .byte 0xb6, 0xcc, 0xfe, 0x3d ; \ - .byte 0xba, 0xcd, 0x76, 0x99 ; \ - .byte 0xbe, 0xcd, 0xf6, 0xb9 - !aes_eround01 %f16, %f60, %f62, %f56 ; \ - !aes_eround23 %f18, %f60, %f62, %f58 ; \ - !aes_eround01 %f20, %f56, %f58, %f60 ; \ - !aes_eround23 %f22, %f56, %f58, %f62 ; \ - !aes_eround01 %f24, %f60, %f62, %f56 ; \ - !aes_eround23 %f26, %f60, %f62, %f58 ; \ - !aes_eround01 %f28, %f56, %f58, %f60 ; \ - !aes_eround23 %f30, %f56, %f58, %f62 ; \ - !aes_eround01 %f32, %f60, %f62, %f56 ; \ - !aes_eround23 %f34, %f60, %f62, %f58 ; \ - !aes_eround01 %f36, %f56, %f58, %f60 ; \ - !aes_eround23 %f38, %f56, %f58, %f62 ; \ - !aes_eround01 %f40, %f60, %f62, %f56 ; \ - !aes_eround23 %f42, %f60, %f62, %f58 ; \ - !aes_eround01 %f44, %f56, %f58, %f60 ; \ - !aes_eround23 %f46, %f56, %f58, %f62 ; \ - !aes_eround01 %f48, %f60, %f62, %f56 ; \ - !aes_eround23 %f50, %f60, %f62, %f58 ; \ - !aes_eround01_l %f52, %f56, %f58, %f60 ; \ - !aes_eround23_l %f54, %f56, %f58, %f62 - -#define TEN_EROUNDS_2 \ - .byte 0x8c, 0xcc, 0x04, 0x00 ; \ 
- .byte 0x88, 0xcc, 0x84, 0x20 ; \ - .byte 0xb2, 0xcc, 0x3e, 0x1d ; \ - .byte 0xb6, 0xcc, 0xbe, 0x3d ; \ - .byte 0x80, 0xcd, 0x08, 0x06 ; \ - .byte 0x84, 0xcd, 0x88, 0x26 ; \ - .byte 0xba, 0xcd, 0x36, 0x19 ; \ - .byte 0xbe, 0xcd, 0xb6, 0x39 ; \ - .byte 0x8c, 0xce, 0x04, 0x00 ; \ - .byte 0x88, 0xce, 0x84, 0x20 ; \ - .byte 0xb2, 0xce, 0x3e, 0x1d ; \ - .byte 0xb6, 0xce, 0xbe, 0x3d ; \ - .byte 0x80, 0xcf, 0x08, 0x06 ; \ - .byte 0x84, 0xcf, 0x88, 0x26 ; \ - .byte 0xba, 0xcf, 0x36, 0x19 ; \ - .byte 0xbe, 0xcf, 0xb6, 0x39 ; \ - .byte 0x8c, 0xc8, 0x44, 0x00 ; \ - .byte 0x88, 0xc8, 0xc4, 0x20 ; \ - .byte 0xb2, 0xc8, 0x7e, 0x1d ; \ - .byte 0xb6, 0xc8, 0xfe, 0x3d ; \ - .byte 0x80, 0xc9, 0x48, 0x06 ; \ - .byte 0x84, 0xc9, 0xc8, 0x26 ; \ - .byte 0xba, 0xc9, 0x76, 0x19 ; \ - .byte 0xbe, 0xc9, 0xf6, 0x39 ; \ - .byte 0x8c, 0xca, 0x44, 0x00 ; \ - .byte 0x88, 0xca, 0xc4, 0x20 ; \ - .byte 0xb2, 0xca, 0x7e, 0x1d ; \ - .byte 0xb6, 0xca, 0xfe, 0x3d ; \ - .byte 0x80, 0xcb, 0x48, 0x06 ; \ - .byte 0x84, 0xcb, 0xc8, 0x26 ; \ - .byte 0xba, 0xcb, 0x76, 0x19 ; \ - .byte 0xbe, 0xcb, 0xf6, 0x39 ; \ - .byte 0x8c, 0xcc, 0x44, 0x00 ; \ - .byte 0x88, 0xcc, 0xc4, 0x20 ; \ - .byte 0xb2, 0xcc, 0x7e, 0x1d ; \ - .byte 0xb6, 0xcc, 0xfe, 0x3d ; \ - .byte 0x80, 0xcd, 0x48, 0x86 ; \ - .byte 0x84, 0xcd, 0xc8, 0xa6 ; \ - .byte 0xba, 0xcd, 0x76, 0x99 ; \ - .byte 0xbe, 0xcd, 0xf6, 0xb9 - !aes_eround01 %f16, %f0, %f2, %f6 ; \ - !aes_eround23 %f18, %f0, %f2, %f4 ; \ - !aes_eround01 %f16, %f60, %f62, %f56 ; \ - !aes_eround23 %f18, %f60, %f62, %f58 ; \ - !aes_eround01 %f20, %f6, %f4, %f0 ; \ - !aes_eround23 %f22, %f6, %f4, %f2 ; \ - !aes_eround01 %f20, %f56, %f58, %f60 ; \ - !aes_eround23 %f22, %f56, %f58, %f62 ; \ - !aes_eround01 %f24, %f0, %f2, %f6 ; \ - !aes_eround23 %f26, %f0, %f2, %f4 ; \ - !aes_eround01 %f24, %f60, %f62, %f56 ; \ - !aes_eround23 %f26, %f60, %f62, %f58 ; \ - !aes_eround01 %f28, %f6, %f4, %f0 ; \ - !aes_eround23 %f30, %f6, %f4, %f2 ; \ - !aes_eround01 %f28, %f56, %f58, %f60 ; \ - !aes_eround23 
%f30, %f56, %f58, %f62 ; \ - !aes_eround01 %f32, %f0, %f2, %f6 ; \ - !aes_eround23 %f34, %f0, %f2, %f4 ; \ - !aes_eround01 %f32, %f60, %f62, %f56 ; \ - !aes_eround23 %f34, %f60, %f62, %f58 ; \ - !aes_eround01 %f36, %f6, %f4, %f0 ; \ - !aes_eround23 %f38, %f6, %f4, %f2 ; \ - !aes_eround01 %f36, %f56, %f58, %f60 ; \ - !aes_eround23 %f38, %f56, %f58, %f62 ; \ - !aes_eround01 %f40, %f0, %f2, %f6 ; \ - !aes_eround23 %f42, %f0, %f2, %f4 ; \ - !aes_eround01 %f40, %f60, %f62, %f56 ; \ - !aes_eround23 %f42, %f60, %f62, %f58 ; \ - !aes_eround01 %f44, %f6, %f4, %f0 ; \ - !aes_eround23 %f46, %f6, %f4, %f2 ; \ - !aes_eround01 %f44, %f56, %f58, %f60 ; \ - !aes_eround23 %f46, %f56, %f58, %f62 ; \ - !aes_eround01 %f48, %f0, %f2, %f6 ; \ - !aes_eround23 %f50, %f0, %f2, %f4 ; \ - !aes_eround01 %f48, %f60, %f62, %f56 ; \ - !aes_eround23 %f50, %f60, %f62, %f58 ; \ - !aes_eround01_l %f52, %f6, %f4, %f0 ; \ - !aes_eround23_l %f54, %f6, %f4, %f2 ; \ - !aes_eround01_l %f52, %f56, %f58, %f60 ; \ - !aes_eround23_l %f54, %f56, %f58, %f62 - -#define TWELVE_EROUNDS \ - MID_TWO_EROUNDS ; \ - TEN_EROUNDS - -#define TWELVE_EROUNDS_2 \ - MID_TWO_EROUNDS_2 ; \ - TEN_EROUNDS_2 - -#define FOURTEEN_EROUNDS \ - FIRST_TWO_EROUNDS ; \ - TWELVE_EROUNDS - -#define FOURTEEN_EROUNDS_2 \ - .byte 0xb0, 0xc8, 0x2c, 0x14 ; \ - .byte 0xac, 0xc8, 0xac, 0x34 ; \ - ldd [%o0 + 0x60], %f20 ; \ - .byte 0xb2, 0xc8, 0x3e, 0x1d ; \ - .byte 0xb6, 0xc8, 0xbe, 0x3d ; \ - .byte 0x80, 0xc9, 0x2c, 0x18 ; \ - .byte 0x84, 0xc9, 0xac, 0x38 ;\ - ldd [%o0 + 0x68], %f22 ; \ - .byte 0xba, 0xc9, 0x36, 0x19 ; \ - ldd [%o0 + 0x70], %f24 ; \ - .byte 0xbe, 0xc9, 0xb6, 0x39 ; \ - .byte 0x8c, 0xca, 0x04, 0x00 ; \ - .byte 0x88, 0xca, 0x84, 0x20 ; \ - .byte 0xb2, 0xca, 0x3e, 0x1d ; \ - .byte 0xb6, 0xca, 0xbe, 0x3d ; \ - .byte 0x80, 0xcb, 0x08, 0x06 ; \ - .byte 0x84, 0xcb, 0x88, 0x26 ; \ - .byte 0xba, 0xcb, 0x36, 0x19 ; \ - .byte 0xbe, 0xcb, 0xb6, 0x39 ; \ - .byte 0x8c, 0xcc, 0x04, 0x00 ; \ - .byte 0x88, 0xcc, 0x84, 0x20 ; \ - .byte 0xb2, 0xcc, 
0x3e, 0x1d ; \ - .byte 0xb6, 0xcc, 0xbe, 0x3d ; \ - .byte 0x80, 0xcd, 0x08, 0x06 ; \ - .byte 0x84, 0xcd, 0x88, 0x26 ; \ - .byte 0xba, 0xcd, 0x36, 0x19 ; \ - .byte 0xbe, 0xcd, 0xb6, 0x39 ; \ - .byte 0x8c, 0xce, 0x04, 0x00 ; \ - .byte 0x88, 0xce, 0x84, 0x20 ; \ - .byte 0xb2, 0xce, 0x3e, 0x1d ; \ - .byte 0xb6, 0xce, 0xbe, 0x3d ; \ - .byte 0x80, 0xcf, 0x08, 0x06 ; \ - .byte 0x84, 0xcf, 0x88, 0x26 ; \ - .byte 0xba, 0xcf, 0x36, 0x19 ; \ - .byte 0xbe, 0xcf, 0xb6, 0x39 ; \ - .byte 0x8c, 0xc8, 0x44, 0x00 ; \ - .byte 0x88, 0xc8, 0xc4, 0x20 ; \ - .byte 0xb2, 0xc8, 0x7e, 0x1d ; \ - .byte 0xb6, 0xc8, 0xfe, 0x3d ; \ - .byte 0x80, 0xc9, 0x48, 0x06 ; \ - .byte 0x84, 0xc9, 0xc8, 0x26 ; \ - .byte 0xba, 0xc9, 0x76, 0x19 ; \ - .byte 0xbe, 0xc9, 0xf6, 0x39 ; \ - .byte 0x8c, 0xca, 0x44, 0x00 ; \ - .byte 0x88, 0xca, 0xc4, 0x20 ; \ - .byte 0xb2, 0xca, 0x7e, 0x1d ; \ - .byte 0xb6, 0xca, 0xfe, 0x3d ; \ - .byte 0x80, 0xcb, 0x48, 0x06 ; \ - .byte 0x84, 0xcb, 0xc8, 0x26 ; \ - .byte 0xba, 0xcb, 0x76, 0x19 ; \ - .byte 0xbe, 0xcb, 0xf6, 0x39 ; \ - .byte 0x8c, 0xcc, 0x44, 0x00 ; \ - .byte 0x88, 0xcc, 0xc4, 0x20 ; \ - ldd [%o0 + 0x10], %f0 ; \ - .byte 0xb2, 0xcc, 0x7e, 0x1d ; \ - ldd [%o0 + 0x18], %f2 ; \ - .byte 0xb6, 0xcc, 0xfe, 0x3d ; \ - .byte 0xa8, 0xcd, 0x48, 0x86 ; \ - .byte 0xac, 0xcd, 0xc8, 0xa6 ; \ - ldd [%o0 + 0x20], %f4 ; \ - .byte 0xba, 0xcd, 0x76, 0x99 ; \ - ldd [%o0 + 0x28], %f6 ; \ - .byte 0xbe, 0xcd, 0xf6, 0xb9 - !aes_eround01 %f0, %f20, %f22, %f24 ; \ - !aes_eround23 %f2, %f20, %f22, %f22 ; \ - !ldd [%o0 + 0x60], %f20 ; \ - !aes_eround01 %f0, %f60, %f62, %f56 ; \ - !aes_eround23 %f2, %f60, %f62, %f58 ; \ - !aes_eround01 %f4, %f24, %f22, %f0 ; \ - !aes_eround23 %f6, %f24, %f22, %f2 ; \ - !ldd [%o0 + 0x68], %f22 ; \ - !aes_eround01 %f4, %f56, %f58, %f60 ; \ - !ldd [%o0 + 0x70], %f24 ; \ - !aes_eround23 %f6, %f56, %f58, %f62 ; \ - !aes_eround01 %f8, %f0, %f2, %f6 ; \ - !aes_eround23 %f10, %f0, %f2, %f4 ; \ - !aes_eround01 %f8, %f60, %f62, %f56 ; \ - !aes_eround23 %f10, %f60, %f62, 
%f58 ; \ - !aes_eround01 %f12, %f6, %f4, %f0 ; \ - !aes_eround23 %f14, %f6, %f4, %f2 ; \ - !aes_eround01 %f12, %f56, %f58, %f60 ; \ - !aes_eround23 %f14, %f56, %f58, %f62 ; \ - !aes_eround01 %f16, %f0, %f2, %f6 ; \ - !aes_eround23 %f18, %f0, %f2, %f4 ; \ - !aes_eround01 %f16, %f60, %f62, %f56 ; \ - !aes_eround23 %f18, %f60, %f62, %f58 ; \ - !aes_eround01 %f20, %f6, %f4, %f0 ; \ - !aes_eround23 %f22, %f6, %f4, %f2 ; \ - !aes_eround01 %f20, %f56, %f58, %f60 ; \ - !aes_eround23 %f22, %f56, %f58, %f62 ; \ - !aes_eround01 %f24, %f0, %f2, %f6 ; \ - !aes_eround23 %f26, %f0, %f2, %f4 ; \ - !aes_eround01 %f24, %f60, %f62, %f56 ; \ - !aes_eround23 %f26, %f60, %f62, %f58 ; \ - !aes_eround01 %f28, %f6, %f4, %f0 ; \ - !aes_eround23 %f30, %f6, %f4, %f2 ; \ - !aes_eround01 %f28, %f56, %f58, %f60 ; \ - !aes_eround23 %f30, %f56, %f58, %f62 ; \ - !aes_eround01 %f32, %f0, %f2, %f6 ; \ - !aes_eround23 %f34, %f0, %f2, %f4 ; \ - !aes_eround01 %f32, %f60, %f62, %f56 ; \ - !aes_eround23 %f34, %f60, %f62, %f58 ; \ - !aes_eround01 %f36, %f6, %f4, %f0 ; \ - !aes_eround23 %f38, %f6, %f4, %f2 ; \ - !aes_eround01 %f36, %f56, %f58, %f60 ; \ - !aes_eround23 %f38, %f56, %f58, %f62 ; \ - !aes_eround01 %f40, %f0, %f2, %f6 ; \ - !aes_eround23 %f42, %f0, %f2, %f4 ; \ - !aes_eround01 %f40, %f60, %f62, %f56 ; \ - !aes_eround23 %f42, %f60, %f62, %f58 ; \ - !aes_eround01 %f44, %f6, %f4, %f0 ; \ - !aes_eround23 %f46, %f6, %f4, %f2 ; \ - !aes_eround01 %f44, %f56, %f58, %f60 ; \ - !aes_eround23 %f46, %f56, %f58, %f62 ; \ - !aes_eround01 %f48, %f0, %f2, %f6 ; \ - !aes_eround23 %f50, %f0, %f2, %f4 ; \ - !ldd [%o0 + 0x10], %f0 ; \ - !aes_eround01 %f48, %f60, %f62, %f56 ; \ - !ldd [%o0 + 0x18], %f2 ; \ - !aes_eround23 %f50, %f60, %f62, %f58 ; \ - !aes_eround01_l %f52, %f6, %f4, %f20 ; \ - !aes_eround23_l %f54, %f6, %f4, %f22 ; \ - !ldd [%o0 + 0x20], %f4 ; \ - !aes_eround01_l %f52, %f56, %f58, %f60 ; \ - !ldd [%o0 + 0x28], %f6 ; \ - !aes_eround23_l %f54, %f56, %f58, %f62 - -#define FIRST_TWO_DROUNDS \ - .byte 
0xb2, 0xc8, 0x3e, 0x5d ; \ - .byte 0xb6, 0xc8, 0xbe, 0x7d ; \ - .byte 0xba, 0xc9, 0x36, 0x59 ; \ - .byte 0xbe, 0xc9, 0xb6, 0x79 - !aes_dround01 %f0, %f60, %f62, %f56 ; \ - !aes_dround23 %f2, %f60, %f62, %f58 ; \ - !aes_dround01 %f4, %f56, %f58, %f60 ; \ - !aes_dround23 %f6, %f56, %f58, %f62 - -#define MID_TWO_DROUNDS \ - .byte 0xb2, 0xca, 0x3e, 0x5d ; \ - .byte 0xb6, 0xca, 0xbe, 0x7d ; \ - .byte 0xba, 0xcb, 0x36, 0x59 ; \ - .byte 0xbe, 0xcb, 0xb6, 0x79 - !aes_dround01 %f8, %f60, %f62, %f56 ; \ - !aes_dround23 %f10, %f60, %f62, %f58 ; \ - !aes_dround01 %f12, %f56, %f58, %f60 ; \ - !aes_dround23 %f14, %f56, %f58, %f62 - -#define MID_TWO_DROUNDS_2 \ - .byte 0x8c, 0xca, 0x04, 0x40 ; \ - .byte 0x88, 0xca, 0x84, 0x60 ; \ - .byte 0xb2, 0xca, 0x3e, 0x5d ; \ - .byte 0xb6, 0xca, 0xbe, 0x7d ; \ - .byte 0x80, 0xcb, 0x08, 0x46 ; \ - .byte 0x84, 0xcb, 0x88, 0x66 ; \ - .byte 0xba, 0xcb, 0x36, 0x59 ; \ - .byte 0xbe, 0xcb, 0xb6, 0x79 - !aes_dround01 %f8, %f0, %f2, %f6 ; \ - !aes_dround23 %f10, %f0, %f2, %f4 ; \ - !aes_dround01 %f8, %f60, %f62, %f56 ; \ - !aes_dround23 %f10, %f60, %f62, %f58 ; \ - !aes_dround01 %f12, %f6, %f4, %f0 ; \ - !aes_dround23 %f14, %f6, %f4, %f2 ; \ - !aes_dround01 %f12, %f56, %f58, %f60 ; \ - !aes_dround23 %f14, %f56, %f58, %f62 - -#define TEN_DROUNDS \ - .byte 0xb2, 0xcc, 0x3e, 0x5d ; \ - .byte 0xb6, 0xcc, 0xbe, 0x7d ; \ - .byte 0xba, 0xcd, 0x36, 0x59 ; \ - .byte 0xbe, 0xcd, 0xb6, 0x79 ; \ - .byte 0xb2, 0xce, 0x3e, 0x5d ; \ - .byte 0xb6, 0xce, 0xbe, 0x7d ; \ - .byte 0xba, 0xcf, 0x36, 0x59 ; \ - .byte 0xbe, 0xcf, 0xb6, 0x79 ; \ - .byte 0xb2, 0xc8, 0x7e, 0x5d ; \ - .byte 0xb6, 0xc8, 0xfe, 0x7d ; \ - .byte 0xba, 0xc9, 0x76, 0x59 ; \ - .byte 0xbe, 0xc9, 0xf6, 0x79 ; \ - .byte 0xb2, 0xca, 0x7e, 0x5d ; \ - .byte 0xb6, 0xca, 0xfe, 0x7d ; \ - .byte 0xba, 0xcb, 0x76, 0x59 ; \ - .byte 0xbe, 0xcb, 0xf6, 0x79 ; \ - .byte 0xb2, 0xcc, 0x7e, 0x5d ; \ - .byte 0xb6, 0xcc, 0xfe, 0x7d ; \ - .byte 0xba, 0xcd, 0x76, 0xd9 ; \ - .byte 0xbe, 0xcd, 0xf6, 0xf9 - !aes_dround01 %f16, 
%f60, %f62, %f56 ; \ - !aes_dround23 %f18, %f60, %f62, %f58 ; \ - !aes_dround01 %f20, %f56, %f58, %f60 ; \ - !aes_dround23 %f22, %f56, %f58, %f62 ; \ - !aes_dround01 %f24, %f60, %f62, %f56 ; \ - !aes_dround23 %f26, %f60, %f62, %f58 ; \ - !aes_dround01 %f28, %f56, %f58, %f60 ; \ - !aes_dround23 %f30, %f56, %f58, %f62 ; \ - !aes_dround01 %f32, %f60, %f62, %f56 ; \ - !aes_dround23 %f34, %f60, %f62, %f58 ; \ - !aes_dround01 %f36, %f56, %f58, %f60 ; \ - !aes_dround23 %f38, %f56, %f58, %f62 ; \ - !aes_dround01 %f40, %f60, %f62, %f56 ; \ - !aes_dround23 %f42, %f60, %f62, %f58 ; \ - !aes_dround01 %f44, %f56, %f58, %f60 ; \ - !aes_dround23 %f46, %f56, %f58, %f62 ; \ - !aes_dround01 %f48, %f60, %f62, %f56 ; \ - !aes_dround23 %f50, %f60, %f62, %f58 ; \ - !aes_dround01_l %f52, %f56, %f58, %f60 ; \ - !aes_dround23_l %f54, %f56, %f58, %f62 - -#define TEN_DROUNDS_2 \ - .byte 0x8c, 0xcc, 0x04, 0x40 ; \ - .byte 0x88, 0xcc, 0x84, 0x60 ; \ - .byte 0xb2, 0xcc, 0x3e, 0x5d ; \ - .byte 0xb6, 0xcc, 0xbe, 0x7d ; \ - .byte 0x80, 0xcd, 0x08, 0x46 ; \ - .byte 0x84, 0xcd, 0x88, 0x66 ; \ - .byte 0xba, 0xcd, 0x36, 0x59 ; \ - .byte 0xbe, 0xcd, 0xb6, 0x79 ; \ - .byte 0x8c, 0xce, 0x04, 0x40 ; \ - .byte 0x88, 0xce, 0x84, 0x60 ; \ - .byte 0xb2, 0xce, 0x3e, 0x5d ; \ - .byte 0xb6, 0xce, 0xbe, 0x7d ; \ - .byte 0x80, 0xcf, 0x08, 0x46 ; \ - .byte 0x84, 0xcf, 0x88, 0x66 ; \ - .byte 0xba, 0xcf, 0x36, 0x59 ; \ - .byte 0xbe, 0xcf, 0xb6, 0x79 ; \ - .byte 0x8c, 0xc8, 0x44, 0x40 ; \ - .byte 0x88, 0xc8, 0xc4, 0x60 ; \ - .byte 0xb2, 0xc8, 0x7e, 0x5d ; \ - .byte 0xb6, 0xc8, 0xfe, 0x7d ; \ - .byte 0x80, 0xc9, 0x48, 0x46 ; \ - .byte 0x84, 0xc9, 0xc8, 0x66 ; \ - .byte 0xba, 0xc9, 0x76, 0x59 ; \ - .byte 0xbe, 0xc9, 0xf6, 0x79 ; \ - .byte 0x8c, 0xca, 0x44, 0x40 ; \ - .byte 0x88, 0xca, 0xc4, 0x60 ; \ - .byte 0xb2, 0xca, 0x7e, 0x5d ; \ - .byte 0xb6, 0xca, 0xfe, 0x7d ; \ - .byte 0x80, 0xcb, 0x48, 0x46 ; \ - .byte 0x84, 0xcb, 0xc8, 0x66 ; \ - .byte 0xba, 0xcb, 0x76, 0x59 ; \ - .byte 0xbe, 0xcb, 0xf6, 0x79 ; \ - .byte 0x8c, 
0xcc, 0x44, 0x40 ; \ - .byte 0x88, 0xcc, 0xc4, 0x60 ; \ - .byte 0xb2, 0xcc, 0x7e, 0x5d ; \ - .byte 0xb6, 0xcc, 0xfe, 0x7d ; \ - .byte 0x80, 0xcd, 0x48, 0xc6 ; \ - .byte 0x84, 0xcd, 0xc8, 0xe6 ; \ - .byte 0xba, 0xcd, 0x76, 0xd9 ; \ - .byte 0xbe, 0xcd, 0xf6, 0xf9 - !aes_dround01 %f16, %f0, %f2, %f6 ; \ - !aes_dround23 %f18, %f0, %f2, %f4 ; \ - !aes_dround01 %f16, %f60, %f62, %f56 ; \ - !aes_dround23 %f18, %f60, %f62, %f58 ; \ - !aes_dround01 %f20, %f6, %f4, %f0 ; \ - !aes_dround23 %f22, %f6, %f4, %f2 ; \ - !aes_dround01 %f20, %f56, %f58, %f60 ; \ - !aes_dround23 %f22, %f56, %f58, %f62 ; \ - !aes_dround01 %f24, %f0, %f2, %f6 ; \ - !aes_dround23 %f26, %f0, %f2, %f4 ; \ - !aes_dround01 %f24, %f60, %f62, %f56 ; \ - !aes_dround23 %f26, %f60, %f62, %f58 ; \ - !aes_dround01 %f28, %f6, %f4, %f0 ; \ - !aes_dround23 %f30, %f6, %f4, %f2 ; \ - !aes_dround01 %f28, %f56, %f58, %f60 ; \ - !aes_dround23 %f30, %f56, %f58, %f62 ; \ - !aes_dround01 %f32, %f0, %f2, %f6 ; \ - !aes_dround23 %f34, %f0, %f2, %f4 ; \ - !aes_dround01 %f32, %f60, %f62, %f56 ; \ - !aes_dround23 %f34, %f60, %f62, %f58 ; \ - !aes_dround01 %f36, %f6, %f4, %f0 ; \ - !aes_dround23 %f38, %f6, %f4, %f2 ; \ - !aes_dround01 %f36, %f56, %f58, %f60 ; \ - !aes_dround23 %f38, %f56, %f58, %f62 ; \ - !aes_dround01 %f40, %f0, %f2, %f6 ; \ - !aes_dround23 %f42, %f0, %f2, %f4 ; \ - !aes_dround01 %f40, %f60, %f62, %f56 ; \ - !aes_dround23 %f42, %f60, %f62, %f58 ; \ - !aes_dround01 %f44, %f6, %f4, %f0 ; \ - !aes_dround23 %f46, %f6, %f4, %f2 ; \ - !aes_dround01 %f44, %f56, %f58, %f60 ; \ - !aes_dround23 %f46, %f56, %f58, %f62 ; \ - !aes_dround01 %f48, %f0, %f2, %f6 ; \ - !aes_dround23 %f50, %f0, %f2, %f4 ; \ - !aes_dround01 %f48, %f60, %f62, %f56 ; \ - !aes_dround23 %f50, %f60, %f62, %f58 ; \ - !aes_dround01_l %f52, %f6, %f4, %f0 ; \ - !aes_dround23_l %f54, %f6, %f4, %f2 ; \ - !aes_dround01_l %f52, %f56, %f58, %f60 ; \ - !aes_dround23_l %f54, %f56, %f58, %f62 - -#define TWELVE_DROUNDS \ - MID_TWO_DROUNDS ; \ - TEN_DROUNDS - 
-#define TWELVE_DROUNDS_2 \ - MID_TWO_DROUNDS_2 ; \ - TEN_DROUNDS_2 - -#define FOURTEEN_DROUNDS \ - FIRST_TWO_DROUNDS ; \ - TWELVE_DROUNDS - -#define FOURTEEN_DROUNDS_2 \ - .byte 0xb0, 0xc8, 0x2c, 0x54 ; \ - .byte 0xac, 0xc8, 0xac, 0x74 ; \ - ldd [%o0 + 0x80], %f20 ; \ - .byte 0xb2, 0xc8, 0x3e, 0x5d ; \ - .byte 0xb6, 0xc8, 0xbe, 0x7d ; \ - .byte 0x80, 0xc9, 0x2c, 0x58 ; \ - .byte 0x84, 0xc9, 0xac, 0x78 ; \ - ldd [%o0 + 0x88], %f22 ; \ - .byte 0xba, 0xc9, 0x36, 0x59 ; \ - ldd [%o0 + 0x70], %f24 ; \ - .byte 0xbe, 0xc9, 0xb6, 0x79 ; \ - .byte 0x8c, 0xca, 0x04, 0x40 ; \ - .byte 0x88, 0xca, 0x84, 0x60 ; \ - .byte 0xb2, 0xca, 0x3e, 0x5d ; \ - .byte 0xb6, 0xca, 0xbe, 0x7d ; \ - .byte 0x80, 0xcb, 0x08, 0x46 ; \ - .byte 0x84, 0xcb, 0x88, 0x66 ; \ - .byte 0xba, 0xcb, 0x36, 0x59 ; \ - .byte 0xbe, 0xcb, 0xb6, 0x79 ; \ - .byte 0x8c, 0xcc, 0x04, 0x40 ; \ - .byte 0x88, 0xcc, 0x84, 0x60 ; \ - .byte 0xb2, 0xcc, 0x3e, 0x5d ; \ - .byte 0xb6, 0xcc, 0xbe, 0x7d ; \ - .byte 0x80, 0xcd, 0x08, 0x46 ; \ - .byte 0x84, 0xcd, 0x88, 0x66 ; \ - .byte 0xba, 0xcd, 0x36, 0x59 ; \ - .byte 0xbe, 0xcd, 0xb6, 0x79 ; \ - .byte 0x8c, 0xce, 0x04, 0x40 ; \ - .byte 0x88, 0xce, 0x84, 0x60 ; \ - .byte 0xb2, 0xce, 0x3e, 0x5d ; \ - .byte 0xb6, 0xce, 0xbe, 0x7d ; \ - .byte 0x80, 0xcf, 0x08, 0x46 ; \ - .byte 0x84, 0xcf, 0x88, 0x66 ; \ - .byte 0xba, 0xcf, 0x36, 0x59 ; \ - .byte 0xbe, 0xcf, 0xb6, 0x79 ; \ - .byte 0x8c, 0xc8, 0x44, 0x40 ; \ - .byte 0x88, 0xc8, 0xc4, 0x60 ; \ - .byte 0xb2, 0xc8, 0x7e, 0x5d ; \ - .byte 0xb6, 0xc8, 0xfe, 0x7d ; \ - .byte 0x80, 0xc9, 0x48, 0x46 ; \ - .byte 0x84, 0xc9, 0xc8, 0x66 ; \ - .byte 0xba, 0xc9, 0x76, 0x59 ; \ - .byte 0xbe, 0xc9, 0xf6, 0x79 ; \ - .byte 0x8c, 0xca, 0x44, 0x40 ; \ - .byte 0x88, 0xca, 0xc4, 0x60 ; \ - .byte 0xb2, 0xca, 0x7e, 0x5d ; \ - .byte 0xb6, 0xca, 0xfe, 0x7d ; \ - .byte 0x80, 0xcb, 0x48, 0x46 ; \ - .byte 0x84, 0xcb, 0xc8, 0x66 ; \ - .byte 0xba, 0xcb, 0x76, 0x59 ; \ - .byte 0xbe, 0xcb, 0xf6, 0x79 ; \ - .byte 0x8c, 0xcc, 0x44, 0x40 ; \ - .byte 0x88, 0xcc, 0xc4, 
0x60 ; \ - ldd [%o0 + 0xd0], %f0 ; \ - .byte 0xb2, 0xcc, 0x7e, 0x5d ; \ - ldd [%o0 + 0xd8], %f2 ; \ - .byte 0xb6, 0xcc, 0xfe, 0x7d ; \ - .byte 0xa8, 0xcd, 0x48, 0xc6 ; \ - .byte 0xac, 0xcd, 0xc8, 0xe6 ; \ - ldd [%o0 + 0xc0], %f4 ; \ - .byte 0xba, 0xcd, 0x76, 0xd9 ; \ - ldd [%o0 + 0xc8], %f6 ; \ - .byte 0xbe, 0xcd, 0xf6, 0xf9 - !aes_dround01 %f0, %f20, %f22, %f24 ; \ - !aes_dround23 %f2, %f20, %f22, %f22 ; \ - !ldd [%o0 + 0x80], %f20 ; \ - !aes_dround01 %f0, %f60, %f62, %f56 ; \ - !aes_dround23 %f2, %f60, %f62, %f58 ; \ - !aes_dround01 %f4, %f24, %f22, %f0 ; \ - !aes_dround23 %f6, %f24, %f22, %f2 ; \ - !ldd [%o0 + 0x88], %f22 ; \ - !aes_dround01 %f4, %f56, %f58, %f60 ; \ - !ldd [%o0 + 0x70], %f24 ; \ - !aes_dround23 %f6, %f56, %f58, %f62 ; \ - !aes_dround01 %f8, %f0, %f2, %f6 ; \ - !aes_dround23 %f10, %f0, %f2, %f4 ; \ - !aes_dround01 %f8, %f60, %f62, %f56 ; \ - !aes_dround23 %f10, %f60, %f62, %f58 ; \ - !aes_dround01 %f12, %f6, %f4, %f0 ; \ - !aes_dround23 %f14, %f6, %f4, %f2 ; \ - !aes_dround01 %f12, %f56, %f58, %f60 ; \ - !aes_dround23 %f14, %f56, %f58, %f62 ; \ - !aes_dround01 %f16, %f0, %f2, %f6 ; \ - !aes_dround23 %f18, %f0, %f2, %f4 ; \ - !aes_dround01 %f16, %f60, %f62, %f56 ; \ - !aes_dround23 %f18, %f60, %f62, %f58 ; \ - !aes_dround01 %f20, %f6, %f4, %f0 ; \ - !aes_dround23 %f22, %f6, %f4, %f2 ; \ - !aes_dround01 %f20, %f56, %f58, %f60 ; \ - !aes_dround23 %f22, %f56, %f58, %f62 ; \ - !aes_dround01 %f24, %f0, %f2, %f6 ; \ - !aes_dround23 %f26, %f0, %f2, %f4 ; \ - !aes_dround01 %f24, %f60, %f62, %f56 ; \ - !aes_dround23 %f26, %f60, %f62, %f58 ; \ - !aes_dround01 %f28, %f6, %f4, %f0 ; \ - !aes_dround23 %f30, %f6, %f4, %f2 ; \ - !aes_dround01 %f28, %f56, %f58, %f60 ; \ - !aes_dround23 %f30, %f56, %f58, %f62 ; \ - !aes_dround01 %f32, %f0, %f2, %f6 ; \ - !aes_dround23 %f34, %f0, %f2, %f4 ; \ - !aes_dround01 %f32, %f60, %f62, %f56 ; \ - !aes_dround23 %f34, %f60, %f62, %f58 ; \ - !aes_dround01 %f36, %f6, %f4, %f0 ; \ - !aes_dround23 %f38, %f6, %f4, %f2 ; \ - 
!aes_dround01 %f36, %f56, %f58, %f60 ; \ - !aes_dround23 %f38, %f56, %f58, %f62 ; \ - !aes_dround01 %f40, %f0, %f2, %f6 ; \ - !aes_dround23 %f42, %f0, %f2, %f4 ; \ - !aes_dround01 %f40, %f60, %f62, %f56 ; \ - !aes_dround23 %f42, %f60, %f62, %f58 ; \ - !aes_dround01 %f44, %f6, %f4, %f0 ; \ - !aes_dround23 %f46, %f6, %f4, %f2 ; \ - !aes_dround01 %f44, %f56, %f58, %f60 ; \ - !aes_dround23 %f46, %f56, %f58, %f62 ; \ - !aes_dround01 %f48, %f0, %f2, %f6 ; \ - !aes_dround23 %f50, %f0, %f2, %f4 ; \ - !ldd [%o0 + 0xd0], %f0 ; \ - !aes_dround01 %f48, %f60, %f62, %f56 ; \ - !ldd [%o0 + 0xd8], %f2 ; \ - !aes_dround23 %f50, %f60, %f62, %f58 ; \ - !aes_dround01_l %f52, %f6, %f4, %f20 ; \ - !aes_dround23_l %f54, %f6, %f4, %f22 ; \ - !ldd [%o0 + 0xc0], %f4 ; \ - !aes_dround01_l %f52, %f56, %f58, %f60 ; \ - !ldd [%o0 + 0xc8], %f6 ; \ - !aes_dround23_l %f54, %f56, %f58, %f62 - - - ENTRY(t4_aes128_load_keys_for_encrypt) - - ldd [%o0 + 0x10], %f16 - ldd [%o0 + 0x18], %f18 - ldd [%o0 + 0x20], %f20 - ldd [%o0 + 0x28], %f22 - ldd [%o0 + 0x30], %f24 - ldd [%o0 + 0x38], %f26 - ldd [%o0 + 0x40], %f28 - ldd [%o0 + 0x48], %f30 - ldd [%o0 + 0x50], %f32 - ldd [%o0 + 0x58], %f34 - ldd [%o0 + 0x60], %f36 - ldd [%o0 + 0x68], %f38 - ldd [%o0 + 0x70], %f40 - ldd [%o0 + 0x78], %f42 - ldd [%o0 + 0x80], %f44 - ldd [%o0 + 0x88], %f46 - ldd [%o0 + 0x90], %f48 - ldd [%o0 + 0x98], %f50 - ldd [%o0 + 0xa0], %f52 - retl - ldd [%o0 + 0xa8], %f54 - - SET_SIZE(t4_aes128_load_keys_for_encrypt) - - - ENTRY(t4_aes192_load_keys_for_encrypt) - - ldd [%o0 + 0x10], %f8 - ldd [%o0 + 0x18], %f10 - ldd [%o0 + 0x20], %f12 - ldd [%o0 + 0x28], %f14 - ldd [%o0 + 0x30], %f16 - ldd [%o0 + 0x38], %f18 - ldd [%o0 + 0x40], %f20 - ldd [%o0 + 0x48], %f22 - ldd [%o0 + 0x50], %f24 - ldd [%o0 + 0x58], %f26 - ldd [%o0 + 0x60], %f28 - ldd [%o0 + 0x68], %f30 - ldd [%o0 + 0x70], %f32 - ldd [%o0 + 0x78], %f34 - ldd [%o0 + 0x80], %f36 - ldd [%o0 + 0x88], %f38 - ldd [%o0 + 0x90], %f40 - ldd [%o0 + 0x98], %f42 - ldd [%o0 + 0xa0], %f44 - ldd 
[%o0 + 0xa8], %f46 - ldd [%o0 + 0xb0], %f48 - ldd [%o0 + 0xb8], %f50 - ldd [%o0 + 0xc0], %f52 - retl - ldd [%o0 + 0xc8], %f54 - - SET_SIZE(t4_aes192_load_keys_for_encrypt) - - - ENTRY(t4_aes256_load_keys_for_encrypt) - - ldd [%o0 + 0x10], %f0 - ldd [%o0 + 0x18], %f2 - ldd [%o0 + 0x20], %f4 - ldd [%o0 + 0x28], %f6 - ldd [%o0 + 0x30], %f8 - ldd [%o0 + 0x38], %f10 - ldd [%o0 + 0x40], %f12 - ldd [%o0 + 0x48], %f14 - ldd [%o0 + 0x50], %f16 - ldd [%o0 + 0x58], %f18 - ldd [%o0 + 0x60], %f20 - ldd [%o0 + 0x68], %f22 - ldd [%o0 + 0x70], %f24 - ldd [%o0 + 0x78], %f26 - ldd [%o0 + 0x80], %f28 - ldd [%o0 + 0x88], %f30 - ldd [%o0 + 0x90], %f32 - ldd [%o0 + 0x98], %f34 - ldd [%o0 + 0xa0], %f36 - ldd [%o0 + 0xa8], %f38 - ldd [%o0 + 0xb0], %f40 - ldd [%o0 + 0xb8], %f42 - ldd [%o0 + 0xc0], %f44 - ldd [%o0 + 0xc8], %f46 - ldd [%o0 + 0xd0], %f48 - ldd [%o0 + 0xd8], %f50 - ldd [%o0 + 0xe0], %f52 - retl - ldd [%o0 + 0xe8], %f54 - - SET_SIZE(t4_aes256_load_keys_for_encrypt) - - -#define TEST_PARALLEL_ECB_ENCRYPT -#ifdef TEST_PARALLEL_ECB_ENCRYPT - ENTRY(t4_aes128_ecb_encrypt) - - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - and %o3, 16, %o4 - brz %o4, ecbenc128_loop - nop - - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f60 - movxtod %g4, %f62 - - TEN_EROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be ecbenc128_loop_end - add %o2, 16, %o2 - -ecbenc128_loop: - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f0 - movxtod %g4, %f2 - ldx [%o1 + 16], %g3 !input - ldx [%o1 + 24], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f60 - movxtod %g4, %f62 - - TEN_EROUNDS_2 - - std %f0, [%o2] - std %f2, [%o2 + 8] - - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne ecbenc128_loop - add %o2, 32, %o2 -ecbenc128_loop_end: - retl - nop - - SET_SIZE(t4_aes128_ecb_encrypt) - - - ENTRY(t4_aes192_ecb_encrypt) - - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - and %o3, 16, %o4 - brz %o4, ecbenc192_loop - nop - - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f60 - movxtod %g4, %f62 - - TWELVE_EROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be ecbenc192_loop_end - add %o2, 16, %o2 - -ecbenc192_loop: - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f0 - movxtod %g4, %f2 - ldx [%o1 + 16], %g3 !input - ldx [%o1 + 24], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f60 - movxtod %g4, %f62 - - TWELVE_EROUNDS_2 - - std %f0, [%o2] - std %f2, [%o2 + 8] - - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne ecbenc192_loop - add %o2, 32, %o2 -ecbenc192_loop_end: - retl - nop - - SET_SIZE(t4_aes192_ecb_encrypt) - - - ENTRY(t4_aes256_ecb_encrypt) - - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - and %o3, 16, %o4 - brz %o4, ecbenc256_loop - nop - - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f60 - movxtod %g4, %f62 - - FOURTEEN_EROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be ecbenc256_loop_end - add %o2, 16, %o2 - -ecbenc256_loop: - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f20 - movxtod %g4, %f22 - ldx [%o1 + 16], %g3 !input - ldx [%o1 + 24], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f60 - movxtod %g4, %f62 - - FOURTEEN_EROUNDS_2 - - std %f20, [%o2] - std %f22, [%o2 + 8] - - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne ecbenc256_loop - add %o2, 32, %o2 - - ldd [%o0 + 0x60], %f20 - ldd [%o0 + 0x68], %f22 - -ecbenc256_loop_end: - retl - nop - - SET_SIZE(t4_aes256_ecb_encrypt) - -#else - - ENTRY(t4_aes128_ecb_encrypt) - - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! ks[1] - -ecbenc128_loop: - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f60 - movxtod %g4, %f62 - - TEN_EROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne ecbenc128_loop - add %o2, 16, %o2 - - retl - nop - - SET_SIZE(t4_aes128_ecb_encrypt) - - - ENTRY(t4_aes192_ecb_encrypt) - - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - -ecbenc192_loop: - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f60 - movxtod %g4, %f62 - - TWELVE_EROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne ecbenc192_loop - add %o2, 16, %o2 - - retl - nop - - SET_SIZE(t4_aes192_ecb_encrypt) - - - ENTRY(t4_aes256_ecb_encrypt) - - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! ks[1] - -ecbenc256_loop: - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f60 - movxtod %g4, %f62 - - FOURTEEN_EROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne ecbenc256_loop - add %o2, 16, %o2 - - retl - nop - - SET_SIZE(t4_aes256_ecb_encrypt) -#endif - - - ENTRY(t4_aes128_cbc_encrypt) - - ldd [%o4], %f60 ! IV - ldd [%o4 +8], %f62 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! ks[1] - -cbcenc128_loop: - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f56 - movxtod %g4, %f58 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - TEN_EROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne cbcenc128_loop - add %o2, 16, %o2 - - std %f60, [%o4] - retl - std %f62, [%o4 + 8] - - SET_SIZE(t4_aes128_cbc_encrypt) - - - ENTRY(t4_aes192_cbc_encrypt) - - ldd [%o4], %f60 ! IV - ldd [%o4 + 8], %f62 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - -cbcenc192_loop: - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f56 - movxtod %g4, %f58 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - TWELVE_EROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne cbcenc192_loop - add %o2, 16, %o2 - - std %f60, [%o4] - retl - std %f62, [%o4 + 8] - - SET_SIZE(t4_aes192_cbc_encrypt) - - - ENTRY(t4_aes256_cbc_encrypt) - - ldd [%o4], %f60 ! IV - ldd [%o4 + 8], %f62 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! ks[1] - -cbcenc256_loop: - ldx [%o1], %g3 !input - ldx [%o1 + 8], %g4 !input - xor %g1, %g3, %g3 !input ^ ks[0-1] - xor %g2, %g4, %g4 !input ^ ks[0-1] - movxtod %g3, %f56 - movxtod %g4, %f58 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - FOURTEEN_EROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne cbcenc256_loop - add %o2, 16, %o2 - - std %f60, [%o4] - retl - std %f62, [%o4 + 8] - - SET_SIZE(t4_aes256_cbc_encrypt) - - -#define TEST_PARALLEL_CTR_CRYPT -#ifdef TEST_PARALLEL_CTR_CRYPT - ENTRY(t4_aes128_ctr_crypt) - - ldx [%o4], %g3 ! IV - ldx [%o4 +8], %g4 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - and %o3, 16, %g5 - brz, %g5, ctr128_loop - - xor %g1, %g3, %g5 - movxtod %g5, %f60 - xor %g2, %g4, %g5 - movxtod %g5, %f62 - inc %g4 - - TEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be ctr128_loop_end - add %o2, 16, %o2 - -ctr128_loop: - xor %g1, %g3, %g5 - movxtod %g5, %f0 - xor %g2, %g4, %g5 - movxtod %g5, %f2 - inc %g4 - - xor %g1, %g3, %g5 - movxtod %g5, %f60 - xor %g2, %g4, %g5 - movxtod %g5, %f62 - inc %g4 - - TEN_EROUNDS_2 - - ldd [%o1], %f6 !input - ldd [%o1 + 8], %f4 !input - ldd [%o1 + 16], %f56 !input - ldd [%o1 + 24], %f58 !input - fxor %f0, %f6, %f0 - fxor %f2, %f4, %f2 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - std %f0, [%o2] - std %f2, [%o2 + 8] - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne ctr128_loop - add %o2, 32, %o2 - -ctr128_loop_end: - stx %g3, [%o4] - retl - stx %g4, [%o4 + 8] - - SET_SIZE(t4_aes128_ctr_crypt) - - - ENTRY(t4_aes192_ctr_crypt) - - ldx [%o4], %g3 ! IV - ldx [%o4 +8], %g4 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - and %o3, 16, %g5 - brz, %g5, ctr192_loop - - xor %g1, %g3, %g5 - movxtod %g5, %f60 - xor %g2, %g4, %g5 - movxtod %g5, %f62 - inc %g4 - - TWELVE_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be ctr192_loop_end - add %o2, 16, %o2 - -ctr192_loop: - xor %g1, %g3, %g5 - movxtod %g5, %f0 - xor %g2, %g4, %g5 - movxtod %g5, %f2 - inc %g4 - - xor %g1, %g3, %g5 - movxtod %g5, %f60 - xor %g2, %g4, %g5 - movxtod %g5, %f62 - inc %g4 - - TWELVE_EROUNDS_2 - - ldd [%o1], %f6 !input - ldd [%o1 + 8], %f4 !input - ldd [%o1 + 16], %f56 !input - ldd [%o1 + 24], %f58 !input - fxor %f0, %f6, %f0 - fxor %f2, %f4, %f2 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - std %f0, [%o2] - std %f2, [%o2 + 8] - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne ctr192_loop - add %o2, 32, %o2 - -ctr192_loop_end: - stx %g3, [%o4] - retl - stx %g4, [%o4 + 8] - - SET_SIZE(t4_aes192_ctr_crypt) - - - ENTRY(t4_aes256_ctr_crypt) - - ldx [%o4], %g3 ! IV - ldx [%o4 +8], %g4 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - and %o3, 16, %g5 - brz, %g5, ctr256_loop - - xor %g1, %g3, %g5 - movxtod %g5, %f60 - xor %g2, %g4, %g5 - movxtod %g5, %f62 - inc %g4 - - FOURTEEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be ctr256_loop_end - add %o2, 16, %o2 - -ctr256_loop: - xor %g1, %g3, %g5 - movxtod %g5, %f20 - xor %g2, %g4, %g5 - movxtod %g5, %f22 - inc %g4 - - xor %g1, %g3, %g5 - movxtod %g5, %f60 - xor %g2, %g4, %g5 - movxtod %g5, %f62 - inc %g4 - - FOURTEEN_EROUNDS_2 - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f20, %f56, %f20 - fxor %f22, %f58, %f22 - ldd [%o1 + 16], %f56 !input - ldd [%o1 + 24], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - std %f20, [%o2] - std %f22, [%o2 + 8] - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne ctr256_loop - add %o2, 32, %o2 - - ldd [%o0 + 0x60], %f20 - ldd [%o0 + 0x68], %f22 - -ctr256_loop_end: - stx %g3, [%o4] - retl - stx %g4, [%o4 + 8] - - SET_SIZE(t4_aes256_ctr_crypt) - -#else - - ENTRY(t4_aes128_ctr_crypt) - - ldx [%o4], %g3 ! IV - ldx [%o4 +8], %g4 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! ks[1] - -ctr128_loop: - xor %g1, %g3, %g5 - movxtod %g5, %f60 - xor %g2, %g4, %g5 - movxtod %g5, %f62 - inc %g4 - - TEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne ctr128_loop - add %o2, 16, %o2 - - stx %g3, [%o4] - retl - stx %g4, [%o4 + 8] - - SET_SIZE(t4_aes128_ctr_crypt) - - ENTRY(t4_aes192_ctr_crypt) - - ldx [%o4], %g3 ! IV - ldx [%o4 +8], %g4 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - -ctr192_loop: - xor %g1, %g3, %g5 - movxtod %g5, %f60 - xor %g2, %g4, %g5 - movxtod %g5, %f62 - inc %g4 - - TWELVE_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne ctr192_loop - add %o2, 16, %o2 - - stx %g3, [%o4] - retl - stx %g4, [%o4 + 8] - - SET_SIZE(t4_aes192_ctr_crypt) - - - ENTRY(t4_aes256_ctr_crypt) - - ldx [%o4], %g3 ! IV - ldx [%o4 +8], %g4 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! ks[1] - -ctr256_loop: - xor %g1, %g3, %g5 - movxtod %g5, %f60 - xor %g2, %g4, %g5 - movxtod %g5, %f62 - inc %g4 - - FOURTEEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne ctr256_loop - add %o2, 16, %o2 - - stx %g3, [%o4] - retl - stx %g4, [%o4 + 8] - - SET_SIZE(t4_aes256_ctr_crypt) - -#endif - - ENTRY(t4_aes128_cfb128_encrypt) - - ldd [%o4], %f60 ! IV - ldd [%o4 +8], %f62 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! ks[1] - -cfb128_128_loop: - movxtod %g1, %f56 - movxtod %g2, %f58 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - TEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne cfb128_128_loop - add %o2, 16, %o2 - - std %f60, [%o4] - retl - std %f62, [%o4 + 8] - - SET_SIZE(t4_aes128_cfb128_encrypt) - - - ENTRY(t4_aes192_cfb128_encrypt) - - ldd [%o4], %f60 ! IV - ldd [%o4 +8], %f62 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - -cfb128_192_loop: - movxtod %g1, %f56 - movxtod %g2, %f58 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - TWELVE_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne cfb128_192_loop - add %o2, 16, %o2 - - std %f60, [%o4] - retl - std %f62, [%o4 + 8] - - SET_SIZE(t4_aes192_cfb128_encrypt) - - - ENTRY(t4_aes256_cfb128_encrypt) - - ldd [%o4], %f60 ! IV - ldd [%o4 +8], %f62 ! IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! ks[1] - -cfb128_256_loop: - movxtod %g1, %f56 - movxtod %g2, %f58 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - FOURTEEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne cfb128_256_loop - add %o2, 16, %o2 - - std %f60, [%o4] - retl - std %f62, [%o4 + 8] - - SET_SIZE(t4_aes256_cfb128_encrypt) - - - ENTRY(t4_aes128_load_keys_for_decrypt) - - ldd [%o0], %f52 - ldd [%o0 + 0x8], %f54 - ldd [%o0 + 0x10], %f48 - ldd [%o0 + 0x18], %f50 - ldd [%o0 + 0x20], %f44 - ldd [%o0 + 0x28], %f46 - ldd [%o0 + 0x30], %f40 - ldd [%o0 + 0x38], %f42 - ldd [%o0 + 0x40], %f36 - ldd [%o0 + 0x48], %f38 - ldd [%o0 + 0x50], %f32 - ldd [%o0 + 0x58], %f34 - ldd [%o0 + 0x60], %f28 - ldd [%o0 + 0x68], %f30 - ldd [%o0 + 0x70], %f24 - ldd [%o0 + 0x78], %f26 - ldd [%o0 + 0x80], %f20 - ldd [%o0 + 0x88], %f22 - ldd [%o0 + 0x90], %f16 - retl - ldd [%o0 + 0x98], %f18 - - SET_SIZE(t4_aes128_load_keys_for_decrypt) - - - ENTRY(t4_aes192_load_keys_for_decrypt) - - ldd [%o0], %f52 - ldd [%o0 + 0x8], %f54 - ldd [%o0 + 0x10], %f48 - ldd [%o0 + 0x18], %f50 - ldd [%o0 + 0x20], %f44 - ldd [%o0 + 0x28], %f46 - ldd [%o0 + 0x30], %f40 - ldd [%o0 + 0x38], %f42 - ldd [%o0 + 0x40], %f36 - ldd [%o0 + 0x48], %f38 - ldd [%o0 + 0x50], %f32 - ldd [%o0 + 0x58], %f34 - ldd [%o0 + 0x60], %f28 - 
ldd [%o0 + 0x68], %f30 - ldd [%o0 + 0x70], %f24 - ldd [%o0 + 0x78], %f26 - ldd [%o0 + 0x80], %f20 - ldd [%o0 + 0x88], %f22 - ldd [%o0 + 0x90], %f16 - ldd [%o0 + 0x98], %f18 - ldd [%o0 + 0xa0], %f12 - ldd [%o0 + 0xa8], %f14 - ldd [%o0 + 0xb0], %f8 - retl - ldd [%o0 + 0xb8], %f10 - - SET_SIZE(t4_aes192_load_keys_for_decrypt) - - - ENTRY(t4_aes256_load_keys_for_decrypt) - - - ldd [%o0], %f52 - ldd [%o0 + 0x8], %f54 - ldd [%o0 + 0x10], %f48 - ldd [%o0 + 0x18], %f50 - ldd [%o0 + 0x20], %f44 - ldd [%o0 + 0x28], %f46 - ldd [%o0 + 0x30], %f40 - ldd [%o0 + 0x38], %f42 - ldd [%o0 + 0x40], %f36 - ldd [%o0 + 0x48], %f38 - ldd [%o0 + 0x50], %f32 - ldd [%o0 + 0x58], %f34 - ldd [%o0 + 0x60], %f28 - ldd [%o0 + 0x68], %f30 - ldd [%o0 + 0x70], %f24 - ldd [%o0 + 0x78], %f26 - ldd [%o0 + 0x80], %f20 - ldd [%o0 + 0x88], %f22 - ldd [%o0 + 0x90], %f16 - ldd [%o0 + 0x98], %f18 - ldd [%o0 + 0xa0], %f12 - ldd [%o0 + 0xa8], %f14 - ldd [%o0 + 0xb0], %f8 - ldd [%o0 + 0xb8], %f10 - ldd [%o0 + 0xc0], %f4 - ldd [%o0 + 0xc8], %f6 - ldd [%o0 + 0xd0], %f0 - retl - ldd [%o0 + 0xd8], %f2 - - SET_SIZE(t4_aes256_load_keys_for_decrypt) - - -#define TEST_PARALLEL_ECB_DECRYPT -#ifdef TEST_PARALLEL_ECB_DECRYPT - ENTRY(t4_aes128_ecb_decrypt) - - ldx [%o0 + 0xa0], %g1 !ks[last-1] - ldx [%o0 + 0xa8], %g2 !ks[last] - and %o3, 16, %o4 - brz %o4, ecbdec128_loop - nop - - ldx [%o1], %o4 - ldx [%o1 + 8], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f60 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f62 - - TEN_DROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 0x8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be ecbdec128_loop_end - add %o2, 16, %o2 - -ecbdec128_loop: - ldx [%o1], %o4 - ldx [%o1 + 8], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f0 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f2 - ldx [%o1 + 16], %o4 - ldx [%o1 + 24], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f60 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f62 - - TEN_DROUNDS_2 - - std %f0, [%o2] - std %f2, [%o2 + 
8] - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne ecbdec128_loop - add %o2, 32, %o2 -ecbdec128_loop_end: - - retl - nop - - SET_SIZE(t4_aes128_ecb_decrypt) - - ENTRY(t4_aes192_ecb_decrypt) - - ldx [%o0 + 0xc0], %g1 !ks[last-1] - ldx [%o0 + 0xc8], %g2 !ks[last] - and %o3, 16, %o4 - brz %o4, ecbdec192_loop - nop - - ldx [%o1], %o4 - ldx [%o1 + 8], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f60 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f62 - - TWELVE_DROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 0x8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be ecbdec192_loop_end - add %o2, 16, %o2 - -ecbdec192_loop: - ldx [%o1], %o4 - ldx [%o1 + 8], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f0 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f2 - ldx [%o1 + 16], %o4 - ldx [%o1 + 24], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f60 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f62 - - TWELVE_DROUNDS_2 - - std %f0, [%o2] - std %f2, [%o2 + 8] - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne ecbdec192_loop - add %o2, 32, %o2 -ecbdec192_loop_end: - - retl - nop - - SET_SIZE(t4_aes192_ecb_decrypt) - - - ENTRY(t4_aes256_ecb_decrypt) - - ldx [%o0 + 0xe0], %g1 !ks[last-1] - ldx [%o0 + 0xe8], %g2 !ks[last] - and %o3, 16, %o4 - brz %o4, ecbdec256_loop - nop - - ldx [%o1], %o4 - ldx [%o1 + 8], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f60 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f62 - - FOURTEEN_DROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 0x8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be ecbdec256_loop_end - add %o2, 16, %o2 - -ecbdec256_loop: - ldx [%o1], %o4 - ldx [%o1 + 8], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f20 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f22 - ldx [%o1 + 16], %o4 - ldx [%o1 + 24], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f60 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f62 - - 
FOURTEEN_DROUNDS_2 - - std %f20, [%o2] - std %f22, [%o2 + 8] - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne ecbdec256_loop - add %o2, 32, %o2 - - ldd [%o0 + 0x80], %f20 - ldd [%o0 + 0x88], %f22 - -ecbdec256_loop_end: - - retl - nop - - SET_SIZE(t4_aes256_ecb_decrypt) - -#else - - ENTRY(t4_aes128_ecb_decrypt) - - ldx [%o0 + 0xa0], %g1 !ks[last-1] - ldx [%o0 + 0xa8], %g2 !ks[last] - -ecbdec128_loop: - ldx [%o1], %o4 - ldx [%o1 + 8], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f60 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f62 - - TEN_DROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 0x8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne ecbdec128_loop - add %o2, 16, %o2 - - retl - nop - - SET_SIZE(t4_aes128_ecb_decrypt) - - - ENTRY(t4_aes192_ecb_decrypt) - - ldx [%o0 + 0xc0], %g1 !ks[last-1] - ldx [%o0 + 0xc8], %g2 !ks[last] - -ecbdec192_loop: - ldx [%o1], %o4 - ldx [%o1 + 8], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f60 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f62 - - TWELVE_DROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 0x8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne ecbdec192_loop - add %o2, 16, %o2 - - retl - nop - - SET_SIZE(t4_aes192_ecb_decrypt) - - - ENTRY(t4_aes256_ecb_decrypt) - - ldx [%o0 + 0xe0], %g1 !ks[last-1] - ldx [%o0 + 0xe8], %g2 !ks[last] - -ecbdec256_loop: - ldx [%o1], %o4 - ldx [%o1 + 8], %o5 - xor %g1, %o4, %g3 !initial ARK - movxtod %g3, %f60 - xor %g2, %o5, %g3 !initial ARK - movxtod %g3, %f62 - - FOURTEEN_DROUNDS - - std %f60, [%o2] - std %f62, [%o2 + 0x8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne ecbdec256_loop - add %o2, 16, %o2 - - retl - nop - - SET_SIZE(t4_aes256_ecb_decrypt) - -#endif - -#define TEST_PARALLEL_CBC_DECRYPT -#ifdef EST_PARALLEL_CBC_DECRYPT - ENTRY(t4_aes128_cbc_decrypt) - - save %sp, -SA(MINFRAME), %sp - ldx [%i4], %o0 !IV - ldx [%i4 + 8], %o1 !IV - ldx [%i0 + 0xa0], %o2 !ks[last-1] - ldx [%i0 + 0xa8], %o3 !ks[last] - and %i3, 16, %o4 - 
brz %o4, cbcdec128_loop - nop - - ldx [%i1], %o4 - ldx [%i1 + 8], %o5 - xor %o2, %o4, %g1 !initial ARK - movxtod %g1, %f60 - xor %o3, %o5, %g1 !initial ARK - movxtod %g1, %f62 - - TEN_DROUNDS - - movxtod %o0, %f56 - movxtod %o1, %f58 - mov %o4, %o0 !save last block as next IV - mov %o5, %o1 - fxor %f56, %f60, %f60 !add in previous IV - fxor %f58, %f62, %f62 - - std %f60, [%i2] - std %f62, [%i2 + 0x8] - - add %i1, 16, %i1 - subcc %i3, 16, %i3 - be cbcdec128_loop_end - add %i2, 16, %i2 - - -cbcdec128_loop: - ldx [%i1], %g4 - ldx [%i1 + 8], %g5 - xor %o2, %g4, %g1 !initial ARK - movxtod %g1, %f0 - xor %o3, %g5, %g1 !initial ARK - movxtod %g1, %f2 - - ldx [%i1 + 16], %o4 - ldx [%i1 + 24], %o5 - xor %o2, %o4, %g1 !initial ARK - movxtod %g1, %f60 - xor %o3, %o5, %g1 !initial ARK - movxtod %g1, %f62 - - TEN_DROUNDS_2 - - movxtod %o0, %f6 - movxtod %o1, %f4 - fxor %f6, %f0, %f0 !add in previous IV - fxor %f4, %f2, %f2 - - std %f0, [%i2] - std %f2, [%i2 + 8] - - movxtod %g4, %f56 - movxtod %g5, %f58 - mov %o4, %o0 !save last block as next IV - mov %o5, %o1 - fxor %f56, %f60, %f60 !add in previous IV - fxor %f58, %f62, %f62 - - std %f60, [%i2 + 16] - std %f62, [%i2 + 24] - - add %i1, 32, %i1 - subcc %i3, 32, %i3 - bne cbcdec128_loop - add %i2, 32, %i2 - -cbcdec128_loop_end: - stx %o0, [%i4] - stx %o1, [%i4 + 8] - ret - restore - - SET_SIZE(t4_aes128_cbc_decrypt) - - - ENTRY(t4_aes192_cbc_decrypt) - - save %sp, -SA(MINFRAME), %sp - ldx [%i4], %o0 !IV - ldx [%i4 + 8], %o1 !IV - ldx [%i0 + 0xc0], %o2 !ks[last-1] - ldx [%i0 + 0xc8], %o3 !ks[last] - and %i3, 16, %o4 - brz %o4, cbcdec192_loop - nop - - ldx [%i1], %o4 - ldx [%i1 + 8], %o5 - xor %o2, %o4, %g1 !initial ARK - movxtod %g1, %f60 - xor %o3, %o5, %g1 !initial ARK - movxtod %g1, %f62 - - TWELVE_DROUNDS - - movxtod %o0, %f56 - movxtod %o1, %f58 - mov %o4, %o0 !save last block as next IV - mov %o5, %o1 - fxor %f56, %f60, %f60 !add in previous IV - fxor %f58, %f62, %f62 - - std %f60, [%i2] - std %f62, [%i2 + 0x8] - - add %i1, 
16, %i1 - subcc %i3, 16, %i3 - be cbcdec192_loop_end - add %i2, 16, %i2 - - -cbcdec192_loop: - ldx [%i1], %g4 - ldx [%i1 + 8], %g5 - xor %o2, %g4, %g1 !initial ARK - movxtod %g1, %f0 - xor %o3, %g5, %g1 !initial ARK - movxtod %g1, %f2 - - ldx [%i1 + 16], %o4 - ldx [%i1 + 24], %o5 - xor %o2, %o4, %g1 !initial ARK - movxtod %g1, %f60 - xor %o3, %o5, %g1 !initial ARK - movxtod %g1, %f62 - - TWELVE_DROUNDS_2 - - movxtod %o0, %f6 - movxtod %o1, %f4 - fxor %f6, %f0, %f0 !add in previous IV - fxor %f4, %f2, %f2 - - std %f0, [%i2] - std %f2, [%i2 + 8] - - movxtod %g4, %f56 - movxtod %g5, %f58 - mov %o4, %o0 !save last block as next IV - mov %o5, %o1 - fxor %f56, %f60, %f60 !add in previous IV - fxor %f58, %f62, %f62 - - std %f60, [%i2 + 16] - std %f62, [%i2 + 24] - - add %i1, 32, %i1 - subcc %i3, 32, %i3 - bne cbcdec192_loop - add %i2, 32, %i2 - -cbcdec192_loop_end: - stx %o0, [%i4] - stx %o1, [%i4 + 8] - ret - restore - - SET_SIZE(t4_aes192_cbc_decrypt) - - - ENTRY(t4_aes256_cbc_decrypt) - - save %sp, -SA(MINFRAME), %sp - mov %i0, %o0 !FOURTEEN_DROUNDS uses %o0 - ldx [%i4], %g2 !IV - ldx [%i4 + 8], %o1 !IV - ldx [%o0 + 0xe0], %o2 !ks[last-1] - ldx [%o0 + 0xe8], %o3 !ks[last] - and %i3, 16, %o4 - brz %o4, cbcdec256_loop - nop - - ldx [%i1], %o4 - ldx [%i1 + 8], %o5 - xor %o2, %o4, %g1 !initial ARK - movxtod %g1, %f60 - xor %o3, %o5, %g1 !initial ARK - movxtod %g1, %f62 - - FOURTEEN_DROUNDS - - movxtod %g2, %f56 - movxtod %o1, %f58 - mov %o4, %g2 !save last block as next IV - mov %o5, %o1 - fxor %f56, %f60, %f60 !add in previous IV - fxor %f58, %f62, %f62 - - std %f60, [%i2] - std %f62, [%i2 + 0x8] - - add %i1, 16, %i1 - subcc %i3, 16, %i3 - be cbcdec256_loop_end - add %i2, 16, %i2 - - -cbcdec256_loop: - ldx [%i1], %g4 - ldx [%i1 + 8], %g5 - xor %o2, %g4, %g1 !initial ARK - movxtod %g1, %f20 - xor %o3, %g5, %g1 !initial ARK - movxtod %g1, %f22 - - ldx [%i1 + 16], %o4 - ldx [%i1 + 24], %o5 - xor %o2, %o4, %g1 !initial ARK - movxtod %g1, %f60 - xor %o3, %o5, %g1 !initial ARK 
- movxtod %g1, %f62 - - FOURTEEN_DROUNDS_2 - - movxtod %g2, %f56 - movxtod %o1, %f58 - fxor %f56, %f20, %f20 !add in previous IV - fxor %f58, %f22, %f22 - - std %f20, [%i2] - std %f22, [%i2 + 8] - - movxtod %g4, %f56 - movxtod %g5, %f58 - mov %o4, %g2 !save last block as next IV - mov %o5, %o1 - fxor %f56, %f60, %f60 !add in previous IV - fxor %f58, %f62, %f62 - - std %f60, [%i2 + 16] - std %f62, [%i2 + 24] - - add %i1, 32, %i1 - subcc %i3, 32, %i3 - bne cbcdec256_loop - add %i2, 32, %i2 - - ldd [%o0 + 0x80], %f20 - ldd [%o0 + 0x88], %f22 - -cbcdec256_loop_end: - stx %g2, [%i4] - stx %o1, [%i4 + 8] - ret - restore - - SET_SIZE(t4_aes256_cbc_decrypt) - -#else - - ENTRY(t4_aes128_cbc_decrypt) - - save %sp, -SA(MINFRAME), %sp - ldx [%i4], %o0 !IV - ldx [%i4 + 8], %o1 !IV - ldx [%i0 + 0xa0], %o2 !ks[last-1] - ldx [%i0 + 0xa8], %o3 !ks[last] - -cbcdec128_loop: - ldx [%i1], %o4 - ldx [%i1 + 8], %o5 - xor %o2, %o4, %g1 !initial ARK - movxtod %g1, %f60 - xor %o3, %o5, %g1 !initial ARK - movxtod %g1, %f62 - - TEN_DROUNDS - - movxtod %o0, %f56 - movxtod %o1, %f58 - mov %o4, %o0 !save last block as next IV - mov %o5, %o1 - fxor %f56, %f60, %f60 !add in previous IV - fxor %f58, %f62, %f62 - - std %f60, [%i2] - std %f62, [%i2 + 0x8] - - add %i1, 16, %i1 - subcc %i3, 16, %i3 - bne cbcdec128_loop - add %i2, 16, %i2 - - stx %o0, [%i4] - stx %o1, [%i4 + 8] - ret - restore - - SET_SIZE(t4_aes128_cbc_decrypt) - - - ENTRY(t4_aes192_cbc_decrypt) - - save %sp, -SA(MINFRAME), %sp - ldx [%i4], %o0 !IV - ldx [%i4 + 8], %o1 !IV - ldx [%i0 + 0xc0], %o2 !ks[last-1] - ldx [%i0 + 0xc8], %o3 !ks[last] - -cbcdec192_loop: - ldx [%i1], %o4 - ldx [%i1 + 8], %o5 - xor %o2, %o4, %g1 !initial ARK - movxtod %g1, %f60 - xor %o3, %o5, %g1 !initial ARK - movxtod %g1, %f62 - - TWELVE_DROUNDS - - movxtod %o0, %f56 - movxtod %o1, %f58 - mov %o4, %o0 !save last block as next IV - mov %o5, %o1 - fxor %f56, %f60, %f60 !add in previous IV - fxor %f58, %f62, %f62 - - std %f60, [%i2] - std %f62, [%i2 + 0x8] - - add 
%i1, 16, %i1 - subcc %i3, 16, %i3 - bne cbcdec192_loop - add %i2, 16, %i2 - - stx %o0, [%i4] - stx %o1, [%i4 + 8] - ret - restore - - SET_SIZE(t4_aes192_cbc_decrypt) - - - ENTRY(t4_aes256_cbc_decrypt) - - save %sp, -SA(MINFRAME), %sp - ldx [%i4], %o0 !IV - ldx [%i4 + 8], %o1 !IV - ldx [%i0 + 0xe0], %o2 !ks[last-1] - ldx [%i0 + 0xe8], %o3 !ks[last] - -cbcdec256_loop: - ldx [%i1], %o4 - ldx [%i1 + 8], %o5 - xor %o2, %o4, %g1 !initial ARK - movxtod %g1, %f60 - xor %o3, %o5, %g1 !initial ARK - movxtod %g1, %f62 - - FOURTEEN_DROUNDS - - movxtod %o0, %f56 - movxtod %o1, %f58 - mov %o4, %o0 !save last block as next IV - mov %o5, %o1 - fxor %f56, %f60, %f60 !add in previous IV - fxor %f58, %f62, %f62 - - std %f60, [%i2] - std %f62, [%i2 + 0x8] - - add %i1, 16, %i1 - subcc %i3, 16, %i3 - bne cbcdec256_loop - add %i2, 16, %i2 - - stx %o0, [%i4] - stx %o1, [%i4 + 8] - ret - restore - - SET_SIZE(t4_aes256_cbc_decrypt) - -#endif - -#define TEST_PARALLEL_CFB128_DECRYPT -#ifdef TEST_PARALLEL_CFB128_DECRYPT - - ENTRY(t4_aes128_cfb128_decrypt) - - ldd [%o4], %f56 !IV - ldd [%o4 + 8], %f58 !IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - and %o3, 16, %o5 - brz %o5, cfb128dec_128_loop - - movxtod %g1, %f60 - movxtod %g2, %f62 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - /* CFB mode uses encryption for the decrypt operation */ - TEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be cfb128dec_128_loop_end - add %o2, 16, %o2 - -cfb128dec_128_loop: - ldd [%o1], %f6 !input - ldd [%o1 + 8], %f4 !input - movxtod %g1, %f60 - movxtod %g2, %f62 - fxor %f60, %f6, %f0 - fxor %f62, %f4, %f2 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - /* CFB mode uses encryption for the decrypt operation */ - TEN_EROUNDS_2 - - ldd [%o1], %f6 !input - ldd [%o1 + 8], %f4 !input - ldd [%o1 + 16], %f56 !input - ldd [%o1 + 24], %f58 !input - - fxor %f60, %f6, %f6 - fxor %f62, %f4, %f4 - fxor %f0, %f56, %f60 - fxor %f2, %f58, %f62 - - std %f6, [%o2] - std %f4, [%o2 + 8] - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne cfb128dec_128_loop - add %o2, 32, %o2 - -cfb128dec_128_loop_end: - std %f56, [%o4] - retl - std %f58, [%o4 + 8] - - SET_SIZE(t4_aes128_cfb128_decrypt) - - - ENTRY(t4_aes192_cfb128_decrypt) - - ldd [%o4], %f56 !IV - ldd [%o4 + 8], %f58 !IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - and %o3, 16, %o5 - brz %o5, cfb128dec_192_loop - - movxtod %g1, %f60 - movxtod %g2, %f62 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - /* CFB mode uses encryption for the decrypt operation */ - TWELVE_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be cfb128dec_192_loop_end - add %o2, 16, %o2 - -cfb128dec_192_loop: - ldd [%o1], %f6 !input - ldd [%o1 + 8], %f4 !input - movxtod %g1, %f60 - movxtod %g2, %f62 - fxor %f60, %f6, %f0 - fxor %f62, %f4, %f2 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - /* CFB mode uses encryption for the decrypt operation */ - TWELVE_EROUNDS_2 - - ldd [%o1], %f6 !input - ldd [%o1 + 8], %f4 !input - ldd [%o1 + 16], %f56 !input - ldd [%o1 + 24], %f58 !input - - fxor %f60, %f6, %f6 - fxor %f62, %f4, %f4 - fxor %f0, %f56, %f60 - fxor %f2, %f58, %f62 - - std %f6, [%o2] - std %f4, [%o2 + 8] - std %f60, [%o2 + 16] - std %f62, [%o2 + 24] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne cfb128dec_192_loop - add %o2, 32, %o2 - -cfb128dec_192_loop_end: - std %f56, [%o4] - retl - std %f58, [%o4 + 8] - - SET_SIZE(t4_aes192_cfb128_decrypt) - - - ENTRY(t4_aes256_cfb128_decrypt) - - ldd [%o4], %f56 !IV - ldd [%o4 + 8], %f58 !IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - and %o3, 16, %o5 - brz %o5, cfb128dec_256_loop - - movxtod %g1, %f60 - movxtod %g2, %f62 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - /* CFB mode uses encryption for the decrypt operation */ - FOURTEEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - be cfb128dec_256_loop_end - add %o2, 16, %o2 - -cfb128dec_256_loop: - ldd [%o1], %f20 !input - ldd [%o1 + 8], %f22 !input - movxtod %g1, %f60 - movxtod %g2, %f62 - fxor %f60, %f20, %f20 - fxor %f62, %f22, %f22 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - /* CFB mode uses encryption for the decrypt operation */ - FOURTEEN_EROUNDS_2 - - ldd [%o1 + 16], %f56 !input - ldd [%o1 + 24], %f58 !input - fxor %f20, %f56, %f20 - fxor %f22, %f58, %f22 - std %f20, [%o2 + 16] - std %f22, [%o2 + 24] - - ldd [%o1], %f20 !input - ldd [%o1 + 8], %f22 !input - - fxor %f60, %f20, %f20 - fxor %f62, %f22, %f22 - - std %f20, [%o2] - std %f22, [%o2 + 8] - - add %o1, 32, %o1 - subcc %o3, 32, %o3 - bne cfb128dec_256_loop - add %o2, 32, %o2 - - ldd [%o0 + 0x60], %f20 - ldd [%o0 + 0x68], %f22 - -cfb128dec_256_loop_end: - std %f56, [%o4] - retl - std %f58, [%o4 + 8] - - SET_SIZE(t4_aes256_cfb128_decrypt) - -#else - ENTRY(t4_aes128_cfb128_decrypt) - - ldd [%o4], %f56 !IV - ldd [%o4 + 8], %f58 !IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - -cfb128dec_128_loop: - movxtod %g1, %f60 - movxtod %g2, %f62 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - /* CFB mode uses encryption for the decrypt operation */ - TEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne cfb128dec_128_loop - add %o2, 16, %o2 - - std %f56, [%o4] - retl - std %f58, [%o4 + 8] - - SET_SIZE(t4_aes128_cfb128_decrypt) - - - ENTRY(t4_aes192_cfb128_decrypt) - - ldd [%o4], %f56 !IV - ldd [%o4 + 8], %f58 !IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! ks[1] - -cfb128dec_192_loop: - movxtod %g1, %f60 - movxtod %g2, %f62 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - /* CFB mode uses encryption for the decrypt operation */ - TWELVE_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne cfb128dec_192_loop - add %o2, 16, %o2 - - std %f56, [%o4] - retl - std %f58, [%o4 + 8] - - SET_SIZE(t4_aes192_cfb128_decrypt) - - - ENTRY(t4_aes256_cfb128_decrypt) - - ldd [%o4], %f56 !IV - ldd [%o4 + 8], %f58 !IV - ldx [%o0], %g1 ! ks[0] - ldx [%o0 + 8], %g2 ! 
ks[1] - -cfb128dec_256_loop: - movxtod %g1, %f60 - movxtod %g2, %f62 - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - /* CFB mode uses encryption for the decrypt operation */ - FOURTEEN_EROUNDS - - ldd [%o1], %f56 !input - ldd [%o1 + 8], %f58 !input - fxor %f60, %f56, %f60 - fxor %f62, %f58, %f62 - - std %f60, [%o2] - std %f62, [%o2 + 8] - - add %o1, 16, %o1 - subcc %o3, 16, %o3 - bne cfb128dec_256_loop - add %o2, 16, %o2 - - std %f56, [%o4] - retl - std %f58, [%o4 + 8] - - SET_SIZE(t4_aes256_cfb128_decrypt) - -#endif - -#endif /* lint || __lint */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/t4_md5.S --- a/components/openssl/openssl-1.0.1/engines/t4/t4_md5.S Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,151 +0,0 @@ -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. 
Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
- */ - -/*LINTLIBRARY*/ - -#if defined(lint) || defined(__lint) - -#include -#include - -/*ARGSUSED*/ -void -t4_md5_multiblock(MD5_CTX *ctx, const uint8_t *input, - unsigned int input_length_in_blocks) -{ return; } - -#else /* lint || __lint */ - -#include - - ENTRY(t4_md5_multiblock) - -!load result from previous digest (stored in ctx) - ld [%o0], %f0 - ld [%o0 + 0x4], %f1 - ld [%o0 + 0x8], %f2 - ld [%o0 + 0xc], %f3 - - and %o1, 7, %o3 - brnz %o3, md5_unaligned_input - nop - -md5_loop: - -!load 64 bytes of data - ldd [%o1], %f8 !load 8 bytes of data - ldd [%o1 + 0x8], %f10 !load 8 bytes of data - ldd [%o1 + 0x10], %f12 !load 8 bytes of data - ldd [%o1 + 0x18], %f14 !load 8 bytes of data - ldd [%o1 + 0x20], %f16 !load 8 bytes of data - ldd [%o1 + 0x28], %f18 !load 8 bytes of data - ldd [%o1 + 0x30], %f20 !load 8 bytes of data - ldd [%o1 + 0x38], %f22 !load 8 bytes of data - -!perform crypto instruction here - !md5 - .byte 0x81, 0xb0, 0x28, 0x00 - - dec %o2 - brnz %o2, md5_loop - add %o1, 0x40, %o1 - -!copy digest back into ctx - st %f0, [%o0] - st %f1, [%o0 + 0x4] - st %f2, [%o0 + 0x8] - retl - st %f3, [%o0 + 0xc] - -md5_unaligned_input: - alignaddr %o1, %g0, %g0 ! 
generate %gsr - andn %o1, 7, %o1 - -md5_unaligned_input_loop: - ldd [%o1], %f8 !load 8 bytes of data - ldd [%o1 + 0x8], %f10 !load 8 bytes of data - ldd [%o1 + 0x10], %f12 !load 8 bytes of data - ldd [%o1 + 0x18], %f14 !load 8 bytes of data - ldd [%o1 + 0x20], %f16 !load 8 bytes of data - ldd [%o1 + 0x28], %f18 !load 8 bytes of data - ldd [%o1 + 0x30], %f20 !load 8 bytes of data - ldd [%o1 + 0x38], %f22 !load 8 bytes of data - ldd [%o1 + 0x40], %f24 !load 8 bytes of data - faligndata %f8, %f10, %f8 - faligndata %f10, %f12, %f10 - faligndata %f12, %f14, %f12 - faligndata %f14, %f16, %f14 - faligndata %f16, %f18, %f16 - faligndata %f18, %f20, %f18 - faligndata %f20, %f22, %f20 - faligndata %f22, %f24, %f22 - -!perform crypto instruction here - !md5 - .byte 0x81, 0xb0, 0x28, 0x00 - - dec %o2 - brnz %o2, md5_unaligned_input_loop - add %o1, 0x40, %o1 - -!copy digest back into ctx - st %f0, [%o0] - st %f1, [%o0 + 0x4] - st %f2, [%o0 + 0x8] - retl - st %f3, [%o0 + 0xc] - - SET_SIZE(t4_md5_multiblock) - -#endif /* lint || __lint */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/t4_sha1.S --- a/components/openssl/openssl-1.0.1/engines/t4/t4_sha1.S Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,153 +0,0 @@ -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. 
All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
- */ - -/*LINTLIBRARY*/ - -#if defined(lint) || defined(__lint) - -#include -#include - -/*ARGSUSED*/ -void -t4_sha1_multiblock(SHA_CTX *ctx, const void *input, size_t num) -{ return; } - -#else /* lint || __lint */ - -#include - - ENTRY(t4_sha1_multiblock) - -!load result from previous digest (stored in ctx) - ld [%o0], %f0 - ld [%o0 + 0x4], %f1 - ld [%o0 + 0x8], %f2 - ld [%o0 + 0xc], %f3 - ld [%o0 + 0x10], %f4 - - and %o1, 7, %o3 - brnz %o3, sha1_unaligned_input - nop - -sha1_loop: - -!load 64 bytes of data - ldd [%o1], %f8 !load 8 bytes of data - ldd [%o1 + 0x8], %f10 !load 8 bytes of data - ldd [%o1 + 0x10], %f12 !load 8 bytes of data - ldd [%o1 + 0x18], %f14 !load 8 bytes of data - ldd [%o1 + 0x20], %f16 !load 8 bytes of data - ldd [%o1 + 0x28], %f18 !load 8 bytes of data - ldd [%o1 + 0x30], %f20 !load 8 bytes of data - ldd [%o1 + 0x38], %f22 !load 8 bytes of data - -!perform crypto instruction here - !sha1 - .byte 0x81, 0xb0, 0x28, 0x20 - - dec %o2 - brnz %o2, sha1_loop - add %o1, 0x40, %o1 - -!copy digest back into ctx - st %f0, [%o0] - st %f1, [%o0 + 0x4] - st %f2, [%o0 + 0x8] - st %f3, [%o0 + 0xc] - retl - st %f4, [%o0 + 0x10] - -sha1_unaligned_input: - alignaddr %o1, %g0, %g0 ! 
generate %gsr - andn %o1, 7, %o1 - -sha1_unaligned_input_loop: - ldd [%o1], %f8 !load 8 bytes of data - ldd [%o1 + 0x8], %f10 !load 8 bytes of data - ldd [%o1 + 0x10], %f12 !load 8 bytes of data - ldd [%o1 + 0x18], %f14 !load 8 bytes of data - ldd [%o1 + 0x20], %f16 !load 8 bytes of data - ldd [%o1 + 0x28], %f18 !load 8 bytes of data - ldd [%o1 + 0x30], %f20 !load 8 bytes of data - ldd [%o1 + 0x38], %f22 !load 8 bytes of data - ldd [%o1 + 0x40], %f24 !load 8 bytes of data - faligndata %f8, %f10, %f8 - faligndata %f10, %f12, %f10 - faligndata %f12, %f14, %f12 - faligndata %f14, %f16, %f14 - faligndata %f16, %f18, %f16 - faligndata %f18, %f20, %f18 - faligndata %f20, %f22, %f20 - faligndata %f22, %f24, %f22 - -!perform crypto instruction here - !sha1 - .byte 0x81, 0xb0, 0x28, 0x20 - - dec %o2 - brnz %o2, sha1_unaligned_input_loop - add %o1, 0x40, %o1 - -!copy digest back into ctx - st %f0, [%o0] - st %f1, [%o0 + 0x4] - st %f2, [%o0 + 0x8] - st %f3, [%o0 + 0xc] - retl - st %f4, [%o0 + 0x10] - - SET_SIZE(t4_sha1_multiblock) - -#endif /* lint || __lint */ diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/engines/t4/t4_sha2.S --- a/components/openssl/openssl-1.0.1/engines/t4/t4_sha2.S Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,314 +0,0 @@ -/* - * ==================================================================== - * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. 
All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
- */ - -/*LINTLIBRARY*/ - -#if defined(lint) || defined(__lint) - -#include -#include -#include "../engine/eng_t4_sha2_asm.h" - -/*ARGSUSED*/ -void -t4_sha256_multiblock(T4_SHA256_CTX *c, const void *input, - size_t num) -{ return; } - -/*ARGSUSED*/ -void -t4_sha512_multiblock(T4_SHA512_CTX *c, const void *input, - size_t num) -{ return; } - -#else /* lint || __lint */ - -#include - - ENTRY(t4_sha256_multiblock) - - add %o0, 0x8, %o0 !skip over first field in ctx - -!load result from previous digest (stored in ctx) - ld [%o0], %f0 - ld [%o0 + 0x4], %f1 - ld [%o0 + 0x8], %f2 - ld [%o0 + 0xc], %f3 - ld [%o0 + 0x10], %f4 - ld [%o0 + 0x14], %f5 - ld [%o0 + 0x18], %f6 - ld [%o0 + 0x1c], %f7 - - and %o1, 7, %o3 - brnz %o3, sha256_unaligned_input - nop - -sha256_loop: - -!load 64 bytes of data - ldd [%o1], %f8 !load 8 bytes of data - ldd [%o1 + 0x8], %f10 !load 8 bytes of data - ldd [%o1 + 0x10], %f12 !load 8 bytes of data - ldd [%o1 + 0x18], %f14 !load 8 bytes of data - ldd [%o1 + 0x20], %f16 !load 8 bytes of data - ldd [%o1 + 0x28], %f18 !load 8 bytes of data - ldd [%o1 + 0x30], %f20 !load 8 bytes of data - ldd [%o1 + 0x38], %f22 !load 8 bytes of data - -!perform crypto instruction here - !sha256 - .byte 0x81, 0xb0, 0x28, 0x40 - - dec %o2 - brnz %o2, sha256_loop - add %o1, 0x40, %o1 - -!copy digest back into ctx - st %f0, [%o0] - st %f1, [%o0 + 0x4] - st %f2, [%o0 + 0x8] - st %f3, [%o0 + 0xc] - st %f4, [%o0 + 0x10] - st %f5, [%o0 + 0x14] - st %f6, [%o0 + 0x18] - retl - st %f7, [%o0 + 0x1c] - -sha256_unaligned_input: - alignaddr %o1, %g0, %g0 ! 
generate %gsr - andn %o1, 7, %o1 - -sha256_unaligned_input_loop: - ldd [%o1], %f8 !load 8 bytes of data - ldd [%o1 + 0x8], %f10 !load 8 bytes of data - ldd [%o1 + 0x10], %f12 !load 8 bytes of data - ldd [%o1 + 0x18], %f14 !load 8 bytes of data - ldd [%o1 + 0x20], %f16 !load 8 bytes of data - ldd [%o1 + 0x28], %f18 !load 8 bytes of data - ldd [%o1 + 0x30], %f20 !load 8 bytes of data - ldd [%o1 + 0x38], %f22 !load 8 bytes of data - ldd [%o1 + 0x40], %f24 !load 8 bytes of data - faligndata %f8, %f10, %f8 - faligndata %f10, %f12, %f10 - faligndata %f12, %f14, %f12 - faligndata %f14, %f16, %f14 - faligndata %f16, %f18, %f16 - faligndata %f18, %f20, %f18 - faligndata %f20, %f22, %f20 - faligndata %f22, %f24, %f22 - -!perform crypto instruction here - !sha256 - .byte 0x81, 0xb0, 0x28, 0x40 - - dec %o2 - brnz %o2, sha256_unaligned_input_loop - add %o1, 0x40, %o1 - -!copy digest back into ctx - st %f0, [%o0] - st %f1, [%o0 + 0x4] - st %f2, [%o0 + 0x8] - st %f3, [%o0 + 0xc] - st %f4, [%o0 + 0x10] - st %f5, [%o0 + 0x14] - st %f6, [%o0 + 0x18] - retl - st %f7, [%o0 + 0x1c] - - SET_SIZE(t4_sha256_multiblock) - - - ENTRY(t4_sha512_multiblock) - - add %o0, 0x8, %o0 !skip over first field in ctx - -!load result from previous digest (stored in ctx) - ld [%o0], %f0 - ld [%o0 + 0x4], %f1 - ld [%o0 + 0x8], %f2 - ld [%o0 + 0xc], %f3 - ld [%o0 + 0x10], %f4 - ld [%o0 + 0x14], %f5 - ld [%o0 + 0x18], %f6 - ld [%o0 + 0x1c], %f7 - ld [%o0 + 0x20], %f8 - ld [%o0 + 0x24], %f9 - ld [%o0 + 0x28], %f10 - ld [%o0 + 0x2c], %f11 - ld [%o0 + 0x30], %f12 - ld [%o0 + 0x34], %f13 - ld [%o0 + 0x38], %f14 - ld [%o0 + 0x3c], %f15 - - and %o1, 7, %o3 - brnz %o3, sha512_unaligned_input - nop - -sha512_loop: - -!load 128 bytes of data - ldd [%o1], %f16 !load 8 bytes of data - ldd [%o1 + 0x8], %f18 !load 8 bytes of data - ldd [%o1 + 0x10], %f20 !load 8 bytes of data - ldd [%o1 + 0x18], %f22 !load 8 bytes of data - ldd [%o1 + 0x20], %f24 !load 8 bytes of data - ldd [%o1 + 0x28], %f26 !load 8 bytes of data - ldd 
[%o1 + 0x30], %f28 !load 8 bytes of data - ldd [%o1 + 0x38], %f30 !load 8 bytes of data - ldd [%o1 + 0x40], %f32 !load 8 bytes of data - ldd [%o1 + 0x48], %f34 !load 8 bytes of data - ldd [%o1 + 0x50], %f36 !load 8 bytes of data - ldd [%o1 + 0x58], %f38 !load 8 bytes of data - ldd [%o1 + 0x60], %f40 !load 8 bytes of data - ldd [%o1 + 0x68], %f42 !load 8 bytes of data - ldd [%o1 + 0x70], %f44 !load 8 bytes of data - ldd [%o1 + 0x78], %f46 !load 8 bytes of data - -!perform crypto instruction here - !sha512 - .byte 0x81, 0xb0, 0x28, 0x60 - - dec %o2 - brnz %o2, sha512_loop - add %o1, 0x80, %o1 - -!copy digest back into ctx - st %f0, [%o0] - st %f1, [%o0 + 0x4] - st %f2, [%o0 + 0x8] - st %f3, [%o0 + 0xc] - st %f4, [%o0 + 0x10] - st %f5, [%o0 + 0x14] - st %f6, [%o0 + 0x18] - st %f7, [%o0+ 0x1c] - st %f8, [%o0+ 0x20] - st %f9, [%o0+ 0x24] - st %f10, [%o0+ 0x28] - st %f11, [%o0+ 0x2c] - st %f12, [%o0+ 0x30] - st %f13, [%o0+ 0x34] - st %f14, [%o0+ 0x38] - retl - st %f15, [%o0+ 0x3c] - -sha512_unaligned_input: - alignaddr %o1, %g0, %g0 ! 
generate %gsr - andn %o1, 7, %o1 - -sha512_unaligned_input_loop: - ldd [%o1], %f16 !load 8 bytes of data - ldd [%o1 + 0x8], %f18 !load 8 bytes of data - ldd [%o1 + 0x10], %f20 !load 8 bytes of data - ldd [%o1 + 0x18], %f22 !load 8 bytes of data - ldd [%o1 + 0x20], %f24 !load 8 bytes of data - ldd [%o1 + 0x28], %f26 !load 8 bytes of data - ldd [%o1 + 0x30], %f28 !load 8 bytes of data - ldd [%o1 + 0x38], %f30 !load 8 bytes of data - ldd [%o1 + 0x40], %f32 !load 8 bytes of data - ldd [%o1 + 0x48], %f34 !load 8 bytes of data - ldd [%o1 + 0x50], %f36 !load 8 bytes of data - ldd [%o1 + 0x58], %f38 !load 8 bytes of data - ldd [%o1 + 0x60], %f40 !load 8 bytes of data - ldd [%o1 + 0x68], %f42 !load 8 bytes of data - ldd [%o1 + 0x70], %f44 !load 8 bytes of data - ldd [%o1 + 0x78], %f46 !load 8 bytes of data - ldd [%o1 + 0x80], %f48 !load 8 bytes of data - faligndata %f16, %f18, %f16 - faligndata %f18, %f20, %f18 - faligndata %f20, %f22, %f20 - faligndata %f22, %f24, %f22 - faligndata %f24, %f26, %f24 - faligndata %f26, %f28, %f26 - faligndata %f28, %f30, %f28 - faligndata %f30, %f32, %f30 - faligndata %f32, %f34, %f32 - faligndata %f34, %f36, %f34 - faligndata %f36, %f38, %f36 - faligndata %f38, %f40, %f38 - faligndata %f40, %f42, %f40 - faligndata %f42, %f44, %f42 - faligndata %f44, %f46, %f44 - faligndata %f46, %f48, %f46 - -!perform crypto instruction here - !sha512 - .byte 0x81, 0xb0, 0x28, 0x60 - - dec %o2 - brnz %o2, sha512_unaligned_input_loop - add %o1, 0x80, %o1 - -!copy digest back into ctx - st %f0, [%o0] - st %f1, [%o0 + 0x4] - st %f2, [%o0 + 0x8] - st %f3, [%o0 + 0xc] - st %f4, [%o0 + 0x10] - st %f5, [%o0 + 0x14] - st %f6, [%o0 + 0x18] - st %f7, [%o0+ 0x1c] - st %f8, [%o0+ 0x20] - st %f9, [%o0+ 0x24] - st %f10, [%o0+ 0x28] - st %f11, [%o0+ 0x2c] - st %f12, [%o0+ 0x30] - st %f13, [%o0+ 0x34] - st %f14, [%o0+ 0x38] - retl - st %f15, [%o0+ 0x3c] - - SET_SIZE(t4_sha512_multiblock) - -#endif /* lint || __lint */ diff -r 15aec33b84fa -r 3515c1afdfc8 
components/openssl/openssl-1.0.1/inline-t4/sparc_arch.h --- a/components/openssl/openssl-1.0.1/inline-t4/sparc_arch.h Mon Jul 08 16:18:46 2013 -0700 +++ b/components/openssl/openssl-1.0.1/inline-t4/sparc_arch.h Mon Jul 08 17:50:18 2013 -0700 @@ -1,78 +1,90 @@ #ifndef __SPARC_ARCH_H__ -#define __SPARC_ARCH_H__ +#define __SPARC_ARCH_H__ -#define SPARCV9_TICK_PRIVILEGED (1<<0) -#define SPARCV9_PREFER_FPU (1<<1) -#define SPARCV9_VIS1 (1<<2) -#define SPARCV9_VIS2 (1<<3) /* reserved */ -#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ -#define SPARCV9_BLK (1<<5) /* VIS1 block copy */ -#define SPARCV9_VIS3 (1<<6) -#define SPARCV9_RANDOM (1<<7) +#define SPARCV9_TICK_PRIVILEGED (1<<0) +#define SPARCV9_PREFER_FPU (1<<1) +#define SPARCV9_VIS1 (1<<2) +#define SPARCV9_VIS2 (1<<3) /* reserved */ +#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ +#define SPARCV9_BLK (1<<5) /* VIS1 block copy */ +#define SPARCV9_VIS3 (1<<6) +#define SPARCV9_RANDOM (1<<7) +#define SPARCV9_64BIT_STACK (1<<8) /* * OPENSSL_sparcv9cap_P[1] is copy of Compatibility Feature Register, * %asr26, SPARC-T4 and later. There is no SPARCV9_CFR bit in * OPENSSL_sparcv9cap_P[0], as %cfr copy is sufficient... 
*/ -#define CFR_AES 0x00000001 /* Supports AES opcodes */ -#define CFR_DES 0x00000002 /* Supports DES opcodes */ -#define CFR_KASUMI 0x00000004 /* Supports KASUMI opcodes */ -#define CFR_CAMELLIA 0x00000008 /* Supports CAMELLIA opcodes*/ -#define CFR_MD5 0x00000010 /* Supports MD5 opcodes */ -#define CFR_SHA1 0x00000020 /* Supports SHA1 opcodes */ -#define CFR_SHA256 0x00000040 /* Supports SHA256 opcodes */ -#define CFR_SHA512 0x00000080 /* Supports SHA512 opcodes */ -#define CFR_MPMUL 0x00000100 /* Supports MPMUL opcodes */ -#define CFR_MONTMUL 0x00000200 /* Supports MONTMUL opcodes */ -#define CFR_MONTSQR 0x00000400 /* Supports MONTSQR opcodes */ -#define CFR_CRC32C 0x00000800 /* Supports CRC32C opcodes */ +#define CFR_AES 0x00000001 /* Supports AES opcodes */ +#define CFR_DES 0x00000002 /* Supports DES opcodes */ +#define CFR_KASUMI 0x00000004 /* Supports KASUMI opcodes */ +#define CFR_CAMELLIA 0x00000008 /* Supports CAMELLIA opcodes */ +#define CFR_MD5 0x00000010 /* Supports MD5 opcodes */ +#define CFR_SHA1 0x00000020 /* Supports SHA1 opcodes */ +#define CFR_SHA256 0x00000040 /* Supports SHA256 opcodes */ +#define CFR_SHA512 0x00000080 /* Supports SHA512 opcodes */ +#define CFR_MPMUL 0x00000100 /* Supports MPMUL opcodes */ +#define CFR_MONTMUL 0x00000200 /* Supports MONTMUL opcodes */ +#define CFR_MONTSQR 0x00000400 /* Supports MONTSQR opcodes */ +#define CFR_CRC32C 0x00000800 /* Supports CRC32C opcodes */ #if defined(OPENSSL_PIC) && !defined(__PIC__) -# define __PIC__ +#define __PIC__ #endif -#define SPARC_PIC_THUNK(reg) \ +#if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__) +#define __arch64__ +#endif + +#define SPARC_PIC_THUNK(reg) \ .align 32; \ .Lpic_thunk: \ jmp %o7 + 8; \ - add %o7, reg, reg; + add %o7, reg, reg; -#define SPARC_PIC_THUNK_CALL(reg) \ +#define SPARC_PIC_THUNK_CALL(reg) \ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ call .Lpic_thunk; \ - or reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg; + or reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg; 
#if 1 -# define SPARC_SETUP_GOT_REG(reg) SPARC_PIC_THUNK_CALL(reg) +#define SPARC_SETUP_GOT_REG(reg) SPARC_PIC_THUNK_CALL(reg) #else -# define SPARC_SETUP_GOT_REG(reg) \ +#define SPARC_SETUP_GOT_REG(reg) \ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ call .+8; \ - or reg,%lo(_GLOBAL_OFFSET_TABLE_+4), reg; \ + or reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg; \ add %o7, reg, reg #endif -#if (defined(__GNUC__) && defined(__arch64__)) || \ - (defined(__SUNPRO_C) && defined(__sparcv9)) +#if defined(__arch64__) -# define SPARC_LOAD_ADDRESS(SYM, reg) \ +#define SPARC_LOAD_ADDRESS(SYM, reg) \ setx SYM, %o7, reg; -# define LDPTR ldx +#define LDPTR ldx +#define SIZE_T_CC %xcc +#define STACK_FRAME 192 +#define STACK_BIAS 2047 +#define STACK_7thARG (STACK_BIAS+176) #else -# define SPARC_LOAD_ADDRESS(SYM, reg) \ +#define SPARC_LOAD_ADDRESS(SYM, reg) \ set SYM, reg; -# define LDPTR ld -# define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg) +#define LDPTR ld +#define SIZE_T_CC %icc +#define STACK_FRAME 112 +#define STACK_BIAS 0 +#define STACK_7thARG 92 +#define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) SPARC_LOAD_ADDRESS(SYM, reg) #endif #ifdef __PIC__ -# undef SPARC_LOAD_ADDRESS -# undef SPARC_LOAD_ADDRESS_LEAF -# define SPARC_LOAD_ADDRESS(SYM, reg) \ +#undef SPARC_LOAD_ADDRESS +#undef SPARC_LOAD_ADDRESS_LEAF +#define SPARC_LOAD_ADDRESS(SYM, reg) \ SPARC_SETUP_GOT_REG(reg); \ sethi %hi(SYM), %o7; \ or %o7, %lo(SYM), %o7; \ @@ -80,7 +92,7 @@ #endif #ifndef SPARC_LOAD_ADDRESS_LEAF -# define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) \ +#define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) \ mov %o7, tmp; \ SPARC_LOAD_ADDRESS(SYM, reg) \ mov tmp, %o7; diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/inline-t4/sparct4-mont.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/openssl/openssl-1.0.1/inline-t4/sparct4-mont.pl Mon Jul 08 17:50:18 2013 -0700 @@ -0,0 +1,1222 @@ +#!/usr/bin/env perl + +# 
==================================================================== +# Written by David S. Miller and Andy Polyakov. +# The module is licensed under 2-clause BSD +# license. November 2012. All rights reserved. +# ==================================================================== + +###################################################################### +# Montgomery squaring-n-multiplication module for SPARC T4. +# +# The module consists of three parts: +# +# 1) collection of "single-op" subroutines that perform single +# operation, Montgomery squaring or multiplication, on 512-, +# 1024-, 1536- and 2048-bit operands; +# 2) collection of "multi-op" subroutines that perform 5 squaring and +# 1 multiplication operations on operands of above lengths; +# 3) fall-back and helper VIS3 subroutines. +# +# RSA sign is dominated by multi-op subroutine, while RSA verify and +# DSA - by single-op. Special note about 4096-bit RSA verify result. +# Operands are too long for dedicated hardware and it's handled by +# VIS3 code, which is why you don't see any improvement. It's surely +# possible to improve it [by deploying 'mpmul' instruction], maybe in +# the future... +# +# Performance improvement. 
+# +# 64-bit process, VIS3: +# sign verify sign/s verify/s +# rsa 1024 bits 0.000628s 0.000028s 1592.4 35434.4 +# rsa 2048 bits 0.003282s 0.000106s 304.7 9438.3 +# rsa 4096 bits 0.025866s 0.000340s 38.7 2940.9 +# dsa 1024 bits 0.000301s 0.000332s 3323.7 3013.9 +# dsa 2048 bits 0.001056s 0.001233s 946.9 810.8 +# +# 64-bit process, this module: +# sign verify sign/s verify/s +# rsa 1024 bits 0.000256s 0.000016s 3904.4 61411.9 +# rsa 2048 bits 0.000946s 0.000029s 1056.8 34292.7 +# rsa 4096 bits 0.005061s 0.000340s 197.6 2940.5 +# dsa 1024 bits 0.000176s 0.000195s 5674.7 5130.5 +# dsa 2048 bits 0.000296s 0.000354s 3383.2 2827.6 +# +###################################################################### +# 32-bit process, VIS3: +# sign verify sign/s verify/s +# rsa 1024 bits 0.000665s 0.000028s 1504.8 35233.3 +# rsa 2048 bits 0.003349s 0.000106s 298.6 9433.4 +# rsa 4096 bits 0.025959s 0.000341s 38.5 2934.8 +# dsa 1024 bits 0.000320s 0.000341s 3123.3 2929.6 +# dsa 2048 bits 0.001101s 0.001260s 908.2 793.4 +# +# 32-bit process, this module: +# sign verify sign/s verify/s +# rsa 1024 bits 0.000301s 0.000017s 3317.1 60240.0 +# rsa 2048 bits 0.001034s 0.000030s 966.9 33812.7 +# rsa 4096 bits 0.005244s 0.000341s 190.7 2935.4 +# dsa 1024 bits 0.000201s 0.000205s 4976.1 4879.2 +# dsa 2048 bits 0.000328s 0.000360s 3051.1 2774.2 +# +# 32-bit code is prone to performance degradation as interrupt rate +# dispatched to CPU executing the code grows. This is because in +# standard process of handling interrupt in 32-bit process context +# upper halves of most integer registers used as input or output are +# zeroed. This renders result invalid, and operation has to be re-run. +# If CPU is "bothered" with timer interrupts only, the penalty is +# hardly measurable. But in order to mitigate this problem for higher +# interrupt rates contemporary Linux kernel recognizes biased stack +# even in 32-bit process context and preserves full register contents. 
+# See http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=517ffce4e1a03aea979fe3a18a3dd1761a24fafb +# for details. + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "sparcv9_modes.pl"; + +$code.=<<___; +#include "sparc_arch.h" + +#ifdef __arch64__ +.register %g2,#scratch +.register %g3,#scratch +#endif + +.section ".text",#alloc,#execinstr + +#ifdef __PIC__ +SPARC_PIC_THUNK(%g1) +#endif +___ + +######################################################################## +# Register layout for mont[mul|sqr] instructions. +# For details see "Oracle SPARC Architecture 2011" manual at +# http://www.oracle.com/technetwork/server-storage/sun-sparc-enterprise/documentation/. +# +my @R=map("%f".2*$_,(0..11,30,31,12..29)); +my @N=(map("%l$_",(0..7)),map("%o$_",(0..5))); @N=(@N,@N,@N[0..3]); +my @A=(@N[0..13],@R[14..31]); +my @B=(map("%i$_",(0..5)),map("%l$_",(0..7))); @B=(@B,@B,map("%o$_",(0..3))); + +######################################################################## +# int bn_mul_mont_t4_$NUM(u64 *rp,const u64 *ap,const u64 *bp, +# const u64 *np,const BN_ULONG *n0); +# +sub generate_bn_mul_mont_t4() { +my $NUM=shift; +my ($rp,$ap,$bp,$np,$sentinel)=map("%g$_",(1..5)); + +$code.=<<___; +.globl bn_mul_mont_t4_$NUM +.align 32 +bn_mul_mont_t4_$NUM: +#ifdef __arch64__ + mov 0,$sentinel + mov -128,%g4 +#elif defined(SPARCV9_64BIT_STACK) + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) + ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] + mov -2047,%g4 + and %g1,SPARCV9_64BIT_STACK,%g1 + movrz %g1,0,%g4 + mov -1,$sentinel + add %g4,-128,%g4 +#else + mov -1,$sentinel + mov -128,%g4 +#endif + sllx $sentinel,32,$sentinel + save %sp,%g4,%sp +#ifndef __arch64__ + save %sp,-128,%sp ! 
warm it up + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + restore + restore + restore + restore + restore + restore +#endif + and %sp,1,%g4 + or $sentinel,%fp,%fp + or %g4,$sentinel,$sentinel + + ! copy arguments to global registers + mov %i0,$rp + mov %i1,$ap + mov %i2,$bp + mov %i3,$np + ld [%i4+0],%f1 ! load *n0 + ld [%i4+4],%f0 + fsrc2 %f0,%f60 +___ + +# load ap[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +my $lo=$i<13?@A[$i+1]:"%o7"; +$code.=<<___; + ld [$ap+$i*8+0],$lo + ld [$ap+$i*8+4],@A[$i] + sllx @A[$i],32,@A[$i] + or $lo,@A[$i],@A[$i] +___ +} +for(; $i<$NUM; $i++) { +my ($hi,$lo)=("%f".2*($i%4),"%f".(2*($i%4)+1)); +$code.=<<___; + ld [$ap+$i*8+0],$lo + ld [$ap+$i*8+4],$hi + fsrc2 $hi,@A[$i] +___ +} +# load np[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +my $lo=$i<13?@N[$i+1]:"%o7"; +$code.=<<___; + ld [$np+$i*8+0],$lo + ld [$np+$i*8+4],@N[$i] + sllx @N[$i],32,@N[$i] + or $lo,@N[$i],@N[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<28 && $i<$NUM; $i++) { +my $lo=$i<27?@N[$i+1]:"%o7"; +$code.=<<___; + ld [$np+$i*8+0],$lo + ld [$np+$i*8+4],@N[$i] + sllx @N[$i],32,@N[$i] + or $lo,@N[$i],@N[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<$NUM; $i++) { +my $lo=($i<$NUM-1)?@N[$i+1]:"%o7"; +$code.=<<___; + ld [$np+$i*8+0],$lo + ld [$np+$i*8+4],@N[$i] + sllx @N[$i],32,@N[$i] + or $lo,@N[$i],@N[$i] +___ +} +$code.=<<___; + cmp $ap,$bp + be SIZE_T_CC,.Lmsquare_$NUM + nop +___ + +# load bp[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +my $lo=$i<13?@B[$i+1]:"%o7"; +$code.=<<___; + ld 
[$bp+$i*8+0],$lo + ld [$bp+$i*8+4],@B[$i] + sllx @B[$i],32,@B[$i] + or $lo,@B[$i],@B[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<$NUM; $i++) { +my $lo=($i<$NUM-1)?@B[$i+1]:"%o7"; +$code.=<<___; + ld [$bp+$i*8+0],$lo + ld [$bp+$i*8+4],@B[$i] + sllx @B[$i],32,@B[$i] + or $lo,@B[$i],@B[$i] +___ +} +# magic ################################################################ +$code.=<<___; + .word 0x81b02920+$NUM-1 ! montmul $NUM-1 +.Lmresume_$NUM: + fbu,pn %fcc3,.Lmabort_$NUM +#ifndef __arch64__ + and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Lmabort_$NUM +#endif + nop +#ifdef __arch64__ + restore + restore + restore + restore + restore +#else + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Lmabort1_$NUM + restore +#endif +___ + +# save tp[$NUM] ######################################################## +for($i=0; $i<14 && $i<$NUM; $i++) { +$code.=<<___; + movxtod @A[$i],@R[$i] +___ +} +$code.=<<___; +#ifdef __arch64__ + restore +#else + and %fp,$sentinel,$sentinel + restore + and $sentinel,1,%o7 + and %fp,$sentinel,$sentinel + srl %fp,0,%fp ! just in case? + or %o7,$sentinel,$sentinel + brz,a,pn $sentinel,.Lmdone_$NUM + mov 0,%i0 ! return failure +#endif +___ +for($i=0; $i<12 && $i<$NUM; $i++) { +@R[$i] =~ /%f([0-9]+)/; +my $lo = "%f".($1+1); +$code.=<<___; + st $lo,[$rp+$i*8+0] + st @R[$i],[$rp+$i*8+4] +___ +} +for(; $i<$NUM; $i++) { +my ($hi,$lo)=("%f".2*($i%4),"%f".(2*($i%4)+1)); +$code.=<<___; + fsrc2 @R[$i],$hi + st $lo,[$rp+$i*8+0] + st $hi,[$rp+$i*8+4] +___ +} +$code.=<<___; + mov 1,%i0 ! return success +.Lmdone_$NUM: + ret + restore + +.Lmabort_$NUM: + restore + restore + restore + restore + restore +.Lmabort1_$NUM: + restore + + mov 0,%i0 ! 
return failure + ret + restore + +.align 32 +.Lmsquare_$NUM: + save %sp,-128,%sp; or $sentinel,%fp,%fp + save %sp,-128,%sp; or $sentinel,%fp,%fp + .word 0x81b02940+$NUM-1 ! montsqr $NUM-1 + ba .Lmresume_$NUM + nop +.type bn_mul_mont_t4_$NUM, #function +.size bn_mul_mont_t4_$NUM, .-bn_mul_mont_t4_$NUM +___ +} + +for ($i=8;$i<=32;$i+=8) { + &generate_bn_mul_mont_t4($i); +} + +######################################################################## +# +sub load_ccr { +my ($ptbl,$pwr,$ccr,$skip_wr)=@_; +$code.=<<___; + srl $pwr, 2, %o4 + and $pwr, 3, %o5 + and %o4, 7, %o4 + sll %o5, 3, %o5 ! offset within first cache line + add %o5, $ptbl, $ptbl ! of the pwrtbl + or %g0, 1, %o5 + sll %o5, %o4, $ccr +___ +$code.=<<___ if (!$skip_wr); + wr $ccr, %g0, %ccr +___ +} +sub load_b_pair { +my ($pwrtbl,$B0,$B1)=@_; + +$code.=<<___; + ldx [$pwrtbl+0*32], $B0 + ldx [$pwrtbl+8*32], $B1 + ldx [$pwrtbl+1*32], %o4 + ldx [$pwrtbl+9*32], %o5 + movvs %icc, %o4, $B0 + ldx [$pwrtbl+2*32], %o4 + movvs %icc, %o5, $B1 + ldx [$pwrtbl+10*32],%o5 + move %icc, %o4, $B0 + ldx [$pwrtbl+3*32], %o4 + move %icc, %o5, $B1 + ldx [$pwrtbl+11*32],%o5 + movneg %icc, %o4, $B0 + ldx [$pwrtbl+4*32], %o4 + movneg %icc, %o5, $B1 + ldx [$pwrtbl+12*32],%o5 + movcs %xcc, %o4, $B0 + ldx [$pwrtbl+5*32],%o4 + movcs %xcc, %o5, $B1 + ldx [$pwrtbl+13*32],%o5 + movvs %xcc, %o4, $B0 + ldx [$pwrtbl+6*32], %o4 + movvs %xcc, %o5, $B1 + ldx [$pwrtbl+14*32],%o5 + move %xcc, %o4, $B0 + ldx [$pwrtbl+7*32], %o4 + move %xcc, %o5, $B1 + ldx [$pwrtbl+15*32],%o5 + movneg %xcc, %o4, $B0 + add $pwrtbl,16*32, $pwrtbl + movneg %xcc, %o5, $B1 +___ +} +sub load_b { +my ($pwrtbl,$Bi)=@_; + +$code.=<<___; + ldx [$pwrtbl+0*32], $Bi + ldx [$pwrtbl+1*32], %o4 + ldx [$pwrtbl+2*32], %o5 + movvs %icc, %o4, $Bi + ldx [$pwrtbl+3*32], %o4 + move %icc, %o5, $Bi + ldx [$pwrtbl+4*32], %o5 + movneg %icc, %o4, $Bi + ldx [$pwrtbl+5*32], %o4 + movcs %xcc, %o5, $Bi + ldx [$pwrtbl+6*32], %o5 + movvs %xcc, %o4, $Bi + ldx [$pwrtbl+7*32], %o4 + move %xcc, %o5, 
$Bi + add $pwrtbl,8*32, $pwrtbl + movneg %xcc, %o4, $Bi +___ +} + +######################################################################## +# int bn_pwr5_mont_t4_$NUM(u64 *tp,const u64 *np,const BN_ULONG *n0, +# const u64 *pwrtbl,int pwr,int stride); +# +sub generate_bn_pwr5_mont_t4() { +my $NUM=shift; +my ($tp,$np,$pwrtbl,$pwr,$sentinel)=map("%g$_",(1..5)); + +$code.=<<___; +.globl bn_pwr5_mont_t4_$NUM +.align 32 +bn_pwr5_mont_t4_$NUM: +#ifdef __arch64__ + mov 0,$sentinel + mov -128,%g4 +#elif defined(SPARCV9_64BIT_STACK) + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) + ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] + mov -2047,%g4 + and %g1,SPARCV9_64BIT_STACK,%g1 + movrz %g1,0,%g4 + mov -1,$sentinel + add %g4,-128,%g4 +#else + mov -1,$sentinel + mov -128,%g4 +#endif + sllx $sentinel,32,$sentinel + save %sp,%g4,%sp +#ifndef __arch64__ + save %sp,-128,%sp ! warm it up + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + restore + restore + restore + restore + restore + restore +#endif + and %sp,1,%g4 + or $sentinel,%fp,%fp + or %g4,$sentinel,$sentinel + + ! copy arguments to global registers + mov %i0,$tp + mov %i1,$np + ld [%i2+0],%f1 ! load *n0 + ld [%i2+4],%f0 + mov %i3,$pwrtbl + srl %i4,%g0,%i4 ! 
pack last arguments + sllx %i5,32,$pwr + or %i4,$pwr,$pwr + fsrc2 %f0,%f60 +___ + +# load tp[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +$code.=<<___; + ldx [$tp+$i*8],@A[$i] +___ +} +for(; $i<$NUM; $i++) { +$code.=<<___; + ldd [$tp+$i*8],@A[$i] +___ +} +# load np[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +$code.=<<___; + ldx [$np+$i*8],@N[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<28 && $i<$NUM; $i++) { +$code.=<<___; + ldx [$np+$i*8],@N[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<$NUM; $i++) { +$code.=<<___; + ldx [$np+$i*8],@N[$i] +___ +} +# load pwrtbl[pwr] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp + + srlx $pwr, 32, %o4 ! unpack $pwr + srl $pwr, %g0, %o5 + sub %o4, 5, %o4 + mov $pwrtbl, %o7 + sllx %o4, 32, $pwr ! re-pack $pwr + or %o5, $pwr, $pwr + srl %o5, %o4, %o5 +___ + &load_ccr("%o7","%o5","%o4"); +$code.=<<___; + b .Lstride_$NUM + nop +.align 16 +.Lstride_$NUM: +___ +for($i=0; $i<14 && $i<$NUM; $i+=2) { + &load_b_pair("%o7",@B[$i],@B[$i+1]); +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<$NUM; $i+=2) { + &load_b_pair("%i7",@B[$i],@B[$i+1]); +} +$code.=<<___; + srax $pwr, 32, %o4 ! unpack $pwr + srl $pwr, %g0, %o5 + sub %o4, 5, %o4 + mov $pwrtbl, %i7 + sllx %o4, 32, $pwr ! re-pack $pwr + or %o5, $pwr, $pwr + srl %o5, %o4, %o5 +___ + &load_ccr("%i7","%o5","%o4",1); + +# magic ################################################################ +for($i=0; $i<5; $i++) { +$code.=<<___; + .word 0x81b02940+$NUM-1 ! 
montsqr $NUM-1 + fbu,pn %fcc3,.Labort_$NUM +#ifndef __arch64__ + and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Labort_$NUM +#endif + nop +___ +} +$code.=<<___; + wr %o4, %g0, %ccr + .word 0x81b02920+$NUM-1 ! montmul $NUM-1 + fbu,pn %fcc3,.Labort_$NUM +#ifndef __arch64__ + and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Labort_$NUM +#endif + + srax $pwr, 32, %o4 +#ifdef __arch64__ + brgez %o4,.Lstride_$NUM + restore + restore + restore + restore + restore +#else + brgez %o4,.Lstride_$NUM + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Labort1_$NUM + restore +#endif +___ + +# save tp[$NUM] ######################################################## +for($i=0; $i<14 && $i<$NUM; $i++) { +$code.=<<___; + movxtod @A[$i],@R[$i] +___ +} +$code.=<<___; +#ifdef __arch64__ + restore +#else + and %fp,$sentinel,$sentinel + restore + and $sentinel,1,%o7 + and %fp,$sentinel,$sentinel + srl %fp,0,%fp ! just in case? + or %o7,$sentinel,$sentinel + brz,a,pn $sentinel,.Ldone_$NUM + mov 0,%i0 ! return failure +#endif +___ +for($i=0; $i<$NUM; $i++) { +$code.=<<___; + std @R[$i],[$tp+$i*8] +___ +} +$code.=<<___; + mov 1,%i0 ! return success +.Ldone_$NUM: + ret + restore + +.Labort_$NUM: + restore + restore + restore + restore + restore +.Labort1_$NUM: + restore + + mov 0,%i0 ! 
return failure + ret + restore +.type bn_pwr5_mont_t4_$NUM, #function +.size bn_pwr5_mont_t4_$NUM, .-bn_pwr5_mont_t4_$NUM +___ +} + +for ($i=8;$i<=32;$i+=8) { + &generate_bn_pwr5_mont_t4($i); +} + +{ +######################################################################## +# Fall-back subroutines +# +# copy of bn_mul_mont_vis3 adjusted for vectors of 64-bit values +# +($n0,$m0,$m1,$lo0,$hi0, $lo1,$hi1,$aj,$alo,$nj,$nlo,$tj)= + (map("%g$_",(1..5)),map("%o$_",(0..5,7))); + +# int bn_mul_mont( +$rp="%o0"; # u64 *rp, +$ap="%o1"; # const u64 *ap, +$bp="%o2"; # const u64 *bp, +$np="%o3"; # const u64 *np, +$n0p="%o4"; # const BN_ULONG *n0, +$num="%o5"; # int num); # caller ensures that num is >=3 +$code.=<<___; +.globl bn_mul_mont_t4 +.align 32 +bn_mul_mont_t4: + add %sp, STACK_BIAS, %g4 ! real top of stack + sll $num, 3, $num ! size in bytes + add $num, 63, %g1 + andn %g1, 63, %g1 ! buffer size rounded up to 64 bytes + sub %g4, %g1, %g1 + andn %g1, 63, %g1 ! align at 64 byte + sub %g1, STACK_FRAME, %g1 ! new top of stack + sub %g1, %g4, %g1 + + save %sp, %g1, %sp +___ +# +-------------------------------+<----- %sp +# . . +# +-------------------------------+<----- aligned at 64 bytes +# | __int64 tmp[0] | +# +-------------------------------+ +# . . +# . . +# +-------------------------------+<----- aligned at 64 bytes +# . . +($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5)); +($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz)=map("%l$_",(0..7)); +($ovf,$i)=($t0,$t1); +$code.=<<___; + ld [$n0p+0], $t0 ! pull n0[0..1] value + ld [$n0p+4], $t1 + add %sp, STACK_BIAS+STACK_FRAME, $tp + ldx [$bp+0], $m0 ! m0=bp[0] + sllx $t1, 32, $n0 + add $bp, 8, $bp + or $t0, $n0, $n0 + + ldx [$ap+0], $aj ! ap[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[0] + umulxhi $aj, $m0, $hi0 + + ldx [$ap+8], $aj ! ap[1] + add $ap, 16, $ap + ldx [$np+0], $nj ! np[0] + + mulx $lo0, $n0, $m1 ! "tp[0]"*n0 + + mulx $aj, $m0, $alo ! ap[1]*bp[0] + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $lo1 ! 
np[0]*m1 + umulxhi $nj, $m1, $hi1 + + ldx [$np+8], $nj ! np[1] + + addcc $lo0, $lo1, $lo1 + add $np, 16, $np + addxc %g0, $hi1, $hi1 + + mulx $nj, $m1, $nlo ! np[1]*m1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .L1st + sub $num, 24, $cnt ! cnt=num-3 + +.align 16 +.L1st: + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 + + ldx [$ap+0], $aj ! ap[j] + addcc $nlo, $hi1, $lo1 + add $ap, 8, $ap + addxc $nj, %g0, $hi1 ! nhi=nj + + ldx [$np+0], $nj ! np[j] + mulx $aj, $m0, $alo ! ap[j]*bp[0] + add $np, 8, $np + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $nlo ! np[j]*m1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + umulxhi $nj, $m1, $nj ! nhi=nj + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp ! tp++ + + brnz,pt $cnt, .L1st + sub $cnt, 8, $cnt ! j-- +!.L1st + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp + + addcc $hi0, $hi1, $hi1 + addxc %g0, %g0, $ovf ! upmost overflow bit + stxa $hi1, [$tp]0xe2 + add $tp, 8, $tp + + ba .Louter + sub $num, 16, $i ! i=num-2 + +.align 16 +.Louter: + ldx [$bp+0], $m0 ! m0=bp[i] + add $bp, 8, $bp + + sub $ap, $num, $ap ! rewind + sub $np, $num, $np + sub $tp, $num, $tp + + ldx [$ap+0], $aj ! ap[0] + ldx [$np+0], $nj ! np[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[i] + ldx [$tp], $tj ! tp[0] + umulxhi $aj, $m0, $hi0 + ldx [$ap+8], $aj ! ap[1] + addcc $lo0, $tj, $lo0 ! ap[0]*bp[i]+tp[0] + mulx $aj, $m0, $alo ! ap[1]*bp[i] + addxc %g0, $hi0, $hi0 + mulx $lo0, $n0, $m1 ! tp[0]*n0 + umulxhi $aj, $m0, $aj ! ahi=aj + mulx $nj, $m1, $lo1 ! np[0]*m1 + add $ap, 16, $ap + umulxhi $nj, $m1, $hi1 + ldx [$np+8], $nj ! np[1] + add $np, 16, $np + addcc $lo1, $lo0, $lo1 + mulx $nj, $m1, $nlo ! np[1]*m1 + addxc %g0, $hi1, $hi1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .Linner + sub $num, 24, $cnt ! 
cnt=num-3 +.align 16 +.Linner: + addcc $alo, $hi0, $lo0 + ldx [$tp+8], $tj ! tp[j] + addxc $aj, %g0, $hi0 ! ahi=aj + ldx [$ap+0], $aj ! ap[j] + add $ap, 8, $ap + addcc $nlo, $hi1, $lo1 + mulx $aj, $m0, $alo ! ap[j]*bp[i] + addxc $nj, %g0, $hi1 ! nhi=nj + ldx [$np+0], $nj ! np[j] + add $np, 8, $np + umulxhi $aj, $m0, $aj ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + mulx $nj, $m1, $nlo ! np[j]*m1 + addxc %g0, $hi0, $hi0 + umulxhi $nj, $m1, $nj ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + add $tp, 8, $tp + brnz,pt $cnt, .Linner + sub $cnt, 8, $cnt +!.Linner + ldx [$tp+8], $tj ! tp[j] + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + addxc %g0, $hi0, $hi0 + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + + subcc %g0, $ovf, %g0 ! move upmost overflow to CCR.xcc + addxccc $hi1, $hi0, $hi1 + addxc %g0, %g0, $ovf + stx $hi1, [$tp+8] + add $tp, 16, $tp + + brnz,pt $i, .Louter + sub $i, 8, $i + + sub $ap, $num, $ap ! rewind + sub $np, $num, $np + sub $tp, $num, $tp + ba .Lsub + subcc $num, 8, $cnt ! cnt=num-1 and clear CCR.xcc + +.align 16 +.Lsub: + ldx [$tp], $tj + add $tp, 8, $tp + ldx [$np+0], $nj + add $np, 8, $np + subccc $tj, $nj, $t2 ! tp[j]-np[j] + srlx $tj, 32, $tj + srlx $nj, 32, $nj + subccc $tj, $nj, $t3 + add $rp, 8, $rp + st $t2, [$rp-4] ! reverse order + st $t3, [$rp-8] + brnz,pt $cnt, .Lsub + sub $cnt, 8, $cnt + + sub $np, $num, $np ! rewind + sub $tp, $num, $tp + sub $rp, $num, $rp + + subc $ovf, %g0, $ovf ! handle upmost overflow bit + and $tp, $ovf, $ap + andn $rp, $ovf, $np + or $np, $ap, $ap ! ap=borrow?tp:rp + ba .Lcopy + sub $num, 8, $cnt + +.align 16 +.Lcopy: ! copy or in-place refresh + ldx [$ap+0], $t2 + add $ap, 8, $ap + stx %g0, [$tp] ! 
zap + add $tp, 8, $tp + stx $t2, [$rp+0] + add $rp, 8, $rp + brnz $cnt, .Lcopy + sub $cnt, 8, $cnt + + mov 1, %o0 + ret + restore +.type bn_mul_mont_t4, #function +.size bn_mul_mont_t4, .-bn_mul_mont_t4 +___ + +# int bn_mul_mont_gather5( +$rp="%o0"; # u64 *rp, +$ap="%o1"; # const u64 *ap, +$bp="%o2"; # const u64 *pwrtbl, +$np="%o3"; # const u64 *np, +$n0p="%o4"; # const BN_ULONG *n0, +$num="%o5"; # int num, # caller ensures that num is >=3 + # int power); +$code.=<<___; +.globl bn_mul_mont_gather5_t4 +.align 32 +bn_mul_mont_gather5_t4: + add %sp, STACK_BIAS, %g4 ! real top of stack + sll $num, 3, $num ! size in bytes + add $num, 63, %g1 + andn %g1, 63, %g1 ! buffer size rounded up to 64 bytes + sub %g4, %g1, %g1 + andn %g1, 63, %g1 ! align at 64 byte + sub %g1, STACK_FRAME, %g1 ! new top of stack + sub %g1, %g4, %g1 + LDPTR [%sp+STACK_7thARG], %g4 ! load power, 7th argument + + save %sp, %g1, %sp +___ +# +-------------------------------+<----- %sp +# . . +# +-------------------------------+<----- aligned at 64 bytes +# | __int64 tmp[0] | +# +-------------------------------+ +# . . +# . . +# +-------------------------------+<----- aligned at 64 bytes +# . . +($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5)); +($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz,$ccr)=map("%l$_",(0..7)); +($ovf,$i)=($t0,$t1); + &load_ccr($bp,"%g4",$ccr); + &load_b($bp,$m0,"%o7"); # m0=bp[0] + +$code.=<<___; + ld [$n0p+0], $t0 ! pull n0[0..1] value + ld [$n0p+4], $t1 + add %sp, STACK_BIAS+STACK_FRAME, $tp + sllx $t1, 32, $n0 + or $t0, $n0, $n0 + + ldx [$ap+0], $aj ! ap[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[0] + umulxhi $aj, $m0, $hi0 + + ldx [$ap+8], $aj ! ap[1] + add $ap, 16, $ap + ldx [$np+0], $nj ! np[0] + + mulx $lo0, $n0, $m1 ! "tp[0]"*n0 + + mulx $aj, $m0, $alo ! ap[1]*bp[0] + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $lo1 ! np[0]*m1 + umulxhi $nj, $m1, $hi1 + + ldx [$np+8], $nj ! np[1] + + addcc $lo0, $lo1, $lo1 + add $np, 16, $np + addxc %g0, $hi1, $hi1 + + mulx $nj, $m1, $nlo ! 
np[1]*m1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .L1st_g5 + sub $num, 24, $cnt ! cnt=num-3 + +.align 16 +.L1st_g5: + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 + + ldx [$ap+0], $aj ! ap[j] + addcc $nlo, $hi1, $lo1 + add $ap, 8, $ap + addxc $nj, %g0, $hi1 ! nhi=nj + + ldx [$np+0], $nj ! np[j] + mulx $aj, $m0, $alo ! ap[j]*bp[0] + add $np, 8, $np + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $nlo ! np[j]*m1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + umulxhi $nj, $m1, $nj ! nhi=nj + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp ! tp++ + + brnz,pt $cnt, .L1st_g5 + sub $cnt, 8, $cnt ! j-- +!.L1st_g5 + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp + + addcc $hi0, $hi1, $hi1 + addxc %g0, %g0, $ovf ! upmost overflow bit + stxa $hi1, [$tp]0xe2 + add $tp, 8, $tp + + ba .Louter_g5 + sub $num, 16, $i ! i=num-2 + +.align 16 +.Louter_g5: + wr $ccr, %g0, %ccr +___ + &load_b($bp,$m0); # m0=bp[i] +$code.=<<___; + sub $ap, $num, $ap ! rewind + sub $np, $num, $np + sub $tp, $num, $tp + + ldx [$ap+0], $aj ! ap[0] + ldx [$np+0], $nj ! np[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[i] + ldx [$tp], $tj ! tp[0] + umulxhi $aj, $m0, $hi0 + ldx [$ap+8], $aj ! ap[1] + addcc $lo0, $tj, $lo0 ! ap[0]*bp[i]+tp[0] + mulx $aj, $m0, $alo ! ap[1]*bp[i] + addxc %g0, $hi0, $hi0 + mulx $lo0, $n0, $m1 ! tp[0]*n0 + umulxhi $aj, $m0, $aj ! ahi=aj + mulx $nj, $m1, $lo1 ! np[0]*m1 + add $ap, 16, $ap + umulxhi $nj, $m1, $hi1 + ldx [$np+8], $nj ! np[1] + add $np, 16, $np + addcc $lo1, $lo0, $lo1 + mulx $nj, $m1, $nlo ! np[1]*m1 + addxc %g0, $hi1, $hi1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .Linner_g5 + sub $num, 24, $cnt ! cnt=num-3 +.align 16 +.Linner_g5: + addcc $alo, $hi0, $lo0 + ldx [$tp+8], $tj ! tp[j] + addxc $aj, %g0, $hi0 ! ahi=aj + ldx [$ap+0], $aj ! 
ap[j] + add $ap, 8, $ap + addcc $nlo, $hi1, $lo1 + mulx $aj, $m0, $alo ! ap[j]*bp[i] + addxc $nj, %g0, $hi1 ! nhi=nj + ldx [$np+0], $nj ! np[j] + add $np, 8, $np + umulxhi $aj, $m0, $aj ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + mulx $nj, $m1, $nlo ! np[j]*m1 + addxc %g0, $hi0, $hi0 + umulxhi $nj, $m1, $nj ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + add $tp, 8, $tp + brnz,pt $cnt, .Linner_g5 + sub $cnt, 8, $cnt +!.Linner_g5 + ldx [$tp+8], $tj ! tp[j] + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + addxc %g0, $hi0, $hi0 + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + + subcc %g0, $ovf, %g0 ! move upmost overflow to CCR.xcc + addxccc $hi1, $hi0, $hi1 + addxc %g0, %g0, $ovf + stx $hi1, [$tp+8] + add $tp, 16, $tp + + brnz,pt $i, .Louter_g5 + sub $i, 8, $i + + sub $ap, $num, $ap ! rewind + sub $np, $num, $np + sub $tp, $num, $tp + ba .Lsub_g5 + subcc $num, 8, $cnt ! cnt=num-1 and clear CCR.xcc + +.align 16 +.Lsub_g5: + ldx [$tp], $tj + add $tp, 8, $tp + ldx [$np+0], $nj + add $np, 8, $np + subccc $tj, $nj, $t2 ! tp[j]-np[j] + srlx $tj, 32, $tj + srlx $nj, 32, $nj + subccc $tj, $nj, $t3 + add $rp, 8, $rp + st $t2, [$rp-4] ! reverse order + st $t3, [$rp-8] + brnz,pt $cnt, .Lsub_g5 + sub $cnt, 8, $cnt + + sub $np, $num, $np ! rewind + sub $tp, $num, $tp + sub $rp, $num, $rp + + subc $ovf, %g0, $ovf ! handle upmost overflow bit + and $tp, $ovf, $ap + andn $rp, $ovf, $np + or $np, $ap, $ap ! ap=borrow?tp:rp + ba .Lcopy_g5 + sub $num, 8, $cnt + +.align 16 +.Lcopy_g5: ! copy or in-place refresh + ldx [$ap+0], $t2 + add $ap, 8, $ap + stx %g0, [$tp] ! 
zap + add $tp, 8, $tp + stx $t2, [$rp+0] + add $rp, 8, $rp + brnz $cnt, .Lcopy_g5 + sub $cnt, 8, $cnt + + mov 1, %o0 + ret + restore +.type bn_mul_mont_gather5_t4, #function +.size bn_mul_mont_gather5_t4, .-bn_mul_mont_gather5_t4 +___ +} + +$code.=<<___; +.globl bn_flip_t4 +.align 32 +bn_flip_t4: +.Loop_flip: + ld [%o1+0], %o4 + sub %o2, 1, %o2 + ld [%o1+4], %o5 + add %o1, 8, %o1 + st %o5, [%o0+0] + st %o4, [%o0+4] + brnz %o2, .Loop_flip + add %o0, 8, %o0 + retl + nop +.type bn_flip_t4, #function +.size bn_flip_t4, .-bn_flip_t4 + +.globl bn_flip_n_scatter5_t4 +.align 32 +bn_flip_n_scatter5_t4: + sll %o3, 3, %o3 + srl %o1, 1, %o1 + add %o3, %o2, %o2 ! &pwrtbl[pwr] + sub %o1, 1, %o1 +.Loop_flip_n_scatter5: + ld [%o0+0], %o4 ! inp[i] + ld [%o0+4], %o5 + add %o0, 8, %o0 + sllx %o5, 32, %o5 + or %o4, %o5, %o5 + stx %o5, [%o2] + add %o2, 32*8, %o2 + brnz %o1, .Loop_flip_n_scatter5 + sub %o1, 1, %o1 + retl + nop +.type bn_flip_n_scatter5_t4, #function +.size bn_flip_n_scatter5_t4, .-bn_flip_n_scatter5_t4 + +.globl bn_gather5_t4 +.align 32 +bn_gather5_t4: +___ + &load_ccr("%o2","%o3","%g1"); +$code.=<<___; + sub %o1, 1, %o1 +.Loop_gather5: +___ + &load_b("%o2","%g1"); +$code.=<<___; + stx %g1, [%o0] + add %o0, 8, %o0 + brnz %o1, .Loop_gather5 + sub %o1, 1, %o1 + + retl + nop +.type bn_gather5_t4, #function +.size bn_gather5_t4, .-bn_gather5_t4 + +.asciz "Montgomery Multiplication for SPARC T4, David S. Miller, Andy Polyakov" +.align 4 +___ + +&emit_assembler(); + +close STDOUT; diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/inline-t4/sparcv9-gf2m.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/openssl/openssl-1.0.1/inline-t4/sparcv9-gf2m.pl Mon Jul 08 17:50:18 2013 -0700 @@ -0,0 +1,198 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. 
The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# October 2012 +# +# The module implements bn_GF2m_mul_2x2 polynomial multiplication used +# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for +# the time being... Except that it has two code paths: one suitable +# for all SPARCv9 processors and one for VIS3-capable ones. Former +# delivers ~25-45% more, more for longer keys, heaviest DH and DSA +# verify operations on venerable UltraSPARC II. On T4 VIS3 code is +# ~100-230% faster than gcc-generated code and ~35-90% faster than +# the pure SPARCv9 code path. + +$bits=32; +for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } +if ($bits==64) { $bias=2047; $frame=192; } +else { $bias=0; $frame=112; } + +$locals=16*8; + +$code.=<<___; +#include + +.section ".text",#alloc,#execinstr +___ +$code.=<<___ if ($bits==64); +.register %g2,#scratch +.register %g3,#scratch +___ + +$tab="%l0"; + +@T=("%g2","%g3"); +@i=("%g4","%g5"); + +($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5)); +($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo; + +$code.=<<___; +#ifdef __PIC__ +SPARC_PIC_THUNK(%g1) +#endif + +.globl bn_GF2m_mul_2x2 +.align 16 +bn_GF2m_mul_2x2: + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) + ld [%g1+0],%g1 ! OPENSSL_sparcv9cap_P[0] + + andcc %g1, SPARCV9_VIS3, %g0 + bz,pn %icc,.Lsoftware + nop + + sllx %o1, 32, %o1 + sllx %o3, 32, %o3 + or %o2, %o1, %o1 + or %o4, %o3, %o3 + .word 0x95b262ab ! xmulx %o1, %o3, %o2 + .word 0x99b262cb ! xmulxhi %o1, %o3, %o4 + srlx %o2, 32, %o1 ! 13 cycles later + st %o2, [%o0+0] + st %o1, [%o0+4] + srlx %o4, 32, %o3 + st %o4, [%o0+8] + retl + st %o3, [%o0+12] + +.align 16 +.Lsoftware: + save %sp,-$frame-$locals,%sp + + sllx %i1,32,$a + mov -1,$a12 + sllx %i3,32,$b + or %i2,$a,$a + srlx $a12,1,$a48 ! 0x7fff... 
+ or %i4,$b,$b + srlx $a12,2,$a12 ! 0x3fff... + add %sp,$bias+$frame,$tab + + sllx $a,2,$a4 + mov $a,$a1 + sllx $a,1,$a2 + + srax $a4,63,@i[1] ! broadcast 61st bit + and $a48,$a4,$a4 ! (a<<2)&0x7fff... + srlx $a48,2,$a48 + srax $a2,63,@i[0] ! broadcast 62nd bit + and $a12,$a2,$a2 ! (a<<1)&0x3fff... + srax $a1,63,$lo ! broadcast 63rd bit + and $a48,$a1,$a1 ! (a<<0)&0x1fff... + + sllx $a1,3,$a8 + and $b,$lo,$lo + and $b,@i[0],@i[0] + and $b,@i[1],@i[1] + + stx %g0,[$tab+0*8] ! tab[0]=0 + xor $a1,$a2,$a12 + stx $a1,[$tab+1*8] ! tab[1]=a1 + stx $a2,[$tab+2*8] ! tab[2]=a2 + xor $a4,$a8,$a48 + stx $a12,[$tab+3*8] ! tab[3]=a1^a2 + xor $a4,$a1,$a1 + + stx $a4,[$tab+4*8] ! tab[4]=a4 + xor $a4,$a2,$a2 + stx $a1,[$tab+5*8] ! tab[5]=a1^a4 + xor $a4,$a12,$a12 + stx $a2,[$tab+6*8] ! tab[6]=a2^a4 + xor $a48,$a1,$a1 + stx $a12,[$tab+7*8] ! tab[7]=a1^a2^a4 + xor $a48,$a2,$a2 + + stx $a8,[$tab+8*8] ! tab[8]=a8 + xor $a48,$a12,$a12 + stx $a1,[$tab+9*8] ! tab[9]=a1^a8 + xor $a4,$a1,$a1 + stx $a2,[$tab+10*8] ! tab[10]=a2^a8 + xor $a4,$a2,$a2 + stx $a12,[$tab+11*8] ! tab[11]=a1^a2^a8 + + xor $a4,$a12,$a12 + stx $a48,[$tab+12*8] ! tab[12]=a4^a8 + srlx $lo,1,$hi + stx $a1,[$tab+13*8] ! tab[13]=a1^a4^a8 + sllx $lo,63,$lo + stx $a2,[$tab+14*8] ! tab[14]=a2^a4^a8 + srlx @i[0],2,@T[0] + stx $a12,[$tab+15*8] ! 
tab[15]=a1^a2^a4^a8 + + sllx @i[0],62,$a1 + sllx $b,3,@i[0] + srlx @i[1],3,@T[1] + and @i[0],`0xf<<3`,@i[0] + sllx @i[1],61,$a2 + ldx [$tab+@i[0]],@i[0] + srlx $b,4-3,@i[1] + xor @T[0],$hi,$hi + and @i[1],`0xf<<3`,@i[1] + xor $a1,$lo,$lo + ldx [$tab+@i[1]],@i[1] + xor @T[1],$hi,$hi + + xor @i[0],$lo,$lo + srlx $b,8-3,@i[0] + xor $a2,$lo,$lo + and @i[0],`0xf<<3`,@i[0] +___ +for($n=1;$n<14;$n++) { +$code.=<<___; + sllx @i[1],`$n*4`,@T[0] + ldx [$tab+@i[0]],@i[0] + srlx @i[1],`64-$n*4`,@T[1] + xor @T[0],$lo,$lo + srlx $b,`($n+2)*4`-3,@i[1] + xor @T[1],$hi,$hi + and @i[1],`0xf<<3`,@i[1] +___ + push(@i,shift(@i)); push(@T,shift(@T)); +} +$code.=<<___; + sllx @i[1],`$n*4`,@T[0] + ldx [$tab+@i[0]],@i[0] + srlx @i[1],`64-$n*4`,@T[1] + xor @T[0],$lo,$lo + + sllx @i[0],`($n+1)*4`,@T[0] + xor @T[1],$hi,$hi + srlx @i[0],`64-($n+1)*4`,@T[1] + xor @T[0],$lo,$lo + xor @T[1],$hi,$hi + + srlx $lo,32,%i1 + st $lo,[%i0+0] + st %i1,[%i0+4] + srlx $hi,32,%i2 + st $hi,[%i0+8] + st %i2,[%i0+12] + + ret + restore +.type bn_GF2m_mul_2x2,#function +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +.asciz "GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by " +.align 4 +___ + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; +print $code; +close STDOUT; diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/inline-t4/vis3-mont.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/openssl/openssl-1.0.1/inline-t4/vis3-mont.pl Mon Jul 08 17:50:18 2013 -0700 @@ -0,0 +1,373 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# October 2012. +# +# SPARCv9 VIS3 Montgomery multiplication procedure suitable for T3 and +# onward.
There are three new instructions used here: umulxhi, +# addxc[cc] and initializing store. On T3 RSA private key operations +# are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key +# lengths. This is without dedicated squaring procedure. On T4 +# corresponding coefficients are 1.47/2.10/2.80/2.90x, which is mostly +# for reference purposes, because T4 has dedicated Montgomery +# multiplication and squaring *instructions* that deliver even more. + +$bits=32; +for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } +if ($bits==64) { $bias=2047; $frame=192; } +else { $bias=0; $frame=112; } + +$code.=<<___ if ($bits==64); +.register %g2,#scratch +.register %g3,#scratch +___ +$code.=<<___; +.section ".text",#alloc,#execinstr +___ + +($n0,$m0,$m1,$lo0,$hi0, $lo1,$hi1,$aj,$alo,$nj,$nlo,$tj)= + (map("%g$_",(1..5)),map("%o$_",(0..5,7))); + +# int bn_mul_mont( +$rp="%o0"; # BN_ULONG *rp, +$ap="%o1"; # const BN_ULONG *ap, +$bp="%o2"; # const BN_ULONG *bp, +$np="%o3"; # const BN_ULONG *np, +$n0p="%o4"; # const BN_ULONG *n0, +$num="%o5"; # int num); # caller ensures that num is even + # and >=6 +$code.=<<___; +.globl bn_mul_mont_vis3 +.align 32 +bn_mul_mont_vis3: + add %sp, $bias, %g4 ! real top of stack + sll $num, 2, $num ! size in bytes + add $num, 63, %g5 + andn %g5, 63, %g5 ! buffer size rounded up to 64 bytes + add %g5, %g5, %g1 + add %g5, %g1, %g1 ! 3*buffer size + sub %g4, %g1, %g1 + andn %g1, 63, %g1 ! align at 64 byte + sub %g1, $frame, %g1 ! new top of stack + sub %g1, %g4, %g1 + + save %sp, %g1, %sp +___ + +# +-------------------------------+<----- %sp +# . . +# +-------------------------------+<----- aligned at 64 bytes +# | __int64 tmp[0] | +# +-------------------------------+ +# . . +# . . +# +-------------------------------+<----- aligned at 64 bytes +# | __int64 ap[1..0] | converted ap[] +# +-------------------------------+ +# | __int64 np[1..0] | converted np[] +# +-------------------------------+ +# | __int64 ap[3..2] | +# . . +# . . 
+# +-------------------------------+ +($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5)); +($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz,$anp)=map("%l$_",(0..7)); +($ovf,$i)=($t0,$t1); +$code.=<<___; + ld [$n0p+0], $t0 ! pull n0[0..1] value + add %sp, $bias+$frame, $tp + ld [$n0p+4], $t1 + add $tp, %g5, $anp + ld [$bp+0], $t2 ! m0=bp[0] + sllx $t1, 32, $n0 + ld [$bp+4], $t3 + or $t0, $n0, $n0 + add $bp, 8, $bp + + ld [$ap+0], $t0 ! ap[0] + sllx $t3, 32, $m0 + ld [$ap+4], $t1 + or $t2, $m0, $m0 + + ld [$ap+8], $t2 ! ap[1] + sllx $t1, 32, $aj + ld [$ap+12], $t3 + or $t0, $aj, $aj + add $ap, 16, $ap + stxa $aj, [$anp]0xe2 ! converted ap[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[0] + umulxhi $aj, $m0, $hi0 + + ld [$np+0], $t0 ! np[0] + sllx $t3, 32, $aj + ld [$np+4], $t1 + or $t2, $aj, $aj + + ld [$np+8], $t2 ! np[1] + sllx $t1, 32, $nj + ld [$np+12], $t3 + or $t0, $nj, $nj + add $np, 16, $np + stx $nj, [$anp+8] ! converted np[0] + + mulx $lo0, $n0, $m1 ! "tp[0]"*n0 + stx $aj, [$anp+16] ! converted ap[1] + + mulx $aj, $m0, $alo ! ap[1]*bp[0] + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $lo1 ! np[0]*m1 + umulxhi $nj, $m1, $hi1 + + sllx $t3, 32, $nj + or $t2, $nj, $nj + stx $nj, [$anp+24] ! converted np[1] + add $anp, 32, $anp + + addcc $lo0, $lo1, $lo1 + addxc %g0, $hi1, $hi1 + + mulx $nj, $m1, $nlo ! np[1]*m1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .L1st + sub $num, 24, $cnt ! cnt=num-3 + +.align 16 +.L1st: + ld [$ap+0], $t0 ! ap[j] + addcc $alo, $hi0, $lo0 + ld [$ap+4], $t1 + addxc $aj, %g0, $hi0 + + sllx $t1, 32, $aj + add $ap, 8, $ap + or $t0, $aj, $aj + stxa $aj, [$anp]0xe2 ! converted ap[j] + + ld [$np+0], $t2 ! np[j] + addcc $nlo, $hi1, $lo1 + ld [$np+4], $t3 + addxc $nj, %g0, $hi1 ! nhi=nj + + sllx $t3, 32, $nj + add $np, 8, $np + mulx $aj, $m0, $alo ! ap[j]*bp[0] + or $t2, $nj, $nj + umulxhi $aj, $m0, $aj ! ahi=aj + stx $nj, [$anp+8] ! converted np[j] + add $anp, 16, $anp ! anp++ + + mulx $nj, $m1, $nlo ! np[j]*m1 + addcc $lo0, $lo1, $lo1 ! 
np[j]*m1+ap[j]*bp[0] + umulxhi $nj, $m1, $nj ! nhi=nj + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp ! tp++ + + brnz,pt $cnt, .L1st + sub $cnt, 8, $cnt ! j-- +!.L1st + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp + + addcc $hi0, $hi1, $hi1 + addxc %g0, %g0, $ovf ! upmost overflow bit + stxa $hi1, [$tp]0xe2 + add $tp, 8, $tp + + ba .Louter + sub $num, 16, $i ! i=num-2 + +.align 16 +.Louter: + ld [$bp+0], $t2 ! m0=bp[i] + ld [$bp+4], $t3 + + sub $anp, $num, $anp ! rewind + sub $tp, $num, $tp + sub $anp, $num, $anp + + add $bp, 8, $bp + sllx $t3, 32, $m0 + ldx [$anp+0], $aj ! ap[0] + or $t2, $m0, $m0 + ldx [$anp+8], $nj ! np[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[i] + ldx [$tp], $tj ! tp[0] + umulxhi $aj, $m0, $hi0 + ldx [$anp+16], $aj ! ap[1] + addcc $lo0, $tj, $lo0 ! ap[0]*bp[i]+tp[0] + mulx $aj, $m0, $alo ! ap[1]*bp[i] + addxc %g0, $hi0, $hi0 + mulx $lo0, $n0, $m1 ! tp[0]*n0 + umulxhi $aj, $m0, $aj ! ahi=aj + mulx $nj, $m1, $lo1 ! np[0]*m1 + umulxhi $nj, $m1, $hi1 + ldx [$anp+24], $nj ! np[1] + add $anp, 32, $anp + addcc $lo1, $lo0, $lo1 + mulx $nj, $m1, $nlo ! np[1]*m1 + addxc %g0, $hi1, $hi1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .Linner + sub $num, 24, $cnt ! cnt=num-3 +.align 16 +.Linner: + addcc $alo, $hi0, $lo0 + ldx [$tp+8], $tj ! tp[j] + addxc $aj, %g0, $hi0 ! ahi=aj + ldx [$anp+0], $aj ! ap[j] + addcc $nlo, $hi1, $lo1 + mulx $aj, $m0, $alo ! ap[j]*bp[i] + addxc $nj, %g0, $hi1 ! nhi=nj + ldx [$anp+8], $nj ! np[j] + add $anp, 16, $anp + umulxhi $aj, $m0, $aj ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + mulx $nj, $m1, $nlo ! np[j]*m1 + addxc %g0, $hi0, $hi0 + umulxhi $nj, $m1, $nj ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! 
tp[j-1] + add $tp, 8, $tp + brnz,pt $cnt, .Linner + sub $cnt, 8, $cnt +!.Linner + ldx [$tp+8], $tj ! tp[j] + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + addxc %g0, $hi0, $hi0 + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + + subcc %g0, $ovf, %g0 ! move upmost overflow to CCR.xcc + addxccc $hi1, $hi0, $hi1 + addxc %g0, %g0, $ovf + stx $hi1, [$tp+8] + add $tp, 16, $tp + + brnz,pt $i, .Louter + sub $i, 8, $i + + sub $anp, $num, $anp ! rewind + sub $tp, $num, $tp + sub $anp, $num, $anp + ba .Lsub + subcc $num, 8, $cnt ! cnt=num-1 and clear CCR.xcc + +.align 16 +.Lsub: + ldx [$tp], $tj + add $tp, 8, $tp + ldx [$anp+8], $nj + add $anp, 16, $anp + subccc $tj, $nj, $t2 ! tp[j]-np[j] + srlx $tj, 32, $tj + srlx $nj, 32, $nj + subccc $tj, $nj, $t3 + add $rp, 8, $rp + st $t2, [$rp-4] ! reverse order + st $t3, [$rp-8] + brnz,pt $cnt, .Lsub + sub $cnt, 8, $cnt + + sub $anp, $num, $anp ! rewind + sub $tp, $num, $tp + sub $anp, $num, $anp + sub $rp, $num, $rp + + subc $ovf, %g0, $ovf ! handle upmost overflow bit + and $tp, $ovf, $ap + andn $rp, $ovf, $np + or $np, $ap, $ap ! ap=borrow?tp:rp + ba .Lcopy + sub $num, 8, $cnt + +.align 16 +.Lcopy: ! copy or in-place refresh + ld [$ap+0], $t2 + ld [$ap+4], $t3 + add $ap, 8, $ap + stx %g0, [$tp] ! zap + add $tp, 8, $tp + stx %g0, [$anp] ! zap + stx %g0, [$anp+8] + add $anp, 16, $anp + st $t3, [$rp+0] ! flip order + st $t2, [$rp+4] + add $rp, 8, $rp + brnz $cnt, .Lcopy + sub $cnt, 8, $cnt + + mov 1, %o0 + ret + restore +.type bn_mul_mont_vis3, #function +.size bn_mul_mont_vis3, .-bn_mul_mont_vis3 +.asciz "Montgomery Multiplication for SPARCv9 VIS3, CRYPTOGAMS by " +.align 4 +___ + +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extensions on compiler command line, e.g.
-xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. +sub unvis3 { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); +my ($ref,$opf); +my %visopf = ( "addxc" => 0x011, + "addxccc" => 0x013, + "umulxhi" => 0x016 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%([goli])([0-9])/); + $_=$bias{$1}+$2; + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/\b(umulxhi|addxc[c]{0,2})\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ + &unvis3($1,$2,$3,$4) + /ge; + + print $_,"\n"; +} + +close STDOUT; diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/patches/openssl-1.0.1e-t4-engine.sparc-patch --- a/components/openssl/openssl-1.0.1/patches/openssl-1.0.1e-t4-engine.sparc-patch Mon Jul 08 16:18:46 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,184 +0,0 @@ -# -# Engine t4 patch openssl-1.0.0d-t4-engine.sparc-patch. -# This engine is SPARC-only. 
-# -Index: Configure -=================================================================== -diff -ru openssl-1.0.0d/Configure openssl-1.0.0d/Configure ---- openssl-1.0.0d/Configure 2011-05-24 17:02:24.000000000 -0700 -+++ openssl-1.0.0d/Configure 2011-07-27 10:48:17.817470000 -0700 -@@ -135,8 +135,9 @@ - - my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:"; - my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; --my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; --my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; -+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o t4_md5.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o t4_sha1.o t4_sha2.o:::::::ghash-sparcv9.o::void"; -+my $sparcv9_fips_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o t4_md5.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o t4_sha1.o t4_sha2.o:::::::ghash-sparcv9.o::void"; -+my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:t4_aes.o::t4_md5.o:t4_sha1.o t4_sha2.o:::::::void"; - my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; - my 
$mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::"; - my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; -@@ -264,9 +264,11 @@ - # - "solaris64-x86_64-cc-sunw","cc:-xO3 -m64 -xstrconst -Xa -DL_ENDIAN::-D_REENTRANT::-lsocket -lnsl -lc:SIXTY_FOUR_BIT_LONG RC4_CHUNK BF_PTR DES_PTR DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:solaris-shared:-KPIC:-m64 -G -dy -z text -zdefs -Bdirect -zignore -M/usr/lib/ld/map.pagealign -M/usr/lib/ld/map.noexdata:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", - # --"solaris-sparcv9-cc-sunw","cc:-xtarget=ultra -m32 -Qoption cg -xregs=no%appl -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -lc:BN_LLONG RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:solaris-shared:-KPIC:-m32 -G -dy -z text -zdefs -Bdirect -zignore -M/usr/lib/ld/map.pagealign:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -+"solaris-sparcv9-cc-sunw","cc:-xtarget=ultra -m32 -Qoption cg -xregs=no%appl -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -lc -lsoftcrypto:BN_LLONG RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:solaris-shared:-KPIC:-m32 -G -dy -z text -zdefs -Bdirect -zignore -M/usr/lib/ld/map.pagealign:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -+"solaris-sparcv9-cc-sunw-fips","cc:-xtarget=ultra -m32 -Qoption cg -xregs=no%appl -xO5 -xstrconst -xdepend -Xa -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -lc -lsoftcrypto:BN_LLONG RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_fips_asm}:dlfcn:solaris-shared:-KPIC:-m32 -G -dy -z text -zdefs -Bdirect -zignore -M/usr/lib/ld/map.pagealign:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", - # --"solaris64-sparcv9-cc-sunw","cc:-xtarget=ultra -m64 -Qoption cg -xregs=no%appl -xO5 -xstrconst -xdepend -xspace -Xa -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -lc:BN_LLONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 
DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:solaris-shared:-KPIC:-m64 -G -dy -z text -zdefs -Bdirect -zignore -M/usr/lib/ld/map.pagealign:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):/usr/bin/ar rs::/64", -+"solaris64-sparcv9-cc-sunw","cc:-xtarget=ultra -m64 -Qoption cg -xregs=no%appl -xO5 -xstrconst -xdepend -xspace -Xa -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -lc -lsoftcrypto:BN_LLONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:solaris-shared:-KPIC:-m64 -G -dy -z text -zdefs -Bdirect -zignore -M/usr/lib/ld/map.pagealign:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):/usr/bin/ar rs::/64", -+"solaris64-sparcv9-cc-sunw-fips","cc:-xtarget=ultra -m64 -Qoption cg -xregs=no%appl -xO5 -xstrconst -xdepend -xspace -Xa -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -lc -lsoftcrypto:BN_LLONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_fips_asm}:dlfcn:solaris-shared:-KPIC:-m64 -G -dy -z text -zdefs -Bdirect -zignore -M/usr/lib/ld/map.pagealign:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):/usr/bin/ar rs::/64", - # Option -xF=%all instructs the compiler to place functions and data - # variables into separate section fragments. 
This enables the link editor - # to discard unused sections and files when linking wanboot-openssl.o -Index: crypto/aes/Makefile -=================================================================== -diff -ru openssl-1.0.0d/crypto/aes/ openssl-1.0.0d/crypto/aes/Makefile ---- openssl-1.0.0d/crypto/aes/Makefile 2011-05-24 17:03:31.000000000 -0700 -+++ openssl-1.0.0d/crypto/aes/Makefile 2011-06-30 17:26:34.980110000 -0700 -@@ -17,6 +17,10 @@ - ASFLAGS= $(INCLUDES) $(ASFLAG) - AFLAGS= $(ASFLAGS) - -+BITS:= $(shell if grep '^SHARED_LDFLAGS.*=.*-m32' ../../Makefile >/dev/null; \ -+ then echo 32; else echo 64; fi) -+ASFLAGSYF= -xregsym=no -K pic -P -xarch=v9v -D_sparcv9 -D_ASM -Dsparc -m$(BITS) -+ - GENERAL=Makefile - #TEST=aestest.c - TEST= -@@ -72,6 +76,10 @@ - aes-sparcv9.s: asm/aes-sparcv9.pl - $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@ - -+t4_aes.o: asm/t4_aes.S -+ as $(ASFLAGSYF) -o $@ asm/t4_aes.S -+ elfedit -e 'cap:hw1 -and -cmp vis vis3' $@ -+ - aes-ppc.s: asm/aes-ppc.pl - $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ - -Index: crypto/engine/Makefile -=================================================================== -diff -ru openssl-1.0.0d/crypto/engine/Makefile openssl-1.0.0d/crypto/engine/Makefile ---- openssl-1.0.0d/crypto/engine/Makefile 2011-05-24 17:04:12.000000000 -0700 -+++ openssl-1.0.0d/crypto/engine/Makefile 2011-07-28 10:10:00.000000000 -0700 -@@ -22,12 +22,16 @@ - tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \ - tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \ - eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \ -+ eng_t4.c eng_t4_md5.c \ -+ eng_t4_sha1.c eng_t4_sha256.c eng_t4_sha512.c eng_t4_montmul.c \ - eng_rsax.c eng_rdrand.c hw_pk11.c hw_pk11_pub.c hw_pk11_uri.c - LIBOBJ= eng_err.o eng_lib.o eng_list.o eng_init.o eng_ctrl.o \ - eng_table.o eng_pkey.o eng_fat.o eng_all.o \ - tb_rsa.o tb_dsa.o tb_ecdsa.o tb_dh.o tb_ecdh.o tb_rand.o tb_store.o \ - tb_cipher.o tb_digest.o tb_pkmeth.o tb_asnmth.o \ - eng_openssl.o eng_cnf.o 
eng_dyn.o eng_cryptodev.o \ -+ eng_t4.o eng_t4_md5.o \ -+ eng_t4_sha1.o eng_t4_sha256.o eng_t4_sha512.o eng_t4_montmul.o \ - eng_rsax.o eng_rdrand.o hw_pk11.o hw_pk11_pub.o hw_pk11_uri.o - - SRC= $(LIBSRC) -Index: crypto/engine/eng_all.c -=================================================================== -diff -ru openssl-1.0.0d/crypto/engine/eng_all.c openssl-1.0.0d/crypto/engine/eng_all.c ---- openssl-1.0.0d/crypto/engine/eng_all.c 2011-05-24 17:02:20.000000000 -0700 -+++ openssl-1.0.0d/crypto/engine/eng_all.c 2011-06-22 17:34:25.145829355 -0700 -@@ -79,6 +79,10 @@ - #ifndef OPENSSL_NO_RDRAND - ENGINE_load_rdrand(); - #endif -+#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_T4) -+ ENGINE_load_t4(); -+ ENGINE_register_all_complete(); -+#endif - ENGINE_load_dynamic(); - #ifndef OPENSSL_NO_HW_PKCS11 - ENGINE_load_pk11(); -Index: crypto/engine/engine.h -================================================================== -diff -ru openssl-1.0.0d/crypto/engine/engine.h openssl-1.0.0d/crypto/engine/engine.h ---- openssl-1.0.0d/crypto/engine/engine.h 2011-05-24 17:02:20.000000000 -0700 -+++ openssl-1.0.0d/crypto/engine/engine.h 2011-05-24 18:05:29.075766123 -0700 -@@ -351,6 +351,7 @@ - #endif - #endif - void ENGINE_load_cryptodev(void); -+void ENGINE_load_t4(void); - void ENGINE_load_pk11(void); - void ENGINE_load_rsax(void); - void ENGINE_load_rdrand(void); -Index: crypto/md5/Makefile -================================================================== -diff -ru openssl-1.0.0d/crypto/md5/Makefile openssl-1.0.0d/crypto/md5/Makefile ---- openssl-1.0.0d/crypto/md5/Makefile 2011-05-24 17:03:14.000000000 -0700 -+++ openssl-1.0.0d/crypto/md5/Makefile 2011-08-27 16:01:49.000000000 -0700 -@@ -17,6 +17,10 @@ - ASFLAGS= $(INCLUDES) $(ASFLAG) - AFLAGS= $(ASFLAGS) - -+BITS:= $(shell if grep '^SHARED_LDFLAGS.*=.*-m32' ../../Makefile >/dev/null; \ -+ then echo 32; else echo 64; fi) -+ASFLAGSYF= -xregsym=no -K pic -P -xarch=v9v -D_sparcv9 -D_ASM -Dsparc -m$(BITS) -+ - 
GENERAL=Makefile - TEST=md5test.c - APPS= -@@ -55,6 +59,10 @@ - md5-sparcv9.S: asm/md5-sparcv9.pl - $(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS) - -+t4_md5.o: asm/t4_md5.S -+ as $(ASFLAGSYF) -o $@ asm/t4_md5.S -+ elfedit -e 'cap:hw1 -and -cmp vis vis3' $@ -+ - files: - $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO - -Index: crypto/sha/Makefile -================================================================== -diff -ru openssl-1.0.0d/crypto/sha/Makefile openssl-1.0.0d/crypto/sha/Makefile ---- openssl-1.0.0d/crypto/sha/Makefile 2011-05-24 17:03:18.000000000 -0700 -+++ openssl-1.0.0d/crypto/sha/Makefile 2011-08-27 16:01:49.000000000 -0700 -@@ -17,6 +17,10 @@ - ASFLAGS= $(INCLUDES) $(ASFLAG) - AFLAGS= $(ASFLAGS) - -+BITS:= $(shell if grep '^SHARED_LDFLAGS.*=.*-m32' ../../Makefile >/dev/null; \ -+ then echo 32; else echo 64; fi) -+ASFLAGSYF= -xregsym=no -K pic -P -xarch=v9v -D_sparcv9 -D_ASM -Dsparc -m$(BITS) -+ - GENERAL=Makefile - TEST=shatest.c sha1test.c sha256t.c sha512t.c - APPS= -@@ -91,6 +95,14 @@ - sha256-armv4.o: sha256-armv4.S - sha512-armv4.o: sha512-armv4.S - -+t4_sha1.o: asm/t4_sha1.S -+ as $(ASFLAGSYF) -o $@ asm/t4_sha1.S -+ elfedit -e 'cap:hw1 -and -cmp vis vis3' $@ -+ -+t4_sha2.o: asm/t4_sha2.S -+ as $(ASFLAGSYF) -o $@ asm/t4_sha2.S -+ elfedit -e 'cap:hw1 -and -cmp vis vis3' $@ -+ - files: - $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO - -Index: util/libeay.num -================================================================== -diff -ru openssl-1.0.0d/util/libeay.num openssl-1.0.0d/util/libeay.num ---- openssl-1.0.0d/util/libeay.num 2010-07-25 09:56:06.000000000 -0700 -+++ openssl-1.0.0d/util/libeay.num 2011-05-25 11:19:15.585211842 -0700 -@@ -4179,6 +4179,7 @@ - UI_method_set_prompt_constructor 4551 EXIST:!VMS:FUNCTION: - UI_method_set_prompt_constructr 4551 EXIST:VMS:FUNCTION: - EVP_read_pw_string_min 4552 EXIST::FUNCTION: -+ENGINE_load_t4 4553 EXIST::FUNCTION:ENGINE - CRYPTO_cts128_encrypt 4553 EXIST::FUNCTION: - 
CRYPTO_cts128_decrypt_block 4554 EXIST::FUNCTION: - CRYPTO_cfb128_1_encrypt 4555 EXIST::FUNCTION: diff -r 15aec33b84fa -r 3515c1afdfc8 components/openssl/openssl-1.0.1/patches/openssl-t4-inline.sparc-patch --- a/components/openssl/openssl-1.0.1/patches/openssl-t4-inline.sparc-patch Mon Jul 08 16:18:46 2013 -0700 +++ b/components/openssl/openssl-1.0.1/patches/openssl-t4-inline.sparc-patch Mon Jul 08 17:50:18 2013 -0700 @@ -11,7 +11,7 @@ my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:"; my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; -+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; ++my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; my $mips32_asm=":bn-mips.o::aes_cbc.o 
aes-mips.o:::sha1-mips.o sha256-mips.o::::::::"; @@ -20,19 +20,50 @@ diff -ru openssl-1.0.1e/crypto/sparccpuid.S openssl-1.0.1e/crypto/sparccpuid.S --- openssl-1.0.1e/crypto/sparccpuid.S 2011-05-24 17:02:24.000000000 -0700 +++ openssl-1.0.1e/crypto/sparccpuid.S 2011-07-27 10:48:17.817470000 -0700 -@@ -251,6 +251,11 @@ +@@ -1,3 +1,7 @@ ++#ifdef OPENSSL_FIPSCANISTER ++#include ++#endif ++ + #if defined(__SUNPRO_C) && defined(__sparcv9) + # define ABI64 /* They've said -xarch=v9 at command line */ + #elif defined(__GNUC__) && defined(__arch64__) +@@ -123,7 +127,7 @@ + fmovs %f1,%f3 + fmovs %f0,%f2 + +- add %fp,BIAS,%i0 ! return pointer to callerīs top of stack ++ add %fp,BIAS,%i0 ! return pointer to caller?s top of stack + + ret + restore +@@ -235,10 +239,10 @@ + .global _sparcv9_vis1_probe + .align 8 + _sparcv9_vis1_probe: ++ .word 0x81b00d80 !fxor %f0,%f0,%f0 + add %sp,BIAS+2,%o1 +- .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 + retl +- .word 0x81b00d80 !fxor %f0,%f0,%f0 ++ .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 + .type _sparcv9_vis1_probe,#function + .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe + +@@ -251,7 +255,12 @@ ! UltraSPARC IIe 7 ! UltraSPARC III 7 ! UltraSPARC T1 24 +! SPARC T4 65(*) -+! + ! +! (*) result has lesser to do with VIS instruction latencies, rdtick +! appears that slow, but it does the trick in sense that FP and +! VIS code paths are still slower than integer-only ones. - ! ++! ! Numbers for T2 and SPARC64 V-VII are more than welcomed. ! -@@ -260,6 +265,8 @@ + ! 
It would be possible to detect specifically US-T1 by instrumenting +@@ -260,6 +269,8 @@ .global _sparcv9_vis1_instrument .align 8 _sparcv9_vis1_instrument: @@ -41,7 +72,7 @@ .word 0x91410000 !rd %tick,%o0 .word 0x81b00d80 !fxor %f0,%f0,%f0 .word 0x85b08d82 !fxor %f2,%f2,%f2 -@@ -314,6 +321,30 @@ +@@ -314,6 +325,30 @@ .type _sparcv9_fmadd_probe,#function .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe @@ -72,14 +103,125 @@ .global OPENSSL_cleanse .align 32 OPENSSL_cleanse: +@@ -397,11 +432,102 @@ + .type OPENSSL_cleanse,#function + .size OPENSSL_cleanse,.-OPENSSL_cleanse + +-#ifndef _BOOT ++.global _sparcv9_vis1_instrument_bus ++.align 8 ++_sparcv9_vis1_instrument_bus: ++ mov %o1,%o3 ! save cnt ++ .word 0x99410000 !rd %tick,%o4 ! tick ++ mov %o4,%o5 ! lasttick = tick ++ set 0,%g4 ! diff ++ ++ andn %o0,63,%g1 ++ .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load ++ .word 0x8143e040 !membar #Sync ++ .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit ++ .word 0x8143e040 !membar #Sync ++ ld [%o0],%o4 ++ add %o4,%g4,%g4 ++ .word 0xc9e2100c !cas [%o0],%o4,%g4 ++ ++.Loop: .word 0x99410000 !rd %tick,%o4 ++ sub %o4,%o5,%g4 ! diff=tick-lasttick ++ mov %o4,%o5 ! lasttick=tick ++ ++ andn %o0,63,%g1 ++ .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load ++ .word 0x8143e040 !membar #Sync ++ .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit ++ .word 0x8143e040 !membar #Sync ++ ld [%o0],%o4 ++ add %o4,%g4,%g4 ++ .word 0xc9e2100c !cas [%o0],%o4,%g4 ++ subcc %o1,1,%o1 ! --$cnt ++ bnz .Loop ++ add %o0,4,%o0 ! ++$out ++ ++ retl ++ mov %o3,%o0 ++.type _sparcv9_vis1_instrument_bus,#function ++.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus ++ ++.global _sparcv9_vis1_instrument_bus2 ++.align 8 ++_sparcv9_vis1_instrument_bus2: ++ mov %o1,%o3 ! save cnt ++ sll %o1,2,%o1 ! cnt*=4 ++ ++ .word 0x99410000 !rd %tick,%o4 ! tick ++ mov %o4,%o5 ! lasttick = tick ++ set 0,%g4 ! diff ++ ++ andn %o0,63,%g1 ++ .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! 
block load ++ .word 0x8143e040 !membar #Sync ++ .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit ++ .word 0x8143e040 !membar #Sync ++ ld [%o0],%o4 ++ add %o4,%g4,%g4 ++ .word 0xc9e2100c !cas [%o0],%o4,%g4 ++ ++ .word 0x99410000 !rd %tick,%o4 ! tick ++ sub %o4,%o5,%g4 ! diff=tick-lasttick ++ mov %o4,%o5 ! lasttick=tick ++ mov %g4,%g5 ! lastdiff=diff ++.Loop2: ++ andn %o0,63,%g1 ++ .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load ++ .word 0x8143e040 !membar #Sync ++ .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit ++ .word 0x8143e040 !membar #Sync ++ ld [%o0],%o4 ++ add %o4,%g4,%g4 ++ .word 0xc9e2100c !cas [%o0],%o4,%g4 ++ ++ subcc %o2,1,%o2 ! --max ++ bz .Ldone2 ++ nop ++ ++ .word 0x99410000 !rd %tick,%o4 ! tick ++ sub %o4,%o5,%g4 ! diff=tick-lasttick ++ mov %o4,%o5 ! lasttick=tick ++ cmp %g4,%g5 ++ mov %g4,%g5 ! lastdiff=diff ++ ++ .word 0x83408000 !rd %ccr,%g1 ++ and %g1,4,%g1 ! isolate zero flag ++ xor %g1,4,%g1 ! flip zero flag ++ ++ subcc %o1,%g1,%o1 ! conditional --$cnt ++ bnz .Loop2 ++ add %o0,%g1,%o0 ! 
conditional ++$out ++ ++.Ldone2: ++ srl %o1,2,%o1 ++ retl ++ sub %o3,%o1,%o0 ++.type _sparcv9_vis1_instrument_bus2,#function ++.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2 ++ + .section ".init",#alloc,#execinstr + call OPENSSL_cpuid_setup + nop +-#else +- nop +- nop +-#endif Index: crypto/sparcv9cap.c =================================================================== diff -ru openssl-1.0.1e/crypto/sparcv9cap.c openssl-1.0.1e/crypto/sparcv9cap.c --- openssl-1.0.1e/crypto/sparcv9cap.c 2011-05-24 17:02:24.000000000 -0700 +++ openssl-1.0.1e/crypto/sparcv9cap.c 2011-07-27 10:48:17.817470000 -0700 -@@ -6,16 +6,15 @@ +@@ -4,31 +4,55 @@ + #include <setjmp.h> + #include <signal.h> #include <sys/time.h> ++#include <unistd.h> #include <openssl/bn.h> -#define SPARCV9_TICK_PRIVILEGED (1<<0) @@ -101,16 +243,50 @@ #endif int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) -@@ -24,7 +23,7 @@ + { ++ int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); + int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); - if (num>=8 && !(num&1) && +- if (num>=8 && !(num&1) && - (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == -+ (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == - (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) - return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); - else -@@ -36,11 +35,16 @@ +- (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) +- return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); +- else +- return bn_mul_mont_int(rp,ap,bp,np,n0,num); ++ if (!(num&1) && num>=6) ++ { ++ if ((num&15)==0 && num<=64 && ++ (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== ++ (CFR_MONTMUL|CFR_MONTSQR)) ++ { ++ typedef int (*bn_mul_mont_f)(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG 
*np,const BN_ULONG *n0); ++ int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); ++ int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); ++ int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); ++ int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); ++ static const bn_mul_mont_f funcs[4] = { ++ bn_mul_mont_t4_8, bn_mul_mont_t4_16, ++ bn_mul_mont_t4_24, bn_mul_mont_t4_32 }; ++ bn_mul_mont_f worker = funcs[num/16-1]; ++ ++ if ((*worker)(rp,ap,bp,np,n0)) return 1; ++ /* retry once and fall back */ ++ if ((*worker)(rp,ap,bp,np,n0)) return 1; ++ return bn_mul_mont_vis3(rp,ap,bp,np,n0,num); ++ } ++ if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3)) ++ return bn_mul_mont_vis3(rp,ap,bp,np,n0,num); ++ else if (num>=8 && ++ (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == ++ (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ++ return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); ++ } ++ return bn_mul_mont_int(rp,ap,bp,np,n0,num); + } + + unsigned long _sparcv9_rdtick(void); +@@ -36,11 +60,16 @@ unsigned long _sparcv9_vis1_instrument(void); void _sparcv9_vis2_probe(void); void _sparcv9_fmadd_probe(void); @@ -128,13 +304,13 @@ #if defined(__sun) && defined(__SVR4) return gethrtime(); #else -@@ -51,6 +55,25 @@ +@@ -51,6 +81,25 @@ } #endif +size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) + { -+ if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK) == ++ if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus(out,cnt); + else @@ -143,7 +319,7 @@ + +size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max) + { -+ if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK) == ++ if 
((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus2(out,cnt,max); + else @@ -154,7 +330,7 @@ #if defined(_BOOT) /* * Hardcoding sparc capabilities for wanboot. -@@ -58,7 +81,7 @@ +@@ -58,7 +106,7 @@ */ void OPENSSL_cpuid_setup(void) { @@ -163,7 +339,7 @@ } #elif 0 && defined(__sun) && defined(__SVR4) -@@ -85,11 +108,11 @@ +@@ -85,11 +116,11 @@ if (!strcmp (name,"SUNW,UltraSPARC") || !strncmp(name,"SUNW,UltraSPARC-I",17)) /* covers II,III,IV */ { @@ -177,7 +353,7 @@ return DI_WALK_TERMINATE; } -@@ -96,7 +119,7 @@ +@@ -96,7 +127,7 @@ /* This is expected to catch remaining UltraSPARCs, such as T1 */ else if (!strncmp(name,"SUNW,UltraSPARC",15)) { @@ -186,7 +362,7 @@ return DI_WALK_TERMINATE; } -@@ -115,7 +138,7 @@ +@@ -115,7 +146,7 @@ if ((e=getenv("OPENSSL_sparcv9cap"))) { @@ -195,7 +371,7 @@ return; } -@@ -123,17 +146,17 @@ +@@ -123,17 +154,17 @@ { if (strcmp(si,"sun4v")) /* FPU is preferred for all CPUs, but US-T1/2 */ @@ -217,7 +393,7 @@ return; } } -@@ -193,12 +216,14 @@ +@@ -193,12 +224,14 @@ if ((e=getenv("OPENSSL_sparcv9cap"))) { @@ -234,7 +410,7 @@ sigfillset(&all_masked); sigdelset(&all_masked,SIGILL); -@@ -221,20 +246,20 @@ +@@ -221,20 +254,20 @@ if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_rdtick(); @@ -259,7 +435,7 @@ } } -@@ -241,9 +266,37 @@ +@@ -241,13 +274,53 @@ if (sigsetjmp(common_jmp,1) == 0) { _sparcv9_fmadd_probe(); @@ -298,6 +474,22 @@ sigaction(SIGBUS,&bus_oact,NULL); sigaction(SIGILL,&ill_oact,NULL); + sigprocmask(SIG_SETMASK,&oset,NULL); ++ ++ if (sizeof(size_t)==8) ++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; ++#ifdef __linux ++ else ++ { ++ int ret = syscall(340); ++ ++ if (ret>=0 && ret&1) ++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; ++ } ++#endif + } + + #endif Index: crypto/md5/Makefile =================================================================== diff -ru openssl-1.0.1e/crypto/md5/Makefile openssl-1.0.1e/crypto/md5/Makefile @@ -1163,6 +1355,292 @@ 
#endif return 1; } +Index: openssl/crypto/bn/Makefile +=================================================================== +diff -ru openssl-1.0.1e/crypto/bn/Makefile openssl-1.0.1e/crypto/bn/Makefile.new +--- openssl-1.0.1e/crypto/bn/Makefile 2011-05-24 17:02:24.000000000 -0700 ++++ openssl-1.0.1e/crypto/bn/Makefile 2011-07-27 10:48:17.817470000 -0700 +@@ -77,6 +77,12 @@ + $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@ + sparcv9-mont.s: asm/sparcv9-mont.pl + $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@ ++vis3-mont.s: asm/vis3-mont.pl ++ $(PERL) asm/vis3-mont.pl $(CFLAGS) > $@ ++sparct4-mont.S: asm/sparct4-mont.pl ++ $(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@ ++sparcv9-gf2m.S: asm/sparcv9-gf2m.pl ++ $(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@ + + bn-mips3.o: asm/mips3.s + @if [ "$(CC)" = "gcc" ]; then \ +Index: openssl/crypto/bn/bn_exp.c +=================================================================== +diff -ru openssl-1.0.1e/crypto/bn/bn_exp.c openssl-1.0.1e/crypto/bn/bn_exp.c.new +--- bn_exp.c 2011/10/29 19:25:13 1.38 ++++ bn_exp.c 2012/11/17 10:34:11 1.39 +@@ -123,8 +123,15 @@ + # ifndef alloca + # define alloca(s) __builtin_alloca((s)) + # endif ++#else ++#include <alloca.h> + #endif + ++#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) ++# include "sparc_arch.h" ++extern unsigned int OPENSSL_sparcv9cap_P[]; ++#endif ++ + /* maximum precomputation table size for *variable* sliding windows */ + #define TABLE_SIZE 32 + +@@ -467,7 +467,15 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, + wstart=bits-1; /* The top bit of the window */ + wend=0; /* The bottom bit of the window */ + ++#if 1 /* by Shay Gueron's suggestion */ ++ j = mont->N.top; /* borrow j */ ++ if (bn_wexpand(r,j) == NULL) goto err; ++ r->d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ ++ for(i=1;i<j;i++) r->d[i] = (~m->d[i])&BN_MASK2; ++ r->top = j; ++#else + if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; ++#endif + for (;;) + { + if (BN_is_bit_set(p,wstart) == 0) +@@ -519,6 
+527,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, + start=0; + if (wstart < 0) break; + } ++#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) ++ if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_VIS3|SPARCV9_PREFER_FPU)) ++ { ++ j = mont->N.top; /* borrow j */ ++ val[0]->d[0] = 1; /* borrow val[0] */ ++ for (i=1;i<j;i++) val[0]->d[i] = 0; ++ val[0]->top = j; ++ if (!BN_mod_mul_montgomery(rr,r,val[0],mont,ctx)) goto err; ++ } ++ else ++#endif + if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; + ret=1; + err: +@@ -528,6 +547,28 @@ err: + return(ret); + } + ++#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) ++static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) ++ { ++ BN_ULONG ret=0; ++ int wordpos; ++ ++ wordpos = bitpos/BN_BITS2; ++ bitpos %= BN_BITS2; ++ if (wordpos>=0 && wordpos < a->top) ++ { ++ ret = a->d[wordpos]&BN_MASK2; ++ if (bitpos) ++ { ++ ret >>= bitpos; ++ if (++wordpos < a->top) ++ ret |= a->d[wordpos]<<(BN_BITS2-bitpos); ++ } ++ } ++ ++ return ret&BN_MASK2; ++} ++#endif + + /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout + * so that accessing any of these table values shows the same access pattern as far +@@ -587,6 +592,9 @@ + int powerbufLen = 0; + unsigned char *powerbuf=NULL; + BIGNUM tmp, am; ++#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) ++ unsigned int t4=0; ++#endif + + bn_check_top(a); + bn_check_top(p); +@@ -621,9 +629,18 @@ + + /* Get the window size to use with size of p. 
*/ + window = BN_window_bits_for_ctime_exponent_size(bits); ++#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) ++ if (window>=5 && (top&15)==0 && top<=64 && ++ (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== ++ (CFR_MONTMUL|CFR_MONTSQR) && ++ (t4=OPENSSL_sparcv9cap_P[0])) ++ window=5; ++ else ++#endif + #if defined(OPENSSL_BN_ASM_MONT5) + if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */ + #endif ++ (void)0; + + /* Allocate a buffer large enough to hold all of the pre-computed + * powers of am, am itself and tmp. +@@ -656,13 +715,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, + tmp.flags = am.flags = BN_FLG_STATIC_DATA; + + /* prepare a^0 in Montgomery domain */ +-#if 1 +- if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err; +-#else ++#if 1 /* by Shay Gueron's suggestion */ + tmp.d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ + for (i=1;i<top;i++) + tmp.d[i] = (~m->d[i])&BN_MASK2; + tmp.top = top; ++#else ++ if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err; + #endif + + /* prepare a^1 in Montgomery domain */ +@@ -673,6 +690,121 @@ + } + else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err; + ++#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) ++ if (t4) ++ { ++ typedef int (*bn_pwr5_mont_f)(BN_ULONG *tp,const BN_ULONG *np, ++ const BN_ULONG *n0,const void *table,int power,int bits); ++ int bn_pwr5_mont_t4_8(BN_ULONG *tp,const BN_ULONG *np, ++ const BN_ULONG *n0,const void *table,int power,int bits); ++ int bn_pwr5_mont_t4_16(BN_ULONG *tp,const BN_ULONG *np, ++ const BN_ULONG *n0,const void *table,int power,int bits); ++ int bn_pwr5_mont_t4_24(BN_ULONG *tp,const BN_ULONG *np, ++ const BN_ULONG *n0,const void *table,int power,int bits); ++ int bn_pwr5_mont_t4_32(BN_ULONG *tp,const BN_ULONG *np, ++ const BN_ULONG *n0,const void *table,int power,int bits); ++ static const bn_pwr5_mont_f pwr5_funcs[4] = { ++ bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16, ++ bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32 
}; ++ bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top/16-1]; ++ ++ typedef int (*bn_mul_mont_f)(BN_ULONG *rp,const BN_ULONG *ap, ++ const void *bp,const BN_ULONG *np,const BN_ULONG *n0); ++ int bn_mul_mont_t4_8(BN_ULONG *rp,const BN_ULONG *ap, ++ const void *bp,const BN_ULONG *np,const BN_ULONG *n0); ++ int bn_mul_mont_t4_16(BN_ULONG *rp,const BN_ULONG *ap, ++ const void *bp,const BN_ULONG *np,const BN_ULONG *n0); ++ int bn_mul_mont_t4_24(BN_ULONG *rp,const BN_ULONG *ap, ++ const void *bp,const BN_ULONG *np,const BN_ULONG *n0); ++ int bn_mul_mont_t4_32(BN_ULONG *rp,const BN_ULONG *ap, ++ const void *bp,const BN_ULONG *np,const BN_ULONG *n0); ++ static const bn_mul_mont_f mul_funcs[4] = { ++ bn_mul_mont_t4_8, bn_mul_mont_t4_16, ++ bn_mul_mont_t4_24, bn_mul_mont_t4_32 }; ++ bn_mul_mont_f mul_worker = mul_funcs[top/16-1]; ++ ++ void bn_mul_mont_vis3(BN_ULONG *rp,const BN_ULONG *ap, ++ const void *bp,const BN_ULONG *np, ++ const BN_ULONG *n0,int num); ++ void bn_mul_mont_t4(BN_ULONG *rp,const BN_ULONG *ap, ++ const void *bp,const BN_ULONG *np, ++ const BN_ULONG *n0,int num); ++ void bn_mul_mont_gather5_t4(BN_ULONG *rp,const BN_ULONG *ap, ++ const void *table,const BN_ULONG *np, ++ const BN_ULONG *n0,int num,int power); ++ void bn_flip_n_scatter5_t4(const BN_ULONG *inp,size_t num, ++ void *table,size_t power); ++ void bn_gather5_t4(BN_ULONG *out,size_t num, ++ void *table,size_t power); ++ void bn_flip_t4(BN_ULONG *dst,BN_ULONG *src,size_t num); ++ ++ BN_ULONG *np=mont->N.d, *n0=mont->n0; ++ int stride = 5*(6-(top/16-1)); /* multiple of 5, but less than 32 */ ++ ++ /* BN_to_montgomery can contaminate words above .top ++ * [in BN_DEBUG[_DEBUG] build]... */ ++ for (i=am.top; i<top; i++) am.d[i]=0; ++ for (i=tmp.top; i<top; i++) tmp.d[i]=0; ++ ++ bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,0); ++ bn_flip_n_scatter5_t4(am.d,top,powerbuf,1); ++ if (!(*mul_worker)(tmp.d,am.d,am.d,np,n0) && ++ !(*mul_worker)(tmp.d,am.d,am.d,np,n0)) ++ bn_mul_mont_vis3(tmp.d,am.d,am.d,np,n0,top); ++ bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,2); ++ ++ for (i=3; i<32; i++) ++ { ++ /* Calculate a^i = a^(i-1) * a */ ++ if (!(*mul_worker)(tmp.d,tmp.d,am.d,np,n0) && ++ !(*mul_worker)(tmp.d,tmp.d,am.d,np,n0)) ++ bn_mul_mont_vis3(tmp.d,tmp.d,am.d,np,n0,top); ++ bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,i); ++ } ++ ++ /* switch to 64-bit domain */ ++ np = alloca(top*sizeof(BN_ULONG)); ++ top /= 2; ++ bn_flip_t4(np,mont->N.d,top); ++ ++ bits--; ++ for (wvalue=0, i=bits%5; i>=0; i--,bits--) ++ wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); ++ bn_gather5_t4(tmp.d,top,powerbuf,wvalue); ++ ++ /* Scan the exponent one window at a time starting from the most ++ * significant bits. 
++ */ ++ while (bits >= 0) ++ { ++ if (bits < stride) stride = bits+1; ++ bits -= stride; ++ wvalue = (bn_get_bits(p,bits+1)); ++ ++ if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) continue; ++ /* retry once and fall back */ ++ if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) continue; ++ ++ bits += stride-5; ++ wvalue >>= stride-5; ++ wvalue &= 31; ++ bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); ++ bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); ++ bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); ++ bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); ++ bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); ++ bn_mul_mont_gather5_t4(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); ++ } ++ ++ bn_flip_t4(tmp.d,tmp.d,top); ++ top *= 2; ++ /* back to 32-bit domain */ ++ tmp.top=top; ++ bn_correct_top(&tmp); ++ OPENSSL_cleanse(np,top*sizeof(BN_ULONG)); ++ } ++ else ++#endif + #if defined(OPENSSL_BN_ASM_MONT5) + /* This optimization uses ideas from http://eprint.iacr.org/2011/239, + * specifically optimization of cache-timing attack countermeasures +@@ -816,6 +990,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, + } + + /* Convert the final result from montgomery to standard format */ ++#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) ++ if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_VIS3|SPARCV9_PREFER_FPU)) ++ { ++ am.d[0] = 1; /* borrow am */ ++ for (i=1;i