diff -r 65a016eaa866 -r 227137d9fbce components/openssl/openssl-1.0.1/engines/t4/t4_aes.S
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.1/engines/t4/t4_aes.S	Fri Feb 15 07:58:18 2013 -0800
@@ -0,0 +1,3052 @@
+/*
+ * ====================================================================
+ * Copyright (c) 1998-2011 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*LINTLIBRARY*/
+
+#if defined(lint) || defined(__lint)
+
+
+#include <sys/types.h>
+
+/*ARGSUSED*/
+void t4_aes_expand128(uint64_t *rk, const uint32_t *key)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes_expand192(uint64_t *rk, const uint32_t *key)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes_expand256(uint64_t *rk, const uint32_t *key)
+{ return; }
+
+void t4_aes128_load_keys_for_encrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_load_keys_for_encrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_load_keys_for_encrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+void t4_aes128_load_keys_for_decrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_load_keys_for_decrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_load_keys_for_decrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void
t4_aes128_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in, + uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) +{ return; } + +/*ARGSUSED*/ +void t4_aes192_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in, + uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) +{ return; } + +/*ARGSUSED*/ +void t4_aes256_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in, + uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv) +{ return; } + +#else /* lint || __lint */ + +#include + + + ENTRY(t4_aes_expand128) + +!load key + ld [%o1], %f0 + ld [%o1 + 0x4], %f1 + ld [%o1 + 0x8], %f2 + ld [%o1 + 0xc], %f3 + +!expand the key + !aes_kexpand1 %f0, %f2, 0x0, %f4 + !aes_kexpand2 %f2, %f4, %f6 + !aes_kexpand1 %f4, %f6, 0x1, %f8 + !aes_kexpand2 %f6, %f8, %f10 + !aes_kexpand1 %f8, %f10, 0x2, %f12 + !aes_kexpand2 %f10, %f12, %f14 + !aes_kexpand1 %f12, %f14, 0x3, %f16 + !aes_kexpand2 %f14, %f16, %f18 + !aes_kexpand1 %f16, %f18, 0x4, %f20 + !aes_kexpand2 %f18, %f20, %f22 + !aes_kexpand1 %f20, %f22, 0x5, %f24 + !aes_kexpand2 %f22, %f24, %f26 + !aes_kexpand1 %f24, %f26, 0x6, %f28 + !aes_kexpand2 %f26, %f28, %f30 + !aes_kexpand1 %f28, %f30, 0x7, %f32 + !aes_kexpand2 %f30, %f32, %f34 + !aes_kexpand1 %f32, %f34, 0x8, %f36 + !aes_kexpand2 %f34, %f36, %f38 + !aes_kexpand1 %f36, %f38, 0x9, %f40 + !aes_kexpand2 %f38, %f40, %f42 + .byte 0x88, 0xc8, 0x01, 0x02 + .byte 0x8d, 0xb0, 0xa6, 0x24 + .byte 0x90, 0xc9, 0x03, 0x06 + .byte 0x95, 0xb1, 0xa6, 0x28 + .byte 0x98, 0xca, 0x05, 0x0a + .byte 0x9d, 0xb2, 0xa6, 0x2c + .byte 0xa0, 0xcb, 0x07, 0x0e + .byte 0xa5, 0xb3, 0xa6, 0x30 + .byte 0xa8, 0xcc, 0x09, 0x12 + .byte 0xad, 0xb4, 0xa6, 0x34 + .byte 0xb0, 0xcd, 0x0b, 0x16 + .byte 0xb5, 0xb5, 0xa6, 0x38 + .byte 0xb8, 0xce, 0x0d, 0x1a + .byte 0xbd, 0xb6, 0xa6, 0x3c + .byte 0x82, 0xcf, 0x0f, 0x1e + .byte 0x87, 0xb7, 0xa6, 0x21 + .byte 0x8a, 0xc8, 0x51, 0x03 + .byte 0x8f, 0xb0, 0xe6, 0x25 + .byte 0x92, 0xc9, 0x53, 0x07 + .byte 0x97, 0xb1, 0xe6, 0x29 + +!copy expanded key back into array + std %f4, [%o0] + std %f6, [%o0 + 0x8] + std %f8, [%o0 + 0x10] + std %f10, [%o0 + 0x18] + std %f12, [%o0 + 0x20] + std %f14, [%o0 + 0x28] + std %f16, [%o0 + 0x30] + std %f18, [%o0 + 0x38] + std %f20, [%o0 + 0x40] + std %f22, [%o0 + 0x48] + std %f24, [%o0 + 0x50] + std %f26, [%o0 + 0x58] + std %f28, [%o0 + 0x60] + std %f30, [%o0 + 0x68] + std %f32, [%o0 + 0x70] + std %f34, [%o0 + 0x78] + std %f36, [%o0 + 0x80] + std %f38, [%o0 + 0x88] + std %f40, [%o0 + 0x90] + retl + std %f42, [%o0 + 0x98] + + SET_SIZE(t4_aes_expand128) + + + ENTRY(t4_aes_expand192) + +!load key + ld [%o1], %f0 + ld [%o1 + 0x4], %f1 + ld [%o1 + 0x8], %f2 + ld [%o1 + 0xc], %f3 + ld [%o1 + 0x10], %f4 + ld [%o1 + 0x14], %f5 + +!expand the key + !aes_kexpand1 %f0, %f4, 0x0, %f6 + !aes_kexpand2 %f2, %f6, %f8 + !aes_kexpand2 %f4, %f8, %f10 + + !aes_kexpand1 %f6, %f10, 0x1, %f12 + !aes_kexpand2 %f8, %f12, %f14 + !aes_kexpand2 %f10, %f14, %f16 + + !aes_kexpand1 %f12, %f16, 0x2, %f18 + !aes_kexpand2 %f14, %f18, %f20 + !aes_kexpand2 %f16, %f20, %f22 + + !aes_kexpand1 %f18, %f22, 0x3, %f24 + !aes_kexpand2 %f20, %f24, %f26 + !aes_kexpand2 %f22, %f26, %f28 + + !aes_kexpand1 %f24, %f28, 0x4, %f30 + !aes_kexpand2 %f26, %f30, %f32 + !aes_kexpand2 %f28, %f32, %f34 + + !aes_kexpand1 %f30, %f34, 0x5, %f36 + !aes_kexpand2 %f32, %f36, %f38 + !aes_kexpand2 %f34, %f38, %f40 + + !aes_kexpand1 %f36, %f40, 0x6, %f42 + !aes_kexpand2 %f38, %f42, %f44 + !aes_kexpand2 %f40, %f44, %f46 + + !aes_kexpand1 %f42, %f46, 0x7, %f48 + !aes_kexpand2 %f44, %f48, %f50 + .byte 0x8c, 0xc8, 0x01, 0x04 + .byte 0x91, 0xb0, 
0xa6, 0x26 + .byte 0x95, 0xb1, 0x26, 0x28 + .byte 0x98, 0xc9, 0x83, 0x0a + .byte 0x9d, 0xb2, 0x26, 0x2c + .byte 0xa1, 0xb2, 0xa6, 0x2e + .byte 0xa4, 0xcb, 0x05, 0x10 + .byte 0xa9, 0xb3, 0xa6, 0x32 + .byte 0xad, 0xb4, 0x26, 0x34 + .byte 0xb0, 0xcc, 0x87, 0x16 + .byte 0xb5, 0xb5, 0x26, 0x38 + .byte 0xb9, 0xb5, 0xa6, 0x3a + .byte 0xbc, 0xce, 0x09, 0x1c + .byte 0x83, 0xb6, 0xa6, 0x3e + .byte 0x87, 0xb7, 0x26, 0x21 + .byte 0x8a, 0xcf, 0x8b, 0x03 + .byte 0x8f, 0xb0, 0x66, 0x25 + .byte 0x93, 0xb0, 0xe6, 0x27 + .byte 0x96, 0xc9, 0x4d, 0x09 + .byte 0x9b, 0xb1, 0xe6, 0x2b + .byte 0x9f, 0xb2, 0x66, 0x2d + .byte 0xa2, 0xca, 0xcf, 0x0f + .byte 0xa7, 0xb3, 0x66, 0x31 + +!copy expanded key back into array + std %f6, [%o0] + std %f8, [%o0 + 0x8] + std %f10, [%o0 + 0x10] + std %f12, [%o0 + 0x18] + std %f14, [%o0 + 0x20] + std %f16, [%o0 + 0x28] + std %f18, [%o0 + 0x30] + std %f20, [%o0 + 0x38] + std %f22, [%o0 + 0x40] + std %f24, [%o0 + 0x48] + std %f26, [%o0 + 0x50] + std %f28, [%o0 + 0x58] + std %f30, [%o0 + 0x60] + std %f32, [%o0 + 0x68] + std %f34, [%o0 + 0x70] + std %f36, [%o0 + 0x78] + std %f38, [%o0 + 0x80] + std %f40, [%o0 + 0x88] + std %f42, [%o0 + 0x90] + std %f44, [%o0 + 0x98] + std %f46, [%o0 + 0xa0] + std %f48, [%o0 + 0xa8] + retl + std %f50, [%o0 + 0xb0] + + SET_SIZE(t4_aes_expand192) + + + ENTRY(t4_aes_expand256) + +!load key + ld [%o1], %f0 + ld [%o1 + 0x4], %f1 + ld [%o1 + 0x8], %f2 + ld [%o1 + 0xc], %f3 + ld [%o1 + 0x10], %f4 + ld [%o1 + 0x14], %f5 + ld [%o1 + 0x18], %f6 + ld [%o1 + 0x1c], %f7 + +!expand the key + !aes_kexpand1 %f0, %f6, 0x0, %f8 + !aes_kexpand2 %f2, %f8, %f10 + !aes_kexpand0 %f4, %f10, %f12 + !aes_kexpand2 %f6, %f12, %f14 + + !aes_kexpand1 %f8, %f14, 0x1, %f16 + !aes_kexpand2 %f10, %f16, %f18 + !aes_kexpand0 %f12, %f18, %f20 + !aes_kexpand2 %f14, %f20, %f22 + + !aes_kexpand1 %f16, %f22, 0x2, %f24 + !aes_kexpand2 %f18, %f24, %f26 + !aes_kexpand0 %f20, %f26, %f28 + !aes_kexpand2 %f22, %f28, %f30 + + !aes_kexpand1 %f24, %f30, 0x3, %f32 + !aes_kexpand2 %f26, %f32, %f34 + !aes_kexpand0 %f28, %f34, %f36 + !aes_kexpand2 %f30, %f36, %f38 + + !aes_kexpand1 %f32, %f38, 0x4, %f40 + !aes_kexpand2 %f34, %f40, %f42 + !aes_kexpand0 %f36, %f42, %f44 + !aes_kexpand2 %f38, %f44, %f46 + + !aes_kexpand1 %f40, %f46, 0x5, %f48 + !aes_kexpand2 %f42, %f48, %f50 + !aes_kexpand0 %f44, %f50, %f52 + !aes_kexpand2 %f46, %f52, %f54 + + !aes_kexpand1 %f48, %f54, 0x6, %f56 + !aes_kexpand2 %f50, %f56, %f58 + .byte 0x90, 0xc8, 0x01, 0x06 + .byte 0x95, 0xb0, 0xa6, 0x28 + .byte 0x99, 0xb1, 0x26, 0x0a + .byte 0x9d, 0xb1, 0xa6, 0x2c + .byte 0xa0, 0xca, 0x03, 0x0e + .byte 0xa5, 0xb2, 0xa6, 0x30 + .byte 0xa9, 0xb3, 0x26, 0x12 + .byte 0xad, 0xb3, 0xa6, 0x34 + .byte 0xb0, 0xcc, 0x05, 0x16 + .byte 0xb5, 0xb4, 0xa6, 0x38 + .byte 0xb9, 0xb5, 0x26, 0x1a + .byte 0xbd, 0xb5, 0xa6, 0x3c + .byte 0x82, 0xce, 0x07, 0x1e + .byte 0x87, 0xb6, 0xa6, 0x21 + .byte 0x8b, 0xb7, 0x26, 0x03 + .byte 0x8f, 0xb7, 0xa6, 0x25 + .byte 0x92, 0xc8, 0x49, 0x07 + .byte 0x97, 0xb0, 0xe6, 0x29 + .byte 0x9b, 0xb1, 0x66, 0x0b + .byte 0x9f, 0xb1, 0xe6, 0x2d + .byte 0xa2, 0xca, 0x4b, 0x0f + .byte 0xa7, 0xb2, 0xe6, 0x31 + .byte 0xab, 0xb3, 0x66, 0x13 + .byte 0xaf, 0xb3, 0xe6, 0x35 + .byte 0xb2, 0xcc, 0x4d, 0x17 + .byte 0xb7, 0xb4, 0xe6, 0x39 + +!copy expanded key back into array + std %f8, [%o0] + std %f10, [%o0 + 0x8] + std %f12, [%o0 + 0x10] + std %f14, [%o0 + 0x18] + std %f16, [%o0 + 0x20] + std %f18, [%o0 + 0x28] + std %f20, [%o0 + 0x30] + std %f22, [%o0 + 0x38] + std %f24, [%o0 + 0x40] + std %f26, [%o0 + 0x48] + std %f28, [%o0 + 0x50] + std 
%f30, [%o0 + 0x58] + std %f32, [%o0 + 0x60] + std %f34, [%o0 + 0x68] + std %f36, [%o0 + 0x70] + std %f38, [%o0 + 0x78] + std %f40, [%o0 + 0x80] + std %f42, [%o0 + 0x88] + std %f44, [%o0 + 0x90] + std %f46, [%o0 + 0x98] + std %f48, [%o0 + 0xa0] + std %f50, [%o0 + 0xa8] + std %f52, [%o0 + 0xb0] + std %f54, [%o0 + 0xb8] + std %f56, [%o0 + 0xc0] + retl + std %f58, [%o0 + 0xc8] + + SET_SIZE(t4_aes_expand256) + + +#define FIRST_TWO_EROUNDS \ + .byte 0xb2, 0xc8, 0x3e, 0x1d ; \ + .byte 0xb6, 0xc8, 0xbe, 0x3d ; \ + .byte 0xba, 0xc9, 0x36, 0x19 ; \ + .byte 0xbe, 0xc9, 0xb6, 0x39 + !aes_eround01 %f0, %f60, %f62, %f56 ; \ + !aes_eround23 %f2, %f60, %f62, %f58 ; \ + !aes_eround01 %f4, %f56, %f58, %f60 ; \ + !aes_eround23 %f6, %f56, %f58, %f62 + +#define MID_TWO_EROUNDS \ + .byte 0xb2, 0xca, 0x3e, 0x1d ; \ + .byte 0xb6, 0xca, 0xbe, 0x3d ; \ + .byte 0xba, 0xcb, 0x36, 0x19 ; \ + .byte 0xbe, 0xcb, 0xb6, 0x39 + !aes_eround01 %f8, %f60, %f62, %f56 ; \ + !aes_eround23 %f10, %f60, %f62, %f58 ; \ + !aes_eround01 %f12, %f56, %f58, %f60 ; \ + !aes_eround23 %f14, %f56, %f58, %f62 + +#define MID_TWO_EROUNDS_2 \ + .byte 0x8c, 0xca, 0x04, 0x00 ; \ + .byte 0x88, 0xca, 0x84, 0x20 ; \ + .byte 0xb2, 0xca, 0x3e, 0x1d ; \ + .byte 0xb6, 0xca, 0xbe, 0x3d ; \ + .byte 0x80, 0xcb, 0x08, 0x06 ; \ + .byte 0x84, 0xcb, 0x88, 0x26 ; \ + .byte 0xba, 0xcb, 0x36, 0x19 ; \ + .byte 0xbe, 0xcb, 0xb6, 0x39 + !aes_eround01 %f8, %f0, %f2, %f6 ; \ + !aes_eround23 %f10, %f0, %f2, %f4 ; \ + !aes_eround01 %f8, %f60, %f62, %f56 ; \ + !aes_eround23 %f10, %f60, %f62, %f58 ; \ + !aes_eround01 %f12, %f6, %f4, %f0 ; \ + !aes_eround23 %f14, %f6, %f4, %f2 ; \ + !aes_eround01 %f12, %f56, %f58, %f60 ; \ + !aes_eround23 %f14, %f56, %f58, %f62 + +#define TEN_EROUNDS \ + .byte 0xb2, 0xcc, 0x3e, 0x1d ; \ + .byte 0xb6, 0xcc, 0xbe, 0x3d ; \ + .byte 0xba, 0xcd, 0x36, 0x19 ; \ + .byte 0xbe, 0xcd, 0xb6, 0x39 ; \ + .byte 0xb2, 0xce, 0x3e, 0x1d ; \ + .byte 0xb6, 0xce, 0xbe, 0x3d ; \ + .byte 0xba, 0xcf, 0x36, 0x19 ; \ + .byte 0xbe, 0xcf, 0xb6, 0x39 ; \ + .byte 0xb2, 0xc8, 0x7e, 0x1d ; \ + .byte 0xb6, 0xc8, 0xfe, 0x3d ; \ + .byte 0xba, 0xc9, 0x76, 0x19 ; \ + .byte 0xbe, 0xc9, 0xf6, 0x39 ; \ + .byte 0xb2, 0xca, 0x7e, 0x1d ; \ + .byte 0xb6, 0xca, 0xfe, 0x3d ; \ + .byte 0xba, 0xcb, 0x76, 0x19 ; \ + .byte 0xbe, 0xcb, 0xf6, 0x39 ; \ + .byte 0xb2, 0xcc, 0x7e, 0x1d ; \ + .byte 0xb6, 0xcc, 0xfe, 0x3d ; \ + .byte 0xba, 0xcd, 0x76, 0x99 ; \ + .byte 0xbe, 0xcd, 0xf6, 0xb9 + !aes_eround01 %f16, %f60, %f62, %f56 ; \ + !aes_eround23 %f18, %f60, %f62, %f58 ; \ + !aes_eround01 %f20, %f56, %f58, %f60 ; \ + !aes_eround23 %f22, %f56, %f58, %f62 ; \ + !aes_eround01 %f24, %f60, %f62, %f56 ; \ + !aes_eround23 %f26, %f60, %f62, %f58 ; \ + !aes_eround01 %f28, %f56, %f58, %f60 ; \ + !aes_eround23 %f30, %f56, %f58, %f62 ; \ + !aes_eround01 %f32, %f60, %f62, %f56 ; \ + !aes_eround23 %f34, %f60, %f62, %f58 ; \ + !aes_eround01 %f36, %f56, %f58, %f60 ; \ + !aes_eround23 %f38, %f56, %f58, %f62 ; \ + !aes_eround01 %f40, %f60, %f62, %f56 ; \ + !aes_eround23 %f42, %f60, %f62, %f58 ; \ + !aes_eround01 %f44, %f56, %f58, %f60 ; \ + !aes_eround23 %f46, %f56, %f58, %f62 ; \ + !aes_eround01 %f48, %f60, %f62, %f56 ; \ + !aes_eround23 %f50, %f60, %f62, %f58 ; \ + !aes_eround01_l %f52, %f56, %f58, %f60 ; \ + !aes_eround23_l %f54, %f56, %f58, %f62 + +#define TEN_EROUNDS_2 \ + .byte 0x8c, 0xcc, 0x04, 0x00 ; \ + .byte 0x88, 0xcc, 0x84, 0x20 ; \ + .byte 0xb2, 0xcc, 0x3e, 0x1d ; \ + .byte 0xb6, 0xcc, 0xbe, 0x3d ; \ + .byte 0x80, 0xcd, 0x08, 0x06 ; \ + .byte 0x84, 0xcd, 0x88, 0x26 ; \ + .byte 0xba, 0xcd, 0x36, 0x19 
; \ + .byte 0xbe, 0xcd, 0xb6, 0x39 ; \ + .byte 0x8c, 0xce, 0x04, 0x00 ; \ + .byte 0x88, 0xce, 0x84, 0x20 ; \ + .byte 0xb2, 0xce, 0x3e, 0x1d ; \ + .byte 0xb6, 0xce, 0xbe, 0x3d ; \ + .byte 0x80, 0xcf, 0x08, 0x06 ; \ + .byte 0x84, 0xcf, 0x88, 0x26 ; \ + .byte 0xba, 0xcf, 0x36, 0x19 ; \ + .byte 0xbe, 0xcf, 0xb6, 0x39 ; \ + .byte 0x8c, 0xc8, 0x44, 0x00 ; \ + .byte 0x88, 0xc8, 0xc4, 0x20 ; \ + .byte 0xb2, 0xc8, 0x7e, 0x1d ; \ + .byte 0xb6, 0xc8, 0xfe, 0x3d ; \ + .byte 0x80, 0xc9, 0x48, 0x06 ; \ + .byte 0x84, 0xc9, 0xc8, 0x26 ; \ + .byte 0xba, 0xc9, 0x76, 0x19 ; \ + .byte 0xbe, 0xc9, 0xf6, 0x39 ; \ + .byte 0x8c, 0xca, 0x44, 0x00 ; \ + .byte 0x88, 0xca, 0xc4, 0x20 ; \ + .byte 0xb2, 0xca, 0x7e, 0x1d ; \ + .byte 0xb6, 0xca, 0xfe, 0x3d ; \ + .byte 0x80, 0xcb, 0x48, 0x06 ; \ + .byte 0x84, 0xcb, 0xc8, 0x26 ; \ + .byte 0xba, 0xcb, 0x76, 0x19 ; \ + .byte 0xbe, 0xcb, 0xf6, 0x39 ; \ + .byte 0x8c, 0xcc, 0x44, 0x00 ; \ + .byte 0x88, 0xcc, 0xc4, 0x20 ; \ + .byte 0xb2, 0xcc, 0x7e, 0x1d ; \ + .byte 0xb6, 0xcc, 0xfe, 0x3d ; \ + .byte 0x80, 0xcd, 0x48, 0x86 ; \ + .byte 0x84, 0xcd, 0xc8, 0xa6 ; \ + .byte 0xba, 0xcd, 0x76, 0x99 ; \ + .byte 0xbe, 0xcd, 0xf6, 0xb9 + !aes_eround01 %f16, %f0, %f2, %f6 ; \ + !aes_eround23 %f18, %f0, %f2, %f4 ; \ + !aes_eround01 %f16, %f60, %f62, %f56 ; \ + !aes_eround23 %f18, %f60, %f62, %f58 ; \ + !aes_eround01 %f20, %f6, %f4, %f0 ; \ + !aes_eround23 %f22, %f6, %f4, %f2 ; \ + !aes_eround01 %f20, %f56, %f58, %f60 ; \ + !aes_eround23 %f22, %f56, %f58, %f62 ; \ + !aes_eround01 %f24, %f0, %f2, %f6 ; \ + !aes_eround23 %f26, %f0, %f2, %f4 ; \ + !aes_eround01 %f24, %f60, %f62, %f56 ; \ + !aes_eround23 %f26, %f60, %f62, %f58 ; \ + !aes_eround01 %f28, %f6, %f4, %f0 ; \ + !aes_eround23 %f30, %f6, %f4, %f2 ; \ + !aes_eround01 %f28, %f56, %f58, %f60 ; \ + !aes_eround23 %f30, %f56, %f58, %f62 ; \ + !aes_eround01 %f32, %f0, %f2, %f6 ; \ + !aes_eround23 %f34, %f0, %f2, %f4 ; \ + !aes_eround01 %f32, %f60, %f62, %f56 ; \ + !aes_eround23 %f34, %f60, %f62, %f58 ; \ + !aes_eround01 %f36, %f6, %f4, %f0 ; \ + !aes_eround23 %f38, %f6, %f4, %f2 ; \ + !aes_eround01 %f36, %f56, %f58, %f60 ; \ + !aes_eround23 %f38, %f56, %f58, %f62 ; \ + !aes_eround01 %f40, %f0, %f2, %f6 ; \ + !aes_eround23 %f42, %f0, %f2, %f4 ; \ + !aes_eround01 %f40, %f60, %f62, %f56 ; \ + !aes_eround23 %f42, %f60, %f62, %f58 ; \ + !aes_eround01 %f44, %f6, %f4, %f0 ; \ + !aes_eround23 %f46, %f6, %f4, %f2 ; \ + !aes_eround01 %f44, %f56, %f58, %f60 ; \ + !aes_eround23 %f46, %f56, %f58, %f62 ; \ + !aes_eround01 %f48, %f0, %f2, %f6 ; \ + !aes_eround23 %f50, %f0, %f2, %f4 ; \ + !aes_eround01 %f48, %f60, %f62, %f56 ; \ + !aes_eround23 %f50, %f60, %f62, %f58 ; \ + !aes_eround01_l %f52, %f6, %f4, %f0 ; \ + !aes_eround23_l %f54, %f6, %f4, %f2 ; \ + !aes_eround01_l %f52, %f56, %f58, %f60 ; \ + !aes_eround23_l %f54, %f56, %f58, %f62 + +#define TWELVE_EROUNDS \ + MID_TWO_EROUNDS ; \ + TEN_EROUNDS + +#define TWELVE_EROUNDS_2 \ + MID_TWO_EROUNDS_2 ; \ + TEN_EROUNDS_2 + +#define FOURTEEN_EROUNDS \ + FIRST_TWO_EROUNDS ; \ + TWELVE_EROUNDS + +#define FOURTEEN_EROUNDS_2 \ + .byte 0xb0, 0xc8, 0x2c, 0x14 ; \ + .byte 0xac, 0xc8, 0xac, 0x34 ; \ + ldd [%o0 + 0x60], %f20 ; \ + .byte 0xb2, 0xc8, 0x3e, 0x1d ; \ + .byte 0xb6, 0xc8, 0xbe, 0x3d ; \ + .byte 0x80, 0xc9, 0x2c, 0x18 ; \ + .byte 0x84, 0xc9, 0xac, 0x38 ;\ + ldd [%o0 + 0x68], %f22 ; \ + .byte 0xba, 0xc9, 0x36, 0x19 ; \ + ldd [%o0 + 0x70], %f24 ; \ + .byte 0xbe, 0xc9, 0xb6, 0x39 ; \ + .byte 0x8c, 0xca, 0x04, 0x00 ; \ + .byte 0x88, 0xca, 0x84, 0x20 ; \ + .byte 0xb2, 0xca, 0x3e, 0x1d ; \ + .byte 0xb6, 0xca, 0xbe, 
0x3d ; \ + .byte 0x80, 0xcb, 0x08, 0x06 ; \ + .byte 0x84, 0xcb, 0x88, 0x26 ; \ + .byte 0xba, 0xcb, 0x36, 0x19 ; \ + .byte 0xbe, 0xcb, 0xb6, 0x39 ; \ + .byte 0x8c, 0xcc, 0x04, 0x00 ; \ + .byte 0x88, 0xcc, 0x84, 0x20 ; \ + .byte 0xb2, 0xcc, 0x3e, 0x1d ; \ + .byte 0xb6, 0xcc, 0xbe, 0x3d ; \ + .byte 0x80, 0xcd, 0x08, 0x06 ; \ + .byte 0x84, 0xcd, 0x88, 0x26 ; \ + .byte 0xba, 0xcd, 0x36, 0x19 ; \ + .byte 0xbe, 0xcd, 0xb6, 0x39 ; \ + .byte 0x8c, 0xce, 0x04, 0x00 ; \ + .byte 0x88, 0xce, 0x84, 0x20 ; \ + .byte 0xb2, 0xce, 0x3e, 0x1d ; \ + .byte 0xb6, 0xce, 0xbe, 0x3d ; \ + .byte 0x80, 0xcf, 0x08, 0x06 ; \ + .byte 0x84, 0xcf, 0x88, 0x26 ; \ + .byte 0xba, 0xcf, 0x36, 0x19 ; \ + .byte 0xbe, 0xcf, 0xb6, 0x39 ; \ + .byte 0x8c, 0xc8, 0x44, 0x00 ; \ + .byte 0x88, 0xc8, 0xc4, 0x20 ; \ + .byte 0xb2, 0xc8, 0x7e, 0x1d ; \ + .byte 0xb6, 0xc8, 0xfe, 0x3d ; \ + .byte 0x80, 0xc9, 0x48, 0x06 ; \ + .byte 0x84, 0xc9, 0xc8, 0x26 ; \ + .byte 0xba, 0xc9, 0x76, 0x19 ; \ + .byte 0xbe, 0xc9, 0xf6, 0x39 ; \ + .byte 0x8c, 0xca, 0x44, 0x00 ; \ + .byte 0x88, 0xca, 0xc4, 0x20 ; \ + .byte 0xb2, 0xca, 0x7e, 0x1d ; \ + .byte 0xb6, 0xca, 0xfe, 0x3d ; \ + .byte 0x80, 0xcb, 0x48, 0x06 ; \ + .byte 0x84, 0xcb, 0xc8, 0x26 ; \ + .byte 0xba, 0xcb, 0x76, 0x19 ; \ + .byte 0xbe, 0xcb, 0xf6, 0x39 ; \ + .byte 0x8c, 0xcc, 0x44, 0x00 ; \ + .byte 0x88, 0xcc, 0xc4, 0x20 ; \ + ldd [%o0 + 0x10], %f0 ; \ + .byte 0xb2, 0xcc, 0x7e, 0x1d ; \ + ldd [%o0 + 0x18], %f2 ; \ + .byte 0xb6, 0xcc, 0xfe, 0x3d ; \ + .byte 0xa8, 0xcd, 0x48, 0x86 ; \ + .byte 0xac, 0xcd, 0xc8, 0xa6 ; \ + ldd [%o0 + 0x20], %f4 ; \ + .byte 0xba, 0xcd, 0x76, 0x99 ; \ + ldd [%o0 + 0x28], %f6 ; \ + .byte 0xbe, 0xcd, 0xf6, 0xb9 + !aes_eround01 %f0, %f20, %f22, %f24 ; \ + !aes_eround23 %f2, %f20, %f22, %f22 ; \ + !ldd [%o0 + 0x60], %f20 ; \ + !aes_eround01 %f0, %f60, %f62, %f56 ; \ + !aes_eround23 %f2, %f60, %f62, %f58 ; \ + !aes_eround01 %f4, %f24, %f22, %f0 ; \ + !aes_eround23 %f6, %f24, %f22, %f2 ; \ + !ldd [%o0 + 0x68], %f22 ; \ + !aes_eround01 %f4, %f56, %f58, %f60 ; \ + !ldd [%o0 + 0x70], %f24 ; \ + !aes_eround23 %f6, %f56, %f58, %f62 ; \ + !aes_eround01 %f8, %f0, %f2, %f6 ; \ + !aes_eround23 %f10, %f0, %f2, %f4 ; \ + !aes_eround01 %f8, %f60, %f62, %f56 ; \ + !aes_eround23 %f10, %f60, %f62, %f58 ; \ + !aes_eround01 %f12, %f6, %f4, %f0 ; \ + !aes_eround23 %f14, %f6, %f4, %f2 ; \ + !aes_eround01 %f12, %f56, %f58, %f60 ; \ + !aes_eround23 %f14, %f56, %f58, %f62 ; \ + !aes_eround01 %f16, %f0, %f2, %f6 ; \ + !aes_eround23 %f18, %f0, %f2, %f4 ; \ + !aes_eround01 %f16, %f60, %f62, %f56 ; \ + !aes_eround23 %f18, %f60, %f62, %f58 ; \ + !aes_eround01 %f20, %f6, %f4, %f0 ; \ + !aes_eround23 %f22, %f6, %f4, %f2 ; \ + !aes_eround01 %f20, %f56, %f58, %f60 ; \ + !aes_eround23 %f22, %f56, %f58, %f62 ; \ + !aes_eround01 %f24, %f0, %f2, %f6 ; \ + !aes_eround23 %f26, %f0, %f2, %f4 ; \ + !aes_eround01 %f24, %f60, %f62, %f56 ; \ + !aes_eround23 %f26, %f60, %f62, %f58 ; \ + !aes_eround01 %f28, %f6, %f4, %f0 ; \ + !aes_eround23 %f30, %f6, %f4, %f2 ; \ + !aes_eround01 %f28, %f56, %f58, %f60 ; \ + !aes_eround23 %f30, %f56, %f58, %f62 ; \ + !aes_eround01 %f32, %f0, %f2, %f6 ; \ + !aes_eround23 %f34, %f0, %f2, %f4 ; \ + !aes_eround01 %f32, %f60, %f62, %f56 ; \ + !aes_eround23 %f34, %f60, %f62, %f58 ; \ + !aes_eround01 %f36, %f6, %f4, %f0 ; \ + !aes_eround23 %f38, %f6, %f4, %f2 ; \ + !aes_eround01 %f36, %f56, %f58, %f60 ; \ + !aes_eround23 %f38, %f56, %f58, %f62 ; \ + !aes_eround01 %f40, %f0, %f2, %f6 ; \ + !aes_eround23 %f42, %f0, %f2, %f4 ; \ + !aes_eround01 %f40, %f60, %f62, %f56 ; \ + !aes_eround23 %f42, 
%f60, %f62, %f58 ; \ + !aes_eround01 %f44, %f6, %f4, %f0 ; \ + !aes_eround23 %f46, %f6, %f4, %f2 ; \ + !aes_eround01 %f44, %f56, %f58, %f60 ; \ + !aes_eround23 %f46, %f56, %f58, %f62 ; \ + !aes_eround01 %f48, %f0, %f2, %f6 ; \ + !aes_eround23 %f50, %f0, %f2, %f4 ; \ + !ldd [%o0 + 0x10], %f0 ; \ + !aes_eround01 %f48, %f60, %f62, %f56 ; \ + !ldd [%o0 + 0x18], %f2 ; \ + !aes_eround23 %f50, %f60, %f62, %f58 ; \ + !aes_eround01_l %f52, %f6, %f4, %f20 ; \ + !aes_eround23_l %f54, %f6, %f4, %f22 ; \ + !ldd [%o0 + 0x20], %f4 ; \ + !aes_eround01_l %f52, %f56, %f58, %f60 ; \ + !ldd [%o0 + 0x28], %f6 ; \ + !aes_eround23_l %f54, %f56, %f58, %f62 + +#define FIRST_TWO_DROUNDS \ + .byte 0xb2, 0xc8, 0x3e, 0x5d ; \ + .byte 0xb6, 0xc8, 0xbe, 0x7d ; \ + .byte 0xba, 0xc9, 0x36, 0x59 ; \ + .byte 0xbe, 0xc9, 0xb6, 0x79 + !aes_dround01 %f0, %f60, %f62, %f56 ; \ + !aes_dround23 %f2, %f60, %f62, %f58 ; \ + !aes_dround01 %f4, %f56, %f58, %f60 ; \ + !aes_dround23 %f6, %f56, %f58, %f62 + +#define MID_TWO_DROUNDS \ + .byte 0xb2, 0xca, 0x3e, 0x5d ; \ + .byte 0xb6, 0xca, 0xbe, 0x7d ; \ + .byte 0xba, 0xcb, 0x36, 0x59 ; \ + .byte 0xbe, 0xcb, 0xb6, 0x79 + !aes_dround01 %f8, %f60, %f62, %f56 ; \ + !aes_dround23 %f10, %f60, %f62, %f58 ; \ + !aes_dround01 %f12, %f56, %f58, %f60 ; \ + !aes_dround23 %f14, %f56, %f58, %f62 + +#define MID_TWO_DROUNDS_2 \ + .byte 0x8c, 0xca, 0x04, 0x40 ; \ + .byte 0x88, 0xca, 0x84, 0x60 ; \ + .byte 0xb2, 0xca, 0x3e, 0x5d ; \ + .byte 0xb6, 0xca, 0xbe, 0x7d ; \ + .byte 0x80, 0xcb, 0x08, 0x46 ; \ + .byte 0x84, 0xcb, 0x88, 0x66 ; \ + .byte 0xba, 0xcb, 0x36, 0x59 ; \ + .byte 0xbe, 0xcb, 0xb6, 0x79 + !aes_dround01 %f8, %f0, %f2, %f6 ; \ + !aes_dround23 %f10, %f0, %f2, %f4 ; \ + !aes_dround01 %f8, %f60, %f62, %f56 ; \ + !aes_dround23 %f10, %f60, %f62, %f58 ; \ + !aes_dround01 %f12, %f6, %f4, %f0 ; \ + !aes_dround23 %f14, %f6, %f4, %f2 ; \ + !aes_dround01 %f12, %f56, %f58, %f60 ; \ + !aes_dround23 %f14, %f56, %f58, %f62 + +#define TEN_DROUNDS \ + .byte 0xb2, 0xcc, 0x3e, 0x5d ; \ + .byte 0xb6, 0xcc, 0xbe, 0x7d ; \ + .byte 0xba, 0xcd, 0x36, 0x59 ; \ + .byte 0xbe, 0xcd, 0xb6, 0x79 ; \ + .byte 0xb2, 0xce, 0x3e, 0x5d ; \ + .byte 0xb6, 0xce, 0xbe, 0x7d ; \ + .byte 0xba, 0xcf, 0x36, 0x59 ; \ + .byte 0xbe, 0xcf, 0xb6, 0x79 ; \ + .byte 0xb2, 0xc8, 0x7e, 0x5d ; \ + .byte 0xb6, 0xc8, 0xfe, 0x7d ; \ + .byte 0xba, 0xc9, 0x76, 0x59 ; \ + .byte 0xbe, 0xc9, 0xf6, 0x79 ; \ + .byte 0xb2, 0xca, 0x7e, 0x5d ; \ + .byte 0xb6, 0xca, 0xfe, 0x7d ; \ + .byte 0xba, 0xcb, 0x76, 0x59 ; \ + .byte 0xbe, 0xcb, 0xf6, 0x79 ; \ + .byte 0xb2, 0xcc, 0x7e, 0x5d ; \ + .byte 0xb6, 0xcc, 0xfe, 0x7d ; \ + .byte 0xba, 0xcd, 0x76, 0xd9 ; \ + .byte 0xbe, 0xcd, 0xf6, 0xf9 + !aes_dround01 %f16, %f60, %f62, %f56 ; \ + !aes_dround23 %f18, %f60, %f62, %f58 ; \ + !aes_dround01 %f20, %f56, %f58, %f60 ; \ + !aes_dround23 %f22, %f56, %f58, %f62 ; \ + !aes_dround01 %f24, %f60, %f62, %f56 ; \ + !aes_dround23 %f26, %f60, %f62, %f58 ; \ + !aes_dround01 %f28, %f56, %f58, %f60 ; \ + !aes_dround23 %f30, %f56, %f58, %f62 ; \ + !aes_dround01 %f32, %f60, %f62, %f56 ; \ + !aes_dround23 %f34, %f60, %f62, %f58 ; \ + !aes_dround01 %f36, %f56, %f58, %f60 ; \ + !aes_dround23 %f38, %f56, %f58, %f62 ; \ + !aes_dround01 %f40, %f60, %f62, %f56 ; \ + !aes_dround23 %f42, %f60, %f62, %f58 ; \ + !aes_dround01 %f44, %f56, %f58, %f60 ; \ + !aes_dround23 %f46, %f56, %f58, %f62 ; \ + !aes_dround01 %f48, %f60, %f62, %f56 ; \ + !aes_dround23 %f50, %f60, %f62, %f58 ; \ + !aes_dround01_l %f52, %f56, %f58, %f60 ; \ + !aes_dround23_l %f54, %f56, %f58, %f62 + +#define TEN_DROUNDS_2 \ + .byte 
0x8c, 0xcc, 0x04, 0x40 ; \ + .byte 0x88, 0xcc, 0x84, 0x60 ; \ + .byte 0xb2, 0xcc, 0x3e, 0x5d ; \ + .byte 0xb6, 0xcc, 0xbe, 0x7d ; \ + .byte 0x80, 0xcd, 0x08, 0x46 ; \ + .byte 0x84, 0xcd, 0x88, 0x66 ; \ + .byte 0xba, 0xcd, 0x36, 0x59 ; \ + .byte 0xbe, 0xcd, 0xb6, 0x79 ; \ + .byte 0x8c, 0xce, 0x04, 0x40 ; \ + .byte 0x88, 0xce, 0x84, 0x60 ; \ + .byte 0xb2, 0xce, 0x3e, 0x5d ; \ + .byte 0xb6, 0xce, 0xbe, 0x7d ; \ + .byte 0x80, 0xcf, 0x08, 0x46 ; \ + .byte 0x84, 0xcf, 0x88, 0x66 ; \ + .byte 0xba, 0xcf, 0x36, 0x59 ; \ + .byte 0xbe, 0xcf, 0xb6, 0x79 ; \ + .byte 0x8c, 0xc8, 0x44, 0x40 ; \ + .byte 0x88, 0xc8, 0xc4, 0x60 ; \ + .byte 0xb2, 0xc8, 0x7e, 0x5d ; \ + .byte 0xb6, 0xc8, 0xfe, 0x7d ; \ + .byte 0x80, 0xc9, 0x48, 0x46 ; \ + .byte 0x84, 0xc9, 0xc8, 0x66 ; \ + .byte 0xba, 0xc9, 0x76, 0x59 ; \ + .byte 0xbe, 0xc9, 0xf6, 0x79 ; \ + .byte 0x8c, 0xca, 0x44, 0x40 ; \ + .byte 0x88, 0xca, 0xc4, 0x60 ; \ + .byte 0xb2, 0xca, 0x7e, 0x5d ; \ + .byte 0xb6, 0xca, 0xfe, 0x7d ; \ + .byte 0x80, 0xcb, 0x48, 0x46 ; \ + .byte 0x84, 0xcb, 0xc8, 0x66 ; \ + .byte 0xba, 0xcb, 0x76, 0x59 ; \ + .byte 0xbe, 0xcb, 0xf6, 0x79 ; \ + .byte 0x8c, 0xcc, 0x44, 0x40 ; \ + .byte 0x88, 0xcc, 0xc4, 0x60 ; \ + .byte 0xb2, 0xcc, 0x7e, 0x5d ; \ + .byte 0xb6, 0xcc, 0xfe, 0x7d ; \ + .byte 0x80, 0xcd, 0x48, 0xc6 ; \ + .byte 0x84, 0xcd, 0xc8, 0xe6 ; \ + .byte 0xba, 0xcd, 0x76, 0xd9 ; \ + .byte 0xbe, 0xcd, 0xf6, 0xf9 + !aes_dround01 %f16, %f0, %f2, %f6 ; \ + !aes_dround23 %f18, %f0, %f2, %f4 ; \ + !aes_dround01 %f16, %f60, %f62, %f56 ; \ + !aes_dround23 %f18, %f60, %f62, %f58 ; \ + !aes_dround01 %f20, %f6, %f4, %f0 ; \ + !aes_dround23 %f22, %f6, %f4, %f2 ; \ + !aes_dround01 %f20, %f56, %f58, %f60 ; \ + !aes_dround23 %f22, %f56, %f58, %f62 ; \ + !aes_dround01 %f24, %f0, %f2, %f6 ; \ + !aes_dround23 %f26, %f0, %f2, %f4 ; \ + !aes_dround01 %f24, %f60, %f62, %f56 ; \ + !aes_dround23 %f26, %f60, %f62, %f58 ; \ + !aes_dround01 %f28, %f6, %f4, %f0 ; \ + !aes_dround23 %f30, %f6, %f4, %f2 ; \ + !aes_dround01 %f28, %f56, %f58, %f60 ; \ + !aes_dround23 %f30, %f56, %f58, %f62 ; \ + !aes_dround01 %f32, %f0, %f2, %f6 ; \ + !aes_dround23 %f34, %f0, %f2, %f4 ; \ + !aes_dround01 %f32, %f60, %f62, %f56 ; \ + !aes_dround23 %f34, %f60, %f62, %f58 ; \ + !aes_dround01 %f36, %f6, %f4, %f0 ; \ + !aes_dround23 %f38, %f6, %f4, %f2 ; \ + !aes_dround01 %f36, %f56, %f58, %f60 ; \ + !aes_dround23 %f38, %f56, %f58, %f62 ; \ + !aes_dround01 %f40, %f0, %f2, %f6 ; \ + !aes_dround23 %f42, %f0, %f2, %f4 ; \ + !aes_dround01 %f40, %f60, %f62, %f56 ; \ + !aes_dround23 %f42, %f60, %f62, %f58 ; \ + !aes_dround01 %f44, %f6, %f4, %f0 ; \ + !aes_dround23 %f46, %f6, %f4, %f2 ; \ + !aes_dround01 %f44, %f56, %f58, %f60 ; \ + !aes_dround23 %f46, %f56, %f58, %f62 ; \ + !aes_dround01 %f48, %f0, %f2, %f6 ; \ + !aes_dround23 %f50, %f0, %f2, %f4 ; \ + !aes_dround01 %f48, %f60, %f62, %f56 ; \ + !aes_dround23 %f50, %f60, %f62, %f58 ; \ + !aes_dround01_l %f52, %f6, %f4, %f0 ; \ + !aes_dround23_l %f54, %f6, %f4, %f2 ; \ + !aes_dround01_l %f52, %f56, %f58, %f60 ; \ + !aes_dround23_l %f54, %f56, %f58, %f62 + +#define TWELVE_DROUNDS \ + MID_TWO_DROUNDS ; \ + TEN_DROUNDS + +#define TWELVE_DROUNDS_2 \ + MID_TWO_DROUNDS_2 ; \ + TEN_DROUNDS_2 + +#define FOURTEEN_DROUNDS \ + FIRST_TWO_DROUNDS ; \ + TWELVE_DROUNDS + +#define FOURTEEN_DROUNDS_2 \ + .byte 0xb0, 0xc8, 0x2c, 0x54 ; \ + .byte 0xac, 0xc8, 0xac, 0x74 ; \ + ldd [%o0 + 0x80], %f20 ; \ + .byte 0xb2, 0xc8, 0x3e, 0x5d ; \ + .byte 0xb6, 0xc8, 0xbe, 0x7d ; \ + .byte 0x80, 0xc9, 0x2c, 0x58 ; \ + .byte 0x84, 0xc9, 0xac, 0x78 ; \ + ldd [%o0 + 0x88], %f22 ; 
\ + .byte 0xba, 0xc9, 0x36, 0x59 ; \ + ldd [%o0 + 0x70], %f24 ; \ + .byte 0xbe, 0xc9, 0xb6, 0x79 ; \ + .byte 0x8c, 0xca, 0x04, 0x40 ; \ + .byte 0x88, 0xca, 0x84, 0x60 ; \ + .byte 0xb2, 0xca, 0x3e, 0x5d ; \ + .byte 0xb6, 0xca, 0xbe, 0x7d ; \ + .byte 0x80, 0xcb, 0x08, 0x46 ; \ + .byte 0x84, 0xcb, 0x88, 0x66 ; \ + .byte 0xba, 0xcb, 0x36, 0x59 ; \ + .byte 0xbe, 0xcb, 0xb6, 0x79 ; \ + .byte 0x8c, 0xcc, 0x04, 0x40 ; \ + .byte 0x88, 0xcc, 0x84, 0x60 ; \ + .byte 0xb2, 0xcc, 0x3e, 0x5d ; \ + .byte 0xb6, 0xcc, 0xbe, 0x7d ; \ + .byte 0x80, 0xcd, 0x08, 0x46 ; \ + .byte 0x84, 0xcd, 0x88, 0x66 ; \ + .byte 0xba, 0xcd, 0x36, 0x59 ; \ + .byte 0xbe, 0xcd, 0xb6, 0x79 ; \ + .byte 0x8c, 0xce, 0x04, 0x40 ; \ + .byte 0x88, 0xce, 0x84, 0x60 ; \ + .byte 0xb2, 0xce, 0x3e, 0x5d ; \ + .byte 0xb6, 0xce, 0xbe, 0x7d ; \ + .byte 0x80, 0xcf, 0x08, 0x46 ; \ + .byte 0x84, 0xcf, 0x88, 0x66 ; \ + .byte 0xba, 0xcf, 0x36, 0x59 ; \ + .byte 0xbe, 0xcf, 0xb6, 0x79 ; \ + .byte 0x8c, 0xc8, 0x44, 0x40 ; \ + .byte 0x88, 0xc8, 0xc4, 0x60 ; \ + .byte 0xb2, 0xc8, 0x7e, 0x5d ; \ + .byte 0xb6, 0xc8, 0xfe, 0x7d ; \ + .byte 0x80, 0xc9, 0x48, 0x46 ; \ + .byte 0x84, 0xc9, 0xc8, 0x66 ; \ + .byte 0xba, 0xc9, 0x76, 0x59 ; \ + .byte 0xbe, 0xc9, 0xf6, 0x79 ; \ + .byte 0x8c, 0xca, 0x44, 0x40 ; \ + .byte 0x88, 0xca, 0xc4, 0x60 ; \ + .byte 0xb2, 0xca, 0x7e, 0x5d ; \ + .byte 0xb6, 0xca, 0xfe, 0x7d ; \ + .byte 0x80, 0xcb, 0x48, 0x46 ; \ + .byte 0x84, 0xcb, 0xc8, 0x66 ; \ + .byte 0xba, 0xcb, 0x76, 0x59 ; \ + .byte 0xbe, 0xcb, 0xf6, 0x79 ; \ + .byte 0x8c, 0xcc, 0x44, 0x40 ; \ + .byte 0x88, 0xcc, 0xc4, 0x60 ; \ + ldd [%o0 + 0xd0], %f0 ; \ + .byte 0xb2, 0xcc, 0x7e, 0x5d ; \ + ldd [%o0 + 0xd8], %f2 ; \ + .byte 0xb6, 0xcc, 0xfe, 0x7d ; \ + .byte 0xa8, 0xcd, 0x48, 0xc6 ; \ + .byte 0xac, 0xcd, 0xc8, 0xe6 ; \ + ldd [%o0 + 0xc0], %f4 ; \ + .byte 0xba, 0xcd, 0x76, 0xd9 ; \ + ldd [%o0 + 0xc8], %f6 ; \ + .byte 0xbe, 0xcd, 0xf6, 0xf9 + !aes_dround01 %f0, %f20, %f22, %f24 ; \ + !aes_dround23 %f2, %f20, %f22, %f22 ; \ + !ldd [%o0 + 0x80], %f20 ; \ + !aes_dround01 %f0, %f60, %f62, %f56 ; \ + !aes_dround23 %f2, %f60, %f62, %f58 ; \ + !aes_dround01 %f4, %f24, %f22, %f0 ; \ + !aes_dround23 %f6, %f24, %f22, %f2 ; \ + !ldd [%o0 + 0x88], %f22 ; \ + !aes_dround01 %f4, %f56, %f58, %f60 ; \ + !ldd [%o0 + 0x70], %f24 ; \ + !aes_dround23 %f6, %f56, %f58, %f62 ; \ + !aes_dround01 %f8, %f0, %f2, %f6 ; \ + !aes_dround23 %f10, %f0, %f2, %f4 ; \ + !aes_dround01 %f8, %f60, %f62, %f56 ; \ + !aes_dround23 %f10, %f60, %f62, %f58 ; \ + !aes_dround01 %f12, %f6, %f4, %f0 ; \ + !aes_dround23 %f14, %f6, %f4, %f2 ; \ + !aes_dround01 %f12, %f56, %f58, %f60 ; \ + !aes_dround23 %f14, %f56, %f58, %f62 ; \ + !aes_dround01 %f16, %f0, %f2, %f6 ; \ + !aes_dround23 %f18, %f0, %f2, %f4 ; \ + !aes_dround01 %f16, %f60, %f62, %f56 ; \ + !aes_dround23 %f18, %f60, %f62, %f58 ; \ + !aes_dround01 %f20, %f6, %f4, %f0 ; \ + !aes_dround23 %f22, %f6, %f4, %f2 ; \ + !aes_dround01 %f20, %f56, %f58, %f60 ; \ + !aes_dround23 %f22, %f56, %f58, %f62 ; \ + !aes_dround01 %f24, %f0, %f2, %f6 ; \ + !aes_dround23 %f26, %f0, %f2, %f4 ; \ + !aes_dround01 %f24, %f60, %f62, %f56 ; \ + !aes_dround23 %f26, %f60, %f62, %f58 ; \ + !aes_dround01 %f28, %f6, %f4, %f0 ; \ + !aes_dround23 %f30, %f6, %f4, %f2 ; \ + !aes_dround01 %f28, %f56, %f58, %f60 ; \ + !aes_dround23 %f30, %f56, %f58, %f62 ; \ + !aes_dround01 %f32, %f0, %f2, %f6 ; \ + !aes_dround23 %f34, %f0, %f2, %f4 ; \ + !aes_dround01 %f32, %f60, %f62, %f56 ; \ + !aes_dround23 %f34, %f60, %f62, %f58 ; \ + !aes_dround01 %f36, %f6, %f4, %f0 ; \ + !aes_dround23 %f38, %f6, %f4, %f2 ; \ + 
!aes_dround01 %f36, %f56, %f58, %f60 ; \ + !aes_dround23 %f38, %f56, %f58, %f62 ; \ + !aes_dround01 %f40, %f0, %f2, %f6 ; \ + !aes_dround23 %f42, %f0, %f2, %f4 ; \ + !aes_dround01 %f40, %f60, %f62, %f56 ; \ + !aes_dround23 %f42, %f60, %f62, %f58 ; \ + !aes_dround01 %f44, %f6, %f4, %f0 ; \ + !aes_dround23 %f46, %f6, %f4, %f2 ; \ + !aes_dround01 %f44, %f56, %f58, %f60 ; \ + !aes_dround23 %f46, %f56, %f58, %f62 ; \ + !aes_dround01 %f48, %f0, %f2, %f6 ; \ + !aes_dround23 %f50, %f0, %f2, %f4 ; \ + !ldd [%o0 + 0xd0], %f0 ; \ + !aes_dround01 %f48, %f60, %f62, %f56 ; \ + !ldd [%o0 + 0xd8], %f2 ; \ + !aes_dround23 %f50, %f60, %f62, %f58 ; \ + !aes_dround01_l %f52, %f6, %f4, %f20 ; \ + !aes_dround23_l %f54, %f6, %f4, %f22 ; \ + !ldd [%o0 + 0xc0], %f4 ; \ + !aes_dround01_l %f52, %f56, %f58, %f60 ; \ + !ldd [%o0 + 0xc8], %f6 ; \ + !aes_dround23_l %f54, %f56, %f58, %f62 + + + ENTRY(t4_aes128_load_keys_for_encrypt) + + ldd [%o0 + 0x10], %f16 + ldd [%o0 + 0x18], %f18 + ldd [%o0 + 0x20], %f20 + ldd [%o0 + 0x28], %f22 + ldd [%o0 + 0x30], %f24 + ldd [%o0 + 0x38], %f26 + ldd [%o0 + 0x40], %f28 + ldd [%o0 + 0x48], %f30 + ldd [%o0 + 0x50], %f32 + ldd [%o0 + 0x58], %f34 + ldd [%o0 + 0x60], %f36 + ldd [%o0 + 0x68], %f38 + ldd [%o0 + 0x70], %f40 + ldd [%o0 + 0x78], %f42 + ldd [%o0 + 0x80], %f44 + ldd [%o0 + 0x88], %f46 + ldd [%o0 + 0x90], %f48 + ldd [%o0 + 0x98], %f50 + ldd [%o0 + 0xa0], %f52 + retl + ldd [%o0 + 0xa8], %f54 + + SET_SIZE(t4_aes128_load_keys_for_encrypt) + + + ENTRY(t4_aes192_load_keys_for_encrypt) + + ldd [%o0 + 0x10], %f8 + ldd [%o0 + 0x18], %f10 + ldd [%o0 + 0x20], %f12 + ldd [%o0 + 0x28], %f14 + ldd [%o0 + 0x30], %f16 + ldd [%o0 + 0x38], %f18 + ldd [%o0 + 0x40], %f20 + ldd [%o0 + 0x48], %f22 + ldd [%o0 + 0x50], %f24 + ldd [%o0 + 0x58], %f26 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f32 + ldd [%o0 + 0x78], %f34 + ldd [%o0 + 0x80], %f36 + ldd [%o0 + 0x88], %f38 + ldd [%o0 + 0x90], %f40 + ldd [%o0 + 0x98], %f42 + ldd [%o0 + 0xa0], %f44 + ldd [%o0 + 0xa8], %f46 + ldd [%o0 + 0xb0], %f48 + ldd [%o0 + 0xb8], %f50 + ldd [%o0 + 0xc0], %f52 + retl + ldd [%o0 + 0xc8], %f54 + + SET_SIZE(t4_aes192_load_keys_for_encrypt) + + + ENTRY(t4_aes256_load_keys_for_encrypt) + + ldd [%o0 + 0x10], %f0 + ldd [%o0 + 0x18], %f2 + ldd [%o0 + 0x20], %f4 + ldd [%o0 + 0x28], %f6 + ldd [%o0 + 0x30], %f8 + ldd [%o0 + 0x38], %f10 + ldd [%o0 + 0x40], %f12 + ldd [%o0 + 0x48], %f14 + ldd [%o0 + 0x50], %f16 + ldd [%o0 + 0x58], %f18 + ldd [%o0 + 0x60], %f20 + ldd [%o0 + 0x68], %f22 + ldd [%o0 + 0x70], %f24 + ldd [%o0 + 0x78], %f26 + ldd [%o0 + 0x80], %f28 + ldd [%o0 + 0x88], %f30 + ldd [%o0 + 0x90], %f32 + ldd [%o0 + 0x98], %f34 + ldd [%o0 + 0xa0], %f36 + ldd [%o0 + 0xa8], %f38 + ldd [%o0 + 0xb0], %f40 + ldd [%o0 + 0xb8], %f42 + ldd [%o0 + 0xc0], %f44 + ldd [%o0 + 0xc8], %f46 + ldd [%o0 + 0xd0], %f48 + ldd [%o0 + 0xd8], %f50 + ldd [%o0 + 0xe0], %f52 + retl + ldd [%o0 + 0xe8], %f54 + + SET_SIZE(t4_aes256_load_keys_for_encrypt) + + +#define TEST_PARALLEL_ECB_ENCRYPT +#ifdef TEST_PARALLEL_ECB_ENCRYPT + ENTRY(t4_aes128_ecb_encrypt) + + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! 
ks[1] + and %o3, 16, %o4 + brz %o4, ecbenc128_loop + nop + + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f60 + movxtod %g4, %f62 + + TEN_EROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be ecbenc128_loop_end + add %o2, 16, %o2 + +ecbenc128_loop: + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f0 + movxtod %g4, %f2 + ldx [%o1 + 16], %g3 !input + ldx [%o1 + 24], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f60 + movxtod %g4, %f62 + + TEN_EROUNDS_2 + + std %f0, [%o2] + std %f2, [%o2 + 8] + + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne ecbenc128_loop + add %o2, 32, %o2 +ecbenc128_loop_end: + retl + nop + + SET_SIZE(t4_aes128_ecb_encrypt) + + + ENTRY(t4_aes192_ecb_encrypt) + + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + and %o3, 16, %o4 + brz %o4, ecbenc192_loop + nop + + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f60 + movxtod %g4, %f62 + + TWELVE_EROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be ecbenc192_loop_end + add %o2, 16, %o2 + +ecbenc192_loop: + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f0 + movxtod %g4, %f2 + ldx [%o1 + 16], %g3 !input + ldx [%o1 + 24], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f60 + movxtod %g4, %f62 + + TWELVE_EROUNDS_2 + + std %f0, [%o2] + std %f2, [%o2 + 8] + + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne ecbenc192_loop + add %o2, 32, %o2 +ecbenc192_loop_end: + retl + nop + + SET_SIZE(t4_aes192_ecb_encrypt) + + + ENTRY(t4_aes256_ecb_encrypt) + + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + and %o3, 16, %o4 + brz %o4, ecbenc256_loop + nop + + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f60 + movxtod %g4, %f62 + + FOURTEEN_EROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be ecbenc256_loop_end + add %o2, 16, %o2 + +ecbenc256_loop: + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f20 + movxtod %g4, %f22 + ldx [%o1 + 16], %g3 !input + ldx [%o1 + 24], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f60 + movxtod %g4, %f62 + + FOURTEEN_EROUNDS_2 + + std %f20, [%o2] + std %f22, [%o2 + 8] + + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne ecbenc256_loop + add %o2, 32, %o2 + + ldd [%o0 + 0x60], %f20 + ldd [%o0 + 0x68], %f22 + +ecbenc256_loop_end: + retl + nop + + SET_SIZE(t4_aes256_ecb_encrypt) + +#else + + ENTRY(t4_aes128_ecb_encrypt) + + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! 
ks[1] + +ecbenc128_loop: + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f60 + movxtod %g4, %f62 + + TEN_EROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne ecbenc128_loop + add %o2, 16, %o2 + + retl + nop + + SET_SIZE(t4_aes128_ecb_encrypt) + + + ENTRY(t4_aes192_ecb_encrypt) + + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +ecbenc192_loop: + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f60 + movxtod %g4, %f62 + + TWELVE_EROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne ecbenc192_loop + add %o2, 16, %o2 + + retl + nop + + SET_SIZE(t4_aes192_ecb_encrypt) + + + ENTRY(t4_aes256_ecb_encrypt) + + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +ecbenc256_loop: + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f60 + movxtod %g4, %f62 + + FOURTEEN_EROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne ecbenc256_loop + add %o2, 16, %o2 + + retl + nop + + SET_SIZE(t4_aes256_ecb_encrypt) +#endif + + + ENTRY(t4_aes128_cbc_encrypt) + + ldd [%o4], %f60 ! IV + ldd [%o4 +8], %f62 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +cbcenc128_loop: + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f56 + movxtod %g4, %f58 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + TEN_EROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne cbcenc128_loop + add %o2, 16, %o2 + + std %f60, [%o4] + retl + std %f62, [%o4 + 8] + + SET_SIZE(t4_aes128_cbc_encrypt) + + + ENTRY(t4_aes192_cbc_encrypt) + + ldd [%o4], %f60 ! IV + ldd [%o4 + 8], %f62 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +cbcenc192_loop: + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f56 + movxtod %g4, %f58 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + TWELVE_EROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne cbcenc192_loop + add %o2, 16, %o2 + + std %f60, [%o4] + retl + std %f62, [%o4 + 8] + + SET_SIZE(t4_aes192_cbc_encrypt) + + + ENTRY(t4_aes256_cbc_encrypt) + + ldd [%o4], %f60 ! IV + ldd [%o4 + 8], %f62 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +cbcenc256_loop: + ldx [%o1], %g3 !input + ldx [%o1 + 8], %g4 !input + xor %g1, %g3, %g3 !input ^ ks[0-1] + xor %g2, %g4, %g4 !input ^ ks[0-1] + movxtod %g3, %f56 + movxtod %g4, %f58 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + FOURTEEN_EROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne cbcenc256_loop + add %o2, 16, %o2 + + std %f60, [%o4] + retl + std %f62, [%o4 + 8] + + SET_SIZE(t4_aes256_cbc_encrypt) + + +#define TEST_PARALLEL_CTR_CRYPT +#ifdef TEST_PARALLEL_CTR_CRYPT + ENTRY(t4_aes128_ctr_crypt) + + ldx [%o4], %g3 ! IV + ldx [%o4 +8], %g4 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! 
ks[1] + and %o3, 16, %g5 + brz, %g5, ctr128_loop + + xor %g1, %g3, %g5 + movxtod %g5, %f60 + xor %g2, %g4, %g5 + movxtod %g5, %f62 + inc %g4 + + TEN_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be ctr128_loop_end + add %o2, 16, %o2 + +ctr128_loop: + xor %g1, %g3, %g5 + movxtod %g5, %f0 + xor %g2, %g4, %g5 + movxtod %g5, %f2 + inc %g4 + + xor %g1, %g3, %g5 + movxtod %g5, %f60 + xor %g2, %g4, %g5 + movxtod %g5, %f62 + inc %g4 + + TEN_EROUNDS_2 + + ldd [%o1], %f6 !input + ldd [%o1 + 8], %f4 !input + ldd [%o1 + 16], %f56 !input + ldd [%o1 + 24], %f58 !input + fxor %f0, %f6, %f0 + fxor %f2, %f4, %f2 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + std %f0, [%o2] + std %f2, [%o2 + 8] + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne ctr128_loop + add %o2, 32, %o2 + +ctr128_loop_end: + stx %g3, [%o4] + retl + stx %g4, [%o4 + 8] + + SET_SIZE(t4_aes128_ctr_crypt) + + + ENTRY(t4_aes192_ctr_crypt) + + ldx [%o4], %g3 ! IV + ldx [%o4 +8], %g4 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + and %o3, 16, %g5 + brz, %g5, ctr192_loop + + xor %g1, %g3, %g5 + movxtod %g5, %f60 + xor %g2, %g4, %g5 + movxtod %g5, %f62 + inc %g4 + + TWELVE_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be ctr192_loop_end + add %o2, 16, %o2 + +ctr192_loop: + xor %g1, %g3, %g5 + movxtod %g5, %f0 + xor %g2, %g4, %g5 + movxtod %g5, %f2 + inc %g4 + + xor %g1, %g3, %g5 + movxtod %g5, %f60 + xor %g2, %g4, %g5 + movxtod %g5, %f62 + inc %g4 + + TWELVE_EROUNDS_2 + + ldd [%o1], %f6 !input + ldd [%o1 + 8], %f4 !input + ldd [%o1 + 16], %f56 !input + ldd [%o1 + 24], %f58 !input + fxor %f0, %f6, %f0 + fxor %f2, %f4, %f2 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + std %f0, [%o2] + std %f2, [%o2 + 8] + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne ctr192_loop + add %o2, 32, %o2 + +ctr192_loop_end: + stx %g3, [%o4] + retl + stx %g4, [%o4 + 8] + + SET_SIZE(t4_aes192_ctr_crypt) + + + ENTRY(t4_aes256_ctr_crypt) + + ldx [%o4], %g3 ! IV + ldx [%o4 +8], %g4 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! 
ks[1] + and %o3, 16, %g5 + brz, %g5, ctr256_loop + + xor %g1, %g3, %g5 + movxtod %g5, %f60 + xor %g2, %g4, %g5 + movxtod %g5, %f62 + inc %g4 + + FOURTEEN_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be ctr256_loop_end + add %o2, 16, %o2 + +ctr256_loop: + xor %g1, %g3, %g5 + movxtod %g5, %f20 + xor %g2, %g4, %g5 + movxtod %g5, %f22 + inc %g4 + + xor %g1, %g3, %g5 + movxtod %g5, %f60 + xor %g2, %g4, %g5 + movxtod %g5, %f62 + inc %g4 + + FOURTEEN_EROUNDS_2 + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f20, %f56, %f20 + fxor %f22, %f58, %f22 + ldd [%o1 + 16], %f56 !input + ldd [%o1 + 24], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + std %f20, [%o2] + std %f22, [%o2 + 8] + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne ctr256_loop + add %o2, 32, %o2 + + ldd [%o0 + 0x60], %f20 + ldd [%o0 + 0x68], %f22 + +ctr256_loop_end: + stx %g3, [%o4] + retl + stx %g4, [%o4 + 8] + + SET_SIZE(t4_aes256_ctr_crypt) + +#else + + ENTRY(t4_aes128_ctr_crypt) + + ldx [%o4], %g3 ! IV + ldx [%o4 +8], %g4 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +ctr128_loop: + xor %g1, %g3, %g5 + movxtod %g5, %f60 + xor %g2, %g4, %g5 + movxtod %g5, %f62 + inc %g4 + + TEN_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne ctr128_loop + add %o2, 16, %o2 + + stx %g3, [%o4] + retl + stx %g4, [%o4 + 8] + + SET_SIZE(t4_aes128_ctr_crypt) + + ENTRY(t4_aes192_ctr_crypt) + + ldx [%o4], %g3 ! IV + ldx [%o4 +8], %g4 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +ctr192_loop: + xor %g1, %g3, %g5 + movxtod %g5, %f60 + xor %g2, %g4, %g5 + movxtod %g5, %f62 + inc %g4 + + TWELVE_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne ctr192_loop + add %o2, 16, %o2 + + stx %g3, [%o4] + retl + stx %g4, [%o4 + 8] + + SET_SIZE(t4_aes192_ctr_crypt) + + + ENTRY(t4_aes256_ctr_crypt) + + ldx [%o4], %g3 ! IV + ldx [%o4 +8], %g4 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +ctr256_loop: + xor %g1, %g3, %g5 + movxtod %g5, %f60 + xor %g2, %g4, %g5 + movxtod %g5, %f62 + inc %g4 + + FOURTEEN_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne ctr256_loop + add %o2, 16, %o2 + + stx %g3, [%o4] + retl + stx %g4, [%o4 + 8] + + SET_SIZE(t4_aes256_ctr_crypt) + +#endif + + ENTRY(t4_aes128_cfb128_encrypt) + + ldd [%o4], %f60 ! IV + ldd [%o4 +8], %f62 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +cfb128_128_loop: + movxtod %g1, %f56 + movxtod %g2, %f58 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + TEN_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne cfb128_128_loop + add %o2, 16, %o2 + + std %f60, [%o4] + retl + std %f62, [%o4 + 8] + + SET_SIZE(t4_aes128_cfb128_encrypt) + + + ENTRY(t4_aes192_cfb128_encrypt) + + ldd [%o4], %f60 ! IV + ldd [%o4 +8], %f62 ! IV + ldx [%o0], %g1 ! 
ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +cfb128_192_loop: + movxtod %g1, %f56 + movxtod %g2, %f58 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + TWELVE_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne cfb128_192_loop + add %o2, 16, %o2 + + std %f60, [%o4] + retl + std %f62, [%o4 + 8] + + SET_SIZE(t4_aes192_cfb128_encrypt) + + + ENTRY(t4_aes256_cfb128_encrypt) + + ldd [%o4], %f60 ! IV + ldd [%o4 +8], %f62 ! IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +cfb128_256_loop: + movxtod %g1, %f56 + movxtod %g2, %f58 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + FOURTEEN_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne cfb128_256_loop + add %o2, 16, %o2 + + std %f60, [%o4] + retl + std %f62, [%o4 + 8] + + SET_SIZE(t4_aes256_cfb128_encrypt) + + + ENTRY(t4_aes128_load_keys_for_decrypt) + + ldd [%o0], %f52 + ldd [%o0 + 0x8], %f54 + ldd [%o0 + 0x10], %f48 + ldd [%o0 + 0x18], %f50 + ldd [%o0 + 0x20], %f44 + ldd [%o0 + 0x28], %f46 + ldd [%o0 + 0x30], %f40 + ldd [%o0 + 0x38], %f42 + ldd [%o0 + 0x40], %f36 + ldd [%o0 + 0x48], %f38 + ldd [%o0 + 0x50], %f32 + ldd [%o0 + 0x58], %f34 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f24 + ldd [%o0 + 0x78], %f26 + ldd [%o0 + 0x80], %f20 + ldd [%o0 + 0x88], %f22 + ldd [%o0 + 0x90], %f16 + retl + ldd [%o0 + 0x98], %f18 + + SET_SIZE(t4_aes128_load_keys_for_decrypt) + + + ENTRY(t4_aes192_load_keys_for_decrypt) + + ldd [%o0], %f52 + ldd [%o0 + 0x8], %f54 + ldd [%o0 + 0x10], %f48 + ldd [%o0 + 0x18], %f50 + ldd [%o0 + 0x20], %f44 + ldd [%o0 + 0x28], %f46 + ldd [%o0 + 0x30], %f40 + ldd [%o0 + 0x38], %f42 + ldd [%o0 + 0x40], %f36 + ldd [%o0 + 0x48], %f38 + ldd [%o0 + 0x50], %f32 + ldd [%o0 + 0x58], %f34 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f24 + ldd [%o0 + 0x78], %f26 + ldd [%o0 + 0x80], %f20 + ldd [%o0 + 0x88], %f22 + ldd [%o0 + 0x90], %f16 + ldd [%o0 + 0x98], %f18 + ldd [%o0 + 0xa0], %f12 + ldd [%o0 + 0xa8], %f14 + ldd [%o0 + 0xb0], %f8 + retl + ldd [%o0 + 0xb8], %f10 + + SET_SIZE(t4_aes192_load_keys_for_decrypt) + + + ENTRY(t4_aes256_load_keys_for_decrypt) + + + ldd [%o0], %f52 + ldd [%o0 + 0x8], %f54 + ldd [%o0 + 0x10], %f48 + ldd [%o0 + 0x18], %f50 + ldd [%o0 + 0x20], %f44 + ldd [%o0 + 0x28], %f46 + ldd [%o0 + 0x30], %f40 + ldd [%o0 + 0x38], %f42 + ldd [%o0 + 0x40], %f36 + ldd [%o0 + 0x48], %f38 + ldd [%o0 + 0x50], %f32 + ldd [%o0 + 0x58], %f34 + ldd [%o0 + 0x60], %f28 + ldd [%o0 + 0x68], %f30 + ldd [%o0 + 0x70], %f24 + ldd [%o0 + 0x78], %f26 + ldd [%o0 + 0x80], %f20 + ldd [%o0 + 0x88], %f22 + ldd [%o0 + 0x90], %f16 + ldd [%o0 + 0x98], %f18 + ldd [%o0 + 0xa0], %f12 + ldd [%o0 + 0xa8], %f14 + ldd [%o0 + 0xb0], %f8 + ldd [%o0 + 0xb8], %f10 + ldd [%o0 + 0xc0], %f4 + ldd [%o0 + 0xc8], %f6 + ldd [%o0 + 0xd0], %f0 + retl + ldd [%o0 + 0xd8], %f2 + + SET_SIZE(t4_aes256_load_keys_for_decrypt) + + +#define TEST_PARALLEL_ECB_DECRYPT +#ifdef TEST_PARALLEL_ECB_DECRYPT + ENTRY(t4_aes128_ecb_decrypt) + + ldx [%o0 + 0xa0], %g1 !ks[last-1] + ldx [%o0 + 0xa8], %g2 !ks[last] + and %o3, 16, %o4 + brz %o4, ecbdec128_loop + nop + + ldx [%o1], %o4 + ldx [%o1 + 8], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f60 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f62 + + TEN_DROUNDS + + std %f60, [%o2] + std 
%f62, [%o2 + 0x8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be ecbdec128_loop_end + add %o2, 16, %o2 + +ecbdec128_loop: + ldx [%o1], %o4 + ldx [%o1 + 8], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f0 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f2 + ldx [%o1 + 16], %o4 + ldx [%o1 + 24], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f60 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f62 + + TEN_DROUNDS_2 + + std %f0, [%o2] + std %f2, [%o2 + 8] + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne ecbdec128_loop + add %o2, 32, %o2 +ecbdec128_loop_end: + + retl + nop + + SET_SIZE(t4_aes128_ecb_decrypt) + + ENTRY(t4_aes192_ecb_decrypt) + + ldx [%o0 + 0xc0], %g1 !ks[last-1] + ldx [%o0 + 0xc8], %g2 !ks[last] + and %o3, 16, %o4 + brz %o4, ecbdec192_loop + nop + + ldx [%o1], %o4 + ldx [%o1 + 8], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f60 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f62 + + TWELVE_DROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 0x8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be ecbdec192_loop_end + add %o2, 16, %o2 + +ecbdec192_loop: + ldx [%o1], %o4 + ldx [%o1 + 8], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f0 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f2 + ldx [%o1 + 16], %o4 + ldx [%o1 + 24], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f60 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f62 + + TWELVE_DROUNDS_2 + + std %f0, [%o2] + std %f2, [%o2 + 8] + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne ecbdec192_loop + add %o2, 32, %o2 +ecbdec192_loop_end: + + retl + nop + + SET_SIZE(t4_aes192_ecb_decrypt) + + + ENTRY(t4_aes256_ecb_decrypt) + + ldx [%o0 + 0xe0], %g1 !ks[last-1] + ldx [%o0 + 0xe8], %g2 !ks[last] + and %o3, 16, %o4 + brz %o4, ecbdec256_loop + nop + + ldx [%o1], %o4 + ldx [%o1 + 8], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f60 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f62 + + FOURTEEN_DROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 0x8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be ecbdec256_loop_end + add %o2, 16, %o2 + +ecbdec256_loop: + ldx [%o1], %o4 + ldx [%o1 + 8], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f20 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f22 + ldx [%o1 + 16], %o4 + ldx [%o1 + 24], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f60 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f62 + + FOURTEEN_DROUNDS_2 + + std %f20, [%o2] + std %f22, [%o2 + 8] + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne ecbdec256_loop + add %o2, 32, %o2 + + ldd [%o0 + 0x80], %f20 + ldd [%o0 + 0x88], %f22 + +ecbdec256_loop_end: + + retl + nop + + SET_SIZE(t4_aes256_ecb_decrypt) + +#else + + ENTRY(t4_aes128_ecb_decrypt) + + ldx [%o0 + 0xa0], %g1 !ks[last-1] + ldx [%o0 + 0xa8], %g2 !ks[last] + +ecbdec128_loop: + ldx [%o1], %o4 + ldx [%o1 + 8], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f60 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f62 + + TEN_DROUNDS + + std %f60, [%o2] + std %f62, [%o2 + 0x8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne ecbdec128_loop + add %o2, 16, %o2 + + retl + nop + + SET_SIZE(t4_aes128_ecb_decrypt) + + + ENTRY(t4_aes192_ecb_decrypt) + + ldx [%o0 + 0xc0], %g1 !ks[last-1] + ldx [%o0 + 0xc8], %g2 !ks[last] + +ecbdec192_loop: + ldx [%o1], %o4 + ldx [%o1 + 8], %o5 + xor %g1, %o4, %g3 !initial ARK + movxtod %g3, %f60 + xor %g2, %o5, %g3 !initial ARK + movxtod %g3, %f62 + + TWELVE_DROUNDS + + std %f60, 
[%o2]
+	std	%f62, [%o2 + 0x8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ecbdec192_loop
+	add	%o2, 16, %o2
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes192_ecb_decrypt)
+
+
+	ENTRY(t4_aes256_ecb_decrypt)
+
+	ldx	[%o0 + 0xe0], %g1	!ks[last-1]
+	ldx	[%o0 + 0xe8], %g2	!ks[last]
+
+ecbdec256_loop:
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	FOURTEEN_DROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 0x8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ecbdec256_loop
+	add	%o2, 16, %o2
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes256_ecb_decrypt)
+
+#endif
+
+#define	TEST_PARALLEL_CBC_DECRYPT
+#ifdef	TEST_PARALLEL_CBC_DECRYPT
+	ENTRY(t4_aes128_cbc_decrypt)
+
+	save	%sp, -SA(MINFRAME), %sp
+	ldx	[%i4], %o0	!IV
+	ldx	[%i4 + 8], %o1	!IV
+	ldx	[%i0 + 0xa0], %o2	!ks[last-1]
+	ldx	[%i0 + 0xa8], %o3	!ks[last]
+	and	%i3, 16, %o4
+	brz	%o4, cbcdec128_loop
+	nop
+
+	ldx	[%i1], %o4
+	ldx	[%i1 + 8], %o5
+	xor	%o2, %o4, %g1	!initial ARK
+	movxtod	%g1, %f60
+	xor	%o3, %o5, %g1	!initial ARK
+	movxtod	%g1, %f62
+
+	TEN_DROUNDS
+
+	movxtod	%o0, %f56
+	movxtod	%o1, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2]
+	std	%f62, [%i2 + 0x8]
+
+	add	%i1, 16, %i1
+	subcc	%i3, 16, %i3
+	be	cbcdec128_loop_end
+	add	%i2, 16, %i2
+
+
+cbcdec128_loop:
+	ldx	[%i1], %g4
+	ldx	[%i1 + 8], %g5
+	xor	%o2, %g4, %g1	!initial ARK
+	movxtod	%g1, %f0
+	xor	%o3, %g5, %g1	!initial ARK
+	movxtod	%g1, %f2
+
+	ldx	[%i1 + 16], %o4
+	ldx	[%i1 + 24], %o5
+	xor	%o2, %o4, %g1	!initial ARK
+	movxtod	%g1, %f60
+	xor	%o3, %o5, %g1	!initial ARK
+	movxtod	%g1, %f62
+
+	TEN_DROUNDS_2
+
+	movxtod	%o0, %f6
+	movxtod	%o1, %f4
+	fxor	%f6, %f0, %f0	!add in previous IV
+	fxor	%f4, %f2, %f2
+
+	std	%f0, [%i2]
+	std	%f2, [%i2 + 8]
+
+	movxtod	%g4, %f56
+	movxtod	%g5, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2 + 16]
+	std	%f62, [%i2 + 24]
+
+	add	%i1, 32, %i1
+	subcc	%i3, 32, %i3
+	bne	cbcdec128_loop
+	add	%i2, 32, %i2
+
+cbcdec128_loop_end:
+	stx	%o0, [%i4]
+	stx	%o1, [%i4 + 8]
+	ret
+	restore
+
+	SET_SIZE(t4_aes128_cbc_decrypt)
+
+
+	ENTRY(t4_aes192_cbc_decrypt)
+
+	save	%sp, -SA(MINFRAME), %sp
+	ldx	[%i4], %o0	!IV
+	ldx	[%i4 + 8], %o1	!IV
+	ldx	[%i0 + 0xc0], %o2	!ks[last-1]
+	ldx	[%i0 + 0xc8], %o3	!ks[last]
+	and	%i3, 16, %o4
+	brz	%o4, cbcdec192_loop
+	nop
+
+	ldx	[%i1], %o4
+	ldx	[%i1 + 8], %o5
+	xor	%o2, %o4, %g1	!initial ARK
+	movxtod	%g1, %f60
+	xor	%o3, %o5, %g1	!initial ARK
+	movxtod	%g1, %f62
+
+	TWELVE_DROUNDS
+
+	movxtod	%o0, %f56
+	movxtod	%o1, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2]
+	std	%f62, [%i2 + 0x8]
+
+	add	%i1, 16, %i1
+	subcc	%i3, 16, %i3
+	be	cbcdec192_loop_end
+	add	%i2, 16, %i2
+
+
+cbcdec192_loop:
+	ldx	[%i1], %g4
+	ldx	[%i1 + 8], %g5
+	xor	%o2, %g4, %g1	!initial ARK
+	movxtod	%g1, %f0
+	xor	%o3, %g5, %g1	!initial ARK
+	movxtod	%g1, %f2
+
+	ldx	[%i1 + 16], %o4
+	ldx	[%i1 + 24], %o5
+	xor	%o2, %o4, %g1	!initial ARK
+	movxtod	%g1, %f60
+	xor	%o3, %o5, %g1	!initial ARK
+	movxtod	%g1, %f62
+
+	TWELVE_DROUNDS_2
+
+	movxtod	%o0, %f6
+	movxtod	%o1, %f4
+	fxor	%f6, %f0, %f0	!add in previous IV
+	fxor	%f4, %f2, %f2
+
+	std	%f0, [%i2]
+	std	%f2, [%i2 + 8]
+
+	movxtod	%g4, %f56
+	movxtod	%g5, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov
%o5, %o1 + fxor %f56, %f60, %f60 !add in previous IV + fxor %f58, %f62, %f62 + + std %f60, [%i2 + 16] + std %f62, [%i2 + 24] + + add %i1, 32, %i1 + subcc %i3, 32, %i3 + bne cbcdec192_loop + add %i2, 32, %i2 + +cbcdec192_loop_end: + stx %o0, [%i4] + stx %o1, [%i4 + 8] + ret + restore + + SET_SIZE(t4_aes192_cbc_decrypt) + + + ENTRY(t4_aes256_cbc_decrypt) + + save %sp, -SA(MINFRAME), %sp + mov %i0, %o0 !FOURTEEN_DROUNDS uses %o0 + ldx [%i4], %g2 !IV + ldx [%i4 + 8], %o1 !IV + ldx [%o0 + 0xe0], %o2 !ks[last-1] + ldx [%o0 + 0xe8], %o3 !ks[last] + and %i3, 16, %o4 + brz %o4, cbcdec256_loop + nop + + ldx [%i1], %o4 + ldx [%i1 + 8], %o5 + xor %o2, %o4, %g1 !initial ARK + movxtod %g1, %f60 + xor %o3, %o5, %g1 !initial ARK + movxtod %g1, %f62 + + FOURTEEN_DROUNDS + + movxtod %g2, %f56 + movxtod %o1, %f58 + mov %o4, %g2 !save last block as next IV + mov %o5, %o1 + fxor %f56, %f60, %f60 !add in previous IV + fxor %f58, %f62, %f62 + + std %f60, [%i2] + std %f62, [%i2 + 0x8] + + add %i1, 16, %i1 + subcc %i3, 16, %i3 + be cbcdec256_loop_end + add %i2, 16, %i2 + + +cbcdec256_loop: + ldx [%i1], %g4 + ldx [%i1 + 8], %g5 + xor %o2, %g4, %g1 !initial ARK + movxtod %g1, %f20 + xor %o3, %g5, %g1 !initial ARK + movxtod %g1, %f22 + + ldx [%i1 + 16], %o4 + ldx [%i1 + 24], %o5 + xor %o2, %o4, %g1 !initial ARK + movxtod %g1, %f60 + xor %o3, %o5, %g1 !initial ARK + movxtod %g1, %f62 + + FOURTEEN_DROUNDS_2 + + movxtod %g2, %f56 + movxtod %o1, %f58 + fxor %f56, %f20, %f20 !add in previous IV + fxor %f58, %f22, %f22 + + std %f20, [%i2] + std %f22, [%i2 + 8] + + movxtod %g4, %f56 + movxtod %g5, %f58 + mov %o4, %g2 !save last block as next IV + mov %o5, %o1 + fxor %f56, %f60, %f60 !add in previous IV + fxor %f58, %f62, %f62 + + std %f60, [%i2 + 16] + std %f62, [%i2 + 24] + + add %i1, 32, %i1 + subcc %i3, 32, %i3 + bne cbcdec256_loop + add %i2, 32, %i2 + + ldd [%o0 + 0x80], %f20 + ldd [%o0 + 0x88], %f22 + +cbcdec256_loop_end: + stx %g2, [%i4] + stx %o1, [%i4 + 8] + ret + restore + + SET_SIZE(t4_aes256_cbc_decrypt) + +#else + + ENTRY(t4_aes128_cbc_decrypt) + + save %sp, -SA(MINFRAME), %sp + ldx [%i4], %o0 !IV + ldx [%i4 + 8], %o1 !IV + ldx [%i0 + 0xa0], %o2 !ks[last-1] + ldx [%i0 + 0xa8], %o3 !ks[last] + +cbcdec128_loop: + ldx [%i1], %o4 + ldx [%i1 + 8], %o5 + xor %o2, %o4, %g1 !initial ARK + movxtod %g1, %f60 + xor %o3, %o5, %g1 !initial ARK + movxtod %g1, %f62 + + TEN_DROUNDS + + movxtod %o0, %f56 + movxtod %o1, %f58 + mov %o4, %o0 !save last block as next IV + mov %o5, %o1 + fxor %f56, %f60, %f60 !add in previous IV + fxor %f58, %f62, %f62 + + std %f60, [%i2] + std %f62, [%i2 + 0x8] + + add %i1, 16, %i1 + subcc %i3, 16, %i3 + bne cbcdec128_loop + add %i2, 16, %i2 + + stx %o0, [%i4] + stx %o1, [%i4 + 8] + ret + restore + + SET_SIZE(t4_aes128_cbc_decrypt) + + + ENTRY(t4_aes192_cbc_decrypt) + + save %sp, -SA(MINFRAME), %sp + ldx [%i4], %o0 !IV + ldx [%i4 + 8], %o1 !IV + ldx [%i0 + 0xc0], %o2 !ks[last-1] + ldx [%i0 + 0xc8], %o3 !ks[last] + +cbcdec192_loop: + ldx [%i1], %o4 + ldx [%i1 + 8], %o5 + xor %o2, %o4, %g1 !initial ARK + movxtod %g1, %f60 + xor %o3, %o5, %g1 !initial ARK + movxtod %g1, %f62 + + TWELVE_DROUNDS + + movxtod %o0, %f56 + movxtod %o1, %f58 + mov %o4, %o0 !save last block as next IV + mov %o5, %o1 + fxor %f56, %f60, %f60 !add in previous IV + fxor %f58, %f62, %f62 + + std %f60, [%i2] + std %f62, [%i2 + 0x8] + + add %i1, 16, %i1 + subcc %i3, 16, %i3 + bne cbcdec192_loop + add %i2, 16, %i2 + + stx %o0, [%i4] + stx %o1, [%i4 + 8] + ret + restore + + SET_SIZE(t4_aes192_cbc_decrypt) + + + 
ENTRY(t4_aes256_cbc_decrypt) + + save %sp, -SA(MINFRAME), %sp + ldx [%i4], %o0 !IV + ldx [%i4 + 8], %o1 !IV + ldx [%i0 + 0xe0], %o2 !ks[last-1] + ldx [%i0 + 0xe8], %o3 !ks[last] + +cbcdec256_loop: + ldx [%i1], %o4 + ldx [%i1 + 8], %o5 + xor %o2, %o4, %g1 !initial ARK + movxtod %g1, %f60 + xor %o3, %o5, %g1 !initial ARK + movxtod %g1, %f62 + + FOURTEEN_DROUNDS + + movxtod %o0, %f56 + movxtod %o1, %f58 + mov %o4, %o0 !save last block as next IV + mov %o5, %o1 + fxor %f56, %f60, %f60 !add in previous IV + fxor %f58, %f62, %f62 + + std %f60, [%i2] + std %f62, [%i2 + 0x8] + + add %i1, 16, %i1 + subcc %i3, 16, %i3 + bne cbcdec256_loop + add %i2, 16, %i2 + + stx %o0, [%i4] + stx %o1, [%i4 + 8] + ret + restore + + SET_SIZE(t4_aes256_cbc_decrypt) + +#endif + +#define TEST_PARALLEL_CFB128_DECRYPT +#ifdef TEST_PARALLEL_CFB128_DECRYPT + + ENTRY(t4_aes128_cfb128_decrypt) + + ldd [%o4], %f56 !IV + ldd [%o4 + 8], %f58 !IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + and %o3, 16, %o5 + brz %o5, cfb128dec_128_loop + + movxtod %g1, %f60 + movxtod %g2, %f62 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + /* CFB mode uses encryption for the decrypt operation */ + TEN_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be cfb128dec_128_loop_end + add %o2, 16, %o2 + +cfb128dec_128_loop: + ldd [%o1], %f6 !input + ldd [%o1 + 8], %f4 !input + movxtod %g1, %f60 + movxtod %g2, %f62 + fxor %f60, %f6, %f0 + fxor %f62, %f4, %f2 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + /* CFB mode uses encryption for the decrypt operation */ + TEN_EROUNDS_2 + + ldd [%o1], %f6 !input + ldd [%o1 + 8], %f4 !input + ldd [%o1 + 16], %f56 !input + ldd [%o1 + 24], %f58 !input + + fxor %f60, %f6, %f6 + fxor %f62, %f4, %f4 + fxor %f0, %f56, %f60 + fxor %f2, %f58, %f62 + + std %f6, [%o2] + std %f4, [%o2 + 8] + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne cfb128dec_128_loop + add %o2, 32, %o2 + +cfb128dec_128_loop_end: + std %f56, [%o4] + retl + std %f58, [%o4 + 8] + + SET_SIZE(t4_aes128_cfb128_decrypt) + + + ENTRY(t4_aes192_cfb128_decrypt) + + ldd [%o4], %f56 !IV + ldd [%o4 + 8], %f58 !IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! 
ks[1] + and %o3, 16, %o5 + brz %o5, cfb128dec_192_loop + + movxtod %g1, %f60 + movxtod %g2, %f62 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + /* CFB mode uses encryption for the decrypt operation */ + TWELVE_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be cfb128dec_192_loop_end + add %o2, 16, %o2 + +cfb128dec_192_loop: + ldd [%o1], %f6 !input + ldd [%o1 + 8], %f4 !input + movxtod %g1, %f60 + movxtod %g2, %f62 + fxor %f60, %f6, %f0 + fxor %f62, %f4, %f2 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + /* CFB mode uses encryption for the decrypt operation */ + TWELVE_EROUNDS_2 + + ldd [%o1], %f6 !input + ldd [%o1 + 8], %f4 !input + ldd [%o1 + 16], %f56 !input + ldd [%o1 + 24], %f58 !input + + fxor %f60, %f6, %f6 + fxor %f62, %f4, %f4 + fxor %f0, %f56, %f60 + fxor %f2, %f58, %f62 + + std %f6, [%o2] + std %f4, [%o2 + 8] + std %f60, [%o2 + 16] + std %f62, [%o2 + 24] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne cfb128dec_192_loop + add %o2, 32, %o2 + +cfb128dec_192_loop_end: + std %f56, [%o4] + retl + std %f58, [%o4 + 8] + + SET_SIZE(t4_aes192_cfb128_decrypt) + + + ENTRY(t4_aes256_cfb128_decrypt) + + ldd [%o4], %f56 !IV + ldd [%o4 + 8], %f58 !IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + and %o3, 16, %o5 + brz %o5, cfb128dec_256_loop + + movxtod %g1, %f60 + movxtod %g2, %f62 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + /* CFB mode uses encryption for the decrypt operation */ + FOURTEEN_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + be cfb128dec_256_loop_end + add %o2, 16, %o2 + +cfb128dec_256_loop: + ldd [%o1], %f20 !input + ldd [%o1 + 8], %f22 !input + movxtod %g1, %f60 + movxtod %g2, %f62 + fxor %f60, %f20, %f20 + fxor %f62, %f22, %f22 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + /* CFB mode uses encryption for the decrypt operation */ + FOURTEEN_EROUNDS_2 + + ldd [%o1 + 16], %f56 !input + ldd [%o1 + 24], %f58 !input + fxor %f20, %f56, %f20 + fxor %f22, %f58, %f22 + std %f20, [%o2 + 16] + std %f22, [%o2 + 24] + + ldd [%o1], %f20 !input + ldd [%o1 + 8], %f22 !input + + fxor %f60, %f20, %f20 + fxor %f62, %f22, %f22 + + std %f20, [%o2] + std %f22, [%o2 + 8] + + add %o1, 32, %o1 + subcc %o3, 32, %o3 + bne cfb128dec_256_loop + add %o2, 32, %o2 + + ldd [%o0 + 0x60], %f20 + ldd [%o0 + 0x68], %f22 + +cfb128dec_256_loop_end: + std %f56, [%o4] + retl + std %f58, [%o4 + 8] + + SET_SIZE(t4_aes256_cfb128_decrypt) + +#else + ENTRY(t4_aes128_cfb128_decrypt) + + ldd [%o4], %f56 !IV + ldd [%o4 + 8], %f58 !IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! ks[1] + +cfb128dec_128_loop: + movxtod %g1, %f60 + movxtod %g2, %f62 + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + /* CFB mode uses encryption for the decrypt operation */ + TEN_EROUNDS + + ldd [%o1], %f56 !input + ldd [%o1 + 8], %f58 !input + fxor %f60, %f56, %f60 + fxor %f62, %f58, %f62 + + std %f60, [%o2] + std %f62, [%o2 + 8] + + add %o1, 16, %o1 + subcc %o3, 16, %o3 + bne cfb128dec_128_loop + add %o2, 16, %o2 + + std %f56, [%o4] + retl + std %f58, [%o4 + 8] + + SET_SIZE(t4_aes128_cfb128_decrypt) + + + ENTRY(t4_aes192_cfb128_decrypt) + + ldd [%o4], %f56 !IV + ldd [%o4 + 8], %f58 !IV + ldx [%o0], %g1 ! ks[0] + ldx [%o0 + 8], %g2 ! 
ks[1]
+
+cfb128dec_192_loop:
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	TWELVE_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cfb128dec_192_loop
+	add	%o2, 16, %o2
+
+	std	%f56, [%o4]
+	retl
+	std	%f58, [%o4 + 8]
+
+	SET_SIZE(t4_aes192_cfb128_decrypt)
+
+
+	ENTRY(t4_aes256_cfb128_decrypt)
+
+	ldd	[%o4], %f56	!IV
+	ldd	[%o4 + 8], %f58	!IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cfb128dec_256_loop:
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	FOURTEEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cfb128dec_256_loop
+	add	%o2, 16, %o2
+
+	std	%f56, [%o4]
+	retl
+	std	%f58, [%o4 + 8]
+
+	SET_SIZE(t4_aes256_cfb128_decrypt)
+
+#endif
+
+#endif	/* lint || __lint */
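
Note on the cfb128 routines in this hunk: as the in-line comments say, CFB mode reuses the AES encryption rounds for both directions; only the chaining differs. The following C fragment is an illustrative sketch only (not part of the diff) of the per-16-byte-block work done by the t4_aes*_cfb128_decrypt entry points; aes_block_fn and cfb128_decrypt_blocks are hypothetical names standing in for the hardware TEN/TWELVE/FOURTEEN_EROUNDS paths, and the byte count is assumed to be a multiple of 16, as the assembly loops require.

/*
 * Illustrative sketch: C model of the CFB128 decrypt chaining above.
 * aes_block_fn is a hypothetical stand-in for the hardware AES rounds;
 * ks is the opaque expanded key schedule those rounds consume.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef void (*aes_block_fn)(const void *ks, const uint8_t in[16],
    uint8_t out[16]);

static void
cfb128_decrypt_blocks(const void *ks, aes_block_fn encrypt_block,
    const uint8_t *in, uint8_t *out, size_t len, uint8_t iv[16])
{
	uint8_t keystream[16], cblock[16];
	int i;

	/* len is a multiple of 16, as the assembly assumes. */
	for (; len >= 16; len -= 16, in += 16, out += 16) {
		memcpy(cblock, in, 16);		/* save C (allows in == out) */
		encrypt_block(ks, iv, keystream);	/* E_k(feedback) */
		for (i = 0; i < 16; i++)
			out[i] = cblock[i] ^ keystream[i];	/* P = C ^ keystream */
		memcpy(iv, cblock, 16);		/* ciphertext becomes next feedback */
	}
	/* Like the final std/stx in the assembly, iv now holds the last
	 * ciphertext block so a later call can continue the stream. */
}

For encryption the only change is that the freshly produced ciphertext block, rather than the input block, becomes the next feedback value. The TEST_PARALLEL_* variants compute the same recurrence; they peel off one block when the byte count is an odd multiple of 16 and then process two blocks per loop iteration so that two AES operations can overlap in the pipeline.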