/* 7069023 OpenSSL t4 engine improvements: des, 3des, sha-224/384, and remove proprietary code */
/*
* ====================================================================
* Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* [email protected].
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*/
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
*/
/*LINTLIBRARY*/
/*
 * Lint stubs: empty C definitions matching the SPARC T4 AES assembly
 * entry points in the #else branch below.  Compiled only under lint so
 * callers can be type-checked; the real implementations are assembly.
 */
#if defined(lint) || defined(__lint)
#include <sys/types.h>
/*ARGSUSED*/
void t4_aes_expand128(uint64_t *rk, const uint32_t *key)
{ return; }
/*ARGSUSED*/
void t4_aes_expand192(uint64_t *rk, const uint32_t *key)
{ return; }
/*ARGSUSED*/
void t4_aes_expand256(uint64_t *rk, const uint32_t *key)
{ return; }
/*ARGSUSED*/
void t4_aes128_load_keys_for_encrypt(uint64_t *ks)
{ return; }
/*ARGSUSED*/
void t4_aes192_load_keys_for_encrypt(uint64_t *ks)
{ return; }
/*ARGSUSED*/
void t4_aes256_load_keys_for_encrypt(uint64_t *ks)
{ return; }
/*ARGSUSED*/
void t4_aes128_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes192_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes256_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes128_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes192_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes256_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes128_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes192_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes256_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes128_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes192_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes256_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes128_load_keys_for_decrypt(uint64_t *ks)
{ return; }
/*ARGSUSED*/
void t4_aes192_load_keys_for_decrypt(uint64_t *ks)
{ return; }
/*ARGSUSED*/
void t4_aes256_load_keys_for_decrypt(uint64_t *ks)
{ return; }
/*ARGSUSED*/
void t4_aes128_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes192_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes256_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes128_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes192_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes256_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes128_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes192_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
/*ARGSUSED*/
void t4_aes256_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in,
uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
{ return; }
#else /* lint || __lint */
#include<sys/asm_linkage.h>
! void t4_aes_expand128(uint64_t *rk, const uint32_t *key)
!   %o0 = rk : out, expanded round keys, 20 x 64-bit stores (0xa0 bytes)
!   %o1 = key: in,  4 x 32-bit AES-128 key words
! The .byte groups hand-encode the T4 aes_kexpand1/aes_kexpand2
! instructions (mnemonics shown in the ! comments below) for
! assemblers that lack the T4 crypto opcodes.
ENTRY(t4_aes_expand128)
!load key
ld [%o1], %f0
ld [%o1 + 0x4], %f1
ld [%o1 + 0x8], %f2
ld [%o1 + 0xc], %f3
!expand the key
!aes_kexpand1 %f0, %f2, 0x0, %f4
!aes_kexpand2 %f2, %f4, %f6
!aes_kexpand1 %f4, %f6, 0x1, %f8
!aes_kexpand2 %f6, %f8, %f10
!aes_kexpand1 %f8, %f10, 0x2, %f12
!aes_kexpand2 %f10, %f12, %f14
!aes_kexpand1 %f12, %f14, 0x3, %f16
!aes_kexpand2 %f14, %f16, %f18
!aes_kexpand1 %f16, %f18, 0x4, %f20
!aes_kexpand2 %f18, %f20, %f22
!aes_kexpand1 %f20, %f22, 0x5, %f24
!aes_kexpand2 %f22, %f24, %f26
!aes_kexpand1 %f24, %f26, 0x6, %f28
!aes_kexpand2 %f26, %f28, %f30
!aes_kexpand1 %f28, %f30, 0x7, %f32
!aes_kexpand2 %f30, %f32, %f34
!aes_kexpand1 %f32, %f34, 0x8, %f36
!aes_kexpand2 %f34, %f36, %f38
!aes_kexpand1 %f36, %f38, 0x9, %f40
!aes_kexpand2 %f38, %f40, %f42
.byte 0x88, 0xc8, 0x01, 0x02
.byte 0x8d, 0xb0, 0xa6, 0x24
.byte 0x90, 0xc9, 0x03, 0x06
.byte 0x95, 0xb1, 0xa6, 0x28
.byte 0x98, 0xca, 0x05, 0x0a
.byte 0x9d, 0xb2, 0xa6, 0x2c
.byte 0xa0, 0xcb, 0x07, 0x0e
.byte 0xa5, 0xb3, 0xa6, 0x30
.byte 0xa8, 0xcc, 0x09, 0x12
.byte 0xad, 0xb4, 0xa6, 0x34
.byte 0xb0, 0xcd, 0x0b, 0x16
.byte 0xb5, 0xb5, 0xa6, 0x38
.byte 0xb8, 0xce, 0x0d, 0x1a
.byte 0xbd, 0xb6, 0xa6, 0x3c
.byte 0x82, 0xcf, 0x0f, 0x1e
.byte 0x87, 0xb7, 0xa6, 0x21
.byte 0x8a, 0xc8, 0x51, 0x03
.byte 0x8f, 0xb0, 0xe6, 0x25
.byte 0x92, 0xc9, 0x53, 0x07
.byte 0x97, 0xb1, 0xe6, 0x29
!copy expanded key back into array
std %f4, [%o0]
std %f6, [%o0 + 0x8]
std %f8, [%o0 + 0x10]
std %f10, [%o0 + 0x18]
std %f12, [%o0 + 0x20]
std %f14, [%o0 + 0x28]
std %f16, [%o0 + 0x30]
std %f18, [%o0 + 0x38]
std %f20, [%o0 + 0x40]
std %f22, [%o0 + 0x48]
std %f24, [%o0 + 0x50]
std %f26, [%o0 + 0x58]
std %f28, [%o0 + 0x60]
std %f30, [%o0 + 0x68]
std %f32, [%o0 + 0x70]
std %f34, [%o0 + 0x78]
std %f36, [%o0 + 0x80]
std %f38, [%o0 + 0x88]
std %f40, [%o0 + 0x90]
!final store executes in the retl delay slot
retl
std %f42, [%o0 + 0x98]
SET_SIZE(t4_aes_expand128)
! void t4_aes_expand192(uint64_t *rk, const uint32_t *key)
!   %o0 = rk : out, expanded round keys, 23 x 64-bit stores (0xb8 bytes)
!   %o1 = key: in,  6 x 32-bit AES-192 key words
! The .byte groups hand-encode the T4 aes_kexpand1/aes_kexpand2
! instructions (mnemonics shown in the ! comments below) for
! assemblers that lack the T4 crypto opcodes.
ENTRY(t4_aes_expand192)
!load key
ld [%o1], %f0
ld [%o1 + 0x4], %f1
ld [%o1 + 0x8], %f2
ld [%o1 + 0xc], %f3
ld [%o1 + 0x10], %f4
ld [%o1 + 0x14], %f5
!expand the key
!aes_kexpand1 %f0, %f4, 0x0, %f6
!aes_kexpand2 %f2, %f6, %f8
!aes_kexpand2 %f4, %f8, %f10
!aes_kexpand1 %f6, %f10, 0x1, %f12
!aes_kexpand2 %f8, %f12, %f14
!aes_kexpand2 %f10, %f14, %f16
!aes_kexpand1 %f12, %f16, 0x2, %f18
!aes_kexpand2 %f14, %f18, %f20
!aes_kexpand2 %f16, %f20, %f22
!aes_kexpand1 %f18, %f22, 0x3, %f24
!aes_kexpand2 %f20, %f24, %f26
!aes_kexpand2 %f22, %f26, %f28
!aes_kexpand1 %f24, %f28, 0x4, %f30
!aes_kexpand2 %f26, %f30, %f32
!aes_kexpand2 %f28, %f32, %f34
!aes_kexpand1 %f30, %f34, 0x5, %f36
!aes_kexpand2 %f32, %f36, %f38
!aes_kexpand2 %f34, %f38, %f40
!aes_kexpand1 %f36, %f40, 0x6, %f42
!aes_kexpand2 %f38, %f42, %f44
!aes_kexpand2 %f40, %f44, %f46
!aes_kexpand1 %f42, %f46, 0x7, %f48
!aes_kexpand2 %f44, %f48, %f50
.byte 0x8c, 0xc8, 0x01, 0x04
.byte 0x91, 0xb0, 0xa6, 0x26
.byte 0x95, 0xb1, 0x26, 0x28
.byte 0x98, 0xc9, 0x83, 0x0a
.byte 0x9d, 0xb2, 0x26, 0x2c
.byte 0xa1, 0xb2, 0xa6, 0x2e
.byte 0xa4, 0xcb, 0x05, 0x10
.byte 0xa9, 0xb3, 0xa6, 0x32
.byte 0xad, 0xb4, 0x26, 0x34
.byte 0xb0, 0xcc, 0x87, 0x16
.byte 0xb5, 0xb5, 0x26, 0x38
.byte 0xb9, 0xb5, 0xa6, 0x3a
.byte 0xbc, 0xce, 0x09, 0x1c
.byte 0x83, 0xb6, 0xa6, 0x3e
.byte 0x87, 0xb7, 0x26, 0x21
.byte 0x8a, 0xcf, 0x8b, 0x03
.byte 0x8f, 0xb0, 0x66, 0x25
.byte 0x93, 0xb0, 0xe6, 0x27
.byte 0x96, 0xc9, 0x4d, 0x09
.byte 0x9b, 0xb1, 0xe6, 0x2b
.byte 0x9f, 0xb2, 0x66, 0x2d
.byte 0xa2, 0xca, 0xcf, 0x0f
.byte 0xa7, 0xb3, 0x66, 0x31
!copy expanded key back into array
std %f6, [%o0]
std %f8, [%o0 + 0x8]
std %f10, [%o0 + 0x10]
std %f12, [%o0 + 0x18]
std %f14, [%o0 + 0x20]
std %f16, [%o0 + 0x28]
std %f18, [%o0 + 0x30]
std %f20, [%o0 + 0x38]
std %f22, [%o0 + 0x40]
std %f24, [%o0 + 0x48]
std %f26, [%o0 + 0x50]
std %f28, [%o0 + 0x58]
std %f30, [%o0 + 0x60]
std %f32, [%o0 + 0x68]
std %f34, [%o0 + 0x70]
std %f36, [%o0 + 0x78]
std %f38, [%o0 + 0x80]
std %f40, [%o0 + 0x88]
std %f42, [%o0 + 0x90]
std %f44, [%o0 + 0x98]
std %f46, [%o0 + 0xa0]
std %f48, [%o0 + 0xa8]
!final store executes in the retl delay slot
retl
std %f50, [%o0 + 0xb0]
SET_SIZE(t4_aes_expand192)
! void t4_aes_expand256(uint64_t *rk, const uint32_t *key)
!   %o0 = rk : out, expanded round keys, 26 x 64-bit stores (0xd0 bytes)
!   %o1 = key: in,  8 x 32-bit AES-256 key words
! The .byte groups hand-encode the T4 aes_kexpand0/aes_kexpand1/
! aes_kexpand2 instructions (mnemonics shown in the ! comments below)
! for assemblers that lack the T4 crypto opcodes.
ENTRY(t4_aes_expand256)
!load key
ld [%o1], %f0
ld [%o1 + 0x4], %f1
ld [%o1 + 0x8], %f2
ld [%o1 + 0xc], %f3
ld [%o1 + 0x10], %f4
ld [%o1 + 0x14], %f5
ld [%o1 + 0x18], %f6
ld [%o1 + 0x1c], %f7
!expand the key
!aes_kexpand1 %f0, %f6, 0x0, %f8
!aes_kexpand2 %f2, %f8, %f10
!aes_kexpand0 %f4, %f10, %f12
!aes_kexpand2 %f6, %f12, %f14
!aes_kexpand1 %f8, %f14, 0x1, %f16
!aes_kexpand2 %f10, %f16, %f18
!aes_kexpand0 %f12, %f18, %f20
!aes_kexpand2 %f14, %f20, %f22
!aes_kexpand1 %f16, %f22, 0x2, %f24
!aes_kexpand2 %f18, %f24, %f26
!aes_kexpand0 %f20, %f26, %f28
!aes_kexpand2 %f22, %f28, %f30
!aes_kexpand1 %f24, %f30, 0x3, %f32
!aes_kexpand2 %f26, %f32, %f34
!aes_kexpand0 %f28, %f34, %f36
!aes_kexpand2 %f30, %f36, %f38
!aes_kexpand1 %f32, %f38, 0x4, %f40
!aes_kexpand2 %f34, %f40, %f42
!aes_kexpand0 %f36, %f42, %f44
!aes_kexpand2 %f38, %f44, %f46
!aes_kexpand1 %f40, %f46, 0x5, %f48
!aes_kexpand2 %f42, %f48, %f50
!aes_kexpand0 %f44, %f50, %f52
!aes_kexpand2 %f46, %f52, %f54
!aes_kexpand1 %f48, %f54, 0x6, %f56
!aes_kexpand2 %f50, %f56, %f58
.byte 0x90, 0xc8, 0x01, 0x06
.byte 0x95, 0xb0, 0xa6, 0x28
.byte 0x99, 0xb1, 0x26, 0x0a
.byte 0x9d, 0xb1, 0xa6, 0x2c
.byte 0xa0, 0xca, 0x03, 0x0e
.byte 0xa5, 0xb2, 0xa6, 0x30
.byte 0xa9, 0xb3, 0x26, 0x12
.byte 0xad, 0xb3, 0xa6, 0x34
.byte 0xb0, 0xcc, 0x05, 0x16
.byte 0xb5, 0xb4, 0xa6, 0x38
.byte 0xb9, 0xb5, 0x26, 0x1a
.byte 0xbd, 0xb5, 0xa6, 0x3c
.byte 0x82, 0xce, 0x07, 0x1e
.byte 0x87, 0xb6, 0xa6, 0x21
.byte 0x8b, 0xb7, 0x26, 0x03
.byte 0x8f, 0xb7, 0xa6, 0x25
.byte 0x92, 0xc8, 0x49, 0x07
.byte 0x97, 0xb0, 0xe6, 0x29
.byte 0x9b, 0xb1, 0x66, 0x0b
.byte 0x9f, 0xb1, 0xe6, 0x2d
.byte 0xa2, 0xca, 0x4b, 0x0f
.byte 0xa7, 0xb2, 0xe6, 0x31
.byte 0xab, 0xb3, 0x66, 0x13
.byte 0xaf, 0xb3, 0xe6, 0x35
.byte 0xb2, 0xcc, 0x4d, 0x17
.byte 0xb7, 0xb4, 0xe6, 0x39
!copy expanded key back into array
std %f8, [%o0]
std %f10, [%o0 + 0x8]
std %f12, [%o0 + 0x10]
std %f14, [%o0 + 0x18]
std %f16, [%o0 + 0x20]
std %f18, [%o0 + 0x28]
std %f20, [%o0 + 0x30]
std %f22, [%o0 + 0x38]
std %f24, [%o0 + 0x40]
std %f26, [%o0 + 0x48]
std %f28, [%o0 + 0x50]
std %f30, [%o0 + 0x58]
std %f32, [%o0 + 0x60]
std %f34, [%o0 + 0x68]
std %f36, [%o0 + 0x70]
std %f38, [%o0 + 0x78]
std %f40, [%o0 + 0x80]
std %f42, [%o0 + 0x88]
std %f44, [%o0 + 0x90]
std %f46, [%o0 + 0x98]
std %f48, [%o0 + 0xa0]
std %f50, [%o0 + 0xa8]
std %f52, [%o0 + 0xb0]
std %f54, [%o0 + 0xb8]
std %f56, [%o0 + 0xc0]
!final store executes in the retl delay slot
retl
std %f58, [%o0 + 0xc8]
SET_SIZE(t4_aes_expand256)
! FIRST_TWO_EROUNDS: two AES encrypt rounds using the round keys held
! in %f0-%f6 on the data in %f60/%f62 (%f56/%f58 are temporaries).
! The .byte lines hand-encode the T4 aes_eround01/aes_eround23
! instructions listed in the ! comments that follow the macro.
#define FIRST_TWO_EROUNDS \
.byte 0xb2, 0xc8, 0x3e, 0x1d ; \
.byte 0xb6, 0xc8, 0xbe, 0x3d ; \
.byte 0xba, 0xc9, 0x36, 0x19 ; \
.byte 0xbe, 0xc9, 0xb6, 0x39
!aes_eround01 %f0, %f60, %f62, %f56 ; \
!aes_eround23 %f2, %f60, %f62, %f58 ; \
!aes_eround01 %f4, %f56, %f58, %f60 ; \
!aes_eround23 %f6, %f56, %f58, %f62
! MID_TWO_EROUNDS: two AES encrypt rounds using the round keys held in
! %f8-%f14 on the data in %f60/%f62 (%f56/%f58 are temporaries).
! The .byte lines hand-encode the T4 aes_eround01/aes_eround23
! instructions listed in the ! comments that follow the macro.
#define MID_TWO_EROUNDS \
.byte 0xb2, 0xca, 0x3e, 0x1d ; \
.byte 0xb6, 0xca, 0xbe, 0x3d ; \
.byte 0xba, 0xcb, 0x36, 0x19 ; \
.byte 0xbe, 0xcb, 0xb6, 0x39
!aes_eround01 %f8, %f60, %f62, %f56 ; \
!aes_eround23 %f10, %f60, %f62, %f58 ; \
!aes_eround01 %f12, %f56, %f58, %f60 ; \
!aes_eround23 %f14, %f56, %f58, %f62
! MID_TWO_EROUNDS_2: like MID_TWO_EROUNDS but interleaves the same two
! rounds over two independent 128-bit blocks (%f0/%f2 and %f60/%f62)
! to hide instruction latency.  Mnemonics in the ! comments below.
#define MID_TWO_EROUNDS_2 \
.byte 0x8c, 0xca, 0x04, 0x00 ; \
.byte 0x88, 0xca, 0x84, 0x20 ; \
.byte 0xb2, 0xca, 0x3e, 0x1d ; \
.byte 0xb6, 0xca, 0xbe, 0x3d ; \
.byte 0x80, 0xcb, 0x08, 0x06 ; \
.byte 0x84, 0xcb, 0x88, 0x26 ; \
.byte 0xba, 0xcb, 0x36, 0x19 ; \
.byte 0xbe, 0xcb, 0xb6, 0x39
!aes_eround01 %f8, %f0, %f2, %f6 ; \
!aes_eround23 %f10, %f0, %f2, %f4 ; \
!aes_eround01 %f8, %f60, %f62, %f56 ; \
!aes_eround23 %f10, %f60, %f62, %f58 ; \
!aes_eround01 %f12, %f6, %f4, %f0 ; \
!aes_eround23 %f14, %f6, %f4, %f2 ; \
!aes_eround01 %f12, %f56, %f58, %f60 ; \
!aes_eround23 %f14, %f56, %f58, %f62
! TEN_EROUNDS: ten AES encrypt rounds (round keys in %f16-%f54) on the
! block in %f60/%f62, temporaries %f56/%f58.  The last two encodings
! are the _l (last-round) forms.  Mnemonics in the ! comments below.
#define TEN_EROUNDS \
.byte 0xb2, 0xcc, 0x3e, 0x1d ; \
.byte 0xb6, 0xcc, 0xbe, 0x3d ; \
.byte 0xba, 0xcd, 0x36, 0x19 ; \
.byte 0xbe, 0xcd, 0xb6, 0x39 ; \
.byte 0xb2, 0xce, 0x3e, 0x1d ; \
.byte 0xb6, 0xce, 0xbe, 0x3d ; \
.byte 0xba, 0xcf, 0x36, 0x19 ; \
.byte 0xbe, 0xcf, 0xb6, 0x39 ; \
.byte 0xb2, 0xc8, 0x7e, 0x1d ; \
.byte 0xb6, 0xc8, 0xfe, 0x3d ; \
.byte 0xba, 0xc9, 0x76, 0x19 ; \
.byte 0xbe, 0xc9, 0xf6, 0x39 ; \
.byte 0xb2, 0xca, 0x7e, 0x1d ; \
.byte 0xb6, 0xca, 0xfe, 0x3d ; \
.byte 0xba, 0xcb, 0x76, 0x19 ; \
.byte 0xbe, 0xcb, 0xf6, 0x39 ; \
.byte 0xb2, 0xcc, 0x7e, 0x1d ; \
.byte 0xb6, 0xcc, 0xfe, 0x3d ; \
.byte 0xba, 0xcd, 0x76, 0x99 ; \
.byte 0xbe, 0xcd, 0xf6, 0xb9
!aes_eround01 %f16, %f60, %f62, %f56 ; \
!aes_eround23 %f18, %f60, %f62, %f58 ; \
!aes_eround01 %f20, %f56, %f58, %f60 ; \
!aes_eround23 %f22, %f56, %f58, %f62 ; \
!aes_eround01 %f24, %f60, %f62, %f56 ; \
!aes_eround23 %f26, %f60, %f62, %f58 ; \
!aes_eround01 %f28, %f56, %f58, %f60 ; \
!aes_eround23 %f30, %f56, %f58, %f62 ; \
!aes_eround01 %f32, %f60, %f62, %f56 ; \
!aes_eround23 %f34, %f60, %f62, %f58 ; \
!aes_eround01 %f36, %f56, %f58, %f60 ; \
!aes_eround23 %f38, %f56, %f58, %f62 ; \
!aes_eround01 %f40, %f60, %f62, %f56 ; \
!aes_eround23 %f42, %f60, %f62, %f58 ; \
!aes_eround01 %f44, %f56, %f58, %f60 ; \
!aes_eround23 %f46, %f56, %f58, %f62 ; \
!aes_eround01 %f48, %f60, %f62, %f56 ; \
!aes_eround23 %f50, %f60, %f62, %f58 ; \
!aes_eround01_l %f52, %f56, %f58, %f60 ; \
!aes_eround23_l %f54, %f56, %f58, %f62
! TEN_EROUNDS_2: like TEN_EROUNDS but interleaves the ten rounds over
! two independent 128-bit blocks (%f0/%f2 and %f60/%f62) to hide
! instruction latency.  Mnemonics in the ! comments below.
#define TEN_EROUNDS_2 \
.byte 0x8c, 0xcc, 0x04, 0x00 ; \
.byte 0x88, 0xcc, 0x84, 0x20 ; \
.byte 0xb2, 0xcc, 0x3e, 0x1d ; \
.byte 0xb6, 0xcc, 0xbe, 0x3d ; \
.byte 0x80, 0xcd, 0x08, 0x06 ; \
.byte 0x84, 0xcd, 0x88, 0x26 ; \
.byte 0xba, 0xcd, 0x36, 0x19 ; \
.byte 0xbe, 0xcd, 0xb6, 0x39 ; \
.byte 0x8c, 0xce, 0x04, 0x00 ; \
.byte 0x88, 0xce, 0x84, 0x20 ; \
.byte 0xb2, 0xce, 0x3e, 0x1d ; \
.byte 0xb6, 0xce, 0xbe, 0x3d ; \
.byte 0x80, 0xcf, 0x08, 0x06 ; \
.byte 0x84, 0xcf, 0x88, 0x26 ; \
.byte 0xba, 0xcf, 0x36, 0x19 ; \
.byte 0xbe, 0xcf, 0xb6, 0x39 ; \
.byte 0x8c, 0xc8, 0x44, 0x00 ; \
.byte 0x88, 0xc8, 0xc4, 0x20 ; \
.byte 0xb2, 0xc8, 0x7e, 0x1d ; \
.byte 0xb6, 0xc8, 0xfe, 0x3d ; \
.byte 0x80, 0xc9, 0x48, 0x06 ; \
.byte 0x84, 0xc9, 0xc8, 0x26 ; \
.byte 0xba, 0xc9, 0x76, 0x19 ; \
.byte 0xbe, 0xc9, 0xf6, 0x39 ; \
.byte 0x8c, 0xca, 0x44, 0x00 ; \
.byte 0x88, 0xca, 0xc4, 0x20 ; \
.byte 0xb2, 0xca, 0x7e, 0x1d ; \
.byte 0xb6, 0xca, 0xfe, 0x3d ; \
.byte 0x80, 0xcb, 0x48, 0x06 ; \
.byte 0x84, 0xcb, 0xc8, 0x26 ; \
.byte 0xba, 0xcb, 0x76, 0x19 ; \
.byte 0xbe, 0xcb, 0xf6, 0x39 ; \
.byte 0x8c, 0xcc, 0x44, 0x00 ; \
.byte 0x88, 0xcc, 0xc4, 0x20 ; \
.byte 0xb2, 0xcc, 0x7e, 0x1d ; \
.byte 0xb6, 0xcc, 0xfe, 0x3d ; \
.byte 0x80, 0xcd, 0x48, 0x86 ; \
.byte 0x84, 0xcd, 0xc8, 0xa6 ; \
.byte 0xba, 0xcd, 0x76, 0x99 ; \
.byte 0xbe, 0xcd, 0xf6, 0xb9
!aes_eround01 %f16, %f0, %f2, %f6 ; \
!aes_eround23 %f18, %f0, %f2, %f4 ; \
!aes_eround01 %f16, %f60, %f62, %f56 ; \
!aes_eround23 %f18, %f60, %f62, %f58 ; \
!aes_eround01 %f20, %f6, %f4, %f0 ; \
!aes_eround23 %f22, %f6, %f4, %f2 ; \
!aes_eround01 %f20, %f56, %f58, %f60 ; \
!aes_eround23 %f22, %f56, %f58, %f62 ; \
!aes_eround01 %f24, %f0, %f2, %f6 ; \
!aes_eround23 %f26, %f0, %f2, %f4 ; \
!aes_eround01 %f24, %f60, %f62, %f56 ; \
!aes_eround23 %f26, %f60, %f62, %f58 ; \
!aes_eround01 %f28, %f6, %f4, %f0 ; \
!aes_eround23 %f30, %f6, %f4, %f2 ; \
!aes_eround01 %f28, %f56, %f58, %f60 ; \
!aes_eround23 %f30, %f56, %f58, %f62 ; \
!aes_eround01 %f32, %f0, %f2, %f6 ; \
!aes_eround23 %f34, %f0, %f2, %f4 ; \
!aes_eround01 %f32, %f60, %f62, %f56 ; \
!aes_eround23 %f34, %f60, %f62, %f58 ; \
!aes_eround01 %f36, %f6, %f4, %f0 ; \
!aes_eround23 %f38, %f6, %f4, %f2 ; \
!aes_eround01 %f36, %f56, %f58, %f60 ; \
!aes_eround23 %f38, %f56, %f58, %f62 ; \
!aes_eround01 %f40, %f0, %f2, %f6 ; \
!aes_eround23 %f42, %f0, %f2, %f4 ; \
!aes_eround01 %f40, %f60, %f62, %f56 ; \
!aes_eround23 %f42, %f60, %f62, %f58 ; \
!aes_eround01 %f44, %f6, %f4, %f0 ; \
!aes_eround23 %f46, %f6, %f4, %f2 ; \
!aes_eround01 %f44, %f56, %f58, %f60 ; \
!aes_eround23 %f46, %f56, %f58, %f62 ; \
!aes_eround01 %f48, %f0, %f2, %f6 ; \
!aes_eround23 %f50, %f0, %f2, %f4 ; \
!aes_eround01 %f48, %f60, %f62, %f56 ; \
!aes_eround23 %f50, %f60, %f62, %f58 ; \
!aes_eround01_l %f52, %f6, %f4, %f0 ; \
!aes_eround23_l %f54, %f6, %f4, %f2 ; \
!aes_eround01_l %f52, %f56, %f58, %f60 ; \
!aes_eround23_l %f54, %f56, %f58, %f62
! Composite encrypt-round macros: 12 rounds (AES-192) and 14 rounds
! (AES-256); the _2 variant interleaves two independent blocks.
#define TWELVE_EROUNDS \
MID_TWO_EROUNDS ; \
TEN_EROUNDS
#define TWELVE_EROUNDS_2 \
MID_TWO_EROUNDS_2 ; \
TEN_EROUNDS_2
#define FOURTEEN_EROUNDS \
FIRST_TWO_EROUNDS ; \
TWELVE_EROUNDS
! FOURTEEN_EROUNDS_2: fourteen AES encrypt rounds (AES-256) interleaved
! over two independent blocks, with ldd reloads from [%o0 + ...] mixed
! between the crypto ops to overlap memory latency.  The .byte lines
! hand-encode the T4 instructions listed in the ! comments below.
#define FOURTEEN_EROUNDS_2 \
.byte 0xb0, 0xc8, 0x2c, 0x14 ; \
.byte 0xac, 0xc8, 0xac, 0x34 ; \
ldd [%o0 + 0x60], %f20 ; \
.byte 0xb2, 0xc8, 0x3e, 0x1d ; \
.byte 0xb6, 0xc8, 0xbe, 0x3d ; \
.byte 0x80, 0xc9, 0x2c, 0x18 ; \
.byte 0x84, 0xc9, 0xac, 0x38 ;\
ldd [%o0 + 0x68], %f22 ; \
.byte 0xba, 0xc9, 0x36, 0x19 ; \
ldd [%o0 + 0x70], %f24 ; \
.byte 0xbe, 0xc9, 0xb6, 0x39 ; \
.byte 0x8c, 0xca, 0x04, 0x00 ; \
.byte 0x88, 0xca, 0x84, 0x20 ; \
.byte 0xb2, 0xca, 0x3e, 0x1d ; \
.byte 0xb6, 0xca, 0xbe, 0x3d ; \
.byte 0x80, 0xcb, 0x08, 0x06 ; \
.byte 0x84, 0xcb, 0x88, 0x26 ; \
.byte 0xba, 0xcb, 0x36, 0x19 ; \
.byte 0xbe, 0xcb, 0xb6, 0x39 ; \
.byte 0x8c, 0xcc, 0x04, 0x00 ; \
.byte 0x88, 0xcc, 0x84, 0x20 ; \
.byte 0xb2, 0xcc, 0x3e, 0x1d ; \
.byte 0xb6, 0xcc, 0xbe, 0x3d ; \
.byte 0x80, 0xcd, 0x08, 0x06 ; \
.byte 0x84, 0xcd, 0x88, 0x26 ; \
.byte 0xba, 0xcd, 0x36, 0x19 ; \
.byte 0xbe, 0xcd, 0xb6, 0x39 ; \
.byte 0x8c, 0xce, 0x04, 0x00 ; \
.byte 0x88, 0xce, 0x84, 0x20 ; \
.byte 0xb2, 0xce, 0x3e, 0x1d ; \
.byte 0xb6, 0xce, 0xbe, 0x3d ; \
.byte 0x80, 0xcf, 0x08, 0x06 ; \
.byte 0x84, 0xcf, 0x88, 0x26 ; \
.byte 0xba, 0xcf, 0x36, 0x19 ; \
.byte 0xbe, 0xcf, 0xb6, 0x39 ; \
.byte 0x8c, 0xc8, 0x44, 0x00 ; \
.byte 0x88, 0xc8, 0xc4, 0x20 ; \
.byte 0xb2, 0xc8, 0x7e, 0x1d ; \
.byte 0xb6, 0xc8, 0xfe, 0x3d ; \
.byte 0x80, 0xc9, 0x48, 0x06 ; \
.byte 0x84, 0xc9, 0xc8, 0x26 ; \
.byte 0xba, 0xc9, 0x76, 0x19 ; \
.byte 0xbe, 0xc9, 0xf6, 0x39 ; \
.byte 0x8c, 0xca, 0x44, 0x00 ; \
.byte 0x88, 0xca, 0xc4, 0x20 ; \
.byte 0xb2, 0xca, 0x7e, 0x1d ; \
.byte 0xb6, 0xca, 0xfe, 0x3d ; \
.byte 0x80, 0xcb, 0x48, 0x06 ; \
.byte 0x84, 0xcb, 0xc8, 0x26 ; \
.byte 0xba, 0xcb, 0x76, 0x19 ; \
.byte 0xbe, 0xcb, 0xf6, 0x39 ; \
.byte 0x8c, 0xcc, 0x44, 0x00 ; \
.byte 0x88, 0xcc, 0xc4, 0x20 ; \
ldd [%o0 + 0x10], %f0 ; \
.byte 0xb2, 0xcc, 0x7e, 0x1d ; \
ldd [%o0 + 0x18], %f2 ; \
.byte 0xb6, 0xcc, 0xfe, 0x3d ; \
.byte 0xa8, 0xcd, 0x48, 0x86 ; \
.byte 0xac, 0xcd, 0xc8, 0xa6 ; \
ldd [%o0 + 0x20], %f4 ; \
.byte 0xba, 0xcd, 0x76, 0x99 ; \
ldd [%o0 + 0x28], %f6 ; \
.byte 0xbe, 0xcd, 0xf6, 0xb9
!aes_eround01 %f0, %f20, %f22, %f24 ; \
!aes_eround23 %f2, %f20, %f22, %f22 ; \
!ldd [%o0 + 0x60], %f20 ; \
!aes_eround01 %f0, %f60, %f62, %f56 ; \
!aes_eround23 %f2, %f60, %f62, %f58 ; \
!aes_eround01 %f4, %f24, %f22, %f0 ; \
!aes_eround23 %f6, %f24, %f22, %f2 ; \
!ldd [%o0 + 0x68], %f22 ; \
!aes_eround01 %f4, %f56, %f58, %f60 ; \
!ldd [%o0 + 0x70], %f24 ; \
!aes_eround23 %f6, %f56, %f58, %f62 ; \
!aes_eround01 %f8, %f0, %f2, %f6 ; \
!aes_eround23 %f10, %f0, %f2, %f4 ; \
!aes_eround01 %f8, %f60, %f62, %f56 ; \
!aes_eround23 %f10, %f60, %f62, %f58 ; \
!aes_eround01 %f12, %f6, %f4, %f0 ; \
!aes_eround23 %f14, %f6, %f4, %f2 ; \
!aes_eround01 %f12, %f56, %f58, %f60 ; \
!aes_eround23 %f14, %f56, %f58, %f62 ; \
!aes_eround01 %f16, %f0, %f2, %f6 ; \
!aes_eround23 %f18, %f0, %f2, %f4 ; \
!aes_eround01 %f16, %f60, %f62, %f56 ; \
!aes_eround23 %f18, %f60, %f62, %f58 ; \
!aes_eround01 %f20, %f6, %f4, %f0 ; \
!aes_eround23 %f22, %f6, %f4, %f2 ; \
!aes_eround01 %f20, %f56, %f58, %f60 ; \
!aes_eround23 %f22, %f56, %f58, %f62 ; \
!aes_eround01 %f24, %f0, %f2, %f6 ; \
!aes_eround23 %f26, %f0, %f2, %f4 ; \
!aes_eround01 %f24, %f60, %f62, %f56 ; \
!aes_eround23 %f26, %f60, %f62, %f58 ; \
!aes_eround01 %f28, %f6, %f4, %f0 ; \
!aes_eround23 %f30, %f6, %f4, %f2 ; \
!aes_eround01 %f28, %f56, %f58, %f60 ; \
!aes_eround23 %f30, %f56, %f58, %f62 ; \
!aes_eround01 %f32, %f0, %f2, %f6 ; \
!aes_eround23 %f34, %f0, %f2, %f4 ; \
!aes_eround01 %f32, %f60, %f62, %f56 ; \
!aes_eround23 %f34, %f60, %f62, %f58 ; \
!aes_eround01 %f36, %f6, %f4, %f0 ; \
!aes_eround23 %f38, %f6, %f4, %f2 ; \
!aes_eround01 %f36, %f56, %f58, %f60 ; \
!aes_eround23 %f38, %f56, %f58, %f62 ; \
!aes_eround01 %f40, %f0, %f2, %f6 ; \
!aes_eround23 %f42, %f0, %f2, %f4 ; \
!aes_eround01 %f40, %f60, %f62, %f56 ; \
!aes_eround23 %f42, %f60, %f62, %f58 ; \
!aes_eround01 %f44, %f6, %f4, %f0 ; \
!aes_eround23 %f46, %f6, %f4, %f2 ; \
!aes_eround01 %f44, %f56, %f58, %f60 ; \
!aes_eround23 %f46, %f56, %f58, %f62 ; \
!aes_eround01 %f48, %f0, %f2, %f6 ; \
!aes_eround23 %f50, %f0, %f2, %f4 ; \
!ldd [%o0 + 0x10], %f0 ; \
!aes_eround01 %f48, %f60, %f62, %f56 ; \
!ldd [%o0 + 0x18], %f2 ; \
!aes_eround23 %f50, %f60, %f62, %f58 ; \
!aes_eround01_l %f52, %f6, %f4, %f20 ; \
!aes_eround23_l %f54, %f6, %f4, %f22 ; \
!ldd [%o0 + 0x20], %f4 ; \
!aes_eround01_l %f52, %f56, %f58, %f60 ; \
!ldd [%o0 + 0x28], %f6 ; \
!aes_eround23_l %f54, %f56, %f58, %f62
! FIRST_TWO_DROUNDS: two AES decrypt rounds using the round keys held
! in %f0-%f6 on the data in %f60/%f62 (%f56/%f58 are temporaries).
! The .byte lines hand-encode the T4 aes_dround01/aes_dround23
! instructions listed in the ! comments that follow the macro.
#define FIRST_TWO_DROUNDS \
.byte 0xb2, 0xc8, 0x3e, 0x5d ; \
.byte 0xb6, 0xc8, 0xbe, 0x7d ; \
.byte 0xba, 0xc9, 0x36, 0x59 ; \
.byte 0xbe, 0xc9, 0xb6, 0x79
!aes_dround01 %f0, %f60, %f62, %f56 ; \
!aes_dround23 %f2, %f60, %f62, %f58 ; \
!aes_dround01 %f4, %f56, %f58, %f60 ; \
!aes_dround23 %f6, %f56, %f58, %f62
! MID_TWO_DROUNDS: two AES decrypt rounds using the round keys held in
! %f8-%f14 on the data in %f60/%f62 (%f56/%f58 are temporaries).
! The .byte lines hand-encode the T4 aes_dround01/aes_dround23
! instructions listed in the ! comments that follow the macro.
#define MID_TWO_DROUNDS \
.byte 0xb2, 0xca, 0x3e, 0x5d ; \
.byte 0xb6, 0xca, 0xbe, 0x7d ; \
.byte 0xba, 0xcb, 0x36, 0x59 ; \
.byte 0xbe, 0xcb, 0xb6, 0x79
!aes_dround01 %f8, %f60, %f62, %f56 ; \
!aes_dround23 %f10, %f60, %f62, %f58 ; \
!aes_dround01 %f12, %f56, %f58, %f60 ; \
!aes_dround23 %f14, %f56, %f58, %f62
! MID_TWO_DROUNDS_2: like MID_TWO_DROUNDS but interleaves the same two
! rounds over two independent 128-bit blocks (%f0/%f2 and %f60/%f62)
! to hide instruction latency.  Mnemonics in the ! comments below.
#define MID_TWO_DROUNDS_2 \
.byte 0x8c, 0xca, 0x04, 0x40 ; \
.byte 0x88, 0xca, 0x84, 0x60 ; \
.byte 0xb2, 0xca, 0x3e, 0x5d ; \
.byte 0xb6, 0xca, 0xbe, 0x7d ; \
.byte 0x80, 0xcb, 0x08, 0x46 ; \
.byte 0x84, 0xcb, 0x88, 0x66 ; \
.byte 0xba, 0xcb, 0x36, 0x59 ; \
.byte 0xbe, 0xcb, 0xb6, 0x79
!aes_dround01 %f8, %f0, %f2, %f6 ; \
!aes_dround23 %f10, %f0, %f2, %f4 ; \
!aes_dround01 %f8, %f60, %f62, %f56 ; \
!aes_dround23 %f10, %f60, %f62, %f58 ; \
!aes_dround01 %f12, %f6, %f4, %f0 ; \
!aes_dround23 %f14, %f6, %f4, %f2 ; \
!aes_dround01 %f12, %f56, %f58, %f60 ; \
!aes_dround23 %f14, %f56, %f58, %f62
! TEN_DROUNDS: ten AES decrypt rounds (round keys in %f16-%f54) on the
! block in %f60/%f62, temporaries %f56/%f58.  The last two encodings
! are the _l (last-round) forms.  Mnemonics in the ! comments below.
#define TEN_DROUNDS \
.byte 0xb2, 0xcc, 0x3e, 0x5d ; \
.byte 0xb6, 0xcc, 0xbe, 0x7d ; \
.byte 0xba, 0xcd, 0x36, 0x59 ; \
.byte 0xbe, 0xcd, 0xb6, 0x79 ; \
.byte 0xb2, 0xce, 0x3e, 0x5d ; \
.byte 0xb6, 0xce, 0xbe, 0x7d ; \
.byte 0xba, 0xcf, 0x36, 0x59 ; \
.byte 0xbe, 0xcf, 0xb6, 0x79 ; \
.byte 0xb2, 0xc8, 0x7e, 0x5d ; \
.byte 0xb6, 0xc8, 0xfe, 0x7d ; \
.byte 0xba, 0xc9, 0x76, 0x59 ; \
.byte 0xbe, 0xc9, 0xf6, 0x79 ; \
.byte 0xb2, 0xca, 0x7e, 0x5d ; \
.byte 0xb6, 0xca, 0xfe, 0x7d ; \
.byte 0xba, 0xcb, 0x76, 0x59 ; \
.byte 0xbe, 0xcb, 0xf6, 0x79 ; \
.byte 0xb2, 0xcc, 0x7e, 0x5d ; \
.byte 0xb6, 0xcc, 0xfe, 0x7d ; \
.byte 0xba, 0xcd, 0x76, 0xd9 ; \
.byte 0xbe, 0xcd, 0xf6, 0xf9
!aes_dround01 %f16, %f60, %f62, %f56 ; \
!aes_dround23 %f18, %f60, %f62, %f58 ; \
!aes_dround01 %f20, %f56, %f58, %f60 ; \
!aes_dround23 %f22, %f56, %f58, %f62 ; \
!aes_dround01 %f24, %f60, %f62, %f56 ; \
!aes_dround23 %f26, %f60, %f62, %f58 ; \
!aes_dround01 %f28, %f56, %f58, %f60 ; \
!aes_dround23 %f30, %f56, %f58, %f62 ; \
!aes_dround01 %f32, %f60, %f62, %f56 ; \
!aes_dround23 %f34, %f60, %f62, %f58 ; \
!aes_dround01 %f36, %f56, %f58, %f60 ; \
!aes_dround23 %f38, %f56, %f58, %f62 ; \
!aes_dround01 %f40, %f60, %f62, %f56 ; \
!aes_dround23 %f42, %f60, %f62, %f58 ; \
!aes_dround01 %f44, %f56, %f58, %f60 ; \
!aes_dround23 %f46, %f56, %f58, %f62 ; \
!aes_dround01 %f48, %f60, %f62, %f56 ; \
!aes_dround23 %f50, %f60, %f62, %f58 ; \
!aes_dround01_l %f52, %f56, %f58, %f60 ; \
!aes_dround23_l %f54, %f56, %f58, %f62
! TEN_DROUNDS_2: like TEN_DROUNDS but interleaves the ten rounds over
! two independent 128-bit blocks (%f0/%f2 and %f60/%f62) to hide
! instruction latency.  Mnemonics in the ! comments below.
#define TEN_DROUNDS_2 \
.byte 0x8c, 0xcc, 0x04, 0x40 ; \
.byte 0x88, 0xcc, 0x84, 0x60 ; \
.byte 0xb2, 0xcc, 0x3e, 0x5d ; \
.byte 0xb6, 0xcc, 0xbe, 0x7d ; \
.byte 0x80, 0xcd, 0x08, 0x46 ; \
.byte 0x84, 0xcd, 0x88, 0x66 ; \
.byte 0xba, 0xcd, 0x36, 0x59 ; \
.byte 0xbe, 0xcd, 0xb6, 0x79 ; \
.byte 0x8c, 0xce, 0x04, 0x40 ; \
.byte 0x88, 0xce, 0x84, 0x60 ; \
.byte 0xb2, 0xce, 0x3e, 0x5d ; \
.byte 0xb6, 0xce, 0xbe, 0x7d ; \
.byte 0x80, 0xcf, 0x08, 0x46 ; \
.byte 0x84, 0xcf, 0x88, 0x66 ; \
.byte 0xba, 0xcf, 0x36, 0x59 ; \
.byte 0xbe, 0xcf, 0xb6, 0x79 ; \
.byte 0x8c, 0xc8, 0x44, 0x40 ; \
.byte 0x88, 0xc8, 0xc4, 0x60 ; \
.byte 0xb2, 0xc8, 0x7e, 0x5d ; \
.byte 0xb6, 0xc8, 0xfe, 0x7d ; \
.byte 0x80, 0xc9, 0x48, 0x46 ; \
.byte 0x84, 0xc9, 0xc8, 0x66 ; \
.byte 0xba, 0xc9, 0x76, 0x59 ; \
.byte 0xbe, 0xc9, 0xf6, 0x79 ; \
.byte 0x8c, 0xca, 0x44, 0x40 ; \
.byte 0x88, 0xca, 0xc4, 0x60 ; \
.byte 0xb2, 0xca, 0x7e, 0x5d ; \
.byte 0xb6, 0xca, 0xfe, 0x7d ; \
.byte 0x80, 0xcb, 0x48, 0x46 ; \
.byte 0x84, 0xcb, 0xc8, 0x66 ; \
.byte 0xba, 0xcb, 0x76, 0x59 ; \
.byte 0xbe, 0xcb, 0xf6, 0x79 ; \
.byte 0x8c, 0xcc, 0x44, 0x40 ; \
.byte 0x88, 0xcc, 0xc4, 0x60 ; \
.byte 0xb2, 0xcc, 0x7e, 0x5d ; \
.byte 0xb6, 0xcc, 0xfe, 0x7d ; \
.byte 0x80, 0xcd, 0x48, 0xc6 ; \
.byte 0x84, 0xcd, 0xc8, 0xe6 ; \
.byte 0xba, 0xcd, 0x76, 0xd9 ; \
.byte 0xbe, 0xcd, 0xf6, 0xf9
!aes_dround01 %f16, %f0, %f2, %f6 ; \
!aes_dround23 %f18, %f0, %f2, %f4 ; \
!aes_dround01 %f16, %f60, %f62, %f56 ; \
!aes_dround23 %f18, %f60, %f62, %f58 ; \
!aes_dround01 %f20, %f6, %f4, %f0 ; \
!aes_dround23 %f22, %f6, %f4, %f2 ; \
!aes_dround01 %f20, %f56, %f58, %f60 ; \
!aes_dround23 %f22, %f56, %f58, %f62 ; \
!aes_dround01 %f24, %f0, %f2, %f6 ; \
!aes_dround23 %f26, %f0, %f2, %f4 ; \
!aes_dround01 %f24, %f60, %f62, %f56 ; \
!aes_dround23 %f26, %f60, %f62, %f58 ; \
!aes_dround01 %f28, %f6, %f4, %f0 ; \
!aes_dround23 %f30, %f6, %f4, %f2 ; \
!aes_dround01 %f28, %f56, %f58, %f60 ; \
!aes_dround23 %f30, %f56, %f58, %f62 ; \
!aes_dround01 %f32, %f0, %f2, %f6 ; \
!aes_dround23 %f34, %f0, %f2, %f4 ; \
!aes_dround01 %f32, %f60, %f62, %f56 ; \
!aes_dround23 %f34, %f60, %f62, %f58 ; \
!aes_dround01 %f36, %f6, %f4, %f0 ; \
!aes_dround23 %f38, %f6, %f4, %f2 ; \
!aes_dround01 %f36, %f56, %f58, %f60 ; \
!aes_dround23 %f38, %f56, %f58, %f62 ; \
!aes_dround01 %f40, %f0, %f2, %f6 ; \
!aes_dround23 %f42, %f0, %f2, %f4 ; \
!aes_dround01 %f40, %f60, %f62, %f56 ; \
!aes_dround23 %f42, %f60, %f62, %f58 ; \
!aes_dround01 %f44, %f6, %f4, %f0 ; \
!aes_dround23 %f46, %f6, %f4, %f2 ; \
!aes_dround01 %f44, %f56, %f58, %f60 ; \
!aes_dround23 %f46, %f56, %f58, %f62 ; \
!aes_dround01 %f48, %f0, %f2, %f6 ; \
!aes_dround23 %f50, %f0, %f2, %f4 ; \
!aes_dround01 %f48, %f60, %f62, %f56 ; \
!aes_dround23 %f50, %f60, %f62, %f58 ; \
!aes_dround01_l %f52, %f6, %f4, %f0 ; \
!aes_dround23_l %f54, %f6, %f4, %f2 ; \
!aes_dround01_l %f52, %f56, %f58, %f60 ; \
!aes_dround23_l %f54, %f56, %f58, %f62
! Composite decrypt-round macros: 12 rounds (AES-192) and 14 rounds
! (AES-256); the _2 variant interleaves two independent blocks.
#define TWELVE_DROUNDS \
MID_TWO_DROUNDS ; \
TEN_DROUNDS
#define TWELVE_DROUNDS_2 \
MID_TWO_DROUNDS_2 ; \
TEN_DROUNDS_2
#define FOURTEEN_DROUNDS \
FIRST_TWO_DROUNDS ; \
TWELVE_DROUNDS
/*
 * AES-256 decrypt, two-block variant.  The aes_dround01/aes_dround23
 * instructions are hand-encoded as .byte sequences because the assembler
 * has no mnemonics for them; the readable equivalent is the "!" comment
 * block that follows this macro.  Interleaved ldd's reload round-key
 * double-words from the key schedule at %o0 because the 256-bit two-block
 * case needs more registers than the FP file can hold at once; callers
 * (e.g. FOURTEEN_DROUNDS_2 users) must therefore have the schedule
 * pointer in %o0 and must restore any clobbered preloaded keys afterwards.
 */
#define FOURTEEN_DROUNDS_2 \
.byte 0xb0, 0xc8, 0x2c, 0x54 ; \
.byte 0xac, 0xc8, 0xac, 0x74 ; \
ldd [%o0 + 0x80], %f20 ; \
.byte 0xb2, 0xc8, 0x3e, 0x5d ; \
.byte 0xb6, 0xc8, 0xbe, 0x7d ; \
.byte 0x80, 0xc9, 0x2c, 0x58 ; \
.byte 0x84, 0xc9, 0xac, 0x78 ; \
ldd [%o0 + 0x88], %f22 ; \
.byte 0xba, 0xc9, 0x36, 0x59 ; \
ldd [%o0 + 0x70], %f24 ; \
.byte 0xbe, 0xc9, 0xb6, 0x79 ; \
.byte 0x8c, 0xca, 0x04, 0x40 ; \
.byte 0x88, 0xca, 0x84, 0x60 ; \
.byte 0xb2, 0xca, 0x3e, 0x5d ; \
.byte 0xb6, 0xca, 0xbe, 0x7d ; \
.byte 0x80, 0xcb, 0x08, 0x46 ; \
.byte 0x84, 0xcb, 0x88, 0x66 ; \
.byte 0xba, 0xcb, 0x36, 0x59 ; \
.byte 0xbe, 0xcb, 0xb6, 0x79 ; \
.byte 0x8c, 0xcc, 0x04, 0x40 ; \
.byte 0x88, 0xcc, 0x84, 0x60 ; \
.byte 0xb2, 0xcc, 0x3e, 0x5d ; \
.byte 0xb6, 0xcc, 0xbe, 0x7d ; \
.byte 0x80, 0xcd, 0x08, 0x46 ; \
.byte 0x84, 0xcd, 0x88, 0x66 ; \
.byte 0xba, 0xcd, 0x36, 0x59 ; \
.byte 0xbe, 0xcd, 0xb6, 0x79 ; \
.byte 0x8c, 0xce, 0x04, 0x40 ; \
.byte 0x88, 0xce, 0x84, 0x60 ; \
.byte 0xb2, 0xce, 0x3e, 0x5d ; \
.byte 0xb6, 0xce, 0xbe, 0x7d ; \
.byte 0x80, 0xcf, 0x08, 0x46 ; \
.byte 0x84, 0xcf, 0x88, 0x66 ; \
.byte 0xba, 0xcf, 0x36, 0x59 ; \
.byte 0xbe, 0xcf, 0xb6, 0x79 ; \
.byte 0x8c, 0xc8, 0x44, 0x40 ; \
.byte 0x88, 0xc8, 0xc4, 0x60 ; \
.byte 0xb2, 0xc8, 0x7e, 0x5d ; \
.byte 0xb6, 0xc8, 0xfe, 0x7d ; \
.byte 0x80, 0xc9, 0x48, 0x46 ; \
.byte 0x84, 0xc9, 0xc8, 0x66 ; \
.byte 0xba, 0xc9, 0x76, 0x59 ; \
.byte 0xbe, 0xc9, 0xf6, 0x79 ; \
.byte 0x8c, 0xca, 0x44, 0x40 ; \
.byte 0x88, 0xca, 0xc4, 0x60 ; \
.byte 0xb2, 0xca, 0x7e, 0x5d ; \
.byte 0xb6, 0xca, 0xfe, 0x7d ; \
.byte 0x80, 0xcb, 0x48, 0x46 ; \
.byte 0x84, 0xcb, 0xc8, 0x66 ; \
.byte 0xba, 0xcb, 0x76, 0x59 ; \
.byte 0xbe, 0xcb, 0xf6, 0x79 ; \
.byte 0x8c, 0xcc, 0x44, 0x40 ; \
.byte 0x88, 0xcc, 0xc4, 0x60 ; \
ldd [%o0 + 0xd0], %f0 ; \
.byte 0xb2, 0xcc, 0x7e, 0x5d ; \
ldd [%o0 + 0xd8], %f2 ; \
.byte 0xb6, 0xcc, 0xfe, 0x7d ; \
.byte 0xa8, 0xcd, 0x48, 0xc6 ; \
.byte 0xac, 0xcd, 0xc8, 0xe6 ; \
ldd [%o0 + 0xc0], %f4 ; \
.byte 0xba, 0xcd, 0x76, 0xd9 ; \
ldd [%o0 + 0xc8], %f6 ; \
.byte 0xbe, 0xcd, 0xf6, 0xf9
!aes_dround01 %f0, %f20, %f22, %f24 ; \
!aes_dround23 %f2, %f20, %f22, %f22 ; \
!ldd [%o0 + 0x80], %f20 ; \
!aes_dround01 %f0, %f60, %f62, %f56 ; \
!aes_dround23 %f2, %f60, %f62, %f58 ; \
!aes_dround01 %f4, %f24, %f22, %f0 ; \
!aes_dround23 %f6, %f24, %f22, %f2 ; \
!ldd [%o0 + 0x88], %f22 ; \
!aes_dround01 %f4, %f56, %f58, %f60 ; \
!ldd [%o0 + 0x70], %f24 ; \
!aes_dround23 %f6, %f56, %f58, %f62 ; \
!aes_dround01 %f8, %f0, %f2, %f6 ; \
!aes_dround23 %f10, %f0, %f2, %f4 ; \
!aes_dround01 %f8, %f60, %f62, %f56 ; \
!aes_dround23 %f10, %f60, %f62, %f58 ; \
!aes_dround01 %f12, %f6, %f4, %f0 ; \
!aes_dround23 %f14, %f6, %f4, %f2 ; \
!aes_dround01 %f12, %f56, %f58, %f60 ; \
!aes_dround23 %f14, %f56, %f58, %f62 ; \
!aes_dround01 %f16, %f0, %f2, %f6 ; \
!aes_dround23 %f18, %f0, %f2, %f4 ; \
!aes_dround01 %f16, %f60, %f62, %f56 ; \
!aes_dround23 %f18, %f60, %f62, %f58 ; \
!aes_dround01 %f20, %f6, %f4, %f0 ; \
!aes_dround23 %f22, %f6, %f4, %f2 ; \
!aes_dround01 %f20, %f56, %f58, %f60 ; \
!aes_dround23 %f22, %f56, %f58, %f62 ; \
!aes_dround01 %f24, %f0, %f2, %f6 ; \
!aes_dround23 %f26, %f0, %f2, %f4 ; \
!aes_dround01 %f24, %f60, %f62, %f56 ; \
!aes_dround23 %f26, %f60, %f62, %f58 ; \
!aes_dround01 %f28, %f6, %f4, %f0 ; \
!aes_dround23 %f30, %f6, %f4, %f2 ; \
!aes_dround01 %f28, %f56, %f58, %f60 ; \
!aes_dround23 %f30, %f56, %f58, %f62 ; \
!aes_dround01 %f32, %f0, %f2, %f6 ; \
!aes_dround23 %f34, %f0, %f2, %f4 ; \
!aes_dround01 %f32, %f60, %f62, %f56 ; \
!aes_dround23 %f34, %f60, %f62, %f58 ; \
!aes_dround01 %f36, %f6, %f4, %f0 ; \
!aes_dround23 %f38, %f6, %f4, %f2 ; \
!aes_dround01 %f36, %f56, %f58, %f60 ; \
!aes_dround23 %f38, %f56, %f58, %f62 ; \
!aes_dround01 %f40, %f0, %f2, %f6 ; \
!aes_dround23 %f42, %f0, %f2, %f4 ; \
!aes_dround01 %f40, %f60, %f62, %f56 ; \
!aes_dround23 %f42, %f60, %f62, %f58 ; \
!aes_dround01 %f44, %f6, %f4, %f0 ; \
!aes_dround23 %f46, %f6, %f4, %f2 ; \
!aes_dround01 %f44, %f56, %f58, %f60 ; \
!aes_dround23 %f46, %f56, %f58, %f62 ; \
!aes_dround01 %f48, %f0, %f2, %f6 ; \
!aes_dround23 %f50, %f0, %f2, %f4 ; \
!ldd [%o0 + 0xd0], %f0 ; \
!aes_dround01 %f48, %f60, %f62, %f56 ; \
!ldd [%o0 + 0xd8], %f2 ; \
!aes_dround23 %f50, %f60, %f62, %f58 ; \
!aes_dround01_l %f52, %f6, %f4, %f20 ; \
!aes_dround23_l %f54, %f6, %f4, %f22 ; \
!ldd [%o0 + 0xc0], %f4 ; \
!aes_dround01_l %f52, %f56, %f58, %f60 ; \
!ldd [%o0 + 0xc8], %f6 ; \
!aes_dround23_l %f54, %f56, %f58, %f62
/*
 * void t4_aes128_load_keys_for_encrypt(uint64_t *ks);
 *
 * Preload the AES-128 expanded encrypt key schedule, ks[2]..ks[21]
 * (rounds 1-10), into %f16-%f54 for use by the TEN_EROUNDS macros.
 * ks[0]/ks[1] (the initial AddRoundKey) are loaded by the cipher
 * routines themselves into integer registers.
 * The FP registers must remain live until the cipher call completes.
 */
ENTRY(t4_aes128_load_keys_for_encrypt)
ldd [%o0 + 0x10], %f16
ldd [%o0 + 0x18], %f18
ldd [%o0 + 0x20], %f20
ldd [%o0 + 0x28], %f22
ldd [%o0 + 0x30], %f24
ldd [%o0 + 0x38], %f26
ldd [%o0 + 0x40], %f28
ldd [%o0 + 0x48], %f30
ldd [%o0 + 0x50], %f32
ldd [%o0 + 0x58], %f34
ldd [%o0 + 0x60], %f36
ldd [%o0 + 0x68], %f38
ldd [%o0 + 0x70], %f40
ldd [%o0 + 0x78], %f42
ldd [%o0 + 0x80], %f44
ldd [%o0 + 0x88], %f46
ldd [%o0 + 0x90], %f48
ldd [%o0 + 0x98], %f50
ldd [%o0 + 0xa0], %f52
retl
ldd [%o0 + 0xa8], %f54 ! delay slot: last key word
SET_SIZE(t4_aes128_load_keys_for_encrypt)
/*
 * void t4_aes192_load_keys_for_encrypt(uint64_t *ks);
 *
 * Preload the AES-192 expanded encrypt key schedule, ks[2]..ks[25]
 * (rounds 1-12), into %f8-%f54 for the TWELVE_EROUNDS macros.
 * ks[0]/ks[1] are loaded by the cipher routines themselves.
 */
ENTRY(t4_aes192_load_keys_for_encrypt)
ldd [%o0 + 0x10], %f8
ldd [%o0 + 0x18], %f10
ldd [%o0 + 0x20], %f12
ldd [%o0 + 0x28], %f14
ldd [%o0 + 0x30], %f16
ldd [%o0 + 0x38], %f18
ldd [%o0 + 0x40], %f20
ldd [%o0 + 0x48], %f22
ldd [%o0 + 0x50], %f24
ldd [%o0 + 0x58], %f26
ldd [%o0 + 0x60], %f28
ldd [%o0 + 0x68], %f30
ldd [%o0 + 0x70], %f32
ldd [%o0 + 0x78], %f34
ldd [%o0 + 0x80], %f36
ldd [%o0 + 0x88], %f38
ldd [%o0 + 0x90], %f40
ldd [%o0 + 0x98], %f42
ldd [%o0 + 0xa0], %f44
ldd [%o0 + 0xa8], %f46
ldd [%o0 + 0xb0], %f48
ldd [%o0 + 0xb8], %f50
ldd [%o0 + 0xc0], %f52
retl
ldd [%o0 + 0xc8], %f54 ! delay slot: last key word
SET_SIZE(t4_aes192_load_keys_for_encrypt)
/*
 * void t4_aes256_load_keys_for_encrypt(uint64_t *ks);
 *
 * Preload the AES-256 expanded encrypt key schedule, ks[2]..ks[29]
 * (rounds 1-14), into %f0-%f54 for the FOURTEEN_EROUNDS macros.
 * ks[0]/ks[1] are loaded by the cipher routines themselves.
 */
ENTRY(t4_aes256_load_keys_for_encrypt)
ldd [%o0 + 0x10], %f0
ldd [%o0 + 0x18], %f2
ldd [%o0 + 0x20], %f4
ldd [%o0 + 0x28], %f6
ldd [%o0 + 0x30], %f8
ldd [%o0 + 0x38], %f10
ldd [%o0 + 0x40], %f12
ldd [%o0 + 0x48], %f14
ldd [%o0 + 0x50], %f16
ldd [%o0 + 0x58], %f18
ldd [%o0 + 0x60], %f20
ldd [%o0 + 0x68], %f22
ldd [%o0 + 0x70], %f24
ldd [%o0 + 0x78], %f26
ldd [%o0 + 0x80], %f28
ldd [%o0 + 0x88], %f30
ldd [%o0 + 0x90], %f32
ldd [%o0 + 0x98], %f34
ldd [%o0 + 0xa0], %f36
ldd [%o0 + 0xa8], %f38
ldd [%o0 + 0xb0], %f40
ldd [%o0 + 0xb8], %f42
ldd [%o0 + 0xc0], %f44
ldd [%o0 + 0xc8], %f46
ldd [%o0 + 0xd0], %f48
ldd [%o0 + 0xd8], %f50
ldd [%o0 + 0xe0], %f52
retl
ldd [%o0 + 0xe8], %f54 ! delay slot: last key word
SET_SIZE(t4_aes256_load_keys_for_encrypt)
#define TEST_PARALLEL_ECB_ENCRYPT
#ifdef TEST_PARALLEL_ECB_ENCRYPT
/*
 * void t4_aes128_ecb_encrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, ...);
 *
 * AES-128 ECB encrypt (two-blocks-per-iteration variant).  Assumes
 * t4_aes128_load_keys_for_encrypt() has preloaded rounds 1-10 into
 * %f16-%f54.  len is a multiple of 16; when it is an odd multiple of 16
 * a single leading block is processed first so the main loop can always
 * run two blocks in parallel.
 */
ENTRY(t4_aes128_ecb_encrypt)
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
and %o3, 16, %o4 ! odd number of 16-byte blocks?
brz %o4, ecbenc128_loop
nop
! single leading block: keeps the main loop two-block aligned
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f60
movxtod %g4, %f62
TEN_EROUNDS
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be ecbenc128_loop_end
add %o2, 16, %o2 ! delay slot (always executed)
ecbenc128_loop:
! two blocks per iteration: %f0/%f2 and %f60/%f62
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f0
movxtod %g4, %f2
ldx [%o1 + 16], %g3 !input
ldx [%o1 + 24], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f60
movxtod %g4, %f62
TEN_EROUNDS_2
std %f0, [%o2]
std %f2, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne ecbenc128_loop
add %o2, 32, %o2 ! delay slot (always executed)
ecbenc128_loop_end:
retl
nop
SET_SIZE(t4_aes128_ecb_encrypt)
/*
 * void t4_aes192_ecb_encrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, ...);
 *
 * AES-192 ECB encrypt, two blocks per iteration.  Assumes
 * t4_aes192_load_keys_for_encrypt() has preloaded rounds 1-12 into
 * %f8-%f54.  Same structure as the 128-bit variant: a single leading
 * block is handled when len is an odd multiple of 16.
 */
ENTRY(t4_aes192_ecb_encrypt)
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
and %o3, 16, %o4 ! odd number of 16-byte blocks?
brz %o4, ecbenc192_loop
nop
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f60
movxtod %g4, %f62
TWELVE_EROUNDS
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be ecbenc192_loop_end
add %o2, 16, %o2 ! delay slot (always executed)
ecbenc192_loop:
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f0
movxtod %g4, %f2
ldx [%o1 + 16], %g3 !input
ldx [%o1 + 24], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f60
movxtod %g4, %f62
TWELVE_EROUNDS_2
std %f0, [%o2]
std %f2, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne ecbenc192_loop
add %o2, 32, %o2 ! delay slot (always executed)
ecbenc192_loop_end:
retl
nop
SET_SIZE(t4_aes192_ecb_encrypt)
/*
 * void t4_aes256_ecb_encrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, ...);
 *
 * AES-256 ECB encrypt, two blocks per iteration.  Assumes
 * t4_aes256_load_keys_for_encrypt() has preloaded rounds 1-14 into
 * %f0-%f54.  The two-block path uses %f20/%f22 as data registers
 * (FOURTEEN_EROUNDS_2 needs the extra registers), so the clobbered
 * round-key words are reloaded from ks after the loop.
 */
ENTRY(t4_aes256_ecb_encrypt)
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
and %o3, 16, %o4 ! odd number of 16-byte blocks?
brz %o4, ecbenc256_loop
nop
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f60
movxtod %g4, %f62
FOURTEEN_EROUNDS
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be ecbenc256_loop_end
add %o2, 16, %o2 ! delay slot (always executed)
ecbenc256_loop:
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f20 ! %f20/%f22 double as data regs here
movxtod %g4, %f22
ldx [%o1 + 16], %g3 !input
ldx [%o1 + 24], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f60
movxtod %g4, %f62
FOURTEEN_EROUNDS_2
std %f20, [%o2]
std %f22, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne ecbenc256_loop
add %o2, 32, %o2 ! delay slot (always executed)
! restore preloaded round keys clobbered as data registers above
ldd [%o0 + 0x60], %f20
ldd [%o0 + 0x68], %f22
ecbenc256_loop_end:
retl
nop
SET_SIZE(t4_aes256_ecb_encrypt)
#else
/*
 * Serial (one block per iteration) AES-128 ECB encrypt; assembled only
 * when TEST_PARALLEL_ECB_ENCRYPT is not defined.  Same interface and
 * key-preload assumptions as the parallel variant above.
 */
ENTRY(t4_aes128_ecb_encrypt)
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
ecbenc128_loop:
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f60
movxtod %g4, %f62
TEN_EROUNDS
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne ecbenc128_loop
add %o2, 16, %o2 ! delay slot (always executed)
retl
nop
SET_SIZE(t4_aes128_ecb_encrypt)
/*
 * Serial AES-192 ECB encrypt; assembled only when
 * TEST_PARALLEL_ECB_ENCRYPT is not defined.
 */
ENTRY(t4_aes192_ecb_encrypt)
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
ecbenc192_loop:
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f60
movxtod %g4, %f62
TWELVE_EROUNDS
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne ecbenc192_loop
add %o2, 16, %o2 ! delay slot (always executed)
retl
nop
SET_SIZE(t4_aes192_ecb_encrypt)
/*
 * Serial AES-256 ECB encrypt; assembled only when
 * TEST_PARALLEL_ECB_ENCRYPT is not defined.
 */
ENTRY(t4_aes256_ecb_encrypt)
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
ecbenc256_loop:
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f60
movxtod %g4, %f62
FOURTEEN_EROUNDS
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne ecbenc256_loop
add %o2, 16, %o2 ! delay slot (always executed)
retl
nop
SET_SIZE(t4_aes256_ecb_encrypt)
#endif
/*
 * void t4_aes128_cbc_encrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *iv);
 *
 * AES-128 CBC encrypt.  IV in/out via %o4; the running chain value lives
 * in %f60/%f62 (ciphertext of the previous block) and is written back to
 * *iv on exit.  CBC encryption is inherently serial, so one block per
 * iteration.  Assumes the encrypt key schedule is preloaded.
 */
ENTRY(t4_aes128_cbc_encrypt)
ldd [%o4], %f60 ! IV
ldd [%o4 +8], %f62 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
cbcenc128_loop:
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f56
movxtod %g4, %f58
fxor %f60, %f56, %f60 ! chain: xor in previous ciphertext/IV
fxor %f62, %f58, %f62
TEN_EROUNDS
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne cbcenc128_loop
add %o2, 16, %o2 ! delay slot (always executed)
std %f60, [%o4] ! store final ciphertext as next IV
retl
std %f62, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes128_cbc_encrypt)
/*
 * void t4_aes192_cbc_encrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *iv);
 *
 * AES-192 CBC encrypt; same structure as the 128-bit variant.
 */
ENTRY(t4_aes192_cbc_encrypt)
ldd [%o4], %f60 ! IV
ldd [%o4 + 8], %f62 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
cbcenc192_loop:
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f56
movxtod %g4, %f58
fxor %f60, %f56, %f60 ! chain: xor in previous ciphertext/IV
fxor %f62, %f58, %f62
TWELVE_EROUNDS
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne cbcenc192_loop
add %o2, 16, %o2 ! delay slot (always executed)
std %f60, [%o4] ! store final ciphertext as next IV
retl
std %f62, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes192_cbc_encrypt)
/*
 * void t4_aes256_cbc_encrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *iv);
 *
 * AES-256 CBC encrypt; same structure as the 128-bit variant.
 */
ENTRY(t4_aes256_cbc_encrypt)
ldd [%o4], %f60 ! IV
ldd [%o4 + 8], %f62 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
cbcenc256_loop:
ldx [%o1], %g3 !input
ldx [%o1 + 8], %g4 !input
xor %g1, %g3, %g3 !input ^ ks[0-1]
xor %g2, %g4, %g4 !input ^ ks[0-1]
movxtod %g3, %f56
movxtod %g4, %f58
fxor %f60, %f56, %f60 ! chain: xor in previous ciphertext/IV
fxor %f62, %f58, %f62
FOURTEEN_EROUNDS
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne cbcenc256_loop
add %o2, 16, %o2 ! delay slot (always executed)
std %f60, [%o4] ! store final ciphertext as next IV
retl
std %f62, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes256_cbc_encrypt)
#define TEST_PARALLEL_CTR_CRYPT
#ifdef TEST_PARALLEL_CTR_CRYPT
/*
 * void t4_aes128_ctr_crypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *counter);
 *
 * AES-128 counter-mode crypt (same routine encrypts and decrypts),
 * two blocks per iteration.  The 128-bit counter block lives in
 * %g3 (high) / %g4 (low) and is written back on exit.
 *
 * NOTE(review): only the low half is incremented ("inc %g4"); a carry
 * out of %g4 is not propagated into %g3.  Confirm callers never cross a
 * 2^64-block counter boundary.
 */
ENTRY(t4_aes128_ctr_crypt)
ldx [%o4], %g3 ! IV
ldx [%o4 +8], %g4 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
and %o3, 16, %g5 ! odd number of 16-byte blocks?
brz, %g5, ctr128_loop ! NOTE(review): stray comma after brz — confirm assembler intent
xor %g1, %g3, %g5 ! delay slot (always executed)
! single leading block
movxtod %g5, %f60
xor %g2, %g4, %g5
movxtod %g5, %f62
inc %g4
TEN_EROUNDS
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 ! keystream ^ input
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be ctr128_loop_end
add %o2, 16, %o2 ! delay slot (always executed)
ctr128_loop:
! two counter blocks per iteration
xor %g1, %g3, %g5
movxtod %g5, %f0
xor %g2, %g4, %g5
movxtod %g5, %f2
inc %g4
xor %g1, %g3, %g5
movxtod %g5, %f60
xor %g2, %g4, %g5
movxtod %g5, %f62
inc %g4
TEN_EROUNDS_2
ldd [%o1], %f6 !input
ldd [%o1 + 8], %f4 !input
ldd [%o1 + 16], %f56 !input
ldd [%o1 + 24], %f58 !input
fxor %f0, %f6, %f0 ! keystream ^ input
fxor %f2, %f4, %f2
fxor %f60, %f56, %f60
fxor %f62, %f58, %f62
std %f0, [%o2]
std %f2, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne ctr128_loop
add %o2, 32, %o2 ! delay slot (always executed)
ctr128_loop_end:
stx %g3, [%o4] ! write back updated counter
retl
stx %g4, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes128_ctr_crypt)
/*
 * void t4_aes192_ctr_crypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *counter);
 *
 * AES-192 counter-mode crypt, two blocks per iteration; see the 128-bit
 * variant for structure.  NOTE(review): as there, "inc %g4" does not
 * carry into %g3 — confirm the counter never wraps the low 64 bits.
 */
ENTRY(t4_aes192_ctr_crypt)
ldx [%o4], %g3 ! IV
ldx [%o4 +8], %g4 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
and %o3, 16, %g5 ! odd number of 16-byte blocks?
brz, %g5, ctr192_loop
xor %g1, %g3, %g5 ! delay slot (always executed)
movxtod %g5, %f60
xor %g2, %g4, %g5
movxtod %g5, %f62
inc %g4
TWELVE_EROUNDS
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 ! keystream ^ input
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be ctr192_loop_end
add %o2, 16, %o2 ! delay slot (always executed)
ctr192_loop:
xor %g1, %g3, %g5
movxtod %g5, %f0
xor %g2, %g4, %g5
movxtod %g5, %f2
inc %g4
xor %g1, %g3, %g5
movxtod %g5, %f60
xor %g2, %g4, %g5
movxtod %g5, %f62
inc %g4
TWELVE_EROUNDS_2
ldd [%o1], %f6 !input
ldd [%o1 + 8], %f4 !input
ldd [%o1 + 16], %f56 !input
ldd [%o1 + 24], %f58 !input
fxor %f0, %f6, %f0 ! keystream ^ input
fxor %f2, %f4, %f2
fxor %f60, %f56, %f60
fxor %f62, %f58, %f62
std %f0, [%o2]
std %f2, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne ctr192_loop
add %o2, 32, %o2 ! delay slot (always executed)
ctr192_loop_end:
stx %g3, [%o4] ! write back updated counter
retl
stx %g4, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes192_ctr_crypt)
/*
 * void t4_aes256_ctr_crypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *counter);
 *
 * AES-256 counter-mode crypt, two blocks per iteration.  The two-block
 * path borrows %f20/%f22 as data registers, so the clobbered round keys
 * are reloaded from ks after the loop.  NOTE(review): as in the other
 * CTR variants, "inc %g4" does not carry into %g3.
 */
ENTRY(t4_aes256_ctr_crypt)
ldx [%o4], %g3 ! IV
ldx [%o4 +8], %g4 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
and %o3, 16, %g5 ! odd number of 16-byte blocks?
brz, %g5, ctr256_loop
xor %g1, %g3, %g5 ! delay slot (always executed)
movxtod %g5, %f60
xor %g2, %g4, %g5
movxtod %g5, %f62
inc %g4
FOURTEEN_EROUNDS
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 ! keystream ^ input
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be ctr256_loop_end
add %o2, 16, %o2 ! delay slot (always executed)
ctr256_loop:
xor %g1, %g3, %g5
movxtod %g5, %f20 ! %f20/%f22 double as data regs here
xor %g2, %g4, %g5
movxtod %g5, %f22
inc %g4
xor %g1, %g3, %g5
movxtod %g5, %f60
xor %g2, %g4, %g5
movxtod %g5, %f62
inc %g4
FOURTEEN_EROUNDS_2
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f20, %f56, %f20 ! keystream ^ input
fxor %f22, %f58, %f22
ldd [%o1 + 16], %f56 !input
ldd [%o1 + 24], %f58 !input
fxor %f60, %f56, %f60
fxor %f62, %f58, %f62
std %f20, [%o2]
std %f22, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne ctr256_loop
add %o2, 32, %o2 ! delay slot (always executed)
! restore preloaded round keys clobbered as data registers above
ldd [%o0 + 0x60], %f20
ldd [%o0 + 0x68], %f22
ctr256_loop_end:
stx %g3, [%o4] ! write back updated counter
retl
stx %g4, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes256_ctr_crypt)
#else
/*
 * Serial AES-128 CTR crypt (one block per iteration); assembled only
 * when TEST_PARALLEL_CTR_CRYPT is not defined.  NOTE(review): "inc %g4"
 * does not carry into %g3 (counter high half).
 */
ENTRY(t4_aes128_ctr_crypt)
ldx [%o4], %g3 ! IV
ldx [%o4 +8], %g4 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
ctr128_loop:
xor %g1, %g3, %g5
movxtod %g5, %f60
xor %g2, %g4, %g5
movxtod %g5, %f62
inc %g4
TEN_EROUNDS
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 ! keystream ^ input
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne ctr128_loop
add %o2, 16, %o2 ! delay slot (always executed)
stx %g3, [%o4] ! write back updated counter
retl
stx %g4, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes128_ctr_crypt)
/*
 * Serial AES-192 CTR crypt; assembled only when TEST_PARALLEL_CTR_CRYPT
 * is not defined.
 */
ENTRY(t4_aes192_ctr_crypt)
ldx [%o4], %g3 ! IV
ldx [%o4 +8], %g4 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
ctr192_loop:
xor %g1, %g3, %g5
movxtod %g5, %f60
xor %g2, %g4, %g5
movxtod %g5, %f62
inc %g4
TWELVE_EROUNDS
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 ! keystream ^ input
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne ctr192_loop
add %o2, 16, %o2 ! delay slot (always executed)
stx %g3, [%o4] ! write back updated counter
retl
stx %g4, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes192_ctr_crypt)
/*
 * Serial AES-256 CTR crypt; assembled only when TEST_PARALLEL_CTR_CRYPT
 * is not defined.
 */
ENTRY(t4_aes256_ctr_crypt)
ldx [%o4], %g3 ! IV
ldx [%o4 +8], %g4 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
ctr256_loop:
xor %g1, %g3, %g5
movxtod %g5, %f60
xor %g2, %g4, %g5
movxtod %g5, %f62
inc %g4
FOURTEEN_EROUNDS
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 ! keystream ^ input
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne ctr256_loop
add %o2, 16, %o2 ! delay slot (always executed)
stx %g3, [%o4] ! write back updated counter
retl
stx %g4, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes256_ctr_crypt)
#endif
/*
 * void t4_aes128_cfb128_encrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *iv);
 *
 * AES-128 CFB128 encrypt.  The feedback register (previous ciphertext,
 * initially the IV) lives in %f60/%f62; each iteration encrypts it and
 * xors the result with the plaintext.  Inherently serial.  Final
 * ciphertext block is written back to *iv.
 */
ENTRY(t4_aes128_cfb128_encrypt)
ldd [%o4], %f60 ! IV
ldd [%o4 +8], %f62 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
cfb128_128_loop:
movxtod %g1, %f56
movxtod %g2, %f58
fxor %f60, %f56, %f60 ! feedback ^ ks[0-1] (initial ARK)
fxor %f62, %f58, %f62
TEN_EROUNDS
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 ! ciphertext = E(feedback) ^ plaintext
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne cfb128_128_loop
add %o2, 16, %o2 ! delay slot (always executed)
std %f60, [%o4] ! store final ciphertext as next IV
retl
std %f62, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes128_cfb128_encrypt)
/*
 * void t4_aes192_cfb128_encrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *iv);
 *
 * AES-192 CFB128 encrypt; same structure as the 128-bit variant.
 */
ENTRY(t4_aes192_cfb128_encrypt)
ldd [%o4], %f60 ! IV
ldd [%o4 +8], %f62 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
cfb128_192_loop:
movxtod %g1, %f56
movxtod %g2, %f58
fxor %f60, %f56, %f60 ! feedback ^ ks[0-1] (initial ARK)
fxor %f62, %f58, %f62
TWELVE_EROUNDS
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 ! ciphertext = E(feedback) ^ plaintext
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne cfb128_192_loop
add %o2, 16, %o2 ! delay slot (always executed)
std %f60, [%o4] ! store final ciphertext as next IV
retl
std %f62, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes192_cfb128_encrypt)
/*
 * void t4_aes256_cfb128_encrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *iv);
 *
 * AES-256 CFB128 encrypt; same structure as the 128-bit variant.
 */
ENTRY(t4_aes256_cfb128_encrypt)
ldd [%o4], %f60 ! IV
ldd [%o4 +8], %f62 ! IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
cfb128_256_loop:
movxtod %g1, %f56
movxtod %g2, %f58
fxor %f60, %f56, %f60 ! feedback ^ ks[0-1] (initial ARK)
fxor %f62, %f58, %f62
FOURTEEN_EROUNDS
ldd [%o1], %f56 !input
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 ! ciphertext = E(feedback) ^ plaintext
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne cfb128_256_loop
add %o2, 16, %o2 ! delay slot (always executed)
std %f60, [%o4] ! store final ciphertext as next IV
retl
std %f62, [%o4 + 8] ! delay slot
SET_SIZE(t4_aes256_cfb128_encrypt)
/*
 * void t4_aes128_load_keys_for_decrypt(uint64_t *ks);
 *
 * Preload the AES-128 key schedule in reverse order (ks[0] highest,
 * ks[19] lowest FP register) into %f16-%f54 for the *_DROUNDS macros.
 * The last two schedule words (initial decrypt ARK) are loaded by the
 * decrypt routines into integer registers.
 */
ENTRY(t4_aes128_load_keys_for_decrypt)
ldd [%o0], %f52
ldd [%o0 + 0x8], %f54
ldd [%o0 + 0x10], %f48
ldd [%o0 + 0x18], %f50
ldd [%o0 + 0x20], %f44
ldd [%o0 + 0x28], %f46
ldd [%o0 + 0x30], %f40
ldd [%o0 + 0x38], %f42
ldd [%o0 + 0x40], %f36
ldd [%o0 + 0x48], %f38
ldd [%o0 + 0x50], %f32
ldd [%o0 + 0x58], %f34
ldd [%o0 + 0x60], %f28
ldd [%o0 + 0x68], %f30
ldd [%o0 + 0x70], %f24
ldd [%o0 + 0x78], %f26
ldd [%o0 + 0x80], %f20
ldd [%o0 + 0x88], %f22
ldd [%o0 + 0x90], %f16
retl
ldd [%o0 + 0x98], %f18 ! delay slot: last key word
SET_SIZE(t4_aes128_load_keys_for_decrypt)
/*
 * void t4_aes192_load_keys_for_decrypt(uint64_t *ks);
 *
 * Preload the AES-192 key schedule in reverse order into %f8-%f54 for
 * the TWELVE_DROUNDS macros; ks[24]/ks[25] are loaded by the decrypt
 * routines into integer registers.
 */
ENTRY(t4_aes192_load_keys_for_decrypt)
ldd [%o0], %f52
ldd [%o0 + 0x8], %f54
ldd [%o0 + 0x10], %f48
ldd [%o0 + 0x18], %f50
ldd [%o0 + 0x20], %f44
ldd [%o0 + 0x28], %f46
ldd [%o0 + 0x30], %f40
ldd [%o0 + 0x38], %f42
ldd [%o0 + 0x40], %f36
ldd [%o0 + 0x48], %f38
ldd [%o0 + 0x50], %f32
ldd [%o0 + 0x58], %f34
ldd [%o0 + 0x60], %f28
ldd [%o0 + 0x68], %f30
ldd [%o0 + 0x70], %f24
ldd [%o0 + 0x78], %f26
ldd [%o0 + 0x80], %f20
ldd [%o0 + 0x88], %f22
ldd [%o0 + 0x90], %f16
ldd [%o0 + 0x98], %f18
ldd [%o0 + 0xa0], %f12
ldd [%o0 + 0xa8], %f14
ldd [%o0 + 0xb0], %f8
retl
ldd [%o0 + 0xb8], %f10 ! delay slot: last key word
SET_SIZE(t4_aes192_load_keys_for_decrypt)
/*
 * void t4_aes256_load_keys_for_decrypt(uint64_t *ks);
 *
 * Preload the AES-256 key schedule in reverse order into %f0-%f54 for
 * the FOURTEEN_DROUNDS macros; ks[28]/ks[29] are loaded by the decrypt
 * routines into integer registers.
 */
ENTRY(t4_aes256_load_keys_for_decrypt)
ldd [%o0], %f52
ldd [%o0 + 0x8], %f54
ldd [%o0 + 0x10], %f48
ldd [%o0 + 0x18], %f50
ldd [%o0 + 0x20], %f44
ldd [%o0 + 0x28], %f46
ldd [%o0 + 0x30], %f40
ldd [%o0 + 0x38], %f42
ldd [%o0 + 0x40], %f36
ldd [%o0 + 0x48], %f38
ldd [%o0 + 0x50], %f32
ldd [%o0 + 0x58], %f34
ldd [%o0 + 0x60], %f28
ldd [%o0 + 0x68], %f30
ldd [%o0 + 0x70], %f24
ldd [%o0 + 0x78], %f26
ldd [%o0 + 0x80], %f20
ldd [%o0 + 0x88], %f22
ldd [%o0 + 0x90], %f16
ldd [%o0 + 0x98], %f18
ldd [%o0 + 0xa0], %f12
ldd [%o0 + 0xa8], %f14
ldd [%o0 + 0xb0], %f8
ldd [%o0 + 0xb8], %f10
ldd [%o0 + 0xc0], %f4
ldd [%o0 + 0xc8], %f6
ldd [%o0 + 0xd0], %f0
retl
ldd [%o0 + 0xd8], %f2 ! delay slot: last key word
SET_SIZE(t4_aes256_load_keys_for_decrypt)
#define TEST_PARALLEL_ECB_DECRYPT
#ifdef TEST_PARALLEL_ECB_DECRYPT
/*
 * void t4_aes128_ecb_decrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, ...);
 *
 * AES-128 ECB decrypt, two blocks per iteration.  Assumes
 * t4_aes128_load_keys_for_decrypt() has preloaded the schedule; the
 * final two schedule words (initial decrypt AddRoundKey) are read here
 * into %g1/%g2.  A single leading block is handled when len is an odd
 * multiple of 16.
 */
ENTRY(t4_aes128_ecb_decrypt)
ldx [%o0 + 0xa0], %g1 !ks[last-1]
ldx [%o0 + 0xa8], %g2 !ks[last]
and %o3, 16, %o4 ! odd number of 16-byte blocks?
brz %o4, ecbdec128_loop
nop
ldx [%o1], %o4
ldx [%o1 + 8], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f60
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f62
TEN_DROUNDS
std %f60, [%o2]
std %f62, [%o2 + 0x8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be ecbdec128_loop_end
add %o2, 16, %o2 ! delay slot (always executed)
ecbdec128_loop:
ldx [%o1], %o4
ldx [%o1 + 8], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f0
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f2
ldx [%o1 + 16], %o4
ldx [%o1 + 24], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f60
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f62
TEN_DROUNDS_2
std %f0, [%o2]
std %f2, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne ecbdec128_loop
add %o2, 32, %o2 ! delay slot (always executed)
ecbdec128_loop_end:
retl
nop
SET_SIZE(t4_aes128_ecb_decrypt)
/*
 * void t4_aes192_ecb_decrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, ...);
 *
 * AES-192 ECB decrypt, two blocks per iteration; same structure as the
 * 128-bit variant (final schedule words at 0xc0/0xc8).
 */
ENTRY(t4_aes192_ecb_decrypt)
ldx [%o0 + 0xc0], %g1 !ks[last-1]
ldx [%o0 + 0xc8], %g2 !ks[last]
and %o3, 16, %o4 ! odd number of 16-byte blocks?
brz %o4, ecbdec192_loop
nop
ldx [%o1], %o4
ldx [%o1 + 8], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f60
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f62
TWELVE_DROUNDS
std %f60, [%o2]
std %f62, [%o2 + 0x8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be ecbdec192_loop_end
add %o2, 16, %o2 ! delay slot (always executed)
ecbdec192_loop:
ldx [%o1], %o4
ldx [%o1 + 8], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f0
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f2
ldx [%o1 + 16], %o4
ldx [%o1 + 24], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f60
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f62
TWELVE_DROUNDS_2
std %f0, [%o2]
std %f2, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne ecbdec192_loop
add %o2, 32, %o2 ! delay slot (always executed)
ecbdec192_loop_end:
retl
nop
SET_SIZE(t4_aes192_ecb_decrypt)
/*
 * void t4_aes256_ecb_decrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, ...);
 *
 * AES-256 ECB decrypt, two blocks per iteration.  The two-block path
 * (FOURTEEN_DROUNDS_2) borrows %f20/%f22 as data registers and reloads
 * round keys from %o0, so the clobbered preloaded keys are restored
 * from ks after the loop.
 */
ENTRY(t4_aes256_ecb_decrypt)
ldx [%o0 + 0xe0], %g1 !ks[last-1]
ldx [%o0 + 0xe8], %g2 !ks[last]
and %o3, 16, %o4 ! odd number of 16-byte blocks?
brz %o4, ecbdec256_loop
nop
ldx [%o1], %o4
ldx [%o1 + 8], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f60
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f62
FOURTEEN_DROUNDS
std %f60, [%o2]
std %f62, [%o2 + 0x8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be ecbdec256_loop_end
add %o2, 16, %o2 ! delay slot (always executed)
ecbdec256_loop:
ldx [%o1], %o4
ldx [%o1 + 8], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f20 ! %f20/%f22 double as data regs here
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f22
ldx [%o1 + 16], %o4
ldx [%o1 + 24], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f60
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f62
FOURTEEN_DROUNDS_2
std %f20, [%o2]
std %f22, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne ecbdec256_loop
add %o2, 32, %o2 ! delay slot (always executed)
! restore preloaded round keys clobbered as data registers above
ldd [%o0 + 0x80], %f20
ldd [%o0 + 0x88], %f22
ecbdec256_loop_end:
retl
nop
SET_SIZE(t4_aes256_ecb_decrypt)
#else
/*
 * Serial AES-128 ECB decrypt; assembled only when
 * TEST_PARALLEL_ECB_DECRYPT is not defined.
 */
ENTRY(t4_aes128_ecb_decrypt)
ldx [%o0 + 0xa0], %g1 !ks[last-1]
ldx [%o0 + 0xa8], %g2 !ks[last]
ecbdec128_loop:
ldx [%o1], %o4
ldx [%o1 + 8], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f60
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f62
TEN_DROUNDS
std %f60, [%o2]
std %f62, [%o2 + 0x8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne ecbdec128_loop
add %o2, 16, %o2 ! delay slot (always executed)
retl
nop
SET_SIZE(t4_aes128_ecb_decrypt)
/*
 * Serial AES-192 ECB decrypt; assembled only when
 * TEST_PARALLEL_ECB_DECRYPT is not defined.
 */
ENTRY(t4_aes192_ecb_decrypt)
ldx [%o0 + 0xc0], %g1 !ks[last-1]
ldx [%o0 + 0xc8], %g2 !ks[last]
ecbdec192_loop:
ldx [%o1], %o4
ldx [%o1 + 8], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f60
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f62
TWELVE_DROUNDS
std %f60, [%o2]
std %f62, [%o2 + 0x8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne ecbdec192_loop
add %o2, 16, %o2 ! delay slot (always executed)
retl
nop
SET_SIZE(t4_aes192_ecb_decrypt)
/*
 * Serial AES-256 ECB decrypt; assembled only when
 * TEST_PARALLEL_ECB_DECRYPT is not defined.
 */
ENTRY(t4_aes256_ecb_decrypt)
ldx [%o0 + 0xe0], %g1 !ks[last-1]
ldx [%o0 + 0xe8], %g2 !ks[last]
ecbdec256_loop:
ldx [%o1], %o4
ldx [%o1 + 8], %o5
xor %g1, %o4, %g3 !initial ARK
movxtod %g3, %f60
xor %g2, %o5, %g3 !initial ARK
movxtod %g3, %f62
FOURTEEN_DROUNDS
std %f60, [%o2]
std %f62, [%o2 + 0x8]
add %o1, 16, %o1
subcc %o3, 16, %o3
bne ecbdec256_loop
add %o2, 16, %o2 ! delay slot (always executed)
retl
nop
SET_SIZE(t4_aes256_ecb_decrypt)
#endif
#define TEST_PARALLEL_CBC_DECRYPT
/*
 * NOTE(review): the #ifdef below tests EST_PARALLEL_CBC_DECRYPT, which
 * does NOT match the TEST_PARALLEL_CBC_DECRYPT defined above (unlike the
 * ECB/CTR/CFB cases, whose #ifdef names match their #defines).  As a
 * result the serial (#else) CBC-decrypt implementations are the ones
 * assembled.  Confirm whether this is a deliberate disable of the
 * parallel path or a typo before "fixing" it.
 */
#ifdef EST_PARALLEL_CBC_DECRYPT
/*
 * void t4_aes128_cbc_decrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *iv);
 *
 * AES-128 CBC decrypt, two blocks per iteration (decryption of distinct
 * blocks is independent, so it parallelizes).  Uses a register window
 * (save/restore) so %o regs are free for locals: %o0/%o1 hold the
 * running IV (previous ciphertext block), written back to *iv on exit.
 */
ENTRY(t4_aes128_cbc_decrypt)
save %sp, -SA(MINFRAME), %sp
ldx [%i4], %o0 !IV
ldx [%i4 + 8], %o1 !IV
ldx [%i0 + 0xa0], %o2 !ks[last-1]
ldx [%i0 + 0xa8], %o3 !ks[last]
and %i3, 16, %o4 ! odd number of 16-byte blocks?
brz %o4, cbcdec128_loop
nop
ldx [%i1], %o4
ldx [%i1 + 8], %o5
xor %o2, %o4, %g1 !initial ARK
movxtod %g1, %f60
xor %o3, %o5, %g1 !initial ARK
movxtod %g1, %f62
TEN_DROUNDS
movxtod %o0, %f56
movxtod %o1, %f58
mov %o4, %o0 !save last block as next IV
mov %o5, %o1
fxor %f56, %f60, %f60 !add in previous IV
fxor %f58, %f62, %f62
std %f60, [%i2]
std %f62, [%i2 + 0x8]
add %i1, 16, %i1
subcc %i3, 16, %i3
be cbcdec128_loop_end
add %i2, 16, %i2 ! delay slot (always executed)
cbcdec128_loop:
! two blocks: ciphertexts kept in %g4/%g5 and %o4/%o5 for chaining
ldx [%i1], %g4
ldx [%i1 + 8], %g5
xor %o2, %g4, %g1 !initial ARK
movxtod %g1, %f0
xor %o3, %g5, %g1 !initial ARK
movxtod %g1, %f2
ldx [%i1 + 16], %o4
ldx [%i1 + 24], %o5
xor %o2, %o4, %g1 !initial ARK
movxtod %g1, %f60
xor %o3, %o5, %g1 !initial ARK
movxtod %g1, %f62
TEN_DROUNDS_2
movxtod %o0, %f6
movxtod %o1, %f4
fxor %f6, %f0, %f0 !add in previous IV
fxor %f4, %f2, %f2
std %f0, [%i2]
std %f2, [%i2 + 8]
movxtod %g4, %f56 ! first ciphertext chains into second block
movxtod %g5, %f58
mov %o4, %o0 !save last block as next IV
mov %o5, %o1
fxor %f56, %f60, %f60 !add in previous IV
fxor %f58, %f62, %f62
std %f60, [%i2 + 16]
std %f62, [%i2 + 24]
add %i1, 32, %i1
subcc %i3, 32, %i3
bne cbcdec128_loop
add %i2, 32, %i2 ! delay slot (always executed)
cbcdec128_loop_end:
stx %o0, [%i4] ! write back final IV
stx %o1, [%i4 + 8]
ret
restore
SET_SIZE(t4_aes128_cbc_decrypt)
/*
 * void t4_aes192_cbc_decrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *iv);
 *
 * AES-192 CBC decrypt, two blocks per iteration; same structure as the
 * 128-bit variant.
 */
ENTRY(t4_aes192_cbc_decrypt)
save %sp, -SA(MINFRAME), %sp
ldx [%i4], %o0 !IV
ldx [%i4 + 8], %o1 !IV
ldx [%i0 + 0xc0], %o2 !ks[last-1]
ldx [%i0 + 0xc8], %o3 !ks[last]
and %i3, 16, %o4 ! odd number of 16-byte blocks?
brz %o4, cbcdec192_loop
nop
ldx [%i1], %o4
ldx [%i1 + 8], %o5
xor %o2, %o4, %g1 !initial ARK
movxtod %g1, %f60
xor %o3, %o5, %g1 !initial ARK
movxtod %g1, %f62
TWELVE_DROUNDS
movxtod %o0, %f56
movxtod %o1, %f58
mov %o4, %o0 !save last block as next IV
mov %o5, %o1
fxor %f56, %f60, %f60 !add in previous IV
fxor %f58, %f62, %f62
std %f60, [%i2]
std %f62, [%i2 + 0x8]
add %i1, 16, %i1
subcc %i3, 16, %i3
be cbcdec192_loop_end
add %i2, 16, %i2 ! delay slot (always executed)
cbcdec192_loop:
ldx [%i1], %g4
ldx [%i1 + 8], %g5
xor %o2, %g4, %g1 !initial ARK
movxtod %g1, %f0
xor %o3, %g5, %g1 !initial ARK
movxtod %g1, %f2
ldx [%i1 + 16], %o4
ldx [%i1 + 24], %o5
xor %o2, %o4, %g1 !initial ARK
movxtod %g1, %f60
xor %o3, %o5, %g1 !initial ARK
movxtod %g1, %f62
TWELVE_DROUNDS_2
movxtod %o0, %f6
movxtod %o1, %f4
fxor %f6, %f0, %f0 !add in previous IV
fxor %f4, %f2, %f2
std %f0, [%i2]
std %f2, [%i2 + 8]
movxtod %g4, %f56 ! first ciphertext chains into second block
movxtod %g5, %f58
mov %o4, %o0 !save last block as next IV
mov %o5, %o1
fxor %f56, %f60, %f60 !add in previous IV
fxor %f58, %f62, %f62
std %f60, [%i2 + 16]
std %f62, [%i2 + 24]
add %i1, 32, %i1
subcc %i3, 32, %i3
bne cbcdec192_loop
add %i2, 32, %i2 ! delay slot (always executed)
cbcdec192_loop_end:
stx %o0, [%i4] ! write back final IV
stx %o1, [%i4 + 8]
ret
restore
SET_SIZE(t4_aes192_cbc_decrypt)
/*
 * void t4_aes256_cbc_decrypt(uint64_t *ks, uint64_t *in, uint64_t *out,
 *     size_t len, uint64_t *iv);
 *
 * AES-256 CBC decrypt, two blocks per iteration.  FOURTEEN_DROUNDS_2
 * needs the key-schedule pointer in %o0, so ks is copied there and the
 * IV high half lives in %g2 instead of %o0.  %f20/%f22 double as data
 * registers in the loop, so the clobbered preloaded round keys are
 * restored from ks afterwards.
 */
ENTRY(t4_aes256_cbc_decrypt)
save %sp, -SA(MINFRAME), %sp
mov %i0, %o0 !FOURTEEN_DROUNDS uses %o0
ldx [%i4], %g2 !IV
ldx [%i4 + 8], %o1 !IV
ldx [%o0 + 0xe0], %o2 !ks[last-1]
ldx [%o0 + 0xe8], %o3 !ks[last]
and %i3, 16, %o4 ! odd number of 16-byte blocks?
brz %o4, cbcdec256_loop
nop
ldx [%i1], %o4
ldx [%i1 + 8], %o5
xor %o2, %o4, %g1 !initial ARK
movxtod %g1, %f60
xor %o3, %o5, %g1 !initial ARK
movxtod %g1, %f62
FOURTEEN_DROUNDS
movxtod %g2, %f56
movxtod %o1, %f58
mov %o4, %g2 !save last block as next IV
mov %o5, %o1
fxor %f56, %f60, %f60 !add in previous IV
fxor %f58, %f62, %f62
std %f60, [%i2]
std %f62, [%i2 + 0x8]
add %i1, 16, %i1
subcc %i3, 16, %i3
be cbcdec256_loop_end
add %i2, 16, %i2 ! delay slot (always executed)
cbcdec256_loop:
ldx [%i1], %g4
ldx [%i1 + 8], %g5
xor %o2, %g4, %g1 !initial ARK
movxtod %g1, %f20 ! %f20/%f22 double as data regs here
xor %o3, %g5, %g1 !initial ARK
movxtod %g1, %f22
ldx [%i1 + 16], %o4
ldx [%i1 + 24], %o5
xor %o2, %o4, %g1 !initial ARK
movxtod %g1, %f60
xor %o3, %o5, %g1 !initial ARK
movxtod %g1, %f62
FOURTEEN_DROUNDS_2
movxtod %g2, %f56
movxtod %o1, %f58
fxor %f56, %f20, %f20 !add in previous IV
fxor %f58, %f22, %f22
std %f20, [%i2]
std %f22, [%i2 + 8]
movxtod %g4, %f56 ! first ciphertext chains into second block
movxtod %g5, %f58
mov %o4, %g2 !save last block as next IV
mov %o5, %o1
fxor %f56, %f60, %f60 !add in previous IV
fxor %f58, %f62, %f62
std %f60, [%i2 + 16]
std %f62, [%i2 + 24]
add %i1, 32, %i1
subcc %i3, 32, %i3
bne cbcdec256_loop
add %i2, 32, %i2 ! delay slot (always executed)
! restore preloaded round keys clobbered as data registers above
ldd [%o0 + 0x80], %f20
ldd [%o0 + 0x88], %f22
cbcdec256_loop_end:
stx %g2, [%i4] ! write back final IV
stx %o1, [%i4 + 8]
ret
restore
SET_SIZE(t4_aes256_cbc_decrypt)
#else
/*
 * Serial AES-128 CBC decrypt (one block per iteration).  This is the
 * arm actually assembled, since the #ifdef above tests an undefined
 * symbol.  IV kept in %o0/%o1 and written back to *iv on exit.
 */
ENTRY(t4_aes128_cbc_decrypt)
save %sp, -SA(MINFRAME), %sp
ldx [%i4], %o0 !IV
ldx [%i4 + 8], %o1 !IV
ldx [%i0 + 0xa0], %o2 !ks[last-1]
ldx [%i0 + 0xa8], %o3 !ks[last]
cbcdec128_loop:
ldx [%i1], %o4
ldx [%i1 + 8], %o5
xor %o2, %o4, %g1 !initial ARK
movxtod %g1, %f60
xor %o3, %o5, %g1 !initial ARK
movxtod %g1, %f62
TEN_DROUNDS
movxtod %o0, %f56
movxtod %o1, %f58
mov %o4, %o0 !save last block as next IV
mov %o5, %o1
fxor %f56, %f60, %f60 !add in previous IV
fxor %f58, %f62, %f62
std %f60, [%i2]
std %f62, [%i2 + 0x8]
add %i1, 16, %i1
subcc %i3, 16, %i3
bne cbcdec128_loop
add %i2, 16, %i2 ! delay slot (always executed)
stx %o0, [%i4] ! write back final IV
stx %o1, [%i4 + 8]
ret
restore
SET_SIZE(t4_aes128_cbc_decrypt)
/*
 * Serial AES-192 CBC decrypt; same structure as the 128-bit serial
 * variant.
 */
ENTRY(t4_aes192_cbc_decrypt)
save %sp, -SA(MINFRAME), %sp
ldx [%i4], %o0 !IV
ldx [%i4 + 8], %o1 !IV
ldx [%i0 + 0xc0], %o2 !ks[last-1]
ldx [%i0 + 0xc8], %o3 !ks[last]
cbcdec192_loop:
ldx [%i1], %o4
ldx [%i1 + 8], %o5
xor %o2, %o4, %g1 !initial ARK
movxtod %g1, %f60
xor %o3, %o5, %g1 !initial ARK
movxtod %g1, %f62
TWELVE_DROUNDS
movxtod %o0, %f56
movxtod %o1, %f58
mov %o4, %o0 !save last block as next IV
mov %o5, %o1
fxor %f56, %f60, %f60 !add in previous IV
fxor %f58, %f62, %f62
std %f60, [%i2]
std %f62, [%i2 + 0x8]
add %i1, 16, %i1
subcc %i3, 16, %i3
bne cbcdec192_loop
add %i2, 16, %i2 ! delay slot (always executed)
stx %o0, [%i4] ! write back final IV
stx %o1, [%i4 + 8]
ret
restore
SET_SIZE(t4_aes192_cbc_decrypt)
/*
 * Serial AES-256 CBC decrypt; same structure as the 128-bit serial
 * variant (single-block FOURTEEN_DROUNDS does not need %o0, so the IV
 * can live there).
 */
ENTRY(t4_aes256_cbc_decrypt)
save %sp, -SA(MINFRAME), %sp
ldx [%i4], %o0 !IV
ldx [%i4 + 8], %o1 !IV
ldx [%i0 + 0xe0], %o2 !ks[last-1]
ldx [%i0 + 0xe8], %o3 !ks[last]
cbcdec256_loop:
ldx [%i1], %o4
ldx [%i1 + 8], %o5
xor %o2, %o4, %g1 !initial ARK
movxtod %g1, %f60
xor %o3, %o5, %g1 !initial ARK
movxtod %g1, %f62
FOURTEEN_DROUNDS
movxtod %o0, %f56
movxtod %o1, %f58
mov %o4, %o0 !save last block as next IV
mov %o5, %o1
fxor %f56, %f60, %f60 !add in previous IV
fxor %f58, %f62, %f62
std %f60, [%i2]
std %f62, [%i2 + 0x8]
add %i1, 16, %i1
subcc %i3, 16, %i3
bne cbcdec256_loop
add %i2, 16, %i2 ! delay slot (always executed)
stx %o0, [%i4] ! write back final IV
stx %o1, [%i4 + 8]
ret
restore
SET_SIZE(t4_aes256_cbc_decrypt)
#endif
#define TEST_PARALLEL_CFB128_DECRYPT
#ifdef TEST_PARALLEL_CFB128_DECRYPT
/*
 * t4_aes128_cfb128_decrypt(ks, in, out, len, iv) - parallel variant
 *
 * AES-128 CFB128 decryption: P[i] = E_K(C[i-1]) ^ C[i], with C[-1] = IV,
 * so only the forward (encrypt) rounds are needed.  Because each
 * keystream block depends only on ciphertext (not on prior output), two
 * blocks are encrypted in parallel per loop pass via TEN_EROUNDS_2
 * (presumably the interleaved two-block form of TEN_EROUNDS -- confirm
 * against the macro definition).
 *
 * Leaf routine (retl, no register window):
 *   %o0 = key schedule, %o1 = ciphertext in, %o2 = plaintext out,
 *   %o3 = byte count (assumed a multiple of 16 -- TODO confirm),
 *   %o4 = IV, updated on exit.
 * Invariant: %f56:%f58 always hold the newest ciphertext block (the next
 * chaining value); %g1/%g2 cache ks[0]/ks[1] for the initial AddRoundKey.
 */
ENTRY(t4_aes128_cfb128_decrypt)
ldd [%o4], %f56 !IV
ldd [%o4 + 8], %f58 !IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
! If the length is an odd number of 16-byte blocks, peel one single
! block first so the main loop can always consume 32 bytes at a time.
and %o3, 16, %o5
brz %o5, cfb128dec_128_loop
movxtod %g1, %f60 !delay slot - executes even when the branch is
 !taken; harmless, the loop redoes this movxtod
movxtod %g2, %f62
fxor %f60, %f56, %f60 !ks[0..1] ^ IV = initial ARK of E(IV)
fxor %f62, %f58, %f62
/* CFB mode uses encryption for the decrypt operation */
TEN_EROUNDS
ldd [%o1], %f56 !input C[0]; also becomes the next chaining value
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 !P[0] = E(IV) ^ C[0]
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be cfb128dec_128_loop_end !entire buffer was that single block
add %o2, 16, %o2 !delay slot - executes on both paths
cfb128dec_128_loop:
! Two blocks per pass. Entering: %f56:%f58 = previous ciphertext C[i-1].
ldd [%o1], %f6 !input C[i]
ldd [%o1 + 8], %f4 !input
movxtod %g1, %f60
movxtod %g2, %f62
fxor %f60, %f6, %f0 !%f0:%f2 = ks ^ C[i] (ARK for 2nd block)
fxor %f62, %f4, %f2
fxor %f60, %f56, %f60 !%f60:%f62 = ks ^ C[i-1] (ARK for 1st block)
fxor %f62, %f58, %f62
/* CFB mode uses encryption for the decrypt operation */
TEN_EROUNDS_2
ldd [%o1], %f6 !reload C[i] (round regs clobbered the copies)
ldd [%o1 + 8], %f4 !input
ldd [%o1 + 16], %f56 !input C[i+1]; stays as next chaining value
ldd [%o1 + 24], %f58 !input
fxor %f60, %f6, %f6 !P[i]   = E(C[i-1]) ^ C[i]
fxor %f62, %f4, %f4
fxor %f0, %f56, %f60 !P[i+1] = E(C[i]) ^ C[i+1]
fxor %f2, %f58, %f62
std %f6, [%o2]
std %f4, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne cfb128dec_128_loop
add %o2, 32, %o2 !delay slot - executes on both paths
cfb128dec_128_loop_end:
! Persist the last ciphertext block as the IV for a follow-on call.
std %f56, [%o4]
retl
std %f58, [%o4 + 8] !delay slot of retl
SET_SIZE(t4_aes128_cfb128_decrypt)
/*
 * t4_aes192_cfb128_decrypt(ks, in, out, len, iv) - parallel variant
 *
 * AES-192 CFB128 decryption: P[i] = E_K(C[i-1]) ^ C[i], C[-1] = IV; only
 * the forward (encrypt) rounds are used.  Same structure as the AES-128
 * parallel variant above: peel one block if the length is an odd number
 * of blocks, then decrypt two blocks per pass with TWELVE_EROUNDS_2
 * (presumably the interleaved two-block round macro -- confirm against
 * its definition).
 *
 * Leaf routine: %o0 = key schedule, %o1 = ciphertext in, %o2 = plaintext
 * out, %o3 = byte count (assumed a multiple of 16 -- TODO confirm),
 * %o4 = IV, updated on exit.  Invariant: %f56:%f58 hold the newest
 * ciphertext block; %g1/%g2 cache ks[0]/ks[1].
 */
ENTRY(t4_aes192_cfb128_decrypt)
ldd [%o4], %f56 !IV
ldd [%o4 + 8], %f58 !IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
! Peel a single block when (len & 16) != 0 so the loop handles 32 bytes.
and %o3, 16, %o5
brz %o5, cfb128dec_192_loop
movxtod %g1, %f60 !delay slot - also runs when branching; harmless,
 !the loop redoes this movxtod
movxtod %g2, %f62
fxor %f60, %f56, %f60 !ks[0..1] ^ IV = initial ARK of E(IV)
fxor %f62, %f58, %f62
/* CFB mode uses encryption for the decrypt operation */
TWELVE_EROUNDS
ldd [%o1], %f56 !input C[0]; also becomes the next chaining value
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 !P[0] = E(IV) ^ C[0]
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be cfb128dec_192_loop_end !entire buffer was that single block
add %o2, 16, %o2 !delay slot - executes on both paths
cfb128dec_192_loop:
! Two blocks per pass. Entering: %f56:%f58 = previous ciphertext C[i-1].
ldd [%o1], %f6 !input C[i]
ldd [%o1 + 8], %f4 !input
movxtod %g1, %f60
movxtod %g2, %f62
fxor %f60, %f6, %f0 !%f0:%f2 = ks ^ C[i] (ARK for 2nd block)
fxor %f62, %f4, %f2
fxor %f60, %f56, %f60 !%f60:%f62 = ks ^ C[i-1] (ARK for 1st block)
fxor %f62, %f58, %f62
/* CFB mode uses encryption for the decrypt operation */
TWELVE_EROUNDS_2
ldd [%o1], %f6 !reload C[i] (round regs clobbered the copies)
ldd [%o1 + 8], %f4 !input
ldd [%o1 + 16], %f56 !input C[i+1]; stays as next chaining value
ldd [%o1 + 24], %f58 !input
fxor %f60, %f6, %f6 !P[i]   = E(C[i-1]) ^ C[i]
fxor %f62, %f4, %f4
fxor %f0, %f56, %f60 !P[i+1] = E(C[i]) ^ C[i+1]
fxor %f2, %f58, %f62
std %f6, [%o2]
std %f4, [%o2 + 8]
std %f60, [%o2 + 16]
std %f62, [%o2 + 24]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne cfb128dec_192_loop
add %o2, 32, %o2 !delay slot - executes on both paths
cfb128dec_192_loop_end:
! Persist the last ciphertext block as the IV for a follow-on call.
std %f56, [%o4]
retl
std %f58, [%o4 + 8] !delay slot of retl
SET_SIZE(t4_aes192_cfb128_decrypt)
/*
 * t4_aes256_cfb128_decrypt(ks, in, out, len, iv) - parallel variant
 *
 * AES-256 CFB128 decryption: P[i] = E_K(C[i-1]) ^ C[i], C[-1] = IV; only
 * the forward (encrypt) rounds are used.  Same peel-then-2-blocks-per-
 * pass structure as the 128/192 variants, but the scratch pair here is
 * %f20:%f22 instead of %f0..%f6 -- presumably because FOURTEEN_EROUNDS_2
 * needs the lower FP registers for the larger AES-256 schedule (confirm
 * against the macro definition).
 *
 * Leaf routine: %o0 = key schedule, %o1 = ciphertext in, %o2 = plaintext
 * out, %o3 = byte count (assumed a multiple of 16 -- TODO confirm),
 * %o4 = IV, updated on exit.  Invariant: %f56:%f58 hold the newest
 * ciphertext block; %g1/%g2 cache ks[0]/ks[1].
 */
ENTRY(t4_aes256_cfb128_decrypt)
ldd [%o4], %f56 !IV
ldd [%o4 + 8], %f58 !IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
! Peel a single block when (len & 16) != 0 so the loop handles 32 bytes.
and %o3, 16, %o5
brz %o5, cfb128dec_256_loop
movxtod %g1, %f60 !delay slot - also runs when branching; harmless,
 !the loop redoes this movxtod
movxtod %g2, %f62
fxor %f60, %f56, %f60 !ks[0..1] ^ IV = initial ARK of E(IV)
fxor %f62, %f58, %f62
/* CFB mode uses encryption for the decrypt operation */
FOURTEEN_EROUNDS
ldd [%o1], %f56 !input C[0]; also becomes the next chaining value
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 !P[0] = E(IV) ^ C[0]
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3
be cfb128dec_256_loop_end !entire buffer was that single block
add %o2, 16, %o2 !delay slot - executes on both paths
cfb128dec_256_loop:
! Two blocks per pass. Entering: %f56:%f58 = previous ciphertext C[i-1].
ldd [%o1], %f20 !input C[i]
ldd [%o1 + 8], %f22 !input
movxtod %g1, %f60
movxtod %g2, %f62
fxor %f60, %f20, %f20 !%f20:%f22 = ks ^ C[i] (ARK for 2nd block)
fxor %f62, %f22, %f22
fxor %f60, %f56, %f60 !%f60:%f62 = ks ^ C[i-1] (ARK for 1st block)
fxor %f62, %f58, %f62
/* CFB mode uses encryption for the decrypt operation */
FOURTEEN_EROUNDS_2
! Second block first: its inputs are still live in %f20:%f22.
ldd [%o1 + 16], %f56 !input C[i+1]; stays as next chaining value
ldd [%o1 + 24], %f58 !input
fxor %f20, %f56, %f20 !P[i+1] = E(C[i]) ^ C[i+1]
fxor %f22, %f58, %f22
std %f20, [%o2 + 16]
std %f22, [%o2 + 24]
ldd [%o1], %f20 !reload C[i] (regs were reused above)
ldd [%o1 + 8], %f22 !input
fxor %f60, %f20, %f20 !P[i] = E(C[i-1]) ^ C[i]
fxor %f62, %f22, %f22
std %f20, [%o2]
std %f22, [%o2 + 8]
add %o1, 32, %o1
subcc %o3, 32, %o3
bne cfb128dec_256_loop
add %o2, 32, %o2 !delay slot - executes on both paths
! Fall-through only: reload ks[12]/ks[13] (offsets 0x60/0x68) into
! %f20:%f22 -- presumably restoring round-key words this loop displaced
! so the caller's FP-resident key schedule stays intact.  NOTE(review):
! the single-block path branches straight to loop_end and skips this
! restore; confirm that path leaves %f20:%f22 unclobbered.
ldd [%o0 + 0x60], %f20
ldd [%o0 + 0x68], %f22
cfb128dec_256_loop_end:
! Persist the last ciphertext block as the IV for a follow-on call.
std %f56, [%o4]
retl
std %f58, [%o4 + 8] !delay slot of retl
SET_SIZE(t4_aes256_cfb128_decrypt)
#else
/*
 * t4_aes128_cfb128_decrypt(ks, in, out, len, iv) - simple variant
 * (compiled only when TEST_PARALLEL_CFB128_DECRYPT is not defined).
 *
 * AES-128 CFB128 decryption, one block per iteration:
 * P[i] = E_K(C[i-1]) ^ C[i], with C[-1] = IV, so only the forward
 * (encrypt) rounds are used (TEN_EROUNDS on %f60:%f62).
 *
 * Leaf routine: %o0 = key schedule, %o1 = ciphertext in, %o2 = plaintext
 * out, %o3 = byte count (assumed a nonzero multiple of 16 -- TODO
 * confirm), %o4 = IV, updated on exit.  Invariant: %f56:%f58 hold the
 * current chaining value; %g1/%g2 cache ks[0]/ks[1].
 */
ENTRY(t4_aes128_cfb128_decrypt)
ldd [%o4], %f56 !IV
ldd [%o4 + 8], %f58 !IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
cfb128dec_128_loop:
movxtod %g1, %f60
movxtod %g2, %f62
fxor %f60, %f56, %f60 !ks[0..1] ^ chaining value = initial ARK
fxor %f62, %f58, %f62
/* CFB mode uses encryption for the decrypt operation */
TEN_EROUNDS
ldd [%o1], %f56 !input C[i]; also becomes the next chaining value
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 !P[i] = E(C[i-1]) ^ C[i]
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3 !one block consumed
bne cfb128dec_128_loop
add %o2, 16, %o2 !branch delay slot - executes on both paths
! Persist the last ciphertext block as the IV for a follow-on call.
std %f56, [%o4]
retl
std %f58, [%o4 + 8] !delay slot of retl
SET_SIZE(t4_aes128_cfb128_decrypt)
/*
 * t4_aes192_cfb128_decrypt(ks, in, out, len, iv) - simple variant
 * (compiled only when TEST_PARALLEL_CFB128_DECRYPT is not defined).
 *
 * AES-192 CFB128 decryption, one block per iteration:
 * P[i] = E_K(C[i-1]) ^ C[i], with C[-1] = IV; forward (encrypt) rounds
 * only (TWELVE_EROUNDS on %f60:%f62).
 *
 * Leaf routine: %o0 = key schedule, %o1 = ciphertext in, %o2 = plaintext
 * out, %o3 = byte count (assumed a nonzero multiple of 16 -- TODO
 * confirm), %o4 = IV, updated on exit.  Invariant: %f56:%f58 hold the
 * current chaining value; %g1/%g2 cache ks[0]/ks[1].
 */
ENTRY(t4_aes192_cfb128_decrypt)
ldd [%o4], %f56 !IV
ldd [%o4 + 8], %f58 !IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
cfb128dec_192_loop:
movxtod %g1, %f60
movxtod %g2, %f62
fxor %f60, %f56, %f60 !ks[0..1] ^ chaining value = initial ARK
fxor %f62, %f58, %f62
/* CFB mode uses encryption for the decrypt operation */
TWELVE_EROUNDS
ldd [%o1], %f56 !input C[i]; also becomes the next chaining value
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 !P[i] = E(C[i-1]) ^ C[i]
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3 !one block consumed
bne cfb128dec_192_loop
add %o2, 16, %o2 !branch delay slot - executes on both paths
! Persist the last ciphertext block as the IV for a follow-on call.
std %f56, [%o4]
retl
std %f58, [%o4 + 8] !delay slot of retl
SET_SIZE(t4_aes192_cfb128_decrypt)
/*
 * t4_aes256_cfb128_decrypt(ks, in, out, len, iv) - simple variant
 * (compiled only when TEST_PARALLEL_CFB128_DECRYPT is not defined).
 *
 * AES-256 CFB128 decryption, one block per iteration:
 * P[i] = E_K(C[i-1]) ^ C[i], with C[-1] = IV; forward (encrypt) rounds
 * only (FOURTEEN_EROUNDS on %f60:%f62).
 *
 * Leaf routine: %o0 = key schedule, %o1 = ciphertext in, %o2 = plaintext
 * out, %o3 = byte count (assumed a nonzero multiple of 16 -- TODO
 * confirm), %o4 = IV, updated on exit.  Invariant: %f56:%f58 hold the
 * current chaining value; %g1/%g2 cache ks[0]/ks[1].
 */
ENTRY(t4_aes256_cfb128_decrypt)
ldd [%o4], %f56 !IV
ldd [%o4 + 8], %f58 !IV
ldx [%o0], %g1 ! ks[0]
ldx [%o0 + 8], %g2 ! ks[1]
cfb128dec_256_loop:
movxtod %g1, %f60
movxtod %g2, %f62
fxor %f60, %f56, %f60 !ks[0..1] ^ chaining value = initial ARK
fxor %f62, %f58, %f62
/* CFB mode uses encryption for the decrypt operation */
FOURTEEN_EROUNDS
ldd [%o1], %f56 !input C[i]; also becomes the next chaining value
ldd [%o1 + 8], %f58 !input
fxor %f60, %f56, %f60 !P[i] = E(C[i-1]) ^ C[i]
fxor %f62, %f58, %f62
std %f60, [%o2]
std %f62, [%o2 + 8]
add %o1, 16, %o1
subcc %o3, 16, %o3 !one block consumed
bne cfb128dec_256_loop
add %o2, 16, %o2 !branch delay slot - executes on both paths
! Persist the last ciphertext block as the IV for a follow-on call.
std %f56, [%o4]
retl
std %f58, [%o4 + 8] !delay slot of retl
SET_SIZE(t4_aes256_cfb128_decrypt)
#endif
#endif /* lint || __lint */