/*
* ---------------------------------------------------------------------------
* Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
*
* LICENSE TERMS
*
* The free distribution and use of this software is allowed (with or without
* changes) provided that:
*
* 1. source code distributions include the above copyright notice, this
* list of conditions and the following disclaimer;
*
* 2. binary distributions include the above copyright notice, this list
* of conditions and the following disclaimer in their documentation;
*
* 3. the name of the copyright holder is not used to endorse products
* built using this software without specific written permission.
*
* DISCLAIMER
*
* This software is provided 'as is' with no explicit or implied warranties
* in respect of its properties, including, but not limited to, correctness
* and/or fitness for purpose.
* ---------------------------------------------------------------------------
* Issue 20/12/2007
*
* I am grateful to Dag Arne Osvik for many discussions of the techniques that
* can be used to optimise AES assembler code on AMD64/EM64T architectures.
* Some of the techniques used in this implementation are the result of
* suggestions made by him for which I am most grateful.
*
* An AES implementation for AMD64 processors using the YASM assembler. This
* implementation provides only encryption, decryption and hence requires key
* scheduling support in C. It uses 8k bytes of tables but its encryption and
* decryption performance is very close to that obtained using large tables.
* It can use either MS Windows or Gnu/Linux/OpenSolaris OS calling conventions,
* which are as follows:
*                   ms windows    gnu/linux/opensolaris os
*
* in_blk            rcx           rdi
* out_blk           rdx           rsi
* context (cx)      r8            rdx
*
* preserved         rsi           -      + rbx, rbp, rsp, r12, r13, r14 & r15
* registers         rdi           -        on both
*
* destroyed         -             rsi    + rax, rcx, rdx, r8, r9, r10 & r11
* registers         -             rdi      on both
*
* The convention used here is that for gnu/linux/opensolaris os.
*
* This code provides the standard AES block size (128 bits, 16 bytes) and the
* three standard AES key sizes (128, 192 and 256 bits). It has the same call
* interface as my C implementation. It uses the Microsoft C AMD64 calling
* conventions in which the three parameters are placed in rcx, rdx and r8
* respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved.
*
* OpenSolaris Note:
* Modified to use GNU/Linux/Solaris calling conventions.
* That is, parameters are placed in rdi, rsi, rdx, and rcx, respectively.
*
* AES_RETURN aes_encrypt(const unsigned char in_blk[],
*     unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
*
* AES_RETURN aes_decrypt(const unsigned char in_blk[],
*     unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
*
* AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
*     const aes_encrypt_ctx cx[1]);
*
* AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
*     const aes_decrypt_ctx cx[1]);
*
* AES_RETURN aes_encrypt_key(const unsigned char key[],
*     unsigned int len, const aes_encrypt_ctx cx[1]);
*
* AES_RETURN aes_decrypt_key(const unsigned char key[],
*     unsigned int len, const aes_decrypt_ctx cx[1]);
*
* where <NNN> is 128, 192 or 256. In the last two calls the length can be in
* either bits or bytes.
*
* The compile-time selections used in this file (for example AES_REV_DKS and
* LAST_ROUND_TABLES) MUST match those in the C header file aesopt.h
*/
/*
* The encryption key schedule has the following in memory layout where N is the
* number of rounds (10, 12 or 14):
*
* lo: | input key (round 0)  |  ; each round is four 32-bit words
*     | encryption round 1   |
*     | encryption round 2   |
*     ....
*     | encryption round N-1 |
* hi: | encryption round N   |
*
* The decryption key schedule is normally set up so that it has the same
* layout as above by actually reversing the order of the encryption key
* schedule in memory (this happens when AES_REV_DKS is set):
*
* lo: | decryption round 0   | =              | encryption round N   |
*     | decryption round 1   | = INV_MIX_COL[ | encryption round N-1 | ]
*     | decryption round 2   | = INV_MIX_COL[ | encryption round N-2 | ]
*     ....                       ....
*     | decryption round N-1 | = INV_MIX_COL[ | encryption round 1   | ]
* hi: | decryption round N   | =              | input key (round 0)  |
*
* with rounds except the first and last modified using inv_mix_column()
* But if AES_REV_DKS is NOT set the order of keys is left as it is for
* encryption so that it has to be accessed in reverse when used for
* decryption (although the inverse mix column modifications are done)
*
* lo: | decryption round 0   | =              | input key (round 0)  |
*     | decryption round 1   | = INV_MIX_COL[ | encryption round 1   | ]
*     | decryption round 2   | = INV_MIX_COL[ | encryption round 2   | ]
*     ....                       ....
*     | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
* hi: | decryption round N   | =              | encryption round N   |
*
* This layout is faster when the assembler key scheduling provided here
* is used.
*
* End of user defines
*/
/*
* ---------------------------------------------------------------------------
* OpenSolaris OS modifications
*
* This source originates from Brian Gladman file aes_amd64.asm
* with these changes:
*
* 1. Removed MS Windows-specific code within DLL_EXPORT, _SEH_, and
* !__GNUC__ ifdefs. Also removed ENCRYPTION, DECRYPTION,
* AES_128, AES_192, AES_256, AES_VAR ifdefs.
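*
* 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define
*
* 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef
*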
* 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax
* (operands reversed, literals prefixed with "$", registers prefixed with "%",
* and "[register+offset]", addressing changed to "offset(register)",
* parenthesis in constant expressions "()" changed to square brackets "[]",
* "." removed from local (numeric) labels, and other changes.
* Examples:
*   Intel/yasm/nasm Syntax         ATT/OpenSolaris Syntax
*   mov rax,(4*20h)                mov $[4*0x20],%rax
*   mov rax,[ebx+20h]              mov 0x20(%ebx),%rax
*   lea rax,[ebx+ecx]              lea (%ebx,%ecx),%rax
*   sub rax,[ebx+ecx*4-20h]        sub -0x20(%ebx,%ecx,4),%rax
*
* 5. Added use of /usr/include/sys/asm_linkage.h, lint(1B) guards, and
* dummy C function definitions for lint.
*
* 6. Renamed functions and reordered parameters to match OpenSolaris:
* Original Gladman interface:
*     int aes_encrypt(const unsigned char *in,
*         unsigned char *out, const aes_encrypt_ctx cx[1]);
*     int aes_decrypt(const unsigned char *in,
*         unsigned char *out, const aes_decrypt_ctx cx[1]);
* Note: aes_encrypt_ctx contains ks, a 60 element array of uint32_t,
* and a union type, inf., containing inf.l, a uint32_t and
* inf.b, a 4-element array of uint32_t. Only b[0] in the array (aka "l") is
* used and contains the key schedule length * 16 where key schedule length is
* 10, 12, or 14 bytes.
*
* OpenSolaris OS interface:
*     void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
*         const uint32_t pt[4], uint32_t ct[4]);
*     void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
*         const uint32_t pt[4], uint32_t ct[4]);
*     typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
*         uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
* Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
* ct is crypto text, and MAX_AES_NR is 14.
* For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
*/
/*
* NOTE(review): the two fragments below appear to be what is left of the
* dummy C function definitions for lint that the file header mentions.
* Only the return type and the closing brace of each are visible; the
* function names, parameter lists, opening braces and the opening
* preprocessor conditional paired with the "#else" below are not, so
* confirm against the upstream source before relying on this section.
*/
/* ARGSUSED */
void
}
/* ARGSUSED */
void
}
#else
/* System header the file header refers to as /usr/include/sys/asm_linkage.h. */
#include <sys/asm_linkage.h>
/*
* w8(x) expands to eight comma-separated values: [x], 0, 0, 0, [x], 0, 0, 0.
* The square brackets are the grouping form used for constant expressions
* in this syntax (see the translation notes in the file header).
*/
#define w8(x) [x], 0, 0, 0, [x], 0, 0, 0
/*
* NOTE(review): for the rest of this preprocessor section only the macro
* names (enc_vals, dec_vals), the AES_REV_DKS / LAST_ROUND_TABLES
* conditionals and bare line-continuation backslashes are visible; the
* macro bodies are not. A trailing backslash splices the next line onto
* the current one, so nothing should be inserted between these lines
* until the bodies are restored from the upstream source.
*/
#define enc_vals(x) \
#define dec_vals(x) \
#ifdef AES_REV_DKS
#else
#endif /* AES_REV_DKS */
\
\
\
\
\
#ifdef LAST_ROUND_TABLES
\
\
\
\
#else
\
\
\
\
#endif /* LAST_ROUND_TABLES */
\
\
\
\
\
#ifdef LAST_ROUND_TABLES
\
\
\
\
#else
\
\
\
\
#endif /* LAST_ROUND_TABLES */
/*
* OpenSolaris OS:
*     void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
*         const uint32_t pt[4], uint32_t ct[4]);
*
* Original interface:
*     int aes_encrypt(const unsigned char *in,
*         unsigned char *out, const aes_encrypt_ctx cx[1]);
*
* NOTE(review): only the directive skeleton and the final branches of this
* routine are visible below; the entry label, the table data and the round
* code are not. Confirm any change against the upstream source.
*/
/* Alignment request of 64 for whatever follows (not visible here). */
.align 64
/* Section conditional on LAST_ROUND_TABLES; its contents are not visible. */
#ifdef LAST_ROUND_TABLES
#endif
/*
* Conditional on GLADMAN_INTERFACE, with an alternative branch otherwise.
* Presumably this chooses between the original and the OpenSolaris
* interfaces described above -- contents not visible, TODO confirm.
*/
#ifdef GLADMAN_INTERFACE
#else
#endif /* GLADMAN_INTERFACE */
/*
* Branch on equality to the next local labels 3, 2 and 1 in turn, and
* otherwise jump unconditionally to the next local label 4. The
* instructions that set the flags and the label targets are not visible.
* NOTE(review): presumably this selects the code path by number of rounds
* (10, 12 or 14), with label 4 as the common exit -- confirm.
*/
je 3f
je 2f
je 1f
jmp 4f
/*
* OpenSolaris OS:
*     void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
*         const uint32_t pt[4], uint32_t ct[4]);
*
* Original interface:
*     int aes_decrypt(const unsigned char *in,
*         unsigned char *out, const aes_decrypt_ctx cx[1]);
*
* NOTE(review): only the directive skeleton and the final branches of this
* routine are visible below; the entry label, the table data and the round
* code are not. Confirm any change against the upstream source.
*/
/* Alignment request of 64 for whatever follows (not visible here). */
.align 64
/* Section conditional on LAST_ROUND_TABLES; its contents are not visible. */
#ifdef LAST_ROUND_TABLES
#endif
/*
* Conditional on GLADMAN_INTERFACE, with an alternative branch otherwise.
* Presumably this chooses between the original and the OpenSolaris
* interfaces described above -- contents not visible, TODO confirm.
*/
#ifdef GLADMAN_INTERFACE
#else
#endif /* GLADMAN_INTERFACE */
/*
* Conditional on AES_REV_DKS, which the file header describes as the
* setting under which the decryption key schedule is stored in reversed
* order. The code in either branch is not visible here.
*/
#ifdef AES_REV_DKS
#else
#endif
/*
* Branch on equality to the next local labels 3, 2 and 1 in turn, and
* otherwise jump unconditionally to the next local label 4. The
* instructions that set the flags and the label targets are not visible.
* NOTE(review): presumably this selects the code path by number of rounds
* (10, 12 or 14), with label 4 as the common exit -- confirm.
*/
je 3f
je 2f
je 1f
jmp 4f
/* Closes the lint / __lint conditional whose "#else" precedes the assembly. */
#endif /* lint || __lint */