90bcde942a3919300ffc73f98ea903b58386c395da/*
90bcde942a3919300ffc73f98ea903b58386c395da * ---------------------------------------------------------------------------
90bcde942a3919300ffc73f98ea903b58386c395da * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * LICENSE TERMS
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * The free distribution and use of this software is allowed (with or without
90bcde942a3919300ffc73f98ea903b58386c395da * changes) provided that:
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * 1. source code distributions include the above copyright notice, this
90bcde942a3919300ffc73f98ea903b58386c395da * list of conditions and the following disclaimer;
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * 2. binary distributions include the above copyright notice, this list
90bcde942a3919300ffc73f98ea903b58386c395da * of conditions and the following disclaimer in their documentation;
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * 3. the name of the copyright holder is not used to endorse products
90bcde942a3919300ffc73f98ea903b58386c395da * built using this software without specific written permission.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * DISCLAIMER
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * This software is provided 'as is' with no explicit or implied warranties
90bcde942a3919300ffc73f98ea903b58386c395da * in respect of its properties, including, but not limited to, correctness
90bcde942a3919300ffc73f98ea903b58386c395da * and/or fitness for purpose.
90bcde942a3919300ffc73f98ea903b58386c395da * ---------------------------------------------------------------------------
90bcde942a3919300ffc73f98ea903b58386c395da * Issue 20/12/2007
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * I am grateful to Dag Arne Osvik for many discussions of the techniques that
90bcde942a3919300ffc73f98ea903b58386c395da * can be used to optimise AES assembler code on AMD64/EM64T architectures.
90bcde942a3919300ffc73f98ea903b58386c395da * Some of the techniques used in this implementation are the result of
90bcde942a3919300ffc73f98ea903b58386c395da * suggestions made by him for which I am most grateful.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * An AES implementation for AMD64 processors using the YASM assembler. This
90bcde942a3919300ffc73f98ea903b58386c395da * implementation provides only encryption, decryption and hence requires key
90bcde942a3919300ffc73f98ea903b58386c395da * scheduling support in C. It uses 8k bytes of tables but its encryption and
90bcde942a3919300ffc73f98ea903b58386c395da * decryption performance is very close to that obtained using large tables.
90bcde942a3919300ffc73f98ea903b58386c395da * It can use either MS Windows or Gnu/Linux/OpenSolaris OS calling conventions,
90bcde942a3919300ffc73f98ea903b58386c395da * which are as follows:
90bcde942a3919300ffc73f98ea903b58386c395da *                  ms windows        gnu/linux/opensolaris os
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * in_blk              rcx                   rdi
90bcde942a3919300ffc73f98ea903b58386c395da * out_blk             rdx                   rsi
90bcde942a3919300ffc73f98ea903b58386c395da * context (cx)        r8                    rdx
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * preserved           rsi          -        + rbx, rbp, rsp, r12, r13, r14 & r15
90bcde942a3919300ffc73f98ea903b58386c395da * registers           rdi          -          on both
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * destroyed            -          rsi       + rax, rcx, rdx, r8, r9, r10 & r11
90bcde942a3919300ffc73f98ea903b58386c395da * registers            -          rdi         on both
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * The convention used here is that for gnu/linux/opensolaris os.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * This code provides the standard AES block size (128 bits, 16 bytes) and the
90bcde942a3919300ffc73f98ea903b58386c395da * three standard AES key sizes (128, 192 and 256 bits). It has the same call
90bcde942a3919300ffc73f98ea903b58386c395da * interface as my C implementation. It uses the Microsoft C AMD64 calling
90bcde942a3919300ffc73f98ea903b58386c395da * conventions in which the three parameters are placed in rcx, rdx and r8
90bcde942a3919300ffc73f98ea903b58386c395da * respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * OpenSolaris Note:
90bcde942a3919300ffc73f98ea903b58386c395da * Modified to use GNU/Linux/Solaris calling conventions.
90bcde942a3919300ffc73f98ea903b58386c395da * That is parameters are placed in rdi, rsi, rdx, and rcx, respectively.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * AES_RETURN aes_encrypt(const unsigned char in_blk[],
90bcde942a3919300ffc73f98ea903b58386c395da * unsigned char out_blk[], const aes_encrypt_ctx cx[1])/
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * AES_RETURN aes_decrypt(const unsigned char in_blk[],
90bcde942a3919300ffc73f98ea903b58386c395da * unsigned char out_blk[], const aes_decrypt_ctx cx[1])/
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
90bcde942a3919300ffc73f98ea903b58386c395da * const aes_encrypt_ctx cx[1])/
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
90bcde942a3919300ffc73f98ea903b58386c395da * const aes_decrypt_ctx cx[1])/
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * AES_RETURN aes_encrypt_key(const unsigned char key[],
90bcde942a3919300ffc73f98ea903b58386c395da * unsigned int len, const aes_encrypt_ctx cx[1])/
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * AES_RETURN aes_decrypt_key(const unsigned char key[],
90bcde942a3919300ffc73f98ea903b58386c395da * unsigned int len, const aes_decrypt_ctx cx[1])/
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * where <NNN> is 128, 192 or 256. In the last two calls the length can be in
90bcde942a3919300ffc73f98ea903b58386c395da * either bits or bytes.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * Comment in/out the following lines to obtain the desired subroutines. These
90bcde942a3919300ffc73f98ea903b58386c395da * selections MUST match those in the C header file aesopt.h
90bcde942a3919300ffc73f98ea903b58386c395da */
/*
 * Build-time switches. Both are tested with #ifdef further down, so only
 * their presence matters, not their value.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define AES_REV_DKS /* define if the decryption key schedule is stored in reversed order; selects which rofs/ik_ref definitions are used */
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#define LAST_ROUND_TABLES /* define for the faster version, whose last round uses extra tables */
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da/*
90bcde942a3919300ffc73f98ea903b58386c395da * The encryption key schedule has the following in memory layout where N is the
90bcde942a3919300ffc73f98ea903b58386c395da * number of rounds (10, 12 or 14):
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * lo: | input key (round 0) | / each round is four 32-bit words
90bcde942a3919300ffc73f98ea903b58386c395da * | encryption round 1 |
90bcde942a3919300ffc73f98ea903b58386c395da * | encryption round 2 |
90bcde942a3919300ffc73f98ea903b58386c395da * ....
90bcde942a3919300ffc73f98ea903b58386c395da * | encryption round N-1 |
90bcde942a3919300ffc73f98ea903b58386c395da * hi: | encryption round N |
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * The decryption key schedule is normally set up so that it has the same
90bcde942a3919300ffc73f98ea903b58386c395da * layout as above by actually reversing the order of the encryption key
90bcde942a3919300ffc73f98ea903b58386c395da * schedule in memory (this happens when AES_REV_DKS is set):
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * lo: | decryption round 0 | = | encryption round N |
90bcde942a3919300ffc73f98ea903b58386c395da * | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
90bcde942a3919300ffc73f98ea903b58386c395da * | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
90bcde942a3919300ffc73f98ea903b58386c395da * .... ....
90bcde942a3919300ffc73f98ea903b58386c395da * | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
90bcde942a3919300ffc73f98ea903b58386c395da * hi: | decryption round N | = | input key (round 0) |
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * with rounds except the first and last modified using inv_mix_column()
90bcde942a3919300ffc73f98ea903b58386c395da * But if AES_REV_DKS is NOT set the order of keys is left as it is for
90bcde942a3919300ffc73f98ea903b58386c395da * encryption so that it has to be accessed in reverse when used for
90bcde942a3919300ffc73f98ea903b58386c395da * decryption (although the inverse mix column modifications are done)
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * lo: | decryption round 0 | = | input key (round 0) |
90bcde942a3919300ffc73f98ea903b58386c395da * | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
90bcde942a3919300ffc73f98ea903b58386c395da * | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
90bcde942a3919300ffc73f98ea903b58386c395da * .... ....
90bcde942a3919300ffc73f98ea903b58386c395da * | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
90bcde942a3919300ffc73f98ea903b58386c395da * hi: | decryption round N | = | encryption round N |
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * This layout is faster when the assembler key scheduling provided here
90bcde942a3919300ffc73f98ea903b58386c395da * is used.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * End of user defines
90bcde942a3919300ffc73f98ea903b58386c395da */
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da/*
90bcde942a3919300ffc73f98ea903b58386c395da * ---------------------------------------------------------------------------
90bcde942a3919300ffc73f98ea903b58386c395da * OpenSolaris OS modifications
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * This source originates from Brian Gladman file aes_amd64.asm
90bcde942a3919300ffc73f98ea903b58386c395da * in http://fp.gladman.plus.com/AES/aes-src-04-03-08.zip
90bcde942a3919300ffc73f98ea903b58386c395da * with these changes:
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * 1. Removed MS Windows-specific code within DLL_EXPORT, _SEH_, and
90bcde942a3919300ffc73f98ea903b58386c395da * !__GNUC__ ifdefs. Also removed ENCRYPTION, DECRYPTION,
90bcde942a3919300ffc73f98ea903b58386c395da * AES_128, AES_192, AES_256, AES_VAR ifdefs.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax
90bcde942a3919300ffc73f98ea903b58386c395da * (operands reversed, literals prefixed with "$", registers prefixed with "%",
90bcde942a3919300ffc73f98ea903b58386c395da * "[register+offset]" addressing changed to "offset(register)",
90bcde942a3919300ffc73f98ea903b58386c395da * parentheses in constant expressions "()" changed to square brackets "[]",
90bcde942a3919300ffc73f98ea903b58386c395da * "." removed from local (numeric) labels, and other changes).
90bcde942a3919300ffc73f98ea903b58386c395da * Examples:
90bcde942a3919300ffc73f98ea903b58386c395da * Intel/yasm/nasm Syntax ATT/OpenSolaris Syntax
90bcde942a3919300ffc73f98ea903b58386c395da * mov rax,(4*20h) mov $[4*0x20],%rax
90bcde942a3919300ffc73f98ea903b58386c395da * mov rax,[ebx+20h] mov 0x20(%ebx),%rax
90bcde942a3919300ffc73f98ea903b58386c395da * lea rax,[ebx+ecx] lea (%ebx,%ecx),%rax
90bcde942a3919300ffc73f98ea903b58386c395da * sub rax,[ebx+ecx*4-20h] sub -0x20(%ebx,%ecx,4),%rax
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
694c35faa87b858ecdadfe4fc592615f4eefbb07Josef 'Jeff' Sipek * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
694c35faa87b858ecdadfe4fc592615f4eefbb07Josef 'Jeff' Sipek * definitions for lint.
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * 6. Renamed functions and reordered parameters to match OpenSolaris:
90bcde942a3919300ffc73f98ea903b58386c395da * Original Gladman interface:
90bcde942a3919300ffc73f98ea903b58386c395da * int aes_encrypt(const unsigned char *in,
90bcde942a3919300ffc73f98ea903b58386c395da * unsigned char *out, const aes_encrypt_ctx cx[1])/
90bcde942a3919300ffc73f98ea903b58386c395da * int aes_decrypt(const unsigned char *in,
90bcde942a3919300ffc73f98ea903b58386c395da * unsigned char *out, const aes_decrypt_ctx cx[1])/
90bcde942a3919300ffc73f98ea903b58386c395da * Note: aes_encrypt_ctx contains ks, a 60 element array of uint32_t,
90bcde942a3919300ffc73f98ea903b58386c395da * and a union type, inf., containing inf.l, a uint32_t and
90bcde942a3919300ffc73f98ea903b58386c395da * inf.b, a 4-element array of uint32_t. Only b[0] in the array (aka "l") is
90bcde942a3919300ffc73f98ea903b58386c395da * used and contains the key schedule length * 16 where key schedule length is
90bcde942a3919300ffc73f98ea903b58386c395da * the number of rounds (10, 12, or 14).
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * OpenSolaris OS interface:
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
90bcde942a3919300ffc73f98ea903b58386c395da * const uint32_t pt[4], uint32_t ct[4])/
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
90bcde942a3919300ffc73f98ea903b58386c395da * const uint32_t pt[4], uint32_t ct[4])/
90bcde942a3919300ffc73f98ea903b58386c395da * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]/
90bcde942a3919300ffc73f98ea903b58386c395da * uint32_t ks32[(MAX_AES_NR + 1) * 4]/ } aes_ks_t/
90bcde942a3919300ffc73f98ea903b58386c395da * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
90bcde942a3919300ffc73f98ea903b58386c395da * ct is crypto text, and MAX_AES_NR is 14.
90bcde942a3919300ffc73f98ea903b58386c395da * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
90bcde942a3919300ffc73f98ea903b58386c395da */
90bcde942a3919300ffc73f98ea903b58386c395da
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson#if defined(lint) || defined(__lint)
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson#include <sys/types.h>
/*
 * Lint-only stand-in for aes_encrypt_amd64. This empty C definition is
 * compiled only when lint or __lint is defined, so that lint sees a
 * definition of the symbol; the non-lint branch of this #if holds the
 * assembler source instead.
 *
 * Parameters, as named in this stub (all unused here):
 *   rk  - array of 32-bit key schedule words
 *   Nr  - number of rounds
 *   pt  - four 32-bit words of plain text (input)
 *   ct  - four 32-bit words of crypto text (output)
 */
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson/* ARGSUSED */
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Andersonvoid
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Andersonaes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4],
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson uint32_t ct[4]) {
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson}
/*
 * Lint-only stand-in for aes_decrypt_amd64. This empty C definition is
 * compiled only when lint or __lint is defined, so that lint sees a
 * definition of the symbol; the non-lint branch of this #if holds the
 * assembler source instead.
 *
 * Parameters, as named in this stub (all unused here):
 *   rk  - array of 32-bit key schedule words
 *   Nr  - number of rounds
 *   ct  - four 32-bit words of crypto text (input)
 *   pt  - four 32-bit words of plain text (output)
 */
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson/* ARGSUSED */
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Andersonvoid
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Andersonaes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson uint32_t pt[4]) {
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson}
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson#else
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson
90bcde942a3919300ffc73f98ea903b58386c395da#include <sys/asm_linkage.h>
90bcde942a3919300ffc73f98ea903b58386c395da
/* Key schedule length constant: 60. NOTE(review): presumably counted in 32-bit words -- confirm. */
90bcde942a3919300ffc73f98ea903b58386c395da#define KS_LENGTH 60
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * Register-name aliases: the "d" suffix maps a 64-bit register name to the
 * name of its 32-bit low half.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define raxd eax
90bcde942a3919300ffc73f98ea903b58386c395da#define rdxd edx
90bcde942a3919300ffc73f98ea903b58386c395da#define rcxd ecx
90bcde942a3919300ffc73f98ea903b58386c395da#define rbxd ebx
90bcde942a3919300ffc73f98ea903b58386c395da#define rsid esi
90bcde942a3919300ffc73f98ea903b58386c395da#define rdid edi
90bcde942a3919300ffc73f98ea903b58386c395da
/* The "b" suffix maps a 64-bit register name to the name of its low byte. */
90bcde942a3919300ffc73f98ea903b58386c395da#define raxb al
90bcde942a3919300ffc73f98ea903b58386c395da#define rdxb dl
90bcde942a3919300ffc73f98ea903b58386c395da#define rcxb cl
90bcde942a3919300ffc73f98ea903b58386c395da#define rbxb bl
90bcde942a3919300ffc73f98ea903b58386c395da#define rsib sil
90bcde942a3919300ffc73f98ea903b58386c395da#define rdib dil
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * Constant-expression helpers that multiply a byte value x by a small
 * constant in the AES finite field. Square brackets are the assembler's
 * grouping syntax for constant expressions (used where C would use
 * parentheses). The argument is pasted textually, so x should be a simple
 * constant.
 *
 * f2(x): x << 1, with 0x11b XORed in when bit 7 of x is set.
 * f4(x): x << 2, with 0x11b XORed in when bit 6 is set and 2*0x11b when
 *        bit 7 is set.
 * f8(x): x << 3, with 0x11b, 2*0x11b and 4*0x11b XORed in when bits 5, 6
 *        and 7 of x are set, respectively.
 */
90bcde942a3919300ffc73f98ea903b58386c395da/ finite field multiplies by {02}, {04} and {08}
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]]
90bcde942a3919300ffc73f98ea903b58386c395da#define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]]
90bcde942a3919300ffc73f98ea903b58386c395da#define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]]
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * Multiplies by {03}, {09}, {0b}, {0d} and {0e}, each formed by XORing
 * together the f2/f4/f8 products (and x itself) selected by the bits of
 * the multiplier.
 */
90bcde942a3919300ffc73f98ea903b58386c395da/ finite field multiplies required in table generation
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#define f3(x) [[f2(x)] ^ [x]]
90bcde942a3919300ffc73f98ea903b58386c395da#define f9(x) [[f8(x)] ^ [x]]
90bcde942a3919300ffc73f98ea903b58386c395da#define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]]
90bcde942a3919300ffc73f98ea903b58386c395da#define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]]
90bcde942a3919300ffc73f98ea903b58386c395da#define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]]
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * Each macro expands one byte value x into a comma-separated list of eight
 * byte expressions, i.e. one 8-byte table entry:
 *
 *   u8(x): {02}x, x, x, {03}x, repeated twice.
 *   v8(x): {0e}x, {09}x, {0d}x, {0b}x, then {0e}x, {09}x, {0d}x, x
 *          (the final byte is x itself, not {0b}x).
 *   w8(x): x, 0, 0, 0, repeated twice.
 */
90bcde942a3919300ffc73f98ea903b58386c395da/ macros for expanding S-box data
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)]
90bcde942a3919300ffc73f98ea903b58386c395da#define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x]
90bcde942a3919300ffc73f98ea903b58386c395da#define w8(x) [x], 0, 0, 0, [x], 0, 0, 0
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * enc_vals(x): emit 256 values (32 .byte directives of 8 values each),
 * passing every value through the macro named by x. The values are the
 * AES forward (encryption) S-box in index order, starting 0x63, 0x7c, ...
 * NOTE(review): x is presumably one of the 8-byte expansion macros
 * (u8/v8/w8); the call sites are outside this view -- confirm.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define enc_vals(x) \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x30),x(0x01),x(0x67),x(0x2b),x(0xfe),x(0xd7),x(0xab),x(0x76); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xca),x(0x82),x(0xc9),x(0x7d),x(0xfa),x(0x59),x(0x47),x(0xf0); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xad),x(0xd4),x(0xa2),x(0xaf),x(0x9c),x(0xa4),x(0x72),x(0xc0); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xb7),x(0xfd),x(0x93),x(0x26),x(0x36),x(0x3f),x(0xf7),x(0xcc); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x34),x(0xa5),x(0xe5),x(0xf1),x(0x71),x(0xd8),x(0x31),x(0x15); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x04),x(0xc7),x(0x23),x(0xc3),x(0x18),x(0x96),x(0x05),x(0x9a); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x07),x(0x12),x(0x80),x(0xe2),x(0xeb),x(0x27),x(0xb2),x(0x75); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x09),x(0x83),x(0x2c),x(0x1a),x(0x1b),x(0x6e),x(0x5a),x(0xa0); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x52),x(0x3b),x(0xd6),x(0xb3),x(0x29),x(0xe3),x(0x2f),x(0x84); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x53),x(0xd1),x(0x00),x(0xed),x(0x20),x(0xfc),x(0xb1),x(0x5b); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x6a),x(0xcb),x(0xbe),x(0x39),x(0x4a),x(0x4c),x(0x58),x(0xcf); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xd0),x(0xef),x(0xaa),x(0xfb),x(0x43),x(0x4d),x(0x33),x(0x85); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x45),x(0xf9),x(0x02),x(0x7f),x(0x50),x(0x3c),x(0x9f),x(0xa8); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x51),x(0xa3),x(0x40),x(0x8f),x(0x92),x(0x9d),x(0x38),x(0xf5); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xbc),x(0xb6),x(0xda),x(0x21),x(0x10),x(0xff),x(0xf3),x(0xd2); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xcd),x(0x0c),x(0x13),x(0xec),x(0x5f),x(0x97),x(0x44),x(0x17); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xc4),x(0xa7),x(0x7e),x(0x3d),x(0x64),x(0x5d),x(0x19),x(0x73); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x60),x(0x81),x(0x4f),x(0xdc),x(0x22),x(0x2a),x(0x90),x(0x88); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x46),x(0xee),x(0xb8),x(0x14),x(0xde),x(0x5e),x(0x0b),x(0xdb); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xe0),x(0x32),x(0x3a),x(0x0a),x(0x49),x(0x06),x(0x24),x(0x5c); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xc2),x(0xd3),x(0xac),x(0x62),x(0x91),x(0x95),x(0xe4),x(0x79); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xe7),x(0xc8),x(0x37),x(0x6d),x(0x8d),x(0xd5),x(0x4e),x(0xa9); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x6c),x(0x56),x(0xf4),x(0xea),x(0x65),x(0x7a),x(0xae),x(0x08); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xba),x(0x78),x(0x25),x(0x2e),x(0x1c),x(0xa6),x(0xb4),x(0xc6); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xe8),x(0xdd),x(0x74),x(0x1f),x(0x4b),x(0xbd),x(0x8b),x(0x8a); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x70),x(0x3e),x(0xb5),x(0x66),x(0x48),x(0x03),x(0xf6),x(0x0e); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x61),x(0x35),x(0x57),x(0xb9),x(0x86),x(0xc1),x(0x1d),x(0x9e); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xe1),x(0xf8),x(0x98),x(0x11),x(0x69),x(0xd9),x(0x8e),x(0x94); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x9b),x(0x1e),x(0x87),x(0xe9),x(0xce),x(0x55),x(0x28),x(0xdf); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x8c),x(0xa1),x(0x89),x(0x0d),x(0xbf),x(0xe6),x(0x42),x(0x68); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x41),x(0x99),x(0x2d),x(0x0f),x(0xb0),x(0x54),x(0xbb),x(0x16)
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * dec_vals(x): emit 256 values (32 .byte directives of 8 values each),
 * passing every value through the macro named by x. The values are the
 * AES inverse (decryption) S-box in index order, starting 0x52, 0x09, ...
 * NOTE(review): x is presumably one of the 8-byte expansion macros
 * (u8/v8/w8); the call sites are outside this view -- confirm.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define dec_vals(x) \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x52),x(0x09),x(0x6a),x(0xd5),x(0x30),x(0x36),x(0xa5),x(0x38); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xbf),x(0x40),x(0xa3),x(0x9e),x(0x81),x(0xf3),x(0xd7),x(0xfb); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x7c),x(0xe3),x(0x39),x(0x82),x(0x9b),x(0x2f),x(0xff),x(0x87); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x34),x(0x8e),x(0x43),x(0x44),x(0xc4),x(0xde),x(0xe9),x(0xcb); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x54),x(0x7b),x(0x94),x(0x32),x(0xa6),x(0xc2),x(0x23),x(0x3d); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xee),x(0x4c),x(0x95),x(0x0b),x(0x42),x(0xfa),x(0xc3),x(0x4e); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x08),x(0x2e),x(0xa1),x(0x66),x(0x28),x(0xd9),x(0x24),x(0xb2); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x76),x(0x5b),x(0xa2),x(0x49),x(0x6d),x(0x8b),x(0xd1),x(0x25); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x72),x(0xf8),x(0xf6),x(0x64),x(0x86),x(0x68),x(0x98),x(0x16); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xd4),x(0xa4),x(0x5c),x(0xcc),x(0x5d),x(0x65),x(0xb6),x(0x92); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x6c),x(0x70),x(0x48),x(0x50),x(0xfd),x(0xed),x(0xb9),x(0xda); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x5e),x(0x15),x(0x46),x(0x57),x(0xa7),x(0x8d),x(0x9d),x(0x84); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x90),x(0xd8),x(0xab),x(0x00),x(0x8c),x(0xbc),x(0xd3),x(0x0a); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xf7),x(0xe4),x(0x58),x(0x05),x(0xb8),x(0xb3),x(0x45),x(0x06); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xd0),x(0x2c),x(0x1e),x(0x8f),x(0xca),x(0x3f),x(0x0f),x(0x02); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xc1),x(0xaf),x(0xbd),x(0x03),x(0x01),x(0x13),x(0x8a),x(0x6b); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x3a),x(0x91),x(0x11),x(0x41),x(0x4f),x(0x67),x(0xdc),x(0xea); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x97),x(0xf2),x(0xcf),x(0xce),x(0xf0),x(0xb4),x(0xe6),x(0x73); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x96),x(0xac),x(0x74),x(0x22),x(0xe7),x(0xad),x(0x35),x(0x85); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xe2),x(0xf9),x(0x37),x(0xe8),x(0x1c),x(0x75),x(0xdf),x(0x6e); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x47),x(0xf1),x(0x1a),x(0x71),x(0x1d),x(0x29),x(0xc5),x(0x89); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x6f),x(0xb7),x(0x62),x(0x0e),x(0xaa),x(0x18),x(0xbe),x(0x1b); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xfc),x(0x56),x(0x3e),x(0x4b),x(0xc6),x(0xd2),x(0x79),x(0x20); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x9a),x(0xdb),x(0xc0),x(0xfe),x(0x78),x(0xcd),x(0x5a),x(0xf4); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x1f),x(0xdd),x(0xa8),x(0x33),x(0x88),x(0x07),x(0xc7),x(0x31); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xb1),x(0x12),x(0x10),x(0x59),x(0x27),x(0x80),x(0xec),x(0x5f); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x60),x(0x51),x(0x7f),x(0xa9),x(0x19),x(0xb5),x(0x4a),x(0x0d); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x2d),x(0xe5),x(0x7a),x(0x9f),x(0x93),x(0xc9),x(0x9c),x(0xef); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xa0),x(0xe0),x(0x3b),x(0x4d),x(0xae),x(0x2a),x(0xf5),x(0xb0); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xc8),x(0xeb),x(0xbb),x(0x3c),x(0x83),x(0x53),x(0x99),x(0x61); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0x17),x(0x2b),x(0x04),x(0x7e),x(0xba),x(0x77),x(0xd6),x(0x26); \
90bcde942a3919300ffc73f98ea903b58386c395da .byte x(0xe1),x(0x69),x(0x14),x(0x63),x(0x55),x(0x21),x(0x0c),x(0x7d)
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * Operand-building macros used by the round macros. All arguments are
 * pasted textually by cpp, so x and y should be simple constants or
 * register names as appropriate.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define tptr %rbp /* table pointer */
90bcde942a3919300ffc73f98ea903b58386c395da#define kptr %r8 /* key schedule pointer */
90bcde942a3919300ffc73f98ea903b58386c395da#define fofs 128 /* adjust offset in key schedule to keep |disp| < 128 */
/*
 * fk_ref(x, y): memory operand at byte displacement fofs - 16*x + 4*y
 * from kptr; x steps in units of 16 bytes (one round key of four 32-bit
 * words), y in units of 4 bytes (one word).
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define fk_ref(x, y) -16*x+fofs+4*y(kptr)
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * ik_ref(x, y): the counterpart of fk_ref that uses rofs. With
 * AES_REV_DKS defined it has the same form as fk_ref
 * (rofs - 16*x + 4*y, rofs = 128); otherwise the 16*x term is added
 * rather than subtracted (rofs + 16*x + 4*y, rofs = -128).
 */
90bcde942a3919300ffc73f98ea903b58386c395da#ifdef AES_REV_DKS
90bcde942a3919300ffc73f98ea903b58386c395da#define rofs 128
90bcde942a3919300ffc73f98ea903b58386c395da#define ik_ref(x, y) -16*x+rofs+4*y(kptr)
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#else
90bcde942a3919300ffc73f98ea903b58386c395da#define rofs -128
90bcde942a3919300ffc73f98ea903b58386c395da#define ik_ref(x, y) 16*x+rofs+4*y(kptr)
90bcde942a3919300ffc73f98ea903b58386c395da#endif /* AES_REV_DKS */
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * Table operands: index register x is scaled by 8 and added to tptr; the
 * leading constant is a byte offset (0, 3, 2, 1 for tab_0..tab_3).
 * NOTE(review): tab_f (offset 1) and tab_i (offset 7) are presumably the
 * offsets for forward / inverse last-round lookups; their users are
 * outside this view -- confirm.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define tab_0(x) (tptr,x,8)
90bcde942a3919300ffc73f98ea903b58386c395da#define tab_1(x) 3(tptr,x,8)
90bcde942a3919300ffc73f98ea903b58386c395da#define tab_2(x) 2(tptr,x,8)
90bcde942a3919300ffc73f98ea903b58386c395da#define tab_3(x) 1(tptr,x,8)
90bcde942a3919300ffc73f98ea903b58386c395da#define tab_f(x) 1(tptr,x,8)
90bcde942a3919300ffc73f98ea903b58386c395da#define tab_i(x) 7(tptr,x,8)
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * ff_rnd(p1, p2, p3, p4, round): one normal forward round.
 *
 * The four 32-bit state words are taken from %eax, %ebx, %ecx and %edx.
 * p1..p4 are 32-bit work registers supplied by the caller. They are first
 * loaded with the four round-key words fk_ref(round,0..3). Each byte of
 * each state word then indexes the table at tptr, and the looked-up word
 * is XORed into one of p1..p4:
 *
 *   source   byte 0 (tab_0)  byte 1 (tab_1)  byte 2 (tab_2)  byte 3 (tab_3)
 *   %eax          p1              p4              p3              p2
 *   %ebx          p2              p1              p4              p3
 *   %ecx          p3              p2              p1              p4
 *   %edx          p4              p3              p2              p1
 *
 * Finally p1..p4 are copied back into %eax..%edx as the new state.
 *
 * Clobbers %rsi and %rdi (byte-index registers), p1..p4 and %eax..%edx.
 * Each source register is shifted right by 16 part-way through its group,
 * so the order of the movzx/shr instructions within a group matters.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define ff_rnd(p1, p2, p3, p4, round) /* normal forward round */ \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,0), p1; /* start from the round key words */ \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,1), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,2), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,3), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; /* byte 0 of eax (writing esi zero-extends into rsi) */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; /* byte 1 of eax */ \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %eax; /* bring bytes 2 and 3 down into al/ah */ \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; /* byte 2 of the original eax */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; /* byte 3 of the original eax */ \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; /* same pattern for ebx, destinations rotated by one */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ebx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; /* ecx group */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ecx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; /* edx group */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %edx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da \
90bcde942a3919300ffc73f98ea903b58386c395da mov p1, %eax; /* write the new state back */ \
90bcde942a3919300ffc73f98ea903b58386c395da mov p2, %ebx; \
90bcde942a3919300ffc73f98ea903b58386c395da mov p3, %ecx; \
90bcde942a3919300ffc73f98ea903b58386c395da mov p4, %edx
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#ifdef LAST_ROUND_TABLES
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * fl_rnd (LAST_ROUND_TABLES variant): last forward round.
 *
 * Inputs:   the four 32-bit state words in %eax, %ebx, %ecx, %edx
 *           (input words 0..3).
 * Outputs:  p1..p4 (output words 0..3).  The result is left in p1..p4;
 *           this macro does not copy it back to %eax..%edx.
 * Steps:    tptr is advanced by 2048 bytes (presumably onto the
 *           "Last Round Tables" emitted after the main table -- confirm
 *           against the table layout), p1..p4 are loaded with the round
 *           key words fk_ref(round,0..3), and then byte k of input word
 *           i is looked up through tab_k and XORed into output word
 *           (i - k) mod 4.
 * Clobbers: %esi and %edi (byte indexes), %eax..%edx (each is shifted
 *           right by 16), tptr, and the flags.
 *
 * fk_ref, tab_0..tab_3 and tptr are defined outside this section.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \
90bcde942a3919300ffc73f98ea903b58386c395da add $2048, tptr; /* step tptr past the first 2048 table bytes */ \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,0), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,1), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,2), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,3), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 0 (%eax): bytes 0..3 go to p1, p4, p3, p2 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %eax; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 1 (%ebx): bytes 0..3 go to p2, p1, p4, p3 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ebx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 2 (%ecx): bytes 0..3 go to p3, p2, p1, p4 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ecx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 3 (%edx): bytes 0..3 go to p4, p3, p2, p1 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %edx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p1
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#else
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * fl_rnd (variant without LAST_ROUND_TABLES): last forward round.
 *
 * Inputs:   the four 32-bit state words in %eax, %ebx, %ecx, %edx
 *           (input words 0..3).
 * Outputs:  p1..p4 (output words 0..3).  The result is left in p1..p4;
 *           this macro does not copy it back to %eax..%edx.
 * Steps:    p1..p4 are loaded with the round key words
 *           fk_ref(round,0..3).  Byte k of input word i is used as an
 *           index for a zero-extending tab_f load (presumably a single
 *           byte entry -- confirm against the tab_f definition), the
 *           loaded value is rotated left by 8*k bits and XORed into
 *           output word (i - k) mod 4.
 * Clobbers: %esi and %edi (indexes, then looked-up values),
 *           %eax..%edx (each is shifted right by 16), and the flags.
 *           tptr is not modified by this variant.
 *
 * fk_ref and tab_f are defined outside this section.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,0), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,1), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,2), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da mov fk_ref(round,3), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 0 (%eax): bytes 0..3 go to p1, p4, p3, p2 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %eax; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p1; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $8, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p4; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $16, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $24, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p2; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 1 (%ebx): bytes 0..3 go to p2, p1, p4, p3 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ebx; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p2; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $8, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p1; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $16, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $24, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p3; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 2 (%ecx): bytes 0..3 go to p3, p2, p1, p4 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ecx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p3; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $8, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p2; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $16, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $24, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 3 (%edx): bytes 0..3 go to p4, p3, p2, p1 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %edx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p4; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $8, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p3; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_f(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $16, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $24, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p1
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#endif /* LAST_ROUND_TABLES */
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * ii_rnd: normal inverse round.
 *
 * Inputs:   the four 32-bit state words in %eax, %ebx, %ecx, %edx
 *           (input words 0..3).
 * Outputs:  p1..p4 (output words 0..3), which are then copied back into
 *           %eax, %ebx, %ecx, %edx so rounds can be chained.
 * Steps:    p1..p4 are loaded with the round key words
 *           ik_ref(round,0..3); byte k of input word i is looked up
 *           through tab_k and XORed into output word (i + k) mod 4.
 * Clobbers: %esi and %edi (byte indexes) and the flags.
 *
 * ik_ref and tab_0..tab_3 are defined outside this section.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define ii_rnd(p1, p2, p3, p4, round) /* normal inverse round */ \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,0), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,1), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,2), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,3), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 0 (%eax): bytes 0..3 go to p1, p2, p3, p4 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %eax; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 1 (%ebx): bytes 0..3 go to p2, p3, p4, p1 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ebx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 2 (%ecx): bytes 0..3 go to p3, p4, p1, p2 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ecx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 3 (%edx): bytes 0..3 go to p4, p1, p2, p3 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %edx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da /* copy the new state back for the next round */ \
90bcde942a3919300ffc73f98ea903b58386c395da mov p1, %eax; \
90bcde942a3919300ffc73f98ea903b58386c395da mov p2, %ebx; \
90bcde942a3919300ffc73f98ea903b58386c395da mov p3, %ecx; \
90bcde942a3919300ffc73f98ea903b58386c395da mov p4, %edx
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#ifdef LAST_ROUND_TABLES
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * il_rnd (LAST_ROUND_TABLES variant): last inverse round.
 *
 * Inputs:   the four 32-bit state words in %eax, %ebx, %ecx, %edx
 *           (input words 0..3).
 * Outputs:  p1..p4 (output words 0..3).  The result is left in p1..p4;
 *           this macro does not copy it back to %eax..%edx.
 * Steps:    tptr is advanced by 2048 bytes (presumably onto the
 *           "Last Round Tables" emitted after the main table -- confirm
 *           against the table layout), p1..p4 are loaded with the round
 *           key words ik_ref(round,0..3), and then byte k of input word
 *           i is looked up through tab_k and XORed into output word
 *           (i + k) mod 4.
 * Clobbers: %esi and %edi (byte indexes), %eax..%edx (each is shifted
 *           right by 16), tptr, and the flags.
 *
 * ik_ref, tab_0..tab_3 and tptr are defined outside this section.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \
90bcde942a3919300ffc73f98ea903b58386c395da add $2048, tptr; /* step tptr past the first 2048 table bytes */ \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,0), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,1), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,2), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,3), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 0 (%eax): bytes 0..3 go to p1, p2, p3, p4 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %eax; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 1 (%ebx): bytes 0..3 go to p2, p3, p4, p1 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ebx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 2 (%ecx): bytes 0..3 go to p3, p4, p1, p2 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ecx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 3 (%edx): bytes 0..3 go to p4, p1, p2, p3 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %edx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_0(%rsi), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_1(%rdi), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_2(%rsi), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor tab_3(%rdi), p3
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#else
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * il_rnd (variant without LAST_ROUND_TABLES): last inverse round.
 *
 * Inputs:   the four 32-bit state words in %eax, %ebx, %ecx, %edx
 *           (input words 0..3).
 * Outputs:  p1..p4 (output words 0..3).  The result is left in p1..p4;
 *           this macro does not copy it back to %eax..%edx.
 * Steps:    p1..p4 are loaded with the round key words
 *           ik_ref(round,0..3).  Byte k of input word i is used as an
 *           index for a zero-extending tab_i load (presumably a single
 *           byte entry -- confirm against the tab_i definition), the
 *           loaded value is rotated left by 8*k bits and XORed into
 *           output word (i + k) mod 4.
 * Clobbers: %esi and %edi (indexes, then looked-up values),
 *           %eax..%edx (each is shifted right by 16), and the flags.
 *           tptr is not modified by this variant.
 *
 * ik_ref and tab_i are defined outside this section.
 */
90bcde942a3919300ffc73f98ea903b58386c395da#define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,0), p1; \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,1), p2; \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,2), p3; \
90bcde942a3919300ffc73f98ea903b58386c395da mov ik_ref(round,3), p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 0 (%eax): bytes 0..3 go to p1, p2, p3, p4 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %eax; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p1; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $8, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p2; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %al, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ah, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $16, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $24, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p3; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p4; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 1 (%ebx): bytes 0..3 go to p2, p3, p4, p1 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ebx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p2; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $8, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p3; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %bh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $16, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $24, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p4; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p1; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 2 (%ecx): bytes 0..3 go to p3, p4, p1, p2 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %ecx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p3; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $8, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p4; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %cl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %ch, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $16, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $24, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p1; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p2; \
90bcde942a3919300ffc73f98ea903b58386c395da /* input word 3 (%edx): bytes 0..3 go to p4, p1, p2, p3 */ \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da shr $16, %edx; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p4; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $8, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p1; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dl, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx %dh, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rsi), %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da movzx tab_i(%rdi), %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $16, %esi; \
90bcde942a3919300ffc73f98ea903b58386c395da rol $24, %edi; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %esi, p2; \
90bcde942a3919300ffc73f98ea903b58386c395da xor %edi, p3
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#endif /* LAST_ROUND_TABLES */
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da/*
90bcde942a3919300ffc73f98ea903b58386c395da * OpenSolaris OS:
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
90bcde942a3919300ffc73f98ea903b58386c395da * const uint32_t pt[4], uint32_t ct[4]);
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * Original interface:
90bcde942a3919300ffc73f98ea903b58386c395da * int aes_encrypt(const unsigned char *in,
90bcde942a3919300ffc73f98ea903b58386c395da * unsigned char *out, const aes_encrypt_ctx cx[1]);
90bcde942a3919300ffc73f98ea903b58386c395da */
/*
 * enc_tab: lookup table data for the forward (encryption) rounds,
 * aligned to 64 bytes.  aes_encrypt_amd64 loads its address into tptr.
 * The contents are generated by enc_vals(), which is defined outside
 * this section.  When LAST_ROUND_TABLES is defined a second set,
 * enc_vals(w8), is emitted directly after the first for the last round.
 */
90bcde942a3919300ffc73f98ea903b58386c395da .align 64
90bcde942a3919300ffc73f98ea903b58386c395daenc_tab:
90bcde942a3919300ffc73f98ea903b58386c395da enc_vals(u8)
90bcde942a3919300ffc73f98ea903b58386c395da#ifdef LAST_ROUND_TABLES
90bcde942a3919300ffc73f98ea903b58386c395da / Last Round Tables:
90bcde942a3919300ffc73f98ea903b58386c395da enc_vals(w8)
90bcde942a3919300ffc73f98ea903b58386c395da#endif
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * aes_encrypt_amd64: encrypt one 16-byte block.
 *
 * The input block is read as four 32-bit words into %eax..%edx and
 * XORed with the four key words at the start of the key schedule.  It
 * then runs through 9, 11 or 13 ff_rnd rounds followed by one fl_rnd
 * round, and the four result words (%r9d..%r12d) are stored through
 * the output pointer that was saved at (%rsp).
 *
 * %esi selects the entry point into the unrolled rounds.  With
 * GLADMAN_INTERFACE it is loaded from 4*KS_LENGTH(kptr); otherwise it
 * is the second argument (Nr in the prototype comment) shifted left by
 * 4, i.e. Nr * 16, despite the "byte key length * 16" wording in the
 * line comments.  Only 10*16, 12*16 and 14*16 are accepted.
 *
 * On success %rax is 0.  For any other %esi value %rax is set to -1 and
 * nothing is stored to the output block.
 *
 * %rbx, %rbp and %r12 are saved in the 32-byte stack frame on entry and
 * restored before returning.  kptr, tptr, fofs and fk_ref are defined
 * outside this section.
 */
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson ENTRY_NP(aes_encrypt_amd64)
90bcde942a3919300ffc73f98ea903b58386c395da#ifdef GLADMAN_INTERFACE
90bcde942a3919300ffc73f98ea903b58386c395da / Original interface
90bcde942a3919300ffc73f98ea903b58386c395da sub $[4*8], %rsp / gnu/linux/opensolaris binary interface
90bcde942a3919300ffc73f98ea903b58386c395da mov %rsi, (%rsp) / output pointer (P2)
90bcde942a3919300ffc73f98ea903b58386c395da mov %rdx, %r8 / context (P3)
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da mov %rbx, 1*8(%rsp) / P1: input pointer in rdi
90bcde942a3919300ffc73f98ea903b58386c395da mov %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r12, 3*8(%rsp) / P3: context in r8
90bcde942a3919300ffc73f98ea903b58386c395da movzx 4*KS_LENGTH(kptr), %esi / Get byte key length * 16
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#else
90bcde942a3919300ffc73f98ea903b58386c395da / OpenSolaris OS interface
90bcde942a3919300ffc73f98ea903b58386c395da sub $[4*8], %rsp / Make room on stack to save registers
90bcde942a3919300ffc73f98ea903b58386c395da mov %rcx, (%rsp) / Save output pointer (P4) on stack
90bcde942a3919300ffc73f98ea903b58386c395da mov %rdi, %r8 / context (P1)
90bcde942a3919300ffc73f98ea903b58386c395da mov %rdx, %rdi / P3: save input pointer
	/* %esi = second argument << 4; compared against 10*16, 12*16, 14*16 below */
90bcde942a3919300ffc73f98ea903b58386c395da shl $4, %esi / P2: esi byte key length * 16
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da mov %rbx, 1*8(%rsp) / Save registers
90bcde942a3919300ffc73f98ea903b58386c395da mov %rbp, 2*8(%rsp)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r12, 3*8(%rsp)
90bcde942a3919300ffc73f98ea903b58386c395da / P1: context in r8
90bcde942a3919300ffc73f98ea903b58386c395da / P2: byte key length * 16 in esi
90bcde942a3919300ffc73f98ea903b58386c395da / P3: input pointer in rdi
90bcde942a3919300ffc73f98ea903b58386c395da / P4: output pointer in (rsp)
90bcde942a3919300ffc73f98ea903b58386c395da#endif /* GLADMAN_INTERFACE */
90bcde942a3919300ffc73f98ea903b58386c395da
	/*
	 * Point tptr at the forward table and bias kptr by -fofs, so that
	 * fofs(kptr) below addresses the first 16 bytes kptr pointed to.
	 */
90bcde942a3919300ffc73f98ea903b58386c395da lea enc_tab(%rip), tptr
90bcde942a3919300ffc73f98ea903b58386c395da sub $fofs, kptr
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da / Load input block into registers
90bcde942a3919300ffc73f98ea903b58386c395da mov (%rdi), %eax
90bcde942a3919300ffc73f98ea903b58386c395da mov 1*4(%rdi), %ebx
90bcde942a3919300ffc73f98ea903b58386c395da mov 2*4(%rdi), %ecx
90bcde942a3919300ffc73f98ea903b58386c395da mov 3*4(%rdi), %edx
90bcde942a3919300ffc73f98ea903b58386c395da
	/* Initial key addition: XOR the state with the first four key words */
90bcde942a3919300ffc73f98ea903b58386c395da xor fofs(kptr), %eax
90bcde942a3919300ffc73f98ea903b58386c395da xor fofs+4(kptr), %ebx
90bcde942a3919300ffc73f98ea903b58386c395da xor fofs+8(kptr), %ecx
90bcde942a3919300ffc73f98ea903b58386c395da xor fofs+12(kptr), %edx
90bcde942a3919300ffc73f98ea903b58386c395da
	/*
	 * Advance kptr by %rsi bytes; the round macros fetch their key words
	 * through fk_ref(round,n) relative to this new kptr.
	 */
90bcde942a3919300ffc73f98ea903b58386c395da lea (kptr,%rsi), kptr
90bcde942a3919300ffc73f98ea903b58386c395da / Jump based on byte key length * 16:
90bcde942a3919300ffc73f98ea903b58386c395da cmp $[10*16], %esi
90bcde942a3919300ffc73f98ea903b58386c395da je 3f
90bcde942a3919300ffc73f98ea903b58386c395da cmp $[12*16], %esi
90bcde942a3919300ffc73f98ea903b58386c395da je 2f
90bcde942a3919300ffc73f98ea903b58386c395da cmp $[14*16], %esi
90bcde942a3919300ffc73f98ea903b58386c395da je 1f
	/* Unsupported value: return -1 without touching the output block */
90bcde942a3919300ffc73f98ea903b58386c395da mov $-1, %rax / error
90bcde942a3919300ffc73f98ea903b58386c395da jmp 4f
90bcde942a3919300ffc73f98ea903b58386c395da
	/*
	 * Unrolled rounds with three entry points:
	 *   1: %esi == 14*16, 13 ff_rnd rounds + fl_rnd
	 *   2: %esi == 12*16, 11 ff_rnd rounds + fl_rnd
	 *   3: %esi == 10*16,  9 ff_rnd rounds + fl_rnd
	 */
90bcde942a3919300ffc73f98ea903b58386c395da / Perform normal forward rounds
90bcde942a3919300ffc73f98ea903b58386c395da1: ff_rnd(%r9d, %r10d, %r11d, %r12d, 13)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 12)
90bcde942a3919300ffc73f98ea903b58386c395da2: ff_rnd(%r9d, %r10d, %r11d, %r12d, 11)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 10)
90bcde942a3919300ffc73f98ea903b58386c395da3: ff_rnd(%r9d, %r10d, %r11d, %r12d, 9)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 8)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 7)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 6)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 5)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 4)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 3)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 2)
90bcde942a3919300ffc73f98ea903b58386c395da ff_rnd(%r9d, %r10d, %r11d, %r12d, 1)
90bcde942a3919300ffc73f98ea903b58386c395da fl_rnd(%r9d, %r10d, %r11d, %r12d, 0)
90bcde942a3919300ffc73f98ea903b58386c395da
	/*
	 * fl_rnd leaves the result in %r9d..%r12d.  %rbx can be reused for
	 * the output pointer because its entry value is saved at 1*8(%rsp).
	 */
90bcde942a3919300ffc73f98ea903b58386c395da / Copy results
90bcde942a3919300ffc73f98ea903b58386c395da mov (%rsp), %rbx
90bcde942a3919300ffc73f98ea903b58386c395da mov %r9d, (%rbx)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r10d, 4(%rbx)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r11d, 8(%rbx)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r12d, 12(%rbx)
	/* %rax = 0: success */
90bcde942a3919300ffc73f98ea903b58386c395da xor %rax, %rax
90bcde942a3919300ffc73f98ea903b58386c395da4: / Restore registers
90bcde942a3919300ffc73f98ea903b58386c395da mov 1*8(%rsp), %rbx
90bcde942a3919300ffc73f98ea903b58386c395da mov 2*8(%rsp), %rbp
90bcde942a3919300ffc73f98ea903b58386c395da mov 3*8(%rsp), %r12
90bcde942a3919300ffc73f98ea903b58386c395da add $[4*8], %rsp
90bcde942a3919300ffc73f98ea903b58386c395da ret
90bcde942a3919300ffc73f98ea903b58386c395da
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson SET_SIZE(aes_encrypt_amd64)
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da/*
90bcde942a3919300ffc73f98ea903b58386c395da * OpenSolaris OS:
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
90bcde942a3919300ffc73f98ea903b58386c395da * const uint32_t ct[4], uint32_t pt[4]);
90bcde942a3919300ffc73f98ea903b58386c395da *
90bcde942a3919300ffc73f98ea903b58386c395da * Original interface:
90bcde942a3919300ffc73f98ea903b58386c395da * int aes_decrypt(const unsigned char *in,
90bcde942a3919300ffc73f98ea903b58386c395da * unsigned char *out, const aes_decrypt_ctx cx[1]);
90bcde942a3919300ffc73f98ea903b58386c395da */
/*
 * dec_tab: lookup table data for the inverse (decryption) rounds,
 * aligned to 64 bytes.  aes_decrypt_amd64 loads its address into tptr.
 * The contents are generated by dec_vals(), which is defined outside
 * this section.  When LAST_ROUND_TABLES is defined a second set,
 * dec_vals(w8), is emitted directly after the first for the last round.
 */
90bcde942a3919300ffc73f98ea903b58386c395da .align 64
90bcde942a3919300ffc73f98ea903b58386c395dadec_tab:
90bcde942a3919300ffc73f98ea903b58386c395da dec_vals(v8)
90bcde942a3919300ffc73f98ea903b58386c395da#ifdef LAST_ROUND_TABLES
90bcde942a3919300ffc73f98ea903b58386c395da / Last Round Tables:
90bcde942a3919300ffc73f98ea903b58386c395da dec_vals(w8)
90bcde942a3919300ffc73f98ea903b58386c395da#endif
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da
/*
 * aes_decrypt_amd64: decrypt one 16-byte block.
 *
 * The input block is read as four 32-bit words into %eax..%edx and
 * XORed with four key words (which ones depends on AES_REV_DKS, see
 * below).  It then runs through 9, 11 or 13 ii_rnd rounds followed by
 * one il_rnd round, and the four result words (%r9d..%r12d) are stored
 * through the output pointer that was saved at (%rsp).
 *
 * %esi selects the entry point into the unrolled rounds.  With
 * GLADMAN_INTERFACE it is loaded from 4*KS_LENGTH(kptr); otherwise it
 * is the second argument (Nr in the prototype comment) shifted left by
 * 4, i.e. Nr * 16, despite the "byte key length * 16" wording in the
 * line comments.  Only 10*16, 12*16 and 14*16 are accepted.
 *
 * On success %rax is 0.  For any other %esi value %rax is set to -1 and
 * nothing is stored to the output block.
 *
 * %rbx, %rbp and %r12 are saved in the 32-byte stack frame on entry and
 * restored before returning.  kptr, tptr, rofs and ik_ref are defined
 * outside this section.
 */
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson ENTRY_NP(aes_decrypt_amd64)
90bcde942a3919300ffc73f98ea903b58386c395da#ifdef GLADMAN_INTERFACE
90bcde942a3919300ffc73f98ea903b58386c395da / Original interface
90bcde942a3919300ffc73f98ea903b58386c395da sub $[4*8], %rsp / gnu/linux/opensolaris binary interface
90bcde942a3919300ffc73f98ea903b58386c395da mov %rsi, (%rsp) / output pointer (P2)
90bcde942a3919300ffc73f98ea903b58386c395da mov %rdx, %r8 / context (P3)
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da mov %rbx, 1*8(%rsp) / P1: input pointer in rdi
90bcde942a3919300ffc73f98ea903b58386c395da mov %rbp, 2*8(%rsp) / P2: output pointer in (rsp)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r12, 3*8(%rsp) / P3: context in r8
90bcde942a3919300ffc73f98ea903b58386c395da movzx 4*KS_LENGTH(kptr), %esi / Get byte key length * 16
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da#else
90bcde942a3919300ffc73f98ea903b58386c395da / OpenSolaris OS interface
90bcde942a3919300ffc73f98ea903b58386c395da sub $[4*8], %rsp / Make room on stack to save registers
90bcde942a3919300ffc73f98ea903b58386c395da mov %rcx, (%rsp) / Save output pointer (P4) on stack
90bcde942a3919300ffc73f98ea903b58386c395da mov %rdi, %r8 / context (P1)
90bcde942a3919300ffc73f98ea903b58386c395da mov %rdx, %rdi / P3: save input pointer
	/* %esi = second argument << 4; compared against 10*16, 12*16, 14*16 below */
90bcde942a3919300ffc73f98ea903b58386c395da shl $4, %esi / P2: esi byte key length * 16
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da mov %rbx, 1*8(%rsp) / Save registers
90bcde942a3919300ffc73f98ea903b58386c395da mov %rbp, 2*8(%rsp)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r12, 3*8(%rsp)
90bcde942a3919300ffc73f98ea903b58386c395da / P1: context in r8
90bcde942a3919300ffc73f98ea903b58386c395da / P2: byte key length * 16 in esi
90bcde942a3919300ffc73f98ea903b58386c395da / P3: input pointer in rdi
90bcde942a3919300ffc73f98ea903b58386c395da / P4: output pointer in (rsp)
90bcde942a3919300ffc73f98ea903b58386c395da#endif /* GLADMAN_INTERFACE */
90bcde942a3919300ffc73f98ea903b58386c395da
	/* Point tptr at the inverse table and bias kptr by -rofs */
90bcde942a3919300ffc73f98ea903b58386c395da lea dec_tab(%rip), tptr
90bcde942a3919300ffc73f98ea903b58386c395da sub $rofs, kptr
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da / Load input block into registers
90bcde942a3919300ffc73f98ea903b58386c395da mov (%rdi), %eax
90bcde942a3919300ffc73f98ea903b58386c395da mov 1*4(%rdi), %ebx
90bcde942a3919300ffc73f98ea903b58386c395da mov 2*4(%rdi), %ecx
90bcde942a3919300ffc73f98ea903b58386c395da mov 3*4(%rdi), %edx
90bcde942a3919300ffc73f98ea903b58386c395da
	/*
	 * The input pointer in %rdi is no longer needed, so %rdi is reused
	 * to address the key words for the initial XOR:
	 *   AES_REV_DKS:  %rdi = kptr (schedule start); kptr advances by %rsi.
	 *   otherwise:    %rdi = kptr + %rsi; kptr itself is left unchanged.
	 */
90bcde942a3919300ffc73f98ea903b58386c395da#ifdef AES_REV_DKS
90bcde942a3919300ffc73f98ea903b58386c395da mov kptr, %rdi
90bcde942a3919300ffc73f98ea903b58386c395da lea (kptr,%rsi), kptr
90bcde942a3919300ffc73f98ea903b58386c395da#else
90bcde942a3919300ffc73f98ea903b58386c395da lea (kptr,%rsi), %rdi
90bcde942a3919300ffc73f98ea903b58386c395da#endif
90bcde942a3919300ffc73f98ea903b58386c395da
	/* Initial key addition: XOR the state with the four key words at rofs(%rdi) */
90bcde942a3919300ffc73f98ea903b58386c395da xor rofs(%rdi), %eax
90bcde942a3919300ffc73f98ea903b58386c395da xor rofs+4(%rdi), %ebx
90bcde942a3919300ffc73f98ea903b58386c395da xor rofs+8(%rdi), %ecx
90bcde942a3919300ffc73f98ea903b58386c395da xor rofs+12(%rdi), %edx
90bcde942a3919300ffc73f98ea903b58386c395da
90bcde942a3919300ffc73f98ea903b58386c395da / Jump based on byte key length * 16:
90bcde942a3919300ffc73f98ea903b58386c395da cmp $[10*16], %esi
90bcde942a3919300ffc73f98ea903b58386c395da je 3f
90bcde942a3919300ffc73f98ea903b58386c395da cmp $[12*16], %esi
90bcde942a3919300ffc73f98ea903b58386c395da je 2f
90bcde942a3919300ffc73f98ea903b58386c395da cmp $[14*16], %esi
90bcde942a3919300ffc73f98ea903b58386c395da je 1f
	/* Unsupported value: return -1 without touching the output block */
90bcde942a3919300ffc73f98ea903b58386c395da mov $-1, %rax / error
90bcde942a3919300ffc73f98ea903b58386c395da jmp 4f
90bcde942a3919300ffc73f98ea903b58386c395da
	/*
	 * Unrolled rounds with three entry points:
	 *   1: %esi == 14*16, 13 ii_rnd rounds + il_rnd
	 *   2: %esi == 12*16, 11 ii_rnd rounds + il_rnd
	 *   3: %esi == 10*16,  9 ii_rnd rounds + il_rnd
	 */
90bcde942a3919300ffc73f98ea903b58386c395da / Perform normal inverse rounds
90bcde942a3919300ffc73f98ea903b58386c395da1: ii_rnd(%r9d, %r10d, %r11d, %r12d, 13)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 12)
90bcde942a3919300ffc73f98ea903b58386c395da2: ii_rnd(%r9d, %r10d, %r11d, %r12d, 11)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 10)
90bcde942a3919300ffc73f98ea903b58386c395da3: ii_rnd(%r9d, %r10d, %r11d, %r12d, 9)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 8)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 7)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 6)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 5)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 4)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 3)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 2)
90bcde942a3919300ffc73f98ea903b58386c395da ii_rnd(%r9d, %r10d, %r11d, %r12d, 1)
90bcde942a3919300ffc73f98ea903b58386c395da il_rnd(%r9d, %r10d, %r11d, %r12d, 0)
90bcde942a3919300ffc73f98ea903b58386c395da
	/*
	 * il_rnd leaves the result in %r9d..%r12d.  %rbx can be reused for
	 * the output pointer because its entry value is saved at 1*8(%rsp).
	 */
90bcde942a3919300ffc73f98ea903b58386c395da / Copy results
90bcde942a3919300ffc73f98ea903b58386c395da mov (%rsp), %rbx
90bcde942a3919300ffc73f98ea903b58386c395da mov %r9d, (%rbx)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r10d, 4(%rbx)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r11d, 8(%rbx)
90bcde942a3919300ffc73f98ea903b58386c395da mov %r12d, 12(%rbx)
	/* %rax = 0: success */
90bcde942a3919300ffc73f98ea903b58386c395da xor %rax, %rax
90bcde942a3919300ffc73f98ea903b58386c395da4: / Restore registers
90bcde942a3919300ffc73f98ea903b58386c395da mov 1*8(%rsp), %rbx
90bcde942a3919300ffc73f98ea903b58386c395da mov 2*8(%rsp), %rbp
90bcde942a3919300ffc73f98ea903b58386c395da mov 3*8(%rsp), %r12
90bcde942a3919300ffc73f98ea903b58386c395da add $[4*8], %rsp
90bcde942a3919300ffc73f98ea903b58386c395da ret
90bcde942a3919300ffc73f98ea903b58386c395da
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson SET_SIZE(aes_decrypt_amd64)
54034eb2d6e7d811adf4a1fe5105eac6fea6b0b5Dan OpenSolaris Anderson#endif /* lint || __lint */