GIT 8c3cceacf333858dd9d2c04650d6ae3e87d2b824 git+ssh://master.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git

commit 8c3cceacf333858dd9d2c04650d6ae3e87d2b824
Author: Joachim Fritschi <jfritschi@freenet.de>
Date:   Tue Jun 20 21:12:02 2006 +1000

    [CRYPTO] twofish: x86-64 assembly version
    
    The patch passed the trycpt tests and automated filesystem tests.
    This rewrite resulted in some nice perfomance increase over my last patch.
    
    Short summary of the tcrypt benchmarks:
    
    Twofish Assembler vs. Twofish C (256bit 8kb block CBC)
    encrypt: -27% Cycles
    decrypt: -23% Cycles
    
    Twofish Assembler vs. AES Assembler (128bit 8kb block CBC)
    encrypt: +18%  Cycles
    decrypt: +15% Cycles
    
    Twofish Assembler vs. AES Assembler (256bit 8kb block CBC)
    encrypt: -9% Cycles
    decrypt: -8% Cycles
    
    Full Output:
    http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-c-x86_64.txt
    http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-asm-x86_64.txt
    http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-aes-asm-x86_64.txt
    
    
    Here is another bonnie++ benchmark with encrypted filesystems. Most runs maxed
    out the hd. It should give some idea what the module can do for encrypted filesystem
    performance even though you can't see the full numbers.
    
    http://homepages.tu-darmstadt.de/~fritschi/twofish/output_20060610_130806_x86_64.html
    
    Signed-off-by: Joachim Fritschi <jfritschi@freenet.de>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit 8071970fea9656b0ed4fed7df1283a6b1c77054e
Author: Joachim Fritschi <jfritschi@freenet.de>
Date:   Tue Jun 20 20:59:16 2006 +1000

    [CRYPTO] twofish: i586 assembly version
    
    The patch passed the trycpt tests and automated filesystem tests.
    This rewrite resulted in some nice perfomance increase over my last patch.
    
    Short summary of the tcrypt benchmarks:
    
    Twofish Assembler vs. Twofish C (256bit 8kb block CBC)
    encrypt: -33% Cycles
    decrypt: -45% Cycles
    
    Twofish Assembler vs. AES Assembler (128bit 8kb block CBC)
    encrypt: +3%  Cycles
    decrypt: -22% Cycles
    
    Twofish Assembler vs. AES Assembler (256bit 8kb block CBC)
    encrypt: -20% Cycles
    decrypt: -36% Cycles
    
    Full Output:
    http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-asm-i586.txt
    http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-c-i586.txt
    http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-aes-asm-i586.txt
    
    
    Here is another bonnie++ benchmark with encrypted filesystems. All runs with
    the twofish assembler modules max out the drivespeed. It should give some
    idea what the module can do for encrypted filesystem performance even though
    you can't see the full numbers.
    
    http://homepages.tu-darmstadt.de/~fritschi/twofish/output_20060611_205432_x86.html
    
    Signed-off-by: Joachim Fritschi <jfritschi@freenet.de>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit 2be922eb8f70632e394382ad80e69931571d91da
Author: Joachim Fritschi <jfritschi@freenet.de>
Date:   Tue Jun 20 20:39:29 2006 +1000

    [CRYPTO] twofish: Fix the priority
    
    This patch adds a proper driver name and priority to the generic c
    implemtation to allow coexistance of c and assembler modules.
    
    Signed-off-by: Joachim Fritschi <jfritschi@freenet.de>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit b63699901ba633bc6d82ef990f32dbceec173ef4
Author: Joachim Fritschi <jfritschi@freenet.de>
Date:   Tue Jun 20 20:37:23 2006 +1000

    [CRYPTO] twofish: Split out common c code
    
    This patch splits up the twofish crypto routine into a common part ( key
    setup  ) which will be uses by all twofish crypto modules ( generic-c , i586
    assembler and x86_64 assembler ) and generic-c part. It also creates a new
    header file which will be used by all 3 modules.
    
    This eliminates all code duplication.
    
    Correctness was verified with the tcrypt module and automated test scripts.
    
    Signed-off-by: Joachim Fritschi <jfritschi@freenet.de>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit ef06b4c514a5cc3c504be2171c07939f5b41d2fb
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Sat Jun 10 18:06:34 2006 +1000

    [CRYPTO] tcrypt: Forbid tcrypt from being built-in
    
    It makes no sense to build tcrypt into the kernel.  In fact, now that
    the driver init function's return status is being checked, it is in
    fact harmful to do so.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit 01d4043a9aa9c1cad8ebb238f763ddb921c91e9a
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Fri Jun 2 08:42:25 2006 +1000

    [CRYPTO] aes: Add wrappers for assembly routines
    
    The wrapper routines are required when asmlinkage differs from the usual
    calling convention.  So we need to have them.  However, by rearranging
    the parameters, they will get optimised away to a single jump for most
    people.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit 89888ec1c46678330cccf759b916fde858d8a37e
Author: Michal Ludvig <michal@logix.cz>
Date:   Tue May 30 22:04:19 2006 +1000

    [CRYPTO] tcrypt: Speed benchmark support for digest algorithms
    
    This patch adds speed tests (benchmarks) for digest algorithms.
    Tests are run with different buffer sizes (16 bytes, ... 8 kBytes)
    and with each buffer multiple tests are run with different update()
    sizes (e.g. hash 64 bytes buffer in four 16 byte updates).
    There is no correctness checking of the result and all tests and
    algorithms use the same input buffer.
    
    Signed-off-by: Michal Ludvig <michal@logix.cz>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit 0c6e7d9ae75edb95695730422eadfa9e37bd9c4a
Author: Michal Ludvig <michal@logix.cz>
Date:   Tue May 30 14:49:38 2006 +1000

    [CRYPTO] tcrypt: Return -EAGAIN from module_init()
    
    Intentionaly return -EAGAIN from module_init() to ensure
    it doesn't stay loaded in the kernel.  The module does all
    its work from init() and doesn't offer any runtime
    functionality => we don't need it in the memory, do we?
    
    Signed-off-by: Michal Ludvig <michal@logix.cz>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit 779226a644c26d4fdd8bc74d15bbf1741e75a038
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Sun May 21 11:57:20 2006 +1000

    [CRYPTO] api: Allow replacement when registering new algorithms
    
    We already allow asynchronous removal of existing algorithm modules.  By
    allowing the replacement of existing algorithms, we can replace algorithms
    without having to wait for for all existing users to complete.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit ff46f3b7a81c800612d6183bbcb74f9cf0eb720c
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Sun May 21 08:45:26 2006 +1000

    [CRYPTO] api: Removed const from cra_name/cra_driver_name
    
    We do need to change these names now and even more so in future with
    instantiated algorithms.  So let's stop lying to the compiler and get
    rid of the const modifiers.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit 1e27a31e7dddb2c0db036d576da410d99db5b2bc
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Wed May 24 13:02:26 2006 +1000

    [CRYPTO] api: Added cra_init/cra_exit
    
    This patch adds the hooks cra_init/cra_exit which are called during a tfm's
    construction and destruction respectively.  This will be used by the instances
    to allocate child tfm's.
    
    For now this lets us get rid of the coa_init/coa_exit functions which are
    used for exactly that purpose (unlike the dia_init function which is called
    for each transaction).
    
    In fact the coa_exit path is currently buggy as it may get called twice
    when an error is encountered during initialisation.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit d7c651266abbe20502753612ba53f55ee171a2ac
Author: Michal Ludvig <michal@logix.cz>
Date:   Mon May 22 08:28:06 2006 +1000

    [CRYPTO] api: Fixed incorrect passing of context instead of tfm
    
    Fix a few omissions in passing TFM instead of CTX to algorithms.
    
    Signed-off-by: Michal Ludvig <michal@logix.cz>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit f8ccaa3bd4b4e0d536b9f9823aeb1c011bf2a007
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Tue May 16 22:20:34 2006 +1000

    [CRYPTO] padlock: Rearrange context structure to reduce code size
    
    i386 assembly has more compact instructions for accessing 7-bit offsets.
    So by moving the large members to the end of the structure we can save
    quite a bit of code size.  This patch shaves about 10% or 300 bytes off
    the padlock-aes file.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit ac28bbea337375d709e79bf350cdc5140cafc6ba
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Tue May 16 22:09:29 2006 +1000

    [CRYPTO] all: Pass tfm instead of ctx to algorithms
    
    Up until now algorithms have been happy to get a context pointer since
    they know everything that's in the tfm already (e.g., alignment, block
    size).
    
    However, once we have parameterised algorithms, such information will
    be specific to each tfm.  So the algorithm API needs to be changed to
    pass the tfm structure instead of the context pointer.
    
    This patch is basically a text substitution.  The only tricky bit is
    the assembly routines that need to get the context pointer offset
    through asm-offsets.h.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit 421fd48609ea1dee411505fc399dbc85f9957a4e
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Tue May 16 22:06:54 2006 +1000

    [CRYPTO] digest: Remove unnecessary zeroing during init
    
    Various digest algorithms operate one block at a time and therefore
    keep a temporary buffer of partial blocks.  This buffer does not need
    to be initialised since there is a counter which indicates what is and
    isn't valid in it.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit aabc480723e6f947ce731d19847cb877d0b8b615
Author: Herbert Xu <herbert@gondor.apana.org.au>
Date:   Sun May 14 11:00:39 2006 +1000

    [CRYPTO] aes-i586: Get rid of useless function wrappers
    
    The wrappers aes_encrypt/aes_decrypt simply reverse the order of the
    function arguments.  It's just as easy to get the actual assembly code
    to read them in the opposite order.
    
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit 158dc981c340c49e3d702d38d3e8688ba285c9c4
Author: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date:   Mon Apr 10 08:42:35 2006 +1000

    [CRYPTO] digest: Add alignment handling
    
    Some hash modules load/store data words directly.  The digest layer
    should pass properly aligned buffer to update()/final() method.  This
    patch also add cra_alignmask to some hash modules.
    
    Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

commit c1bfff18ce2118ae06ff5dc963c6c734d20629fb
Author: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date:   Thu Apr 6 03:26:01 2006 +1000

    [CRYPTO] khazad: Use 32-bit reads on key
    
    On 64-bit platform, reading 64-bit keys (which is supposed to be
    32-bit aligned) at a time will result in unaligned access.
    
    Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
    Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 arch/i386/crypto/Makefile               |    3 
 arch/i386/crypto/aes-i586-asm.S         |   31 
 arch/i386/crypto/aes.c                  |   20 
 arch/i386/crypto/twofish-i586-asm.S     |  335 +++++++++
 arch/i386/crypto/twofish.c              |   97 ++
 arch/i386/kernel/asm-offsets.c          |    3 
 arch/s390/crypto/aes_s390.c             |   14 
 arch/s390/crypto/des_s390.c             |   42 -
 arch/s390/crypto/sha1_s390.c            |   34 -
 arch/s390/crypto/sha256_s390.c          |   14 
 arch/x86_64/crypto/Makefile             |    3 
 arch/x86_64/crypto/aes-x86_64-asm.S     |   22 
 arch/x86_64/crypto/aes.c                |   20 
 arch/x86_64/crypto/twofish-x86_64-asm.S |  324 +++++++++
 arch/x86_64/crypto/twofish.c            |   97 ++
 arch/x86_64/kernel/asm-offsets.c        |    3 
 crypto/Kconfig                          |   40 +
 crypto/Makefile                         |    1 
 crypto/aes.c                            |   14 
 crypto/anubis.c                         |   13 
 crypto/api.c                            |   17 
 crypto/arc4.c                           |    9 
 crypto/blowfish.c                       |   19 
 crypto/cast5.c                          |   14 
 crypto/cast6.c                          |   15 
 crypto/cipher.c                         |   14 
 crypto/compress.c                       |   15 
 crypto/crc32c.c                         |   19 
 crypto/crypto_null.c                    |   17 
 crypto/deflate.c                        |   23 
 crypto/des.c                            |   27 
 crypto/digest.c                         |   51 -
 crypto/khazad.c                         |   21 
 crypto/md4.c                            |   12 
 crypto/md5.c                            |   12 
 crypto/michael_mic.c                    |   20 
 crypto/serpent.c                        |   27 
 crypto/sha1.c                           |   18 
 crypto/sha256.c                         |   19 
 crypto/sha512.c                         |   29 
 crypto/tcrypt.c                         |  179 +++++
 crypto/tcrypt.h                         |   36 +
 crypto/tea.c                            |   55 -
 crypto/tgr192.c                         |   33 
 crypto/twofish.c                        |  708 --------------------
 crypto/twofish_common.c                 |  744 ++++++++++++++++++++++
 crypto/wp512.c                          |   24 
 drivers/crypto/padlock-aes.c            |   39 -
 include/crypto/twofish.h                |   23 
 include/linux/crypto.h                  |   34 -
 50 files changed, 2314 insertions(+), 1089 deletions(-)

diff -puN arch/i386/crypto/aes.c~git-cryptodev arch/i386/crypto/aes.c
--- a/arch/i386/crypto/aes.c~git-cryptodev
+++ a/arch/i386/crypto/aes.c
@@ -45,8 +45,8 @@
 #include <linux/crypto.h>
 #include <linux/linkage.h>
 
-asmlinkage void aes_enc_blk(const u8 *src, u8 *dst, void *ctx);
-asmlinkage void aes_dec_blk(const u8 *src, u8 *dst, void *ctx);
+asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 
 #define AES_MIN_KEY_SIZE	16
 #define AES_MAX_KEY_SIZE	32
@@ -378,12 +378,12 @@ static void gen_tabs(void)
 	k[8*(i)+11] = ss[3];						\
 }
 
-static int
-aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len, u32 *flags)
 {
 	int i;
 	u32 ss[8];
-	struct aes_ctx *ctx = ctx_arg;
+	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
 
 	/* encryption schedule */
@@ -464,16 +464,16 @@ aes_set_key(void *ctx_arg, const u8 *in_
 	return 0;
 }
 
-static inline void aes_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	aes_enc_blk(src, dst, ctx);
+	aes_enc_blk(tfm, dst, src);
 }
-static inline void aes_decrypt(void *ctx, u8 *dst, const u8 *src)
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	aes_dec_blk(src, dst, ctx);
+	aes_dec_blk(tfm, dst, src);
 }
 
-
 static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
 	.cra_driver_name	=	"aes-i586",
diff -puN arch/i386/crypto/aes-i586-asm.S~git-cryptodev arch/i386/crypto/aes-i586-asm.S
--- a/arch/i386/crypto/aes-i586-asm.S~git-cryptodev
+++ a/arch/i386/crypto/aes-i586-asm.S
@@ -36,22 +36,19 @@
 .file "aes-i586-asm.S"
 .text
 
-// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
-// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
-	
-#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
-
-// offsets to parameters with one register pushed onto stack
+#include <asm/asm-offsets.h>
 
-#define in_blk    8  // input byte array address parameter
-#define out_blk  12  // output byte array address parameter
-#define ctx      16  // AES context structure
-
-// offsets in context structure
+#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
 
-#define ekey     0   // encryption key schedule base address
-#define nrnd   256   // number of rounds
-#define dkey   260   // decryption key schedule base address
+/* offsets to parameters with one register pushed onto stack */
+#define tfm 8
+#define out_blk 12
+#define in_blk 16
+
+/* offsets in crypto_tfm structure */
+#define ekey (crypto_tfm_ctx_offset + 0)
+#define nrnd (crypto_tfm_ctx_offset + 256)
+#define dkey (crypto_tfm_ctx_offset + 260)
 
 // register mapping for encrypt and decrypt subroutines
 
@@ -220,6 +217,7 @@
 	do_col (table, r5,r0,r1,r4, r2,r3);		/* idx=r5 */
 
 // AES (Rijndael) Encryption Subroutine
+/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
 
 .global  aes_enc_blk
 
@@ -230,7 +228,7 @@
 
 aes_enc_blk:
 	push    %ebp
-	mov     ctx(%esp),%ebp      // pointer to context
+	mov     tfm(%esp),%ebp
 
 // CAUTION: the order and the values used in these assigns 
 // rely on the register mappings
@@ -295,6 +293,7 @@ aes_enc_blk:
 	ret
 
 // AES (Rijndael) Decryption Subroutine
+/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
 
 .global  aes_dec_blk
 
@@ -305,7 +304,7 @@ aes_enc_blk:
 
 aes_dec_blk:
 	push    %ebp
-	mov     ctx(%esp),%ebp       // pointer to context
+	mov     tfm(%esp),%ebp
 
 // CAUTION: the order and the values used in these assigns 
 // rely on the register mappings
diff -puN arch/i386/crypto/Makefile~git-cryptodev arch/i386/crypto/Makefile
--- a/arch/i386/crypto/Makefile~git-cryptodev
+++ a/arch/i386/crypto/Makefile
@@ -5,5 +5,8 @@
 # 
 
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
+obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 
 aes-i586-y := aes-i586-asm.o aes.o
+twofish-i586-y := twofish-i586-asm.o twofish.o
+
diff -puN /dev/null arch/i386/crypto/twofish.c
--- /dev/null
+++ a/arch/i386/crypto/twofish.c
@@ -0,0 +1,97 @@
+/*
+ *  Glue Code for optimized 586 assembler version of TWOFISH
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+
+asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+
+static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_enc_blk(tfm, dst, src);
+}
+
+static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_dec_blk(tfm, dst, src);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		=	"twofish",
+	.cra_driver_name	=	"twofish-i586",
+	.cra_priority		=	200,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	TF_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct twofish_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	TF_MIN_KEY_SIZE,
+			.cia_max_keysize	=	TF_MAX_KEY_SIZE,
+			.cia_setkey		=	twofish_setkey,
+			.cia_encrypt		=	twofish_encrypt,
+			.cia_decrypt		=	twofish_decrypt
+		}
+	}
+};
+
+static int __init init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
+MODULE_ALIAS("twofish");
diff -puN /dev/null arch/i386/crypto/twofish-i586-asm.S
--- /dev/null
+++ a/arch/i386/crypto/twofish-i586-asm.S
@@ -0,0 +1,335 @@
+/***************************************************************************
+*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+***************************************************************************/
+
+.file "twofish-i586-asm.S"
+.text
+
+#include <asm/asm-offsets.h>
+
+/* return adress at 0 */
+
+#define in_blk    12  /* input byte array address parameter*/
+#define out_blk   8  /* output byte array address parameter*/
+#define tfm       4  /* Twofish context structure */
+
+#define a_offset	0
+#define b_offset	4
+#define c_offset	8
+#define d_offset	12
+
+/* Structure of the crypto context struct*/
+
+#define s0	0	/* S0 Array 256 Words each */
+#define s1	1024	/* S1 Array */
+#define s2	2048	/* S2 Array */
+#define s3	3072	/* S3 Array */
+#define w	4096	/* 8 whitening keys (word) */
+#define k	4128	/* key 1-32 ( word ) */
+
+/* define a few register aliases to allow macro substitution */
+
+#define R0D    %eax
+#define R0B    %al
+#define R0H    %ah
+
+#define R1D    %ebx
+#define R1B    %bl
+#define R1H    %bh
+
+#define R2D    %ecx
+#define R2B    %cl
+#define R2H    %ch
+
+#define R3D    %edx
+#define R3B    %dl
+#define R3H    %dh
+
+
+/* performs input whitening */
+#define input_whitening(src,context,offset)\
+	xor	w+offset(context),	src;
+
+/* performs input whitening */
+#define output_whitening(src,context,offset)\
+	xor	w+16+offset(context),	src;
+
+/*
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ */
+#define encrypt_round(a,b,c,d,round)\
+	push	d ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s1(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%ebp,%edi,4),%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%ebp,%edi,4),d ## D;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%ebp,%edi,4),%esi;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$15,		b ## D;\
+	xor	(%ebp,%edi,4),	d ## D;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	d ## D,		%esi;\
+	add	%esi,		d ## D;\
+	add	k+round(%ebp),	%esi;\
+	xor	%esi,		c ## D;\
+	rol	$15,		c ## D;\
+	add	k+4+round(%ebp),d ## D;\
+	xor	%edi,		d ## D;
+
+/*
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ * last round has different rotations for the output preparation
+ */
+#define encrypt_last_round(a,b,c,d,round)\
+	push	d ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s1(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%ebp,%edi,4),%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%ebp,%edi,4),d ## D;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%ebp,%edi,4),%esi;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%ebp,%edi,4),	d ## D;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	d ## D,		%esi;\
+	add	%esi,		d ## D;\
+	add	k+round(%ebp),	%esi;\
+	xor	%esi,		c ## D;\
+	ror	$1,		c ## D;\
+	add	k+4+round(%ebp),d ## D;\
+	xor	%edi,		d ## D;
+
+/*
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ */
+#define decrypt_round(a,b,c,d,round)\
+	push	c ## D;\
+	movzx	a ## B,		%edi;\
+	mov	(%ebp,%edi,4),	c ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s1(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%ebp,%edi,4),c ## D;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$15,		a ## D;\
+	xor	s3(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	%esi,		c ## D;\
+	add	c ## D,		%esi;\
+	add	k+round(%ebp),	c ## D;\
+	xor	%edi,		c ## D;\
+	add	k+4+round(%ebp),%esi;\
+	xor	%esi,		d ## D;\
+	rol	$15,		d ## D;
+
+/*
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ * last round has different rotations for the output preparation
+ */
+#define decrypt_last_round(a,b,c,d,round)\
+	push	c ## D;\
+	movzx	a ## B,		%edi;\
+	mov	(%ebp,%edi,4),	c ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s1(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%ebp,%edi,4),c ## D;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	%esi,		c ## D;\
+	add	c ## D,		%esi;\
+	add	k+round(%ebp),	c ## D;\
+	xor	%edi,		c ## D;\
+	add	k+4+round(%ebp),%esi;\
+	xor	%esi,		d ## D;\
+	ror	$1,		d ## D;
+
+.align 4
+.global twofish_enc_blk
+.global twofish_dec_blk
+
+twofish_enc_blk:
+	push	%ebp			/* save registers according to calling convention*/
+	push    %ebx
+	push    %esi
+	push    %edi
+
+	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base bointer to the crypto tfm */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ctx adress */
+	mov     in_blk+16(%esp),%edi	/* input adress in edi */
+
+	mov	(%edi),		%eax
+	mov	b_offset(%edi),	%ebx
+	mov	c_offset(%edi),	%ecx
+	mov	d_offset(%edi),	%edx
+	input_whitening(%eax,%ebp,a_offset)
+	ror	$16,	%eax
+	input_whitening(%ebx,%ebp,b_offset)
+	input_whitening(%ecx,%ebp,c_offset)
+	input_whitening(%edx,%ebp,d_offset)
+	rol	$1,	%edx
+
+	encrypt_round(R0,R1,R2,R3,0);
+	encrypt_round(R2,R3,R0,R1,8);
+	encrypt_round(R0,R1,R2,R3,2*8);
+	encrypt_round(R2,R3,R0,R1,3*8);
+	encrypt_round(R0,R1,R2,R3,4*8);
+	encrypt_round(R2,R3,R0,R1,5*8);
+	encrypt_round(R0,R1,R2,R3,6*8);
+	encrypt_round(R2,R3,R0,R1,7*8);
+	encrypt_round(R0,R1,R2,R3,8*8);
+	encrypt_round(R2,R3,R0,R1,9*8);
+	encrypt_round(R0,R1,R2,R3,10*8);
+	encrypt_round(R2,R3,R0,R1,11*8);
+	encrypt_round(R0,R1,R2,R3,12*8);
+	encrypt_round(R2,R3,R0,R1,13*8);
+	encrypt_round(R0,R1,R2,R3,14*8);
+	encrypt_last_round(R2,R3,R0,R1,15*8);
+
+	output_whitening(%eax,%ebp,c_offset)
+	output_whitening(%ebx,%ebp,d_offset)
+	output_whitening(%ecx,%ebp,a_offset)
+	output_whitening(%edx,%ebp,b_offset)
+	mov	out_blk+16(%esp),%edi;
+	mov	%eax,		c_offset(%edi)
+	mov	%ebx,		d_offset(%edi)
+	mov	%ecx,		(%edi)
+	mov	%edx,		b_offset(%edi)
+
+	pop	%edi
+	pop	%esi
+	pop	%ebx
+	pop	%ebp
+	mov	$1,	%eax
+	ret
+
+twofish_dec_blk:
+	push	%ebp			/* save registers according to calling convention*/
+	push    %ebx
+	push    %esi
+	push    %edi
+
+
+	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base bointer to the crypto tfm */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ctx adress */
+	mov     in_blk+16(%esp),%edi	/* input adress in edi */
+
+	mov	(%edi),		%eax
+	mov	b_offset(%edi),	%ebx
+	mov	c_offset(%edi),	%ecx
+	mov	d_offset(%edi),	%edx
+	output_whitening(%eax,%ebp,a_offset)
+	output_whitening(%ebx,%ebp,b_offset)
+	ror	$16,	%ebx
+	output_whitening(%ecx,%ebp,c_offset)
+	output_whitening(%edx,%ebp,d_offset)
+	rol	$1,	%ecx
+
+	decrypt_round(R0,R1,R2,R3,15*8);
+	decrypt_round(R2,R3,R0,R1,14*8);
+	decrypt_round(R0,R1,R2,R3,13*8);
+	decrypt_round(R2,R3,R0,R1,12*8);
+	decrypt_round(R0,R1,R2,R3,11*8);
+	decrypt_round(R2,R3,R0,R1,10*8);
+	decrypt_round(R0,R1,R2,R3,9*8);
+	decrypt_round(R2,R3,R0,R1,8*8);
+	decrypt_round(R0,R1,R2,R3,7*8);
+	decrypt_round(R2,R3,R0,R1,6*8);
+	decrypt_round(R0,R1,R2,R3,5*8);
+	decrypt_round(R2,R3,R0,R1,4*8);
+	decrypt_round(R0,R1,R2,R3,3*8);
+	decrypt_round(R2,R3,R0,R1,2*8);
+	decrypt_round(R0,R1,R2,R3,1*8);
+	decrypt_last_round(R2,R3,R0,R1,0);
+
+	input_whitening(%eax,%ebp,c_offset)
+	input_whitening(%ebx,%ebp,d_offset)
+	input_whitening(%ecx,%ebp,a_offset)
+	input_whitening(%edx,%ebp,b_offset)
+	mov	out_blk+16(%esp),%edi;
+	mov	%eax,		c_offset(%edi)
+	mov	%ebx,		d_offset(%edi)
+	mov	%ecx,		(%edi)
+	mov	%edx,		b_offset(%edi)
+
+	pop	%edi
+	pop	%esi
+	pop	%ebx
+	pop	%ebp
+	mov	$1,	%eax
+	ret
diff -puN arch/i386/kernel/asm-offsets.c~git-cryptodev arch/i386/kernel/asm-offsets.c
--- a/arch/i386/kernel/asm-offsets.c~git-cryptodev
+++ a/arch/i386/kernel/asm-offsets.c
@@ -4,6 +4,7 @@
  * to extract and format the required data.
  */
 
+#include <linux/crypto.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/personality.h>
@@ -69,4 +70,6 @@ void foo(void)
 
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
 	DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
+
+	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
 }
diff -puN arch/s390/crypto/aes_s390.c~git-cryptodev arch/s390/crypto/aes_s390.c
--- a/arch/s390/crypto/aes_s390.c~git-cryptodev
+++ a/arch/s390/crypto/aes_s390.c
@@ -37,10 +37,10 @@ struct s390_aes_ctx {
 	int key_len;
 };
 
-static int aes_set_key(void *ctx, const u8 *in_key, unsigned int key_len,
-		       u32 *flags)
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len, u32 *flags)
 {
-	struct s390_aes_ctx *sctx = ctx;
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
 	switch (key_len) {
 	case 16:
@@ -70,9 +70,9 @@ fail:
 	return -EINVAL;
 }
 
-static void aes_encrypt(void *ctx, u8 *out, const u8 *in)
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	const struct s390_aes_ctx *sctx = ctx;
+	const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
 	switch (sctx->key_len) {
 	case 16:
@@ -90,9 +90,9 @@ static void aes_encrypt(void *ctx, u8 *o
 	}
 }
 
-static void aes_decrypt(void *ctx, u8 *out, const u8 *in)
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	const struct s390_aes_ctx *sctx = ctx;
+	const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
 	switch (sctx->key_len) {
 	case 16:
diff -puN arch/s390/crypto/des_s390.c~git-cryptodev arch/s390/crypto/des_s390.c
--- a/arch/s390/crypto/des_s390.c~git-cryptodev
+++ a/arch/s390/crypto/des_s390.c
@@ -44,10 +44,10 @@ struct crypt_s390_des3_192_ctx {
 	u8 key[DES3_192_KEY_SIZE];
 };
 
-static int des_setkey(void *ctx, const u8 *key, unsigned int keylen,
-		      u32 *flags)
+static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
+		      unsigned int keylen, u32 *flags)
 {
-	struct crypt_s390_des_ctx *dctx = ctx;
+	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
 	int ret;
 
 	/* test if key is valid (not a weak key) */
@@ -57,16 +57,16 @@ static int des_setkey(void *ctx, const u
 	return ret;
 }
 
-static void des_encrypt(void *ctx, u8 *out, const u8 *in)
+static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct crypt_s390_des_ctx *dctx = ctx;
+	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
 
 	crypt_s390_km(KM_DEA_ENCRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
 }
 
-static void des_decrypt(void *ctx, u8 *out, const u8 *in)
+static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct crypt_s390_des_ctx *dctx = ctx;
+	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
 
 	crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
 }
@@ -166,11 +166,11 @@ static struct crypto_alg des_alg = {
  *   Implementers MUST reject keys that exhibit this property.
  *
  */
-static int des3_128_setkey(void *ctx, const u8 *key, unsigned int keylen,
-			   u32 *flags)
+static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keylen, u32 *flags)
 {
 	int i, ret;
-	struct crypt_s390_des3_128_ctx *dctx = ctx;
+	struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
 	const u8* temp_key = key;
 
 	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE))) {
@@ -186,17 +186,17 @@ static int des3_128_setkey(void *ctx, co
 	return 0;
 }
 
-static void des3_128_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void des3_128_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct crypt_s390_des3_128_ctx *dctx = ctx;
+	struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
 
 	crypt_s390_km(KM_TDEA_128_ENCRYPT, dctx->key, dst, (void*)src,
 		      DES3_128_BLOCK_SIZE);
 }
 
-static void des3_128_decrypt(void *ctx, u8 *dst, const u8 *src)
+static void des3_128_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct crypt_s390_des3_128_ctx *dctx = ctx;
+	struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
 
 	crypt_s390_km(KM_TDEA_128_DECRYPT, dctx->key, dst, (void*)src,
 		      DES3_128_BLOCK_SIZE);
@@ -302,11 +302,11 @@ static struct crypto_alg des3_128_alg = 
  *   property.
  *
  */
-static int des3_192_setkey(void *ctx, const u8 *key, unsigned int keylen,
-			   u32 *flags)
+static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keylen, u32 *flags)
 {
 	int i, ret;
-	struct crypt_s390_des3_192_ctx *dctx = ctx;
+	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
 	const u8* temp_key = key;
 
 	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
@@ -325,17 +325,17 @@ static int des3_192_setkey(void *ctx, co
 	return 0;
 }
 
-static void des3_192_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void des3_192_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct crypt_s390_des3_192_ctx *dctx = ctx;
+	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
 
 	crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src,
 		      DES3_192_BLOCK_SIZE);
 }
 
-static void des3_192_decrypt(void *ctx, u8 *dst, const u8 *src)
+static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct crypt_s390_des3_192_ctx *dctx = ctx;
+	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
 
 	crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src,
 		      DES3_192_BLOCK_SIZE);
diff -puN arch/s390/crypto/sha1_s390.c~git-cryptodev arch/s390/crypto/sha1_s390.c
--- a/arch/s390/crypto/sha1_s390.c~git-cryptodev
+++ a/arch/s390/crypto/sha1_s390.c
@@ -40,28 +40,29 @@ struct crypt_s390_sha1_ctx {
 	u8 buffer[2 * SHA1_BLOCK_SIZE];
 };
 
-static void
-sha1_init(void *ctx)
+static void sha1_init(struct crypto_tfm *tfm)
 {
-	static const struct crypt_s390_sha1_ctx initstate = {
-		.state = {
-			0x67452301,
-			0xEFCDAB89,
-			0x98BADCFE,
-			0x10325476,
-			0xC3D2E1F0
-		},
+	struct crypt_s390_sha1_ctx *ctx = crypto_tfm_ctx(tfm);
+	static const u32 initstate[5] = {
+		0x67452301,
+		0xEFCDAB89,
+		0x98BADCFE,
+		0x10325476,
+		0xC3D2E1F0
 	};
-	memcpy(ctx, &initstate, sizeof(initstate));
+
+	ctx->count = 0;
+	memcpy(ctx->state, &initstate, sizeof(initstate));
+	ctx->buf_len = 0;
 }
 
-static void
-sha1_update(void *ctx, const u8 *data, unsigned int len)
+static void sha1_update(struct crypto_tfm *tfm, const u8 *data,
+			unsigned int len)
 {
 	struct crypt_s390_sha1_ctx *sctx;
 	long imd_len;
 
-	sctx = ctx;
+	sctx = crypto_tfm_ctx(tfm);
 	sctx->count += len * 8; //message bit length
 
 	//anything in buffer yet? -> must be completed
@@ -110,10 +111,9 @@ pad_message(struct crypt_s390_sha1_ctx* 
 }
 
 /* Add padding and return the message digest. */
-static void
-sha1_final(void* ctx, u8 *out)
+static void sha1_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct crypt_s390_sha1_ctx *sctx = ctx;
+	struct crypt_s390_sha1_ctx *sctx = crypto_tfm_ctx(tfm);
 
 	//must perform manual padding
 	pad_message(sctx);
diff -puN arch/s390/crypto/sha256_s390.c~git-cryptodev arch/s390/crypto/sha256_s390.c
--- a/arch/s390/crypto/sha256_s390.c~git-cryptodev
+++ a/arch/s390/crypto/sha256_s390.c
@@ -31,9 +31,9 @@ struct s390_sha256_ctx {
 	u8 buf[2 * SHA256_BLOCK_SIZE];
 };
 
-static void sha256_init(void *ctx)
+static void sha256_init(struct crypto_tfm *tfm)
 {
-	struct s390_sha256_ctx *sctx = ctx;
+	struct s390_sha256_ctx *sctx = crypto_tfm_ctx(tfm);
 
 	sctx->state[0] = 0x6a09e667;
 	sctx->state[1] = 0xbb67ae85;
@@ -44,12 +44,12 @@ static void sha256_init(void *ctx)
 	sctx->state[6] = 0x1f83d9ab;
 	sctx->state[7] = 0x5be0cd19;
 	sctx->count = 0;
-	memset(sctx->buf, 0, sizeof(sctx->buf));
 }
 
-static void sha256_update(void *ctx, const u8 *data, unsigned int len)
+static void sha256_update(struct crypto_tfm *tfm, const u8 *data,
+			  unsigned int len)
 {
-	struct s390_sha256_ctx *sctx = ctx;
+	struct s390_sha256_ctx *sctx = crypto_tfm_ctx(tfm);
 	unsigned int index;
 	int ret;
 
@@ -108,9 +108,9 @@ static void pad_message(struct s390_sha2
 }
 
 /* Add padding and return the message digest */
-static void sha256_final(void* ctx, u8 *out)
+static void sha256_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct s390_sha256_ctx *sctx = ctx;
+	struct s390_sha256_ctx *sctx = crypto_tfm_ctx(tfm);
 
 	/* must perform manual padding */
 	pad_message(sctx);
diff -puN arch/x86_64/crypto/aes.c~git-cryptodev arch/x86_64/crypto/aes.c
--- a/arch/x86_64/crypto/aes.c~git-cryptodev
+++ a/arch/x86_64/crypto/aes.c
@@ -227,10 +227,10 @@ static void __init gen_tabs(void)
 	t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t;	\
 }
 
-static int aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len,
-		       u32 *flags)
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len, u32 *flags)
 {
-	struct aes_ctx *ctx = ctx_arg;
+	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
 	u32 i, j, t, u, v, w;
 
@@ -283,8 +283,18 @@ static int aes_set_key(void *ctx_arg, co
 	return 0;
 }
 
-extern void aes_encrypt(void *ctx_arg, u8 *out, const u8 *in);
-extern void aes_decrypt(void *ctx_arg, u8 *out, const u8 *in);
+asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	aes_enc_blk(tfm, dst, src);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	aes_dec_blk(tfm, dst, src);
+}
 
 static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
diff -puN arch/x86_64/crypto/aes-x86_64-asm.S~git-cryptodev arch/x86_64/crypto/aes-x86_64-asm.S
--- a/arch/x86_64/crypto/aes-x86_64-asm.S~git-cryptodev
+++ a/arch/x86_64/crypto/aes-x86_64-asm.S
@@ -15,6 +15,10 @@
 
 .text
 
+#include <asm/asm-offsets.h>
+
+#define BASE crypto_tfm_ctx_offset
+
 #define R1	%rax
 #define R1E	%eax
 #define R1X	%ax
@@ -46,19 +50,19 @@
 #define R10	%r10
 #define R11	%r11
 
-#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
+#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
 	.global	FUNC;			\
 	.type	FUNC,@function;		\
 	.align	8;			\
 FUNC:	movq	r1,r2;			\
 	movq	r3,r4;			\
-	leaq	BASE+52(r8),r9;		\
+	leaq	BASE+KEY+52(r8),r9;	\
 	movq	r10,r11;		\
 	movl	(r7),r5 ## E;		\
 	movl	4(r7),r1 ## E;		\
 	movl	8(r7),r6 ## E;		\
 	movl	12(r7),r7 ## E;		\
-	movl	(r8),r10 ## E;		\
+	movl	BASE(r8),r10 ## E;	\
 	xorl	-48(r9),r5 ## E;	\
 	xorl	-44(r9),r1 ## E;	\
 	xorl	-40(r9),r6 ## E;	\
@@ -128,8 +132,8 @@ FUNC:	movq	r1,r2;			\
 	movl	r3 ## E,r1 ## E;	\
 	movl	r4 ## E,r2 ## E;
 
-#define entry(FUNC,BASE,B128,B192) \
-	prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+#define entry(FUNC,KEY,B128,B192) \
+	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
 
 #define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
 
@@ -147,9 +151,9 @@ FUNC:	movq	r1,r2;			\
 #define decrypt_final(TAB,OFFSET) \
 	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
 
-/* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */
+/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
 
-	entry(aes_encrypt,0,enc128,enc192)
+	entry(aes_enc_blk,0,enc128,enc192)
 	encrypt_round(aes_ft_tab,-96)
 	encrypt_round(aes_ft_tab,-80)
 enc192:	encrypt_round(aes_ft_tab,-64)
@@ -166,9 +170,9 @@ enc128:	encrypt_round(aes_ft_tab,-32)
 	encrypt_final(aes_fl_tab,112)
 	return
 
-/* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */
+/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
 
-	entry(aes_decrypt,240,dec128,dec192)
+	entry(aes_dec_blk,240,dec128,dec192)
 	decrypt_round(aes_it_tab,-96)
 	decrypt_round(aes_it_tab,-80)
 dec192:	decrypt_round(aes_it_tab,-64)
diff -puN arch/x86_64/crypto/Makefile~git-cryptodev arch/x86_64/crypto/Makefile
--- a/arch/x86_64/crypto/Makefile~git-cryptodev
+++ a/arch/x86_64/crypto/Makefile
@@ -5,5 +5,8 @@
 # 
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 
 aes-x86_64-y := aes-x86_64-asm.o aes.o
+twofish-x86_64-y := twofish-x86_64-asm.o twofish.o
+
diff -puN /dev/null arch/x86_64/crypto/twofish.c
--- /dev/null
+++ a/arch/x86_64/crypto/twofish.c
@@ -0,0 +1,97 @@
+/*
+ * Glue Code for optimized x86_64 assembler version of TWOFISH
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+
+static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_enc_blk(tfm, dst, src);
+}
+
+static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_dec_blk(tfm, dst, src);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		=	"twofish",
+	.cra_driver_name	=	"twofish-x86_64",
+	.cra_priority		=	200,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	TF_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct twofish_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	TF_MIN_KEY_SIZE,
+			.cia_max_keysize	=	TF_MAX_KEY_SIZE,
+			.cia_setkey		=	twofish_setkey,
+			.cia_encrypt		=	twofish_encrypt,
+			.cia_decrypt		=	twofish_decrypt
+		}
+	}
+};
+
+static int __init init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized");
+MODULE_ALIAS("twofish");
diff -puN /dev/null arch/x86_64/crypto/twofish-x86_64-asm.S
--- /dev/null
+++ a/arch/x86_64/crypto/twofish-x86_64-asm.S
@@ -0,0 +1,324 @@
+/***************************************************************************
+*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+***************************************************************************/
+
+.file "twofish-x86_64-asm.S"
+.text
+
+#include <asm/asm-offsets.h>
+
+#define a_offset	0
+#define b_offset	4
+#define c_offset	8
+#define d_offset	12
+
+/* Structure of the crypto context struct*/
+
+#define s0	0	/* S0 Array 256 Words each */
+#define s1	1024	/* S1 Array */
+#define s2	2048	/* S2 Array */
+#define s3	3072	/* S3 Array */
+#define w	4096	/* 8 whitening keys (word) */
+#define k	4128	/* key 1-32 ( word ) */
+
+/* define a few register aliases to allow macro substitution */
+
+#define R0     %rax
+#define R0D    %eax
+#define R0B    %al
+#define R0H    %ah
+
+#define R1     %rbx
+#define R1D    %ebx
+#define R1B    %bl
+#define R1H    %bh
+
+#define R2     %rcx
+#define R2D    %ecx
+#define R2B    %cl
+#define R2H    %ch
+
+#define R3     %rdx
+#define R3D    %edx
+#define R3B    %dl
+#define R3H    %dh
+
+
+/* performs input whitening */
+#define input_whitening(src,context,offset)\
+	xor	w+offset(context),	src;
+
+/* performs input whitening */
+#define output_whitening(src,context,offset)\
+	xor	w+16+offset(context),	src;
+
+
+/*
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ */
+#define encrypt_round(a,b,c,d,round)\
+	movzx	b ## B,		%edi;\
+	mov	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	(%r11,%rdi,4),	%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$15,		b ## D;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	rol	$15,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D;
+
+/*
+ * a input register containing a(rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ * during the round a and b are prepared for the output whitening
+ */
+#define encrypt_last_round(a,b,c,d,round)\
+	mov	b ## D,		%r10d;\
+	shl	$32,		%r10;\
+	movzx	b ## B,		%edi;\
+	mov	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	(%r11,%rdi,4),	%r9d;\
+	xor	a,		%r10;\
+	movzx	b ## H,		%edi;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	ror	$1,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D
+
+/*
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c (already rol $1)
+ * d input register containing d
+ * operations on a and b are interleaved to increase performance
+ */
+#define decrypt_round(a,b,c,d,round)\
+	movzx	a ## B,		%edi;\
+	mov	(%r11,%rdi,4),	%r9d;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$15,		a ## D;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D;\
+	rol	$15,		d ## D;
+
+/*
+ * a input register containing a
+ * b input register containing b
+ * c input register containing c (already rol $1)
+ * d input register containing d
+ * operations on a and b are interleaved to increase performance
+ * during the round a and b are prepared for the output whitening
+ */
+#define decrypt_last_round(a,b,c,d,round)\
+	movzx	a ## B,		%edi;\
+	mov	(%r11,%rdi,4),	%r9d;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%r11,%rdi,4),%r8d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## H,		%edi;\
+	mov	b ## D,		%r10d;\
+	shl	$32,		%r10;\
+	xor	a,		%r10;\
+	ror	$16,		a ## D;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D;\
+	ror	$1,		d ## D;
+
+.align 8
+.global twofish_enc_blk
+.global twofish_dec_blk
+
+twofish_enc_blk:
+	pushq    R1
+
+	/* %rdi contains the crypto tfm adress */
+	/* %rsi contains the output adress */
+	/* %rdx contains the input adress */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx adress */
+	/* ctx adress is moved to free one non-rex register
+	as target for the 8bit high operations */
+	mov	%rdi,		%r11
+
+	movq	(R3),	R1
+	movq	8(R3),	R3
+	input_whitening(R1,%r11,a_offset)
+	input_whitening(R3,%r11,c_offset)
+	mov	R1D,	R0D
+	rol	$16,	R0D
+	shr	$32,	R1
+	mov	R3D,	R2D
+	shr	$32,	R3
+	rol	$1,	R3D
+
+	encrypt_round(R0,R1,R2,R3,0);
+	encrypt_round(R2,R3,R0,R1,8);
+	encrypt_round(R0,R1,R2,R3,2*8);
+	encrypt_round(R2,R3,R0,R1,3*8);
+	encrypt_round(R0,R1,R2,R3,4*8);
+	encrypt_round(R2,R3,R0,R1,5*8);
+	encrypt_round(R0,R1,R2,R3,6*8);
+	encrypt_round(R2,R3,R0,R1,7*8);
+	encrypt_round(R0,R1,R2,R3,8*8);
+	encrypt_round(R2,R3,R0,R1,9*8);
+	encrypt_round(R0,R1,R2,R3,10*8);
+	encrypt_round(R2,R3,R0,R1,11*8);
+	encrypt_round(R0,R1,R2,R3,12*8);
+	encrypt_round(R2,R3,R0,R1,13*8);
+	encrypt_round(R0,R1,R2,R3,14*8);
+	encrypt_last_round(R2,R3,R0,R1,15*8);
+
+
+	output_whitening(%r10,%r11,a_offset)
+	movq	%r10,	(%rsi)
+
+	shl	$32,	R1
+	xor	R0,	R1
+
+	output_whitening(R1,%r11,c_offset)
+	movq	R1,	8(%rsi)
+
+	popq	R1
+	movq	$1,%rax
+	ret
+
+twofish_dec_blk:
+	pushq    R1
+
+	/* %rdi contains the crypto tfm adress */
+	/* %rsi contains the output adress */
+	/* %rdx contains the input adress */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx adress */
+	/* ctx adress is moved to free one non-rex register
+	as target for the 8bit high operations */
+	mov	%rdi,		%r11
+
+	movq	(R3),	R1
+	movq	8(R3),	R3
+	output_whitening(R1,%r11,a_offset)
+	output_whitening(R3,%r11,c_offset)
+	mov	R1D,	R0D
+	shr	$32,	R1
+	rol	$16,	R1D
+	mov	R3D,	R2D
+	shr	$32,	R3
+	rol	$1,	R2D
+
+	decrypt_round(R0,R1,R2,R3,15*8);
+	decrypt_round(R2,R3,R0,R1,14*8);
+	decrypt_round(R0,R1,R2,R3,13*8);
+	decrypt_round(R2,R3,R0,R1,12*8);
+	decrypt_round(R0,R1,R2,R3,11*8);
+	decrypt_round(R2,R3,R0,R1,10*8);
+	decrypt_round(R0,R1,R2,R3,9*8);
+	decrypt_round(R2,R3,R0,R1,8*8);
+	decrypt_round(R0,R1,R2,R3,7*8);
+	decrypt_round(R2,R3,R0,R1,6*8);
+	decrypt_round(R0,R1,R2,R3,5*8);
+	decrypt_round(R2,R3,R0,R1,4*8);
+	decrypt_round(R0,R1,R2,R3,3*8);
+	decrypt_round(R2,R3,R0,R1,2*8);
+	decrypt_round(R0,R1,R2,R3,1*8);
+	decrypt_last_round(R2,R3,R0,R1,0);
+
+	input_whitening(%r10,%r11,a_offset)
+	movq	%r10,	(%rsi)
+
+	shl	$32,	R1
+	xor	R0,	R1
+
+	input_whitening(R1,%r11,c_offset)
+	movq	R1,	8(%rsi)
+
+	popq	R1
+	movq	$1,%rax
+	ret
diff -puN arch/x86_64/kernel/asm-offsets.c~git-cryptodev arch/x86_64/kernel/asm-offsets.c
--- a/arch/x86_64/kernel/asm-offsets.c~git-cryptodev
+++ a/arch/x86_64/kernel/asm-offsets.c
@@ -4,6 +4,7 @@
  * and format the required data.
  */
 
+#include <linux/crypto.h>
 #include <linux/sched.h> 
 #include <linux/stddef.h>
 #include <linux/errno.h> 
@@ -68,5 +69,7 @@ int main(void)
 	DEFINE(pbe_next, offsetof(struct pbe, next));
 	BLANK();
 	DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
+	BLANK();
+	DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
 	return 0;
 }
diff -puN crypto/aes.c~git-cryptodev crypto/aes.c
--- a/crypto/aes.c~git-cryptodev
+++ a/crypto/aes.c
@@ -248,10 +248,10 @@ gen_tabs (void)
     t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t;   \
 }
 
-static int
-aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len, u32 *flags)
 {
-	struct aes_ctx *ctx = ctx_arg;
+	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
 	u32 i, t, u, v, w;
 
@@ -318,9 +318,9 @@ aes_set_key(void *ctx_arg, const u8 *in_
     f_rl(bo, bi, 2, k);     \
     f_rl(bo, bi, 3, k)
 
-static void aes_encrypt(void *ctx_arg, u8 *out, const u8 *in)
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	const struct aes_ctx *ctx = ctx_arg;
+	const struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *src = (const __le32 *)in;
 	__le32 *dst = (__le32 *)out;
 	u32 b0[4], b1[4];
@@ -373,9 +373,9 @@ static void aes_encrypt(void *ctx_arg, u
     i_rl(bo, bi, 2, k);     \
     i_rl(bo, bi, 3, k)
 
-static void aes_decrypt(void *ctx_arg, u8 *out, const u8 *in)
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	const struct aes_ctx *ctx = ctx_arg;
+	const struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *src = (const __le32 *)in;
 	__le32 *dst = (__le32 *)out;
 	u32 b0[4], b1[4];
diff -puN crypto/anubis.c~git-cryptodev crypto/anubis.c
--- a/crypto/anubis.c~git-cryptodev
+++ a/crypto/anubis.c
@@ -460,16 +460,15 @@ static const u32 rc[] = {
 	0xf726ffedU, 0xe89d6f8eU, 0x19a0f089U,
 };
 
-static int anubis_setkey(void *ctx_arg, const u8 *in_key,
+static int anubis_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 			 unsigned int key_len, u32 *flags)
 {
+	struct anubis_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __be32 *key = (const __be32 *)in_key;
 	int N, R, i, r;
 	u32 kappa[ANUBIS_MAX_N];
 	u32 inter[ANUBIS_MAX_N];
 
-	struct anubis_ctx *ctx = ctx_arg;
-
 	switch (key_len)
 	{
 		case 16: case 20: case 24: case 28:
@@ -660,15 +659,15 @@ static void anubis_crypt(u32 roundKey[AN
 		dst[i] = cpu_to_be32(inter[i]);
 }
 
-static void anubis_encrypt(void *ctx_arg, u8 *dst, const u8 *src)
+static void anubis_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct anubis_ctx *ctx = ctx_arg;
+	struct anubis_ctx *ctx = crypto_tfm_ctx(tfm);
 	anubis_crypt(ctx->E, dst, src, ctx->R);
 }
 
-static void anubis_decrypt(void *ctx_arg, u8 *dst, const u8 *src)
+static void anubis_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct anubis_ctx *ctx = ctx_arg;
+	struct anubis_ctx *ctx = crypto_tfm_ctx(tfm);
 	anubis_crypt(ctx->D, dst, src, ctx->R);
 }
 
diff -puN crypto/api.c~git-cryptodev crypto/api.c
--- a/crypto/api.c~git-cryptodev
+++ a/crypto/api.c
@@ -188,13 +188,16 @@ struct crypto_tfm *crypto_alloc_tfm(cons
 	if (crypto_init_flags(tfm, flags))
 		goto out_free_tfm;
 		
-	if (crypto_init_ops(tfm)) {
-		crypto_exit_ops(tfm);
+	if (crypto_init_ops(tfm))
 		goto out_free_tfm;
-	}
+
+	if (alg->cra_init && alg->cra_init(tfm))
+		goto cra_init_failed;
 
 	goto out;
 
+cra_init_failed:
+	crypto_exit_ops(tfm);
 out_free_tfm:
 	kfree(tfm);
 	tfm = NULL;
@@ -215,6 +218,8 @@ void crypto_free_tfm(struct crypto_tfm *
 	alg = tfm->__crt_alg;
 	size = sizeof(*tfm) + alg->cra_ctxsize;
 
+	if (alg->cra_exit)
+		alg->cra_exit(tfm);
 	crypto_exit_ops(tfm);
 	crypto_alg_put(alg);
 	memset(tfm, 0, size);
@@ -224,7 +229,7 @@ void crypto_free_tfm(struct crypto_tfm *
 static inline int crypto_set_driver_name(struct crypto_alg *alg)
 {
 	static const char suffix[] = "-generic";
-	char *driver_name = (char *)alg->cra_driver_name;
+	char *driver_name = alg->cra_driver_name;
 	int len;
 
 	if (*driver_name)
@@ -262,13 +267,13 @@ int crypto_register_alg(struct crypto_al
 	down_write(&crypto_alg_sem);
 	
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
-		if (!strcmp(q->cra_driver_name, alg->cra_driver_name)) {
+		if (q == alg) {
 			ret = -EEXIST;
 			goto out;
 		}
 	}
 	
-	list_add_tail(&alg->cra_list, &crypto_alg_list);
+	list_add(&alg->cra_list, &crypto_alg_list);
 out:	
 	up_write(&crypto_alg_sem);
 	return ret;
diff -puN crypto/arc4.c~git-cryptodev crypto/arc4.c
--- a/crypto/arc4.c~git-cryptodev
+++ a/crypto/arc4.c
@@ -24,9 +24,10 @@ struct arc4_ctx {
 	u8 x, y;
 };
 
-static int arc4_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
+static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			unsigned int key_len, u32 *flags)
 {
-	struct arc4_ctx *ctx = ctx_arg;
+	struct arc4_ctx *ctx = crypto_tfm_ctx(tfm);
 	int i, j = 0, k = 0;
 
 	ctx->x = 1;
@@ -48,9 +49,9 @@ static int arc4_set_key(void *ctx_arg, c
 	return 0;
 }
 
-static void arc4_crypt(void *ctx_arg, u8 *out, const u8 *in)
+static void arc4_crypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct arc4_ctx *ctx = ctx_arg;
+	struct arc4_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	u8 *const S = ctx->S;
 	u8 x = ctx->x;
diff -puN crypto/blowfish.c~git-cryptodev crypto/blowfish.c
--- a/crypto/blowfish.c~git-cryptodev
+++ a/crypto/blowfish.c
@@ -349,7 +349,7 @@ static void encrypt_block(struct bf_ctx 
 	dst[1] = yl;
 }
 
-static void bf_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void bf_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	const __be32 *in_blk = (const __be32 *)src;
 	__be32 *const out_blk = (__be32 *)dst;
@@ -357,17 +357,18 @@ static void bf_encrypt(void *ctx, u8 *ds
 
 	in32[0] = be32_to_cpu(in_blk[0]);
 	in32[1] = be32_to_cpu(in_blk[1]);
-	encrypt_block(ctx, out32, in32);
+	encrypt_block(crypto_tfm_ctx(tfm), out32, in32);
 	out_blk[0] = cpu_to_be32(out32[0]);
 	out_blk[1] = cpu_to_be32(out32[1]);
 }
 
-static void bf_decrypt(void *ctx, u8 *dst, const u8 *src)
+static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
+	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __be32 *in_blk = (const __be32 *)src;
 	__be32 *const out_blk = (__be32 *)dst;
-	const u32 *P = ((struct bf_ctx *)ctx)->p;
-	const u32 *S = ((struct bf_ctx *)ctx)->s;
+	const u32 *P = ctx->p;
+	const u32 *S = ctx->s;
 	u32 yl = be32_to_cpu(in_blk[0]);
 	u32 yr = be32_to_cpu(in_blk[1]);
 
@@ -398,12 +399,14 @@ static void bf_decrypt(void *ctx, u8 *ds
 /* 
  * Calculates the blowfish S and P boxes for encryption and decryption.
  */
-static int bf_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags)
+static int bf_setkey(struct crypto_tfm *tfm, const u8 *key,
+		     unsigned int keylen, u32 *flags)
 {
+	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *P = ctx->p;
+	u32 *S = ctx->s;
 	short i, j, count;
 	u32 data[2], temp;
-	u32 *P = ((struct bf_ctx *)ctx)->p;
-	u32 *S = ((struct bf_ctx *)ctx)->s;
 
 	/* Copy the initialization s-boxes */
 	for (i = 0, count = 0; i < 256; i++)
diff -puN crypto/cast5.c~git-cryptodev crypto/cast5.c
--- a/crypto/cast5.c~git-cryptodev
+++ a/crypto/cast5.c
@@ -577,9 +577,9 @@ static const u32 sb8[256] = {
     (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]) )
 
 
-static void cast5_encrypt(void *ctx, u8 * outbuf, const u8 * inbuf)
+static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
 {
-	struct cast5_ctx *c = (struct cast5_ctx *) ctx;
+	struct cast5_ctx *c = crypto_tfm_ctx(tfm);
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 l, r, t;
@@ -642,9 +642,9 @@ static void cast5_encrypt(void *ctx, u8 
 	dst[1] = cpu_to_be32(l);
 }
 
-static void cast5_decrypt(void *ctx, u8 * outbuf, const u8 * inbuf)
+static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
 {
-	struct cast5_ctx *c = (struct cast5_ctx *) ctx;
+	struct cast5_ctx *c = crypto_tfm_ctx(tfm);
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 l, r, t;
@@ -769,15 +769,15 @@ static void key_schedule(u32 * x, u32 * 
 }
 
 
-static int
-cast5_setkey(void *ctx, const u8 * key, unsigned key_len, u32 * flags)
+static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key,
+			unsigned key_len, u32 *flags)
 {
+	struct cast5_ctx *c = crypto_tfm_ctx(tfm);
 	int i;
 	u32 x[4];
 	u32 z[4];
 	u32 k[16];
 	__be32 p_key[4];
-	struct cast5_ctx *c = (struct cast5_ctx *) ctx;
 	
 	if (key_len < 5 || key_len > 16) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
diff -puN crypto/cast6.c~git-cryptodev crypto/cast6.c
--- a/crypto/cast6.c~git-cryptodev
+++ a/crypto/cast6.c
@@ -381,13 +381,13 @@ static inline void W(u32 *key, unsigned 
 	key[7] ^= F2(key[0], Tr[i % 4][7], Tm[i][7]);
 }
 
-static int
-cast6_setkey(void *ctx, const u8 * in_key, unsigned key_len, u32 * flags)
+static int cast6_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+			unsigned key_len, u32 *flags)
 {
 	int i;
 	u32 key[8];
 	__be32 p_key[8]; /* padded key */
-	struct cast6_ctx *c = (struct cast6_ctx *) ctx;
+	struct cast6_ctx *c = crypto_tfm_ctx(tfm);
 
 	if (key_len < 16 || key_len > 32 || key_len % 4 != 0) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
@@ -444,8 +444,9 @@ static inline void QBAR (u32 * block, u8
         block[2] ^= F1(block[3], Kr[0], Km[0]);
 }
 
-static void cast6_encrypt (void * ctx, u8 * outbuf, const u8 * inbuf) {
-	struct cast6_ctx * c = (struct cast6_ctx *)ctx;
+static void cast6_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
+{
+	struct cast6_ctx *c = crypto_tfm_ctx(tfm);
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 block[4];
@@ -476,8 +477,8 @@ static void cast6_encrypt (void * ctx, u
 	dst[3] = cpu_to_be32(block[3]);
 }	
 
-static void cast6_decrypt (void * ctx, u8 * outbuf, const u8 * inbuf) {
-	struct cast6_ctx * c = (struct cast6_ctx *)ctx;
+static void cast6_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) {
+	struct cast6_ctx * c = crypto_tfm_ctx(tfm);
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 block[4];
diff -puN crypto/cipher.c~git-cryptodev crypto/cipher.c
--- a/crypto/cipher.c~git-cryptodev
+++ a/crypto/cipher.c
@@ -187,7 +187,7 @@ static unsigned int cbc_process_encrypt(
 	void (*xor)(u8 *, const u8 *) = tfm->crt_u.cipher.cit_xor_block;
 	int bsize = crypto_tfm_alg_blocksize(tfm);
 
-	void (*fn)(void *, u8 *, const u8 *) = desc->crfn;
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = desc->crfn;
 	u8 *iv = desc->info;
 	unsigned int done = 0;
 
@@ -195,7 +195,7 @@ static unsigned int cbc_process_encrypt(
 
 	do {
 		xor(iv, src);
-		fn(crypto_tfm_ctx(tfm), dst, iv);
+		fn(tfm, dst, iv);
 		memcpy(iv, dst, bsize);
 
 		src += bsize;
@@ -218,7 +218,7 @@ static unsigned int cbc_process_decrypt(
 	u8 *buf = (u8 *)ALIGN((unsigned long)stack, alignmask + 1);
 	u8 **dst_p = src == dst ? &buf : &dst;
 
-	void (*fn)(void *, u8 *, const u8 *) = desc->crfn;
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = desc->crfn;
 	u8 *iv = desc->info;
 	unsigned int done = 0;
 
@@ -227,7 +227,7 @@ static unsigned int cbc_process_decrypt(
 	do {
 		u8 *tmp_dst = *dst_p;
 
-		fn(crypto_tfm_ctx(tfm), tmp_dst, src);
+		fn(tfm, tmp_dst, src);
 		xor(tmp_dst, iv);
 		memcpy(iv, src, bsize);
 		if (tmp_dst != dst)
@@ -245,13 +245,13 @@ static unsigned int ecb_process(const st
 {
 	struct crypto_tfm *tfm = desc->tfm;
 	int bsize = crypto_tfm_alg_blocksize(tfm);
-	void (*fn)(void *, u8 *, const u8 *) = desc->crfn;
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = desc->crfn;
 	unsigned int done = 0;
 
 	nbytes -= bsize;
 
 	do {
-		fn(crypto_tfm_ctx(tfm), dst, src);
+		fn(tfm, dst, src);
 
 		src += bsize;
 		dst += bsize;
@@ -268,7 +268,7 @@ static int setkey(struct crypto_tfm *tfm
 		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	} else
-		return cia->cia_setkey(crypto_tfm_ctx(tfm), key, keylen,
+		return cia->cia_setkey(tfm, key, keylen,
 		                       &tfm->crt_flags);
 }
 
diff -puN crypto/compress.c~git-cryptodev crypto/compress.c
--- a/crypto/compress.c~git-cryptodev
+++ a/crypto/compress.c
@@ -22,8 +22,7 @@ static int crypto_compress(struct crypto
                             const u8 *src, unsigned int slen,
                             u8 *dst, unsigned int *dlen)
 {
-	return tfm->__crt_alg->cra_compress.coa_compress(crypto_tfm_ctx(tfm),
-	                                                 src, slen, dst,
+	return tfm->__crt_alg->cra_compress.coa_compress(tfm, src, slen, dst,
 	                                                 dlen);
 }
 
@@ -31,8 +30,7 @@ static int crypto_decompress(struct cryp
                              const u8 *src, unsigned int slen,
                              u8 *dst, unsigned int *dlen)
 {
-	return tfm->__crt_alg->cra_compress.coa_decompress(crypto_tfm_ctx(tfm),
-	                                                   src, slen, dst,
+	return tfm->__crt_alg->cra_compress.coa_decompress(tfm, src, slen, dst,
 	                                                   dlen);
 }
 
@@ -43,21 +41,14 @@ int crypto_init_compress_flags(struct cr
 
 int crypto_init_compress_ops(struct crypto_tfm *tfm)
 {
-	int ret = 0;
 	struct compress_tfm *ops = &tfm->crt_compress;
-	
-	ret = tfm->__crt_alg->cra_compress.coa_init(crypto_tfm_ctx(tfm));
-	if (ret)
-		goto out;
 
 	ops->cot_compress = crypto_compress;
 	ops->cot_decompress = crypto_decompress;
 	
-out:
-	return ret;
+	return 0;
 }
 
 void crypto_exit_compress_ops(struct crypto_tfm *tfm)
 {
-	tfm->__crt_alg->cra_compress.coa_exit(crypto_tfm_ctx(tfm));
 }
diff -puN crypto/crc32c.c~git-cryptodev crypto/crc32c.c
--- a/crypto/crc32c.c~git-cryptodev
+++ a/crypto/crc32c.c
@@ -31,9 +31,9 @@ struct chksum_ctx {
  * crc using table.
  */
 
-static void chksum_init(void *ctx)
+static void chksum_init(struct crypto_tfm *tfm)
 {
-	struct chksum_ctx *mctx = ctx;
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 
 	mctx->crc = ~(u32)0;			/* common usage */
 }
@@ -43,10 +43,10 @@ static void chksum_init(void *ctx)
  * If your algorithm starts with ~0, then XOR with ~0 before you set
  * the seed.
  */
-static int chksum_setkey(void *ctx, const u8 *key, unsigned int keylen,
-	                  u32 *flags)
+static int chksum_setkey(struct crypto_tfm *tfm, const u8 *key,
+			 unsigned int keylen, u32 *flags)
 {
-	struct chksum_ctx *mctx = ctx;
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 
 	if (keylen != sizeof(mctx->crc)) {
 		if (flags)
@@ -57,9 +57,10 @@ static int chksum_setkey(void *ctx, cons
 	return 0;
 }
 
-static void chksum_update(void *ctx, const u8 *data, unsigned int length)
+static void chksum_update(struct crypto_tfm *tfm, const u8 *data,
+			  unsigned int length)
 {
-	struct chksum_ctx *mctx = ctx;
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 	u32 mcrc;
 
 	mcrc = crc32c(mctx->crc, data, (size_t)length);
@@ -67,9 +68,9 @@ static void chksum_update(void *ctx, con
 	mctx->crc = mcrc;
 }
 
-static void chksum_final(void *ctx, u8 *out)
+static void chksum_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct chksum_ctx *mctx = ctx;
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 	u32 mcrc = (mctx->crc ^ ~(u32)0);
 	
 	*(u32 *)out = __le32_to_cpu(mcrc);
diff -puN crypto/crypto_null.c~git-cryptodev crypto/crypto_null.c
--- a/crypto/crypto_null.c~git-cryptodev
+++ a/crypto/crypto_null.c
@@ -27,8 +27,8 @@
 #define NULL_BLOCK_SIZE		1
 #define NULL_DIGEST_SIZE	0
 
-static int null_compress(void *ctx, const u8 *src, unsigned int slen,
-                         u8 *dst, unsigned int *dlen)
+static int null_compress(struct crypto_tfm *tfm, const u8 *src,
+			 unsigned int slen, u8 *dst, unsigned int *dlen)
 {
 	if (slen > *dlen)
 		return -EINVAL;
@@ -37,20 +37,21 @@ static int null_compress(void *ctx, cons
 	return 0;
 }
 
-static void null_init(void *ctx)
+static void null_init(struct crypto_tfm *tfm)
 { }
 
-static void null_update(void *ctx, const u8 *data, unsigned int len)
+static void null_update(struct crypto_tfm *tfm, const u8 *data,
+			unsigned int len)
 { }
 
-static void null_final(void *ctx, u8 *out)
+static void null_final(struct crypto_tfm *tfm, u8 *out)
 { }
 
-static int null_setkey(void *ctx, const u8 *key,
-                       unsigned int keylen, u32 *flags)
+static int null_setkey(struct crypto_tfm *tfm, const u8 *key,
+		       unsigned int keylen, u32 *flags)
 { return 0; }
 
-static void null_crypt(void *ctx, u8 *dst, const u8 *src)
+static void null_crypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	memcpy(dst, src, NULL_BLOCK_SIZE);
 }
diff -puN crypto/deflate.c~git-cryptodev crypto/deflate.c
--- a/crypto/deflate.c~git-cryptodev
+++ a/crypto/deflate.c
@@ -102,8 +102,9 @@ static void deflate_decomp_exit(struct d
 	kfree(ctx->decomp_stream.workspace);
 }
 
-static int deflate_init(void *ctx)
+static int deflate_init(struct crypto_tfm *tfm)
 {
+	struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
 	int ret;
 	
 	ret = deflate_comp_init(ctx);
@@ -116,17 +117,19 @@ out:
 	return ret;
 }
 
-static void deflate_exit(void *ctx)
+static void deflate_exit(struct crypto_tfm *tfm)
 {
+	struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
+
 	deflate_comp_exit(ctx);
 	deflate_decomp_exit(ctx);
 }
 
-static int deflate_compress(void *ctx, const u8 *src, unsigned int slen,
-	                    u8 *dst, unsigned int *dlen)
+static int deflate_compress(struct crypto_tfm *tfm, const u8 *src,
+			    unsigned int slen, u8 *dst, unsigned int *dlen)
 {
 	int ret = 0;
-	struct deflate_ctx *dctx = ctx;
+	struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
 	struct z_stream_s *stream = &dctx->comp_stream;
 
 	ret = zlib_deflateReset(stream);
@@ -151,12 +154,12 @@ out:
 	return ret;
 }
  
-static int deflate_decompress(void *ctx, const u8 *src, unsigned int slen,
-                              u8 *dst, unsigned int *dlen)
+static int deflate_decompress(struct crypto_tfm *tfm, const u8 *src,
+			      unsigned int slen, u8 *dst, unsigned int *dlen)
 {
 	
 	int ret = 0;
-	struct deflate_ctx *dctx = ctx;
+	struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
 	struct z_stream_s *stream = &dctx->decomp_stream;
 
 	ret = zlib_inflateReset(stream);
@@ -198,9 +201,9 @@ static struct crypto_alg alg = {
 	.cra_ctxsize		= sizeof(struct deflate_ctx),
 	.cra_module		= THIS_MODULE,
 	.cra_list		= LIST_HEAD_INIT(alg.cra_list),
+	.cra_init		= deflate_init,
+	.cra_exit		= deflate_exit,
 	.cra_u			= { .compress = {
-	.coa_init		= deflate_init,
-	.coa_exit		= deflate_exit,
 	.coa_compress 		= deflate_compress,
 	.coa_decompress  	= deflate_decompress } }
 };
diff -puN crypto/des.c~git-cryptodev crypto/des.c
--- a/crypto/des.c~git-cryptodev
+++ a/crypto/des.c
@@ -783,9 +783,10 @@ static void dkey(u32 *pe, const u8 *k)
 	}
 }
 
-static int des_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags)
+static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
+		      unsigned int keylen, u32 *flags)
 {
-	struct des_ctx *dctx = ctx;
+	struct des_ctx *dctx = crypto_tfm_ctx(tfm);
 	u32 tmp[DES_EXPKEY_WORDS];
 	int ret;
 
@@ -803,9 +804,10 @@ static int des_setkey(void *ctx, const u
 	return 0;
 }
 
-static void des_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	const u32 *K = ((struct des_ctx *)ctx)->expkey;
+	struct des_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u32 *K = ctx->expkey;
 	const __le32 *s = (const __le32 *)src;
 	__le32 *d = (__le32 *)dst;
 	u32 L, R, A, B;
@@ -825,9 +827,10 @@ static void des_encrypt(void *ctx, u8 *d
 	d[1] = cpu_to_le32(L);
 }
 
-static void des_decrypt(void *ctx, u8 *dst, const u8 *src)
+static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	const u32 *K = ((struct des_ctx *)ctx)->expkey + DES_EXPKEY_WORDS - 2;
+	struct des_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u32 *K = ctx->expkey + DES_EXPKEY_WORDS - 2;
 	const __le32 *s = (const __le32 *)src;
 	__le32 *d = (__le32 *)dst;
 	u32 L, R, A, B;
@@ -860,11 +863,11 @@ static void des_decrypt(void *ctx, u8 *d
  *   property.
  *
  */
-static int des3_ede_setkey(void *ctx, const u8 *key,
+static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int keylen, u32 *flags)
 {
 	const u32 *K = (const u32 *)key;
-	struct des3_ede_ctx *dctx = ctx;
+	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
 	u32 *expkey = dctx->expkey;
 
 	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
@@ -881,9 +884,9 @@ static int des3_ede_setkey(void *ctx, co
 	return 0;
 }
 
-static void des3_ede_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct des3_ede_ctx *dctx = ctx;
+	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
 	const u32 *K = dctx->expkey;
 	const __le32 *s = (const __le32 *)src;
 	__le32 *d = (__le32 *)dst;
@@ -912,9 +915,9 @@ static void des3_ede_encrypt(void *ctx, 
 	d[1] = cpu_to_le32(L);
 }
 
-static void des3_ede_decrypt(void *ctx, u8 *dst, const u8 *src)
+static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct des3_ede_ctx *dctx = ctx;
+	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
 	const u32 *K = dctx->expkey + DES3_EDE_EXPKEY_WORDS - 2;
 	const __le32 *s = (const __le32 *)src;
 	__le32 *d = (__le32 *)dst;
diff -puN crypto/digest.c~git-cryptodev crypto/digest.c
--- a/crypto/digest.c~git-cryptodev
+++ a/crypto/digest.c
@@ -20,13 +20,14 @@
 
 static void init(struct crypto_tfm *tfm)
 {
-	tfm->__crt_alg->cra_digest.dia_init(crypto_tfm_ctx(tfm));
+	tfm->__crt_alg->cra_digest.dia_init(tfm);
 }
 
 static void update(struct crypto_tfm *tfm,
                    struct scatterlist *sg, unsigned int nsg)
 {
 	unsigned int i;
+	unsigned int alignmask = crypto_tfm_alg_alignmask(tfm);
 
 	for (i = 0; i < nsg; i++) {
 
@@ -38,12 +39,22 @@ static void update(struct crypto_tfm *tf
 			unsigned int bytes_from_page = min(l, ((unsigned int)
 							   (PAGE_SIZE)) - 
 							   offset);
-			char *p = crypto_kmap(pg, 0) + offset;
+			char *src = crypto_kmap(pg, 0);
+			char *p = src + offset;
 
-			tfm->__crt_alg->cra_digest.dia_update
-					(crypto_tfm_ctx(tfm), p,
-					 bytes_from_page);
-			crypto_kunmap(p, 0);
+			if (unlikely(offset & alignmask)) {
+				unsigned int bytes =
+					alignmask + 1 - (offset & alignmask);
+				bytes = min(bytes, bytes_from_page);
+				tfm->__crt_alg->cra_digest.dia_update(tfm, p,
+								      bytes);
+				p += bytes;
+				bytes_from_page -= bytes;
+				l -= bytes;
+			}
+			tfm->__crt_alg->cra_digest.dia_update(tfm, p,
+							      bytes_from_page);
+			crypto_kunmap(src, 0);
 			crypto_yield(tfm);
 			offset = 0;
 			pg++;
@@ -54,7 +65,15 @@ static void update(struct crypto_tfm *tf
 
 static void final(struct crypto_tfm *tfm, u8 *out)
 {
-	tfm->__crt_alg->cra_digest.dia_final(crypto_tfm_ctx(tfm), out);
+	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
+	if (unlikely((unsigned long)out & alignmask)) {
+		unsigned int size = crypto_tfm_alg_digestsize(tfm);
+		u8 buffer[size + alignmask];
+		u8 *dst = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
+		tfm->__crt_alg->cra_digest.dia_final(tfm, dst);
+		memcpy(out, dst, size);
+	} else
+		tfm->__crt_alg->cra_digest.dia_final(tfm, out);
 }
 
 static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
@@ -62,25 +81,15 @@ static int setkey(struct crypto_tfm *tfm
 	u32 flags;
 	if (tfm->__crt_alg->cra_digest.dia_setkey == NULL)
 		return -ENOSYS;
-	return tfm->__crt_alg->cra_digest.dia_setkey(crypto_tfm_ctx(tfm),
-						     key, keylen, &flags);
+	return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen, &flags);
 }
 
 static void digest(struct crypto_tfm *tfm,
                    struct scatterlist *sg, unsigned int nsg, u8 *out)
 {
-	unsigned int i;
-
-	tfm->crt_digest.dit_init(tfm);
-		
-	for (i = 0; i < nsg; i++) {
-		char *p = crypto_kmap(sg[i].page, 0) + sg[i].offset;
-		tfm->__crt_alg->cra_digest.dia_update(crypto_tfm_ctx(tfm),
-		                                      p, sg[i].length);
-		crypto_kunmap(p, 0);
-		crypto_yield(tfm);
-	}
-	crypto_digest_final(tfm, out);
+	init(tfm);
+	update(tfm, sg, nsg);
+	final(tfm, out);
 }
 
 int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags)
diff -puN crypto/Kconfig~git-cryptodev crypto/Kconfig
--- a/crypto/Kconfig~git-cryptodev
+++ a/crypto/Kconfig
@@ -131,6 +131,7 @@ config CRYPTO_BLOWFISH
 config CRYPTO_TWOFISH
 	tristate "Twofish cipher algorithm"
 	depends on CRYPTO
+	select CRYPTO_TWOFISH_COMMON
 	help
 	  Twofish cipher algorithm.
 	  
@@ -142,6 +143,43 @@ config CRYPTO_TWOFISH
 	  See also:
 	  <http://www.schneier.com/twofish.html>
 
+config CRYPTO_TWOFISH_COMMON
+	tristate
+	depends on CRYPTO
+	help
+	  Common parts of the Twofish cipher algorithm shared by the
+	  generic c and the assembler implementations.
+
+config CRYPTO_TWOFISH_586
+	tristate "Twofish cipher algorithms (i586)"
+	depends on CRYPTO && ((X86 || UML_X86) && !64BIT)
+	select CRYPTO_TWOFISH_COMMON
+	help
+	  Twofish cipher algorithm.
+
+	  Twofish was submitted as an AES (Advanced Encryption Standard)
+	  candidate cipher by researchers at CounterPane Systems.  It is a
+	  16 round block cipher supporting key sizes of 128, 192, and 256
+	  bits.
+
+	  See also:
+	  <http://www.schneier.com/twofish.html>
+
+config CRYPTO_TWOFISH_X86_64
+	tristate "Twofish cipher algorithm (x86_64)"
+	depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
+	select CRYPTO_TWOFISH_COMMON
+	help
+	  Twofish cipher algorithm (x86_64).
+
+	  Twofish was submitted as an AES (Advanced Encryption Standard)
+	  candidate cipher by researchers at CounterPane Systems.  It is a
+	  16 round block cipher supporting key sizes of 128, 192, and 256
+	  bits.
+
+	  See also:
+	  <http://www.schneier.com/twofish.html>
+
 config CRYPTO_SERPENT
 	tristate "Serpent cipher algorithm"
 	depends on CRYPTO
@@ -337,7 +375,7 @@ config CRYPTO_CRC32C
 
 config CRYPTO_TEST
 	tristate "Testing module"
-	depends on CRYPTO
+	depends on CRYPTO && m
 	help
 	  Quick & dirty crypto test module.
 
diff -puN crypto/khazad.c~git-cryptodev crypto/khazad.c
--- a/crypto/khazad.c~git-cryptodev
+++ a/crypto/khazad.c
@@ -754,11 +754,11 @@ static const u64 c[KHAZAD_ROUNDS + 1] = 
 	0xccc41d14c363da5dULL, 0x5fdc7dcd7f5a6c5cULL, 0xf726ffede89d6f8eULL
 };
 
-static int khazad_setkey(void *ctx_arg, const u8 *in_key,
-                       unsigned int key_len, u32 *flags)
+static int khazad_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+			 unsigned int key_len, u32 *flags)
 {
-	struct khazad_ctx *ctx = ctx_arg;
-	const __be64 *key = (const __be64 *)in_key;
+	struct khazad_ctx *ctx = crypto_tfm_ctx(tfm);
+	const __be32 *key = (const __be32 *)in_key;
 	int r;
 	const u64 *S = T7;
 	u64 K2, K1;
@@ -769,8 +769,9 @@ static int khazad_setkey(void *ctx_arg, 
 		return -EINVAL;
 	}
 
-	K2 = be64_to_cpu(key[0]);
-	K1 = be64_to_cpu(key[1]);
+	/* key is supposed to be 32-bit aligned */
+	K2 = ((u64)be32_to_cpu(key[0]) << 32) | be32_to_cpu(key[1]);
+	K1 = ((u64)be32_to_cpu(key[2]) << 32) | be32_to_cpu(key[3]);
 
 	/* setup the encrypt key */
 	for (r = 0; r <= KHAZAD_ROUNDS; r++) {
@@ -840,15 +841,15 @@ static void khazad_crypt(const u64 round
 	*dst = cpu_to_be64(state);
 }
 
-static void khazad_encrypt(void *ctx_arg, u8 *dst, const u8 *src)
+static void khazad_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct khazad_ctx *ctx = ctx_arg;
+	struct khazad_ctx *ctx = crypto_tfm_ctx(tfm);
 	khazad_crypt(ctx->E, dst, src);
 }
 
-static void khazad_decrypt(void *ctx_arg, u8 *dst, const u8 *src)
+static void khazad_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct khazad_ctx *ctx = ctx_arg;
+	struct khazad_ctx *ctx = crypto_tfm_ctx(tfm);
 	khazad_crypt(ctx->D, dst, src);
 }
 
diff -puN crypto/Makefile~git-cryptodev crypto/Makefile
--- a/crypto/Makefile~git-cryptodev
+++ a/crypto/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
 obj-$(CONFIG_CRYPTO_DES) += des.o
 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o
 obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o
+obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent.o
 obj-$(CONFIG_CRYPTO_AES) += aes.o
 obj-$(CONFIG_CRYPTO_CAST5) += cast5.o
diff -puN crypto/md4.c~git-cryptodev crypto/md4.c
--- a/crypto/md4.c~git-cryptodev
+++ a/crypto/md4.c
@@ -152,9 +152,9 @@ static inline void md4_transform_helper(
 	md4_transform(ctx->hash, ctx->block);
 }
 
-static void md4_init(void *ctx)
+static void md4_init(struct crypto_tfm *tfm)
 {
-	struct md4_ctx *mctx = ctx;
+	struct md4_ctx *mctx = crypto_tfm_ctx(tfm);
 
 	mctx->hash[0] = 0x67452301;
 	mctx->hash[1] = 0xefcdab89;
@@ -163,9 +163,9 @@ static void md4_init(void *ctx)
 	mctx->byte_count = 0;
 }
 
-static void md4_update(void *ctx, const u8 *data, unsigned int len)
+static void md4_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len)
 {
-	struct md4_ctx *mctx = ctx;
+	struct md4_ctx *mctx = crypto_tfm_ctx(tfm);
 	const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
 
 	mctx->byte_count += len;
@@ -193,9 +193,9 @@ static void md4_update(void *ctx, const 
 	memcpy(mctx->block, data, len);
 }
 
-static void md4_final(void *ctx, u8 *out)
+static void md4_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct md4_ctx *mctx = ctx;
+	struct md4_ctx *mctx = crypto_tfm_ctx(tfm);
 	const unsigned int offset = mctx->byte_count & 0x3f;
 	char *p = (char *)mctx->block + offset;
 	int padding = 56 - (offset + 1);
diff -puN crypto/md5.c~git-cryptodev crypto/md5.c
--- a/crypto/md5.c~git-cryptodev
+++ a/crypto/md5.c
@@ -147,9 +147,9 @@ static inline void md5_transform_helper(
 	md5_transform(ctx->hash, ctx->block);
 }
 
-static void md5_init(void *ctx)
+static void md5_init(struct crypto_tfm *tfm)
 {
-	struct md5_ctx *mctx = ctx;
+	struct md5_ctx *mctx = crypto_tfm_ctx(tfm);
 
 	mctx->hash[0] = 0x67452301;
 	mctx->hash[1] = 0xefcdab89;
@@ -158,9 +158,9 @@ static void md5_init(void *ctx)
 	mctx->byte_count = 0;
 }
 
-static void md5_update(void *ctx, const u8 *data, unsigned int len)
+static void md5_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len)
 {
-	struct md5_ctx *mctx = ctx;
+	struct md5_ctx *mctx = crypto_tfm_ctx(tfm);
 	const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
 
 	mctx->byte_count += len;
@@ -188,9 +188,9 @@ static void md5_update(void *ctx, const 
 	memcpy(mctx->block, data, len);
 }
 
-static void md5_final(void *ctx, u8 *out)
+static void md5_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct md5_ctx *mctx = ctx;
+	struct md5_ctx *mctx = crypto_tfm_ctx(tfm);
 	const unsigned int offset = mctx->byte_count & 0x3f;
 	char *p = (char *)mctx->block + offset;
 	int padding = 56 - (offset + 1);
diff -puN crypto/michael_mic.c~git-cryptodev crypto/michael_mic.c
--- a/crypto/michael_mic.c~git-cryptodev
+++ a/crypto/michael_mic.c
@@ -45,16 +45,17 @@ do {				\
 } while (0)
 
 
-static void michael_init(void *ctx)
+static void michael_init(struct crypto_tfm *tfm)
 {
-	struct michael_mic_ctx *mctx = ctx;
+	struct michael_mic_ctx *mctx = crypto_tfm_ctx(tfm);
 	mctx->pending_len = 0;
 }
 
 
-static void michael_update(void *ctx, const u8 *data, unsigned int len)
+static void michael_update(struct crypto_tfm *tfm, const u8 *data,
+			   unsigned int len)
 {
-	struct michael_mic_ctx *mctx = ctx;
+	struct michael_mic_ctx *mctx = crypto_tfm_ctx(tfm);
 	const __le32 *src;
 
 	if (mctx->pending_len) {
@@ -90,9 +91,9 @@ static void michael_update(void *ctx, co
 }
 
 
-static void michael_final(void *ctx, u8 *out)
+static void michael_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct michael_mic_ctx *mctx = ctx;
+	struct michael_mic_ctx *mctx = crypto_tfm_ctx(tfm);
 	u8 *data = mctx->pending;
 	__le32 *dst = (__le32 *)out;
 
@@ -121,10 +122,10 @@ static void michael_final(void *ctx, u8 
 }
 
 
-static int michael_setkey(void *ctx, const u8 *key, unsigned int keylen,
-			  u32 *flags)
+static int michael_setkey(struct crypto_tfm *tfm, const u8 *key,
+			  unsigned int keylen, u32 *flags)
 {
-	struct michael_mic_ctx *mctx = ctx;
+	struct michael_mic_ctx *mctx = crypto_tfm_ctx(tfm);
 	const __le32 *data = (const __le32 *)key;
 
 	if (keylen != 8) {
@@ -145,6 +146,7 @@ static struct crypto_alg michael_mic_alg
 	.cra_blocksize	= 8,
 	.cra_ctxsize	= sizeof(struct michael_mic_ctx),
 	.cra_module	= THIS_MODULE,
+	.cra_alignmask	= 3,
 	.cra_list	= LIST_HEAD_INIT(michael_mic_alg.cra_list),
 	.cra_u		= { .digest = {
 	.dia_digestsize	= 8,
diff -puN crypto/serpent.c~git-cryptodev crypto/serpent.c
--- a/crypto/serpent.c~git-cryptodev
+++ a/crypto/serpent.c
@@ -215,9 +215,11 @@ struct serpent_ctx {
 };
 
 
-static int serpent_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags)
+static int serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
+			  unsigned int keylen, u32 *flags)
 {
-	u32 *k = ((struct serpent_ctx *)ctx)->expkey;
+	struct serpent_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *k = ctx->expkey;
 	u8  *k8 = (u8 *)k;
 	u32 r0,r1,r2,r3,r4;
 	int i;
@@ -365,10 +367,11 @@ static int serpent_setkey(void *ctx, con
 	return 0;
 }
 
-static void serpent_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void serpent_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
+	struct serpent_ctx *ctx = crypto_tfm_ctx(tfm);
 	const u32
-		*k = ((struct serpent_ctx *)ctx)->expkey,
+		*k = ctx->expkey,
 		*s = (const u32 *)src;
 	u32	*d = (u32 *)dst,
 		r0, r1, r2, r3, r4;
@@ -423,8 +426,9 @@ static void serpent_encrypt(void *ctx, u
 	d[3] = cpu_to_le32(r3);
 }
 
-static void serpent_decrypt(void *ctx, u8 *dst, const u8 *src)
+static void serpent_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
+	struct serpent_ctx *ctx = crypto_tfm_ctx(tfm);
 	const u32
 		*k = ((struct serpent_ctx *)ctx)->expkey,
 		*s = (const u32 *)src;
@@ -492,7 +496,8 @@ static struct crypto_alg serpent_alg = {
 	.cia_decrypt  		=	serpent_decrypt } }
 };
 
-static int tnepres_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags)
+static int tnepres_setkey(struct crypto_tfm *tfm, const u8 *key,
+			  unsigned int keylen, u32 *flags)
 {
 	u8 rev_key[SERPENT_MAX_KEY_SIZE];
 	int i;
@@ -506,10 +511,10 @@ static int tnepres_setkey(void *ctx, con
 	for (i = 0; i < keylen; ++i)
 		rev_key[keylen - i - 1] = key[i];
  
-	return serpent_setkey(ctx, rev_key, keylen, flags);
+	return serpent_setkey(tfm, rev_key, keylen, flags);
 }
 
-static void tnepres_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void tnepres_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	const u32 * const s = (const u32 * const)src;
 	u32 * const d = (u32 * const)dst;
@@ -521,7 +526,7 @@ static void tnepres_encrypt(void *ctx, u
 	rs[2] = swab32(s[1]);
 	rs[3] = swab32(s[0]);
 
-	serpent_encrypt(ctx, (u8 *)rd, (u8 *)rs);
+	serpent_encrypt(tfm, (u8 *)rd, (u8 *)rs);
 
 	d[0] = swab32(rd[3]);
 	d[1] = swab32(rd[2]);
@@ -529,7 +534,7 @@ static void tnepres_encrypt(void *ctx, u
 	d[3] = swab32(rd[0]);
 }
 
-static void tnepres_decrypt(void *ctx, u8 *dst, const u8 *src)
+static void tnepres_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	const u32 * const s = (const u32 * const)src;
 	u32 * const d = (u32 * const)dst;
@@ -541,7 +546,7 @@ static void tnepres_decrypt(void *ctx, u
 	rs[2] = swab32(s[1]);
 	rs[3] = swab32(s[0]);
 
-	serpent_decrypt(ctx, (u8 *)rd, (u8 *)rs);
+	serpent_decrypt(tfm, (u8 *)rd, (u8 *)rs);
 
 	d[0] = swab32(rd[3]);
 	d[1] = swab32(rd[2]);
diff -puN crypto/sha1.c~git-cryptodev crypto/sha1.c
--- a/crypto/sha1.c~git-cryptodev
+++ a/crypto/sha1.c
@@ -34,9 +34,9 @@ struct sha1_ctx {
         u8 buffer[64];
 };
 
-static void sha1_init(void *ctx)
+static void sha1_init(struct crypto_tfm *tfm)
 {
-	struct sha1_ctx *sctx = ctx;
+	struct sha1_ctx *sctx = crypto_tfm_ctx(tfm);
 	static const struct sha1_ctx initstate = {
 	  0,
 	  { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 },
@@ -46,9 +46,10 @@ static void sha1_init(void *ctx)
 	*sctx = initstate;
 }
 
-static void sha1_update(void *ctx, const u8 *data, unsigned int len)
+static void sha1_update(struct crypto_tfm *tfm, const u8 *data,
+			unsigned int len)
 {
-	struct sha1_ctx *sctx = ctx;
+	struct sha1_ctx *sctx = crypto_tfm_ctx(tfm);
 	unsigned int partial, done;
 	const u8 *src;
 
@@ -80,9 +81,9 @@ static void sha1_update(void *ctx, const
 
 
 /* Add padding and return the message digest. */
-static void sha1_final(void* ctx, u8 *out)
+static void sha1_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct sha1_ctx *sctx = ctx;
+	struct sha1_ctx *sctx = crypto_tfm_ctx(tfm);
 	__be32 *dst = (__be32 *)out;
 	u32 i, index, padlen;
 	__be64 bits;
@@ -93,10 +94,10 @@ static void sha1_final(void* ctx, u8 *ou
 	/* Pad out to 56 mod 64 */
 	index = sctx->count & 0x3f;
 	padlen = (index < 56) ? (56 - index) : ((64+56) - index);
-	sha1_update(sctx, padding, padlen);
+	sha1_update(tfm, padding, padlen);
 
 	/* Append length */
-	sha1_update(sctx, (const u8 *)&bits, sizeof(bits));
+	sha1_update(tfm, (const u8 *)&bits, sizeof(bits));
 
 	/* Store state in digest */
 	for (i = 0; i < 5; i++)
@@ -112,6 +113,7 @@ static struct crypto_alg alg = {
 	.cra_blocksize	=	SHA1_HMAC_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct sha1_ctx),
 	.cra_module	=	THIS_MODULE,
+	.cra_alignmask	=	3,
 	.cra_list       =       LIST_HEAD_INIT(alg.cra_list),
 	.cra_u		=	{ .digest = {
 	.dia_digestsize	=	SHA1_DIGEST_SIZE,
diff -puN crypto/sha256.c~git-cryptodev crypto/sha256.c
--- a/crypto/sha256.c~git-cryptodev
+++ a/crypto/sha256.c
@@ -230,9 +230,9 @@ static void sha256_transform(u32 *state,
 	memset(W, 0, 64 * sizeof(u32));
 }
 
-static void sha256_init(void *ctx)
+static void sha256_init(struct crypto_tfm *tfm)
 {
-	struct sha256_ctx *sctx = ctx;
+	struct sha256_ctx *sctx = crypto_tfm_ctx(tfm);
 	sctx->state[0] = H0;
 	sctx->state[1] = H1;
 	sctx->state[2] = H2;
@@ -242,12 +242,12 @@ static void sha256_init(void *ctx)
 	sctx->state[6] = H6;
 	sctx->state[7] = H7;
 	sctx->count[0] = sctx->count[1] = 0;
-	memset(sctx->buf, 0, sizeof(sctx->buf));
 }
 
-static void sha256_update(void *ctx, const u8 *data, unsigned int len)
+static void sha256_update(struct crypto_tfm *tfm, const u8 *data,
+			  unsigned int len)
 {
-	struct sha256_ctx *sctx = ctx;
+	struct sha256_ctx *sctx = crypto_tfm_ctx(tfm);
 	unsigned int i, index, part_len;
 
 	/* Compute number of bytes mod 128 */
@@ -277,9 +277,9 @@ static void sha256_update(void *ctx, con
 	memcpy(&sctx->buf[index], &data[i], len-i);
 }
 
-static void sha256_final(void* ctx, u8 *out)
+static void sha256_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct sha256_ctx *sctx = ctx;
+	struct sha256_ctx *sctx = crypto_tfm_ctx(tfm);
 	__be32 *dst = (__be32 *)out;
 	__be32 bits[2];
 	unsigned int index, pad_len;
@@ -293,10 +293,10 @@ static void sha256_final(void* ctx, u8 *
 	/* Pad out to 56 mod 64. */
 	index = (sctx->count[0] >> 3) & 0x3f;
 	pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
-	sha256_update(sctx, padding, pad_len);
+	sha256_update(tfm, padding, pad_len);
 
 	/* Append length (before padding) */
-	sha256_update(sctx, (const u8 *)bits, sizeof(bits));
+	sha256_update(tfm, (const u8 *)bits, sizeof(bits));
 
 	/* Store state in digest */
 	for (i = 0; i < 8; i++)
@@ -313,6 +313,7 @@ static struct crypto_alg alg = {
 	.cra_blocksize	=	SHA256_HMAC_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct sha256_ctx),
 	.cra_module	=	THIS_MODULE,
+	.cra_alignmask	=	3,
 	.cra_list       =       LIST_HEAD_INIT(alg.cra_list),
 	.cra_u		=	{ .digest = {
 	.dia_digestsize	=	SHA256_DIGEST_SIZE,
diff -puN crypto/sha512.c~git-cryptodev crypto/sha512.c
--- a/crypto/sha512.c~git-cryptodev
+++ a/crypto/sha512.c
@@ -161,9 +161,9 @@ sha512_transform(u64 *state, u64 *W, con
 }
 
 static void
-sha512_init(void *ctx)
+sha512_init(struct crypto_tfm *tfm)
 {
-        struct sha512_ctx *sctx = ctx;
+	struct sha512_ctx *sctx = crypto_tfm_ctx(tfm);
 	sctx->state[0] = H0;
 	sctx->state[1] = H1;
 	sctx->state[2] = H2;
@@ -173,13 +173,12 @@ sha512_init(void *ctx)
 	sctx->state[6] = H6;
 	sctx->state[7] = H7;
 	sctx->count[0] = sctx->count[1] = sctx->count[2] = sctx->count[3] = 0;
-	memset(sctx->buf, 0, sizeof(sctx->buf));
 }
 
 static void
-sha384_init(void *ctx)
+sha384_init(struct crypto_tfm *tfm)
 {
-        struct sha512_ctx *sctx = ctx;
+	struct sha512_ctx *sctx = crypto_tfm_ctx(tfm);
         sctx->state[0] = HP0;
         sctx->state[1] = HP1;
         sctx->state[2] = HP2;
@@ -189,13 +188,12 @@ sha384_init(void *ctx)
         sctx->state[6] = HP6;
         sctx->state[7] = HP7;
         sctx->count[0] = sctx->count[1] = sctx->count[2] = sctx->count[3] = 0;
-        memset(sctx->buf, 0, sizeof(sctx->buf));
 }
 
 static void
-sha512_update(void *ctx, const u8 *data, unsigned int len)
+sha512_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len)
 {
-        struct sha512_ctx *sctx = ctx;
+	struct sha512_ctx *sctx = crypto_tfm_ctx(tfm);
 
 	unsigned int i, index, part_len;
 
@@ -233,9 +231,9 @@ sha512_update(void *ctx, const u8 *data,
 }
 
 static void
-sha512_final(void *ctx, u8 *hash)
+sha512_final(struct crypto_tfm *tfm, u8 *hash)
 {
-        struct sha512_ctx *sctx = ctx;
+	struct sha512_ctx *sctx = crypto_tfm_ctx(tfm);
         static u8 padding[128] = { 0x80, };
 	__be64 *dst = (__be64 *)hash;
 	__be32 bits[4];
@@ -251,10 +249,10 @@ sha512_final(void *ctx, u8 *hash)
 	/* Pad out to 112 mod 128. */
 	index = (sctx->count[0] >> 3) & 0x7f;
 	pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
-	sha512_update(sctx, padding, pad_len);
+	sha512_update(tfm, padding, pad_len);
 
 	/* Append length (before padding) */
-	sha512_update(sctx, (const u8 *)bits, sizeof(bits));
+	sha512_update(tfm, (const u8 *)bits, sizeof(bits));
 
 	/* Store state in digest */
 	for (i = 0; i < 8; i++)
@@ -264,12 +262,11 @@ sha512_final(void *ctx, u8 *hash)
 	memset(sctx, 0, sizeof(struct sha512_ctx));
 }
 
-static void sha384_final(void *ctx, u8 *hash)
+static void sha384_final(struct crypto_tfm *tfm, u8 *hash)
 {
-        struct sha512_ctx *sctx = ctx;
         u8 D[64];
 
-        sha512_final(sctx, D);
+	sha512_final(tfm, D);
 
         memcpy(hash, D, 48);
         memset(D, 0, 64);
@@ -281,6 +278,7 @@ static struct crypto_alg sha512 = {
         .cra_blocksize  = SHA512_HMAC_BLOCK_SIZE,
         .cra_ctxsize    = sizeof(struct sha512_ctx),
         .cra_module     = THIS_MODULE,
+	.cra_alignmask	= 3,
         .cra_list       = LIST_HEAD_INIT(sha512.cra_list),
         .cra_u          = { .digest = {
                                 .dia_digestsize = SHA512_DIGEST_SIZE,
@@ -295,6 +293,7 @@ static struct crypto_alg sha384 = {
         .cra_flags      = CRYPTO_ALG_TYPE_DIGEST,
         .cra_blocksize  = SHA384_HMAC_BLOCK_SIZE,
         .cra_ctxsize    = sizeof(struct sha512_ctx),
+	.cra_alignmask	= 3,
         .cra_module     = THIS_MODULE,
         .cra_list       = LIST_HEAD_INIT(sha384.cra_list),
         .cra_u          = { .digest = {
diff -puN crypto/tcrypt.c~git-cryptodev crypto/tcrypt.c
--- a/crypto/tcrypt.c~git-cryptodev
+++ a/crypto/tcrypt.c
@@ -570,6 +570,122 @@ out:
 	crypto_free_tfm(tfm);
 }
 
+static void test_digest_jiffies(struct crypto_tfm *tfm, char *p, int blen,
+				int plen, char *out, int sec)
+{
+	struct scatterlist sg[1];
+	unsigned long start, end;
+	int bcount, pcount;
+
+	for (start = jiffies, end = start + sec * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		crypto_digest_init(tfm);
+		for (pcount = 0; pcount < blen; pcount += plen) {
+			sg_set_buf(sg, p + pcount, plen);
+			crypto_digest_update(tfm, sg, 1);
+		}
+		/* we assume there is enough space in 'out' for the result */
+		crypto_digest_final(tfm, out);
+	}
+
+	printk("%6u opers/sec, %9lu bytes/sec\n",
+	       bcount / sec, ((long)bcount * blen) / sec);
+
+	return;
+}
+
+static void test_digest_cycles(struct crypto_tfm *tfm, char *p, int blen,
+			       int plen, char *out)
+{
+	struct scatterlist sg[1];
+	unsigned long cycles = 0;
+	int i, pcount;
+
+	local_bh_disable();
+	local_irq_disable();
+
+	/* Warm-up run. */
+	for (i = 0; i < 4; i++) {
+		crypto_digest_init(tfm);
+		for (pcount = 0; pcount < blen; pcount += plen) {
+			sg_set_buf(sg, p + pcount, plen);
+			crypto_digest_update(tfm, sg, 1);
+		}
+		crypto_digest_final(tfm, out);
+	}
+
+	/* The real thing. */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		crypto_digest_init(tfm);
+
+		start = get_cycles();
+
+		for (pcount = 0; pcount < blen; pcount += plen) {
+			sg_set_buf(sg, p + pcount, plen);
+			crypto_digest_update(tfm, sg, 1);
+		}
+		crypto_digest_final(tfm, out);
+
+		end = get_cycles();
+
+		cycles += end - start;
+	}
+
+	local_irq_enable();
+	local_bh_enable();
+
+	printk("%6lu cycles/operation, %4lu cycles/byte\n",
+	       cycles / 8, cycles / (8 * blen));
+
+	return;
+}
+
+static void test_digest_speed(char *algo, unsigned int sec,
+			      struct digest_speed *speed)
+{
+	struct crypto_tfm *tfm;
+	char output[1024];
+	int i;
+
+	printk("\ntesting speed of %s\n", algo);
+
+	tfm = crypto_alloc_tfm(algo, 0);
+
+	if (tfm == NULL) {
+		printk("failed to load transform for %s\n", algo);
+		return;
+	}
+
+	if (crypto_tfm_alg_digestsize(tfm) > sizeof(output)) {
+		printk("digestsize(%u) > outputbuffer(%zu)\n",
+		       crypto_tfm_alg_digestsize(tfm), sizeof(output));
+		goto out;
+	}
+
+	for (i = 0; speed[i].blen != 0; i++) {
+		if (speed[i].blen > TVMEMSIZE) {
+			printk("template (%u) too big for tvmem (%u)\n",
+			       speed[i].blen, TVMEMSIZE);
+			goto out;
+		}
+
+		printk("test%3u (%5u byte blocks,%5u bytes per update,%4u updates): ",
+		       i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
+
+		memset(tvmem, 0xff, speed[i].blen);
+
+		if (sec)
+			test_digest_jiffies(tfm, tvmem, speed[i].blen, speed[i].plen, output, sec);
+		else
+			test_digest_cycles(tfm, tvmem, speed[i].blen, speed[i].plen, output);
+	}
+
+out:
+	crypto_free_tfm(tfm);
+}
+
 static void test_deflate(void)
 {
 	unsigned int i;
@@ -1086,6 +1202,60 @@ static void do_test(void)
 				  des_speed_template);
 		break;
 
+	case 300:
+		/* fall through */
+
+	case 301:
+		test_digest_speed("md4", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 302:
+		test_digest_speed("md5", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 303:
+		test_digest_speed("sha1", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 304:
+		test_digest_speed("sha256", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 305:
+		test_digest_speed("sha384", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 306:
+		test_digest_speed("sha512", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 307:
+		test_digest_speed("wp256", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 308:
+		test_digest_speed("wp384", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 309:
+		test_digest_speed("wp512", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 310:
+		test_digest_speed("tgr128", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 311:
+		test_digest_speed("tgr160", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 312:
+		test_digest_speed("tgr192", sec, generic_digest_speed_template);
+		if (mode > 300 && mode < 400) break;
+
+	case 399:
+		break;
+
 	case 1000:
 		test_available();
 		break;
@@ -1113,7 +1283,14 @@ static int __init init(void)
 
 	kfree(xbuf);
 	kfree(tvmem);
-	return 0;
+
+	/* We intentionaly return -EAGAIN to prevent keeping
+	 * the module. It does all its work from init()
+	 * and doesn't offer any runtime functionality 
+	 * => we don't need it in the memory, do we?
+	 *                                        -- mludvig
+	 */
+	return -EAGAIN;
 }
 
 /*
diff -puN crypto/tcrypt.h~git-cryptodev crypto/tcrypt.h
--- a/crypto/tcrypt.h~git-cryptodev
+++ a/crypto/tcrypt.h
@@ -65,6 +65,11 @@ struct cipher_speed {
 	unsigned int blen;
 };
 
+struct digest_speed {
+	unsigned int blen;	/* buffer length */
+	unsigned int plen;	/* per-update length */
+};
+
 /*
  * MD4 test vectors from RFC1320
  */
@@ -2975,4 +2980,35 @@ static struct cipher_speed des_speed_tem
 	{  .klen = 0, .blen = 0, }
 };
 
+/*
+ * Digest speed tests
+ */
+static struct digest_speed generic_digest_speed_template[] = {
+	{ .blen = 16, 	.plen = 16, },
+	{ .blen = 64,	.plen = 16, },
+	{ .blen = 64,	.plen = 64, },
+	{ .blen = 256,	.plen = 16, },
+	{ .blen = 256,	.plen = 64, },
+	{ .blen = 256,	.plen = 256, },
+	{ .blen = 1024,	.plen = 16, },
+	{ .blen = 1024,	.plen = 256, },
+	{ .blen = 1024,	.plen = 1024, },
+	{ .blen = 2048,	.plen = 16, },
+	{ .blen = 2048,	.plen = 256, },
+	{ .blen = 2048,	.plen = 1024, },
+	{ .blen = 2048,	.plen = 2048, },
+	{ .blen = 4096,	.plen = 16, },
+	{ .blen = 4096,	.plen = 256, },
+	{ .blen = 4096,	.plen = 1024, },
+	{ .blen = 4096,	.plen = 4096, },
+	{ .blen = 8192,	.plen = 16, },
+	{ .blen = 8192,	.plen = 256, },
+	{ .blen = 8192,	.plen = 1024, },
+	{ .blen = 8192,	.plen = 4096, },
+	{ .blen = 8192,	.plen = 8192, },
+
+	/* End marker */
+	{  .blen = 0,	.plen = 0, }
+};
+
 #endif	/* _CRYPTO_TCRYPT_H */
diff -puN crypto/tea.c~git-cryptodev crypto/tea.c
--- a/crypto/tea.c~git-cryptodev
+++ a/crypto/tea.c
@@ -45,10 +45,10 @@ struct xtea_ctx {
 	u32 KEY[4];
 };
 
-static int tea_setkey(void *ctx_arg, const u8 *in_key,
-                       unsigned int key_len, u32 *flags)
-{ 
-	struct tea_ctx *ctx = ctx_arg;
+static int tea_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+		      unsigned int key_len, u32 *flags)
+{
+	struct tea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
 	
 	if (key_len != 16)
@@ -66,12 +66,11 @@ static int tea_setkey(void *ctx_arg, con
 
 }
 
-static void tea_encrypt(void *ctx_arg, u8 *dst, const u8 *src)
-{ 
+static void tea_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
 	u32 y, z, n, sum = 0;
 	u32 k0, k1, k2, k3;
-
-	struct tea_ctx *ctx = ctx_arg;
+	struct tea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *in = (const __le32 *)src;
 	__le32 *out = (__le32 *)dst;
 
@@ -95,11 +94,11 @@ static void tea_encrypt(void *ctx_arg, u
 	out[1] = cpu_to_le32(z);
 }
 
-static void tea_decrypt(void *ctx_arg, u8 *dst, const u8 *src)
-{ 
+static void tea_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
 	u32 y, z, n, sum;
 	u32 k0, k1, k2, k3;
-	struct tea_ctx *ctx = ctx_arg;
+	struct tea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *in = (const __le32 *)src;
 	__le32 *out = (__le32 *)dst;
 
@@ -125,10 +124,10 @@ static void tea_decrypt(void *ctx_arg, u
 	out[1] = cpu_to_le32(z);
 }
 
-static int xtea_setkey(void *ctx_arg, const u8 *in_key,
-                       unsigned int key_len, u32 *flags)
-{ 
-	struct xtea_ctx *ctx = ctx_arg;
+static int xtea_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len, u32 *flags)
+{
+	struct xtea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
 	
 	if (key_len != 16)
@@ -146,12 +145,11 @@ static int xtea_setkey(void *ctx_arg, co
 
 }
 
-static void xtea_encrypt(void *ctx_arg, u8 *dst, const u8 *src)
-{ 
+static void xtea_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
 	u32 y, z, sum = 0;
 	u32 limit = XTEA_DELTA * XTEA_ROUNDS;
-
-	struct xtea_ctx *ctx = ctx_arg;
+	struct xtea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *in = (const __le32 *)src;
 	__le32 *out = (__le32 *)dst;
 
@@ -168,10 +166,10 @@ static void xtea_encrypt(void *ctx_arg, 
 	out[1] = cpu_to_le32(z);
 }
 
-static void xtea_decrypt(void *ctx_arg, u8 *dst, const u8 *src)
-{ 
+static void xtea_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
 	u32 y, z, sum;
-	struct tea_ctx *ctx = ctx_arg;
+	struct tea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *in = (const __le32 *)src;
 	__le32 *out = (__le32 *)dst;
 
@@ -191,12 +189,11 @@ static void xtea_decrypt(void *ctx_arg, 
 }
 
 
-static void xeta_encrypt(void *ctx_arg, u8 *dst, const u8 *src)
-{ 
+static void xeta_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
 	u32 y, z, sum = 0;
 	u32 limit = XTEA_DELTA * XTEA_ROUNDS;
-
-	struct xtea_ctx *ctx = ctx_arg;
+	struct xtea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *in = (const __le32 *)src;
 	__le32 *out = (__le32 *)dst;
 
@@ -213,10 +210,10 @@ static void xeta_encrypt(void *ctx_arg, 
 	out[1] = cpu_to_le32(z);
 }
 
-static void xeta_decrypt(void *ctx_arg, u8 *dst, const u8 *src)
-{ 
+static void xeta_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
 	u32 y, z, sum;
-	struct tea_ctx *ctx = ctx_arg;
+	struct tea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *in = (const __le32 *)src;
 	__le32 *out = (__le32 *)dst;
 
diff -puN crypto/tgr192.c~git-cryptodev crypto/tgr192.c
--- a/crypto/tgr192.c~git-cryptodev
+++ a/crypto/tgr192.c
@@ -496,11 +496,10 @@ static void tgr192_transform(struct tgr1
 	tctx->c = c;
 }
 
-static void tgr192_init(void *ctx)
+static void tgr192_init(struct crypto_tfm *tfm)
 {
-	struct tgr192_ctx *tctx = ctx;
+	struct tgr192_ctx *tctx = crypto_tfm_ctx(tfm);
 
-	memset (tctx->hash, 0, 64);
 	tctx->a = 0x0123456789abcdefULL;
 	tctx->b = 0xfedcba9876543210ULL;
 	tctx->c = 0xf096a5b4c3b2e187ULL;
@@ -511,9 +510,10 @@ static void tgr192_init(void *ctx)
 
 /* Update the message digest with the contents
  * of INBUF with length INLEN. */
-static void tgr192_update(void *ctx, const u8 * inbuf, unsigned int len)
+static void tgr192_update(struct crypto_tfm *tfm, const u8 *inbuf,
+			  unsigned int len)
 {
-	struct tgr192_ctx *tctx = ctx;
+	struct tgr192_ctx *tctx = crypto_tfm_ctx(tfm);
 
 	if (tctx->count == 64) {	/* flush the buffer */
 		tgr192_transform(tctx, tctx->hash);
@@ -527,7 +527,7 @@ static void tgr192_update(void *ctx, con
 		for (; len && tctx->count < 64; len--) {
 			tctx->hash[tctx->count++] = *inbuf++;
 		}
-		tgr192_update(tctx, NULL, 0);
+		tgr192_update(tfm, NULL, 0);
 		if (!len) {
 			return;
 		}
@@ -549,15 +549,15 @@ static void tgr192_update(void *ctx, con
 
 
 /* The routine terminates the computation */
-static void tgr192_final(void *ctx, u8 * out)
+static void tgr192_final(struct crypto_tfm *tfm, u8 * out)
 {
-	struct tgr192_ctx *tctx = ctx;
+	struct tgr192_ctx *tctx = crypto_tfm_ctx(tfm);
 	__be64 *dst = (__be64 *)out;
 	__be64 *be64p;
 	__le32 *le32p;
 	u32 t, msb, lsb;
 
-	tgr192_update(tctx, NULL, 0); /* flush */ ;
+	tgr192_update(tfm, NULL, 0); /* flush */ ;
 
 	msb = 0;
 	t = tctx->nblocks;
@@ -585,7 +585,7 @@ static void tgr192_final(void *ctx, u8 *
 		while (tctx->count < 64) {
 			tctx->hash[tctx->count++] = 0;
 		}
-		tgr192_update(tctx, NULL, 0); /* flush */ ;
+		tgr192_update(tfm, NULL, 0); /* flush */ ;
 		memset(tctx->hash, 0, 56);    /* fill next block with zeroes */
 	}
 	/* append the 64 bit count */
@@ -601,22 +601,20 @@ static void tgr192_final(void *ctx, u8 *
 	dst[2] = be64p[2] = cpu_to_be64(tctx->c);
 }
 
-static void tgr160_final(void *ctx, u8 * out)
+static void tgr160_final(struct crypto_tfm *tfm, u8 * out)
 {
-	struct tgr192_ctx *wctx = ctx;
 	u8 D[64];
 
-	tgr192_final(wctx, D);
+	tgr192_final(tfm, D);
 	memcpy(out, D, TGR160_DIGEST_SIZE);
 	memset(D, 0, TGR192_DIGEST_SIZE);
 }
 
-static void tgr128_final(void *ctx, u8 * out)
+static void tgr128_final(struct crypto_tfm *tfm, u8 * out)
 {
-	struct tgr192_ctx *wctx = ctx;
 	u8 D[64];
 
-	tgr192_final(wctx, D);
+	tgr192_final(tfm, D);
 	memcpy(out, D, TGR128_DIGEST_SIZE);
 	memset(D, 0, TGR192_DIGEST_SIZE);
 }
@@ -627,6 +625,7 @@ static struct crypto_alg tgr192 = {
 	.cra_blocksize = TGR192_BLOCK_SIZE,
 	.cra_ctxsize = sizeof(struct tgr192_ctx),
 	.cra_module = THIS_MODULE,
+	.cra_alignmask = 7,
 	.cra_list = LIST_HEAD_INIT(tgr192.cra_list),
 	.cra_u = {.digest = {
 			     .dia_digestsize = TGR192_DIGEST_SIZE,
@@ -641,6 +640,7 @@ static struct crypto_alg tgr160 = {
 	.cra_blocksize = TGR192_BLOCK_SIZE,
 	.cra_ctxsize = sizeof(struct tgr192_ctx),
 	.cra_module = THIS_MODULE,
+	.cra_alignmask = 7,
 	.cra_list = LIST_HEAD_INIT(tgr160.cra_list),
 	.cra_u = {.digest = {
 			     .dia_digestsize = TGR160_DIGEST_SIZE,
@@ -655,6 +655,7 @@ static struct crypto_alg tgr128 = {
 	.cra_blocksize = TGR192_BLOCK_SIZE,
 	.cra_ctxsize = sizeof(struct tgr192_ctx),
 	.cra_module = THIS_MODULE,
+	.cra_alignmask = 7,
 	.cra_list = LIST_HEAD_INIT(tgr128.cra_list),
 	.cra_u = {.digest = {
 			     .dia_digestsize = TGR128_DIGEST_SIZE,
diff -puN crypto/twofish.c~git-cryptodev crypto/twofish.c
--- a/crypto/twofish.c~git-cryptodev
+++ a/crypto/twofish.c
@@ -39,6 +39,7 @@
  */
 
 #include <asm/byteorder.h>
+#include <crypto/twofish.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -46,534 +47,6 @@
 #include <linux/crypto.h>
 #include <linux/bitops.h>
 
-
-/* The large precomputed tables for the Twofish cipher (twofish.c)
- * Taken from the same source as twofish.c
- * Marc Mutz <Marc@Mutz.com>
- */
-
-/* These two tables are the q0 and q1 permutations, exactly as described in
- * the Twofish paper. */
-
-static const u8 q0[256] = {
-   0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
-   0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
-   0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
-   0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
-   0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
-   0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
-   0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
-   0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
-   0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
-   0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
-   0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
-   0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
-   0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
-   0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
-   0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
-   0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
-   0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
-   0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
-   0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
-   0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
-   0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
-   0x4A, 0x5E, 0xC1, 0xE0
-};
-
-static const u8 q1[256] = {
-   0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
-   0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
-   0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
-   0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
-   0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
-   0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
-   0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
-   0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
-   0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
-   0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
-   0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
-   0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
-   0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
-   0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
-   0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
-   0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
-   0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
-   0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
-   0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
-   0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
-   0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
-   0x55, 0x09, 0xBE, 0x91
-};
-
-/* These MDS tables are actually tables of MDS composed with q0 and q1,
- * because it is only ever used that way and we can save some time by
- * precomputing.  Of course the main saving comes from precomputing the
- * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
- * things up in these tables we reduce the matrix multiply to four lookups
- * and three XORs.  Semi-formally, the definition of these tables is:
- * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
- * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
- * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
- * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
- * by Schneier et al, and I'm casually glossing over the byte/word
- * conversion issues. */
-
-static const u32 mds[4][256] = {
-   {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
-    0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
-    0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
-    0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
-    0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
-    0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
-    0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
-    0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
-    0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
-    0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
-    0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
-    0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
-    0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
-    0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
-    0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
-    0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
-    0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
-    0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
-    0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
-    0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
-    0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
-    0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
-    0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
-    0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
-    0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
-    0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
-    0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
-    0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
-    0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
-    0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
-    0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
-    0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
-    0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
-    0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
-    0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
-    0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
-    0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
-    0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
-    0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
-    0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
-    0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
-    0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
-    0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
-
-   {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
-    0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
-    0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
-    0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
-    0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
-    0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
-    0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
-    0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
-    0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
-    0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
-    0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
-    0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
-    0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
-    0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
-    0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
-    0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
-    0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
-    0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
-    0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
-    0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
-    0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
-    0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
-    0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
-    0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
-    0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
-    0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
-    0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
-    0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
-    0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
-    0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
-    0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
-    0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
-    0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
-    0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
-    0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
-    0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
-    0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
-    0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
-    0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
-    0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
-    0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
-    0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
-    0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
-
-   {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
-    0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
-    0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
-    0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
-    0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
-    0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
-    0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
-    0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
-    0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
-    0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
-    0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
-    0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
-    0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
-    0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
-    0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
-    0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
-    0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
-    0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
-    0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
-    0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
-    0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
-    0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
-    0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
-    0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
-    0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
-    0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
-    0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
-    0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
-    0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
-    0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
-    0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
-    0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
-    0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
-    0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
-    0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
-    0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
-    0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
-    0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
-    0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
-    0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
-    0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
-    0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
-    0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
-
-   {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
-    0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
-    0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
-    0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
-    0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
-    0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
-    0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
-    0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
-    0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
-    0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
-    0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
-    0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
-    0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
-    0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
-    0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
-    0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
-    0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
-    0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
-    0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
-    0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
-    0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
-    0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
-    0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
-    0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
-    0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
-    0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
-    0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
-    0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
-    0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
-    0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
-    0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
-    0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
-    0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
-    0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
-    0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
-    0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
-    0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
-    0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
-    0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
-    0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
-    0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
-    0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
-    0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
-};
-
-/* The exp_to_poly and poly_to_exp tables are used to perform efficient
- * operations in GF(2^8) represented as GF(2)[x]/w(x) where
- * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
- * definition of the RS matrix in the key schedule.  Elements of that field
- * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
- * which can be represented naturally by bytes (just substitute x=2).  In that
- * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
- * multiplication is inefficient without hardware support.  To multiply
- * faster, I make use of the fact x is a generator for the nonzero elements,
- * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
- * some n in 0..254.  Note that that caret is exponentiation in GF(2^8),
- * *not* polynomial notation.  So if I want to compute pq where p and q are
- * in GF(2^8), I can just say:
- *    1. if p=0 or q=0 then pq=0
- *    2. otherwise, find m and n such that p=x^m and q=x^n
- *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
- * The translations in steps 2 and 3 are looked up in the tables
- * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
- * in action, look at the CALC_S macro.  As additional wrinkles, note that
- * one of my operands is always a constant, so the poly_to_exp lookup on it
- * is done in advance; I included the original values in the comments so
- * readers can have some chance of recognizing that this *is* the RS matrix
- * from the Twofish paper.  I've only included the table entries I actually
- * need; I never do a lookup on a variable input of zero and the biggest
- * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll
- * never sum to more than 491.	I'm repeating part of the exp_to_poly table
- * so that I don't have to do mod-255 reduction in the exponent arithmetic.
- * Since I know my constant operands are never zero, I only have to worry
- * about zero values in the variable operand, and I do it with a simple
- * conditional branch.	I know conditionals are expensive, but I couldn't
- * see a non-horrible way of avoiding them, and I did manage to group the
- * statements so that each if covers four group multiplications. */
-
-static const u8 poly_to_exp[255] = {
-   0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
-   0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
-   0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
-   0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
-   0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
-   0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
-   0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
-   0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
-   0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
-   0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
-   0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
-   0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
-   0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
-   0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
-   0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
-   0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
-   0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
-   0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
-   0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
-   0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
-   0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
-   0x85, 0xC8, 0xA1
-};
-
-static const u8 exp_to_poly[492] = {
-   0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
-   0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
-   0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
-   0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
-   0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
-   0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
-   0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
-   0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
-   0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
-   0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
-   0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
-   0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
-   0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
-   0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
-   0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
-   0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
-   0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
-   0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
-   0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
-   0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
-   0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
-   0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
-   0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
-   0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
-   0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
-   0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
-   0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
-   0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
-   0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
-   0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
-   0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
-   0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
-   0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
-   0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
-   0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
-   0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
-   0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
-   0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
-   0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
-   0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
-   0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB
-};
-
-
-/* The table constants are indices of
- * S-box entries, preprocessed through q0 and q1. */
-static const u8 calc_sb_tbl[512] = {
-    0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
-    0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
-    0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
-    0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
-    0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
-    0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
-    0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
-    0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
-    0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
-    0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
-    0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
-    0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
-    0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
-    0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
-    0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
-    0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
-    0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
-    0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
-    0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
-    0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
-    0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
-    0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
-    0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
-    0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
-    0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
-    0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
-    0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
-    0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
-    0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
-    0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
-    0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
-    0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
-    0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
-    0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
-    0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
-    0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
-    0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
-    0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
-    0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
-    0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
-    0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
-    0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
-    0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
-    0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
-    0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
-    0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
-    0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
-    0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
-    0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
-    0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
-    0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
-    0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
-    0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
-    0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
-    0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
-    0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
-    0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
-    0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
-    0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
-    0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
-    0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
-    0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
-    0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
-    0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
-};
-
-/* Macro to perform one column of the RS matrix multiplication.  The
- * parameters a, b, c, and d are the four bytes of output; i is the index
- * of the key bytes, and w, x, y, and z, are the column of constants from
- * the RS matrix, preprocessed through the poly_to_exp table. */
-
-#define CALC_S(a, b, c, d, i, w, x, y, z) \
-   if (key[i]) { \
-      tmp = poly_to_exp[key[i] - 1]; \
-      (a) ^= exp_to_poly[tmp + (w)]; \
-      (b) ^= exp_to_poly[tmp + (x)]; \
-      (c) ^= exp_to_poly[tmp + (y)]; \
-      (d) ^= exp_to_poly[tmp + (z)]; \
-   }
-
-/* Macros to calculate the key-dependent S-boxes for a 128-bit key using
- * the S vector from CALC_S.  CALC_SB_2 computes a single entry in all
- * four S-boxes, where i is the index of the entry to compute, and a and b
- * are the index numbers preprocessed through the q0 and q1 tables
- * respectively. */
-
-#define CALC_SB_2(i, a, b) \
-   ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
-   ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
-   ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
-   ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]
-
-/* Macro exactly like CALC_SB_2, but for 192-bit keys. */
-
-#define CALC_SB192_2(i, a, b) \
-   ctx->s[0][i] = mds[0][q0[q0[(b) ^ sa] ^ se] ^ si]; \
-   ctx->s[1][i] = mds[1][q0[q1[(b) ^ sb] ^ sf] ^ sj]; \
-   ctx->s[2][i] = mds[2][q1[q0[(a) ^ sc] ^ sg] ^ sk]; \
-   ctx->s[3][i] = mds[3][q1[q1[(a) ^ sd] ^ sh] ^ sl];
-
-/* Macro exactly like CALC_SB_2, but for 256-bit keys. */
-
-#define CALC_SB256_2(i, a, b) \
-   ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
-   ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
-   ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
-   ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp];
-
-/* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
- * last two stages of the h() function for a given index (either 2i or 2i+1).
- * a, b, c, and d are the four bytes going into the last two stages.  For
- * 128-bit keys, this is the entire h() function and a and c are the index
- * preprocessed through q0 and q1 respectively; for longer keys they are the
- * output of previous stages.  j is the index of the first key byte to use.
- * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
- * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
- * rotations.  Its parameters are: a, the array to write the results into,
- * j, the index of the first output entry, k and l, the preprocessed indices
- * for index 2i, and m and n, the preprocessed indices for index 2i+1.
- * CALC_K192_2 expands CALC_K_2 to handle 192-bit keys, by doing an
- * additional lookup-and-XOR stage.  The parameters a, b, c and d are the
- * four bytes going into the last three stages.  For 192-bit keys, c = d
- * are the index preprocessed through q0, and a = b are the index
- * preprocessed through q1; j is the index of the first key byte to use.
- * CALC_K192 is identical to CALC_K but for using the CALC_K192_2 macro
- * instead of CALC_K_2.
- * CALC_K256_2 expands CALC_K192_2 to handle 256-bit keys, by doing an
- * additional lookup-and-XOR stage.  The parameters a and b are the index
- * preprocessed through q0 and q1 respectively; j is the index of the first
- * key byte to use.  CALC_K256 is identical to CALC_K but for using the
- * CALC_K256_2 macro instead of CALC_K_2. */
-
-#define CALC_K_2(a, b, c, d, j) \
-     mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \
-   ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \
-   ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \
-   ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]]
-
-#define CALC_K(a, j, k, l, m, n) \
-   x = CALC_K_2 (k, l, k, l, 0); \
-   y = CALC_K_2 (m, n, m, n, 4); \
-   y = rol32(y, 8); \
-   x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = rol32(y, 9)
-
-#define CALC_K192_2(a, b, c, d, j) \
-   CALC_K_2 (q0[a ^ key[(j) + 16]], \
-	     q1[b ^ key[(j) + 17]], \
-	     q0[c ^ key[(j) + 18]], \
-	     q1[d ^ key[(j) + 19]], j)
-
-#define CALC_K192(a, j, k, l, m, n) \
-   x = CALC_K192_2 (l, l, k, k, 0); \
-   y = CALC_K192_2 (n, n, m, m, 4); \
-   y = rol32(y, 8); \
-   x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = rol32(y, 9)
-
-#define CALC_K256_2(a, b, j) \
-   CALC_K192_2 (q1[b ^ key[(j) + 24]], \
-	        q1[a ^ key[(j) + 25]], \
-	        q0[a ^ key[(j) + 26]], \
-	        q0[b ^ key[(j) + 27]], j)
-
-#define CALC_K256(a, j, k, l, m, n) \
-   x = CALC_K256_2 (k, l, 0); \
-   y = CALC_K256_2 (m, n, 4); \
-   y = rol32(y, 8); \
-   x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = rol32(y, 9)
-
-
 /* Macros to compute the g() function in the encryption and decryption
  * rounds.  G1 is the straight g() function; G2 includes the 8-bit
  * rotation for the high 32-bit word. */
@@ -630,181 +103,12 @@ static const u8 calc_sb_tbl[512] = {
    x ^= ctx->w[m]; \
    dst[n] = cpu_to_le32(x)
 
-#define TF_MIN_KEY_SIZE 16
-#define TF_MAX_KEY_SIZE 32
-#define TF_BLOCK_SIZE 16
-
-/* Structure for an expanded Twofish key.  s contains the key-dependent
- * S-boxes composed with the MDS matrix; w contains the eight "whitening"
- * subkeys, K[0] through K[7].	k holds the remaining, "round" subkeys.  Note
- * that k[i] corresponds to what the Twofish paper calls K[i+8]. */
-struct twofish_ctx {
-   u32 s[4][256], w[8], k[32];
-};
-
-/* Perform the key setup. */
-static int twofish_setkey(void *cx, const u8 *key,
-                          unsigned int key_len, u32 *flags)
-{
-	
-	struct twofish_ctx *ctx = cx;
 
-	int i, j, k;
-
-	/* Temporaries for CALC_K. */
-	u32 x, y;
-
-	/* The S vector used to key the S-boxes, split up into individual bytes.
-	 * 128-bit keys use only sa through sh; 256-bit use all of them. */
-	u8 sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
-	u8 si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
-
-	/* Temporary for CALC_S. */
-	u8 tmp;
-
-	/* Check key length. */
-	if (key_len != 16 && key_len != 24 && key_len != 32)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL; /* unsupported key length */
-	}
-
-	/* Compute the first two words of the S vector.  The magic numbers are
-	 * the entries of the RS matrix, preprocessed through poly_to_exp. The
-	 * numbers in the comments are the original (polynomial form) matrix
-	 * entries. */
-	CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-	CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-	CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-	CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-	CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-	CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-	CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-	CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-	CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-	CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-	CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-	CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-	CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-	CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-	CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-	CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-
-	if (key_len == 24 || key_len == 32) { /* 192- or 256-bit key */
-		/* Calculate the third word of the S vector */
-		CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-		CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-		CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-		CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-		CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-		CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-		CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-		CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-	}
-
-	if (key_len == 32) { /* 256-bit key */
-		/* Calculate the fourth word of the S vector */
-		CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-		CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-		CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-		CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-		CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-		CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-		CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-		CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-
-		/* Compute the S-boxes. */
-		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
-			CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
-		}
-
-		/* Calculate whitening and round subkeys.  The constants are
-		 * indices of subkeys, preprocessed through q0 and q1. */
-		CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-		CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-		CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-		CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-		CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-		CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-		CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-		CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-		CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-		CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-		CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71);
-		CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-		CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F);
-		CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-		CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-		CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F);
-		CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-		CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-		CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
-		CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
-	} else if (key_len == 24) { /* 192-bit key */
-		/* Compute the S-boxes. */
-		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
-		        CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
-		}
-
-		/* Calculate whitening and round subkeys.  The constants are
-		 * indices of subkeys, preprocessed through q0 and q1. */
-		CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-		CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-		CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-		CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-		CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-		CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-		CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-		CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-		CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-		CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-		CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71);
-		CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-		CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F);
-		CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-		CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-		CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F);
-		CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-		CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-		CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00);
-		CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
-	} else { /* 128-bit key */
-		/* Compute the S-boxes. */
-		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
-			CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
-		}
-
-		/* Calculate whitening and round subkeys.  The constants are
-		 * indices of subkeys, preprocessed through q0 and q1. */
-		CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-		CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-		CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-		CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-		CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-		CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-		CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-		CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-		CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-		CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-		CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71);
-		CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-		CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F);
-		CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-		CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-		CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F);
-		CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-		CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-		CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
-		CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
-	}
-
-	return 0;
-}
 
 /* Encrypt one block.  in and out may be the same. */
-static void twofish_encrypt(void *cx, u8 *out, const u8 *in)
+static void twofish_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct twofish_ctx *ctx = cx;
+	struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *src = (const __le32 *)in;
 	__le32 *dst = (__le32 *)out;
 
@@ -839,9 +143,9 @@ static void twofish_encrypt(void *cx, u8
 }
 
 /* Decrypt one block.  in and out may be the same. */
-static void twofish_decrypt(void *cx, u8 *out, const u8 *in)
+static void twofish_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct twofish_ctx *ctx = cx;
+	struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *src = (const __le32 *)in;
 	__le32 *dst = (__le32 *)out;
   
@@ -877,6 +181,8 @@ static void twofish_decrypt(void *cx, u8
 
 static struct crypto_alg alg = {
 	.cra_name           =   "twofish",
+	.cra_driver_name    =   "twofish-generic",
+	.cra_priority       =   100,
 	.cra_flags          =   CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize      =   TF_BLOCK_SIZE,
 	.cra_ctxsize        =   sizeof(struct twofish_ctx),
diff -puN /dev/null crypto/twofish_common.c
--- /dev/null
+++ a/crypto/twofish_common.c
@@ -0,0 +1,744 @@
+/*
+ * Common Twofish algorithm parts shared between the c and assembler
+ * implementations
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/bitops.h>
+#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+
+/* The large precomputed tables for the Twofish cipher (twofish.c)
+ * Taken from the same source as twofish.c
+ * Marc Mutz <Marc@Mutz.com>
+ */
+
+/* These two tables are the q0 and q1 permutations, exactly as described in
+ * the Twofish paper. */
+
+static const u8 q0[256] = {
+	0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
+	0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
+	0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
+	0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
+	0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
+	0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
+	0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
+	0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
+	0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
+	0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
+	0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
+	0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
+	0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
+	0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
+	0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
+	0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
+	0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
+	0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
+	0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
+	0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
+	0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
+	0x4A, 0x5E, 0xC1, 0xE0
+};
+
+static const u8 q1[256] = {
+	0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
+	0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
+	0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
+	0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
+	0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
+	0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
+	0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
+	0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
+	0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
+	0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
+	0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
+	0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
+	0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
+	0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
+	0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
+	0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
+	0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
+	0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
+	0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
+	0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
+	0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
+	0x55, 0x09, 0xBE, 0x91
+};
+
+/* These MDS tables are actually tables of MDS composed with q0 and q1,
+ * because it is only ever used that way and we can save some time by
+ * precomputing.  Of course the main saving comes from precomputing the
+ * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
+ * things up in these tables we reduce the matrix multiply to four lookups
+ * and three XORs.  Semi-formally, the definition of these tables is:
+ * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
+ * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
+ * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
+ * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
+ * by Schneier et al, and I'm casually glossing over the byte/word
+ * conversion issues. */
+
+static const u32 mds[4][256] = {
+	{
+	0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
+	0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
+	0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
+	0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
+	0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
+	0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
+	0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
+	0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
+	0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
+	0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
+	0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
+	0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
+	0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
+	0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
+	0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
+	0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
+	0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
+	0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
+	0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
+	0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
+	0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
+	0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
+	0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
+	0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
+	0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
+	0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
+	0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
+	0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
+	0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
+	0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
+	0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
+	0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
+	0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
+	0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
+	0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
+	0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
+	0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
+	0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
+	0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
+	0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
+	0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
+	0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
+	0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
+
+	{
+	0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
+	0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
+	0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
+	0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
+	0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
+	0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
+	0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
+	0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
+	0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
+	0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
+	0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
+	0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
+	0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
+	0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
+	0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
+	0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
+	0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
+	0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
+	0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
+	0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
+	0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
+	0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
+	0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
+	0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
+	0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
+	0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
+	0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
+	0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
+	0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
+	0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
+	0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
+	0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
+	0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
+	0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
+	0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
+	0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
+	0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
+	0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
+	0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
+	0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
+	0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
+	0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
+	0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
+
+	{
+	0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
+	0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
+	0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
+	0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
+	0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
+	0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
+	0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
+	0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
+	0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
+	0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
+	0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
+	0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
+	0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
+	0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
+	0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
+	0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
+	0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
+	0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
+	0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
+	0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
+	0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
+	0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
+	0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
+	0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
+	0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
+	0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
+	0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
+	0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
+	0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
+	0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
+	0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
+	0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
+	0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
+	0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
+	0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
+	0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
+	0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
+	0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
+	0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
+	0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
+	0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
+	0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
+	0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
+
+	{
+	0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
+	0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
+	0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
+	0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
+	0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
+	0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
+	0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
+	0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
+	0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
+	0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
+	0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
+	0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
+	0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
+	0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
+	0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
+	0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
+	0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
+	0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
+	0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
+	0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
+	0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
+	0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
+	0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
+	0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
+	0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
+	0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
+	0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
+	0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
+	0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
+	0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
+	0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
+	0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
+	0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
+	0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
+	0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
+	0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
+	0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
+	0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
+	0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
+	0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
+	0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
+	0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
+	0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
+};
+
+/* The exp_to_poly and poly_to_exp tables are used to perform efficient
+ * operations in GF(2^8) represented as GF(2)[x]/w(x) where
+ * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
+ * definition of the RS matrix in the key schedule.  Elements of that field
+ * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
+ * which can be represented naturally by bytes (just substitute x=2).  In that
+ * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
+ * multiplication is inefficient without hardware support.  To multiply
+ * faster, I make use of the fact x is a generator for the nonzero elements,
+ * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
+ * some n in 0..254.  Note that that caret is exponentiation in GF(2^8),
+ * *not* polynomial notation.  So if I want to compute pq where p and q are
+ * in GF(2^8), I can just say:
+ *    1. if p=0 or q=0 then pq=0
+ *    2. otherwise, find m and n such that p=x^m and q=x^n
+ *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
+ * The translations in steps 2 and 3 are looked up in the tables
+ * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
+ * in action, look at the CALC_S macro.  As additional wrinkles, note that
+ * one of my operands is always a constant, so the poly_to_exp lookup on it
+ * is done in advance; I included the original values in the comments so
+ * readers can have some chance of recognizing that this *is* the RS matrix
+ * from the Twofish paper.  I've only included the table entries I actually
+ * need; I never do a lookup on a variable input of zero and the biggest
+ * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll
+ * never sum to more than 491.	I'm repeating part of the exp_to_poly table
+ * so that I don't have to do mod-255 reduction in the exponent arithmetic.
+ * Since I know my constant operands are never zero, I only have to worry
+ * about zero values in the variable operand, and I do it with a simple
+ * conditional branch.	I know conditionals are expensive, but I couldn't
+ * see a non-horrible way of avoiding them, and I did manage to group the
+ * statements so that each if covers four group multiplications. */
+
+static const u8 poly_to_exp[255] = {
+	0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
+	0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
+	0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
+	0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
+	0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
+	0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
+	0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
+	0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
+	0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
+	0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
+	0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
+	0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
+	0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
+	0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
+	0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
+	0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
+	0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
+	0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
+	0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
+	0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
+	0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
+	0x85, 0xC8, 0xA1
+};
+
+static const u8 exp_to_poly[492] = {
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
+	0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
+	0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
+	0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
+	0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
+	0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
+	0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
+	0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
+	0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
+	0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
+	0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
+	0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
+	0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
+	0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
+	0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
+	0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
+	0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
+	0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
+	0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
+	0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
+	0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
+	0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
+	0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
+	0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
+	0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
+	0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
+	0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
+	0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
+	0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
+	0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
+	0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
+	0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
+	0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
+	0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
+	0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
+	0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
+	0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
+	0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
+	0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
+	0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
+	0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB
+};
+
+
+/* The table constants are indices of
+ * S-box entries, preprocessed through q0 and q1. */
+static const u8 calc_sb_tbl[512] = {
+	0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
+	0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
+	0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
+	0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
+	0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
+	0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
+	0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
+	0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
+	0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
+	0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
+	0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
+	0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
+	0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
+	0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
+	0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
+	0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
+	0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
+	0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
+	0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
+	0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
+	0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
+	0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
+	0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
+	0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
+	0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
+	0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
+	0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
+	0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
+	0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
+	0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
+	0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
+	0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
+	0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
+	0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
+	0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
+	0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
+	0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
+	0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
+	0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
+	0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
+	0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
+	0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
+	0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
+	0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
+	0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
+	0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
+	0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
+	0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
+	0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
+	0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
+	0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
+	0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
+	0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
+	0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
+	0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
+	0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
+	0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
+	0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
+	0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
+	0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
+	0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
+	0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
+	0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
+	0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
+};
+
+/* Macro to perform one column of the RS matrix multiplication.  The
+ * parameters a, b, c, and d are the four bytes of output; i is the index
+ * of the key bytes, and w, x, y, and z, are the column of constants from
+ * the RS matrix, preprocessed through the poly_to_exp table. */
+
+#define CALC_S(a, b, c, d, i, w, x, y, z) \
+   if (key[i]) { \
+      tmp = poly_to_exp[key[i] - 1]; \
+      (a) ^= exp_to_poly[tmp + (w)]; \
+      (b) ^= exp_to_poly[tmp + (x)]; \
+      (c) ^= exp_to_poly[tmp + (y)]; \
+      (d) ^= exp_to_poly[tmp + (z)]; \
+   }
+
+/* Macros to calculate the key-dependent S-boxes for a 128-bit key using
+ * the S vector from CALC_S.  CALC_SB_2 computes a single entry in all
+ * four S-boxes, where i is the index of the entry to compute, and a and b
+ * are the index numbers preprocessed through the q0 and q1 tables
+ * respectively. */
+
+#define CALC_SB_2(i, a, b) \
+   ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
+   ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
+   ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
+   ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]
+
+/* Macro exactly like CALC_SB_2, but for 192-bit keys. */
+
+#define CALC_SB192_2(i, a, b) \
+   ctx->s[0][i] = mds[0][q0[q0[(b) ^ sa] ^ se] ^ si]; \
+   ctx->s[1][i] = mds[1][q0[q1[(b) ^ sb] ^ sf] ^ sj]; \
+   ctx->s[2][i] = mds[2][q1[q0[(a) ^ sc] ^ sg] ^ sk]; \
+   ctx->s[3][i] = mds[3][q1[q1[(a) ^ sd] ^ sh] ^ sl];
+
+/* Macro exactly like CALC_SB_2, but for 256-bit keys. */
+
+#define CALC_SB256_2(i, a, b) \
+   ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
+   ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
+   ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
+   ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp];
+
+/* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
+ * last two stages of the h() function for a given index (either 2i or 2i+1).
+ * a, b, c, and d are the four bytes going into the last two stages.  For
+ * 128-bit keys, this is the entire h() function and a and c are the index
+ * preprocessed through q0 and q1 respectively; for longer keys they are the
+ * output of previous stages.  j is the index of the first key byte to use.
+ * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
+ * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
+ * rotations.  Its parameters are: a, the array to write the results into,
+ * j, the index of the first output entry, k and l, the preprocessed indices
+ * for index 2i, and m and n, the preprocessed indices for index 2i+1.
+ * CALC_K192_2 expands CALC_K_2 to handle 192-bit keys, by doing an
+ * additional lookup-and-XOR stage.  The parameters a, b, c and d are the
+ * four bytes going into the last three stages.  For 192-bit keys, c = d
+ * are the index preprocessed through q0, and a = b are the index
+ * preprocessed through q1; j is the index of the first key byte to use.
+ * CALC_K192 is identical to CALC_K but for using the CALC_K192_2 macro
+ * instead of CALC_K_2.
+ * CALC_K256_2 expands CALC_K192_2 to handle 256-bit keys, by doing an
+ * additional lookup-and-XOR stage.  The parameters a and b are the index
+ * preprocessed through q0 and q1 respectively; j is the index of the first
+ * key byte to use.  CALC_K256 is identical to CALC_K but for using the
+ * CALC_K256_2 macro instead of CALC_K_2. */
+
+#define CALC_K_2(a, b, c, d, j) \
+     mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \
+   ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \
+   ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \
+   ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]]
+
+#define CALC_K(a, j, k, l, m, n) \
+   x = CALC_K_2 (k, l, k, l, 0); \
+   y = CALC_K_2 (m, n, m, n, 4); \
+   y = rol32(y, 8); \
+   x += y; y += x; ctx->a[j] = x; \
+   ctx->a[(j) + 1] = rol32(y, 9)
+
+#define CALC_K192_2(a, b, c, d, j) \
+   CALC_K_2 (q0[a ^ key[(j) + 16]], \
+	     q1[b ^ key[(j) + 17]], \
+	     q0[c ^ key[(j) + 18]], \
+	     q1[d ^ key[(j) + 19]], j)
+
+#define CALC_K192(a, j, k, l, m, n) \
+   x = CALC_K192_2 (l, l, k, k, 0); \
+   y = CALC_K192_2 (n, n, m, m, 4); \
+   y = rol32(y, 8); \
+   x += y; y += x; ctx->a[j] = x; \
+   ctx->a[(j) + 1] = rol32(y, 9)
+
+#define CALC_K256_2(a, b, j) \
+   CALC_K192_2 (q1[b ^ key[(j) + 24]], \
+	        q1[a ^ key[(j) + 25]], \
+	        q0[a ^ key[(j) + 26]], \
+	        q0[b ^ key[(j) + 27]], j)
+
+#define CALC_K256(a, j, k, l, m, n) \
+   x = CALC_K256_2 (k, l, 0); \
+   y = CALC_K256_2 (m, n, 4); \
+   y = rol32(y, 8); \
+   x += y; y += x; ctx->a[j] = x; \
+   ctx->a[(j) + 1] = rol32(y, 9)
+
+/* Perform the key setup. */
+int twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+		   unsigned int key_len, u32 *flags)
+{
+
+	struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	int i, j, k;
+
+	/* Temporaries for CALC_K. */
+	u32 x, y;
+
+	/* The S vector used to key the S-boxes, split up into individual bytes.
+	 * 128-bit keys use only sa through sh; 256-bit use all of them. */
+	u8 sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
+	u8 si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
+
+	/* Temporary for CALC_S. */
+	u8 tmp;
+
+	/* Check key length. */
+	if (key_len != 16 && key_len != 24 && key_len != 32)
+	{
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL; /* unsupported key length */
+	}
+
+	/* Compute the first two words of the S vector.  The magic numbers are
+	 * the entries of the RS matrix, preprocessed through poly_to_exp. The
+	 * numbers in the comments are the original (polynomial form) matrix
+	 * entries. */
+	CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+	CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+	CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+	CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+	CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+	CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+	CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+	CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+	CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+	CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+	CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+	CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+	CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+	CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+	CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+	CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+
+	if (key_len == 24 || key_len == 32) { /* 192- or 256-bit key */
+		/* Calculate the third word of the S vector */
+		CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+		CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+		CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+		CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+		CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+		CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+		CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+		CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+	}
+
+	if (key_len == 32) { /* 256-bit key */
+		/* Calculate the fourth word of the S vector */
+		CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+		CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+		CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+		CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+		CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+		CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+		CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+		CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+
+		/* Compute the S-boxes. */
+		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
+			CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+		}
+
+		/* Calculate whitening and round subkeys.  The constants are
+		 * indices of subkeys, preprocessed through q0 and q1. */
+		CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
+		CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
+		CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
+		CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
+		CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
+		CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
+		CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
+		CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
+		CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
+		CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
+		CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71);
+		CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
+		CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F);
+		CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
+		CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
+		CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F);
+		CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
+		CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
+		CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
+		CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+	} else if (key_len == 24) { /* 192-bit key */
+		/* Compute the S-boxes. */
+		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
+		        CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+		}
+
+		/* Calculate whitening and round subkeys.  The constants are
+		 * indices of subkeys, preprocessed through q0 and q1. */
+		CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
+		CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
+		CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
+		CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
+		CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
+		CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
+		CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
+		CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
+		CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
+		CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
+		CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71);
+		CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
+		CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F);
+		CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
+		CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
+		CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F);
+		CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
+		CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
+		CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00);
+		CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+	} else { /* 128-bit key */
+		/* Compute the S-boxes. */
+		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
+			CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+		}
+
+		/* Calculate whitening and round subkeys.  The constants are
+		 * indices of subkeys, preprocessed through q0 and q1. */
+		CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3);
+		CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
+		CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
+		CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
+		CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
+		CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B);
+		CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
+		CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
+		CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
+		CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD);
+		CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71);
+		CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
+		CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F);
+		CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B);
+		CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA);
+		CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F);
+		CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
+		CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
+		CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
+		CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(twofish_setkey);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Twofish cipher common functions");
diff -puN crypto/wp512.c~git-cryptodev crypto/wp512.c
--- a/crypto/wp512.c~git-cryptodev
+++ a/crypto/wp512.c
@@ -981,9 +981,9 @@ static void wp512_process_buffer(struct 
 
 }
 
-static void wp512_init (void *ctx) {
+static void wp512_init(struct crypto_tfm *tfm) {
+	struct wp512_ctx *wctx = crypto_tfm_ctx(tfm);
 	int i;
-	struct wp512_ctx *wctx = ctx;
 
 	memset(wctx->bitLength, 0, 32);
 	wctx->bufferBits = wctx->bufferPos = 0;
@@ -993,10 +993,10 @@ static void wp512_init (void *ctx) {
 	}
 }
 
-static void wp512_update(void *ctx, const u8 *source, unsigned int len)
+static void wp512_update(struct crypto_tfm *tfm, const u8 *source,
+			 unsigned int len)
 {
-
-	struct wp512_ctx *wctx = ctx;
+	struct wp512_ctx *wctx = crypto_tfm_ctx(tfm);
 	int sourcePos    = 0;
 	unsigned int bits_len = len * 8; // convert to number of bits
 	int sourceGap    = (8 - ((int)bits_len & 7)) & 7;
@@ -1054,9 +1054,9 @@ static void wp512_update(void *ctx, cons
 
 }
 
-static void wp512_final(void *ctx, u8 *out)
+static void wp512_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct wp512_ctx *wctx = ctx;
+	struct wp512_ctx *wctx = crypto_tfm_ctx(tfm);
 	int i;
    	u8 *buffer      = wctx->buffer;
    	u8 *bitLength   = wctx->bitLength;
@@ -1087,22 +1087,20 @@ static void wp512_final(void *ctx, u8 *o
    	wctx->bufferPos    = bufferPos;
 }
 
-static void wp384_final(void *ctx, u8 *out)
+static void wp384_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct wp512_ctx *wctx = ctx;
 	u8 D[64];
 
-	wp512_final (wctx, D);
+	wp512_final(tfm, D);
 	memcpy (out, D, WP384_DIGEST_SIZE);
 	memset (D, 0, WP512_DIGEST_SIZE);
 }
 
-static void wp256_final(void *ctx, u8 *out)
+static void wp256_final(struct crypto_tfm *tfm, u8 *out)
 {
-	struct wp512_ctx *wctx = ctx;
 	u8 D[64];
 
-	wp512_final (wctx, D);
+	wp512_final(tfm, D);
 	memcpy (out, D, WP256_DIGEST_SIZE);
 	memset (D, 0, WP512_DIGEST_SIZE);
 }
diff -puN drivers/crypto/padlock-aes.c~git-cryptodev drivers/crypto/padlock-aes.c
--- a/drivers/crypto/padlock-aes.c~git-cryptodev
+++ a/drivers/crypto/padlock-aes.c
@@ -60,15 +60,14 @@
 #define AES_EXTENDED_KEY_SIZE_B	(AES_EXTENDED_KEY_SIZE * sizeof(uint32_t))
 
 struct aes_ctx {
-	uint32_t e_data[AES_EXTENDED_KEY_SIZE];
-	uint32_t d_data[AES_EXTENDED_KEY_SIZE];
 	struct {
 		struct cword encrypt;
 		struct cword decrypt;
 	} cword;
-	uint32_t *E;
-	uint32_t *D;
+	u32 *D;
 	int key_length;
+	u32 E[AES_EXTENDED_KEY_SIZE];
+	u32 d_data[AES_EXTENDED_KEY_SIZE];
 };
 
 /* ====== Key management routines ====== */
@@ -282,19 +281,20 @@ aes_hw_extkey_available(uint8_t key_len)
 	return 0;
 }
 
-static inline struct aes_ctx *aes_ctx(void *ctx)
+static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
 {
+	unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm);
 	unsigned long align = PADLOCK_ALIGNMENT;
 
 	if (align <= crypto_tfm_ctx_alignment())
 		align = 1;
-	return (struct aes_ctx *)ALIGN((unsigned long)ctx, align);
+	return (struct aes_ctx *)ALIGN(addr, align);
 }
 
-static int
-aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t *flags)
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len, u32 *flags)
 {
-	struct aes_ctx *ctx = aes_ctx(ctx_arg);
+	struct aes_ctx *ctx = aes_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
 	uint32_t i, t, u, v, w;
 	uint32_t P[AES_EXTENDED_KEY_SIZE];
@@ -312,8 +312,7 @@ aes_set_key(void *ctx_arg, const uint8_t
 	 * itself we must supply the plain key for both encryption
 	 * and decryption.
 	 */
-	ctx->E = ctx->e_data;
-	ctx->D = ctx->e_data;
+	ctx->D = ctx->E;
 
 	E_KEY[0] = le32_to_cpu(key[0]);
 	E_KEY[1] = le32_to_cpu(key[1]);
@@ -414,24 +413,22 @@ static inline u8 *padlock_xcrypt_cbc(con
 	return iv;
 }
 
-static void
-aes_encrypt(void *ctx_arg, uint8_t *out, const uint8_t *in)
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct aes_ctx *ctx = aes_ctx(ctx_arg);
+	struct aes_ctx *ctx = aes_ctx(tfm);
 	padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, 1);
 }
 
-static void
-aes_decrypt(void *ctx_arg, uint8_t *out, const uint8_t *in)
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct aes_ctx *ctx = aes_ctx(ctx_arg);
+	struct aes_ctx *ctx = aes_ctx(tfm);
 	padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1);
 }
 
 static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out,
 				    const u8 *in, unsigned int nbytes)
 {
-	struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm));
+	struct aes_ctx *ctx = aes_ctx(desc->tfm);
 	padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt,
 			   nbytes / AES_BLOCK_SIZE);
 	return nbytes & ~(AES_BLOCK_SIZE - 1);
@@ -440,7 +437,7 @@ static unsigned int aes_encrypt_ecb(cons
 static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out,
 				    const u8 *in, unsigned int nbytes)
 {
-	struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm));
+	struct aes_ctx *ctx = aes_ctx(desc->tfm);
 	padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt,
 			   nbytes / AES_BLOCK_SIZE);
 	return nbytes & ~(AES_BLOCK_SIZE - 1);
@@ -449,7 +446,7 @@ static unsigned int aes_decrypt_ecb(cons
 static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out,
 				    const u8 *in, unsigned int nbytes)
 {
-	struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm));
+	struct aes_ctx *ctx = aes_ctx(desc->tfm);
 	u8 *iv;
 
 	iv = padlock_xcrypt_cbc(in, out, ctx->E, desc->info,
@@ -462,7 +459,7 @@ static unsigned int aes_encrypt_cbc(cons
 static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
 				    const u8 *in, unsigned int nbytes)
 {
-	struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm));
+	struct aes_ctx *ctx = aes_ctx(desc->tfm);
 	padlock_xcrypt_cbc(in, out, ctx->D, desc->info, &ctx->cword.decrypt,
 			   nbytes / AES_BLOCK_SIZE);
 	return nbytes & ~(AES_BLOCK_SIZE - 1);
diff -puN /dev/null include/crypto/twofish.h
--- /dev/null
+++ a/include/crypto/twofish.h
@@ -0,0 +1,23 @@
+#ifndef _CRYPTO_TWOFISH_H
+#define _CRYPTO_TWOFISH_H
+
+#include <linux/types.h>
+
+#define TF_MIN_KEY_SIZE 16
+#define TF_MAX_KEY_SIZE 32
+#define TF_BLOCK_SIZE 16
+
+struct crypto_tfm;
+
+/* Structure for an expanded Twofish key.  s contains the key-dependent
+ * S-boxes composed with the MDS matrix; w contains the eight "whitening"
+ * subkeys, K[0] through K[7].	k holds the remaining, "round" subkeys.  Note
+ * that k[i] corresponds to what the Twofish paper calls K[i+8]. */
+struct twofish_ctx {
+	u32 s[4][256], w[8], k[32];
+};
+
+int twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+		   unsigned int key_len, u32 *flags);
+
+#endif
diff -puN include/linux/crypto.h~git-cryptodev include/linux/crypto.h
--- a/include/linux/crypto.h~git-cryptodev
+++ a/include/linux/crypto.h
@@ -66,7 +66,7 @@ struct crypto_tfm;
 
 struct cipher_desc {
 	struct crypto_tfm *tfm;
-	void (*crfn)(void *ctx, u8 *dst, const u8 *src);
+	void (*crfn)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 	unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst,
 			     const u8 *src, unsigned int nbytes);
 	void *info;
@@ -79,10 +79,10 @@ struct cipher_desc {
 struct cipher_alg {
 	unsigned int cia_min_keysize;
 	unsigned int cia_max_keysize;
-	int (*cia_setkey)(void *ctx, const u8 *key,
+	int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key,
 	                  unsigned int keylen, u32 *flags);
-	void (*cia_encrypt)(void *ctx, u8 *dst, const u8 *src);
-	void (*cia_decrypt)(void *ctx, u8 *dst, const u8 *src);
+	void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+	void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 
 	unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc,
 					u8 *dst, const u8 *src,
@@ -100,20 +100,19 @@ struct cipher_alg {
 
 struct digest_alg {
 	unsigned int dia_digestsize;
-	void (*dia_init)(void *ctx);
-	void (*dia_update)(void *ctx, const u8 *data, unsigned int len);
-	void (*dia_final)(void *ctx, u8 *out);
-	int (*dia_setkey)(void *ctx, const u8 *key,
+	void (*dia_init)(struct crypto_tfm *tfm);
+	void (*dia_update)(struct crypto_tfm *tfm, const u8 *data,
+			   unsigned int len);
+	void (*dia_final)(struct crypto_tfm *tfm, u8 *out);
+	int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key,
 	                  unsigned int keylen, u32 *flags);
 };
 
 struct compress_alg {
-	int (*coa_init)(void *ctx);
-	void (*coa_exit)(void *ctx);
-	int (*coa_compress)(void *ctx, const u8 *src, unsigned int slen,
-	                    u8 *dst, unsigned int *dlen);
-	int (*coa_decompress)(void *ctx, const u8 *src, unsigned int slen,
-	                      u8 *dst, unsigned int *dlen);
+	int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src,
+			    unsigned int slen, u8 *dst, unsigned int *dlen);
+	int (*coa_decompress)(struct crypto_tfm *tfm, const u8 *src,
+			      unsigned int slen, u8 *dst, unsigned int *dlen);
 };
 
 #define cra_cipher	cra_u.cipher
@@ -129,14 +128,17 @@ struct crypto_alg {
 
 	int cra_priority;
 
-	const char cra_name[CRYPTO_MAX_ALG_NAME];
-	const char cra_driver_name[CRYPTO_MAX_ALG_NAME];
+	char cra_name[CRYPTO_MAX_ALG_NAME];
+	char cra_driver_name[CRYPTO_MAX_ALG_NAME];
 
 	union {
 		struct cipher_alg cipher;
 		struct digest_alg digest;
 		struct compress_alg compress;
 	} cra_u;
+
+	int (*cra_init)(struct crypto_tfm *tfm);
+	void (*cra_exit)(struct crypto_tfm *tfm);
 	
 	struct module *cra_module;
 };
_