2 // Copyright in this code is held by Dr B.R. Gladman but free direct or
3 // derivative use is permitted subject to acknowledgement of its origin
4 // and subject to any constraints placed on the use of the algorithm by
5 // its designers (if such constraints may exist, this will be indicated
8 // Dr. B. R. Gladman . 25th January 2000.
10 // This is an implementation of Serpent, an encryption algorithm designed
11 // by Anderson, Biham and Knudsen and submitted as a candidate for the
12 // Advanced Encryption Standard programme of the US National Institute of
13 // Standards and Technology.
15 // The designers of Serpent have not placed any constraints on the use of
18 #include <sys/types.h>
19 #include <sys/endian.h>
21 #include <crypto/serpent/serpent.h>
23 // Partially optimised Serpent S Box boolean functions derived
24 // using a recursive descent analyser but without a full search
25 // of all subtrees. This set of S boxes is the result of work
26 // by Sam Simpson and Brian Gladman using the spare time on a
27 // cluster of high capacity servers to search for S boxes with
28 // this customised search engine.
30 // Copyright: Dr B. R Gladman (gladman@seven77.demon.co.uk)
31 // and Sam Simpson (s.simpson@mia.co.uk)
34 // We hereby give permission for information in this file to be
35 // used freely subject only to acknowledgement of its origin
// rotr(x,n): rotate the 32-bit unsigned value x right by n bits.
// n is reduced modulo 32. The complementary left-shift count is masked as
// well, so n % 32 == 0 returns x unchanged instead of performing an
// undefined shift by 32. x and n are each evaluated twice; pass
// side-effect-free expressions.
#define rotr(x,n)   (((x) >> ((int)((n) & 0x1f))) | ((x) << ((int)((32 - ((n) & 0x1f)) & 0x1f))))
// rotl(x,n): rotate the 32-bit unsigned value x left by n bits.
// n is reduced modulo 32. The complementary right-shift count is masked as
// well, so n % 32 == 0 returns x unchanged instead of performing an
// undefined shift by 32. x and n are each evaluated twice; pass
// side-effect-free expressions.
#define rotl(x,n)   (((x) << ((int)((n) & 0x1f))) | ((x) >> ((int)((32 - ((n) & 0x1f)) & 0x1f))))
// Serpent S-box macros sb0..sb7 and their inverses ib0..ib7.
// Each macro takes eight names. At the call sites in this file the first
// four name the input words and the last four name the words that receive
// the result (e.g. sb3(a,b,c,d,e,f,g,h) is followed by k_get(..,e,f,g,h)).
// NOTE(review): only the `#define` header lines are visible in this
// excerpt; the boolean-expression bodies are not shown. They presumably use
// the t1..t16 temporaries that every caller declares -- confirm.
// NOTE(review): sb3 is defined twice below (the comment mentions a 16-term
// and a 17-term solution); presumably a preprocessor conditional outside
// this view selects one of them -- confirm.
42 #define sb0(a,b,c,d,e,f,g,h) \
61 #define ib0(a,b,c,d,e,f,g,h) \
81 #define sb1(a,b,c,d,e,f,g,h) \
100 #define ib1(a,b,c,d,e,f,g,h) \
122 #define sb2(a,b,c,d,e,f,g,h) \
143 #define ib2(a,b,c,d,e,f,g,h) \
164 #define sb3(a,b,c,d,e,f,g,h) \
184 // 16 term solution that performs less well than 17 term one
185 // in my environment (PPro/PII)
189 #define sb3(a,b,c,d,e,f,g,h) \
212 #define ib3(a,b,c,d,e,f,g,h) \
234 #define sb4(a,b,c,d,e,f,g,h) \
254 #define ib4(a,b,c,d,e,f,g,h) \
276 #define sb5(a,b,c,d,e,f,g,h) \
297 #define ib5(a,b,c,d,e,f,g,h) \
318 #define sb6(a,b,c,d,e,f,g,h) \
338 #define ib6(a,b,c,d,e,f,g,h) \
358 #define sb7(a,b,c,d,e,f,g,h) \
380 #define ib7(a,b,c,d,e,f,g,h) \
// k_xor(r,a,b,c,d): XOR the four words of round key r into a, b, c and d.
// Round key r is stored at ctx->l_key[4*r + 8] .. ctx->l_key[4*r + 11].
// The macro refers to `ctx` directly, so a variable of that name must be in
// scope where it is expanded.
// NOTE(review): `r` is not parenthesised in the index expressions; pass a
// literal or a plain identifier (every visible caller passes a literal).
400 #define k_xor(r,a,b,c,d) \
401 { a ^= ctx->l_key[4 * r + 8]; \
402 b ^= ctx->l_key[4 * r + 9]; \
403 c ^= ctx->l_key[4 * r + 10]; \
404 d ^= ctx->l_key[4 * r + 11]; \
// k_set(r,a,b,c,d): load the four key words of group r into a, b, c and d,
// reading ctx->l_key[4*r + 8] .. ctx->l_key[4*r + 11].
// The macro refers to `ctx` directly, so a variable of that name must be in
// scope where it is expanded.
// NOTE(review): `r` is not parenthesised in the index expressions; pass a
// literal or a plain identifier (every visible caller passes a literal).
407 #define k_set(r,a,b,c,d) \
408 { a = ctx->l_key[4 * r + 8]; \
409 b = ctx->l_key[4 * r + 9]; \
410 c = ctx->l_key[4 * r + 10]; \
411 d = ctx->l_key[4 * r + 11]; \
// k_get(r,a,b,c,d): store a, b, c and d into the four key words of group r,
// writing ctx->l_key[4*r + 8] .. ctx->l_key[4*r + 11]. Despite the name,
// this macro writes to the key array; k_set is the one that reads from it.
// The macro refers to `ctx` directly, so a variable of that name must be in
// scope where it is expanded.
// NOTE(review): `r` is not parenthesised in the index expressions; pass a
// literal or a plain identifier (every visible caller passes a literal).
414 #define k_get(r,a,b,c,d) \
415 { ctx->l_key[4 * r + 8] = a; \
416 ctx->l_key[4 * r + 9] = b; \
417 ctx->l_key[4 * r + 10] = c; \
418 ctx->l_key[4 * r + 11] = d; \
421 // the linear transformation and its inverse
// rot(a,b,c,d)  : linear transformation applied in place to four state
//                 words; serpent_encrypt uses it after the S-box of every
//                 round except the last.
// irot(a,b,c,d) : inverse of rot; serpent_decrypt uses it before every
//                 inverse S-box except the first.
// NOTE(review): the macro bodies are not visible in this excerpt.
423 #define rot(a,b,c,d) \
436 #define irot(a,b,c,d) \
449 // initialise the key schedule from the user supplied key
// serpent_set_key: expand the user key into the key words in ctx->l_key.
//
//   ctx     - context whose l_key[] array is filled in
//   in_key  - user key bytes, consumed four at a time as 32-bit words
//   key_len - key length; the arithmetic below (/ 32, % 32) treats it as a
//             bit count
//
// NOTE(review): several lines of this function (braces, loop and condition
// headers) are missing from this excerpt; the comments below describe only
// the statements that are visible.
451 void serpent_set_key(serpent_ctx *ctx, const u_int8_t in_key[], int key_len)
453 u_int32_t i,lk,a,b,c,d,e,f,g,h;
454 u_int32_t t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
// lk = number of 32-bit words needed to hold key_len bits (rounded up).
459 i = 0; lk = (key_len + 31) / 32;
// Copy key word i into l_key[i] with a little-endian <-> host byte swap.
// NOTE(review): in_key is read through a u_int32_t pointer cast, which
// assumes the buffer is suitably aligned for 32-bit access -- confirm.
463 ctx->l_key[i] = htole32(*((const u_int32_t *)(in_key + 4 * i))); i++;
// Short-key padding: i is the word that contains bit number key_len and lk
// is the single bit at that position. The bits below it are kept, the bit
// itself is set and the bits above it are cleared.
// NOTE(review): presumably guarded by a "key shorter than 256 bits" test
// that is not visible here. `1 << key_len % 32` is an int shift, so a
// remainder of 31 shifts into the sign bit -- confirm that cannot occur.
472 i = key_len / 32; lk = 1 << key_len % 32;
474 ctx->l_key[i] = (ctx->l_key[i] & (lk - 1)) | lk;
// Seed the two running XOR accumulators (t1 from even-indexed words, t2
// from odd-indexed words), each mixed with the constant 0x9e3779b9 (the
// value the Serpent specification calls phi).
477 t1 = ctx->l_key[2] ^ ctx->l_key[4] ^ ctx->l_key[6] ^ 0x9e3779b9;
478 t2 = ctx->l_key[3] ^ ctx->l_key[5] ^ ctx->l_key[7] ^ 0x9e3779b9;
// Expand l_key[8..139], two words per iteration. Each new word mixes its
// index, the word eight places back and an accumulator, then is rotated
// right by 21 (equivalently left by 11). The accumulators are then updated
// with the word just produced.
480 for(i = 0; i < 132; i += 2)
482 ctx->l_key[i + 8] = rotr(i ^ ctx->l_key[i] ^ t2, 21);
484 t1 ^= ctx->l_key[i + 2] ^ ctx->l_key[i + 8];
486 ctx->l_key[i + 9] = rotr((i + 1) ^ ctx->l_key[i + 1] ^ t1, 21);
488 t2 ^= ctx->l_key[i + 3] ^ ctx->l_key[i + 9];
// Pass each of the 33 four-word groups (0..32) through an S-box: k_set
// loads the group into a..d, sbN leaves its result in e..h, and k_get
// stores e..h back over the same group. The S-box order is
// 3,2,1,0,7,6,5,4, repeating.
491 k_set( 0,a,b,c,d);sb3(a,b,c,d,e,f,g,h);k_get( 0,e,f,g,h);
492 k_set( 1,a,b,c,d);sb2(a,b,c,d,e,f,g,h);k_get( 1,e,f,g,h);
493 k_set( 2,a,b,c,d);sb1(a,b,c,d,e,f,g,h);k_get( 2,e,f,g,h);
494 k_set( 3,a,b,c,d);sb0(a,b,c,d,e,f,g,h);k_get( 3,e,f,g,h);
495 k_set( 4,a,b,c,d);sb7(a,b,c,d,e,f,g,h);k_get( 4,e,f,g,h);
496 k_set( 5,a,b,c,d);sb6(a,b,c,d,e,f,g,h);k_get( 5,e,f,g,h);
497 k_set( 6,a,b,c,d);sb5(a,b,c,d,e,f,g,h);k_get( 6,e,f,g,h);
498 k_set( 7,a,b,c,d);sb4(a,b,c,d,e,f,g,h);k_get( 7,e,f,g,h);
499 k_set( 8,a,b,c,d);sb3(a,b,c,d,e,f,g,h);k_get( 8,e,f,g,h);
500 k_set( 9,a,b,c,d);sb2(a,b,c,d,e,f,g,h);k_get( 9,e,f,g,h);
501 k_set(10,a,b,c,d);sb1(a,b,c,d,e,f,g,h);k_get(10,e,f,g,h);
502 k_set(11,a,b,c,d);sb0(a,b,c,d,e,f,g,h);k_get(11,e,f,g,h);
503 k_set(12,a,b,c,d);sb7(a,b,c,d,e,f,g,h);k_get(12,e,f,g,h);
504 k_set(13,a,b,c,d);sb6(a,b,c,d,e,f,g,h);k_get(13,e,f,g,h);
505 k_set(14,a,b,c,d);sb5(a,b,c,d,e,f,g,h);k_get(14,e,f,g,h);
506 k_set(15,a,b,c,d);sb4(a,b,c,d,e,f,g,h);k_get(15,e,f,g,h);
507 k_set(16,a,b,c,d);sb3(a,b,c,d,e,f,g,h);k_get(16,e,f,g,h);
508 k_set(17,a,b,c,d);sb2(a,b,c,d,e,f,g,h);k_get(17,e,f,g,h);
509 k_set(18,a,b,c,d);sb1(a,b,c,d,e,f,g,h);k_get(18,e,f,g,h);
510 k_set(19,a,b,c,d);sb0(a,b,c,d,e,f,g,h);k_get(19,e,f,g,h);
511 k_set(20,a,b,c,d);sb7(a,b,c,d,e,f,g,h);k_get(20,e,f,g,h);
512 k_set(21,a,b,c,d);sb6(a,b,c,d,e,f,g,h);k_get(21,e,f,g,h);
513 k_set(22,a,b,c,d);sb5(a,b,c,d,e,f,g,h);k_get(22,e,f,g,h);
514 k_set(23,a,b,c,d);sb4(a,b,c,d,e,f,g,h);k_get(23,e,f,g,h);
515 k_set(24,a,b,c,d);sb3(a,b,c,d,e,f,g,h);k_get(24,e,f,g,h);
516 k_set(25,a,b,c,d);sb2(a,b,c,d,e,f,g,h);k_get(25,e,f,g,h);
517 k_set(26,a,b,c,d);sb1(a,b,c,d,e,f,g,h);k_get(26,e,f,g,h);
518 k_set(27,a,b,c,d);sb0(a,b,c,d,e,f,g,h);k_get(27,e,f,g,h);
519 k_set(28,a,b,c,d);sb7(a,b,c,d,e,f,g,h);k_get(28,e,f,g,h);
520 k_set(29,a,b,c,d);sb6(a,b,c,d,e,f,g,h);k_get(29,e,f,g,h);
521 k_set(30,a,b,c,d);sb5(a,b,c,d,e,f,g,h);k_get(30,e,f,g,h);
522 k_set(31,a,b,c,d);sb4(a,b,c,d,e,f,g,h);k_get(31,e,f,g,h);
523 k_set(32,a,b,c,d);sb3(a,b,c,d,e,f,g,h);k_get(32,e,f,g,h);
528 // encrypt a block of text
// serpent_encrypt: encrypt one 16-byte block read from in_blk using the key
// words in ctx->l_key, and store the 16-byte result through out_blk.
// NOTE(review): the remainder of the parameter list (where out_blk is
// presumably declared) and the function braces are on lines missing from
// this excerpt.
530 void serpent_encrypt(serpent_ctx *ctx, const u_int8_t in_blk[],
533 u_int32_t a,b,c,d,e,f,g,h;
534 u_int32_t t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
// Load the block as four 32-bit words with a little-endian <-> host swap.
// NOTE(review): in_blk is read through a u_int32_t pointer cast, which
// assumes the buffer is suitably aligned for 32-bit access -- confirm.
536 a = htole32(*((const u_int32_t *)(in_blk )));
537 b = htole32(*((const u_int32_t *)(in_blk + 4 )));
538 c = htole32(*((const u_int32_t *)(in_blk + 8 )));
539 d = htole32(*((const u_int32_t *)(in_blk + 12)));
// Rounds 0..30: XOR in round key r, apply S-box (r mod 8), then the linear
// transformation. The state alternates between a..d and e..h because each
// S-box macro writes its result into the other set of four variables.
541 k_xor( 0,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
542 k_xor( 1,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
543 k_xor( 2,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
544 k_xor( 3,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
545 k_xor( 4,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
546 k_xor( 5,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
547 k_xor( 6,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
548 k_xor( 7,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
549 k_xor( 8,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
550 k_xor( 9,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
551 k_xor(10,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
552 k_xor(11,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
553 k_xor(12,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
554 k_xor(13,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
555 k_xor(14,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
556 k_xor(15,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
557 k_xor(16,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
558 k_xor(17,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
559 k_xor(18,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
560 k_xor(19,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
561 k_xor(20,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
562 k_xor(21,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
563 k_xor(22,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
564 k_xor(23,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
565 k_xor(24,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
566 k_xor(25,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
567 k_xor(26,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
568 k_xor(27,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
569 k_xor(28,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
570 k_xor(29,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
571 k_xor(30,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
// Last round (31): no linear transformation; round key 32 is XORed in
// instead, leaving the ciphertext words in a..d.
572 k_xor(31,e,f,g,h); sb7(e,f,g,h,a,b,c,d); k_xor(32,a,b,c,d);
// Store the four result words with a little-endian <-> host swap.
// NOTE(review): out_blk is written through a u_int32_t pointer cast, which
// assumes the buffer is suitably aligned for 32-bit access -- confirm.
574 *((u_int32_t *)(out_blk )) = le32toh(a);
575 *((u_int32_t *)(out_blk + 4)) = le32toh(b);
576 *((u_int32_t *)(out_blk + 8)) = le32toh(c);
577 *((u_int32_t *)(out_blk + 12)) = le32toh(d);
580 // decrypt a block of text
// serpent_decrypt: decrypt one 16-byte block read from in_blk using the key
// words in ctx->l_key, and store the 16-byte result through out_blk. The
// steps mirror serpent_encrypt in reverse order with inverse S-boxes and
// the inverse linear transformation.
// NOTE(review): the remainder of the parameter list (where out_blk is
// presumably declared) and the function braces are on lines missing from
// this excerpt.
582 void serpent_decrypt(serpent_ctx *ctx, const u_int8_t in_blk[],
585 u_int32_t a,b,c,d,e,f,g,h;
586 u_int32_t t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
// Load the block as four 32-bit words with a little-endian <-> host swap.
// NOTE(review): in_blk is read through a u_int32_t pointer cast, which
// assumes the buffer is suitably aligned for 32-bit access -- confirm.
588 a = htole32(*((const u_int32_t *)(in_blk )));
589 b = htole32(*((const u_int32_t *)(in_blk + 4 )));
590 c = htole32(*((const u_int32_t *)(in_blk + 8 )));
591 d = htole32(*((const u_int32_t *)(in_blk + 12)));
// Undo the last encryption round: XOR round key 32, inverse S-box 7, XOR
// round key 31 (that round had no linear transformation).
593 k_xor(32,a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(31,e,f,g,h);
// Rounds 30 down to 0: inverse linear transformation, inverse S-box
// (r mod 8), then XOR round key r. The state alternates between a..d and
// e..h because each inverse S-box macro writes into the other set.
594 irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(30,a,b,c,d);
595 irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(29,e,f,g,h);
596 irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(28,a,b,c,d);
597 irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(27,e,f,g,h);
598 irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(26,a,b,c,d);
599 irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(25,e,f,g,h);
600 irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(24,a,b,c,d);
601 irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(23,e,f,g,h);
602 irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(22,a,b,c,d);
603 irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(21,e,f,g,h);
604 irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(20,a,b,c,d);
605 irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(19,e,f,g,h);
606 irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(18,a,b,c,d);
607 irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(17,e,f,g,h);
608 irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(16,a,b,c,d);
609 irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(15,e,f,g,h);
610 irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(14,a,b,c,d);
611 irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(13,e,f,g,h);
612 irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(12,a,b,c,d);
613 irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(11,e,f,g,h);
614 irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(10,a,b,c,d);
615 irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 9,e,f,g,h);
616 irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 8,a,b,c,d);
617 irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor( 7,e,f,g,h);
618 irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor( 6,a,b,c,d);
619 irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor( 5,e,f,g,h);
620 irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor( 4,a,b,c,d);
621 irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor( 3,e,f,g,h);
622 irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor( 2,a,b,c,d);
623 irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 1,e,f,g,h);
624 irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 0,a,b,c,d);
// Store the four result words with a little-endian <-> host swap.
// NOTE(review): out_blk is written through a u_int32_t pointer cast, which
// assumes the buffer is suitably aligned for 32-bit access -- confirm.
626 *((u_int32_t *)(out_blk )) = le32toh(a);
627 *((u_int32_t *)(out_blk + 4)) = le32toh(b);
628 *((u_int32_t *)(out_blk + 8)) = le32toh(c);
629 *((u_int32_t *)(out_blk + 12)) = le32toh(d);