Import OpenSSL-0.9.8m.
[dragonfly.git] / crypto / openssl / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #ifdef OPENSSL_FIPS
9 #include <openssl/fips.h>
10 #endif
11
12 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
13 /*
14  * IMPLEMENTATION NOTES.
15  *
16  * As you might have noticed 32-bit hash algorithms:
17  *
18  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
19  * - optimized versions implement two transform functions: one operating
20  *   on [aligned] data in host byte order and one - on data in input
21  *   stream byte order;
22  * - share common byte-order neutral collector and padding function
23  *   implementations, ../md32_common.h;
24  *
 * None of the above applies to this SHA-512 implementation. Reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm at the time of this writing,
 *   so there is no need for a common collector/padding implementation [yet];
30  * - by supporting only one transform function [which operates on
31  *   *aligned* data in input stream byte order, big-endian in this case]
32  *   we minimize burden of maintenance in two ways: a) collector/padding
33  *   function is simpler; b) only one transform function to stare at;
34  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
35  *   apply a number of optimizations to mitigate potential performance
36  *   penalties caused by previous design decision;
37  *
38  * Caveat lector.
39  *
40  * Implementation relies on the fact that "long long" is 64-bit on
41  * both 32- and 64-bit platforms. If some compiler vendor comes up
42  * with 128-bit long long, adjustment to sha.h would be required.
43  * As this implementation relies on 64-bit integer type, it's totally
44  * inappropriate for platforms which don't support it, most notably
45  * 16-bit platforms.
46  *                                      <appro@fy.chalmers.se>
47  */
48 #include <stdlib.h>
49 #include <string.h>
50
51 #include <openssl/crypto.h>
52 #include <openssl/sha.h>
53 #include <openssl/opensslv.h>
54
55 #include "cryptlib.h"
56
/* Identification string: the algorithm name followed by the OpenSSL
 * version text supplied by OPENSSL_VERSION_PTEXT. */
const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;

/*
 * On x86, x86_64 and s390/s390x targets, and whenever SHA512_ASM is
 * defined, flag the block routine as able to consume input that is not
 * aligned on a SHA_LONG64 boundary.  SHA512_Update() tests this macro:
 * when it is defined, the copy of misaligned input through the context
 * buffer is compiled out and caller data is hashed in place.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(SHA512_ASM)
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
65
66 int SHA384_Init (SHA512_CTX *c)
67         {
68 #ifdef OPENSSL_FIPS
69         FIPS_selftest_check();
70 #endif
71         c->h[0]=U64(0xcbbb9d5dc1059ed8);
72         c->h[1]=U64(0x629a292a367cd507);
73         c->h[2]=U64(0x9159015a3070dd17);
74         c->h[3]=U64(0x152fecd8f70e5939);
75         c->h[4]=U64(0x67332667ffc00b31);
76         c->h[5]=U64(0x8eb44a8768581511);
77         c->h[6]=U64(0xdb0c2e0d64f98fa7);
78         c->h[7]=U64(0x47b5481dbefa4fa4);
79         c->Nl=0;        c->Nh=0;
80         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
81         return 1;
82         }
83
84 int SHA512_Init (SHA512_CTX *c)
85         {
86 #ifdef OPENSSL_FIPS
87         FIPS_selftest_check();
88 #endif
89         c->h[0]=U64(0x6a09e667f3bcc908);
90         c->h[1]=U64(0xbb67ae8584caa73b);
91         c->h[2]=U64(0x3c6ef372fe94f82b);
92         c->h[3]=U64(0xa54ff53a5f1d36f1);
93         c->h[4]=U64(0x510e527fade682d1);
94         c->h[5]=U64(0x9b05688c2b3e6c1f);
95         c->h[6]=U64(0x1f83d9abfb41bd6b);
96         c->h[7]=U64(0x5be0cd19137e2179);
97         c->Nl=0;        c->Nh=0;
98         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
99         return 1;
100         }
101
/*
 * Forward declaration of the block transform used by the update/final
 * code below: it folds `num' consecutive blocks starting at `in' into
 * ctx->h.
 *
 * Without SHA512_ASM the routine is the file-local C implementation further
 * down, hence `static'.  With SHA512_ASM the C body is compiled out and the
 * symbol has external linkage (presumably resolved by an assembler module -
 * that module is not part of this file).
 */
#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
106
107 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
108         {
109         unsigned char *p=(unsigned char *)c->u.p;
110         size_t n=c->num;
111
112         p[n]=0x80;      /* There always is a room for one */
113         n++;
114         if (n > (sizeof(c->u)-16))
115                 memset (p+n,0,sizeof(c->u)-n), n=0,
116                 sha512_block_data_order (c,p,1);
117
118         memset (p+n,0,sizeof(c->u)-16-n);
119 #ifdef  B_ENDIAN
120         c->u.d[SHA_LBLOCK-2] = c->Nh;
121         c->u.d[SHA_LBLOCK-1] = c->Nl;
122 #else
123         p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
124         p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
125         p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
126         p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
127         p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
128         p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
129         p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
130         p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
131         p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
132         p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
133         p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
134         p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
135         p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
136         p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
137         p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
138         p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
139 #endif
140
141         sha512_block_data_order (c,p,1);
142
143         if (md==0) return 0;
144
145         switch (c->md_len)
146                 {
147                 /* Let compiler decide if it's appropriate to unroll... */
148                 case SHA384_DIGEST_LENGTH:
149                         for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
150                                 {
151                                 SHA_LONG64 t = c->h[n];
152
153                                 *(md++) = (unsigned char)(t>>56);
154                                 *(md++) = (unsigned char)(t>>48);
155                                 *(md++) = (unsigned char)(t>>40);
156                                 *(md++) = (unsigned char)(t>>32);
157                                 *(md++) = (unsigned char)(t>>24);
158                                 *(md++) = (unsigned char)(t>>16);
159                                 *(md++) = (unsigned char)(t>>8);
160                                 *(md++) = (unsigned char)(t);
161                                 }
162                         break;
163                 case SHA512_DIGEST_LENGTH:
164                         for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
165                                 {
166                                 SHA_LONG64 t = c->h[n];
167
168                                 *(md++) = (unsigned char)(t>>56);
169                                 *(md++) = (unsigned char)(t>>48);
170                                 *(md++) = (unsigned char)(t>>40);
171                                 *(md++) = (unsigned char)(t>>32);
172                                 *(md++) = (unsigned char)(t>>24);
173                                 *(md++) = (unsigned char)(t>>16);
174                                 *(md++) = (unsigned char)(t>>8);
175                                 *(md++) = (unsigned char)(t);
176                                 }
177                         break;
178                 /* ... as well as make sure md_len is not abused. */
179                 default:        return 0;
180                 }
181
182         return 1;
183         }
184
185 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
186 {   return SHA512_Final (md,c);   }
187
188 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
189         {
190         SHA_LONG64      l;
191         unsigned char  *p=c->u.p;
192         const unsigned char *data=(const unsigned char *)_data;
193
194         if (len==0) return  1;
195
196         l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
197         if (l < c->Nl)          c->Nh++;
198         if (sizeof(len)>=8)     c->Nh+=(((SHA_LONG64)len)>>61);
199         c->Nl=l;
200
201         if (c->num != 0)
202                 {
203                 size_t n = sizeof(c->u) - c->num;
204
205                 if (len < n)
206                         {
207                         memcpy (p+c->num,data,len), c->num += len;
208                         return 1;
209                         }
210                 else    {
211                         memcpy (p+c->num,data,n), c->num = 0;
212                         len-=n, data+=n;
213                         sha512_block_data_order (c,p,1);
214                         }
215                 }
216
217         if (len >= sizeof(c->u))
218                 {
219 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
220                 if ((size_t)data%sizeof(c->u.d[0]) != 0)
221                         while (len >= sizeof(c->u))
222                                 memcpy (p,data,sizeof(c->u)),
223                                 sha512_block_data_order (c,p,1),
224                                 len  -= sizeof(c->u),
225                                 data += sizeof(c->u);
226                 else
227 #endif
228                         sha512_block_data_order (c,data,len/sizeof(c->u)),
229                         data += len,
230                         len  %= sizeof(c->u),
231                         data -= len;
232                 }
233
234         if (len != 0)   memcpy (p,data,len), c->num = (int)len;
235
236         return 1;
237         }
238
239 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
240 {   return SHA512_Update (c,data,len);   }
241
242 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
243 {   sha512_block_data_order (c,data,1);  }
244
245 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
246         {
247         SHA512_CTX c;
248         static unsigned char m[SHA384_DIGEST_LENGTH];
249
250         if (md == NULL) md=m;
251         SHA384_Init(&c);
252         SHA512_Update(&c,d,n);
253         SHA512_Final(md,&c);
254         OPENSSL_cleanse(&c,sizeof(c));
255         return(md);
256         }
257
258 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
259         {
260         SHA512_CTX c;
261         static unsigned char m[SHA512_DIGEST_LENGTH];
262
263         if (md == NULL) md=m;
264         SHA512_Init(&c);
265         SHA512_Update(&c,d,n);
266         SHA512_Final(md,&c);
267         OPENSSL_cleanse(&c,sizeof(c));
268         return(md);
269         }
270
271 #ifndef SHA512_ASM
/*
 * Per-round additive constants for the C block transform: entry i is added
 * into T1 in round i (0..79).  Only compiled when SHA512_ASM is not defined.
 */
static const SHA_LONG64 K512[80] = {
        U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
        U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
        U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
        U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
        U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
        U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
        U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
        U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
        U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
        U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
        U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
        U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
        U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
        U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
        U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
        U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
        U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
        U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
        U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
        U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
        U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
        U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
        U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
        U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
        U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
        U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
        U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
        U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
        U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
        U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
        U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
        U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
        U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
        U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
        U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
        U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
        U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
        U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
        U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
        U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
313
/*
 * Optional compiler- and CPU-specific fast paths.  The whole section is
 * skipped when PEDANTIC is defined.  It may define either or both of:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - read the 8 bytes stored at &x as a big-endian 64-bit
 *                value on a little-endian host.
 *
 * Whatever is left undefined here is supplied by the portable C macros
 * that follow this section.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
/*
 * The result is held in SHA_LONG64 rather than unsigned long: with GCC on
 * LLP64 targets (e.g. 64-bit Windows) unsigned long is only 32 bits wide,
 * which would truncate the rotated value.  n has to be a compile-time
 * constant to satisfy the immediate-operand constraint.
 */
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#   if !defined(B_ENDIAN)
/* Load the 64-bit word as stored, then reverse its bytes with bswapq. */
#    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
/*
 * 32-bit x86: read the word as two 32-bit halves (p[0] becomes the high
 * half of the result, p[1] the low half), byte-swap each half and glue
 * them together.  I386_ONLY builds do the swap with xchgb/roll instead of
 * the bswapl instruction.
 */
#   if defined(I386_ONLY)
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];          \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   else
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];                  \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC rotate; SHA_LONG64 temporary for the same reason as above. */
#   define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
#   define ROTR(a,n)    _rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC helper behind PULL64.  The body is pure inline assembly: it
 * reads the pointer from ecx, leaves the byte-swapped first four bytes in
 * edx and the byte-swapped last four bytes in eax, and has no C return
 * statement.  NOTE(review): this relies on __fastcall passing x in ecx and
 * on 64-bit results being returned in edx:eax - confirm against the
 * compiler documentation.
 */
#   if defined(I386_ONLY)
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
        _asm    rol     edx,16
        _asm    rol     eax,16
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
    }
#   else
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    bswap   edx
        _asm    bswap   eax
    }
#   endif
#   define PULL64(x) __pull64be(&(x))
/* NOTE(review): presumably turns inlining off for old compilers
 * (_MSC_VER<=1200) that mishandle the helper above - confirm. */
#   if _MSC_VER<=1200
#    pragma inline_depth(0)
#   endif
#  endif
# endif
#endif
383
/*
 * Portable fallbacks and the SHA-512 logical functions.
 *
 * B(x,j)     - byte j (0 = first in memory) of the object x, moved to the
 *              position it occupies in a big-endian 64-bit value.
 * PULL64(x)  - the 8 bytes at &x assembled as a big-endian 64-bit value;
 *              only defined if no platform-specific version exists.
 * ROTR(x,s)  - rotate the 64-bit value x right by s bits (0 < s < 64);
 *              only defined if no platform-specific version exists.
 *
 * Every macro argument is fully parenthesised so that expressions such as
 * ROTR(v,k+1) or B(buf[0],i+1) expand with the intended precedence.
 * Arguments are evaluated more than once, so they must be free of side
 * effects.
 */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#endif

/* Mixing functions applied to the working variables in every round... */
#define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* ...and to the message schedule when it is extended past 16 words. */
#define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: bitwise "x ? y : z".  Maj: bitwise majority of x, y and z. */
#define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
400
/*
 * GO_FOR_SSE2(ctx,in,num) - optional dispatch to an external SSE2 block
 * routine.  Only defined when OPENSSL_IA32_SSE2 is set and assembler is
 * neither disabled (OPENSSL_NO_ASM) nor restricted to plain i386
 * (I386_ONLY).
 *
 * Meant to be expanded at the top of a void function.  If bit 26 of
 * OPENSSL_ia32cap_P is clear, "break" leaves the do/while and the caller
 * carries on with its own code.  Otherwise ctx->h and the input are handed
 * to sha512_block_sse2() and the *enclosing* function returns.
 * NOTE(review): bit 26 is presumably the CPUID SSE2 feature flag - confirm.
 */
#if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
#define GO_FOR_SSE2(ctx,in,num)         do {            \
        void    sha512_block_sse2(void *,const void *,size_t);  \
        if (!(OPENSSL_ia32cap_P & (1<<26))) break;      \
        sha512_block_sse2(ctx->h,in,num); return;       \
                                        } while (0)
#endif
408
409 #ifdef OPENSSL_SMALL_FOOTPRINT
410
411 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
412         {
413         const SHA_LONG64 *W=in;
414         SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1,T2;
415         SHA_LONG64      X[16];
416         int i;
417
418 #ifdef GO_FOR_SSE2
419         GO_FOR_SSE2(ctx,in,num);
420 #endif
421
422                         while (num--) {
423
424         a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
425         e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];
426
427         for (i=0;i<16;i++)
428                 {
429 #ifdef B_ENDIAN
430                 T1 = X[i] = W[i];
431 #else
432                 T1 = X[i] = PULL64(W[i]);
433 #endif
434                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
435                 T2 = Sigma0(a) + Maj(a,b,c);
436                 h = g;  g = f;  f = e;  e = d + T1;
437                 d = c;  c = b;  b = a;  a = T1 + T2;
438                 }
439
440         for (;i<80;i++)
441                 {
442                 s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);
443                 s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);
444
445                 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
446                 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
447                 T2 = Sigma0(a) + Maj(a,b,c);
448                 h = g;  g = f;  f = e;  e = d + T1;
449                 d = c;  c = b;  b = a;  a = T1 + T2;
450                 }
451
452         ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
453         ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
454
455                         W+=SHA_LBLOCK;
456                         }
457         }
458
459 #else
460
/*
 * Round helpers for the unrolled block transform.  They rely on the locals
 * T1, s0, s1 and on K512 being in scope at the point of expansion.
 *
 * ROUND_00_15(i,a,b,c,d,e,f,g,h) performs round i.  T1 must already hold
 * the schedule word for the round.  Instead of shifting the eight working
 * variables, the macro overwrites the variable passed as `h' with the new
 * value that would become `a', and adds T1 to the one passed as `d'; the
 * caller then rotates the argument list by one position for the next
 * round.
 */
#define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
        h = Sigma0(a) + Maj(a,b,c);                     \
        d += T1;        h += T1;                } while (0)

/*
 * ROUND_16_80(i,a,b,c,d,e,f,g,h,X) first extends the message schedule in
 * place - X is used as a 16-entry ring buffer indexed modulo 16 - leaving
 * the new word in both X[i&0x0f] and T1, then performs the round exactly
 * as ROUND_00_15 does.
 */
#define ROUND_16_80(i,a,b,c,d,e,f,g,h,X)        do {    \
        s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);        \
        s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);        \
        T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];    \
        ROUND_00_15(i,a,b,c,d,e,f,g,h);         } while (0)
471
/*
 * sha512_block_data_order - unrolled block transform (used when
 * OPENSSL_SMALL_FOOTPRINT is not defined).
 *
 * ctx  hash state; ctx->h[0..7] is updated once per block.
 * in   num consecutive blocks of SHA_LBLOCK 64-bit words in input stream
 *      (big-endian) byte order.
 * num  number of blocks to process.
 *
 * When GO_FOR_SSE2 is available and the CPU qualifies, the work is handed
 * to the SSE2 routine and this function returns immediately.
 */
static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
        {
        const SHA_LONG64 *W=in;
        SHA_LONG64      a,b,c,d,e,f,g,h,s0,s1,T1;
        SHA_LONG64      X[16];
        int i;

#ifdef GO_FOR_SSE2
        GO_FOR_SSE2(ctx,in,num);
#endif

                        while (num--) {

        /* Start every block from the current chaining value. */
        a = ctx->h[0];  b = ctx->h[1];  c = ctx->h[2];  d = ctx->h[3];
        e = ctx->h[4];  f = ctx->h[5];  g = ctx->h[6];  h = ctx->h[7];

        /*
         * Rounds 0..15: the schedule word is the input word itself, taken
         * as is on big-endian hosts and through PULL64 otherwise.  The
         * argument list is rotated by one position per round in place of
         * moving the working variables.
         */
#ifdef B_ENDIAN
        T1 = X[0] = W[0];       ROUND_00_15(0,a,b,c,d,e,f,g,h);
        T1 = X[1] = W[1];       ROUND_00_15(1,h,a,b,c,d,e,f,g);
        T1 = X[2] = W[2];       ROUND_00_15(2,g,h,a,b,c,d,e,f);
        T1 = X[3] = W[3];       ROUND_00_15(3,f,g,h,a,b,c,d,e);
        T1 = X[4] = W[4];       ROUND_00_15(4,e,f,g,h,a,b,c,d);
        T1 = X[5] = W[5];       ROUND_00_15(5,d,e,f,g,h,a,b,c);
        T1 = X[6] = W[6];       ROUND_00_15(6,c,d,e,f,g,h,a,b);
        T1 = X[7] = W[7];       ROUND_00_15(7,b,c,d,e,f,g,h,a);
        T1 = X[8] = W[8];       ROUND_00_15(8,a,b,c,d,e,f,g,h);
        T1 = X[9] = W[9];       ROUND_00_15(9,h,a,b,c,d,e,f,g);
        T1 = X[10] = W[10];     ROUND_00_15(10,g,h,a,b,c,d,e,f);
        T1 = X[11] = W[11];     ROUND_00_15(11,f,g,h,a,b,c,d,e);
        T1 = X[12] = W[12];     ROUND_00_15(12,e,f,g,h,a,b,c,d);
        T1 = X[13] = W[13];     ROUND_00_15(13,d,e,f,g,h,a,b,c);
        T1 = X[14] = W[14];     ROUND_00_15(14,c,d,e,f,g,h,a,b);
        T1 = X[15] = W[15];     ROUND_00_15(15,b,c,d,e,f,g,h,a);
#else
        T1 = X[0]  = PULL64(W[0]);      ROUND_00_15(0,a,b,c,d,e,f,g,h);
        T1 = X[1]  = PULL64(W[1]);      ROUND_00_15(1,h,a,b,c,d,e,f,g);
        T1 = X[2]  = PULL64(W[2]);      ROUND_00_15(2,g,h,a,b,c,d,e,f);
        T1 = X[3]  = PULL64(W[3]);      ROUND_00_15(3,f,g,h,a,b,c,d,e);
        T1 = X[4]  = PULL64(W[4]);      ROUND_00_15(4,e,f,g,h,a,b,c,d);
        T1 = X[5]  = PULL64(W[5]);      ROUND_00_15(5,d,e,f,g,h,a,b,c);
        T1 = X[6]  = PULL64(W[6]);      ROUND_00_15(6,c,d,e,f,g,h,a,b);
        T1 = X[7]  = PULL64(W[7]);      ROUND_00_15(7,b,c,d,e,f,g,h,a);
        T1 = X[8]  = PULL64(W[8]);      ROUND_00_15(8,a,b,c,d,e,f,g,h);
        T1 = X[9]  = PULL64(W[9]);      ROUND_00_15(9,h,a,b,c,d,e,f,g);
        T1 = X[10] = PULL64(W[10]);     ROUND_00_15(10,g,h,a,b,c,d,e,f);
        T1 = X[11] = PULL64(W[11]);     ROUND_00_15(11,f,g,h,a,b,c,d,e);
        T1 = X[12] = PULL64(W[12]);     ROUND_00_15(12,e,f,g,h,a,b,c,d);
        T1 = X[13] = PULL64(W[13]);     ROUND_00_15(13,d,e,f,g,h,a,b,c);
        T1 = X[14] = PULL64(W[14]);     ROUND_00_15(14,c,d,e,f,g,h,a,b);
        T1 = X[15] = PULL64(W[15]);     ROUND_00_15(15,b,c,d,e,f,g,h,a);
#endif

        /*
         * Rounds 16..79, eight per iteration so that the rotated argument
         * list is back in its original order at the end of each pass.
         */
        for (i=16;i<80;i+=8)
                {
                ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
                ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
                ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
                ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
                ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
                ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
                ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
                ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
                }

        /* Fold the block's result back into the chaining value. */
        ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
        ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;

                        /* Advance to the next input block. */
                        W+=SHA_LBLOCK;
                        }
        }
542
543 #endif
544
545 #endif /* SHA512_ASM */
546
547 #else /* OPENSSL_NO_SHA512 */
548
/*
 * Placeholder declaration for builds in which the SHA-512 code above is
 * compiled out (OPENSSL_NO_SHA or OPENSSL_NO_SHA512 defined).
 *
 * Sensitive compilers ("Compaq C V6.4-005 on OpenVMS VAX V7.3", for
 * example) dislike a statement-free file, complaining:
 * "%CC-W-EMPTYFILE, Source file does not contain any declarations."
 */

int sha512_dummy();
555
556 #endif /* OPENSSL_NO_SHA512 */