/* $OpenBSD: chacha-merged.c,v 1.6 2014/06/24 18:12:09 jsing Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/
12 #define CHACHA_MINKEYLEN 16
13 #define CHACHA_NONCELEN 8
14 #define CHACHA_CTRLEN 8
15 #define CHACHA_STATELEN (CHACHA_NONCELEN+CHACHA_CTRLEN)
16 #define CHACHA_BLOCKLEN 64
20 uint8_t ks[CHACHA_BLOCKLEN];
24 static inline void chacha_keysetup(struct chacha_ctx *x, const u_char *k,
26 __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
27 static inline void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv,
29 __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
30 __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
31 static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m,
32 u_char *c, u_int bytes)
33 __attribute__((__bounded__(__buffer__, 2, 4)))
34 __attribute__((__bounded__(__buffer__, 3, 4)));
typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

/* Build unsigned constants of the expected width. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate to exactly 8/32 bits so the code is correct where int is wider. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/* Rotate a 32-bit value left by n bits (1 <= n <= 31). */
#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

/*
 * Load/store a 32-bit word from/to a byte buffer in little-endian
 * order, byte by byte, so the code is independent of host endianness
 * and alignment.
 */
#define U8TO32_LITTLE(p) \
	(((u32)((p)[0])) | \
	((u32)((p)[1]) << 8) | \
	((u32)((p)[2]) << 16) | \
	((u32)((p)[3]) << 24))

#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v)); \
		(p)[1] = U8V((v) >> 8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * The ChaCha quarter round (D. J. Bernstein, "ChaCha, a variant of
 * Salsa20").  Deliberately NOT wrapped in do/while(0): call sites in
 * chacha_encrypt_bytes invoke it without a trailing semicolon.
 */
#define QUARTERROUND(a,b,c,d) \
	a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
	a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * The four constant state words, as 16 ASCII bytes (the arrays are
 * intentionally sized 16 so no NUL terminator is stored).
 */
static const char sigma[16] = "expand 32-byte k";	/* 256-bit keys */
static const char tau[16] = "expand 16-byte k";		/* 128-bit keys */
79 chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
81 const char *constants;
83 x->input[4] = U8TO32_LITTLE(k + 0);
84 x->input[5] = U8TO32_LITTLE(k + 4);
85 x->input[6] = U8TO32_LITTLE(k + 8);
86 x->input[7] = U8TO32_LITTLE(k + 12);
87 if (kbits == 256) { /* recommended */
90 } else { /* kbits == 128 */
93 x->input[8] = U8TO32_LITTLE(k + 0);
94 x->input[9] = U8TO32_LITTLE(k + 4);
95 x->input[10] = U8TO32_LITTLE(k + 8);
96 x->input[11] = U8TO32_LITTLE(k + 12);
97 x->input[0] = U8TO32_LITTLE(constants + 0);
98 x->input[1] = U8TO32_LITTLE(constants + 4);
99 x->input[2] = U8TO32_LITTLE(constants + 8);
100 x->input[3] = U8TO32_LITTLE(constants + 12);
104 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
106 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
107 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
108 x->input[14] = U8TO32_LITTLE(iv + 0);
109 x->input[15] = U8TO32_LITTLE(iv + 4);
113 chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
115 u32 x0, x1, x2, x3, x4, x5, x6, x7;
116 u32 x8, x9, x10, x11, x12, x13, x14, x15;
117 u32 j0, j1, j2, j3, j4, j5, j6, j7;
118 u32 j8, j9, j10, j11, j12, j13, j14, j15;
145 for (i = 0; i < bytes; ++i)
167 for (i = 20; i > 0; i -= 2) {
168 QUARTERROUND(x0, x4, x8, x12)
169 QUARTERROUND(x1, x5, x9, x13)
170 QUARTERROUND(x2, x6, x10, x14)
171 QUARTERROUND(x3, x7, x11, x15)
172 QUARTERROUND(x0, x5, x10, x15)
173 QUARTERROUND(x1, x6, x11, x12)
174 QUARTERROUND(x2, x7, x8, x13)
175 QUARTERROUND(x3, x4, x9, x14)
187 x10 = PLUS(x10, j10);
188 x11 = PLUS(x11, j11);
189 x12 = PLUS(x12, j12);
190 x13 = PLUS(x13, j13);
191 x14 = PLUS(x14, j14);
192 x15 = PLUS(x15, j15);
195 U32TO8_LITTLE(x->ks + 0, x0);
196 U32TO8_LITTLE(x->ks + 4, x1);
197 U32TO8_LITTLE(x->ks + 8, x2);
198 U32TO8_LITTLE(x->ks + 12, x3);
199 U32TO8_LITTLE(x->ks + 16, x4);
200 U32TO8_LITTLE(x->ks + 20, x5);
201 U32TO8_LITTLE(x->ks + 24, x6);
202 U32TO8_LITTLE(x->ks + 28, x7);
203 U32TO8_LITTLE(x->ks + 32, x8);
204 U32TO8_LITTLE(x->ks + 36, x9);
205 U32TO8_LITTLE(x->ks + 40, x10);
206 U32TO8_LITTLE(x->ks + 44, x11);
207 U32TO8_LITTLE(x->ks + 48, x12);
208 U32TO8_LITTLE(x->ks + 52, x13);
209 U32TO8_LITTLE(x->ks + 56, x14);
210 U32TO8_LITTLE(x->ks + 60, x15);
213 x0 = XOR(x0, U8TO32_LITTLE(m + 0));
214 x1 = XOR(x1, U8TO32_LITTLE(m + 4));
215 x2 = XOR(x2, U8TO32_LITTLE(m + 8));
216 x3 = XOR(x3, U8TO32_LITTLE(m + 12));
217 x4 = XOR(x4, U8TO32_LITTLE(m + 16));
218 x5 = XOR(x5, U8TO32_LITTLE(m + 20));
219 x6 = XOR(x6, U8TO32_LITTLE(m + 24));
220 x7 = XOR(x7, U8TO32_LITTLE(m + 28));
221 x8 = XOR(x8, U8TO32_LITTLE(m + 32));
222 x9 = XOR(x9, U8TO32_LITTLE(m + 36));
223 x10 = XOR(x10, U8TO32_LITTLE(m + 40));
224 x11 = XOR(x11, U8TO32_LITTLE(m + 44));
225 x12 = XOR(x12, U8TO32_LITTLE(m + 48));
226 x13 = XOR(x13, U8TO32_LITTLE(m + 52));
227 x14 = XOR(x14, U8TO32_LITTLE(m + 56));
228 x15 = XOR(x15, U8TO32_LITTLE(m + 60));
234 * Stopping at 2^70 bytes per nonce is the user's
239 U32TO8_LITTLE(c + 0, x0);
240 U32TO8_LITTLE(c + 4, x1);
241 U32TO8_LITTLE(c + 8, x2);
242 U32TO8_LITTLE(c + 12, x3);
243 U32TO8_LITTLE(c + 16, x4);
244 U32TO8_LITTLE(c + 20, x5);
245 U32TO8_LITTLE(c + 24, x6);
246 U32TO8_LITTLE(c + 28, x7);
247 U32TO8_LITTLE(c + 32, x8);
248 U32TO8_LITTLE(c + 36, x9);
249 U32TO8_LITTLE(c + 40, x10);
250 U32TO8_LITTLE(c + 44, x11);
251 U32TO8_LITTLE(c + 48, x12);
252 U32TO8_LITTLE(c + 52, x13);
253 U32TO8_LITTLE(c + 56, x14);
254 U32TO8_LITTLE(c + 60, x15);
258 for (i = 0; i < bytes; ++i)
263 x->unused = 64 - bytes;