2 Copyright 2010-2011, D. E. Shaw Research.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions, and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions, and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 * Neither the name of D. E. Shaw Research nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #ifndef _threefry_dot_h_
33 #define _threefry_dot_h_
34 #include "features/compilerfeatures.h"
37 /** \cond HIDDEN_FROM_DOXYGEN */
38 /* Significant parts of this file were copied from
40 Skein_FinalRnd/ReferenceImplementation/skein.h
41 Skein_FinalRnd/ReferenceImplementation/skein_block.c
43 in http://csrc.nist.gov/groups/ST/hash/sha-3/Round3/documents/Skein_FinalRnd.zip
45 This file has been modified so that it may no longer perform its originally
46 intended function. If you're looking for a Skein or Threefish source code,
47 please consult the original file.
49 The original file had the following header:
50 **************************************************************************
52 ** Interface declarations and internal definitions for Skein hashing.
54 ** Source code author: Doug Whiting, 2008.
56 ** This algorithm and source code is released to the public domain.
58 ***************************************************************************
62 /* See comment at the top of philox.h for the macro pre-process strategy. */
65 /* Rotation constants: */
/* Rotation amounts for the 4x64 variant.  The 4x generator macro pastes
   these names as R_64x4_<k>_<j>: <k> (0..7) is the position in the
   eight-round rotation schedule, which then repeats, and <j> (0 or 1)
   selects which of the two mixes performed in that round uses the value. */
66 enum r123_enum_threefry64x4 {
67 /* These are the R_256 constants from the Threefish reference sources
68 with names changed to R_64x4... */
69 R_64x4_0_0=14, R_64x4_0_1=16,
70 R_64x4_1_0=52, R_64x4_1_1=57,
71 R_64x4_2_0=23, R_64x4_2_1=40,
72 R_64x4_3_0= 5, R_64x4_3_1=37,
73 R_64x4_4_0=25, R_64x4_4_1=33,
74 R_64x4_5_0=46, R_64x4_5_1=12,
75 R_64x4_6_0=58, R_64x4_6_1=22,
76 R_64x4_7_0=32, R_64x4_7_1=32
/* Rotation amounts for the 2x64 variant.
   NOTE(review): no R_64x2_* enumerators are present between the two comment
   runs below, although the 2x generator macro pastes R_<W>x2_0_0 through
   R_<W>x2_7_0 -- confirm the constants (and the comment delimiters around
   the search output) were not lost from this listing. */
79 enum r123_enum_threefry64x2 {
81 // Output from skein_rot_search: (srs64_B64-X1000)
82 // Random seed = 1. BlockSize = 128 bits. sampleCnt = 1024. rounds = 8, minHW_or=57
83 // Start: Tue Mar 1 10:07:48 2011
84 // rMin = 0.136. #0325[*15] [CRC=455A682F. hw_OR=64. cnt=16384. blkSize= 128].format
94 /* 4 rounds: minHW = 4 [ 4 4 4 4 ]
95 // 5 rounds: minHW = 8 [ 8 8 8 8 ]
96 // 6 rounds: minHW = 16 [ 16 16 16 16 ]
97 // 7 rounds: minHW = 32 [ 32 32 32 32 ]
98 // 8 rounds: minHW = 64 [ 64 64 64 64 ]
99 // 9 rounds: minHW = 64 [ 64 64 64 64 ]
100 //10 rounds: minHW = 64 [ 64 64 64 64 ]
101 //11 rounds: minHW = 64 [ 64 64 64 64 ] */
/* Rotation amounts for the 4x32 variant, named R_32x4_<k>_<j> with the same
   meaning as in the 4x64 table: <k> (0..7) is the position in the
   eight-round schedule and <j> (0 or 1) picks one of the two mixes of that
   round.  The surrounding comments record the skein_rot_search run that
   produced the values and the minHW figures it reported per round count. */
104 enum r123_enum_threefry32x4 {
105 /* Output from skein_rot_search: (srs-B128-X5000.out)
106 // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28
107 // Start: Mon Aug 24 22:41:36 2009
109 // rMin = 0.472. #0A4B[*33] [CRC=DD1ECE0F. hw_OR=31. cnt=16384. blkSize= 128].format */
110 R_32x4_0_0=10, R_32x4_0_1=26,
111 R_32x4_1_0=11, R_32x4_1_1=21,
112 R_32x4_2_0=13, R_32x4_2_1=27,
113 R_32x4_3_0=23, R_32x4_3_1= 5,
114 R_32x4_4_0= 6, R_32x4_4_1=20,
115 R_32x4_5_0=17, R_32x4_5_1=11,
116 R_32x4_6_0=25, R_32x4_6_1=10,
117 R_32x4_7_0=18, R_32x4_7_1=20
119 /* 4 rounds: minHW = 3 [ 3 3 3 3 ]
120 // 5 rounds: minHW = 7 [ 7 7 7 7 ]
121 // 6 rounds: minHW = 12 [ 13 12 13 12 ]
122 // 7 rounds: minHW = 22 [ 22 23 22 23 ]
123 // 8 rounds: minHW = 31 [ 31 31 31 31 ]
124 // 9 rounds: minHW = 32 [ 32 32 32 32 ]
125 //10 rounds: minHW = 32 [ 32 32 32 32 ]
126 //11 rounds: minHW = 32 [ 32 32 32 32 ] */
/* Rotation amounts for the 2x32 variant.
   NOTE(review): no R_32x2_* enumerators are present between the two comment
   blocks below, although the 2x generator macro pastes R_<W>x2_0_0 through
   R_<W>x2_7_0 -- confirm the constants were not lost from this listing. */
130 enum r123_enum_threefry32x2 {
131 /* Output from skein_rot_search (srs32x2-X5000.out)
132 // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28
133 // Start: Tue Jul 12 11:11:33 2011
134 // rMin = 0.334. #0206[*07] [CRC=1D9765C0. hw_OR=32. cnt=16384. blkSize= 64].format */
144 /* 4 rounds: minHW = 4 [ 4 4 4 4 ]
145 // 5 rounds: minHW = 6 [ 6 8 6 8 ]
146 // 6 rounds: minHW = 9 [ 9 12 9 12 ]
147 // 7 rounds: minHW = 16 [ 16 24 16 24 ]
148 // 8 rounds: minHW = 32 [ 32 32 32 32 ]
149 // 9 rounds: minHW = 32 [ 32 32 32 32 ]
150 //10 rounds: minHW = 32 [ 32 32 32 32 ]
151 //11 rounds: minHW = 32 [ 32 32 32 32 ] */
/* NOTE(review): only the opening line of this enum is present in this
   listing.  Comments in the 4x generator macro mention WCNT4, presumably an
   enumerator declared here -- confirm against the pristine header. */
154 enum r123_enum_threefry_wcnt {
/* RotL_64: rotate the 64-bit value x left by N bit positions.
   Both shift counts are masked with 63, so only N modulo 64 matters and
   neither shift is ever by the full width of the type (for N == 0 the
   right-shift count becomes 0 rather than 64).
   The first line is a forward declaration wrapped in R123_FORCE_INLINE;
   the definition follows it. */
158 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint64_t RotL_64(uint64_t x, unsigned int N));
159 R123_CUDA_DEVICE R123_STATIC_INLINE uint64_t RotL_64(uint64_t x, unsigned int N)
161 return (x << (N & 63)) | (x >> ((64-N) & 63));
/* RotL_32: rotate the 32-bit value x left by N bit positions.
   Both shift counts are masked with 31, so only N modulo 32 matters and
   neither shift is ever by the full width of the type (for N == 0 the
   right-shift count becomes 0 rather than 32).
   The first line is a forward declaration wrapped in R123_FORCE_INLINE;
   the definition follows it. */
164 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint32_t RotL_32(uint32_t x, unsigned int N));
165 R123_CUDA_DEVICE R123_STATIC_INLINE uint32_t RotL_32(uint32_t x, unsigned int N)
167 return (x << (N & 31)) | (x >> ((32-N) & 31));
/* SKEIN_MK_64 assembles a 64-bit constant from two 32-bit halves: hi32 is
   converted to uint64_t and shifted into the upper 32 bits, then lo32 is
   added.
   SKEIN_KS_PARITY64 / SKEIN_KS_PARITY32 are the values the generator macros
   store in the extra, last key-schedule word (ks[2] or ks[4]) through the
   pasted name SKEIN_KS_PARITY##W. */
170 #define SKEIN_MK_64(hi32,lo32) ((lo32) + (((uint64_t) (hi32)) << 32))
171 #define SKEIN_KS_PARITY64 SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22)
172 #define SKEIN_KS_PARITY32 0x1BD11BDA
/* Default round count (20) for each of the four variants.  Every definition
   is guarded by #ifndef, so a value already defined when this point is
   reached takes precedence.  The generator macros turn these into the enum
   constants threefryNxW_rounds. */
174 #ifndef THREEFRY2x32_DEFAULT_ROUNDS
175 #define THREEFRY2x32_DEFAULT_ROUNDS 20
178 #ifndef THREEFRY2x64_DEFAULT_ROUNDS
179 #define THREEFRY2x64_DEFAULT_ROUNDS 20
182 #ifndef THREEFRY4x32_DEFAULT_ROUNDS
183 #define THREEFRY4x32_DEFAULT_ROUNDS 20
186 #ifndef THREEFRY4x64_DEFAULT_ROUNDS
187 #define THREEFRY4x64_DEFAULT_ROUNDS 20
/* _threefry2x_tpl(W): generator for the two-word variant of word width W.
   It emits
     - the typedefs threefry2x<W>_ctr_t, _key_t and _ukey_t, all naming
       struct r123array2x<W>;
     - threefry2x<W>keyinit(uk), which returns uk unchanged;
     - threefry2x<W>_R(Nrounds, in, k), the round function, unrolled for up
       to 32 rounds (see the R123_ASSERT).  Each round is guarded by its own
       "Nrounds > n" test and is one add / rotate / xor mix of X.v[0] and
       X.v[1]; the rotation constant cycles through R_<W>x2_0_0 ..
       R_<W>x2_7_0.  After every fourth round a key injection adds
       ks[r mod 3] to X.v[0] and ks[(r+1) mod 3] to X.v[1];
     - the enum constant threefry2x<W>_rounds and the wrapper
       threefry2x<W>(in, k), which calls the _R function with that default.
   NOTE(review): several continuation lines appear to be missing from this
   listing (the body of the ks[] set-up loop, the guards around the key
   injections, the "+= r" step for r >= 2, the final return of X and the
   closing braces) -- confirm against the pristine header before relying on
   the text below. */
190 #define _threefry2x_tpl(W) \
191 typedef struct r123array2x##W threefry2x##W##_ctr_t; \
192 typedef struct r123array2x##W threefry2x##W##_key_t; \
193 typedef struct r123array2x##W threefry2x##W##_ukey_t; \
194 R123_CUDA_DEVICE R123_STATIC_INLINE threefry2x##W##_key_t threefry2x##W##keyinit(threefry2x##W##_ukey_t uk) { return uk; } \
195 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
196 R123_CUDA_DEVICE R123_STATIC_INLINE \
197 threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \
198 threefry2x##W##_ctr_t X; \
199 uint##W##_t ks[2+1]; \
200 int i; /* avoid size_t to avoid need for stddef.h */ \
201 R123_ASSERT(Nrounds<=32); \
202 ks[2] = SKEIN_KS_PARITY##W; \
203 for (i=0;i < 2; i++) \
210 /* Insert initial key before round 0 */ \
211 X.v[0] += ks[0]; X.v[1] += ks[1]; \
213 if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
214 if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
215 if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
216 if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
218 /* InjectKey(r=1) */ \
219 X.v[0] += ks[1]; X.v[1] += ks[2]; \
220 X.v[1] += 1; /* X.v[2-1] += r */ \
222 if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
223 if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
224 if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
225 if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
227 /* InjectKey(r=2) */ \
228 X.v[0] += ks[2]; X.v[1] += ks[0]; \
231 if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
232 if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
233 if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
234 if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
236 /* InjectKey(r=3) */ \
237 X.v[0] += ks[0]; X.v[1] += ks[1]; \
240 if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
241 if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
242 if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
243 if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
245 /* InjectKey(r=4) */ \
246 X.v[0] += ks[1]; X.v[1] += ks[2]; \
249 if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
250 if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
251 if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
252 if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
254 /* InjectKey(r=5) */ \
255 X.v[0] += ks[2]; X.v[1] += ks[0]; \
258 if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
259 if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
260 if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
261 if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
263 /* InjectKey(r=6) */ \
264 X.v[0] += ks[0]; X.v[1] += ks[1]; \
267 if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
268 if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
269 if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
270 if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
272 /* InjectKey(r=7) */ \
273 X.v[0] += ks[1]; X.v[1] += ks[2]; \
276 if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
277 if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
278 if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
279 if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
281 /* InjectKey(r=8) */ \
282 X.v[0] += ks[2]; X.v[1] += ks[0]; \
287 /** @ingroup ThreefryNxW */ \
288 enum r123_enum_threefry2x##W { threefry2x##W##_rounds = THREEFRY2x##W##_DEFAULT_ROUNDS }; \
289 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
290 R123_CUDA_DEVICE R123_STATIC_INLINE \
291 threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \
292 return threefry2x##W##_R(threefry2x##W##_rounds, in, k); \
/* _threefry4x_tpl(W): generator for the four-word variant of word width W.
   It emits
     - the typedefs threefry4x<W>_ctr_t, _key_t and _ukey_t, all naming
       struct r123array4x<W>;
     - threefry4x<W>keyinit(uk), which returns uk unchanged;
     - threefry4x<W>_R(Nrounds, in, k), the round function, unrolled for up
       to 72 rounds (see the R123_ASSERT).  Each round is two add / rotate /
       xor mixes: rounds at even positions pair (v0,v1) and (v2,v3), rounds
       at odd positions pair (v0,v3) and (v2,v1).  The rotation constants
       cycle through R_<W>x4_0_* .. R_<W>x4_7_*.  After every fourth round,
       key injection number r adds ks[(r+i) mod 5] to X.v[i] for i = 0..3 and
       then adds r to X.v[3]; r runs from 1 to 18;
     - the enum constant threefry4x<W>_rounds and the wrapper
       threefry4x<W>(in, k), which calls the _R function with that default.
   NOTE(review): unlike the 2x generator, the rounds shown here carry no
   "Nrounds > n" guards, and the body of the ks[] set-up loop, the final
   return of X and the closing braces are also absent -- those continuation
   lines appear to be missing from this listing; confirm against the pristine
   header before relying on the text below. */
296 #define _threefry4x_tpl(W) \
297 typedef struct r123array4x##W threefry4x##W##_ctr_t; \
298 typedef struct r123array4x##W threefry4x##W##_key_t; \
299 typedef struct r123array4x##W threefry4x##W##_ukey_t; \
300 R123_CUDA_DEVICE R123_STATIC_INLINE threefry4x##W##_key_t threefry4x##W##keyinit(threefry4x##W##_ukey_t uk) { return uk; } \
301 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
302 R123_CUDA_DEVICE R123_STATIC_INLINE \
303 threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
304 threefry4x##W##_ctr_t X; \
305 uint##W##_t ks[4+1]; \
306 int i; /* avoid size_t to avoid need for stddef.h */ \
307 R123_ASSERT(Nrounds<=72); \
308 ks[4] = SKEIN_KS_PARITY##W; \
309 for (i=0;i < 4; i++) \
316 /* Insert initial key before round 0 */ \
317 X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
320 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
321 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
324 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
325 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
328 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
329 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
332 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
333 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
336 /* InjectKey(r=1) */ \
337 X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
338 X.v[4-1] += 1; /* X.v[WCNT4-1] += r */ \
342 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
343 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
346 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
347 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
350 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
351 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
354 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
355 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
358 /* InjectKey(r=2) */ \
359 X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
360 X.v[4-1] += 2; /* X.v[WCNT4-1] += r */ \
364 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
365 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
368 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
369 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
372 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
373 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
376 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
377 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
380 /* InjectKey(r=3) */ \
381 X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
382 X.v[4-1] += 3; /* X.v[WCNT4-1] += r */ \
386 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
387 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
390 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
391 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
394 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
395 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
398 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
399 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
402 /* InjectKey(r=4) */ \
403 X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
404 X.v[4-1] += 4; /* X.v[WCNT4-1] += r */ \
408 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
409 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
412 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
413 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
416 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
417 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
420 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
421 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
424 /* InjectKey(r=5) */ \
425 X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
426 X.v[4-1] += 5; /* X.v[WCNT4-1] += r */ \
430 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
431 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
434 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
435 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
438 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
439 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
442 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
443 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
446 /* InjectKey(r=6) */ \
447 X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
448 X.v[4-1] += 6; /* X.v[WCNT4-1] += r */ \
452 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
453 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
456 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
457 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
460 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
461 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
464 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
465 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
468 /* InjectKey(r=7) */ \
469 X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
470 X.v[4-1] += 7; /* X.v[WCNT4-1] += r */ \
474 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
475 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
478 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
479 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
482 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
483 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
486 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
487 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
490 /* InjectKey(r=8) */ \
491 X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
492 X.v[4-1] += 8; /* X.v[WCNT4-1] += r */ \
496 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
497 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
500 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
501 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
504 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
505 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
508 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
509 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
512 /* InjectKey(r=9) */ \
513 X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
514 X.v[4-1] += 9; /* X.v[WCNT4-1] += r */ \
518 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
519 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
522 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
523 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
526 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
527 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
530 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
531 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
534 /* InjectKey(r=10) */ \
535 X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
536 X.v[4-1] += 10; /* X.v[WCNT4-1] += r */ \
540 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
541 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
544 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
545 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
548 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
549 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
552 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
553 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
556 /* InjectKey(r=11) */ \
557 X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
558 X.v[4-1] += 11; /* X.v[WCNT4-1] += r */ \
562 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
563 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
566 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
567 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
570 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
571 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
574 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
575 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
578 /* InjectKey(r=12) */ \
579 X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
580 X.v[4-1] += 12; /* X.v[WCNT4-1] += r */ \
584 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
585 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
588 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
589 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
592 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
593 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
596 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
597 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
600 /* InjectKey(r=13) */ \
601 X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
602 X.v[4-1] += 13; /* X.v[WCNT4-1] += r */ \
606 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
607 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
610 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
611 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
614 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
615 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
618 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
619 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
622 /* InjectKey(r=14) */ \
623 X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
624 X.v[4-1] += 14; /* X.v[WCNT4-1] += r */ \
628 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
629 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
632 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
633 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
636 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
637 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
640 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
641 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
644 /* InjectKey(r=15) */ \
645 X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
646 X.v[4-1] += 15; /* X.v[WCNT4-1] += r */ \
650 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
651 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
654 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
655 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
658 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
659 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
662 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
663 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
666 /* InjectKey(r=16) */ \
667 X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
668 X.v[4-1] += 16; /* X.v[WCNT4-1] += r */ \
672 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
673 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
676 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
677 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
680 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
681 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
684 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
685 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
688 /* InjectKey(r=17) */ \
689 X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
690 X.v[4-1] += 17; /* X.v[WCNT4-1] += r */ \
694 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
695 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
698 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
699 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
702 X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
703 X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
706 X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
707 X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
710 /* InjectKey(r=18) */ \
711 X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
712 X.v[4-1] += 18; /* X.v[WCNT4-1] += r */ \
717 /** @ingroup ThreefryNxW */ \
718 enum r123_enum_threefry4x##W { threefry4x##W##_rounds = THREEFRY4x##W##_DEFAULT_ROUNDS }; \
719 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
720 R123_CUDA_DEVICE R123_STATIC_INLINE \
721 threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
722 return threefry4x##W##_R(threefry4x##W##_rounds, in, k); \
731 /* gcc4.5 and 4.6 seem to optimize a macro-ized threefryNxW better
732 than a static inline function. Why? */
/* Function-like macro forms of the default-round entry points: each one
   expands to the matching threefryNxW_R function with the threefryNxW_rounds
   constant as its first argument, followed by the counter c and key k. */
733 #define threefry2x32(c,k) threefry2x32_R(threefry2x32_rounds, c, k)
734 #define threefry4x32(c,k) threefry4x32_R(threefry4x32_rounds, c, k)
735 #define threefry2x64(c,k) threefry2x64_R(threefry2x64_rounds, c, k)
736 #define threefry4x64(c,k) threefry4x64_R(threefry4x64_rounds, c, k)
739 /** \cond HIDDEN_FROM_DOXYGEN */
/* _threefryNxWclass_tpl(NxW): C++ wrapper generator.  For a given NxW it
   emits the class template Threefry<NxW>_R<R>, which exposes ctr_type,
   key_type and ukey_type (the C typedefs threefry<NxW>_ctr_t / _key_t), the
   constant rounds = R, and an operator()(ctr, key) that forwards to the C
   function threefry<NxW>_R(R, ctr, key).  It also emits the typedef
   Threefry<NxW> for the default round count threefry<NxW>_rounds.
   NOTE(review): the static assertion accepts R up to 72 for every NxW, but
   the 2x functions produced by _threefry2x_tpl assert Nrounds<=32 and are
   only unrolled that far -- the 2x classes probably need a tighter bound;
   confirm.
   NOTE(review): the enclosing namespace and the closing braces are not
   present in this listing. */
740 #define _threefryNxWclass_tpl(NxW) \
742 template<unsigned int R> \
743 struct Threefry##NxW##_R{ \
744 typedef threefry##NxW##_ctr_t ctr_type; \
745 typedef threefry##NxW##_key_t key_type; \
746 typedef threefry##NxW##_key_t ukey_type; \
747 static const unsigned int rounds=R; \
748 inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key)){ \
749 R123_STATIC_ASSERT(R<=72, "threefry is only unrolled up to 72 rounds\n"); \
750 return threefry##NxW##_R(R, ctr, key); \
753 typedef Threefry##NxW##_R<threefry##NxW##_rounds> Threefry##NxW; \
/* Emit the C++ wrapper class template and its default-round typedef for
   each of the four NxW variants. */
758 _threefryNxWclass_tpl(2x32)
759 _threefryNxWclass_tpl(4x32)
760 _threefryNxWclass_tpl(2x64)
761 _threefryNxWclass_tpl(4x64)
763 /* The _tpl macros don't quite work to do string-pasting inside comments,
764 so we just write out the boilerplate documentation four times... */
767 @defgroup ThreefryNxW Threefry Classes and Typedefs
769 The ThreefryNxW classes export the member functions, typedefs and
770 operator overloads required by a @ref CBRNG "CBRNG" class.
773 <a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers: As Easy as 1, 2, 3</i> </a>,
774 the Threefry family is closely related to the Threefish block cipher from
775 <a href="http://www.skein-hash.info/"> Skein Hash Function</a>.
776 Threefry is \b not suitable for cryptographic use.
778 Threefry uses integer addition, bitwise rotation, xor and permutation of words to randomize its output.
780 @class r123::Threefry2x32_R
783 exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
785 The template argument, ROUNDS, is the number of times the Threefry round
786 function will be applied.
788 As of September 2011, the authors know of no statistical flaws with
789 ROUNDS=13 or more for Threefry2x32.
791 @typedef r123::Threefry2x32
793 Threefry2x32 is equivalent to Threefry2x32_R<20>. With 20 rounds,
794 Threefry2x32 has a considerable safety margin over the minimum number
795 of rounds with no known statistical flaws, but still has excellent performance.
798 @class r123::Threefry2x64_R
801 exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
803 The template argument, ROUNDS, is the number of times the Threefry round
804 function will be applied.
806 In November 2011, the authors discovered that 13 rounds of
807 Threefry2x64 sequenced by strided, interleaved key and counter
808 increments failed a very long (longer than the default BigCrush
809 length) WeightDistrib test. At the same time, it was confirmed that
810 14 rounds passes much longer tests (up to 5x10^12 samples) of a
811 similar nature. The authors know of no statistical flaws with
812 ROUNDS=14 or more for Threefry2x64.
814 @typedef r123::Threefry2x64
816 Threefry2x64 is equivalent to Threefry2x64_R<20>. With 20 rounds,
817 Threefry2x64 has a considerable safety margin over the minimum number
818 of rounds with no known statistical flaws, but still has excellent performance.
823 @class r123::Threefry4x32_R
826 exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
828 The template argument, ROUNDS, is the number of times the Threefry round
829 function will be applied.
831 As of September 2011, the authors know of no statistical flaws with
832 ROUNDS=12 or more for Threefry4x32.
834 @typedef r123::Threefry4x32
836 Threefry4x32 is equivalent to Threefry4x32_R<20>. With 20 rounds,
837 Threefry4x32 has a considerable safety margin over the minimum number
838 of rounds with no known statistical flaws, but still has excellent performance.
843 @class r123::Threefry4x64_R
846 exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
848 The template argument, ROUNDS, is the number of times the Threefry round
849 function will be applied.
851 As of September 2011, the authors know of no statistical flaws with
852 ROUNDS=12 or more for Threefry4x64.
854 @typedef r123::Threefry4x64
856 Threefry4x64 is equivalent to Threefry4x64_R<20>. With 20 rounds,
857 Threefry4x64 has a considerable safety margin over the minimum number
858 of rounds with no known statistical flaws, but still has excellent performance.