27 #error "You should not define __MIXMAX_C. Please #include mixmax.h"
34 int iterate(rng_state_t* X){
35 X->sumtot = iterate_raw_vec(X->V, X->sumtot);
40 inline uint64_t MULWU (uint64_t k){
return (( (k)<<(SPECIALMUL) & M61) | ( (k) >> (BITS-SPECIALMUL)) ) ;}
42 inline uint64_t MULWU (uint64_t ){
return 0;}
44 #error SPECIALMUL not defined
47 myuint iterate_raw_vec(myuint* Y, myuint sumtotOld){
54 Y[0] = ( tempV = sumtotOld);
55 myuint sumtot = Y[0], ovflow = 0;
59 myuint tempPO = MULWU(tempP);
60 tempP = modadd(tempP,Y[i]);
61 tempV = MOD_MERSENNE(tempV + tempP + tempPO);
63 tempP = modadd(tempP , Y[i]);
64 tempV = modadd(tempV , tempP);
67 sumtot += tempV;
if (sumtot < tempV) {ovflow++;}
70 temp2 = MOD_MULSPEC(temp2);
71 Y[2] = modadd( Y[2] , temp2 );
72 sumtot += temp2;
if (sumtot < temp2) {ovflow++;}
74 return MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
77 myuint get_next(rng_state_t* X) {
78 return GET_BY_MACRO(X);
81 double get_next_float(rng_state_t* X){
82 return get_next_float_BY_MACRO(X);
85 void fill_array(rng_state_t* X,
unsigned int n,
double *array)
90 for (i=0; i<(n/M); i++){
91 iterate_and_fill_array(X, array+i*M);
93 unsigned int rem=(n % M);
96 for (j=0; j< (rem); j++){
97 array[M*i+j] = (int64_t)X->V[j] * (
double)(INV_MERSBASE);
105 void iterate_and_fill_array(rng_state_t* X,
double *array){
112 Y[0] = (tempV = X->sumtot);
114 myuint sumtot = Y[0], ovflow = 0;
118 myuint tempPO = MULWU(tempP);
119 tempP = modadd(tempP,Y[i]);
120 tempV = MOD_MERSENNE(tempV + tempP + tempPO);
122 tempP = MOD_MERSENNE(tempP + Y[i]);
123 tempV = MOD_MERSENNE(tempV + tempP);
126 sumtot += tempV;
if (sumtot < tempV) {ovflow++;}
127 array[i-1] = (int64_t)tempV * (
double)(INV_MERSBASE);
130 temp2 = MOD_MULSPEC(temp2);
131 Y[2] = modadd( Y[2] , temp2 );
132 sumtot += temp2;
if (sumtot < temp2) {ovflow++;}
134 X->sumtot = MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
137 myuint modadd(myuint foo, myuint bar){
138 #if (defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__) && defined(USE_INLINE_ASM)
142 __asm__ (
"addq %2, %0; "
150 return MOD_MERSENNE(foo+bar);
154 rng_state_t* rng_alloc()
157 rng_state_t *p = (rng_state_t*)malloc(
sizeof(rng_state_t));
162 int rng_free(rng_state_t* X)
168 rng_state_t* rng_copy(myuint *Y)
175 rng_state_t* X = rng_alloc();
176 myuint sumtot=0,ovflow=0;
179 for ( i=0; i < N; i++){
181 sumtot += X->V[(i)];
if (sumtot < X->V[(i)]) {ovflow++;}
184 X->sumtot = MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
188 void seed_vielbein(rng_state_t* X,
unsigned int index)
192 for (i=0; i < N; i++){
197 fprintf(stderr,
"Out of bounds index, is not ( 0 <= index < N )\n"); exit(ARRAY_INDEX_OUT_OF_BOUNDS);
202 if (X->fh==NULL){X->fh=stdout;}
205 void seed_spbox(rng_state_t* X, myuint seed)
207 const myuint MULT64=6364136223846793005ULL;
209 myuint sumtot=0,ovflow=0;
211 fprintf(stderr,
" try seeding with nonzero seed next time!\n");
218 if (X->fh==NULL){X->fh=stdout;}
219 for (i=0; i < N; i++){
220 l*=MULT64; l = (l << 32) ^ (l>>32);
221 X->V[i] = l & MERSBASE;
222 sumtot += X->V[(i)];
if (sumtot < X->V[(i)]) {ovflow++;}
225 X->sumtot = MOD_MERSENNE(MOD_MERSENNE(sumtot) + (ovflow <<3 ));
228 myuint precalc(rng_state_t* X){
232 for (i=0; i < N; i++){
233 temp = MOD_MERSENNE(temp + X->V[i]);
240 int rng_get_N(
void){
return N;}
242 #if defined(__x86_64__)
243 inline myuint mod128(__uint128_t s){
245 s1 = ( ( ((myuint)s)&MERSBASE ) + ( ((myuint)(s>>64)) * 8 ) + ( ((myuint)s) >>BITS) );
246 return MOD_MERSENNE(s1);
249 inline myuint fmodmulM61(myuint cum, myuint a, myuint b){
251 temp = (__uint128_t)a*(__uint128_t)b + cum;
255 #else // on all other platforms, including 32-bit linux, PPC and PPC64 and all Windows
256 #define MASK32 0xFFFFFFFFULL
258 inline myuint fmodmulM61(myuint cum, myuint s, myuint a)
260 register myuint o,ph,pl,ah,al;
266 o = (o & M61) + ((ph*ah)<<3) + ((ah*pl+al*ph + ((al*pl)>>32))>>29) ;
268 o = (o & M61) + ((o>>61));
273 void print_state(rng_state_t* X){
275 fprintf(X->fh,
"mixmax state, file version 1.0\n" );
276 fprintf(X->fh,
"N=%u; V[N]={", rng_get_N() );
277 for (j=0; (j< (rng_get_N()-1) ); j++) {
278 fprintf(X->fh,
"%llu, ", X->V[j] );
280 fprintf(X->fh,
"%llu", X->V[rng_get_N()-1] );
281 fprintf(X->fh,
"}; " );
282 fprintf(X->fh,
"counter=%u; ", X->counter );
283 fprintf(X->fh,
"sumtot=%llu;\n", X->sumtot );
286 void read_state(rng_state_t* X,
const char filename[] ){
289 if( ( fin = fopen(filename,
"r") ) ){
296 fprintf(stderr,
"mixmax -> read_state: error reading file %s\n", filename);
297 exit(ERROR_READING_STATE_FILE);
302 if (!fscanf(fin,
"%llu", &X->V[0]) ) {fprintf(stderr,
"mixmax -> read_state: error reading file %s\n", filename); exit(ERROR_READING_STATE_FILE);}
305 for( i = 1; i < rng_get_N(); i++){
306 if (!fscanf(fin,
", %llu", &vecVal) ) {fprintf(stderr,
"mixmax -> read_state: error reading vector component i=%d from file %s\n", i, filename); exit(ERROR_READING_STATE_FILE);}
308 if( vecVal <= MERSBASE ){
311 fprintf(stderr,
"mixmax -> read_state: Invalid state vector value= %llu"
312 " ( must be less than %llu ) "
313 " obtained from reading file %s\n"
314 , vecVal, MERSBASE, filename);
319 unsigned int counter;
320 if (!fscanf( fin,
"}; counter=%u; ", &counter)){fprintf(stderr,
"mixmax -> read_state: error reading counter from file %s\n", filename); exit(ERROR_READING_STATE_FILE);}
324 fprintf(stderr,
"mixmax -> read_state: Invalid counter = %d"
325 " Must be 0 <= counter < %u\n" , counter, N);
327 exit(ERROR_READING_STATE_COUNTER);
331 if (!fscanf( fin,
"sumtot=%llu\n", &sumtot)){fprintf(stderr,
"mixmax -> read_state: error reading checksum from file %s\n", filename); exit(ERROR_READING_STATE_FILE);}
333 if (X->sumtot != sumtot) {
334 fprintf(stderr,
"mixmax -> checksum error while reading state from file %s - corrupted?\n", filename);
335 exit(ERROR_READING_STATE_CHECKSUM);
337 else{fprintf(stderr,
"mixmax -> read_state: checksum ok: %llu == %llu\n",X->sumtot, sumtot);}
342 #define FUSEDMODMULVEC \
343 { for (i =0; i<N; i++){ \
344 cum[i] = fmodmulM61( cum[i], coeff , Y[i] ) ; \
351 #if ( ( (N==8) || (N==17) || (N==240) ||(N==120) || (N==256) ) && BITS==61 && SKIPISON!=0)
353 #warning Compiling with normal skip
354 void seed_uniquestream( rng_state_t* Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID ){
355 seed_vielbein(Xin,0);
356 Xin->sumtot = apply_bigskip(Xin->V, Xin->V, clusterID, machineID, runID, streamID );
357 if (Xin->fh==NULL){Xin->fh=stdout;}
361 void seed_uniquestream( rng_state_t* Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID ){
371 static __thread myuint Vcache[N]={1};
372 static __thread myuint sumtmp=0;
373 static __thread myID_t ID1cached=0, ID2cached=0, ID3cached=0, ID4cached=0;
375 if ( ID1cached <= clusterID && ID2cached <= machineID && ID3cached <= runID && ID4cached <= streamID){
377 sumtmp = apply_bigskip(Vcache, Vcache, clusterID - ID1cached, machineID - ID2cached, runID - ID3cached, streamID - ID4cached );
379 ID1cached = clusterID; ID2cached = machineID ; ID3cached = runID ; ID4cached = streamID;
380 for (
int i=0; i<N; i++) { Xin->V[i] = Vcache[i] ; }
381 Xin->sumtot = sumtmp;
383 seed_vielbein(Xin,0);
384 Xin->sumtot = apply_bigskip(Xin->V, Xin->V, clusterID, machineID, runID, streamID );
385 for (
int i=0; i<N; i++) { Vcache[i]=Xin->V[i]; }
386 ID1cached = clusterID; ID2cached = machineID ; ID3cached = runID ; ID4cached = streamID;
394 void branch_inplace( rng_state_t* Xin, myID_t* IDvec ){
395 Xin->sumtot = apply_bigskip(Xin->V, Xin->V, IDvec[3], IDvec[2], IDvec[1], IDvec[0] );
398 myuint apply_bigskip(myuint* Vout, myuint* Vin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID ){
420 const myuint skipMat[128][N] =
424 #include "mixmax_skip_N120.icc"
434 #include "mixmax_skip_N8.icc"
440 myID_t IDvec[4] = {streamID, runID, machineID, clusterID};
449 for (i=0; i<N; i++) { Y[i] = Vin[i]; sumtot = modadd( sumtot, Vin[i]); } ;
450 for (IDindex=0; IDindex<4; IDindex++) {
456 rowPtr = (myuint*)skipMat[r + IDindex*8*
sizeof(myID_t)];
458 for (i=0; i<N; i++){ cum[i] = 0; }
462 for (i =0; i<N; i++){
463 cum[i] = fmodmulM61( cum[i], coeff , Y[i] ) ;
465 sumtot = iterate_raw_vec(Y, sumtot);
468 for (i=0; i<N; i++){ Y[i] = cum[i]; sumtot = modadd( sumtot, cum[i]); } ;
474 for (i=0; i<N; i++){ Vout[i] = Y[i]; sumtot = modadd( sumtot, Y[i]); } ;
478 #warning For this N, we dont have the skipping coefficients yet, using alternative method to seed
480 void seed_uniquestream( rng_state_t* Xin, myID_t clusterID, myID_t machineID, myID_t runID, myID_t streamID ){
481 Xin->V[0] = (myuint)clusterID;
482 Xin->V[1] = (myuint)machineID;
483 Xin->V[2] = (myuint)runID;
484 Xin->V[3] = (myuint)streamID;
485 Xin->V[4] = (myuint)clusterID << 5;
486 Xin->V[5] = (myuint)machineID << 7;
487 Xin->V[6] = (myuint)runID << 11;
488 Xin->V[7] = (myuint)streamID << 13;
490 Xin->sumtot = iterate_raw_vec(Xin->V, Xin->sumtot);
491 Xin->sumtot = iterate_raw_vec(Xin->V, Xin->sumtot);