47#if ( ( defined SSE ) || ( defined SSE2 ) )
51}
vec_t __attribute__( ( aligned( 16 ) ) );
55}
dble_vec_t __attribute__( ( aligned( 16 ) ) );
57static int init = 0, pr, prm, ir, jr, is, is_old, next[96];
63} x __attribute__( ( aligned( 16 ) ) );
65# define STEP( pi, pj ) \
66 __asm__ __volatile__( "movaps %2, %%xmm4 \n\t" \
67 "movaps %%xmm2, %%xmm3 \n\t" \
68 "subps %0, %%xmm4 \n\t" \
69 "movaps %%xmm1, %%xmm5 \n\t" \
70 "cmpps $0x6, %%xmm4, %%xmm2 \n\t" \
71 "andps %%xmm2, %%xmm5 \n\t" \
72 "subps %%xmm3, %%xmm4 \n\t" \
73 "andps %%xmm0, %%xmm2 \n\t" \
74 "addps %%xmm4, %%xmm5 \n\t" \
75 "movaps %%xmm5, %0 \n\t" \
76 "movaps %3, %%xmm6 \n\t" \
77 "movaps %%xmm2, %%xmm3 \n\t" \
78 "subps %1, %%xmm6 \n\t" \
79 "movaps %%xmm1, %%xmm7 \n\t" \
80 "cmpps $0x6, %%xmm6, %%xmm2 \n\t" \
81 "andps %%xmm2, %%xmm7 \n\t" \
82 "subps %%xmm3, %%xmm6 \n\t" \
83 "andps %%xmm0, %%xmm2 \n\t" \
84 "addps %%xmm6, %%xmm7 \n\t" \
86 : "+m"( ( *pi ).c1 ), "+m"( ( *pi ).c2 ) \
87 : "m"( ( *pj ).c1 ), "m"( ( *pj ).c2 ) )
89static void error(
int no ) {
93 printf(
"Error in subroutine rlxd_init\n" );
94 printf(
"Bad choice of luxury level (should be 1 or 2)\n" );
97 printf(
"Error in subroutine rlxd_init\n" );
98 printf(
"Bad choice of seed (should be between 1 and 2^31-1)\n" );
101 printf(
"Error in rlxd_get\n" );
102 printf(
"Undefined state (ranlxd is not initialized\n" );
105 printf(
"Error in rlxd_reset\n" );
106 printf(
"Unexpected input data\n" );
109 printf(
"Program aborted\n" );
113static void update() {
123 __asm__ __volatile__(
"movaps %0, %%xmm0 \n\t"
124 "movaps %1, %%xmm1 \n\t"
127 :
"m"( one_bit ),
"m"(
one ),
"m"( carry ) );
129 for ( k = 0; k < kmax; k++ )
134 if (
pi == pmax )
pi = pmin;
135 if ( pj == pmax ) pj = pmin;
138 __asm__ __volatile__(
"movaps %%xmm2, %0" : :
"m"( carry ) );
142 if ( ir >= 12 ) ir -= 12;
143 if ( jr >= 12 ) jr -= 12;
148static void define_constants() {
157 b = (float)( ldexp( 1.0, -24 ) );
163 for ( k = 0; k < 96; k++ )
165 next[k] = ( k + 1 ) % 96;
166 if ( ( k % 4 ) == 3 ) next[k] = ( k + 5 ) % 96;
172 int ibit, jbit, xbit[31];
177 if ( level == 1 ) pr = 202;
178 else if ( level == 2 ) pr = 397;
183 for ( k = 0; k < 31; k++ )
189 if ( ( seed <= 0 ) || ( i != 0 ) ) error( 2 );
194 for ( i = 0; i < 4; i++ )
196 for ( k = 0; k < 24; k++ )
200 for ( l = 0; l < 24; l++ )
205 xbit[ibit] = ( xbit[ibit] + xbit[jbit] ) % 2;
206 ibit = ( ibit + 1 ) % 31;
207 jbit = ( jbit + 1 ) % 31;
210 if ( ( k % 4 ) != i )
ix = 16777215 -
ix;
212 x.num[4 * k + i] = (float)( ldexp( (
double)(
ix ), -24 ) );
244void ranlxd(
double r[],
int n ) {
246 printf(
"ranlxd works!" );
256 if ( init == 0 ) error( 3 );
258 base = (float)( ldexp( 1.0, 24 ) );
261 for ( k = 0; k < 96; k++ ) state[k + 1] = (
int)( base * x.num[k] );
263 state[97] = (int)( base * carry.c1 );
264 state[98] = (int)( base * carry.c2 );
265 state[99] = (int)( base * carry.c3 );
266 state[100] = (int)( base * carry.c4 );
279 if ( state[0] !=
rlxd_size() ) error( 5 );
281 for ( k = 0; k < 96; k++ )
283 if ( ( state[k + 1] < 0 ) || ( state[k + 1] >= 167777216 ) ) error( 5 );
285 x.num[k] = (float)( ldexp( (
double)( state[k + 1] ), -24 ) );
288 if ( ( ( state[97] != 0 ) && ( state[97] != 1 ) ) ||
289 ( ( state[98] != 0 ) && ( state[98] != 1 ) ) ||
290 ( ( state[99] != 0 ) && ( state[99] != 1 ) ) ||
291 ( ( state[100] != 0 ) && ( state[100] != 1 ) ) )
294 carry.c1 = (float)( ldexp( (
double)( state[97] ), -24 ) );
295 carry.c2 = (float)( ldexp( (
double)( state[98] ), -24 ) );
296 carry.c3 = (float)( ldexp( (
double)( state[99] ), -24 ) );
297 carry.c4 = (float)( ldexp( (
double)( state[100] ), -24 ) );
307 if ( ( ( pr != 202 ) && ( pr != 397 ) ) || ( ir < 0 ) || ( ir > 11 ) || ( jr < 0 ) ||
308 ( jr > 11 ) || ( jr != ( ( ir + 7 ) % 12 ) ) || ( is < 0 ) || ( is > 91 ) )
314# define BASE 0x1000000
315# define MASK 0xffffff
325static int init = 0, pr, prm, ir, jr, is, is_old, next[96];
326static double one_bit;
334# define STEP( pi, pj ) \
335 d = ( *pj ).c1.c1 - ( *pi ).c1.c1 - carry.c1; \
336 ( *pi ).c2.c1 += ( d < 0 ); \
338 ( *pi ).c1.c1 = d & MASK; \
339 d = ( *pj ).c1.c2 - ( *pi ).c1.c2 - carry.c2; \
340 ( *pi ).c2.c2 += ( d < 0 ); \
342 ( *pi ).c1.c2 = d & MASK; \
343 d = ( *pj ).c1.c3 - ( *pi ).c1.c3 - carry.c3; \
344 ( *pi ).c2.c3 += ( d < 0 ); \
346 ( *pi ).c1.c3 = d & MASK; \
347 d = ( *pj ).c1.c4 - ( *pi ).c1.c4 - carry.c4; \
348 ( *pi ).c2.c4 += ( d < 0 ); \
350 ( *pi ).c1.c4 = d & MASK; \
351 d = ( *pj ).c2.c1 - ( *pi ).c2.c1; \
352 carry.c1 = ( d < 0 ); \
354 ( *pi ).c2.c1 = d & MASK; \
355 d = ( *pj ).c2.c2 - ( *pi ).c2.c2; \
356 carry.c2 = ( d < 0 ); \
358 ( *pi ).c2.c2 = d & MASK; \
359 d = ( *pj ).c2.c3 - ( *pi ).c2.c3; \
360 carry.c3 = ( d < 0 ); \
362 ( *pi ).c2.c3 = d & MASK; \
363 d = ( *pj ).c2.c4 - ( *pi ).c2.c4; \
364 carry.c4 = ( d < 0 ); \
366 ( *pi ).c2.c4 = d & MASK
368static void error(
int no ) {
372 printf(
"Error in rlxd_init\n" );
373 printf(
"Arithmetic on this machine is not suitable for ranlxd\n" );
376 printf(
"Error in subroutine rlxd_init\n" );
377 printf(
"Bad choice of luxury level (should be 1 or 2)\n" );
380 printf(
"Error in subroutine rlxd_init\n" );
381 printf(
"Bad choice of seed (should be between 1 and 2^31-1)\n" );
384 printf(
"Error in rlxd_get\n" );
385 printf(
"Undefined state (ranlxd is not initialized)\n" );
388 printf(
"Error in rlxd_reset\n" );
389 printf(
"Arithmetic on this machine is not suitable for ranlxd\n" );
392 printf(
"Error in rlxd_reset\n" );
393 printf(
"Unexpected input data\n" );
396 printf(
"Program aborted\n" );
400static void update() {
410 for ( k = 0; k < kmax; k++ )
415 if (
pi == pmax )
pi = pmin;
416 if ( pj == pmax ) pj = pmin;
421 if ( ir >= 12 ) ir -= 12;
422 if ( jr >= 12 ) jr -= 12;
427static void define_constants() {
430 one_bit = ldexp( 1.0, -24 );
432 for ( k = 0; k < 96; k++ )
434 next[k] = ( k + 1 ) % 96;
435 if ( ( k % 4 ) == 3 ) next[k] = ( k + 5 ) % 96;
441 int ibit, jbit, xbit[31];
444 if ( ( INT_MAX < 2147483647 ) || ( FLT_RADIX != 2 ) || ( FLT_MANT_DIG < 24 ) ||
445 ( DBL_MANT_DIG < 48 ) )
450 if ( level == 1 ) pr = 202;
451 else if ( level == 2 ) pr = 397;
456 for ( k = 0; k < 31; k++ )
462 if ( ( seed <= 0 ) || ( i != 0 ) ) error( 2 );
467 for ( i = 0; i < 4; i++ )
469 for ( k = 0; k < 24; k++ )
473 for ( l = 0; l < 24; l++ )
478 xbit[ibit] = ( xbit[ibit] + xbit[jbit] ) % 2;
479 ibit = ( ibit + 1 ) % 31;
480 jbit = ( jbit + 1 ) % 31;
483 if ( ( k % 4 ) != i )
ix = 16777215 -
ix;
485 x.num[4 * k + i] =
ix;
507 for ( k = 0; k <
n; k++ )
510 if ( is == is_old ) update();
511 r[k] = one_bit * ( (double)( x.num[is + 4] ) + one_bit * (double)( x.num[is] ) );
529 if ( init == 0 ) error( 3 );
533 for ( k = 0; k < 96; k++ ) state[k + 1] = x.num[k];
535 state[97] = carry.c1;
536 state[98] = carry.c2;
537 state[99] = carry.c3;
538 state[100] = carry.c4;
549 if ( ( INT_MAX < 2147483647 ) || ( FLT_RADIX != 2 ) || ( FLT_MANT_DIG < 24 ) ||
550 ( DBL_MANT_DIG < 48 ) )
555 if ( state[0] !=
rlxd_size() ) error( 5 );
557 for ( k = 0; k < 96; k++ )
559 if ( ( state[k + 1] < 0 ) || ( state[k + 1] >= 167777216 ) ) error( 5 );
561 x.num[k] = state[k + 1];
564 if ( ( ( state[97] != 0 ) && ( state[97] != 1 ) ) ||
565 ( ( state[98] != 0 ) && ( state[98] != 1 ) ) ||
566 ( ( state[99] != 0 ) && ( state[99] != 1 ) ) ||
567 ( ( state[100] != 0 ) && ( state[100] != 1 ) ) )
570 carry.c1 = state[97];
571 carry.c2 = state[98];
572 carry.c3 = state[99];
573 carry.c4 = state[100];
583 if ( ( ( pr != 202 ) && ( pr != 397 ) ) || ( ir < 0 ) || ( ir > 11 ) || ( jr < 0 ) ||
584 ( jr > 11 ) || ( jr != ( ( ir + 7 ) % 12 ) ) || ( is < 0 ) || ( is > 91 ) )
void rlxd_get(int state[])
void rlxd_reset(int state[])
void rlxd_init(int level, int seed)
void ranlxd(double r[], int n)