46#if ( ( defined SSE ) || ( defined SSE2 ) )
50}
vec_t __attribute__( ( aligned( 16 ) ) );
54}
dble_vec_t __attribute__( ( aligned( 16 ) ) );
56static int init = 0, pr, prm, ir, jr, is, is_old, next[96];
62} x __attribute__( ( aligned( 16 ) ) );
64# define STEP( pi, pj ) \
65 __asm__ __volatile__( "movaps %2, %%xmm4 \n\t" \
66 "movaps %%xmm2, %%xmm3 \n\t" \
67 "subps %0, %%xmm4 \n\t" \
68 "movaps %%xmm1, %%xmm5 \n\t" \
69 "cmpps $0x6, %%xmm4, %%xmm2 \n\t" \
70 "andps %%xmm2, %%xmm5 \n\t" \
71 "subps %%xmm3, %%xmm4 \n\t" \
72 "andps %%xmm0, %%xmm2 \n\t" \
73 "addps %%xmm4, %%xmm5 \n\t" \
74 "movaps %%xmm5, %0 \n\t" \
75 "movaps %3, %%xmm6 \n\t" \
76 "movaps %%xmm2, %%xmm3 \n\t" \
77 "subps %1, %%xmm6 \n\t" \
78 "movaps %%xmm1, %%xmm7 \n\t" \
79 "cmpps $0x6, %%xmm6, %%xmm2 \n\t" \
80 "andps %%xmm2, %%xmm7 \n\t" \
81 "subps %%xmm3, %%xmm6 \n\t" \
82 "andps %%xmm0, %%xmm2 \n\t" \
83 "addps %%xmm6, %%xmm7 \n\t" \
85 : "+m"( ( *pi ).c1 ), "+m"( ( *pi ).c2 ) \
86 : "m"( ( *pj ).c1 ), "m"( ( *pj ).c2 ) )
88static void error(
int no ) {
92 printf(
"Error in subroutine rlxd_init\n" );
93 printf(
"Bad choice of luxury level (should be 1 or 2)\n" );
96 printf(
"Error in subroutine rlxd_init\n" );
97 printf(
"Bad choice of seed (should be between 1 and 2^31-1)\n" );
100 printf(
"Error in rlxd_get\n" );
101 printf(
"Undefined state (ranlxd is not initialized\n" );
104 printf(
"Error in rlxd_reset\n" );
105 printf(
"Unexpected input data\n" );
108 printf(
"Program aborted\n" );
112static void update() {
122 __asm__ __volatile__(
"movaps %0, %%xmm0 \n\t"
123 "movaps %1, %%xmm1 \n\t"
126 :
"m"( one_bit ),
"m"(
one ),
"m"( carry ) );
128 for ( k = 0; k < kmax; k++ )
133 if (
pi == pmax )
pi = pmin;
134 if ( pj == pmax ) pj = pmin;
137 __asm__ __volatile__(
"movaps %%xmm2, %0" : :
"m"( carry ) );
141 if ( ir >= 12 ) ir -= 12;
142 if ( jr >= 12 ) jr -= 12;
147static void define_constants() {
156 b = (float)( ldexp( 1.0, -24 ) );
162 for ( k = 0; k < 96; k++ )
164 next[k] = ( k + 1 ) % 96;
165 if ( ( k % 4 ) == 3 ) next[k] = ( k + 5 ) % 96;
171 int ibit, jbit, xbit[31];
176 if ( level == 1 ) pr = 202;
177 else if ( level == 2 ) pr = 397;
182 for ( k = 0; k < 31; k++ )
188 if ( ( seed <= 0 ) || ( i != 0 ) ) error( 2 );
193 for ( i = 0; i < 4; i++ )
195 for ( k = 0; k < 24; k++ )
199 for ( l = 0; l < 24; l++ )
204 xbit[ibit] = ( xbit[ibit] + xbit[jbit] ) % 2;
205 ibit = ( ibit + 1 ) % 31;
206 jbit = ( jbit + 1 ) % 31;
209 if ( ( k % 4 ) != i )
ix = 16777215 -
ix;
211 x.num[4 * k + i] = (float)( ldexp( (
double)(
ix ), -24 ) );
228void ranlxd(
double r[],
int n ) {
233 for ( k = 0; k <
n; k++ )
236 if ( is == is_old ) update();
237 r[k] = (double)( x.num[is + 4] ) + (double)( one_bit.c1 * x.num[is] );
247 if ( init == 0 ) error( 3 );
249 base = (float)( ldexp( 1.0, 24 ) );
252 for ( k = 0; k < 96; k++ ) state[k + 1] = (
int)( base * x.num[k] );
254 state[97] = (int)( base * carry.c1 );
255 state[98] = (int)( base * carry.c2 );
256 state[99] = (int)( base * carry.c3 );
257 state[100] = (int)( base * carry.c4 );
270 if ( state[0] !=
rlxd_size() ) error( 5 );
272 for ( k = 0; k < 96; k++ )
274 if ( ( state[k + 1] < 0 ) || ( state[k + 1] >= 167777216 ) ) error( 5 );
276 x.num[k] = (float)( ldexp( (
double)( state[k + 1] ), -24 ) );
279 if ( ( ( state[97] != 0 ) && ( state[97] != 1 ) ) ||
280 ( ( state[98] != 0 ) && ( state[98] != 1 ) ) ||
281 ( ( state[99] != 0 ) && ( state[99] != 1 ) ) ||
282 ( ( state[100] != 0 ) && ( state[100] != 1 ) ) )
285 carry.c1 = (float)( ldexp( (
double)( state[97] ), -24 ) );
286 carry.c2 = (float)( ldexp( (
double)( state[98] ), -24 ) );
287 carry.c3 = (float)( ldexp( (
double)( state[99] ), -24 ) );
288 carry.c4 = (float)( ldexp( (
double)( state[100] ), -24 ) );
298 if ( ( ( pr != 202 ) && ( pr != 397 ) ) || ( ir < 0 ) || ( ir > 11 ) || ( jr < 0 ) ||
299 ( jr > 11 ) || ( jr != ( ( ir + 7 ) % 12 ) ) || ( is < 0 ) || ( is > 91 ) )
305# define BASE 0x1000000
306# define MASK 0xffffff
316static int init = 0, pr, prm, ir, jr, is, is_old, next[96];
317static double one_bit;
325# define STEP( pi, pj ) \
326 d = ( *pj ).c1.c1 - ( *pi ).c1.c1 - carry.c1; \
327 ( *pi ).c2.c1 += ( d < 0 ); \
329 ( *pi ).c1.c1 = d & MASK; \
330 d = ( *pj ).c1.c2 - ( *pi ).c1.c2 - carry.c2; \
331 ( *pi ).c2.c2 += ( d < 0 ); \
333 ( *pi ).c1.c2 = d & MASK; \
334 d = ( *pj ).c1.c3 - ( *pi ).c1.c3 - carry.c3; \
335 ( *pi ).c2.c3 += ( d < 0 ); \
337 ( *pi ).c1.c3 = d & MASK; \
338 d = ( *pj ).c1.c4 - ( *pi ).c1.c4 - carry.c4; \
339 ( *pi ).c2.c4 += ( d < 0 ); \
341 ( *pi ).c1.c4 = d & MASK; \
342 d = ( *pj ).c2.c1 - ( *pi ).c2.c1; \
343 carry.c1 = ( d < 0 ); \
345 ( *pi ).c2.c1 = d & MASK; \
346 d = ( *pj ).c2.c2 - ( *pi ).c2.c2; \
347 carry.c2 = ( d < 0 ); \
349 ( *pi ).c2.c2 = d & MASK; \
350 d = ( *pj ).c2.c3 - ( *pi ).c2.c3; \
351 carry.c3 = ( d < 0 ); \
353 ( *pi ).c2.c3 = d & MASK; \
354 d = ( *pj ).c2.c4 - ( *pi ).c2.c4; \
355 carry.c4 = ( d < 0 ); \
357 ( *pi ).c2.c4 = d & MASK
359static void error(
int no ) {
363 printf(
"Error in rlxd_init\n" );
364 printf(
"Arithmetic on this machine is not suitable for ranlxd\n" );
367 printf(
"Error in subroutine rlxd_init\n" );
368 printf(
"Bad choice of luxury level (should be 1 or 2)\n" );
371 printf(
"Error in subroutine rlxd_init\n" );
372 printf(
"Bad choice of seed (should be between 1 and 2^31-1)\n" );
375 printf(
"Error in rlxd_get\n" );
376 printf(
"Undefined state (ranlxd is not initialized)\n" );
379 printf(
"Error in rlxd_reset\n" );
380 printf(
"Arithmetic on this machine is not suitable for ranlxd\n" );
383 printf(
"Error in rlxd_reset\n" );
384 printf(
"Unexpected input data\n" );
387 printf(
"Program aborted\n" );
391static void update() {
401 for ( k = 0; k < kmax; k++ )
406 if (
pi == pmax )
pi = pmin;
407 if ( pj == pmax ) pj = pmin;
412 if ( ir >= 12 ) ir -= 12;
413 if ( jr >= 12 ) jr -= 12;
418static void define_constants() {
421 one_bit = ldexp( 1.0, -24 );
423 for ( k = 0; k < 96; k++ )
425 next[k] = ( k + 1 ) % 96;
426 if ( ( k % 4 ) == 3 ) next[k] = ( k + 5 ) % 96;
432 int ibit, jbit, xbit[31];
435 if ( ( INT_MAX < 2147483647 ) || ( FLT_RADIX != 2 ) || ( FLT_MANT_DIG < 24 ) ||
436 ( DBL_MANT_DIG < 48 ) )
441 if ( level == 1 ) pr = 202;
442 else if ( level == 2 ) pr = 397;
447 for ( k = 0; k < 31; k++ )
453 if ( ( seed <= 0 ) || ( i != 0 ) ) error( 2 );
458 for ( i = 0; i < 4; i++ )
460 for ( k = 0; k < 24; k++ )
464 for ( l = 0; l < 24; l++ )
469 xbit[ibit] = ( xbit[ibit] + xbit[jbit] ) % 2;
470 ibit = ( ibit + 1 ) % 31;
471 jbit = ( jbit + 1 ) % 31;
474 if ( ( k % 4 ) != i )
ix = 16777215 -
ix;
476 x.num[4 * k + i] =
ix;
498 for ( k = 0; k <
n; k++ )
501 if ( is == is_old ) update();
502 r[k] = one_bit * ( (double)( x.num[is + 4] ) + one_bit * (double)( x.num[is] ) );
511 if ( init == 0 ) error( 3 );
515 for ( k = 0; k < 96; k++ ) state[k + 1] = x.num[k];
517 state[97] = carry.c1;
518 state[98] = carry.c2;
519 state[99] = carry.c3;
520 state[100] = carry.c4;
531 if ( ( INT_MAX < 2147483647 ) || ( FLT_RADIX != 2 ) || ( FLT_MANT_DIG < 24 ) ||
532 ( DBL_MANT_DIG < 48 ) )
537 if ( state[0] !=
rlxd_size() ) error( 5 );
539 for ( k = 0; k < 96; k++ )
541 if ( ( state[k + 1] < 0 ) || ( state[k + 1] >= 167777216 ) ) error( 5 );
543 x.num[k] = state[k + 1];
546 if ( ( ( state[97] != 0 ) && ( state[97] != 1 ) ) ||
547 ( ( state[98] != 0 ) && ( state[98] != 1 ) ) ||
548 ( ( state[99] != 0 ) && ( state[99] != 1 ) ) ||
549 ( ( state[100] != 0 ) && ( state[100] != 1 ) ) )
552 carry.c1 = state[97];
553 carry.c2 = state[98];
554 carry.c3 = state[99];
555 carry.c4 = state[100];
565 if ( ( ( pr != 202 ) && ( pr != 397 ) ) || ( ir < 0 ) || ( ir > 11 ) || ( jr < 0 ) ||
566 ( jr > 11 ) || ( jr != ( ( ir + 7 ) % 12 ) ) || ( is < 0 ) || ( is > 91 ) )
void rlxd_get(int state[])
void rlxd_reset(int state[])
void rlxd_init(int level, int seed)
void ranlxd(double r[], int n)