mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 22:19:02 +00:00
Also use it instead of duplicating the code. Fixes #326 Signed-off-by: Kevin Petit <kevin.petit@arm.com>
246 lines
9.2 KiB
C++
246 lines
9.2 KiB
C++
/*
|
|
A C-program for MT19937, with initialization improved 2002/1/26.
|
|
Coded by Takuji Nishimura and Makoto Matsumoto.
|
|
|
|
Before using, initialize the state by using init_genrand(seed)
|
|
or init_by_array(init_key, key_length).
|
|
|
|
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
|
|
1. Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
2. Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
3. The names of its contributors may not be used to endorse or promote
|
|
products derived from this software without specific prior written
|
|
permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
Any feedback is very welcome.
|
|
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
|
|
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
|
|
|
|
Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
|
|
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "mt19937.h"
|
|
#include "mingw_compat.h"
|
|
#include "harness/alloc.h"
|
|
|
|
#ifdef __SSE2__
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
/* Period parameters */
|
|
#define N 624 /* vector code requires multiple of 4 here */
|
|
#define M 397
|
|
#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */
|
|
#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */
|
|
#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */
|
|
|
|
typedef struct _MTdata
|
|
{
|
|
cl_uint mt[N];
|
|
#ifdef __SSE2__
|
|
cl_uint cache[N];
|
|
#endif
|
|
cl_int mti;
|
|
}_MTdata;
|
|
|
|
/* initializes mt[N] with a seed */
|
|
MTdata init_genrand(cl_uint s)
|
|
{
|
|
MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
|
|
if( NULL != r )
|
|
{
|
|
cl_uint *mt = r->mt;
|
|
int mti = 0;
|
|
mt[0]= s; // & 0xffffffffUL;
|
|
for (mti=1; mti<N; mti++) {
|
|
mt[mti] = (cl_uint)
|
|
(1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
|
|
/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
|
|
/* In the previous versions, MSBs of the seed affect */
|
|
/* only MSBs of the array mt[]. */
|
|
/* 2002/01/09 modified by Makoto Matsumoto */
|
|
// mt[mti] &= 0xffffffffUL;
|
|
/* for >32 bit machines */
|
|
}
|
|
r->mti = mti;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
void free_mtdata( MTdata d )
|
|
{
|
|
if(d)
|
|
align_free(d);
|
|
}
|
|
|
|
/* generates a random number on [0,0xffffffff]-interval */
|
|
cl_uint genrand_int32( MTdata d)
|
|
{
|
|
/* mag01[x] = x * MATRIX_A for x=0,1 */
|
|
static const cl_uint mag01[2]={0x0UL, MATRIX_A};
|
|
#ifdef __SSE2__
|
|
static volatile int init = 0;
|
|
static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
|
|
#endif
|
|
|
|
|
|
cl_uint *mt = d->mt;
|
|
cl_uint y;
|
|
|
|
if (d->mti == N)
|
|
{ /* generate N words at one time */
|
|
int kk;
|
|
|
|
#ifdef __SSE2__
|
|
if( 0 == init )
|
|
{
|
|
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
|
|
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
|
|
one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
|
|
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
|
|
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
|
|
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
|
|
init = 1;
|
|
}
|
|
#endif
|
|
|
|
kk = 0;
|
|
#ifdef __SSE2__
|
|
// vector loop
|
|
for( ; kk + 4 <= N-M; kk += 4 )
|
|
{
|
|
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
|
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
|
|
|
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
|
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
|
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1)
|
|
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
|
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
|
}
|
|
#endif
|
|
for ( ;kk<N-M;kk++) {
|
|
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
|
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
|
}
|
|
|
|
#ifdef __SSE2__
|
|
// advance to next aligned location
|
|
for (;kk<N-1 && (kk & 3);kk++) {
|
|
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
|
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
|
}
|
|
|
|
// vector loop
|
|
for( ; kk + 4 <= N-1; kk += 4 )
|
|
{
|
|
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
|
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
|
|
|
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
|
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
|
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1)
|
|
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
|
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
|
}
|
|
#endif
|
|
|
|
for (;kk<N-1;kk++) {
|
|
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
|
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
|
}
|
|
y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
|
|
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
|
|
|
#ifdef __SSE2__
|
|
// Do the tempering ahead of time in vector code
|
|
for( kk = 0; kk + 4 <= N; kk += 4 )
|
|
{
|
|
__m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k];
|
|
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11);
|
|
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
|
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
|
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18);
|
|
_mm_store_si128( (__m128i*)(d->cache+kk), vy );
|
|
}
|
|
#endif
|
|
|
|
d->mti = 0;
|
|
}
|
|
#ifdef __SSE2__
|
|
y = d->cache[d->mti++];
|
|
#else
|
|
y = mt[d->mti++];
|
|
|
|
/* Tempering */
|
|
y ^= (y >> 11);
|
|
y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
|
y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
|
y ^= (y >> 18);
|
|
#endif
|
|
|
|
|
|
return y;
|
|
}
|
|
|
|
cl_ulong genrand_int64( MTdata d)
|
|
{
|
|
return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d);
|
|
}
|
|
|
|
/* generates a random number on [0,1]-real-interval */
|
|
double genrand_real1(MTdata d)
|
|
{
|
|
return genrand_int32(d)*(1.0/4294967295.0);
|
|
/* divided by 2^32-1 */
|
|
}
|
|
|
|
/* generates a random number on [0,1)-real-interval */
|
|
double genrand_real2(MTdata d)
|
|
{
|
|
return genrand_int32(d)*(1.0/4294967296.0);
|
|
/* divided by 2^32 */
|
|
}
|
|
|
|
/* generates a random number on (0,1)-real-interval */
|
|
double genrand_real3(MTdata d)
|
|
{
|
|
return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
|
|
/* divided by 2^32 */
|
|
}
|
|
|
|
/* generates a random number on [0,1) with 53-bit resolution*/
|
|
double genrand_res53(MTdata d)
|
|
{
|
|
unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
|
|
return(a*67108864.0+b)*(1.0/9007199254740992.0);
|
|
}
|