mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Reformat test harness code (#940)
* Reformat common help text Signed-off-by: Stuart Brady <stuart.brady@arm.com> * Reformat test harness code This goes part of the way to fixing issue #625. Signed-off-by: Stuart Brady <stuart.brady@arm.com>
This commit is contained in:
@@ -26,8 +26,8 @@
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
@@ -51,15 +51,15 @@
|
||||
#include "harness/alloc.h"
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
/* Period parameters */
|
||||
#define N 624 /* vector code requires multiple of 4 here */
|
||||
#define N 624 /* vector code requires multiple of 4 here */
|
||||
#define M 397
|
||||
#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */
|
||||
#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */
|
||||
#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */
|
||||
#define MATRIX_A (cl_uint)0x9908b0dfUL /* constant vector a */
|
||||
#define UPPER_MASK (cl_uint)0x80000000UL /* most significant w-r bits */
|
||||
#define LOWER_MASK (cl_uint)0x7fffffffUL /* least significant r bits */
|
||||
|
||||
typedef struct _MTdata
|
||||
{
|
||||
@@ -67,26 +67,27 @@ typedef struct _MTdata
|
||||
#ifdef __SSE2__
|
||||
cl_uint cache[N];
|
||||
#endif
|
||||
cl_int mti;
|
||||
}_MTdata;
|
||||
cl_int mti;
|
||||
} _MTdata;
|
||||
|
||||
/* initializes mt[N] with a seed */
|
||||
MTdata init_genrand(cl_uint s)
|
||||
{
|
||||
MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
|
||||
if( NULL != r )
|
||||
MTdata r = (MTdata)align_malloc(sizeof(_MTdata), 16);
|
||||
if (NULL != r)
|
||||
{
|
||||
cl_uint *mt = r->mt;
|
||||
int mti = 0;
|
||||
mt[0]= s; // & 0xffffffffUL;
|
||||
for (mti=1; mti<N; mti++) {
|
||||
mt[mti] = (cl_uint)
|
||||
(1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
|
||||
mt[0] = s; // & 0xffffffffUL;
|
||||
for (mti = 1; mti < N; mti++)
|
||||
{
|
||||
mt[mti] = (cl_uint)(
|
||||
1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti);
|
||||
/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
|
||||
/* In the previous versions, MSBs of the seed affect */
|
||||
/* only MSBs of the array mt[]. */
|
||||
/* 2002/01/09 modified by Makoto Matsumoto */
|
||||
// mt[mti] &= 0xffffffffUL;
|
||||
// mt[mti] &= 0xffffffffUL;
|
||||
/* for >32 bit machines */
|
||||
}
|
||||
r->mti = mti;
|
||||
@@ -95,20 +96,22 @@ MTdata init_genrand(cl_uint s)
|
||||
return r;
|
||||
}
|
||||
|
||||
void free_mtdata( MTdata d )
|
||||
void free_mtdata(MTdata d)
|
||||
{
|
||||
if(d)
|
||||
align_free(d);
|
||||
if (d) align_free(d);
|
||||
}
|
||||
|
||||
/* generates a random number on [0,0xffffffff]-interval */
|
||||
cl_uint genrand_int32( MTdata d)
|
||||
cl_uint genrand_int32(MTdata d)
|
||||
{
|
||||
/* mag01[x] = x * MATRIX_A for x=0,1 */
|
||||
static const cl_uint mag01[2]={0x0UL, MATRIX_A};
|
||||
static const cl_uint mag01[2] = { 0x0UL, MATRIX_A };
|
||||
#ifdef __SSE2__
|
||||
static volatile int init = 0;
|
||||
static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
|
||||
static union {
|
||||
__m128i v;
|
||||
cl_uint s[4];
|
||||
} upper_mask, lower_mask, one, matrix_a, c0, c1;
|
||||
#endif
|
||||
|
||||
|
||||
@@ -120,14 +123,17 @@ cl_uint genrand_int32( MTdata d)
|
||||
int kk;
|
||||
|
||||
#ifdef __SSE2__
|
||||
if( 0 == init )
|
||||
if (0 == init)
|
||||
{
|
||||
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
|
||||
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
|
||||
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] =
|
||||
upper_mask.s[3] = UPPER_MASK;
|
||||
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] =
|
||||
lower_mask.s[3] = LOWER_MASK;
|
||||
one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
|
||||
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
|
||||
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
|
||||
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
|
||||
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] =
|
||||
MATRIX_A;
|
||||
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint)0x9d2c5680UL;
|
||||
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint)0xefc60000UL;
|
||||
init = 1;
|
||||
}
|
||||
#endif
|
||||
@@ -135,61 +141,89 @@ cl_uint genrand_int32( MTdata d)
|
||||
kk = 0;
|
||||
#ifdef __SSE2__
|
||||
// vector loop
|
||||
for( ; kk + 4 <= N-M; kk += 4 )
|
||||
for (; kk + 4 <= N - M; kk += 4)
|
||||
{
|
||||
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
||||
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||
// ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||
__m128i vy = _mm_or_si128(
|
||||
_mm_and_si128(_mm_load_si128((__m128i *)(mt + kk)),
|
||||
upper_mask.v),
|
||||
_mm_and_si128(_mm_loadu_si128((__m128i *)(mt + kk + 1)),
|
||||
lower_mask.v));
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
||||
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1)
|
||||
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
||||
// y & 1 ? -1 : 0
|
||||
__m128i mask = _mm_cmpeq_epi32(_mm_and_si128(vy, one.v), one.v);
|
||||
// y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||
__m128i vmag01 = _mm_and_si128(mask, matrix_a.v);
|
||||
// mt[kk+M] ^ (y >> 1)
|
||||
__m128i vr =
|
||||
_mm_xor_si128(_mm_loadu_si128((__m128i *)(mt + kk + M)),
|
||||
(__m128i)_mm_srli_epi32(vy, 1));
|
||||
// mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||
vr = _mm_xor_si128(vr, vmag01);
|
||||
_mm_store_si128((__m128i *)(mt + kk), vr);
|
||||
}
|
||||
#endif
|
||||
for ( ;kk<N-M;kk++) {
|
||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
||||
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
for (; kk < N - M; kk++)
|
||||
{
|
||||
y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
|
||||
mt[kk] = mt[kk + M] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
// advance to next aligned location
|
||||
for (;kk<N-1 && (kk & 3);kk++) {
|
||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
||||
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
for (; kk < N - 1 && (kk & 3); kk++)
|
||||
{
|
||||
y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
|
||||
mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
|
||||
}
|
||||
|
||||
// vector loop
|
||||
for( ; kk + 4 <= N-1; kk += 4 )
|
||||
for (; kk + 4 <= N - 1; kk += 4)
|
||||
{
|
||||
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
||||
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||
__m128i vy = _mm_or_si128(
|
||||
_mm_and_si128(_mm_load_si128((__m128i *)(mt + kk)),
|
||||
upper_mask.v),
|
||||
// ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||
_mm_and_si128(_mm_loadu_si128((__m128i *)(mt + kk + 1)),
|
||||
lower_mask.v));
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
||||
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1)
|
||||
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
||||
// y & 1 ? -1 : 0
|
||||
__m128i mask = _mm_cmpeq_epi32(_mm_and_si128(vy, one.v), one.v);
|
||||
// y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||
__m128i vmag01 = _mm_and_si128(mask, matrix_a.v);
|
||||
// mt[kk+M-N] ^ (y >> 1)
|
||||
__m128i vr =
|
||||
_mm_xor_si128(_mm_loadu_si128((__m128i *)(mt + kk + M - N)),
|
||||
_mm_srli_epi32(vy, 1));
|
||||
// mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||
vr = _mm_xor_si128(vr, vmag01);
|
||||
_mm_store_si128((__m128i *)(mt + kk), vr);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (;kk<N-1;kk++) {
|
||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
||||
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
for (; kk < N - 1; kk++)
|
||||
{
|
||||
y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
|
||||
mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
|
||||
}
|
||||
y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
|
||||
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
y = (cl_uint)((mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK));
|
||||
mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
|
||||
|
||||
#ifdef __SSE2__
|
||||
// Do the tempering ahead of time in vector code
|
||||
for( kk = 0; kk + 4 <= N; kk += 4 )
|
||||
for (kk = 0; kk + 4 <= N; kk += 4)
|
||||
{
|
||||
__m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k];
|
||||
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11);
|
||||
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
||||
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
||||
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18);
|
||||
_mm_store_si128( (__m128i*)(d->cache+kk), vy );
|
||||
// y = mt[k];
|
||||
__m128i vy = _mm_load_si128((__m128i *)(mt + kk));
|
||||
// y ^= (y >> 11);
|
||||
vy = _mm_xor_si128(vy, _mm_srli_epi32(vy, 11));
|
||||
// y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
||||
vy = _mm_xor_si128(vy, _mm_and_si128(_mm_slli_epi32(vy, 7), c0.v));
|
||||
// y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
||||
vy = _mm_xor_si128(vy, _mm_and_si128(_mm_slli_epi32(vy, 15), c1.v));
|
||||
// y ^= (y >> 18);
|
||||
vy = _mm_xor_si128(vy, _mm_srli_epi32(vy, 18));
|
||||
_mm_store_si128((__m128i *)(d->cache + kk), vy);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -202,8 +236,8 @@ cl_uint genrand_int32( MTdata d)
|
||||
|
||||
/* Tempering */
|
||||
y ^= (y >> 11);
|
||||
y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
||||
y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
||||
y ^= (y << 7) & (cl_uint)0x9d2c5680UL;
|
||||
y ^= (y << 15) & (cl_uint)0xefc60000UL;
|
||||
y ^= (y >> 18);
|
||||
#endif
|
||||
|
||||
@@ -211,35 +245,35 @@ cl_uint genrand_int32( MTdata d)
|
||||
return y;
|
||||
}
|
||||
|
||||
cl_ulong genrand_int64( MTdata d)
|
||||
cl_ulong genrand_int64(MTdata d)
|
||||
{
|
||||
return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d);
|
||||
return ((cl_ulong)genrand_int32(d) << 32) | (cl_uint)genrand_int32(d);
|
||||
}
|
||||
|
||||
/* generates a random number on [0,1]-real-interval */
|
||||
double genrand_real1(MTdata d)
|
||||
{
|
||||
return genrand_int32(d)*(1.0/4294967295.0);
|
||||
return genrand_int32(d) * (1.0 / 4294967295.0);
|
||||
/* divided by 2^32-1 */
|
||||
}
|
||||
|
||||
/* generates a random number on [0,1)-real-interval */
|
||||
double genrand_real2(MTdata d)
|
||||
{
|
||||
return genrand_int32(d)*(1.0/4294967296.0);
|
||||
return genrand_int32(d) * (1.0 / 4294967296.0);
|
||||
/* divided by 2^32 */
|
||||
}
|
||||
|
||||
/* generates a random number on (0,1)-real-interval */
|
||||
double genrand_real3(MTdata d)
|
||||
{
|
||||
return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
|
||||
return (((double)genrand_int32(d)) + 0.5) * (1.0 / 4294967296.0);
|
||||
/* divided by 2^32 */
|
||||
}
|
||||
|
||||
/* generates a random number on [0,1) with 53-bit resolution*/
|
||||
double genrand_res53(MTdata d)
|
||||
{
|
||||
unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
|
||||
return(a*67108864.0+b)*(1.0/9007199254740992.0);
|
||||
unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6;
|
||||
return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user