Restored the embedded reduction factor to bruteforce. (#1052)

* Restored the embedded reduction factor to bruteforce.

This change was present on the GitLab branch but was missed during the transition to GitHub.

This change is intentionally as close as possible to the patch on GitLab.

Fixes #1045

* Added helper functions for bruteforce step and scale.

* Added missing files from 1e4d19b.

* Renamed getTestScale and getTestStep to set*.
This commit is contained in:
Jeremy Kemp
2021-01-07 11:34:58 +00:00
committed by GitHub
parent 25d9ff5d6e
commit 904fb419ee
14 changed files with 73 additions and 95 deletions

View File

@@ -31,6 +31,7 @@
#include "harness/conversions.h"
#define BUFFER_SIZE (1024*1024*2)
#define EMBEDDED_REDUCTION_FACTOR (64)
#if defined( __GNUC__ )
#define UNUSED __attribute__ ((unused))
@@ -228,6 +229,36 @@ void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int is
float getAllowedUlpError(const Func *f, const bool relaxed);
/*
 * Selects the scale value for a brute-force test run.
 *
 * typeSize: size in bytes of the element type under test; only consulted
 *           when gWimpyMode is set.
 *
 * Returns typeSize * 2 * gWimpyReductionFactor in wimpy mode,
 * EMBEDDED_REDUCTION_FACTOR when gIsEmbedded is set (and wimpy mode is not),
 * and 1 otherwise.
 */
static inline cl_uint getTestScale(size_t typeSize)
{
    // Wimpy mode is checked first, so it wins even when gIsEmbedded is set.
    if (gWimpyMode) {
        return (cl_uint)typeSize * 2 * gWimpyReductionFactor;
    }

    return gIsEmbedded ? EMBEDDED_REDUCTION_FACTOR : 1;
}
/*
 * Selects the step value for a brute-force test run.
 *
 * typeSize:   size in bytes of the element type under test; not consulted
 *             in wimpy mode.
 * bufferSize: size in bytes of the working buffer; only consulted when
 *             neither gWimpyMode nor gIsEmbedded is set.
 *
 * Returns (1ULL << 32) * gWimpyReductionFactor / 512 in wimpy mode,
 * (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR for embedded runs,
 * and bufferSize / typeSize otherwise.
 */
static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
{
    // Wimpy mode is checked first, so it wins even when gIsEmbedded is set.
    if (gWimpyMode) {
        return (1ULL << 32) * gWimpyReductionFactor / (512);
    }

    // The embedded path is derived from the BUFFER_SIZE macro, not from the
    // bufferSize argument.
    if (gIsEmbedded) {
        return (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR;
    }

    return bufferSize / typeSize;
}
#endif /* UTILITY_H */

View File

@@ -277,12 +277,12 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter,
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode){
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
@@ -1014,13 +1014,13 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode){
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{

View File

@@ -269,10 +269,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode) {
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
}
test_info.step = test_info.subBufferSize * test_info.scale;
@@ -963,11 +962,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode)
{
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;

View File

@@ -266,12 +266,13 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode)
{
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
@@ -780,12 +781,12 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode)
{
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{

View File

@@ -287,17 +287,13 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
float maxErrorVal = 0.0f;
float maxErrorVal2 = 0.0f;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( float );
uint64_t step = getTestStep(sizeof(float), bufferSize);
#if defined PARALLEL_REFERENCE
cl_uint threadCount = GetThreadCount();
#endif
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
if(gWimpyMode ){
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
if( gIsEmbedded )
float_ulps = f->float_embedded_ulps;
else
@@ -716,12 +712,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
double maxErrorVal = 0.0f;
double maxErrorVal2 = 0.0f;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( double );
uint64_t step = getTestStep(sizeof(double), bufferSize);
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
if(gWimpyMode ){
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
#if defined PARALLEL_REFERENCE
cl_uint threadCount = GetThreadCount();

View File

@@ -191,14 +191,10 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
cl_kernel kernels[ VECTOR_SIZE_COUNT ];
int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ;
size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( float );
uint64_t step = getTestStep(sizeof(float), bufferSize);
int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
// This test is not using ThreadPool so we need to disable FTZ here
// for reference computations
@@ -412,14 +408,11 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
cl_kernel kernels[ VECTOR_SIZE_COUNT ];
int ftz = f->ftz || gForceFTZ;
size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( cl_double );
uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
// This test is not using ThreadPool so we need to disable FTZ here
// for reference computations
FPU_mode_type oldMode;

View File

@@ -253,12 +253,12 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode)
{
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
@@ -765,11 +765,10 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode)
{
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;

View File

@@ -224,12 +224,12 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode )
{
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
@@ -623,11 +623,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode )
{
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;

View File

@@ -207,12 +207,8 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode)
float maxErrorVal2 = 0.0f;
float maxErrorVal3 = 0.0f;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( float );
uint64_t step = getTestStep(sizeof(float), bufferSize);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
// Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };
@@ -675,11 +671,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
uint64_t step = bufferSize / sizeof( double );
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
uint64_t step = getTestStep(sizeof(double), bufferSize);
// Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
f->nameInCode, relaxedMode };

View File

@@ -228,16 +228,12 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
float maxErrorVal3 = 0.0f;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( float );
uint64_t step = getTestStep(sizeof(float), bufferSize);
int skipNanInf = (0 == strcmp( "fma", f->nameInCode )) && ! gInfNanSupport;
cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
float float_ulps;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
if( gIsEmbedded )
float_ulps = f->float_embedded_ulps;
@@ -874,11 +870,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( double );
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
uint64_t step = getTestStep(sizeof(double), bufferSize);
Force64BitFPUPrecision();

View File

@@ -240,12 +240,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_float));
if (gWimpyMode)
{
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
@@ -1036,12 +1036,12 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
memset( &test_info, 0, sizeof( test_info ) );
test_info.threadCount = GetThreadCount();
test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = 1;
test_info.scale = getTestScale(sizeof(cl_double));
if (gWimpyMode)
{
test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
}
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{

View File

@@ -203,7 +203,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
float maxErrorVal0 = 0.0f;
float maxErrorVal1 = 0.0f;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( float );
uint64_t step = getTestStep(sizeof(float), bufferSize);
int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
int isFract = 0 == strcmp( "fract", f->nameInCode );
@@ -211,10 +211,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
float float_ulps = getAllowedUlpError(f, relaxedMode);
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
// Init the kernels
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
@@ -666,14 +662,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
double maxErrorVal0 = 0.0f;
double maxErrorVal1 = 0.0f;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( cl_double );
uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
Force64BitFPUPrecision();

View File

@@ -209,15 +209,12 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
float maxErrorVal2 = 0.0f;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
float float_ulps;
uint64_t step = bufferSize / sizeof( float );
uint64_t step = getTestStep(sizeof(float), bufferSize);
int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
cl_ulong maxiError;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
if( gIsEmbedded )
float_ulps = f->float_embedded_ulps;
else
@@ -513,14 +510,10 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
cl_ulong maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( double );
uint64_t step = getTestStep(sizeof(double), bufferSize);
int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
Force64BitFPUPrecision();

View File

@@ -196,17 +196,14 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
float maxErrorVal = 0.0f;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( float );
uint64_t step = getTestStep(sizeof(float), bufferSize);
int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
int isRangeLimited = 0;
float float_ulps;
float half_sin_cos_tan_limit = 0;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
if( gIsEmbedded)
float_ulps = f->float_embedded_ulps;
else
@@ -473,13 +470,10 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
int ftz = f->ftz || gForceFTZ;
double maxErrorVal = 0.0f;
size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
uint64_t step = bufferSize / sizeof( cl_double );
uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
if( gWimpyMode )
{
step = (1ULL<<32) * gWimpyReductionFactor / (512);
}
Force64BitFPUPrecision();
// Init the kernels