2026-03-19 06:09:01 +00:00 · 2021-01-07 11:34:58 +00:00
parent 25d9ff5d6e
commit 904fb419ee
14 changed files with 73 additions and 95 deletions
							
							
								
							
							
						
@@ -31,6 +31,7 @@
#include "harness/conversions.h"
#define BUFFER_SIZE         (1024*1024*2)
#define EMBEDDED_REDUCTION_FACTOR (64)
#if defined( __GNUC__ )
    #define UNUSED  __attribute__ ((unused))
							
								
							
							
								
							
							
						
@@ -228,6 +229,36 @@ void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int is
float getAllowedUlpError(const Func *f, const bool relaxed);
// Pick the scale factor for a test run.
//
// Callers store the result in test_info.scale and multiply it by the
// sub-buffer size to obtain the step between tested ranges.
//
//   typeSize  size in bytes of the element type under test; only used in
//             wimpy mode.
//
// Returns:
//   - wimpy mode:     typeSize * 2 * gWimpyReductionFactor
//   - embedded mode:  EMBEDDED_REDUCTION_FACTOR
//   - otherwise:      1
//
// Wimpy mode is checked first, so it takes precedence when both flags are set.
static inline cl_uint getTestScale(size_t typeSize)
{
    if (gWimpyMode)
    {
        return (cl_uint)typeSize * 2 * gWimpyReductionFactor;
    }

    if (gIsEmbedded)
    {
        return EMBEDDED_REDUCTION_FACTOR;
    }

    // Full-profile, non-wimpy run: no reduction.
    return 1;
}
// Pick the step used by the tests that walk their input range with a
// 64-bit `step` variable.
//
//   typeSize    size in bytes of the element type under test.
//   bufferSize  size in bytes of the buffer the caller is using; only
//               consulted on the default (non-wimpy, non-embedded) path.
//
// Returns:
//   - wimpy mode:     (2^32 * gWimpyReductionFactor) / 512
//   - embedded mode:  (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR
//   - otherwise:      bufferSize / typeSize
//
// Wimpy mode is checked first, so it takes precedence when both flags are set.
static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
{
    if (gWimpyMode)
    {
        return (1ULL << 32) * gWimpyReductionFactor / (512);
    }

    if (gIsEmbedded)
    {
        // Note: this branch is based on the BUFFER_SIZE macro, not on the
        // bufferSize argument.
        return (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR;
    }

    // Default: one step per buffer's worth of elements.
    return bufferSize / typeSize;
}
#endif /* UTILITY_H */
							
							
							
						
 
							
							
								
							
							
						
@@ -277,12 +277,12 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter,
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = 1;
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode){
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
    if (test_info.step / test_info.subBufferSize != test_info.scale)
    {
							
								
							
							
								
							
							
						
@@ -1014,13 +1014,13 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = 1;
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode){
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
    if (test_info.step / test_info.subBufferSize != test_info.scale)
    {
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -269,10 +269,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode) {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = test_info.subBufferSize * test_info.scale;
							
								
							
							
								
							
							
						
@@ -963,11 +962,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -266,12 +266,13 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
    if (test_info.step / test_info.subBufferSize != test_info.scale)
    {
							
								
							
							
								
							
							
						
@@ -780,12 +781,12 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
    if (test_info.step / test_info.subBufferSize != test_info.scale)
    {
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -287,17 +287,13 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    float maxErrorVal = 0.0f;
    float maxErrorVal2 = 0.0f;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( float );
    uint64_t step = getTestStep(sizeof(float), bufferSize);
#if defined PARALLEL_REFERENCE
    cl_uint threadCount = GetThreadCount();
#endif
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    if(gWimpyMode ){
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    if( gIsEmbedded )
        float_ulps = f->float_embedded_ulps;
    else
							
								
							
							
								
							
							
						
@@ -716,12 +712,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    double maxErrorVal = 0.0f;
    double maxErrorVal2 = 0.0f;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( double );
    uint64_t step = getTestStep(sizeof(double), bufferSize);
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    if(gWimpyMode ){
       step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
#if defined PARALLEL_REFERENCE
    cl_uint threadCount = GetThreadCount();
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -191,14 +191,10 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
    int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ;
    size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( float );
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    // This test is not using ThreadPool so we need to disable FTZ here
    // for reference computations
							
								
							
							
								
							
							
						
@@ -412,14 +408,11 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
    int ftz = f->ftz || gForceFTZ;
    size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( cl_double );
    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    // This test is not using ThreadPool so we need to disable FTZ here
    // for reference computations
    FPU_mode_type oldMode;
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -253,12 +253,12 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
    if (test_info.step / test_info.subBufferSize != test_info.scale)
    {
							
								
							
							
								
							
							
						
@@ -765,11 +765,10 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -224,12 +224,12 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode )
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
    if (test_info.step / test_info.subBufferSize != test_info.scale)
    {
							
								
							
							
								
							
							
						
@@ -623,11 +623,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode )
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -207,12 +207,8 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode)
    float maxErrorVal2 = 0.0f;
    float maxErrorVal3 = 0.0f;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( float );
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
							
								
							
							
								
							
							
						
@@ -675,11 +671,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    uint64_t step = bufferSize / sizeof( double );
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    uint64_t step = getTestStep(sizeof(double), bufferSize);
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -228,16 +228,12 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    float maxErrorVal3 = 0.0f;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( float );
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    int skipNanInf = (0 == strcmp( "fma", f->nameInCode )) && ! gInfNanSupport;
    cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
    float float_ulps;
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    if( gIsEmbedded )
        float_ulps = f->float_embedded_ulps;
							
								
							
							
								
							
							
						
@@ -874,11 +870,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( double );
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    uint64_t step = getTestStep(sizeof(double), bufferSize);
    Force64BitFPUPrecision();
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -240,12 +240,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
    if (test_info.step / test_info.subBufferSize != test_info.scale)
    {
							
								
							
							
								
							
							
						
@@ -1036,12 +1036,12 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale =  1;
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
    if (test_info.step / test_info.subBufferSize != test_info.scale)
    {
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -203,7 +203,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
    float maxErrorVal0 = 0.0f;
    float maxErrorVal1 = 0.0f;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( float );
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
    cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
    int isFract = 0 == strcmp( "fract", f->nameInCode );
							
							
							
								
							
						
@@ -211,10 +211,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
    float float_ulps = getAllowedUlpError(f, relaxedMode);
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
							
								
							
							
								
							
							
						
@@ -666,14 +662,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
    double maxErrorVal0 = 0.0f;
    double maxErrorVal1 = 0.0f;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( cl_double );
    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    Force64BitFPUPrecision();
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -209,15 +209,12 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
    float maxErrorVal2 = 0.0f;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    float float_ulps;
     uint64_t step = bufferSize / sizeof( float );
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
    cl_ulong  maxiError;
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    if( gIsEmbedded )
        float_ulps = f->float_embedded_ulps;
    else
							
								
							
							
								
							
							
						
@@ -513,14 +510,10 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
    cl_ulong  maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( double );
    uint64_t step = getTestStep(sizeof(double), bufferSize);
    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    Force64BitFPUPrecision();
							
								
							
							
							
						
 
							
							
								
							
							
						
@@ -196,17 +196,14 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
    float maxErrorVal = 0.0f;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( float );
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
    int isRangeLimited = 0;
    float float_ulps;
    float half_sin_cos_tan_limit = 0;
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    if( gIsEmbedded)
        float_ulps = f->float_embedded_ulps;
    else
							
								
							
							
								
							
							
						
@@ -473,13 +470,10 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
    int ftz = f->ftz || gForceFTZ;
    double maxErrorVal = 0.0f;
    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
    uint64_t step = bufferSize / sizeof( cl_double );
    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    if( gWimpyMode )
    {
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
    }
    Force64BitFPUPrecision();
    // Init the kernels