From 904fb419ee81830d406fc5e022135cbccfaf9f6a Mon Sep 17 00:00:00 2001
From: Jeremy Kemp <jeremy@jeremykemp.co.uk>
Date: Thu, 7 Jan 2021 11:34:58 +0000
Subject: [PATCH] Restored the embedded reduction factor to bruteforce. (#1052)

* Restored the embedded reduction factor to bruteforce.

This change was present on the GitLab branch but missed out during the transition to GitHub.

This change is intentionally as close as possible to the patch on GitLab.

Fixes #1045

* Added helper functions for bruteforce step and scale.

* Added missing files from 1e4d19b.

* Renamed setTestScale and setTestStep to get*.
---
 test_conformance/math_brute_force/Utility.h   | 35 +++++++++++++++++--
 test_conformance/math_brute_force/binary.cpp  |  8 ++---
 .../math_brute_force/binaryOperator.cpp       |  6 ++--
 .../math_brute_force/binary_i.cpp             |  9 ++---
 .../math_brute_force/binary_two_results_i.cpp | 11 ++----
 test_conformance/math_brute_force/i_unary.cpp | 13 ++-----
 .../math_brute_force/macro_binary.cpp         |  7 ++--
 .../math_brute_force/macro_unary.cpp          |  7 ++--
 test_conformance/math_brute_force/mad.cpp     | 13 ++-----
 test_conformance/math_brute_force/ternary.cpp | 12 ++-----
 test_conformance/math_brute_force/unary.cpp   |  8 ++---
 .../math_brute_force/unary_two_results.cpp    | 12 ++-----
 .../math_brute_force/unary_two_results_i.cpp  | 13 ++-----
 test_conformance/math_brute_force/unary_u.cpp | 14 +++-----
 14 files changed, 73 insertions(+), 95 deletions(-)

diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h
index 31256358..92f8f3dc 100644
--- a/test_conformance/math_brute_force/Utility.h
+++ b/test_conformance/math_brute_force/Utility.h
@@ -31,6 +31,7 @@
 #include "harness/conversions.h"
 
 #define BUFFER_SIZE         (1024*1024*2)
+#define EMBEDDED_REDUCTION_FACTOR (64) // scale/step multiplier used when gIsEmbedded is set
 
 #if defined( __GNUC__ )
     #define UNUSED  __attribute__ ((unused))
@@ -228,6 +229,36 @@ void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int is
 
 float getAllowedUlpError(const Func *f, const bool relaxed);
 
+// Returns the value callers assign to test_info.scale: a factor derived from
+// typeSize and gWimpyReductionFactor in wimpy mode, EMBEDDED_REDUCTION_FACTOR
+// when gIsEmbedded is set, and 1 otherwise. Wimpy mode is checked first.
+static inline cl_uint getTestScale(size_t typeSize)
+{
+    if (gWimpyMode)
+    {
+        const cl_uint wimpyScale =
+            (cl_uint)typeSize * 2 * gWimpyReductionFactor;
+        return wimpyScale;
+    }
+
+    // Not wimpy: embedded profiles get the fixed reduction, others none.
+    return gIsEmbedded ? EMBEDDED_REDUCTION_FACTOR : 1;
+}
+
+// Returns the value callers use to initialise `step`: a stride derived from
+// gWimpyReductionFactor in wimpy mode, otherwise the number of typeSize-byte
+// elements in bufferSize, times EMBEDDED_REDUCTION_FACTOR when gIsEmbedded.
+static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
+{
+    if (gWimpyMode)
+    {
+        return (1ULL << 32) * gWimpyReductionFactor / (512);
+    }
+
+    // Use the caller's bufferSize for the embedded case as well; callers in
+    // this directory pass BUFFER_SIZE whenever wimpy mode is off.
+    const uint64_t perBuffer = bufferSize / typeSize;
+    return gIsEmbedded ? perBuffer * EMBEDDED_REDUCTION_FACTOR : perBuffer;
+}
+
 #endif /* UTILITY_H */
-
-
diff --git a/test_conformance/math_brute_force/binary.cpp b/test_conformance/math_brute_force/binary.cpp
index eb5007c0..0b8be27b 100644
--- a/test_conformance/math_brute_force/binary.cpp
+++ b/test_conformance/math_brute_force/binary.cpp
@@ -277,12 +277,12 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter,
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = 1;
+    test_info.scale = getTestScale(sizeof(cl_float));
 
     if (gWimpyMode){
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -1014,13 +1014,13 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = 1;
+    test_info.scale = getTestScale(sizeof(cl_double));
 
 
     if (gWimpyMode){
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
diff --git a/test_conformance/math_brute_force/binaryOperator.cpp b/test_conformance/math_brute_force/binaryOperator.cpp
index 0742964d..abcb1b00 100644
--- a/test_conformance/math_brute_force/binaryOperator.cpp
+++ b/test_conformance/math_brute_force/binaryOperator.cpp
@@ -269,10 +269,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
     if (gWimpyMode) {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
 
     test_info.step = test_info.subBufferSize * test_info.scale;
@@ -963,11 +962,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
diff --git a/test_conformance/math_brute_force/binary_i.cpp b/test_conformance/math_brute_force/binary_i.cpp
index 6ba0eb58..01f45242 100644
--- a/test_conformance/math_brute_force/binary_i.cpp
+++ b/test_conformance/math_brute_force/binary_i.cpp
@@ -266,12 +266,13 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
+
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -780,12 +781,12 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
diff --git a/test_conformance/math_brute_force/binary_two_results_i.cpp b/test_conformance/math_brute_force/binary_two_results_i.cpp
index c5577b9e..af1b04d1 100644
--- a/test_conformance/math_brute_force/binary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i.cpp
@@ -287,17 +287,13 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal = 0.0f;
     float maxErrorVal2 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
 
 #if defined PARALLEL_REFERENCE
     cl_uint threadCount = GetThreadCount();
 #endif
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
-    if(gWimpyMode ){
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
-
     if( gIsEmbedded )
         float_ulps = f->float_embedded_ulps;
     else
@@ -716,12 +712,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     double maxErrorVal = 0.0f;
     double maxErrorVal2 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( double );
+    uint64_t step = getTestStep(sizeof(double), bufferSize);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if(gWimpyMode ){
-       step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
 #if defined PARALLEL_REFERENCE
     cl_uint threadCount = GetThreadCount();
diff --git a/test_conformance/math_brute_force/i_unary.cpp b/test_conformance/math_brute_force/i_unary.cpp
index 379d8e35..f6bd1223 100644
--- a/test_conformance/math_brute_force/i_unary.cpp
+++ b/test_conformance/math_brute_force/i_unary.cpp
@@ -191,14 +191,10 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
     cl_kernel kernels[ VECTOR_SIZE_COUNT ];
     int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ;
     size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     // This test is not using ThreadPool so we need to disable FTZ here
     // for reference computations
@@ -412,14 +408,11 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
     cl_kernel kernels[ VECTOR_SIZE_COUNT ];
     int ftz = f->ftz || gForceFTZ;
     size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( cl_double );
+    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+
     // This test is not using ThreadPool so we need to disable FTZ here
     // for reference computations
     FPU_mode_type oldMode;
diff --git a/test_conformance/math_brute_force/macro_binary.cpp b/test_conformance/math_brute_force/macro_binary.cpp
index b590f50a..1cde215c 100644
--- a/test_conformance/math_brute_force/macro_binary.cpp
+++ b/test_conformance/math_brute_force/macro_binary.cpp
@@ -253,12 +253,12 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -765,11 +765,10 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
diff --git a/test_conformance/math_brute_force/macro_unary.cpp b/test_conformance/math_brute_force/macro_unary.cpp
index 872007f1..70f724ce 100644
--- a/test_conformance/math_brute_force/macro_unary.cpp
+++ b/test_conformance/math_brute_force/macro_unary.cpp
@@ -224,12 +224,12 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
     if (gWimpyMode )
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -623,11 +623,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode )
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
diff --git a/test_conformance/math_brute_force/mad.cpp b/test_conformance/math_brute_force/mad.cpp
index 0737afbc..ed1d7d53 100644
--- a/test_conformance/math_brute_force/mad.cpp
+++ b/test_conformance/math_brute_force/mad.cpp
@@ -207,12 +207,8 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal2 = 0.0f;
     float maxErrorVal3 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
 
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
     // Init the kernels
     BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                    f->nameInCode, relaxedMode };
@@ -675,11 +671,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    uint64_t step = bufferSize / sizeof( double );
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+    uint64_t step = getTestStep(sizeof(double), bufferSize);
+
     // Init the kernels
     BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                    f->nameInCode, relaxedMode };
diff --git a/test_conformance/math_brute_force/ternary.cpp b/test_conformance/math_brute_force/ternary.cpp
index 2c4b503e..fd97a95d 100644
--- a/test_conformance/math_brute_force/ternary.cpp
+++ b/test_conformance/math_brute_force/ternary.cpp
@@ -228,16 +228,12 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal3 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
 
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int skipNanInf = (0 == strcmp( "fma", f->nameInCode )) && ! gInfNanSupport;
     cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
     float float_ulps;
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     if( gIsEmbedded )
         float_ulps = f->float_embedded_ulps;
@@ -874,11 +870,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
 
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( double );
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+    uint64_t step = getTestStep(sizeof(double), bufferSize);
 
     Force64BitFPUPrecision();
 
diff --git a/test_conformance/math_brute_force/unary.cpp b/test_conformance/math_brute_force/unary.cpp
index 0cde4f30..8ef33119 100644
--- a/test_conformance/math_brute_force/unary.cpp
+++ b/test_conformance/math_brute_force/unary.cpp
@@ -240,12 +240,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     test_info.threadCount = GetThreadCount();
 
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -1036,12 +1036,12 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
diff --git a/test_conformance/math_brute_force/unary_two_results.cpp b/test_conformance/math_brute_force/unary_two_results.cpp
index a86277f1..b170e095 100644
--- a/test_conformance/math_brute_force/unary_two_results.cpp
+++ b/test_conformance/math_brute_force/unary_two_results.cpp
@@ -203,7 +203,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal0 = 0.0f;
     float maxErrorVal1 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
     cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
     int isFract = 0 == strcmp( "fract", f->nameInCode );
@@ -211,10 +211,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
     float float_ulps = getAllowedUlpError(f, relaxedMode);
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     // Init the kernels
     BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
@@ -666,14 +662,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
     double maxErrorVal0 = 0.0f;
     double maxErrorVal1 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( cl_double );
+    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     Force64BitFPUPrecision();
 
diff --git a/test_conformance/math_brute_force/unary_two_results_i.cpp b/test_conformance/math_brute_force/unary_two_results_i.cpp
index 108be6a4..15326882 100644
--- a/test_conformance/math_brute_force/unary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i.cpp
@@ -209,15 +209,12 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal2 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
     float float_ulps;
-     uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
     cl_ulong  maxiError;
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+
     if( gIsEmbedded )
         float_ulps = f->float_embedded_ulps;
     else
@@ -513,14 +510,10 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
     cl_ulong  maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
 
-    uint64_t step = bufferSize / sizeof( double );
+    uint64_t step = getTestStep(sizeof(double), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     Force64BitFPUPrecision();
 
diff --git a/test_conformance/math_brute_force/unary_u.cpp b/test_conformance/math_brute_force/unary_u.cpp
index 87fcae32..97fd25f9 100644
--- a/test_conformance/math_brute_force/unary_u.cpp
+++ b/test_conformance/math_brute_force/unary_u.cpp
@@ -196,17 +196,14 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
 
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
     int isRangeLimited = 0;
     float float_ulps;
     float half_sin_cos_tan_limit = 0;
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+
     if( gIsEmbedded)
         float_ulps = f->float_embedded_ulps;
     else
@@ -473,13 +470,10 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
     int ftz = f->ftz || gForceFTZ;
     double maxErrorVal = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( cl_double );
+    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+
     Force64BitFPUPrecision();
 
     // Init the kernels