diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h
index 31256358..92f8f3dc 100644
--- a/test_conformance/math_brute_force/Utility.h
+++ b/test_conformance/math_brute_force/Utility.h
@@ -31,6 +31,7 @@
 #include "harness/conversions.h"
 
 #define BUFFER_SIZE         (1024*1024*2)
+#define EMBEDDED_REDUCTION_FACTOR (64)
 
 #if defined( __GNUC__ )
     #define UNUSED  __attribute__ ((unused))
@@ -228,6 +229,36 @@ void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int is
 
 float getAllowedUlpError(const Func *f, const bool relaxed);
 
+static inline cl_uint getTestScale(size_t typeSize)
+{
+    if (gWimpyMode)
+    {
+        return (cl_uint)typeSize * 2 * gWimpyReductionFactor;
+    }
+    else if (gIsEmbedded)
+    {
+        return EMBEDDED_REDUCTION_FACTOR;
+    }
+    else
+    {
+        return 1;
+    }
+}
+
+static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
+{
+    if (gWimpyMode)
+    {
+        return (1ULL << 32) * gWimpyReductionFactor / (512);
+    }
+    else if (gIsEmbedded)
+    {
+        return (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR;
+    }
+    else
+    {
+        return bufferSize / typeSize;
+    }
+}
+
 #endif /* UTILITY_H */
-
-
diff --git a/test_conformance/math_brute_force/binary.cpp b/test_conformance/math_brute_force/binary.cpp
index eb5007c0..0b8be27b 100644
--- a/test_conformance/math_brute_force/binary.cpp
+++ b/test_conformance/math_brute_force/binary.cpp
@@ -277,12 +277,12 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter,
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = 1;
+    test_info.scale = getTestScale(sizeof(cl_float));
 
     if (gWimpyMode){
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -1014,13 +1014,13 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = 1;
+    test_info.scale = getTestScale(sizeof(cl_double));
 
 
     if (gWimpyMode){
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
diff --git a/test_conformance/math_brute_force/binaryOperator.cpp b/test_conformance/math_brute_force/binaryOperator.cpp
index 0742964d..abcb1b00 100644
--- a/test_conformance/math_brute_force/binaryOperator.cpp
+++ b/test_conformance/math_brute_force/binaryOperator.cpp
@@ -269,10 +269,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
     if (gWimpyMode) {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
 
     test_info.step = test_info.subBufferSize * test_info.scale;
@@ -963,11 +962,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
diff --git a/test_conformance/math_brute_force/binary_i.cpp b/test_conformance/math_brute_force/binary_i.cpp
index 6ba0eb58..01f45242 100644
--- a/test_conformance/math_brute_force/binary_i.cpp
+++ b/test_conformance/math_brute_force/binary_i.cpp
@@ -266,12 +266,13 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
+
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -780,12 +781,12 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
diff --git a/test_conformance/math_brute_force/binary_two_results_i.cpp b/test_conformance/math_brute_force/binary_two_results_i.cpp
index c5577b9e..af1b04d1 100644
--- a/test_conformance/math_brute_force/binary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i.cpp
@@ -287,17 +287,13 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal = 0.0f;
     float maxErrorVal2 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
 
 #if defined PARALLEL_REFERENCE
     cl_uint threadCount = GetThreadCount();
 #endif
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
-    if(gWimpyMode ){
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
-
     if( gIsEmbedded )
         float_ulps = f->float_embedded_ulps;
     else
@@ -716,12 +712,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     double maxErrorVal = 0.0f;
     double maxErrorVal2 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( double );
+    uint64_t step = getTestStep(sizeof(double), bufferSize);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if(gWimpyMode ){
-       step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
 #if defined PARALLEL_REFERENCE
     cl_uint threadCount = GetThreadCount();
diff --git a/test_conformance/math_brute_force/i_unary.cpp b/test_conformance/math_brute_force/i_unary.cpp
index 379d8e35..f6bd1223 100644
--- a/test_conformance/math_brute_force/i_unary.cpp
+++ b/test_conformance/math_brute_force/i_unary.cpp
@@ -191,14 +191,10 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
     cl_kernel kernels[ VECTOR_SIZE_COUNT ];
     int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ;
     size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     // This test is not using ThreadPool so we need to disable FTZ here
     // for reference computations
@@ -412,14 +408,11 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
     cl_kernel kernels[ VECTOR_SIZE_COUNT ];
     int ftz = f->ftz || gForceFTZ;
     size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( cl_double );
+    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+
     // This test is not using ThreadPool so we need to disable FTZ here
     // for reference computations
     FPU_mode_type oldMode;
diff --git a/test_conformance/math_brute_force/macro_binary.cpp b/test_conformance/math_brute_force/macro_binary.cpp
index b590f50a..1cde215c 100644
--- a/test_conformance/math_brute_force/macro_binary.cpp
+++ b/test_conformance/math_brute_force/macro_binary.cpp
@@ -253,12 +253,12 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -765,11 +765,10 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
diff --git a/test_conformance/math_brute_force/macro_unary.cpp b/test_conformance/math_brute_force/macro_unary.cpp
index 872007f1..70f724ce 100644
--- a/test_conformance/math_brute_force/macro_unary.cpp
+++ b/test_conformance/math_brute_force/macro_unary.cpp
@@ -224,12 +224,12 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
     if (gWimpyMode )
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -623,11 +623,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode )
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
diff --git a/test_conformance/math_brute_force/mad.cpp b/test_conformance/math_brute_force/mad.cpp
index 0737afbc..ed1d7d53 100644
--- a/test_conformance/math_brute_force/mad.cpp
+++ b/test_conformance/math_brute_force/mad.cpp
@@ -207,12 +207,8 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal2 = 0.0f;
     float maxErrorVal3 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
 
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
     // Init the kernels
     BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                    f->nameInCode, relaxedMode };
@@ -675,11 +671,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    uint64_t step = bufferSize / sizeof( double );
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+    uint64_t step = getTestStep(sizeof(double), bufferSize);
+
     // Init the kernels
     BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                    f->nameInCode, relaxedMode };
diff --git a/test_conformance/math_brute_force/ternary.cpp b/test_conformance/math_brute_force/ternary.cpp
index 2c4b503e..fd97a95d 100644
--- a/test_conformance/math_brute_force/ternary.cpp
+++ b/test_conformance/math_brute_force/ternary.cpp
@@ -228,16 +228,12 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal3 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
 
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int skipNanInf = (0 == strcmp( "fma", f->nameInCode )) && ! gInfNanSupport;
     cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
     float float_ulps;
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     if( gIsEmbedded )
         float_ulps = f->float_embedded_ulps;
@@ -874,11 +870,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
 
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( double );
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+    uint64_t step = getTestStep(sizeof(double), bufferSize);
 
     Force64BitFPUPrecision();
 
diff --git a/test_conformance/math_brute_force/unary.cpp b/test_conformance/math_brute_force/unary.cpp
index 0cde4f30..8ef33119 100644
--- a/test_conformance/math_brute_force/unary.cpp
+++ b/test_conformance/math_brute_force/unary.cpp
@@ -240,12 +240,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     test_info.threadCount = GetThreadCount();
 
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_float));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -1036,12 +1036,12 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     memset( &test_info, 0, sizeof( test_info ) );
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale =  1;
+    test_info.scale = getTestScale(sizeof(cl_double));
     if (gWimpyMode)
     {
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
diff --git a/test_conformance/math_brute_force/unary_two_results.cpp b/test_conformance/math_brute_force/unary_two_results.cpp
index a86277f1..b170e095 100644
--- a/test_conformance/math_brute_force/unary_two_results.cpp
+++ b/test_conformance/math_brute_force/unary_two_results.cpp
@@ -203,7 +203,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal0 = 0.0f;
     float maxErrorVal1 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
     cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
     int isFract = 0 == strcmp( "fract", f->nameInCode );
@@ -211,10 +211,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
     float float_ulps = getAllowedUlpError(f, relaxedMode);
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     // Init the kernels
     BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
@@ -666,14 +662,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
     double maxErrorVal0 = 0.0f;
     double maxErrorVal1 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( cl_double );
+    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     Force64BitFPUPrecision();
 
diff --git a/test_conformance/math_brute_force/unary_two_results_i.cpp b/test_conformance/math_brute_force/unary_two_results_i.cpp
index 108be6a4..15326882 100644
--- a/test_conformance/math_brute_force/unary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i.cpp
@@ -209,15 +209,12 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal2 = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
     float float_ulps;
-     uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
     cl_ulong  maxiError;
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+
     if( gIsEmbedded )
         float_ulps = f->float_embedded_ulps;
     else
@@ -513,14 +510,10 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
     cl_ulong  maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
 
-    uint64_t step = bufferSize / sizeof( double );
+    uint64_t step = getTestStep(sizeof(double), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
 
     Force64BitFPUPrecision();
 
diff --git a/test_conformance/math_brute_force/unary_u.cpp b/test_conformance/math_brute_force/unary_u.cpp
index 87fcae32..97fd25f9 100644
--- a/test_conformance/math_brute_force/unary_u.cpp
+++ b/test_conformance/math_brute_force/unary_u.cpp
@@ -196,17 +196,14 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
     float maxErrorVal = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
 
-    uint64_t step = bufferSize / sizeof( float );
+    uint64_t step = getTestStep(sizeof(float), bufferSize);
     int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
     int isRangeLimited = 0;
     float float_ulps;
     float half_sin_cos_tan_limit = 0;
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+
     if( gIsEmbedded)
         float_ulps = f->float_embedded_ulps;
     else
@@ -473,13 +470,10 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
     int ftz = f->ftz || gForceFTZ;
     double maxErrorVal = 0.0f;
     size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
-    uint64_t step = bufferSize / sizeof( cl_double );
+    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    if( gWimpyMode )
-    {
-        step = (1ULL<<32) * gWimpyReductionFactor / (512);
-    }
+
     Force64BitFPUPrecision();
 
     // Init the kernels