Synchronise with Khronos-private GitLab branch

The maintenance of the conformance tests is moving to GitHub. This commit contains all the changes that have been made in GitLab since the first public release of the conformance tests.

Signed-off-by: Kevin Petit <kevin.petit@arm.com>
2026-03-23 07:39:01 +00:00 · 2019-02-20 16:10:04 +00:00
parent b1603eb6ba
commit 53db6e7f9f
115 changed files with 2632 additions and 1304 deletions
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt
@@ -23,6 +23,10 @@ set(${MODULE_NAME}_SOURCES
    ../../test_common/harness/ThreadPool.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/msvc9.c
+    ../../test_common/harness/kernelHelpers.c
+    ../../test_common/harness/errorHelpers.c
+    ../../test_common/harness/testHarness.c
+    ../../test_common/harness/parseParameters.cpp
 )


@@ -46,26 +50,7 @@ set_source_files_properties(
 endif(NOT ANDROID)

 set_source_files_properties(
-    FunctionList.c
-    Sleep.c
-    binary.c
-    binaryOperator.c
-    Utility.c
-    binary_i.c
-    binary_two_results_i.c
-    i_unary.c
-    macro_binary.c
-    macro_unary.c
-    mad.c
-    main.c
-    reference_math.c
-    ternary.c
-    unary.c
-    unary_two_results.c
-    unary_two_results_i.c unary_u.c
-    ../../test_common/harness/rounding_mode.c
-    ../../test_common/harness/ThreadPool.c
-    ../../test_common/harness/msvc9.c
+    ${${MODULE_NAME}_SOURCES}
    PROPERTIES LANGUAGE CXX)

 if(CMAKE_COMPILER_IS_GNUCC)
--- a/test_conformance/math_brute_force/FunctionList.h
+++ b/test_conformance/math_brute_force/FunctionList.h
@@ -85,7 +85,7 @@ typedef struct Func
  float           relaxed_error;
  int             ftz;
  int             relaxed;
-  const ::vtbl    *vtbl;
+  const vtbl      *vtbl_ptr;
 }Func;


--- a/test_conformance/math_brute_force/Utility.h
+++ b/test_conformance/math_brute_force/Utility.h
@@ -26,11 +26,7 @@
 #include <stdio.h>
 #include "../../test_common/harness/rounding_mode.h"
 #include "../../test_common/harness/fpcontrol.h"
-
-#if defined( _WIN32) && defined (_MSC_VER)
 #include "../../test_common/harness/testHarness.h"
-#endif
-
 #include "../../test_common/harness/ThreadPool.h"
 #define BUFFER_SIZE         (1024*1024*2)

@@ -112,7 +108,7 @@ extern "C" {
 float Abs_Error( float test, double reference );
 float Ulp_Error( float test, double reference );
 //float Ulp_Error_Half( float test, double reference );
-float Ulp_Error_Double( double test, long double reference );
+float Bruteforce_Ulp_Error_Double( double test, long double reference );
 #ifdef __cplusplus
 } //extern "C"
 #endif
--- a/test_conformance/math_brute_force/binary.c
+++ b/test_conformance/math_brute_force/binary.c
@@ -233,6 +233,7 @@ typedef struct TestInfo
    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
    cl_uint     step;                               // step between each chunk and the next.
    cl_uint     scale;                              // stride between individual test values
    float       ulps;                               // max_allowed ulps
@@ -268,6 +269,16 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -341,7 +352,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
    // Run the kernels
    if( !gSkipCorrectnessTesting )
    {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );

        // Accumulate the arithmetic errors
        for( i = 0; i < test_info.threadCount; i++ )
@@ -991,6 +1002,16 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ulps = f->double_ulps;
    test_info.ftz = f->ftz || gForceFTZ;
@@ -1063,7 +1084,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte

    if( !gSkipCorrectnessTesting )
    {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );

        // Accumulate the arithmetic errors
        for( i = 0; i < test_info.threadCount; i++ )
@@ -1359,7 +1380,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
            {
                cl_double test = ((cl_double*) q)[j];
                long double correct = func.f_ff( s[j], s2[j] );
-                float err = Ulp_Error_Double( test, correct );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
                int fail = ! (fabsf(err) <= ulps);

                if( fail && ftz )
@@ -1399,8 +1420,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                        {
                            long double correct2 = func.f_ff( 0.0, s2[j] );
                            long double correct3 = func.f_ff( -0.0, s2[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
@@ -1422,10 +1443,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                                correct3 = func.f_ff( -0.0, 0.0 );
                                long double correct4 = func.f_ff( 0.0, -0.0 );
                                long double correct5 = func.f_ff( -0.0, -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
                                                 (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
                                if( fabsf( err2 ) < fabsf(err ) )
@@ -1451,8 +1472,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                        {
                            long double correct2 = func.f_ff( s[j], 0.0 );
                            long double correct3 = func.f_ff( s[j], -0.0 );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
--- a/test_conformance/math_brute_force/binaryOperator.c
+++ b/test_conformance/math_brute_force/binaryOperator.c
@@ -207,6 +207,7 @@ typedef struct TestInfo
    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
    cl_uint     step;                               // step between each chunk and the next.
    cl_uint     scale;                              // stride between individual test values
    float       ulps;                               // max_allowed ulps
@@ -260,6 +261,16 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
    }

    test_info.step = test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -329,7 +340,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)

    if( !gSkipCorrectnessTesting )
    {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );

        // Accumulate the arithmetic errors
        for( i = 0; i < test_info.threadCount; i++ )
@@ -501,63 +512,51 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
    int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
    int indx = (totalSpecialValueCount - 1) / buffer_elements;

-    if( job_id <= (cl_uint)indx )
-    { // test edge cases
-        float *fp = (float *)p;
-        float *fp2 = (float *)p2;
+
+    if( job_id <= (cl_uint)indx ) {
+        // Insert special values
        uint32_t x, y;

-    x = (job_id * buffer_elements) % specialValuesFloatCount;
-    y = (job_id * buffer_elements) / specialValuesFloatCount;
+        x = (job_id * buffer_elements) % specialValuesFloatCount;
+        y = (job_id * buffer_elements) / specialValuesFloatCount;

-        for( ; j < buffer_elements; j++ )
-        {
-            fp[j] = specialValuesFloat[x];
-            fp2[j] = specialValuesFloat[y];
-            if( ++x >= specialValuesFloatCount )
-            {
+        for( ; j < buffer_elements; j++ ) {
+            p[j] = ((cl_uint *)specialValuesFloat)[x];
+            p2[j] = ((cl_uint *)specialValuesFloat)[y];
+            ++x;
+            if (x >= specialValuesFloatCount) {
                x = 0;
                y++;
-                if( y >= specialValuesFloatCount )
+                if (y >= specialValuesFloatCount)
                    break;
            }
-            if(gTestFastRelaxed && strcmp(name,"divide") == 0 )
-            {
-              float fpj = *(float*)&fp[j];
-              float fpj2 = *(float*)&fp2[j];
-              if(fabs(fpj) > 0x5E800000 ) //[2^-62,2^62]
-              {
-                fp[j] = NAN;
-              }
-              if( fabs(fpj2) > 0x5E800000 ) //[2^-62,2^62]
-              {
-                fp2[j] = NAN;
-              }
+            if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
+                cl_uint pj = p[j] & 0x7fffffff;
+                cl_uint p2j = p2[j] & 0x7fffffff;
+                // Replace values outside [2^-62, 2^62] with QNaN
+                if (pj < 0x20800000 || pj > 0x5e800000)
+                    p[j] = 0x7fc00000;
+                if (p2j < 0x20800000 || p2j > 0x5e800000)
+                    p2[j] = 0x7fc00000;
+            }
        }
    }
-    }

-    //Init any remaining values.
+    // Init any remaining values.
    for( ; j < buffer_elements; j++ )
    {
        p[j] = genrand_int32(d);
        p2[j] = genrand_int32(d);

-        if(gTestFastRelaxed)
-        {
-          if( strcmp(name,"divide")==0){
-            float pj = *(float*)&p[j];
-            float pj2 = *(float*)&p2[j];
-            if(fabs(pj) > 0x5E800000 ) //[2^-62,2^62]
-            {
-              p[j] = NAN;
-            }
-            if( fabs(pj2) > 0x5E800000 ) //[2^-62,2^62]
-            {
-              p2[j] = NAN;
-            }
-          }
-    }
+        if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
+            cl_uint pj = p[j] & 0x7fffffff;
+            cl_uint p2j = p2[j] & 0x7fffffff;
+            // Replace values outside [2^-62, 2^62] with QNaN
+            if (pj < 0x20800000 || pj > 0x5e800000)
+                p[j] = 0x7fc00000;
+            if (p2j < 0x20800000 || p2j > 0x5e800000)
+                p2[j] = 0x7fc00000;
+        }
    }

    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
@@ -950,6 +949,16 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
    }

    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ulps = f->double_ulps;
    test_info.ftz = f->ftz || gForceFTZ;
@@ -1020,7 +1029,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)

    if( !gSkipCorrectnessTesting )
    {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );

        // Accumulate the arithmetic errors
        for( i = 0; i < test_info.threadCount; i++ )
@@ -1315,7 +1324,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
            {
                cl_double test = ((cl_double*) q)[j];
                long double correct = func.f_ff( s[j], s2[j] );
-                float err = Ulp_Error_Double( test, correct );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
                int fail = ! (fabsf(err) <= ulps);

                if( fail && ftz )
@@ -1334,8 +1343,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                    {
                        long double correct2 = func.f_ff( 0.0, s2[j] );
                        long double correct3 = func.f_ff( -0.0, s2[j] );
-                        float err2 = Ulp_Error_Double( test, correct2  );
-                        float err3 = Ulp_Error_Double( test, correct3  );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                        if( fabsf( err2 ) < fabsf(err ) )
                            err = err2;
@@ -1357,10 +1366,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                            correct3 = func.f_ff( -0.0, 0.0 );
                            long double correct4 = func.f_ff( 0.0, -0.0 );
                            long double correct5 = func.f_ff( -0.0, -0.0 );
-                            err2 = Ulp_Error_Double( test, correct2  );
-                            err3 = Ulp_Error_Double( test, correct3  );
-                            float err4 = Ulp_Error_Double( test, correct4  );
-                            float err5 = Ulp_Error_Double( test, correct5  );
+                            err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
                                             (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
@@ -1386,8 +1395,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                    {
                        long double correct2 = func.f_ff( s[j], 0.0 );
                        long double correct3 = func.f_ff( s[j], -0.0 );
-                        float err2 = Ulp_Error_Double( test, correct2  );
-                        float err3 = Ulp_Error_Double( test, correct3  );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                        if( fabsf( err2 ) < fabsf(err ) )
                            err = err2;
--- a/test_conformance/math_brute_force/binary_i.c
+++ b/test_conformance/math_brute_force/binary_i.c
@@ -230,6 +230,7 @@ typedef struct TestInfo
    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
    cl_uint     step;                               // step between each chunk and the next.
    cl_uint     scale;                              // stride between individual test values
    float       ulps;                               // max_allowed ulps
@@ -262,6 +263,16 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -330,7 +341,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
    }

    // Run the kernels
-    error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+    error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );


    // Accumulate the arithmetic errors
@@ -758,6 +769,16 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ulps = f->double_ulps;
    test_info.ftz = f->ftz || gForceFTZ;
@@ -831,7 +852,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)

    // Run the kernels
    if( !gSkipCorrectnessTesting )
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );


    // Accumulate the arithmetic errors
@@ -1128,7 +1149,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
            {
                cl_double test = ((cl_double*) q)[j];
                long double correct = func.f_fi( s[j], s2[j] );
-                float err = Ulp_Error_Double( test, correct );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
                int fail = ! (fabsf(err) <= ulps);

                if( fail && ftz )
@@ -1146,8 +1167,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                    {
                        long double correct2 = func.f_fi( 0.0, s2[j] );
                        long double correct3 = func.f_fi( -0.0, s2[j] );
-                        float err2 = Ulp_Error_Double( test, correct2  );
-                        float err3 = Ulp_Error_Double( test, correct3  );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                        if( fabsf( err2 ) < fabsf(err ) )
                            err = err2;
--- a/test_conformance/math_brute_force/binary_two_results_i.c
+++ b/test_conformance/math_brute_force/binary_two_results_i.c
@@ -871,7 +871,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
                    double test = ((double*) q)[j];
                    int correct2 = INT_MIN;
                    long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 );
-                    float err = Ulp_Error_Double( test, correct );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
                    int64_t iErr;

                    // in case of remquo, we only care about the sign and last seven bits of
@@ -907,8 +907,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
                            int correct3i, correct4i;
                            long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i );
                            long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i );
-                            float err2 = Ulp_Error_Double( test, correct3  );
-                            float err3 = Ulp_Error_Double( test, correct4  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
@@ -937,10 +937,10 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
                                correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i );
                                long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i );
                                long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i );
-                                err2 = Ulp_Error_Double( test, correct3  );
-                                err3 = Ulp_Error_Double( test, correct4  );
-                                float err4 = Ulp_Error_Double( test, correct7  );
-                                float err5 = Ulp_Error_Double( test, correct8  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct8  );
                                iErr3 = (long long) q2[j] - (long long) correct3i;
                                iErr4 = (long long) q2[j] - (long long) correct4i;
                                int64_t iErr7 = (long long) q2[j] - (long long) correct7i;
@@ -979,8 +979,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
                            int correct3i, correct4i;
                            long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i );
                            long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i );
-                            float err2 = Ulp_Error_Double( test, correct3  );
-                            float err3 = Ulp_Error_Double( test, correct4  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
--- a/test_conformance/math_brute_force/macro_binary.c
+++ b/test_conformance/math_brute_force/macro_binary.c
@@ -222,6 +222,7 @@ typedef struct TestInfo
    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
    cl_uint     step;                               // step between each chunk and the next.
    cl_uint     scale;                              // stride between individual test values
    int         ftz;                                // non-zero if running in flush to zero mode
@@ -249,6 +250,16 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);

@@ -319,7 +330,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
    // Run the kernels
    if( !gSkipCorrectnessTesting )
    {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );

        if( error )
            goto exit;
@@ -749,6 +760,16 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
    }

    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ftz = f->ftz || gForceFTZ;

@@ -820,7 +841,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)

    if( !gSkipCorrectnessTesting )
    {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );

        if( error )
            goto exit;
--- a/test_conformance/math_brute_force/macro_unary.c
+++ b/test_conformance/math_brute_force/macro_unary.c
@@ -193,6 +193,7 @@ typedef struct TestInfo
    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
    cl_uint     step;                               // step between each chunk and the next.
    cl_uint     scale;                              // stride between individual test values
    int         ftz;                                // non-zero if running in flush to zero mode
@@ -220,6 +221,16 @@ int TestMacro_Int_Float(const Func *f, MTdata d)
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
@@ -279,7 +290,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d)

    if( !gSkipCorrectnessTesting )
    {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );

        if( error )
            goto exit;
@@ -602,6 +613,16 @@ int TestMacro_Int_Double(const Func *f, MTdata d)
    }

    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ftz = f->ftz || gForceFTZ;

@@ -664,7 +685,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d)

    if( !gSkipCorrectnessTesting )
    {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );

        if( error )
            goto exit;
--- a/test_conformance/math_brute_force/mad.c
+++ b/test_conformance/math_brute_force/mad.c
@@ -785,7 +785,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                {
                    double test = ((double*) q)[j];
                    long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
-                    float err = Ulp_Error_Double( test, correct );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
                    int fail = ! (fabsf(err) <= f->double_ulps);

                    if( fail && ftz )
@@ -803,8 +803,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                        { // look at me,
                            long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
                            long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
@@ -826,10 +826,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                                correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
                                long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
                                long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                if( fabsf( err2 ) < fabsf(err ) )
@@ -860,14 +860,14 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                                    long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
                                    long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
                                    long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
-                                    err2 = Ulp_Error_Double( test, correct2  );
-                                    err3 = Ulp_Error_Double( test, correct3  );
-                                    err4 = Ulp_Error_Double( test, correct4  );
-                                    err5 = Ulp_Error_Double( test, correct5  );
-                                    float err6 = Ulp_Error_Double( test, correct6  );
-                                    float err7 = Ulp_Error_Double( test, correct7  );
-                                    float err8 = Ulp_Error_Double( test, correct8  );
-                                    float err9 = Ulp_Error_Double( test, correct9  );
+                                    err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                    err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                    err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                    err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                    float err6 = Bruteforce_Ulp_Error_Double( test, correct6  );
+                                    float err7 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                    float err8 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                    float err9 = Bruteforce_Ulp_Error_Double( test, correct9  );
                                    fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                     (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
                                                     (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
@@ -907,10 +907,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                                correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
                                long double correct4 = f->dfunc.f_fff( 0.0,  s2[j], -0.0 );
                                long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                if( fabsf( err2 ) < fabsf(err ) )
@@ -936,8 +936,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                        {
                            long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
                            long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
@@ -959,10 +959,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                                correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
                                long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
                                long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                if( fabsf( err2 ) < fabsf(err ) )
@@ -988,8 +988,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                        {
                            long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
                            long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
--- a/test_conformance/math_brute_force/main.c
+++ b/test_conformance/math_brute_force/main.c
--- a/test_conformance/math_brute_force/reference_math.c
+++ b/test_conformance/math_brute_force/reference_math.c
@@ -1790,7 +1790,7 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
 //    *signgamp = 1;
    ix = hx&0x7fffffff;
    if(ix>=0x7ff00000) return x*x;
-    if((ix|lx)==0) return one/zero;
+    if((ix|lx)==0) return INFINITY;
    if(ix<0x3b900000) {    /* |x|<2**-70, return -log(|x|) */
        if(hx<0) {
 //            *signgamp = -1;
@@ -1799,9 +1799,9 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
    }
    if(hx<0) {
        if(ix>=0x43300000)     /* |x|>=2**52, must be -integer */
-        return one/zero;
+        return INFINITY;
        t = reference_sinpi(x);
-        if(t==zero) return one/zero; /* -integer */
+        if(t==zero) return INFINITY; /* -integer */
        nadj = reference_log(pi/reference_fabs(t*x));
 //        if(t<zero) *signgamp = -1;
        x = -x;
--- a/test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
+++ b/test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
--- a/test_conformance/math_brute_force/ternary.c
+++ b/test_conformance/math_brute_force/ternary.c
@@ -1010,7 +1010,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                {
                    double test = ((double*) q)[j];
                    long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
-                    float err = Ulp_Error_Double( test, correct );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
                    int fail = ! (fabsf(err) <= f->double_ulps);

                    if( fail && ftz )
@@ -1028,8 +1028,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                        { // look at me,
                            long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
                            long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
@@ -1051,10 +1051,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                                correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
                                long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
                                long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                if( fabsf( err2 ) < fabsf(err ) )
@@ -1085,14 +1085,14 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                                    long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
                                    long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
                                    long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
-                                    err2 = Ulp_Error_Double( test, correct2  );
-                                    err3 = Ulp_Error_Double( test, correct3  );
-                                    err4 = Ulp_Error_Double( test, correct4  );
-                                    err5 = Ulp_Error_Double( test, correct5  );
-                                    float err6 = Ulp_Error_Double( test, correct6  );
-                                    float err7 = Ulp_Error_Double( test, correct7  );
-                                    float err8 = Ulp_Error_Double( test, correct8  );
-                                    float err9 = Ulp_Error_Double( test, correct9  );
+                                    err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                    err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                    err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                    err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                    float err6 = Bruteforce_Ulp_Error_Double( test, correct6  );
+                                    float err7 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                    float err8 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                    float err9 = Bruteforce_Ulp_Error_Double( test, correct9  );
                                    fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                     (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
                                                     (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
@@ -1132,10 +1132,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                                correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
                                long double correct4 = f->dfunc.f_fff( 0.0,  s2[j], -0.0 );
                                long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                if( fabsf( err2 ) < fabsf(err ) )
@@ -1161,8 +1161,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                        {
                            long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
                            long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
@@ -1184,10 +1184,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                                correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
                                long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
                                long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                if( fabsf( err2 ) < fabsf(err ) )
@@ -1213,8 +1213,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                        {
                            long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
                            long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
--- a/test_conformance/math_brute_force/unary.c
+++ b/test_conformance/math_brute_force/unary.c
@@ -200,6 +200,7 @@ typedef struct TestInfo
    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
    cl_uint     step;                               // step between each chunk and the next.
    cl_uint     scale;                              // stride between individual test values
    float       ulps;                               // max_allowed ulps
@@ -234,6 +235,16 @@ int TestFunc_Float_Float(const Func *f, MTdata d)
        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // subBufferSize * scale wrapped around when computing step, so the quotient no longer matches scale
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
    test_info.f = f;
    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -309,7 +320,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d)

    if( !gSkipCorrectnessTesting || skipTestingRelaxed)
    {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );

        // Accumulate the arithmetic errors
        for( i = 0; i < test_info.threadCount; i++ )
@@ -892,7 +903,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
            {
                cl_double test = ((cl_double*) q)[j];
                long double correct = func.f_f( s[j] );
-                float err = Ulp_Error_Double( test, correct );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
                int fail = ! (fabsf(err) <= ulps);

                if( fail )
@@ -912,8 +923,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                        {
                            long double correct2 = func.f_f( 0.0L );
                            long double correct3 = func.f_f( -0.0L );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
@@ -997,7 +1008,16 @@ int TestFunc_Double_Double(const Func *f, MTdata d)
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
-   test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // subBufferSize * scale wrapped around when computing step, so the quotient no longer matches scale
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }

    test_info.f = f;
    test_info.ulps = f->double_ulps;
@@ -1062,7 +1082,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d)

    if( !gSkipCorrectnessTesting )
    {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );

        // Accumulate the arithmetic errors
        for( i = 0; i < test_info.threadCount; i++ )
--- a/test_conformance/math_brute_force/unary_two_results.c
+++ b/test_conformance/math_brute_force/unary_two_results.c
@@ -800,8 +800,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d)
                    double test2 = ((double*) q2)[j];
                    long double correct2;
                    long double correct = f->dfunc.f_fpf( s[j], &correct2 );
-                    float err = Ulp_Error_Double( test, correct );
-                    float err2 = Ulp_Error_Double( test2, correct2 );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    float err2 = Bruteforce_Ulp_Error_Double( test2, correct2 );
                    int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps);
                    if( ftz )
                    {
@@ -837,10 +837,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d)
                            long double correct2p, correct2n;
                            long double correctp = f->dfunc.f_fpf( 0.0, &correct2p );
                            long double correctn = f->dfunc.f_fpf( -0.0, &correct2n );
-                            float errp = Ulp_Error_Double( test, correctp  );
-                            float err2p = Ulp_Error_Double( test, correct2p  );
-                            float errn = Ulp_Error_Double( test, correctn  );
-                            float err2n = Ulp_Error_Double( test, correct2n  );
+                            float errp = Bruteforce_Ulp_Error_Double( test, correctp  );
+                            float err2p = Bruteforce_Ulp_Error_Double( test, correct2p  );
+                            float errn = Bruteforce_Ulp_Error_Double( test, correctn  );
+                            float err2n = Bruteforce_Ulp_Error_Double( test, correct2n  );
                            fail =  fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps))    &&
                                            ((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) );
                            if( fabsf( errp ) < fabsf(err ) )
--- a/test_conformance/math_brute_force/unary_two_results_i.c
+++ b/test_conformance/math_brute_force/unary_two_results_i.c
@@ -633,7 +633,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d)
                    double test = ((double*) q)[j];
                    int correct2 = INT_MIN;
                    long double correct = f->dfunc.f_fpI( s[j], &correct2 );
-                    float err = Ulp_Error_Double( test, correct );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
                    cl_long iErr = (long long) q2[j] - (long long) correct2;
                    int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError );
                    if( ftz )
@@ -652,8 +652,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d)
                            int correct5, correct6;
                            long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 );
                            long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 );
-                            float err2 = Ulp_Error_Double( test, correct3  );
-                            float err3 = Ulp_Error_Double( test, correct4  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
                            cl_long iErr2 = (long long) q2[j] - (long long) correct5;
                            cl_long iErr3 = (long long) q2[j] - (long long) correct6;

--- a/test_conformance/math_brute_force/unary_u.c
+++ b/test_conformance/math_brute_force/unary_u.c
@@ -567,7 +567,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d)
                {
                    double test = ((double*) q)[j];
                    long double correct = f->dfunc.f_u( s[j] );
-                    float err = Ulp_Error_Double(test, correct);
+                    float err = Bruteforce_Ulp_Error_Double(test, correct);
                    int fail = ! (fabsf(err) <= f->double_ulps);

                    // half_sin/cos/tan are only valid between +-2**16, Inf, NaN