Reduce difference between files in math_brute_force (#1138)

* Reduce differences between files This will help reduce code duplication is future commits. Some code is moved around, some variables are renamed and some statements are slightly altered to reduce differences between files in math_brute_force, yet the semantics remains the same. The differences were identified using n-way diffs. Many differences remain however. Signed-off-by: Marco Antognini <marco.antognini@arm.com> * Workaround clang-format limitation Introduces some insignificant spaces to force clang-format to reduce the indentation and reduce differences between files. Signed-off-by: Marco Antognini <marco.antognini@arm.com>
2026-03-20 14:39:01 +00:00 · 2021-02-10 10:38:31 +00:00
parent f6b501352d
commit 8ad1088af9
13 changed files with 483 additions and 423 deletions
--- a/test_conformance/math_brute_force/binary.cpp
+++ b/test_conformance/math_brute_force/binary.cpp
@@ -25,9 +25,6 @@ int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata,
 int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata,
                                            bool relaxedMode);
 const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
 const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
 extern const vtbl _binary = { "binary", TestFunc_Float_Float_Float,
                              TestFunc_Double_Double_Double };
@@ -36,6 +33,8 @@ extern const vtbl _binary_nextafter = {
    TestFunc_Double_Double_Double_nextafter
 };
 const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
 const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
 static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
                       cl_kernel *k, cl_program *p, bool relaxedMode)
@@ -76,7 +75,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       float3 f0, f1;\n"
+        "       float3 f0;\n"
        "       float3 f1;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -163,7 +163,8 @@ static int BuildKernelDouble(const char *name, int vectorSize,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       double3 d0, d1;\n"
+        "       double3 d0;\n"
        "       double3 d1;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -208,6 +209,35 @@ static int BuildKernelDouble(const char *name, int vectorSize,
                       relaxedMode);
 }
 typedef struct BuildKernelInfo
 {
    cl_uint offset; // the first vector size to build
    cl_uint kernel_count;
    cl_kernel **kernels;
    cl_program *programs;
    const char *nameInCode;
    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
 } BuildKernelInfo;
 static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                  void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
    return BuildKernel(info->nameInCode, i, info->kernel_count,
                       info->kernels[i], info->programs + i, info->relaxedMode);
 }
 static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                   void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
    return BuildKernelDouble(info->nameInCode, i, info->kernel_count,
                             info->kernels[i], info->programs + i,
                             info->relaxedMode);
 }
 // A table of more difficult cases to get right
 static const float specialValuesFloat[] = {
    -NAN,
@@ -311,38 +341,9 @@ static const float specialValuesFloat[] = {
    +0.0f
 };
-static size_t specialValuesFloatCount =
+static const size_t specialValuesFloatCount =
    sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
 typedef struct BuildKernelInfo
 {
    cl_uint offset; // the first vector size to build
    cl_uint kernel_count;
    cl_kernel **kernels;
    cl_program *programs;
    const char *nameInCode;
    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
 } BuildKernelInfo;
 static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                  void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
    return BuildKernel(info->nameInCode, i, info->kernel_count,
                       info->kernels[i], info->programs + i, info->relaxedMode);
 }
 static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                   void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
    return BuildKernelDouble(info->nameInCode, i, info->kernel_count,
                             info->kernels[i], info->programs + i,
                             info->relaxedMode);
 }
 // Thread specific data for a worker thread
 typedef struct ThreadInfo
 {
@@ -426,11 +427,11 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
    test_info.ftz =
        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-
+    test_info.relaxedMode = relaxedMode;
    test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
    test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
    test_info.isNextafter = isNextafter;
-    test_info.relaxedMode = relaxedMode;
+
    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -543,12 +544,11 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
            vlog("passed");
    }
    if (gMeasureTimes)
    {
        // Init input arrays
-        uint32_t *p = (uint32_t *)gIn;
+        cl_uint *p = (cl_uint *)gIn;
-        uint32_t *p2 = (uint32_t *)gIn2;
+        cl_uint *p2 = (cl_uint *)gIn2;
        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
            p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000;
@@ -561,6 +561,7 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
        {
@@ -568,7 +569,6 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
            return error;
        }
        // Run the kernels
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
@@ -633,7 +633,6 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d,
        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
    vlog("\n");
 exit:
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
@@ -684,23 +683,21 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    int skipNanInf = job->skipNanInf;
    int isNextafter = job->isNextafter;
    cl_uint *t = 0;
-    float *r = 0, *s = 0, *s2 = 0;
+    cl_float *r = 0;
    cl_float *s = 0;
    cl_float *s2 = 0;
    cl_int copysign_test = 0;
    RoundingMode oldRoundMode;
    int skipVerification = 0;
    if (relaxedMode)
    {
        func = job->f->rfunc;
        if (strcmp(name, "pow") == 0 && gFastRelaxedDerived)
        {
            func = job->f->rfunc;
            ulps = INFINITY;
            skipVerification = 1;
        }
        else
        {
            func = job->f->rfunc;
        }
    }
    // start the map of the output arrays
@@ -744,7 +741,8 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        {
            fp[j] = specialValuesFloat[x];
            fp2[j] = specialValuesFloat[y];
-            if (++x >= specialValuesFloatCount)
+            ++x;
            if (x >= specialValuesFloatCount)
            {
                x = 0;
                y++;
@@ -1203,13 +1201,11 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        fflush(stdout);
    }
 exit:
    if (overflow) free(overflow);
    return error;
 }
 // A table of more difficult cases to get right
 static const double specialValuesDouble[] = {
    -NAN,
@@ -1444,10 +1440,10 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
            goto exit;
        }
        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
    }
    // Init the kernels
    {
        BuildKernelInfo build_info = {
@@ -1460,6 +1456,7 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
            goto exit;
    }
    // Run the kernels
    if (!gSkipCorrectnessTesting)
    {
        error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -1500,6 +1497,7 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
        {
@@ -1507,7 +1505,6 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
            return error;
        }
        // Run the kernels
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
@@ -1573,7 +1570,6 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
    vlog("\n");
 exit:
    // Release
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -1622,7 +1618,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
    int isNextafter = job->isNextafter;
    cl_ulong *t;
-    cl_double *r, *s, *s2;
+    cl_double *r;
    cl_double *s;
    cl_double *s2;
    Force64BitFPUPrecision();
@@ -1970,6 +1968,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
        }
        fflush(stdout);
    }
 exit:
    return error;
 }
--- a/test_conformance/math_brute_force/binaryOperator.cpp
+++ b/test_conformance/math_brute_force/binaryOperator.cpp
@@ -44,10 +44,11 @@ static int BuildKernel(const char *name, const char *operator_symbol,
                        "* in2 )\n"
                        "{\n"
                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] =  in1[i] ",
+                        "   out[i] = in1[i] ",
                        operator_symbol,
                        " in2[i];\n"
                        "}\n" };
    const char *c3[] = {
        "__kernel void ",
        name,
@@ -70,7 +71,8 @@ static int BuildKernel(const char *name, const char *operator_symbol,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       float3 f0, f1;\n"
+        "       float3 f0;\n"
        "       float3 f1;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -137,8 +139,9 @@ static int BuildKernelDouble(const char *name, const char *operator_symbol,
                        operator_symbol,
                        " in2[i];\n"
                        "}\n" };
    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void ",
        name,
        "_kernel",
@@ -160,7 +163,8 @@ static int BuildKernelDouble(const char *name, const char *operator_symbol,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       double3 d0, d1;\n"
+        "       double3 d0;\n"
        "       double3 d1;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -235,43 +239,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                             info->programs + i, info->relaxedMode);
 }
 // Thread specific data for a worker thread
 typedef struct ThreadInfo
 {
    cl_mem inBuf; // input buffer for the thread
    cl_mem inBuf2; // input buffer for the thread
    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
    float maxError; // max error value. Init to 0.
    double
        maxErrorValue; // position of the max error value (param 1).  Init to 0.
    double maxErrorValue2; // position of the max error value (param 2).  Init
                           // to 0.
    MTdata d;
    cl_command_queue tQueue; // per thread command queue to improve performance
 } ThreadInfo;
 typedef struct TestInfo
 {
    size_t subBufferSize; // Size of the sub-buffer in elements
    const Func *f; // A pointer to the function info
    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
    cl_kernel
        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
                               // worker thread:  k[vector_size][thread_id]
    ThreadInfo *
        tinfo; // An array of thread specific information for each worker thread
    cl_uint threadCount; // Number of worker threads
    cl_uint jobCount; // Number of jobs
    cl_uint step; // step between each chunk and the next.
    cl_uint scale; // stride between individual test values
    float ulps; // max_allowed ulps
    int ftz; // non-zero if running in flush to zero mode
    bool relaxedMode; // True if the test is being run in relaxed mode, false
                      // otherwise.
    // no special fields
 } TestInfo;
 // A table of more difficult cases to get right
 static const float specialValuesFloat[] = {
    -NAN,
@@ -375,9 +342,46 @@ static const float specialValuesFloat[] = {
    +0.0f
 };
-static size_t specialValuesFloatCount =
+static const size_t specialValuesFloatCount =
    sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
 // Thread specific data for a worker thread
 typedef struct ThreadInfo
 {
    cl_mem inBuf; // input buffer for the thread
    cl_mem inBuf2; // input buffer for the thread
    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
    float maxError; // max error value. Init to 0.
    double
        maxErrorValue; // position of the max error value (param 1).  Init to 0.
    double maxErrorValue2; // position of the max error value (param 2).  Init
                           // to 0.
    MTdata d;
    cl_command_queue tQueue; // per thread command queue to improve performance
 } ThreadInfo;
 typedef struct TestInfo
 {
    size_t subBufferSize; // Size of the sub-buffer in elements
    const Func *f; // A pointer to the function info
    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
    cl_kernel
        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
                               // worker thread:  k[vector_size][thread_id]
    ThreadInfo *
        tinfo; // An array of thread specific information for each worker thread
    cl_uint threadCount; // Number of worker threads
    cl_uint jobCount; // Number of jobs
    cl_uint step; // step between each chunk and the next.
    cl_uint scale; // stride between individual test values
    float ulps; // max_allowed ulps
    int ftz; // non-zero if running in flush to zero mode
    bool relaxedMode; // True if the test is being run in relaxed mode, false
                      // otherwise.
    // no special fields
 } TestInfo;
 static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *p);
 int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
@@ -398,6 +402,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
    test_info.subBufferSize = BUFFER_SIZE
        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize
@@ -405,7 +410,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
               * RoundUpToNextPowerOfTwo(test_info.threadCount));
    }
-    test_info.step = test_info.subBufferSize * test_info.scale;
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
    if (test_info.step / test_info.subBufferSize != test_info.scale)
    {
        // there was overflow
@@ -481,8 +486,8 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
                &region, &error);
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
-                vlog_error("Error: Unable to create sub-buffer of gInBuffer "
+                vlog_error("Error: Unable to create sub-buffer of "
-                           "for region {%zd, %zd}\n",
+                           "gInBuffer for region {%zd, %zd}\n",
                           region.origin, region.size);
                goto exit;
            }
@@ -513,6 +518,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
            goto exit;
    }
    // Run the kernels
    if (!gSkipCorrectnessTesting)
    {
        error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info);
@@ -536,7 +542,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
            vlog("passed");
    }
    if (gMeasureTimes)
    {
        // Init input arrays
@@ -554,6 +559,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
        {
@@ -626,7 +632,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
    vlog("\n");
 exit:
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
@@ -665,30 +670,31 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    cl_uint base = job_id * (cl_uint)job->step;
    ThreadInfo *tinfo = job->tinfo + thread_id;
    fptr func = job->f->func;
    int ftz = job->ftz;
    bool relaxedMode = job->relaxedMode;
    float ulps = getAllowedUlpError(job->f, relaxedMode);
    if (relaxedMode)
    {
        func = job->f->rfunc;
    }
    int ftz = job->ftz;
    MTdata d = tinfo->d;
    cl_uint j, k;
    cl_int error;
    cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
    const char *name = job->f->name;
-    cl_uint *t;
+    cl_uint *t = 0;
-    cl_float *r, *s, *s2;
+    cl_float *r = 0;
    cl_float *s = 0;
    cl_float *s2 = 0;
    RoundingMode oldRoundMode;
    if (relaxedMode)
    {
        func = job->f->rfunc;
    }
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_uint *out[VECTOR_SIZE_COUNT];
    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (uint32_t *)clEnqueueMapBuffer(
+        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
            buffer_size, 0, NULL, e + j, &error);
        if (error || NULL == out[j])
@@ -711,7 +717,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        specialValuesFloatCount * specialValuesFloatCount;
    int indx = (totalSpecialValueCount - 1) / buffer_elements;
    if (job_id <= (cl_uint)indx)
    {
        // Insert special values
@@ -877,7 +882,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    // an in order queue.
    for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (uint32_t *)clEnqueueMapBuffer(
+        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
            buffer_size, 0, NULL, NULL, &error);
        if (error || NULL == out[j])
@@ -889,9 +894,9 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    }
    // Wait for the last buffer
-    out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
+    out[j] = (cl_uint *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
-                                            CL_TRUE, CL_MAP_READ, 0,
+                                           CL_TRUE, CL_MAP_READ, 0, buffer_size,
-                                            buffer_size, 0, NULL, NULL, &error);
+                                           0, NULL, NULL, &error);
    if (error || NULL == out[j])
    {
        vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
@@ -1136,6 +1141,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        }
        fflush(stdout);
    }
 exit:
    if (overflow) free(overflow);
    return error;
@@ -1267,6 +1273,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
    float maxError = 0.0f;
    double maxErrorVal = 0.0;
    double maxErrorVal2 = 0.0;
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    // Init test_info
@@ -1275,6 +1282,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
    test_info.subBufferSize = BUFFER_SIZE
        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize
@@ -1373,7 +1381,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
    }
    // Init the kernels
    {
        BuildKernelInfo build_info = { gMinVectorSizeIndex,
@@ -1389,6 +1396,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
            goto exit;
    }
    // Run the kernels
    if (!gSkipCorrectnessTesting)
    {
        error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -1412,7 +1420,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
            vlog("passed");
    }
    if (gMeasureTimes)
    {
        // Init input arrays
@@ -1503,7 +1510,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
    vlog("\n");
 exit:
    // Release
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -1551,7 +1557,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
    cl_int error;
    const char *name = job->f->name;
    cl_ulong *t;
-    cl_double *r, *s, *s2;
+    cl_double *r;
    cl_double *s;
    cl_double *s2;
    Force64BitFPUPrecision();
--- a/test_conformance/math_brute_force/binary_i.cpp
+++ b/test_conformance/math_brute_force/binary_i.cpp
@@ -15,8 +15,8 @@
 //
 #include "Utility.h"
 #include <string.h>
 #include <limits.h>
 #include <string.h>
 #include "FunctionList.h"
 int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode);
@@ -228,7 +228,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                             info->relaxedMode);
 }
 // A table of more difficult cases to get right
 static const float specialValuesFloat[] = {
    -NAN,
@@ -331,9 +330,9 @@ static const float specialValuesFloat[] = {
    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
    +0.0f
 };
 static size_t specialValuesFloatCount =
    sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
 static const size_t specialValuesFloatCount =
    sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
 static const int specialValuesInt[] = {
    0,           1,           2,          3,          126,        127,
@@ -484,8 +483,8 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
                &region, &error);
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
-                vlog_error("Error: Unable to create sub-buffer of gInBuffer "
+                vlog_error("Error: Unable to create sub-buffer of "
-                           "for region {%zd, %zd}\n",
+                           "gInBuffer for region {%zd, %zd}\n",
                           region.origin, region.size);
                goto exit;
            }
@@ -497,6 +496,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
            goto exit;
        }
        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
    }
@@ -537,7 +537,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
            vlog("passed");
    }
    if (gMeasureTimes)
    {
        // Init input arrays
@@ -555,6 +554,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
        {
@@ -627,7 +627,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
        vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
    vlog("\n");
 exit:
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
@@ -658,7 +657,6 @@ exit:
    return error;
 }
 static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
 {
    const TestInfo *job = (const TestInfo *)data;
@@ -666,23 +664,24 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    size_t buffer_size = buffer_elements * sizeof(cl_float);
    cl_uint base = job_id * (cl_uint)job->step;
    ThreadInfo *tinfo = job->tinfo + thread_id;
    float ulps = job->ulps;
    fptr func = job->f->func;
    int ftz = job->ftz;
    float ulps = job->ulps;
    MTdata d = tinfo->d;
    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;
-    cl_uint *t;
+    cl_uint *t = 0;
-    cl_float *r, *s;
+    cl_float *r = 0;
-    cl_int *s2;
+    cl_float *s = 0;
    cl_int *s2 = 0;
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_uint *out[VECTOR_SIZE_COUNT];
    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (uint32_t *)clEnqueueMapBuffer(
+        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
            buffer_size, 0, NULL, e + j, &error);
        if (error || NULL == out[j])
@@ -700,9 +699,11 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
    j = 0;
    int totalSpecialValueCount =
        specialValuesFloatCount * specialValuesIntCount;
    int indx = (totalSpecialValueCount - 1) / buffer_elements;
    if (job_id <= (cl_uint)indx)
    { // test edge cases
        float *fp = (float *)p;
@@ -716,7 +717,8 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        {
            fp[j] = specialValuesFloat[x];
            ip2[j] = specialValuesInt[y];
-            if (++x >= specialValuesFloatCount)
+            ++x;
            if (x >= specialValuesFloatCount)
            {
                x = 0;
                y++;
@@ -820,7 +822,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    // an in order queue.
    for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (uint32_t *)clEnqueueMapBuffer(
+        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
            buffer_size, 0, NULL, NULL, &error);
        if (error || NULL == out[j])
@@ -832,9 +834,9 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    }
    // Wait for the last buffer
-    out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
+    out[j] = (cl_uint *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
-                                            CL_TRUE, CL_MAP_READ, 0,
+                                           CL_TRUE, CL_MAP_READ, 0, buffer_size,
-                                            buffer_size, 0, NULL, NULL, &error);
+                                           0, NULL, NULL, &error);
    if (error || NULL == out[j])
    {
        vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
@@ -1057,6 +1059,7 @@ static const double specialValuesDouble[] = {
    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
    +0.0,
 };
 static size_t specialValuesDoubleCount =
    sizeof(specialValuesDouble) / sizeof(specialValuesDouble[0]);
@@ -1165,12 +1168,9 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            /* Qualcomm fix: 9461 read-write flags must be compatible with
             * parent buffer */
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
                &region, &error);
            /* Qualcomm fix: end */
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
                vlog_error("Error: Unable to create sub-buffer of gInBuffer "
@@ -1190,7 +1190,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
    }
    // Init the kernels
    {
        BuildKernelInfo build_info = {
@@ -1320,7 +1319,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
        vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
    vlog("\n");
 exit:
    // Release
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -1367,7 +1365,8 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
    cl_int error;
    const char *name = job->f->name;
    cl_ulong *t;
-    cl_double *r, *s;
+    cl_double *r;
    cl_double *s;
    cl_int *s2;
    Force64BitFPUPrecision();
@@ -1398,6 +1397,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
    int totalSpecialValueCount =
        specialValuesDoubleCount * specialValuesInt2Count;
    int indx = (totalSpecialValueCount - 1) / buffer_elements;
    if (job_id <= (cl_uint)indx)
    { // test edge cases
        cl_double *fp = (cl_double *)p;
--- a/test_conformance/math_brute_force/binary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i.cpp
@@ -40,7 +40,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                        sizeNames[vectorSize],
                        "* in1, __global float",
                        sizeNames[vectorSize],
-                        "* in2)\n"
+                        "* in2 )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
@@ -71,7 +71,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       float3 f0, f1;\n"
+        "       float3 f0;\n"
        "       float3 f1;\n"
        "       int3 i0 = 0xdeaddead;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -83,7 +85,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
        "       int3 i0 = 0xdeaddead;\n"
        "       f0 = ",
        name,
        "( f0, f1, &i0 );\n"
@@ -132,12 +133,12 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                        sizeNames[vectorSize],
                        "* in1, __global double",
                        sizeNames[vectorSize],
-                        "* in2)\n"
+                        "* in2 )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
                        name,
-                        "( in1[i], in2[i], out2 + i );\n"
+                        "( in1[i], in2[i], out2[i] );\n"
                        "}\n" };
    const char *c3[] = {
@@ -164,7 +165,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       double3 d0, d1;\n"
+        "       double3 d0;\n"
        "       double3 d1;\n"
        "       int3 i0 = 0xdeaddead;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -176,7 +179,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
        "       int3 i0 = 0xdeaddead;\n"
        "       d0 = ",
        name,
        "( d0, d1, &i0 );\n"
@@ -309,20 +311,22 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    uint64_t i;
    uint32_t j, k;
    int error;
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
    float maxError = 0.0f;
    float float_ulps;
    int64_t maxError2 = 0;
    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
    int64_t maxError2 = 0;
    float maxErrorVal = 0.0f;
    float maxErrorVal2 = 0.0f;
    size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    cl_uint threadCount = GetThreadCount();
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    float float_ulps;
    if (gIsEmbedded)
        float_ulps = f->float_embedded_ulps;
    else
@@ -485,7 +489,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
-                uint32_t *q = (uint32_t *)gOut[k];
+                uint32_t *q = (uint32_t *)(gOut[k]);
                int32_t *q2 = (int32_t *)gOut2[k];
                // Check for exact match to correctly rounded result
@@ -695,9 +699,11 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    if (gMeasureTimes)
    {
        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
+        cl_uint *p = (cl_uint *)gIn;
        for (j = 0; j < bufferSize / sizeof(float); j++)
        {
            p[j] = genrand_int32(d);
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
@@ -823,9 +829,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
        {
            return error;
        }
    }
    for (i = 0; i < (1ULL << 32); i += step)
@@ -1185,7 +1189,6 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
            {
                vlog(".");
            }
            fflush(stdout);
        }
    }
@@ -1202,7 +1205,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    {
        // Init input array
        double *p = (double *)gIn;
-        for (j = 0; j < bufferSize / sizeof(double); j++)
+        for (j = 0; j < bufferSize / sizeof(cl_double); j++)
            p[j] = DoubleFromUInt32(genrand_int32(d));
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/i_unary.cpp
+++ b/test_conformance/math_brute_force/i_unary.cpp
@@ -42,6 +42,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                        name,
                        "( in[i] );\n"
                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
@@ -87,7 +88,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "}\n"
    };
    const char **kern = c;
    size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -114,7 +114,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                        sizeNames[vectorSize],
                        "* out, __global double",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
@@ -177,7 +177,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        kernSize = sizeof(c3) / sizeof(c3[0]);
    }
    char testName[32];
    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
@@ -219,7 +218,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
    size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(float)) + 1);
@@ -234,27 +233,30 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    Force64BitFPUPrecision();
    // Init the kernels
-    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+    {
-                                   f->nameInCode, relaxedMode };
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
+                                       f->nameInCode, relaxedMode };
-                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+        if ((error = ThreadPool_Do(BuildKernel_FloatFn,
-                               &build_info)))
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-        return error;
+                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
+        cl_uint *p = (cl_uint *)gIn;
        if (gWimpyMode)
        {
            for (j = 0; j < bufferSize / sizeof(float); j++)
-                p[j] = (uint32_t)i + j * scale;
+                p[j] = (cl_uint)i + j * scale;
        }
        else
        {
            for (j = 0; j < bufferSize / sizeof(float); j++)
                p[j] = (uint32_t)i + j;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
@@ -281,7 +283,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
-            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            size_t localCount = (bufferSize + vectorSize - 1)
                / vectorSize; // bufferSize / vectorSize  rounded up
            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
                                        &gOutBuffer[j])))
            {
@@ -396,8 +399,9 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        // Run the kernels
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            size_t localCount = (bufferSize + vectorSize - 1)
                / vectorSize; // bufferSize / vectorSize  rounded up
            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
                                        &gOutBuffer[j])))
            {
@@ -447,6 +451,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    }
    vlog("\n");
 exit:
    RestoreFPState(&oldMode);
    // Release
@@ -481,13 +486,13 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    Force64BitFPUPrecision();
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
    if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
    {
-        return error;
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                       f->nameInCode, relaxedMode };
        if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
@@ -504,6 +509,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
            for (j = 0; j < bufferSize / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j);
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
@@ -529,8 +535,9 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        // Run the kernels
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            size_t localCount = (bufferSize + vectorSize - 1)
                / vectorSize; // bufferSize / vectorSize  rounded up
            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
                                        &gOutBuffer[j])))
            {
@@ -616,6 +623,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
            {
                vlog(".");
            }
            fflush(stdout);
        }
    }
@@ -698,7 +706,6 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    vlog("\n");
 exit:
    RestoreFPState(&oldMode);
    // Release
--- a/test_conformance/math_brute_force/macro_binary.cpp
+++ b/test_conformance/math_brute_force/macro_binary.cpp
@@ -64,7 +64,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       float3 f0, f1;\n"
+        "       float3 f0;\n"
        "       float3 f1;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -92,7 +93,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
        "}\n"
    };
    const char **kern = c;
    size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -110,7 +110,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
                       relaxedMode);
 }
 static int BuildKernelDouble(const char *name, int vectorSize,
                             cl_uint kernel_count, cl_kernel *k, cl_program *p,
                             bool relaxedMode)
@@ -153,7 +152,8 @@ static int BuildKernelDouble(const char *name, int vectorSize,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       double3 f0, f1;\n"
+        "       double3 f0;\n"
        "       double3 f1;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -190,7 +190,6 @@ static int BuildKernelDouble(const char *name, int vectorSize,
        kernSize = sizeof(c3) / sizeof(c3[0]);
    }
    char testName[32];
    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
@@ -228,7 +227,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                             info->relaxedMode);
 }
 // A table of more difficult cases to get right
 static const float specialValuesFloat[] = {
    -NAN,
@@ -379,6 +377,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    test_info.subBufferSize = BUFFER_SIZE
        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize
@@ -460,8 +459,8 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                &region, &error);
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
-                vlog_error("Error: Unable to create sub-buffer of gInBuffer "
+                vlog_error("Error: Unable to create sub-buffer of "
-                           "for region {%zd, %zd}\n",
+                           "gInBuffer for region {%zd, %zd}\n",
                           region.origin, region.size);
                goto exit;
            }
@@ -489,7 +488,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
    }
    // Run the kernels
    if (!gSkipCorrectnessTesting)
    {
@@ -506,8 +504,8 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    if (gMeasureTimes)
    {
        // Init input arrays
-        uint32_t *p = (uint32_t *)gIn;
+        cl_uint *p = (cl_uint *)gIn;
-        uint32_t *p2 = (uint32_t *)gIn2;
+        cl_uint *p2 = (cl_uint *)gIn2;
        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
            p[j] = genrand_int32(d);
@@ -520,6 +518,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
        {
@@ -531,8 +530,9 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        // Run the kernels
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
            if ((error = clSetKernelArg(test_info.k[j][0], 0,
                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
            {
@@ -586,6 +586,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                      f->name, sizeNames[j]);
        }
    }
    vlog("\n");
 exit:
@@ -631,8 +632,10 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;
-    cl_int *t, *r;
+    cl_int *t = 0;
-    cl_float *s, *s2;
+    cl_int *r = 0;
    cl_float *s = 0;
    cl_float *s2 = 0;
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
@@ -657,6 +660,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
    j = 0;
    int totalSpecialValueCount =
        specialValuesFloatCount * specialValuesFloatCount;
    int indx = (totalSpecialValueCount - 1) / buffer_elements;
@@ -674,7 +678,8 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        {
            fp[j] = specialValuesFloat[x];
            fp2[j] = specialValuesFloat[y];
-            if (++x >= specialValuesFloatCount)
+            ++x;
            if (x >= specialValuesFloatCount)
            {
                x = 0;
                y++;
@@ -690,7 +695,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        p2[j] = genrand_int32(d);
    }
    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
                                      buffer_size, p, 0, NULL, NULL)))
    {
@@ -895,6 +899,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
            }
        }
    }
    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
@@ -1044,7 +1049,6 @@ static const double specialValuesDouble[] = {
 static size_t specialValuesDoubleCount =
    sizeof(specialValuesDouble) / sizeof(specialValuesDouble[0]);
 static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *p);
 int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
@@ -1061,6 +1065,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    test_info.subBufferSize = BUFFER_SIZE
        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize
@@ -1136,12 +1141,9 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            /* Qualcomm fix: 9461 read-write flags must be compatible with
             * parent buffer */
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
                &region, &error);
            /* Qualcomm fix: end */
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
                vlog_error("Error: Unable to create sub-buffer of gInBuffer "
@@ -1161,7 +1163,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
    }
    // Init the kernels
    {
        BuildKernelInfo build_info = {
@@ -1174,6 +1175,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
    }
    // Run the kernels
    if (!gSkipCorrectnessTesting)
    {
        error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -1189,8 +1191,8 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    if (gMeasureTimes)
    {
        // Init input arrays
-        uint64_t *p = (uint64_t *)gIn;
+        cl_ulong *p = (cl_ulong *)gIn;
-        uint64_t *p2 = (uint64_t *)gIn2;
+        cl_ulong *p2 = (cl_ulong *)gIn2;
        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
        {
            p[j] =
@@ -1216,8 +1218,9 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        // Run the kernels
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
            if ((error = clSetKernelArg(test_info.k[j][0], 0,
                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
            {
@@ -1319,8 +1322,10 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;
-    cl_long *t, *r;
+    cl_long *t;
-    cl_double *s, *s2;
+    cl_long *r;
    cl_double *s;
    cl_double *s2;
    Force64BitFPUPrecision();
@@ -1378,7 +1383,6 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
        ((cl_ulong *)p2)[j] = genrand_int64(d);
    }
    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
                                      buffer_size, p, 0, NULL, NULL)))
    {
@@ -1493,11 +1497,12 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
    t = (cl_long *)r;
    for (j = 0; j < buffer_elements; j++)
    {
-        cl_long *q = (cl_long *)out[0];
+        cl_long *q = out[0];
        // If we aren't getting the correctly rounded result
        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
        {
            // If we aren't getting the correctly rounded result
            if (ftz)
            {
                if (IsDoubleSubnormal(s[j]))
@@ -1528,7 +1533,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
                }
            }
-            uint64_t err = t[j] - q[j];
+            cl_ulong err = t[j] - q[j];
            if (q[j] > t[j]) err = q[j] - t[j];
            vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld "
                       "vs. %lld  (index: %d)\n",
@@ -1575,7 +1580,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
                    }
                }
-                uint64_t err = -t[j] - q[j];
+                cl_ulong err = -t[j] - q[j];
                if (q[j] > -t[j]) err = q[j] + t[j];
                vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, "
                           "%.13la}: *%lld vs. %lld  (index: %d)\n",
--- a/test_conformance/math_brute_force/macro_unary.cpp
+++ b/test_conformance/math_brute_force/macro_unary.cpp
@@ -34,13 +34,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
                        sizeNames[vectorSize],
                        "* out, __global float",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
                        name,
                        "( in[i] );\n"
                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
@@ -115,7 +116,7 @@ static int BuildKernelDouble(const char *name, int vectorSize,
                        sizeNames[vectorSize],
                        "* out, __global double",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
@@ -178,7 +179,6 @@ static int BuildKernelDouble(const char *name, int vectorSize,
        kernSize = sizeof(c3) / sizeof(c3[0]);
    }
    char testName[32];
    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
@@ -258,6 +258,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    test_info.subBufferSize = BUFFER_SIZE
        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize
@@ -279,6 +280,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    test_info.f = f;
    test_info.ftz =
        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
@@ -328,8 +330,8 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                &region, &error);
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
-                vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
+                vlog_error("Error: Unable to create sub-buffer of "
-                           "for region {%zd, %zd}\n",
+                           "gOutBuffer for region {%zd, %zd}\n",
                           region.origin, region.size);
                goto exit;
            }
@@ -355,6 +357,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
    }
    // Run the kernels
    if (!gSkipCorrectnessTesting)
    {
        error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info);
@@ -501,7 +504,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }
    // Get that moving
    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
@@ -569,7 +571,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }
    // Get that moving
    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
@@ -594,6 +595,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
            return error;
        }
    }
    // Wait for the last buffer
    out[j] = (cl_int *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
                                          CL_TRUE, CL_MAP_READ, 0, buffer_size,
@@ -711,12 +713,14 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    size_t i, j;
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    // Init test_info
    memset(&test_info, 0, sizeof(test_info));
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE
        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize
@@ -782,12 +786,9 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            /* Qualcomm fix: 9461 read-write flags must be compatible with
             * parent buffer */
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
                &region, &error);
            /* Qualcomm fix: end */
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
                vlog_error("Error: Unable to create sub-buffer of gInBuffer "
@@ -817,6 +818,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
    }
    // Run the kernels
    if (!gSkipCorrectnessTesting)
    {
        error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -846,8 +848,9 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        // Run the kernels
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
            if ((error = clSetKernelArg(test_info.k[j][0], 0,
                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
            {
@@ -900,6 +903,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    vlog("\n");
 exit:
    // Release
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
@@ -936,9 +940,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
    cl_uint base = job_id * (cl_uint)job->step;
    ThreadInfo *tinfo = job->tinfo + thread_id;
    dptr dfunc = job->f->dfunc;
    int ftz = job->ftz;
    cl_uint j, k;
    cl_int error;
    int ftz = job->ftz;
    const char *name = job->f->name;
    Force64BitFPUPrecision();
@@ -1027,7 +1031,6 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }
    // Get that moving
    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
@@ -1052,6 +1055,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
            return error;
        }
    }
    // Wait for the last buffer
    out[j] = (cl_long *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
                                           CL_TRUE, CL_MAP_READ, 0, buffer_size,
@@ -1062,14 +1066,12 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data)
        return error;
    }
    // Verify data
    cl_long *t = (cl_long *)r;
    for (j = 0; j < buffer_elements; j++)
    {
        cl_long *q = out[0];
        // If we aren't getting the correctly rounded result
        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
        {
--- a/test_conformance/math_brute_force/mad.cpp
+++ b/test_conformance/math_brute_force/mad.cpp
@@ -44,6 +44,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                        name,
                        "( in1[i], in2[i], in3[i] );\n"
                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
@@ -66,7 +67,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       float3 f0, f1, f2;\n"
+        "       float3 f0;\n"
        "       float3 f1;\n"
        "       float3 f2;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -133,6 +136,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                        name,
                        "( in1[i], in2[i], in3[i] );\n"
                        "}\n" };
    const char *c3[] = {
        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void math_kernel",
@@ -156,7 +160,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       double3 d0, d1, d2;\n"
+        "       double3 d0;\n"
        "       double3 d1;\n"
        "       double3 d2;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -247,37 +253,42 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode)
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    // Init the kernels
-    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+    {
-                                   f->nameInCode, relaxedMode };
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
+                                       f->nameInCode, relaxedMode };
-                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+        if ((error = ThreadPool_Do(BuildKernel_FloatFn,
-                               &build_info)))
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-        return error;
+                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
+        cl_uint *p = (cl_uint *)gIn;
-        uint32_t *p2 = (uint32_t *)gIn2;
+        cl_uint *p2 = (cl_uint *)gIn2;
-        uint32_t *p3 = (uint32_t *)gIn3;
+        cl_uint *p3 = (cl_uint *)gIn3;
        for (j = 0; j < bufferSize / sizeof(float); j++)
        {
            p[j] = genrand_int32(d);
            p2[j] = genrand_int32(d);
            p3[j] = genrand_int32(d);
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          bufferSize, gIn2, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
                                          bufferSize, gIn3, 0, NULL, NULL)))
        {
@@ -379,15 +390,15 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode)
        if (gWimpyMode)
            vlog("Wimp pass");
        else
-            vlog("pass");
+            vlog("passed");
    }
    if (gMeasureTimes)
    {
        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
+        cl_uint *p = (cl_uint *)gIn;
-        uint32_t *p2 = (uint32_t *)gIn2;
+        cl_uint *p2 = (cl_uint *)gIn2;
-        uint32_t *p3 = (uint32_t *)gIn3;
+        cl_uint *p3 = (cl_uint *)gIn3;
        for (j = 0; j < bufferSize / sizeof(float); j++)
        {
            p[j] = genrand_int32(d);
@@ -508,18 +519,18 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
    double maxErrorVal2 = 0.0f;
    double maxErrorVal3 = 0.0f;
    size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    uint64_t step = getTestStep(sizeof(double), bufferSize);
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
    if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
    {
-        return error;
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                       f->nameInCode, relaxedMode };
        if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
@@ -534,18 +545,21 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
            p2[j] = DoubleFromUInt32(genrand_int32(d));
            p3[j] = DoubleFromUInt32(genrand_int32(d));
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          bufferSize, gIn2, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
                                          bufferSize, gIn3, 0, NULL, NULL)))
        {
@@ -647,7 +661,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
        if (gWimpyMode)
            vlog("Wimp pass");
        else
-            vlog("pass");
+            vlog("passed");
    }
    if (gMeasureTimes)
--- a/test_conformance/math_brute_force/ternary.cpp
+++ b/test_conformance/math_brute_force/ternary.cpp
@@ -52,7 +52,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2 , "
+        "( __global float* out, __global float* in, __global float* in2, "
        "__global float* in3)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
@@ -71,7 +71,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       float3 f0, f1, f2;\n"
+        "       float3 f0;\n"
        "       float3 f1;\n"
        "       float3 f2;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -143,7 +145,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void math_kernel",
        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2 , "
+        "( __global double* out, __global double* in, __global double* in2, "
        "__global double* in3)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
@@ -162,7 +164,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
-        "       double3 d0, d1, d2;\n"
+        "       double3 d0;\n"
        "       double3 d1;\n"
        "       double3 d2;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
@@ -235,7 +239,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                             info->programs + i, info->relaxedMode);
 }
 // A table of more difficult cases to get right
 static const float specialValuesFloat[] = {
    -NAN,
@@ -315,7 +318,7 @@ static const float specialValuesFloat[] = {
    +0.0f
 };
-static size_t specialValuesFloatCount =
+static const size_t specialValuesFloatCount =
    sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
@@ -324,6 +327,9 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    uint64_t i;
    uint32_t j, k;
    int error;
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
    float maxError = 0.0f;
@@ -332,33 +338,34 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    float maxErrorVal2 = 0.0f;
    float maxErrorVal3 = 0.0f;
    size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
    uint64_t step = getTestStep(sizeof(float), bufferSize);
-    int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;
+
    cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
    float float_ulps;
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    if (gIsEmbedded)
        float_ulps = f->float_embedded_ulps;
    else
        float_ulps = f->float_ulps;
    int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;
    // Init the kernels
-    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+    {
-                                   f->nameInCode, relaxedMode };
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
+                                       f->nameInCode, relaxedMode };
-                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+        if ((error = ThreadPool_Do(BuildKernel_FloatFn,
-                               &build_info)))
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-        return error;
+                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
+        cl_uint *p = (cl_uint *)gIn;
-        uint32_t *p2 = (uint32_t *)gIn2;
+        cl_uint *p2 = (cl_uint *)gIn2;
-        uint32_t *p3 = (uint32_t *)gIn3;
+        cl_uint *p3 = (cl_uint *)gIn3;
        j = 0;
        if (i == 0)
        { // test edge cases
@@ -393,18 +400,21 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            p2[j] = genrand_int32(d);
            p3[j] = genrand_int32(d);
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          bufferSize, gIn2, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
                                          bufferSize, gIn3, 0, NULL, NULL)))
        {
@@ -493,7 +503,6 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
        }
        // Read the data back
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
@@ -963,9 +972,9 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    if (gMeasureTimes)
    {
        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
+        cl_uint *p = (cl_uint *)gIn;
-        uint32_t *p2 = (uint32_t *)gIn2;
+        cl_uint *p2 = (cl_uint *)gIn2;
-        uint32_t *p3 = (uint32_t *)gIn3;
+        cl_uint *p3 = (cl_uint *)gIn3;
        for (j = 0; j < bufferSize / sizeof(float); j++)
        {
            p[j] = genrand_int32(d);
@@ -1160,21 +1169,21 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
    double maxErrorVal = 0.0f;
    double maxErrorVal2 = 0.0f;
    double maxErrorVal3 = 0.0f;
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
    uint64_t step = getTestStep(sizeof(double), bufferSize);
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    Force64BitFPUPrecision();
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
    if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
    {
-        return error;
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                       f->nameInCode, relaxedMode };
        if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
@@ -1213,18 +1222,21 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
            p2[j] = DoubleFromUInt32(genrand_int32(d));
            p3[j] = DoubleFromUInt32(genrand_int32(d));
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          bufferSize, gIn2, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
            return error;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
                                          bufferSize, gIn3, 0, NULL, NULL)))
        {
@@ -1287,7 +1299,6 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
            }
        }
        // Get that moving
        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
--- a/test_conformance/math_brute_force/unary.cpp
+++ b/test_conformance/math_brute_force/unary.cpp
@@ -37,13 +37,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
                        sizeNames[vectorSize],
                        "* out, __global float",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
                        name,
                        "( in[i] );\n"
                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
@@ -89,7 +90,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
        "}\n"
    };
    const char **kern = c;
    size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -118,7 +118,7 @@ static int BuildKernelDouble(const char *name, int vectorSize,
                        sizeNames[vectorSize],
                        "* out, __global double",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
@@ -181,7 +181,6 @@ static int BuildKernelDouble(const char *name, int vectorSize,
        kernSize = sizeof(c3) / sizeof(c3[0]);
    }
    char testName[32];
    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
@@ -249,7 +248,7 @@ typedef struct TestInfo
    int isRangeLimited; // 1 if the function is only to be evaluated over a
                        // range
    float half_sin_cos_tan_limit;
-    bool relaxedMode; // True if test is to be run in relaxed mode, false
+    bool relaxedMode; // True if test is running in relaxed mode, false
                      // otherwise.
 } TestInfo;
@@ -269,10 +268,10 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    // Init test_info
    memset(&test_info, 0, sizeof(test_info));
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE
        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize
@@ -345,8 +344,8 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                &region, &error);
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
-                vlog_error("Error: Unable to create sub-buffer of gInBuffer "
+                vlog_error("Error: Unable to create sub-buffer of "
-                           "for region {%zd, %zd}\n",
+                           "gInBuffer for region {%zd, %zd}\n",
                           region.origin, region.size);
                goto exit;
            }
@@ -390,6 +389,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
    }
    // Run the kernels
    if (!gSkipCorrectnessTesting || skipTestingRelaxed)
    {
        error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info);
@@ -443,8 +443,9 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        // Run the kernels
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
            if ((error = clSetKernelArg(test_info.k[j][0], 0,
                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
            {
@@ -479,9 +480,9 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                }
                uint64_t endTime = GetTime();
-                double current_time = SubtractTime(endTime, startTime);
+                double time = SubtractTime(endTime, startTime);
-                sum += current_time;
+                sum += time;
-                if (current_time < bestTime) bestTime = current_time;
+                if (time < bestTime) bestTime = time;
            }
            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
@@ -497,6 +498,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    vlog("\n");
 exit:
    // Release
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
@@ -553,7 +555,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    cl_uint *out[VECTOR_SIZE_COUNT];
    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (uint32_t *)clEnqueueMapBuffer(
+        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
            buffer_size, 0, NULL, e + j, &error);
        if (error || NULL == out[j])
@@ -627,7 +629,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
            return error;
        }
-        // run the kernel
+        // Run the kernel
        size_t vectorCount =
            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -655,7 +657,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }
    // Get that moving
    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
@@ -670,7 +671,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
    // an in order queue.
    for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (uint32_t *)clEnqueueMapBuffer(
+        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
            buffer_size, 0, NULL, NULL, &error);
        if (error || NULL == out[j])
@@ -680,6 +681,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data)
            return error;
        }
    }
    // Wait for the last buffer
    out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
                                            CL_TRUE, CL_MAP_READ, 0,
@@ -1246,12 +1248,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            /* Qualcomm fix: 9461 read-write flags must be compatible with
             * parent buffer */
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
                &region, &error);
            /* Qualcomm fix: end */
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
                vlog_error("Error: Unable to create sub-buffer of gInBuffer "
@@ -1281,6 +1280,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
    }
    // Run the kernels
    if (!gSkipCorrectnessTesting)
    {
        error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
@@ -1334,8 +1334,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        // Run the kernels
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
            if ((error = clSetKernelArg(test_info.k[j][0], 0,
                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
            {
@@ -1370,9 +1371,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
                }
                uint64_t endTime = GetTime();
-                double current_time = SubtractTime(endTime, startTime);
+                double time = SubtractTime(endTime, startTime);
-                sum += current_time;
+                sum += time;
-                if (current_time < bestTime) bestTime = current_time;
+                if (time < bestTime) bestTime = time;
            }
            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
@@ -1393,6 +1394,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    vlog("\n");
 exit:
    // Release
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
--- a/test_conformance/math_brute_force/unary_two_results.cpp
+++ b/test_conformance/math_brute_force/unary_two_results.cpp
@@ -36,7 +36,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                        sizeNames[vectorSize],
                        "* out2, __global float",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
@@ -93,6 +93,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "   }\n"
        "}\n"
    };
    const char **kern = c;
    size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -121,7 +122,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                        sizeNames[vectorSize],
                        "* out2, __global double",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
@@ -179,6 +180,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "   }\n"
        "}\n"
    };
    const char **kern = c;
    size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -242,17 +244,19 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
    cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
    int isFract = 0 == strcmp("fract", f->nameInCode);
    int skipNanInf = isFract && !gInfNanSupport;
    float float_ulps = getAllowedUlpError(f, relaxedMode);
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    float float_ulps = getAllowedUlpError(f, relaxedMode);
    // Init the kernels
-    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+    {
-                                   f->nameInCode, relaxedMode };
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
+                                       f->nameInCode, relaxedMode };
-                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+        if ((error = ThreadPool_Do(BuildKernel_FloatFn,
-                               &build_info)))
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-        return error;
+                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
    {
@@ -282,6 +286,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                }
            }
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
@@ -454,7 +459,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                    if (relaxedMode || skipNanInf)
                    {
                        if (skipNanInf && overflow[j]) continue;
                        // Note: no double rounding here.  Reference functions
                        // calculate in single precision.
                        if (IsFloatInfinity(correct) || IsFloatNaN(correct)
@@ -670,6 +674,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        uint32_t *p = (uint32_t *)gIn;
        for (j = 0; j < bufferSize / sizeof(float); j++)
            p[j] = genrand_int32(d);
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
@@ -706,7 +711,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
            double bestTime = INFINITY;
            for (k = 0; k < PERF_LOOP_COUNT; k++)
            {
                uint64_t startTime = GetTime();
                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
@@ -775,13 +779,13 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
    Force64BitFPUPrecision();
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
    if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
    {
-        return error;
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                       f->nameInCode, relaxedMode };
        if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
@@ -1103,7 +1107,6 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
            double bestTime = INFINITY;
            for (k = 0; k < PERF_LOOP_COUNT; k++)
            {
                uint64_t startTime = GetTime();
                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
--- a/test_conformance/math_brute_force/unary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i.cpp
@@ -37,13 +37,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                        sizeNames[vectorSize],
                        "* out2, __global float",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
                        name,
                        "( in[i], out2 + i );\n"
                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
@@ -93,6 +94,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "   }\n"
        "}\n"
    };
    const char **kern = c;
    size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -121,13 +123,14 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                        sizeNames[vectorSize],
                        "* out2, __global double",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
                        name,
                        "( in[i], out2 + i );\n"
                        "}\n" };
    const char *c3[] = {
        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void math_kernel",
@@ -178,6 +181,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "   }\n"
        "}\n"
    };
    const char **kern = c;
    size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -240,13 +244,13 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
    float maxErrorVal = 0.0f;
    float maxErrorVal2 = 0.0f;
    size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
    float float_ulps;
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(float)) + 1);
    cl_ulong maxiError;
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    float float_ulps;
    if (gIsEmbedded)
        float_ulps = f->float_embedded_ulps;
    else
@@ -255,12 +259,14 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
    maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0;
    // Init the kernels
-    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+    {
-                                   f->nameInCode, relaxedMode };
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
+                                       f->nameInCode, relaxedMode };
-                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+        if ((error = ThreadPool_Do(BuildKernel_FloatFn,
-                               &build_info)))
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-        return error;
+                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
    {
@@ -600,22 +606,21 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
    double maxErrorVal2 = 0.0f;
    cl_ulong maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
    size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
-
+    uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
-    uint64_t step = getTestStep(sizeof(double), bufferSize);
+    int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(cl_double)) + 1);
    int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(double)) + 1);
    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
    Force64BitFPUPrecision();
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
    if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
    {
-        return error;
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                       f->nameInCode, relaxedMode };
        if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
@@ -624,12 +629,12 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        double *p = (double *)gIn;
        if (gWimpyMode)
        {
-            for (j = 0; j < bufferSize / sizeof(double); j++)
+            for (j = 0; j < bufferSize / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
        }
        else
        {
-            for (j = 0; j < bufferSize / sizeof(double); j++)
+            for (j = 0; j < bufferSize / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j);
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
@@ -928,7 +933,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
            double clocksPerOp = bestTime * (double)gDeviceFrequency
                * gComputeDevices * gSimdSize * 1e6
                / (bufferSize / sizeof(double));
-            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sd%s",
+            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
                      f->name, sizeNames[j]);
        }
        for (; j < gMaxVectorSizeIndex; j++) vlog("\t     -- ");
--- a/test_conformance/math_brute_force/unary_u.cpp
+++ b/test_conformance/math_brute_force/unary_u.cpp
@@ -33,13 +33,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                        sizeNames[vectorSize],
                        "* out, __global uint",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
                        name,
                        "( in[i] );\n"
                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
@@ -112,7 +113,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                        sizeNames[vectorSize],
                        "* out, __global ulong",
                        sizeNames[vectorSize],
-                        "* in)\n"
+                        "* in )\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
                        "   out[i] = ",
@@ -120,51 +121,53 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                        "( in[i] );\n"
                        "}\n" };
-    const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+    const char *c3[] = {
-                         "__kernel void math_kernel",
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                         sizeNames[vectorSize],
+        "__kernel void math_kernel",
-                         "( __global double* out, __global ulong* in)\n"
+        sizeNames[vectorSize],
-                         "{\n"
+        "( __global double* out, __global ulong* in                 )\n"
-                         "   size_t i = get_global_id(0);\n"
+        "{\n"
-                         "   if( i + 1 < get_global_size(0) )\n"
+        "   size_t i = get_global_id(0);\n"
-                         "   {\n"
+        "   if( i + 1 < get_global_size(0) )\n"
-                         "       ulong3 u0 = vload3( 0, in + 3 * i );\n"
+        "   {\n"
-                         "       double3 f0 = ",
+        "       ulong3 u0 = vload3( 0, in + 3 * i );\n"
-                         name,
+        "       double3 f0 = ",
-                         "( u0 );\n"
+        name,
-                         "       vstore3( f0, 0, out + 3*i );\n"
+        "( u0 );\n"
-                         "   }\n"
+        "       vstore3( f0, 0, out + 3*i );\n"
-                         "   else\n"
+        "   }\n"
-                         "   {\n"
+        "   else\n"
-                         "       size_t parity = i & 1;   // Figure out how "
+        "   {\n"
-                         "many elements are left over after BUFFER_SIZE % "
+        "       size_t parity = i & 1;   // Figure out how many elements are "
-                         "(3*sizeof(float)). Assume power of two buffer size \n"
+        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-                         "       ulong3 u0;\n"
+        "buffer size \n"
-                         "       switch( parity )\n"
+        "       ulong3 u0;\n"
-                         "       {\n"
+        "       switch( parity )\n"
-                         "           case 1:\n"
+        "       {\n"
-                         "               u0 = (ulong3)( in[3*i], "
+        "           case 1:\n"
-                         "0xdeaddeaddeaddeadUL, 0xdeaddeaddeaddeadUL ); \n"
+        "               u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, "
-                         "               break;\n"
+        "0xdeaddeaddeaddeadUL ); \n"
-                         "           case 0:\n"
+        "               break;\n"
-                         "               u0 = (ulong3)( in[3*i], in[3*i+1], "
+        "           case 0:\n"
-                         "0xdeaddeaddeaddeadUL ); \n"
+        "               u0 = (ulong3)( in[3*i], in[3*i+1], "
-                         "               break;\n"
+        "0xdeaddeaddeaddeadUL ); \n"
-                         "       }\n"
+        "               break;\n"
-                         "       double3 f0 = ",
+        "       }\n"
-                         name,
+        "       double3 f0 = ",
-                         "( u0 );\n"
+        name,
-                         "       switch( parity )\n"
+        "( u0 );\n"
-                         "       {\n"
+        "       switch( parity )\n"
-                         "           case 0:\n"
+        "       {\n"
-                         "               out[3*i+1] = f0.y; \n"
+        "           case 0:\n"
-                         "               // fall through\n"
+        "               out[3*i+1] = f0.y; \n"
-                         "           case 1:\n"
+        "               // fall through\n"
-                         "               out[3*i] = f0.x; \n"
+        "           case 1:\n"
-                         "               break;\n"
+        "               out[3*i] = f0.x; \n"
-                         "       }\n"
+        "               break;\n"
-                         "   }\n"
+        "       }\n"
-                         "}\n" };
+        "   }\n"
        "}\n"
    };
    const char **kern = c;
    size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -175,7 +178,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        kernSize = sizeof(c3) / sizeof(c3[0]);
    }
    char testName[32];
    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
@@ -221,27 +223,28 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
    float maxErrorVal = 0.0f;
    size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
    uint64_t step = getTestStep(sizeof(float), bufferSize);
    int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(double)) + 1);
    int isRangeLimited = 0;
    float float_ulps;
    float half_sin_cos_tan_limit = 0;
    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
    float float_ulps;
    if (gIsEmbedded)
        float_ulps = f->float_embedded_ulps;
    else
        float_ulps = f->float_ulps;
    // Init the kernels
-    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
+    {
-                                   f->nameInCode, relaxedMode };
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
+                                       f->nameInCode, relaxedMode };
-                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+        if ((error = ThreadPool_Do(BuildKernel_FloatFn,
-                               &build_info)))
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-        return error;
+                                   &build_info)))
            return error;
    }
    if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
    {
@@ -317,7 +320,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
-                vlog_error("FAILURE -- could not execute kernel\n");
+                vlog_error("FAILED -- could not execute kernel\n");
                goto exit;
            }
        }
@@ -419,7 +422,6 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        }
    }
    if (!gSkipCorrectnessTesting)
    {
        if (gWimpyMode)
@@ -477,7 +479,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
-                    vlog_error("FAILURE -- could not execute kernel\n");
+                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
                }
@@ -540,13 +542,13 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
    Force64BitFPUPrecision();
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
    if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
    {
-        return error;
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                       f->nameInCode, relaxedMode };
        if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
            return error;
    }
    for (i = 0; i < (1ULL << 32); i += step)
@@ -599,7 +601,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
-                vlog_error("FAILURE -- could not execute kernel\n");
+                vlog_error("FAILED -- could not execute kernel\n");
                goto exit;
            }
        }
@@ -627,7 +629,6 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        if (gSkipCorrectnessTesting) break;
        // Verify data
        uint64_t *t = (uint64_t *)gOut_Ref;
        for (j = 0; j < bufferSize / sizeof(cl_double); j++)
@@ -741,7 +742,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
-                    vlog_error("FAILURE -- could not execute kernel\n");
+                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
                }