Apply clang-format on math_brute_force (#1104)

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
2026-03-26 08:49:02 +00:00 · 2021-01-14 13:27:18 +00:00
parent ffa75c37ce
commit e5f89249fa
22 changed files with 14745 additions and 10770 deletions
--- a/test_conformance/math_brute_force/FunctionList.cpp
+++ b/test_conformance/math_brute_force/FunctionList.cpp
@@ -105,11 +105,14 @@
 extern const vtbl _unary; // float foo( float )
 extern const vtbl _unary_u; // float foo( uint ),  double foo( ulong )
 extern const vtbl _i_unary; // int foo( float )
-extern const vtbl _macro_unary;         // int foo( float ),  returns {0,1} for scalar, { 0, -1 } for vector
+extern const vtbl _macro_unary; // int foo( float ),  returns {0,1} for scalar,
                                // { 0, -1 } for vector
 extern const vtbl _binary; // float foo( float, float )
-extern const vtbl _binary_nextafter;    // float foo( float, float ), special handling for nextafter
+extern const vtbl _binary_nextafter; // float foo( float, float ), special
                                     // handling for nextafter
 extern const vtbl _binary_operator; // float .op. float
-extern const vtbl _macro_binary;        // int foo( float, float ), returns {0,1} for scalar, { 0, -1 } for vector
+extern const vtbl _macro_binary; // int foo( float, float ), returns {0,1} for
                                 // scalar, { 0, -1 } for vector
 extern const vtbl _binary_i; // float foo( float, int )
 extern const vtbl _ternary; // float foo( float, float, float )
 extern const vtbl _unary_two_results; // float foo( float, float * )
--- a/test_conformance/math_brute_force/FunctionList.h
+++ b/test_conformance/math_brute_force/FunctionList.h
@@ -30,8 +30,7 @@
 #include "harness/mt19937.h"
-typedef union fptr
+typedef union fptr {
 {
    void *p;
    double (*f_f)(double);
    double (*f_u)(cl_uint);
@@ -48,8 +47,7 @@ typedef union fptr
    float (*f_fma)(float, float, float, int);
 } fptr;
-typedef union dptr
+typedef union dptr {
 {
    void *p;
    long double (*f_f)(long double);
    long double (*f_u)(cl_ulong);
@@ -77,7 +75,8 @@ typedef struct vtbl
 typedef struct Func
 {
    const char *name; // common name, to be used as an argument in the shell
-  const char      *nameInCode;        // name as it appears in the __kernel, usually the same as name, but different for multiplication
+    const char *nameInCode; // name as it appears in the __kernel, usually the
                            // same as name, but different for multiplication
    fptr func;
    dptr dfunc;
    fptr rfunc;
@@ -97,5 +96,3 @@ extern const Func  functionList[];
 extern const size_t functionListCount;
 #endif
--- a/test_conformance/math_brute_force/Sleep.cpp
+++ b/test_conformance/math_brute_force/Sleep.cpp
@@ -27,22 +27,19 @@
    io_object_t iterator;
 } sleepInfo;
-    void sleepCallback(    void *            refcon,
+void sleepCallback(void* refcon, io_service_t service, natural_t messageType,
                        io_service_t        service,
                        natural_t        messageType,
                   void* messageArgument);
-    void sleepCallback(    void *            refcon UNUSED,
+void sleepCallback(void* refcon UNUSED, io_service_t service UNUSED,
-                        io_service_t        service UNUSED,
+                   natural_t messageType, void* messageArgument)
                        natural_t        messageType,
                        void *            messageArgument )
 {
    IOReturn result;
    /*
    service -- The IOService whose state has changed.
-    messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the IOService's family.
+    messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the
-    messageArgument -- An argument for the message, dependent on the messageType.
+    IOService's family. messageArgument -- An argument for the message,
    dependent on the messageType.
    */
    switch (messageType)
    {
@@ -59,7 +56,8 @@
            // this notification. Important: if you don’t respond, there will
            // be a 30-second timeout before the computer sleeps.
            // IOCancelPowerChange(root_port,(long)messageArgument);
-                result = IOCancelPowerChange(sleepInfo.connection,(long)messageArgument);
+            result = IOCancelPowerChange(sleepInfo.connection,
                                         (long)messageArgument);
            if (kIOReturnSuccess != result)
                vlog("sleep prevention failed. (%d)\n", result);
            break;
@@ -71,17 +69,13 @@
 #endif
 void PreventSleep(void)
 {
 #if defined(__APPLE__)
    vlog("Disabling sleep... ");
    sleepInfo.iterator = (io_object_t)0;
    sleepInfo.port = NULL;
-    sleepInfo.connection = IORegisterForSystemPower
+    sleepInfo.connection = IORegisterForSystemPower(
                            (
        &sleepInfo, // void * refcon,
        &sleepInfo.port, // IONotificationPortRef * thePortRef,
        sleepCallback, // IOServiceInterestCallback callback,
@@ -113,6 +107,3 @@ void ResumeSleep( void )
    vlog("*** ResumeSleep() is not implemented on this platform.\n");
 #endif
 }
--- a/test_conformance/math_brute_force/Sleep.h
+++ b/test_conformance/math_brute_force/Sleep.h
@@ -20,5 +20,3 @@ void PreventSleep( void );
 void ResumeSleep(void);
 #endif /* SLEEP_H */
--- a/test_conformance/math_brute_force/Utility.cpp
+++ b/test_conformance/math_brute_force/Utility.cpp
@@ -17,9 +17,9 @@
 #include "FunctionList.h"
 #if defined(__PPC__)
-// Global varaiable used to hold the FPU control register state. The FPSCR register can not
+// Global varaiable used to hold the FPU control register state. The FPSCR
-// be used because not all Power implementations retain or observed the NI (non-IEEE
+// register can not be used because not all Power implementations retain or
-// mode) bit.
+// observed the NI (non-IEEE mode) bit.
 __thread fpu_control_t fpu_control = 0;
 #endif
@@ -47,11 +47,13 @@ void AddD(double *rhi, double *rlo, double a, double b)
 {
    double zhi, zlo;
    zhi = a + b;
-    if(fabs(a) > fabs(b)) {
+    if (fabs(a) > fabs(b))
    {
        zlo = zhi - a;
        zlo = b - zlo;
    }
-    else {
+    else
    {
        zlo = zhi - b;
        zlo = a - zlo;
    }
@@ -86,7 +88,8 @@ void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
 {
    double r, s;
    r = xh + yh;
-    s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl) : (yh - r + xh + xl + yl);
+    s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl)
                              : (yh - r + xh + xl + yl);
    *rhi = r + s;
    *rlo = (r - (*rhi)) + s;
 }
@@ -100,69 +103,58 @@ void DivideDD(double *chi, double *clo, double a, double b)
    *clo = rhi / b;
 }
-// These functions comapre two floats/doubles. Since some platforms may choose to
+// These functions comapre two floats/doubles. Since some platforms may choose
-// flush denormals to zeros before comparison, comparison like a < b may give wrong
+// to flush denormals to zeros before comparison, comparison like a < b may give
-// result in "certain cases" where we do need correct compasion result when operands
+// wrong result in "certain cases" where we do need correct compasion result
-// are denormals .... these functions comapre floats/doubles using signed integer/long int
+// when operands are denormals .... these functions comapre floats/doubles using
-// rep. In other cases, when flushing to zeros is fine, these should not be used.
+// signed integer/long int rep. In other cases, when flushing to zeros is fine,
-// Also these doesn't check for nans and assume nans are handled separately as special edge case
+// these should not be used. Also these doesn't check for nans and assume nans
-// by the caller which calls these functions
+// are handled separately as special edge case by the caller which calls these
-// return 0 if both are equal, 1 if x > y and -1 if x < y.
+// functions return 0 if both are equal, 1 if x > y and -1 if x < y.
-inline
+inline int compareFloats(float x, float y)
 int compareFloats(float x, float y)
 {
    int32f_t a, b;
    a.f = x;
    b.f = y;
-    if( a.i & 0x80000000 )
+    if (a.i & 0x80000000) a.i = 0x80000000 - a.i;
-        a.i = 0x80000000 - a.i;
+    if (b.i & 0x80000000) b.i = 0x80000000 - b.i;
    if( b.i & 0x80000000 )
        b.i = 0x80000000 - b.i;
-    if( a.i == b.i )
+    if (a.i == b.i) return 0;
        return 0;
    return a.i < b.i ? -1 : 1;
 }
-inline
+inline int compareDoubles(double x, double y)
 int compareDoubles(double x, double y)
 {
    int64d_t a, b;
    a.d = x;
    b.d = y;
-    if( a.l & 0x8000000000000000LL )
+    if (a.l & 0x8000000000000000LL) a.l = 0x8000000000000000LL - a.l;
-        a.l = 0x8000000000000000LL - a.l;
+    if (b.l & 0x8000000000000000LL) b.l = 0x8000000000000000LL - b.l;
    if( b.l & 0x8000000000000000LL )
        b.l = 0x8000000000000000LL - b.l;
-    if( a.l == b.l )
+    if (a.l == b.l) return 0;
        return 0;
    return a.l < b.l ? -1 : 1;
 }
-void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed)
+void logFunctionInfo(const char *fname, unsigned int float_size,
                     unsigned int isFastRelaxed)
 {
    char const *fpSizeStr = NULL;
    char const *fpFastRelaxedStr = "";
-    switch (float_size) {
+    switch (float_size)
-    case sizeof(cl_double):
+    {
-        fpSizeStr = "fp64";
+        case sizeof(cl_double): fpSizeStr = "fp64"; break;
-        break;
+        case sizeof(cl_float): fpSizeStr = "fp32"; break;
-    case sizeof(cl_float):
+        case sizeof(cl_half): fpSizeStr = "fp16"; break;
        fpSizeStr = "fp32";
        break;
    case sizeof(cl_half):
        fpSizeStr = "fp16";
        break;
    }
-    if (isFastRelaxed) {
+    if (isFastRelaxed)
    {
        fpFastRelaxedStr = "rlx";
    }
    vlog("%15s %4s %4s", fname, fpSizeStr, fpFastRelaxedStr);
--- a/test_conformance/math_brute_force/Utility.h
+++ b/test_conformance/math_brute_force/Utility.h
@@ -110,13 +110,17 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
 static inline double DoubleFromUInt32(uint32_t bits);
 static inline double DoubleFromUInt32(uint32_t bits)
 {
-    union{ uint64_t u; double d;} u;
+    union {
        uint64_t u;
        double d;
    } u;
    // split 0x89abcdef to 0x89abc00000000def
    u.u = bits & 0xfffU;
    u.u |= (uint64_t)(bits & ~0xfffU) << 32;
-    // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s
+    // sign extend the leading bit of def segment as sign bit so that the middle
    // region consists of either all 1s or 0s
    u.u -= (bits & 0x800U) << 1;
    // return result
@@ -128,8 +132,10 @@ void _LogBuildError( cl_program p, int line, const char *file );
 #define PERF_LOOP_COUNT 100
-//The spec is fairly clear that we may enforce a hard cutoff to prevent premature flushing to zero.
+// The spec is fairly clear that we may enforce a hard cutoff to prevent
-// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + ulp_limit to be flushed to zero.
+// premature flushing to zero.
 // However, to avoid conflict for 1.0, we are letting results at TYPE_MIN +
 // ulp_limit to be flushed to zero.
 static inline int IsFloatResultSubnormal(double x, float ulps)
 {
    x = fabs(x) - MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps;
@@ -150,21 +156,30 @@ static inline int IsDoubleResultSubnormal( long double x, float ulps )
 static inline int IsFloatInfinity(double x)
 {
-  union { cl_float d; cl_uint u; } u;
+    union {
        cl_float d;
        cl_uint u;
    } u;
    u.d = (cl_float)x;
    return ((u.u & 0x7fffffffU) == 0x7F800000U);
 }
 static inline int IsFloatMaxFloat(double x)
 {
-  union { cl_float d; cl_uint u; } u;
+    union {
        cl_float d;
        cl_uint u;
    } u;
    u.d = (cl_float)x;
    return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
 }
 static inline int IsFloatNaN(double x)
 {
-  union { cl_float d; cl_uint u; } u;
+    union {
        cl_float d;
        cl_uint u;
    } u;
    u.d = (cl_float)x;
    return ((u.u & 0x7fffffffU) > 0x7F800000U);
 }
@@ -190,42 +205,46 @@ static inline void Force64BitFPUPrecision(void)
    new_cw = orig_cw | 0x0300; // set precision to 64-bit
    __asm__ __volatile__("fldcw  %0" ::"m"(new_cw));
 #elif defined(_WIN32) && defined(__INTEL_COMPILER)
-    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not* work on win.x64:
+    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not*
-    // > On the x64 architecture, changing the floating point precision is not supported.
+    // work on win.x64: > On the x64 architecture, changing the floating point
-    // (Taken from http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
+    // precision is not supported. (Taken from
    // http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
    int cw;
-    __asm { fnstcw cw };    // Get current value of FPU control word.
+    __asm { fnstcw cw }
-    cw = cw & 0xfffffcff | ( 3 << 8 ); // Set Precision Control to Double Extended Precision.
+    ; // Get current value of FPU control word.
-    __asm { fldcw cw };     // Set new value of FPU control word.
+    cw = cw & 0xfffffcff
        | (3 << 8); // Set Precision Control to Double Extended Precision.
    __asm { fldcw cw }
    ; // Set new value of FPU control word.
 #else
    /* Implement for other platforms if needed */
 #endif
 }
-extern
+extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
 void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
-typedef union
+typedef union {
 {
    int32_t i;
    float f;
 } int32f_t;
-typedef union
+typedef union {
 {
    int64_t l;
    double d;
 } int64d_t;
 void MulD(double *rhi, double *rlo, double u, double v);
 void AddD(double *rhi, double *rlo, double a, double b);
-void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
+void MulDD(double *rhi, double *rlo, double xh, double xl, double yh,
-void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
+           double yl);
 void AddDD(double *rhi, double *rlo, double xh, double xl, double yh,
           double yl);
 void DivideDD(double *chi, double *clo, double a, double b);
 int compareFloats(float x, float y);
 int compareDoubles(double x, double y);
-void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed);
+void logFunctionInfo(const char *fname, unsigned int float_size,
                     unsigned int isFastRelaxed);
 float getAllowedUlpError(const Func *f, const bool relaxed);
--- a/test_conformance/math_brute_force/binary.cpp
+++ b/test_conformance/math_brute_force/binary.cpp
--- a/test_conformance/math_brute_force/binaryOperator.cpp
+++ b/test_conformance/math_brute_force/binaryOperator.cpp
--- a/test_conformance/math_brute_force/binary_i.cpp
+++ b/test_conformance/math_brute_force/binary_i.cpp
--- a/test_conformance/math_brute_force/binary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i.cpp
--- a/test_conformance/math_brute_force/i_unary.cpp
+++ b/test_conformance/math_brute_force/i_unary.cpp
@@ -33,24 +33,38 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
 static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                       cl_program *p, bool relaxedMode)
 {
-    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n"
+    const char *c[] = { "__kernel void math_kernel",
                        sizeNames[vectorSize],
                        "( __global int",
                        sizeNames[vectorSize],
                        "* out, __global float",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i] );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i] );\n"
-    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n"
+                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
        "( __global int* out, __global float* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-                            "       int3 i0 = ", name, "( f0 );\n"
+        "       int3 i0 = ",
        name,
        "( f0 );\n"
        "       vstore3( i0, 0, out + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
-                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
        "       float3 f0;\n"
        "       switch( parity )\n"
        "       {\n"
@@ -61,7 +75,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
-                            "       int3 i0 = ", name, "( f0 );\n"
+        "       int3 i0 = ",
        name,
        "( f0 );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
@@ -86,7 +102,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
    }
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
 }
@@ -95,26 +112,40 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                             cl_program *p, bool relaxedMode)
 {
    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n"
+                        "__kernel void math_kernel",
                        sizeNames[vectorSize],
                        "( __global int",
                        sizeNames[vectorSize],
                        "* out, __global double",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i] );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i] );\n"
                        "}\n" };
-    const char *c3[] = {"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+    const char *c3[] = {
-                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global double* in)\n"
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void math_kernel",
        sizeNames[vectorSize],
        "( __global int* out, __global double* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-                        "       int3 i0 = ", name, "( f0 );\n"
+        "       int3 i0 = ",
        name,
        "( f0 );\n"
        "       vstore3( i0, 0, out + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
-                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
        "       double3 f0;\n"
        "       switch( parity )\n"
        "       {\n"
@@ -125,7 +156,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
-                        "       int3 i0 = ", name, "( f0 );\n"
+        "       int3 i0 = ",
        name,
        "( f0 );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
@@ -150,7 +183,8 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
 }
@@ -164,8 +198,10 @@ typedef struct BuildKernelInfo
    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
 } BuildKernelInfo;
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                  void *p);
 static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                  void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -173,8 +209,10 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi
                       info->programs + i, info->relaxedMode);
 }
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                   void *p);
 static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                   void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -206,12 +244,14 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
-    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
        return error;
    /*
        for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i,
-            return error;
+       programs + i) ) ) return error;
    */
    for (i = 0; i < (1ULL << 32); i += step)
@@ -228,7 +268,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
            for (j = 0; j < bufferSize / sizeof(float); j++)
                p[j] = (uint32_t)i + j;
        }
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -239,9 +280,12 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
                                          bufferSize, gOut[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                           error, j);
                goto exit;
            }
        }
@@ -251,10 +295,22 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
-            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
                vlog_error("FAILED -- could not execute kernel\n");
                goto exit;
@@ -262,8 +318,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        }
        // Get that moving
-        if( (error = clFlush(gQueue) ))
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
            vlog( "clFlush failed\n" );
        // Calculate the correctly rounded reference result
        int *r = (int *)gOut_Ref;
@@ -274,15 +329,16 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        // Read the data back
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         bufferSize, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                goto exit;
            }
        }
-        if( gSkipCorrectnessTesting )
+        if (gSkipCorrectnessTesting) break;
            break;
        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
@@ -298,14 +354,15 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                    {
                        unsigned int correct0 = f->func.i_f(0.0);
                        unsigned int correct1 = f->func.i_f(-0.0);
-                        if( q[j] == correct0 || q[j] == correct1 )
+                        if (q[j] == correct0 || q[j] == correct1) continue;
                            continue;
                    }
                    uint32_t err = t[j] - q[j];
-                    if( q[j] > t[j] )
+                    if (q[j] > t[j]) err = q[j] - t[j];
-                        err = q[j] - t[j];
+                    vlog_error("\nERROR: %s%s: %d ulp error at %a (0x%8.8x): "
-                    vlog_error( "\nERROR: %s%s: %d ulp error at %a (0x%8.8x): *%d vs. %d\n", f->name, sizeNames[k], err, ((float*) gIn)[j], ((cl_uint*) gIn)[j], t[j], q[j] );
+                               "*%d vs. %d\n",
                               f->name, sizeNames[k], err, ((float *)gIn)[j],
                               ((cl_uint *)gIn)[j], t[j], q[j]);
                    error = -1;
                    goto exit;
                }
@@ -316,8 +373,10 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            if (gVerboseBruteForce)
            {
-               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-           } else
+                     bufferSize);
            }
            else
            {
                vlog(".");
            }
@@ -339,7 +398,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        uint32_t *p = (uint32_t *)gIn;
        for (j = 0; j < bufferSize / sizeof(float); j++)
            p[j] = genrand_int32(d);
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -351,15 +411,27 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
            for (k = 0; k < PERF_LOOP_COUNT; k++)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
@@ -375,14 +447,15 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+                / (bufferSize / sizeof(float));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
                      f->name, sizeNames[j]);
        }
    }
@@ -431,8 +504,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    }
    /*
        for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels +
-            return error;
+       i, programs + i) ) ) return error;
    */
    for (i = 0; i < (1ULL << 32); i += step)
@@ -449,7 +522,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
            for (j = 0; j < bufferSize / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j);
        }
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -460,9 +534,12 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
                                          bufferSize, gOut[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                           error, j);
                goto exit;
            }
        }
@@ -472,10 +549,22 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
-            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
                vlog_error("FAILED -- could not execute kernel\n");
                goto exit;
@@ -483,8 +572,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        }
        // Get that moving
-        if( (error = clFlush(gQueue) ))
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
            vlog( "clFlush failed\n" );
        // Calculate the correctly rounded reference result
        int *r = (int *)gOut_Ref;
@@ -495,15 +583,16 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        // Read the data back
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         bufferSize, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                goto exit;
            }
        }
-        if( gSkipCorrectnessTesting )
+        if (gSkipCorrectnessTesting) break;
            break;
        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
@@ -519,14 +608,15 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                    {
                        unsigned int correct0 = f->dfunc.i_f(0.0);
                        unsigned int correct1 = f->dfunc.i_f(-0.0);
-                        if( q[j] == correct0 || q[j] == correct1 )
+                        if (q[j] == correct0 || q[j] == correct1) continue;
                            continue;
                    }
                    uint32_t err = t[j] - q[j];
-                    if( q[j] > t[j] )
+                    if (q[j] > t[j]) err = q[j] - t[j];
-                        err = q[j] - t[j];
+                    vlog_error(
-                    vlog_error( "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n", f->name, sizeNames[k], err, ((double*) gIn)[j], t[j], q[j] );
+                        "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n",
                        f->name, sizeNames[k], err, ((double *)gIn)[j], t[j],
                        q[j]);
                    error = -1;
                    goto exit;
                }
@@ -537,13 +627,14 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            if (gVerboseBruteForce)
            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-            } else
+                     bufferSize);
            }
            else
            {
                vlog(".");
            }
            fflush(stdout);
        }
    }
@@ -561,7 +652,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        double *p = (double *)gIn;
        for (j = 0; j < bufferSize / sizeof(cl_double); j++)
            p[j] = DoubleFromUInt32(genrand_int32(d));
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -573,15 +665,27 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
            for (k = 0; k < PERF_LOOP_COUNT; k++)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
@@ -597,17 +701,17 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+                / (bufferSize / sizeof(double));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
                      f->name, sizeNames[j]);
        }
-        for( ; j < gMaxVectorSizeIndex; j++ )
+        for (; j < gMaxVectorSizeIndex; j++) vlog("\t     -- ");
            vlog( "\t     -- " );
    }
    vlog("\n");
@@ -624,4 +728,3 @@ exit:
    return error;
 }
--- a/test_conformance/math_brute_force/macro_binary.cpp
+++ b/test_conformance/math_brute_force/macro_binary.cpp
--- a/test_conformance/math_brute_force/macro_unary.cpp
+++ b/test_conformance/math_brute_force/macro_unary.cpp
@@ -33,24 +33,38 @@ static int BuildKernelDouble(const char *name, int vectorSize,
 static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
                       cl_kernel *k, cl_program *p, bool relaxedMode)
 {
-    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n"
+    const char *c[] = { "__kernel void math_kernel",
                        sizeNames[vectorSize],
                        "( __global int",
                        sizeNames[vectorSize],
                        "* out, __global float",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i] );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i] );\n"
-    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n"
+                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
        "( __global int* out, __global float* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-                            "       int3 i0 = ", name, "( f0 );\n"
+        "       int3 i0 = ",
        name,
        "( f0 );\n"
        "       vstore3( i0, 0, out + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
-                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
        "       int3 i0;\n"
        "       float3 f0;\n"
        "       switch( parity )\n"
@@ -62,7 +76,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
        "               f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n"
        "               break;\n"
        "       }\n"
-                            "       i0 = ", name, "( f0 );\n"
+        "       i0 = ",
        name,
        "( f0 );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
@@ -86,7 +102,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
    }
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
                       relaxedMode);
@@ -97,26 +114,40 @@ static int BuildKernelDouble(const char *name, int vectorSize,
                             bool relaxedMode)
 {
    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n"
+                        "__kernel void math_kernel",
                        sizeNames[vectorSize],
                        "( __global long",
                        sizeNames[vectorSize],
                        "* out, __global double",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i] );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i] );\n"
                        "}\n" };
-    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+    const char *c3[] = {
-                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global long* out, __global double* in)\n"
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void math_kernel",
        sizeNames[vectorSize],
        "( __global long* out, __global double* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-                        "       long3 l0 = ", name, "( d0 );\n"
+        "       long3 l0 = ",
        name,
        "( d0 );\n"
        "       vstore3( l0, 0, out + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
-                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
        "       double3 d0;\n"
        "       switch( parity )\n"
        "       {\n"
@@ -127,7 +158,9 @@ static int BuildKernelDouble(const char *name, int vectorSize,
        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
-                        "       long3 l0 = ", name, "( d0 );\n"
+        "       long3 l0 = ",
        name,
        "( d0 );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
@@ -152,7 +185,8 @@ static int BuildKernelDouble(const char *name, int vectorSize,
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
                       relaxedMode);
@@ -168,8 +202,10 @@ typedef struct BuildKernelInfo
    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
 } BuildKernelInfo;
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                  void *p);
 static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                  void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -177,8 +213,10 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi
                       info->kernels[i], info->programs + i, info->relaxedMode);
 }
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                   void *p);
 static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                   void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -200,8 +238,11 @@ typedef struct TestInfo
    size_t subBufferSize; // Size of the sub-buffer in elements
    const Func *f; // A pointer to the function info
    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    cl_kernel
-    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
                               // worker thread:  k[vector_size][thread_id]
    ThreadInfo *
        tinfo; // An array of thread specific information for each worker thread
    cl_uint threadCount; // Number of worker threads
    cl_uint jobCount; // Number of jobs
    cl_uint step; // step between each chunk and the next.
@@ -223,11 +264,14 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    // Init test_info
    memset(&test_info, 0, sizeof(test_info));
    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.subBufferSize = BUFFER_SIZE
        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = getTestScale(sizeof(cl_float));
    if (gWimpyMode)
    {
-        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.subBufferSize = gWimpyBufferSize
            / (sizeof(cl_float)
               * RoundUpToNextPowerOfTwo(test_info.threadCount));
    }
    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
@@ -242,8 +286,10 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    }
    test_info.f = f;
-    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    test_info.ftz =
-    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
@@ -256,34 +302,49 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        }
        memset(test_info.k[i], 0, array_size);
    }
-    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    test_info.tinfo =
        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
    if (NULL == test_info.tinfo)
    {
-        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        vlog_error(
            "Error: Unable to allocate storage for thread specific data.\n");
        error = CL_OUT_OF_HOST_MEMORY;
        goto exit;
    }
-    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
    for (i = 0; i < test_info.threadCount; i++)
    {
-        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        cl_buffer_region region = {
-        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            i * test_info.subBufferSize * sizeof(cl_float),
            test_info.subBufferSize * sizeof(cl_float)
        };
        test_info.tinfo[i].inBuf =
            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
        if (error || NULL == test_info.tinfo[i].inBuf)
        {
-            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                       "region {%zd, %zd}\n",
                       region.origin, region.size);
            goto exit;
        }
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
                &region, &error);
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
-                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
                           "for region {%zd, %zd}\n",
                           region.origin, region.size);
                goto exit;
            }
        }
-        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        test_info.tinfo[i].tQueue =
            clCreateCommandQueue(gContext, gDevice, 0, &error);
        if (NULL == test_info.tinfo[i].tQueue || error)
        {
            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
@@ -297,7 +358,9 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
            test_info.programs,  f->nameInCode,         relaxedMode
        };
-        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        if ((error = ThreadPool_Do(BuildKernel_FloatFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
            goto exit;
    }
@@ -305,8 +368,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    {
        error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info);
-        if( error )
+        if (error) goto exit;
            goto exit;
        if (gWimpyMode)
            vlog("Wimp pass");
@@ -320,7 +382,8 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        cl_uint *p = (cl_uint *)gIn;
        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            p[j] = genrand_int32(d);
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -331,16 +394,29 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if ((error = clSetKernelArg(test_info.k[j][0], 0,
                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
            {
                LogBuildError(test_info.programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(test_info.programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
            for (i = 0; i < PERF_LOOP_COUNT; i++)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
                                                    1, NULL, &localCount, NULL,
                                                    0, NULL, NULL)))
                {
                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
@@ -356,14 +432,15 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+                / (BUFFER_SIZE / sizeof(float));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
                      f->name, sizeNames[j]);
        }
    }
@@ -413,8 +490,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
    const char *name = job->f->name;
    int signbit_test = 0;
-    if(!strcmp(name, "signbit"))
+    if (!strcmp(name, "signbit")) signbit_test = 1;
        signbit_test = 1;
 #define ref_func(s) (signbit_test ? func.i_f_f(s) : func.i_f(s))
@@ -423,25 +499,27 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
    cl_int *out[VECTOR_SIZE_COUNT];
    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        out[j] = (cl_int *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
            buffer_size, 0, NULL, e + j, &error);
        if (error || NULL == out[j])
        {
-            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                       error);
            return error;
        }
    }
    // Get that moving
-    if( (error = clFlush(tinfo->tQueue) ))
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
        vlog( "clFlush failed\n" );
    // Write the new values to the input array
    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    for( j = 0; j < buffer_elements; j++ )
+    for (j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
        p[j] = base + j * scale;
-    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
                                      buffer_size, p, 0, NULL, NULL)))
    {
        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
        return error;
@@ -461,24 +539,39 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
            return error;
        }
-        // Fill the result buffer with garbage, so that old results don't carry over
+        // Fill the result buffer with garbage, so that old results don't carry
        // over
        uint32_t pattern = 0xffffdead;
        memset_pattern4(out[j], &pattern, buffer_size);
-        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
        {
            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
            return error;
        }
        // run the kernel
-        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        size_t vectorCount =
-        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                 // own copy of the cl_kernel
        cl_program program = job->programs[j];
-        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+                                    &tinfo->outBuf[j])))
        {
            LogBuildError(program);
            return error;
        }
        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
                                    &tinfo->inBuf)))
        {
            LogBuildError(program);
            return error;
        }
-        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                            &vectorCount, NULL, 0, NULL, NULL)))
        {
            vlog_error("FAILED -- could not execute kernel\n");
            return error;
@@ -487,30 +580,33 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
    // Get that moving
-    if( (error = clFlush(tinfo->tQueue) ))
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
        vlog( "clFlush 2 failed\n" );
-    if( gSkipCorrectnessTesting )
+    if (gSkipCorrectnessTesting) return CL_SUCCESS;
        return CL_SUCCESS;
    // Calculate the correctly rounded reference result
    cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
    float *s = (float *)p;
-    for( j = 0; j < buffer_elements; j++ )
+    for (j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]);
        r[j] = ref_func( s[j] );
-    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    // Read the data back -- no need to wait for the first N-1 buffers. This is
    // an in order queue.
    for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        out[j] = (cl_int *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
            buffer_size, 0, NULL, NULL, &error);
        if (error || NULL == out[j])
        {
-            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                       error);
            return error;
        }
    }
    // Wait for the last buffer
-    out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    out[j] = (cl_int *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
                                          CL_TRUE, CL_MAP_READ, 0, buffer_size,
                                          0, NULL, NULL, &error);
    if (error || NULL == out[j])
    {
        vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
@@ -535,15 +631,14 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
                    {
                        int correct = ref_func(+0.0f);
                        int correct2 = ref_func(-0.0f);
-                        if( correct == q[j] || correct2 == q[j] )
+                        if (correct == q[j] || correct2 == q[j]) continue;
                            continue;
                    }
                }
                uint32_t err = t[j] - q[j];
-                if( q[j] > t[j] )
+                if (q[j] > t[j]) err = q[j] - t[j];
-                    err = q[j] - t[j];
+                vlog_error("\nERROR: %s: %d ulp error at %a: *%d vs. %d\n",
-                vlog_error( "\nERROR: %s: %d ulp error at %a: *%d vs. %d\n", name,  err, ((float*) s)[j], t[j], q[j] );
+                           name, err, ((float *)s)[j], t[j], q[j]);
                error = -1;
                goto exit;
            }
@@ -561,15 +656,15 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
                        {
                            int correct = -ref_func(+0.0f);
                            int correct2 = -ref_func(-0.0f);
-                            if( correct == q[j] || correct2 == q[j] )
+                            if (correct == q[j] || correct2 == q[j]) continue;
                                continue;
                        }
                    }
                    uint32_t err = -t[j] - q[j];
-                    if( q[j] > -t[j] )
+                    if (q[j] > -t[j]) err = q[j] + t[j];
-                        err = q[j] + t[j];
+                    vlog_error(
-                    vlog_error( "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name, sizeNames[k], err, ((float*) s)[j], -t[j], q[j] );
+                        "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name,
                        sizeNames[k], err, ((float *)s)[j], -t[j], q[j]);
                    error = -1;
                    goto exit;
                }
@@ -581,9 +676,11 @@ exit:
    ret = error;
    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
-        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
        {
-            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
                       j, error);
            return error;
        }
    }
@@ -599,8 +696,12 @@ exit:
    {
        if (gVerboseBruteForce)
        {
-           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-       } else
+                 "ThreadCount:%2u\n",
                 base, job->step, job->scale, buffer_elements,
                 job->threadCount);
        }
        else
        {
            vlog(".");
        }
@@ -622,11 +723,14 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    // Init test_info
    memset(&test_info, 0, sizeof(test_info));
    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.subBufferSize = BUFFER_SIZE
        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = getTestScale(sizeof(cl_double));
    if (gWimpyMode)
    {
-        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.subBufferSize = gWimpyBufferSize
            / (sizeof(cl_double)
               * RoundUpToNextPowerOfTwo(test_info.threadCount));
    }
    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
@@ -643,7 +747,8 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    test_info.f = f;
    test_info.ftz = f->ftz || gForceFTZ;
-    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
@@ -656,36 +761,52 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        }
        memset(test_info.k[i], 0, array_size);
    }
-    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    test_info.tinfo =
        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
    if (NULL == test_info.tinfo)
    {
-        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        vlog_error(
            "Error: Unable to allocate storage for thread specific data.\n");
        error = CL_OUT_OF_HOST_MEMORY;
        goto exit;
    }
-    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
    for (i = 0; i < test_info.threadCount; i++)
    {
-        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        cl_buffer_region region = {
-        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            i * test_info.subBufferSize * sizeof(cl_double),
            test_info.subBufferSize * sizeof(cl_double)
        };
        test_info.tinfo[i].inBuf =
            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
        if (error || NULL == test_info.tinfo[i].inBuf)
        {
-            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                       "region {%zd, %zd}\n",
                       region.origin, region.size);
            goto exit;
        }
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */
+            /* Qualcomm fix: 9461 read-write flags must be compatible with
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+             * parent buffer */
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
                &region, &error);
            /* Qualcomm fix: end */
            if (error || NULL == test_info.tinfo[i].outBuf[j])
            {
-                vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                vlog_error("Error: Unable to create sub-buffer of gInBuffer "
                           "for region {%zd, %zd}\n",
                           region.origin, region.size);
                goto exit;
            }
        }
-        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        test_info.tinfo[i].tQueue =
            clCreateCommandQueue(gContext, gDevice, 0, &error);
        if (NULL == test_info.tinfo[i].tQueue || error)
        {
            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
@@ -699,7 +820,9 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
            test_info.programs,  f->nameInCode,         relaxedMode
        };
-        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
            goto exit;
    }
@@ -707,8 +830,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    {
        error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info);
-        if( error )
+        if (error) goto exit;
            goto exit;
        if (gWimpyMode)
            vlog("Wimp pass");
@@ -722,7 +844,8 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        cl_ulong *p = (cl_ulong *)gIn;
        for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
            p[j] = DoubleFromUInt32(genrand_int32(d));
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -734,15 +857,27 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if ((error = clSetKernelArg(test_info.k[j][0], 0,
-            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
            {
                LogBuildError(test_info.programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(test_info.programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
            for (i = 0; i < PERF_LOOP_COUNT; i++)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
                                                    1, NULL, &localCount, NULL,
                                                    0, NULL, NULL)))
                {
                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
@@ -758,17 +893,17 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+                / (BUFFER_SIZE / sizeof(double));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
                      f->name, sizeNames[j]);
        }
-        for( ; j < gMaxVectorSizeIndex; j++ )
+        for (; j < gMaxVectorSizeIndex; j++) vlog("\t     -- ");
            vlog( "\t     -- " );
    }
    vlog("\n");
@@ -822,24 +957,27 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
    cl_long *out[VECTOR_SIZE_COUNT];
    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        out[j] = (cl_long *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
            buffer_size, 0, NULL, e + j, &error);
        if (error || NULL == out[j])
        {
-            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                       error);
            return error;
        }
    }
    // Get that moving
-    if( (error = clFlush(tinfo->tQueue) ))
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
        vlog( "clFlush failed\n" );
    // Write the new values to the input array
    cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
    for (j = 0; j < buffer_elements; j++)
        p[j] = DoubleFromUInt32(base + j * scale);
-    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
                                      buffer_size, p, 0, NULL, NULL)))
    {
        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
        return error;
@@ -859,24 +997,39 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
            return error;
        }
-        // Fill the result buffer with garbage, so that old results don't carry over
+        // Fill the result buffer with garbage, so that old results don't carry
        // over
        uint32_t pattern = 0xffffdead;
        memset_pattern4(out[j], &pattern, buffer_size);
-        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
        {
            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
            return error;
        }
        // run the kernel
-        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        size_t vectorCount =
-        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                 // own copy of the cl_kernel
        cl_program program = job->programs[j];
-        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+                                    &tinfo->outBuf[j])))
        {
            LogBuildError(program);
            return error;
        }
        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
                                    &tinfo->inBuf)))
        {
            LogBuildError(program);
            return error;
        }
-        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                            &vectorCount, NULL, 0, NULL, NULL)))
        {
            vlog_error("FAILED -- could not execute kernel\n");
            return error;
@@ -885,30 +1038,33 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
    // Get that moving
-    if( (error = clFlush(tinfo->tQueue) ))
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
        vlog( "clFlush 2 failed\n" );
-    if( gSkipCorrectnessTesting )
+    if (gSkipCorrectnessTesting) return CL_SUCCESS;
        return CL_SUCCESS;
    // Calculate the correctly rounded reference result
    cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
    cl_double *s = (cl_double *)p;
-    for( j = 0; j < buffer_elements; j++ )
+    for (j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]);
        r[j] = dfunc.i_f( s[j] );
-    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    // Read the data back -- no need to wait for the first N-1 buffers. This is
    // an in order queue.
    for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
    {
-        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        out[j] = (cl_long *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
            buffer_size, 0, NULL, NULL, &error);
        if (error || NULL == out[j])
        {
-            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                       error);
            return error;
        }
    }
    // Wait for the last buffer
-    out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    out[j] = (cl_long *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
                                           CL_TRUE, CL_MAP_READ, 0, buffer_size,
                                           0, NULL, NULL, &error);
    if (error || NULL == out[j])
    {
        vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
@@ -933,15 +1089,14 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                {
                    cl_long correct = dfunc.i_f(+0.0f);
                    cl_long correct2 = dfunc.i_f(-0.0f);
-                    if( correct == q[j] || correct2 == q[j] )
+                    if (correct == q[j] || correct2 == q[j]) continue;
                        continue;
                }
            }
            cl_ulong err = t[j] - q[j];
-            if( q[j] > t[j] )
+            if (q[j] > t[j]) err = q[j] - t[j];
-                err = q[j] - t[j];
+            vlog_error("\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n",
-            vlog_error( "\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n", name,  err, ((double*) gIn)[j], t[j], q[j] );
+                       name, err, ((double *)gIn)[j], t[j], q[j]);
            return -1;
        }
@@ -958,40 +1113,44 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                    {
                        int64_t correct = -dfunc.i_f(+0.0f);
                        int64_t correct2 = -dfunc.i_f(-0.0f);
-                        if( correct == q[j] || correct2 == q[j] )
+                        if (correct == q[j] || correct2 == q[j]) continue;
                            continue;
                    }
                }
                cl_ulong err = -t[j] - q[j];
-                if( q[j] > -t[j] )
+                if (q[j] > -t[j]) err = q[j] + t[j];
-                    err = q[j] + t[j];
+                vlog_error(
-                vlog_error( "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n", name, sizeNames[k], err, ((double*) gIn)[j], -t[j], q[j] );
+                    "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n",
                    name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]);
                return -1;
            }
        }
    }
    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
-        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
        {
-            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
                       j, error);
            return error;
        }
    }
-    if( (error = clFlush(tinfo->tQueue) ))
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
        vlog( "clFlush 3 failed\n" );
    if (0 == (base & 0x0fffffff))
    {
        if (gVerboseBruteForce)
        {
-           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-       } else
+                 "ThreadCount:%2u\n",
                 base, job->step, job->scale, buffer_elements,
                 job->threadCount);
        }
        else
        {
            vlog(".");
        }
@@ -1000,7 +1159,3 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
    return CL_SUCCESS;
 }
--- a/test_conformance/math_brute_force/mad.cpp
+++ b/test_conformance/math_brute_force/mad.cpp
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
--- a/test_conformance/math_brute_force/reference_math.h
+++ b/test_conformance/math_brute_force/reference_math.h
@@ -234,5 +234,3 @@ long double reference_assignmentl( long double x );
 int reference_notl(long double x);
 #endif
--- a/test_conformance/math_brute_force/ternary.cpp
+++ b/test_conformance/math_brute_force/ternary.cpp
--- a/test_conformance/math_brute_force/unary.cpp
+++ b/test_conformance/math_brute_force/unary.cpp
--- a/test_conformance/math_brute_force/unary_two_results.cpp
+++ b/test_conformance/math_brute_force/unary_two_results.cpp
@@ -32,27 +32,43 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
 static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                       cl_program *p, bool relaxedMode)
 {
-    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n"
+    const char *c[] = { "__kernel void math_kernel",
                        sizeNames[vectorSize],
                        "( __global float",
                        sizeNames[vectorSize],
                        "* out, __global float",
                        sizeNames[vectorSize],
                        "* out2, __global float",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i], out2 + i );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i], out2 + i );\n"
                        "}\n" };
-    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* out2, __global float* in)\n"
+    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
        "( __global float* out, __global float* out2, __global float* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       float3 f0 = vload3( 0, in + 3 * i );\n"
        "       float3 iout = NAN;\n"
-                            "       f0 = ", name, "( f0, &iout );\n"
+        "       f0 = ",
        name,
        "( f0, &iout );\n"
        "       vstore3( f0, 0, out + 3*i );\n"
        "       vstore3( iout, 0, out2 + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
-                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
        "       float3 iout = NAN;\n"
        "       float3 f0;\n"
        "       switch( parity )\n"
@@ -64,7 +80,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
-                            "       f0 = ", name, "( f0, &iout );\n"
+        "       f0 = ",
        name,
        "( f0, &iout );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
@@ -89,7 +107,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
    }
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
 }
@@ -98,28 +117,44 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                             cl_program *p, bool relaxedMode)
 {
    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n"
+                        "__kernel void math_kernel",
                        sizeNames[vectorSize],
                        "( __global double",
                        sizeNames[vectorSize],
                        "* out, __global double",
                        sizeNames[vectorSize],
                        "* out2, __global double",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i], out2 + i );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i], out2 + i );\n"
                        "}\n" };
-    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+    const char *c3[] = {
-                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* out2, __global double* in)\n"
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void math_kernel",
        sizeNames[vectorSize],
        "( __global double* out, __global double* out2, __global double* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       double3 f0 = vload3( 0, in + 3 * i );\n"
        "       double3 iout = NAN;\n"
-                            "       f0 = ", name, "( f0, &iout );\n"
+        "       f0 = ",
        name,
        "( f0, &iout );\n"
        "       vstore3( f0, 0, out + 3*i );\n"
        "       vstore3( iout, 0, out2 + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
-                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
        "       double3 iout = NAN;\n"
        "       double3 f0;\n"
        "       switch( parity )\n"
@@ -131,7 +166,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
-                            "       f0 = ", name, "( f0, &iout );\n"
+        "       f0 = ",
        name,
        "( f0, &iout );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
@@ -156,7 +193,8 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
    }
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
 }
@@ -170,8 +208,10 @@ typedef struct BuildKernelInfo
    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
 } BuildKernelInfo;
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                  void *p);
 static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                  void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -179,8 +219,10 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi
                       info->programs + i, info->relaxedMode);
 }
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                   void *p);
 static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                   void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -215,12 +257,14 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
-    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
        return error;
    /*
        for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i,
-            return error;
+       programs + i) ) ) return error;
    */
    for (i = 0; i < (1ULL << 32); i += step)
@@ -251,7 +295,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                }
            }
        }
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -262,16 +307,22 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
                                          bufferSize, gOut[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                           error, j);
                goto exit;
            }
            memset_pattern4(gOut2[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL)))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0,
                                          bufferSize, gOut2[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
                           error, j);
                goto exit;
            }
        }
@@ -281,11 +332,28 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
-            if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
                                        &gOutBuffer2[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
-            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
                vlog_error("FAILED -- could not execute kernel\n");
                goto exit;
@@ -293,8 +361,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        }
        // Get that moving
-        if( (error = clFlush(gQueue) ))
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
            vlog( "clFlush failed\n" );
        FPU_mode_type oldMode;
        RoundingMode oldRoundMode = kRoundToNearestEven;
@@ -302,8 +369,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            // Calculate the correctly rounded reference result
            memset(&oldMode, 0, sizeof(oldMode));
-            if( ftz )
+            if (ftz) ForceFTZ(&oldMode);
                ForceFTZ( &oldMode );
            // Set the rounding mode to match the device
            if (gIsInRTZMode)
@@ -328,7 +394,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                    r[j] = (float)f->func.f_fpf(s[j], &dd);
                r2[j] = (float)dd;
-                overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+                overflow[j] =
                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
            }
        }
        else
@@ -345,18 +412,21 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
            }
        }
-        if( isFract && ftz )
+        if (isFract && ftz) RestoreFPState(&oldMode);
            RestoreFPState( &oldMode );
        // Read the data back
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         bufferSize, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                goto exit;
            }
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                         bufferSize, gOut2[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray2 failed %d\n", error);
                goto exit;
@@ -365,8 +435,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        if (gSkipCorrectnessTesting)
        {
-            if (isFract && gIsInRTZMode)
+            if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
                (void)set_round(oldRoundMode, kfloat);
            break;
        }
@@ -393,26 +462,31 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                    else
                        correct = f->func.f_fpf(s[j], &correct2);
-                    // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                    // Per section 10 paragraph 6, accept any result if an input
                    // or output is a infinity or NaN or overflow
                    if (relaxedMode || skipNanInf)
                    {
-                        if (skipNanInf && overflow[j])
+                        if (skipNanInf && overflow[j]) continue;
                            continue;
-                        // Note: no double rounding here.  Reference functions calculate in single precision.
+                        // Note: no double rounding here.  Reference functions
-                        if( IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                        // calculate in single precision.
-                            IsFloatInfinity(correct2)|| IsFloatNaN(correct2)    ||
+                        if (IsFloatInfinity(correct) || IsFloatNaN(correct)
-                            IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        )
+                            || IsFloatInfinity(correct2) || IsFloatNaN(correct2)
                            || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
                            continue;
                    }
-                    typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold.
+                    typedef int (*CheckForSubnormal)(
                        double, float); // If we are in fast relaxed math, we
                                        // have a different calculation for the
                                        // subnormal threshold.
                    CheckForSubnormal isFloatResultSubnormalPtr;
                    if (relaxedMode)
                    {
                        err = Abs_Error(test, correct);
                        err2 = Abs_Error(test2, correct2);
-                      isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError;
+                        isFloatResultSubnormalPtr =
                            &IsFloatResultSubnormalAbsError;
                    }
                    else
                    {
@@ -420,14 +494,16 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                        err2 = Ulp_Error(test2, correct2);
                        isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
                    }
-                    int fail = ! (fabsf(err) <= float_ulps && fabsf(err2) <= float_ulps);
+                    int fail = !(fabsf(err) <= float_ulps
                                 && fabsf(err2) <= float_ulps);
                    if (ftz)
                    {
                        // retry per section 6.5.3.2
                        if ((*isFloatResultSubnormalPtr)(correct, float_ulps))
                        {
-                            if( (*isFloatResultSubnormalPtr) (correct2, float_ulps ))
+                            if ((*isFloatResultSubnormalPtr)(correct2,
                                                             float_ulps))
                            {
                                fail = fail && !(test == 0.0f && test2 == 0.0f);
                                if (!fail)
@@ -438,16 +514,18 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                            }
                            else
                            {
-                                fail = fail && ! ( test == 0.0f && fabsf(err2) <= float_ulps);
+                                fail = fail
-                                if( ! fail )
+                                    && !(test == 0.0f
-                                    err = 0.0f;
+                                         && fabsf(err2) <= float_ulps);
                                if (!fail) err = 0.0f;
                            }
                        }
-                        else if( (*isFloatResultSubnormalPtr)(correct2, float_ulps ) )
+                        else if ((*isFloatResultSubnormalPtr)(correct2,
                                                              float_ulps))
                        {
-                            fail = fail && ! ( test2 == 0.0f && fabsf(err) <= float_ulps);
+                            fail = fail
-                            if( ! fail )
+                                && !(test2 == 0.0f && fabsf(err) <= float_ulps);
-                                err2 = 0.0f;
+                            if (!fail) err2 = 0.0f;
                        }
@@ -458,8 +536,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                            double correct2p, correct2n;
                            float errp, err2p, errn, err2n;
-                            if( skipNanInf )
+                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
                                feclearexcept(FE_OVERFLOW);
                            if (relaxedMode)
                            {
                                correctp = f->rfunc.f_fpf(0.0, &correct2p);
@@ -471,17 +548,23 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                                correctn = f->func.f_fpf(-0.0, &correct2n);
                            }
-                            // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                            // Per section 10 paragraph 6, accept any result if
                            // an input or output is a infinity or NaN or
                            // overflow
                            if (skipNanInf)
                            {
-                                if( fetestexcept(FE_OVERFLOW) )
+                                if (fetestexcept(FE_OVERFLOW)) continue;
                                    continue;
-                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                // Note: no double rounding here.  Reference
-                                if( IsFloatInfinity(correctp) || IsFloatNaN(correctp)   ||
+                                // functions calculate in single precision.
-                                    IsFloatInfinity(correctn) || IsFloatNaN(correctn)   ||
+                                if (IsFloatInfinity(correctp)
-                                    IsFloatInfinity(correct2p) || IsFloatNaN(correct2p) ||
+                                    || IsFloatNaN(correctp)
-                                    IsFloatInfinity(correct2n) || IsFloatNaN(correct2n) )
+                                    || IsFloatInfinity(correctn)
                                    || IsFloatNaN(correctn)
                                    || IsFloatInfinity(correct2p)
                                    || IsFloatNaN(correct2p)
                                    || IsFloatInfinity(correct2n)
                                    || IsFloatNaN(correct2n))
                                    continue;
                            }
@@ -500,38 +583,48 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                                err2n = Ulp_Error(test, correct2n);
                            }
-                            fail =  fail && ((!(fabsf(errp) <= float_ulps)) && (!(fabsf(err2p) <= float_ulps))    &&
+                            fail = fail
-                                            ((!(fabsf(errn) <= float_ulps)) && (!(fabsf(err2n) <= float_ulps))) );
+                                && ((!(fabsf(errp) <= float_ulps))
-                            if( fabsf( errp ) < fabsf(err ) )
+                                    && (!(fabsf(err2p) <= float_ulps))
-                                err = errp;
+                                    && ((!(fabsf(errn) <= float_ulps))
-                            if( fabsf( errn ) < fabsf(err ) )
+                                        && (!(fabsf(err2n) <= float_ulps))));
-                                err = errn;
+                            if (fabsf(errp) < fabsf(err)) err = errp;
-                            if( fabsf( err2p ) < fabsf(err2 ) )
+                            if (fabsf(errn) < fabsf(err)) err = errn;
-                                err2 = err2p;
+                            if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
-                            if( fabsf( err2n ) < fabsf(err2 ) )
+                            if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
                                err2 = err2n;
                            // retry per section 6.5.3.4
-                            if(  (*isFloatResultSubnormalPtr)( correctp, float_ulps ) || (*isFloatResultSubnormalPtr)( correctn, float_ulps )  )
+                            if ((*isFloatResultSubnormalPtr)(correctp,
                                                             float_ulps)
                                || (*isFloatResultSubnormalPtr)(correctn,
                                                                float_ulps))
                            {
-                              if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) )
+                                if ((*isFloatResultSubnormalPtr)(correct2p,
                                                                 float_ulps)
                                    || (*isFloatResultSubnormalPtr)(correct2n,
                                                                    float_ulps))
                                {
-                                fail = fail && !( test == 0.0f && test2 == 0.0f);
+                                    fail = fail
-                                if( ! fail )
+                                        && !(test == 0.0f && test2 == 0.0f);
-                                  err = err2 = 0.0f;
+                                    if (!fail) err = err2 = 0.0f;
                                }
                                else
                                {
-                                fail = fail && ! (test == 0.0f && fabsf(err2) <= float_ulps);
+                                    fail = fail
-                                if( ! fail )
+                                        && !(test == 0.0f
-                                  err = 0.0f;
+                                             && fabsf(err2) <= float_ulps);
                                    if (!fail) err = 0.0f;
                                }
                            }
-                            else if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) )
+                            else if ((*isFloatResultSubnormalPtr)(correct2p,
                                                                  float_ulps)
                                     || (*isFloatResultSubnormalPtr)(
                                         correct2n, float_ulps))
                            {
-                                fail = fail && ! (test2 == 0.0f && (fabsf(err) <= float_ulps));
+                                fail = fail
-                                if( ! fail )
+                                    && !(test2 == 0.0f
-                                    err2 = 0.0f;
+                                         && (fabsf(err) <= float_ulps));
                                if (!fail) err2 = 0.0f;
                            }
                        }
                    }
@@ -547,7 +640,11 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                    }
                    if (fail)
                    {
-                        vlog_error( "\nERROR: %s%s: {%f, %f} ulp error at %a: *{%a, %a} vs. {%a, %a}\n", f->name, sizeNames[k], err, err2, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((float*) gOut_Ref2)[j], test, test2 );
+                        vlog_error("\nERROR: %s%s: {%f, %f} ulp error at %a: "
                                   "*{%a, %a} vs. {%a, %a}\n",
                                   f->name, sizeNames[k], err, err2,
                                   ((float *)gIn)[j], ((float *)gOut_Ref)[j],
                                   ((float *)gOut_Ref2)[j], test, test2);
                        error = -1;
                        goto exit;
                    }
@@ -555,15 +652,16 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
            }
        }
-        if (isFract && gIsInRTZMode)
+        if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
            (void)set_round(oldRoundMode, kfloat);
        if (0 == (i & 0x0fffffff))
        {
            if (gVerboseBruteForce)
            {
-               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-           } else
+                     bufferSize);
            }
            else
            {
                vlog(".");
            }
@@ -585,7 +683,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        uint32_t *p = (uint32_t *)gIn;
        for (j = 0; j < bufferSize / sizeof(float); j++)
            p[j] = genrand_int32(d);
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -597,9 +696,24 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
-            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
                                        &gOutBuffer2[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
@@ -607,7 +721,9 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
@@ -623,19 +739,21 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+                / (bufferSize / sizeof(float));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
                      f->name, sizeNames[j]);
        }
    }
    if (!gSkipCorrectnessTesting)
-        vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 );
+        vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
             maxErrorVal1);
    vlog("\n");
 exit:
@@ -680,8 +798,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
    }
    /*
        for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels +
-            return error;
+       i, programs + i) ) ) return error;
    */
    for (i = 0; i < (1ULL << 32); i += step)
@@ -698,7 +816,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
            for (j = 0; j < bufferSize / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j);
        }
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -709,16 +828,22 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
                                          bufferSize, gOut[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                           error, j);
                goto exit;
            }
            memset_pattern4(gOut2[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL)))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0,
                                          bufferSize, gOut2[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
                           error, j);
                goto exit;
            }
        }
@@ -728,11 +853,28 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
-            if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
                                        &gOutBuffer2[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
-            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
                vlog_error("FAILED -- could not execute kernel\n");
                goto exit;
@@ -740,8 +882,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        }
        // Get that moving
-        if( (error = clFlush(gQueue) ))
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
            vlog( "clFlush failed\n" );
        // Calculate the correctly rounded reference result
        double *r = (double *)gOut_Ref;
@@ -757,20 +898,23 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        // Read the data back
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         bufferSize, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                goto exit;
            }
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                         bufferSize, gOut2[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray2 failed %d\n", error);
                goto exit;
            }
        }
-        if( gSkipCorrectnessTesting )
+        if (gSkipCorrectnessTesting) break;
            break;
        // Verify data
        uint64_t *t = (uint64_t *)gOut_Ref;
@@ -791,13 +935,15 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
                    long double correct = f->dfunc.f_fpf(s[j], &correct2);
                    float err = Bruteforce_Ulp_Error_Double(test, correct);
                    float err2 = Bruteforce_Ulp_Error_Double(test2, correct2);
-                    int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps);
+                    int fail = !(fabsf(err) <= f->double_ulps
                                 && fabsf(err2) <= f->double_ulps);
                    if (ftz)
                    {
                        // retry per section 6.5.3.2
                        if (IsDoubleResultSubnormal(correct, f->double_ulps))
                        {
-                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) )
+                            if (IsDoubleResultSubnormal(correct2,
                                                        f->double_ulps))
                            {
                                fail = fail && !(test == 0.0f && test2 == 0.0f);
                                if (!fail)
@@ -808,60 +954,80 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
                            }
                            else
                            {
-                                fail = fail && ! ( test == 0.0f && fabsf(err2) <= f->double_ulps);
+                                fail = fail
-                                if( ! fail )
+                                    && !(test == 0.0f
-                                    err = 0.0f;
+                                         && fabsf(err2) <= f->double_ulps);
                                if (!fail) err = 0.0f;
                            }
                        }
-                        else if( IsDoubleResultSubnormal( correct2, f->double_ulps ) )
+                        else if (IsDoubleResultSubnormal(correct2,
                                                         f->double_ulps))
                        {
-                            fail = fail && ! ( test2 == 0.0f && fabsf(err) <= f->double_ulps);
+                            fail = fail
-                            if( ! fail )
+                                && !(test2 == 0.0f
-                                err2 = 0.0f;
+                                     && fabsf(err) <= f->double_ulps);
                            if (!fail) err2 = 0.0f;
                        }
                        // retry per section 6.5.3.3
                        if (IsDoubleSubnormal(s[j]))
                        {
                            long double correct2p, correct2n;
-                            long double correctp = f->dfunc.f_fpf( 0.0, &correct2p );
+                            long double correctp =
-                            long double correctn = f->dfunc.f_fpf( -0.0, &correct2n );
+                                f->dfunc.f_fpf(0.0, &correct2p);
-                            float errp = Bruteforce_Ulp_Error_Double( test, correctp  );
+                            long double correctn =
-                            float err2p = Bruteforce_Ulp_Error_Double( test, correct2p  );
+                                f->dfunc.f_fpf(-0.0, &correct2n);
-                            float errn = Bruteforce_Ulp_Error_Double( test, correctn  );
+                            float errp =
-                            float err2n = Bruteforce_Ulp_Error_Double( test, correct2n  );
+                                Bruteforce_Ulp_Error_Double(test, correctp);
-                            fail =  fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps))    &&
+                            float err2p =
-                                            ((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) );
+                                Bruteforce_Ulp_Error_Double(test, correct2p);
-                            if( fabsf( errp ) < fabsf(err ) )
+                            float errn =
-                                err = errp;
+                                Bruteforce_Ulp_Error_Double(test, correctn);
-                            if( fabsf( errn ) < fabsf(err ) )
+                            float err2n =
-                                err = errn;
+                                Bruteforce_Ulp_Error_Double(test, correct2n);
-                            if( fabsf( err2p ) < fabsf(err2 ) )
+                            fail = fail
-                                err2 = err2p;
+                                && ((!(fabsf(errp) <= f->double_ulps))
-                            if( fabsf( err2n ) < fabsf(err2 ) )
+                                    && (!(fabsf(err2p) <= f->double_ulps))
-                                err2 = err2n;
+                                    && ((!(fabsf(errn) <= f->double_ulps))
                                        && (!(fabsf(err2n)
                                              <= f->double_ulps))));
                            if (fabsf(errp) < fabsf(err)) err = errp;
                            if (fabsf(errn) < fabsf(err)) err = errn;
                            if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
                            if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
                            // retry per section 6.5.3.4
-                            if( IsDoubleResultSubnormal( correctp, f->double_ulps ) || IsDoubleResultSubnormal( correctn, f->double_ulps ) )
+                            if (IsDoubleResultSubnormal(correctp,
                                                        f->double_ulps)
                                || IsDoubleResultSubnormal(correctn,
                                                           f->double_ulps))
                            {
-                                if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) )
+                                if (IsDoubleResultSubnormal(correct2p,
                                                            f->double_ulps)
                                    || IsDoubleResultSubnormal(correct2n,
                                                               f->double_ulps))
                                {
-                                    fail = fail && !( test == 0.0f && test2 == 0.0f);
+                                    fail = fail
-                                    if( ! fail )
+                                        && !(test == 0.0f && test2 == 0.0f);
-                                        err = err2 = 0.0f;
+                                    if (!fail) err = err2 = 0.0f;
                                }
                                else
                                {
-                                    fail = fail && ! (test == 0.0f && fabsf(err2) <= f->double_ulps);
+                                    fail = fail
-                                    if( ! fail )
+                                        && !(test == 0.0f
-                                        err = 0.0f;
+                                             && fabsf(err2) <= f->double_ulps);
                                    if (!fail) err = 0.0f;
                                }
                            }
-                            else if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) )
+                            else if (IsDoubleResultSubnormal(correct2p,
                                                             f->double_ulps)
                                     || IsDoubleResultSubnormal(correct2n,
                                                                f->double_ulps))
                            {
-                                fail = fail && ! (test2 == 0.0f && (fabsf(err) <= f->double_ulps));
+                                fail = fail
-                                if( ! fail )
+                                    && !(test2 == 0.0f
-                                    err2 = 0.0f;
+                                         && (fabsf(err) <= f->double_ulps));
                                if (!fail) err2 = 0.0f;
                            }
                        }
                    }
@@ -877,7 +1043,12 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
                    }
                    if (fail)
                    {
-                        vlog_error( "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: *{%.13la, %.13la} vs. {%.13la, %.13la}\n", f->name, sizeNames[k], err, err2, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((double*) gOut_Ref2)[j], test, test2 );
+                        vlog_error(
                            "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: "
                            "*{%.13la, %.13la} vs. {%.13la, %.13la}\n",
                            f->name, sizeNames[k], err, err2,
                            ((double *)gIn)[j], ((double *)gOut_Ref)[j],
                            ((double *)gOut_Ref2)[j], test, test2);
                        error = -1;
                        goto exit;
                    }
@@ -889,8 +1060,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            if (gVerboseBruteForce)
            {
-               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-           } else
+                     bufferSize);
            }
            else
            {
                vlog(".");
            }
@@ -912,7 +1085,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        double *p = (double *)gIn;
        for (j = 0; j < bufferSize / sizeof(double); j++)
            p[j] = DoubleFromUInt32(genrand_int32(d));
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -924,9 +1098,24 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
-            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
                                        &gOutBuffer2[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
@@ -934,7 +1123,9 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
@@ -950,21 +1141,22 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+                / (bufferSize / sizeof(double));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
                      f->name, sizeNames[j]);
        }
-        for( ; j < gMaxVectorSizeIndex; j++ )
+        for (; j < gMaxVectorSizeIndex; j++) vlog("\t     -- ");
            vlog( "\t     -- " );
    }
    if (!gSkipCorrectnessTesting)
-        vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 );
+        vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
             maxErrorVal1);
    vlog("\n");
 exit:
@@ -977,6 +1169,3 @@ exit:
    return error;
 }
--- a/test_conformance/math_brute_force/unary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i.cpp
@@ -34,26 +34,42 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
 static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                       cl_program *p, bool relaxedMode)
 {
-    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n"
+    const char *c[] = { "__kernel void math_kernel",
                        sizeNames[vectorSize],
                        "( __global float",
                        sizeNames[vectorSize],
                        "* out, __global int",
                        sizeNames[vectorSize],
                        "* out2, __global float",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i], out2 + i );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i], out2 + i );\n"
-    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global int* out2, __global float* in)\n"
+                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
        "( __global float* out, __global int* out2, __global float* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       float3 f0 = vload3( 0, in + 3 * i );\n"
        "       int3 iout = INT_MIN;\n"
-                            "       f0 = ", name, "( f0, &iout );\n"
+        "       f0 = ",
        name,
        "( f0, &iout );\n"
        "       vstore3( f0, 0, out + 3*i );\n"
        "       vstore3( iout, 0, out2 + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
-                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
        "       int3 iout = INT_MIN;\n"
        "       float3 f0;\n"
        "       switch( parity )\n"
@@ -65,7 +81,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
-                            "       f0 = ", name, "( f0, &iout );\n"
+        "       f0 = ",
        name,
        "( f0, &iout );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
@@ -90,7 +108,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
    }
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
 }
@@ -99,27 +118,43 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                             cl_program *p, bool relaxedMode)
 {
    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n"
+                        "__kernel void math_kernel",
                        sizeNames[vectorSize],
                        "( __global double",
                        sizeNames[vectorSize],
                        "* out, __global int",
                        sizeNames[vectorSize],
                        "* out2, __global double",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i], out2 + i );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i], out2 + i );\n"
-    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "}\n" };
-                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global int* out2, __global double* in)\n"
+    const char *c3[] = {
        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void math_kernel",
        sizeNames[vectorSize],
        "( __global double* out, __global int* out2, __global double* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       double3 f0 = vload3( 0, in + 3 * i );\n"
        "       int3 iout = INT_MIN;\n"
-                        "       f0 = ", name, "( f0, &iout );\n"
+        "       f0 = ",
        name,
        "( f0, &iout );\n"
        "       vstore3( f0, 0, out + 3*i );\n"
        "       vstore3( iout, 0, out2 + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
-                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
        "       int3 iout = INT_MIN;\n"
        "       double3 f0;\n"
        "       switch( parity )\n"
@@ -131,7 +166,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
-                        "       f0 = ", name, "( f0, &iout );\n"
+        "       f0 = ",
        name,
        "( f0, &iout );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
@@ -156,7 +193,8 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
    }
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
 }
@@ -170,8 +208,10 @@ typedef struct BuildKernelInfo
    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
 } BuildKernelInfo;
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                  void *p);
 static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                  void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -179,8 +219,10 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi
                       info->programs + i, info->relaxedMode);
 }
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                   void *p);
 static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                   void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -225,12 +267,14 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
-    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
        return error;
    /*
        for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i,
-            return error;
+       programs + i) ) ) return error;
    */
    for (i = 0; i < (1ULL << 32); i += step)
@@ -247,7 +291,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
            for (j = 0; j < bufferSize / sizeof(float); j++)
                p[j] = (uint32_t)i + j;
        }
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -258,16 +303,22 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
                                          bufferSize, gOut[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                           error, j);
                goto exit;
            }
            memset_pattern4(gOut2[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0,
                                          bufferSize, gOut2[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
                           error, j);
                goto exit;
            }
        }
@@ -277,11 +328,28 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
-            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
                                        &gOutBuffer2[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
-            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
                vlog_error("FAILED -- could not execute kernel\n");
                goto exit;
@@ -289,8 +357,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        }
        // Get that moving
-        if( (error = clFlush(gQueue) ))
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
            vlog( "clFlush failed\n" );
        // Calculate the correctly rounded reference result
        float *r = (float *)gOut_Ref;
@@ -302,20 +369,23 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        // Read the data back
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         bufferSize, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                goto exit;
            }
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                         bufferSize, gOut2[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray2 failed %d\n", error);
                goto exit;
            }
        }
-        if( gSkipCorrectnessTesting )
+        if (gSkipCorrectnessTesting) break;
            break;
        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
@@ -335,15 +405,15 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
                    double correct = f->func.f_fpI(s[j], &correct2);
                    float err = Ulp_Error(test, correct);
                    cl_long iErr = (int64_t)q2[j] - (int64_t)correct2;
-                    int fail = ! (fabsf(err) <= float_ulps && abs_cl_long( iErr ) <= maxiError );
+                    int fail = !(fabsf(err) <= float_ulps
                                 && abs_cl_long(iErr) <= maxiError);
                    if (ftz)
                    {
                        // retry per section 6.5.3.2
                        if (IsFloatResultSubnormal(correct, float_ulps))
                        {
                            fail = fail && !(test == 0.0f && iErr == 0);
-                            if( ! fail )
+                            if (!fail) err = 0.0f;
                                err = 0.0f;
                        }
                        // retry per section 6.5.3.3
@@ -354,18 +424,22 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
                            double correct4 = f->func.f_fpI(-0.0, &correct6);
                            float err2 = Ulp_Error(test, correct3);
                            float err3 = Ulp_Error(test, correct4);
-                            cl_long iErr2 = (long long) q2[j] - (long long) correct5;
+                            cl_long iErr2 =
-                            cl_long iErr3 = (long long) q2[j] - (long long) correct6;
+                                (long long)q2[j] - (long long)correct5;
                            cl_long iErr3 =
                                (long long)q2[j] - (long long)correct6;
                            // Did +0 work?
-                            if( fabsf(err2) <= float_ulps && abs_cl_long( iErr2 ) <= maxiError )
+                            if (fabsf(err2) <= float_ulps
                                && abs_cl_long(iErr2) <= maxiError)
                            {
                                err = err2;
                                iErr = iErr2;
                                fail = 0;
                            }
                            // Did -0 work?
-                            else if(fabsf(err3) <= float_ulps && abs_cl_long( iErr3 ) <= maxiError)
+                            else if (fabsf(err3) <= float_ulps
                                     && abs_cl_long(iErr3) <= maxiError)
                            {
                                err = err3;
                                iErr = iErr3;
@@ -373,9 +447,16 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
                            }
                            // retry per section 6.5.3.4
-                            if( fail && (IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps )) )
+                            if (fail
                                && (IsFloatResultSubnormal(correct2, float_ulps)
                                    || IsFloatResultSubnormal(correct3,
                                                              float_ulps)))
                            {
-                                fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) );
+                                fail = fail
                                    && !(test == 0.0f
                                         && (abs_cl_long(iErr2) <= maxiError
                                             || abs_cl_long(iErr3)
                                                 <= maxiError));
                                if (!fail)
                                {
                                    err = 0.0f;
@@ -397,7 +478,11 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
                    if (fail)
                    {
-                        vlog_error( "\nERROR: %s%s: {%f, %d} ulp error at %a: *{%a, %d} vs. {%a, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] );
+                        vlog_error("\nERROR: %s%s: {%f, %d} ulp error at %a: "
                                   "*{%a, %d} vs. {%a, %d}\n",
                                   f->name, sizeNames[k], err, (int)iErr,
                                   ((float *)gIn)[j], ((float *)gOut_Ref)[j],
                                   ((int *)gOut_Ref2)[j], test, q2[j]);
                        error = -1;
                        goto exit;
                    }
@@ -409,8 +494,10 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            if (gVerboseBruteForce)
            {
-               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-           } else
+                     bufferSize);
            }
            else
            {
                vlog(".");
            }
@@ -432,7 +519,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        uint32_t *p = (uint32_t *)gIn;
        for (j = 0; j < bufferSize / sizeof(float); j++)
            p[j] = genrand_int32(d);
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -444,16 +532,33 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
-            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
                                        &gOutBuffer2[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
            for (k = 0; k < PERF_LOOP_COUNT; k++)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
@@ -469,14 +574,15 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+                / (bufferSize / sizeof(float));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
                      f->name, sizeNames[j]);
        }
    }
@@ -528,8 +634,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
    }
    /*
        for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels +
-            return error;
+       i, programs + i) ) ) return error;
    */
    for (i = 0; i < (1ULL << 32); i += step)
@@ -546,7 +652,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
            for (j = 0; j < bufferSize / sizeof(double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j);
        }
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -557,16 +664,22 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
                                          bufferSize, gOut[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                           error, j);
                goto exit;
            }
            memset_pattern4(gOut2[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0,
                                          bufferSize, gOut2[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
                           error, j);
                goto exit;
            }
        }
@@ -576,11 +689,28 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
-            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
                                        &gOutBuffer2[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
-            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
                vlog_error("FAILED -- could not execute kernel\n");
                goto exit;
@@ -588,8 +718,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        }
        // Get that moving
-        if( (error = clFlush(gQueue) ))
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
            vlog( "clFlush failed\n" );
        // Calculate the correctly rounded reference result
        double *r = (double *)gOut_Ref;
@@ -601,20 +730,23 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        // Read the data back
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         bufferSize, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                goto exit;
            }
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                         bufferSize, gOut2[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray2 failed %d\n", error);
                goto exit;
            }
        }
-        if( gSkipCorrectnessTesting )
+        if (gSkipCorrectnessTesting) break;
            break;
        // Verify data
        uint64_t *t = (uint64_t *)gOut_Ref;
@@ -634,37 +766,45 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
                    long double correct = f->dfunc.f_fpI(s[j], &correct2);
                    float err = Bruteforce_Ulp_Error_Double(test, correct);
                    cl_long iErr = (long long)q2[j] - (long long)correct2;
-                    int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError );
+                    int fail = !(fabsf(err) <= f->double_ulps
                                 && abs_cl_long(iErr) <= maxiError);
                    if (ftz)
                    {
                        // retry per section 6.5.3.2
                        if (IsDoubleResultSubnormal(correct, f->double_ulps))
                        {
                            fail = fail && !(test == 0.0f && iErr == 0);
-                            if( ! fail )
+                            if (!fail) err = 0.0f;
                                err = 0.0f;
                        }
                        // retry per section 6.5.3.3
                        if (IsDoubleSubnormal(s[j]))
                        {
                            int correct5, correct6;
-                            long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 );
+                            long double correct3 =
-                            long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 );
+                                f->dfunc.f_fpI(0.0, &correct5);
-                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            long double correct4 =
-                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                f->dfunc.f_fpI(-0.0, &correct6);
-                            cl_long iErr2 = (long long) q2[j] - (long long) correct5;
+                            float err2 =
-                            cl_long iErr3 = (long long) q2[j] - (long long) correct6;
+                                Bruteforce_Ulp_Error_Double(test, correct3);
                            float err3 =
                                Bruteforce_Ulp_Error_Double(test, correct4);
                            cl_long iErr2 =
                                (long long)q2[j] - (long long)correct5;
                            cl_long iErr3 =
                                (long long)q2[j] - (long long)correct6;
                            // Did +0 work?
-                            if( fabsf(err2) <= f->double_ulps && abs_cl_long( iErr2 ) <= maxiError )
+                            if (fabsf(err2) <= f->double_ulps
                                && abs_cl_long(iErr2) <= maxiError)
                            {
                                err = err2;
                                iErr = iErr2;
                                fail = 0;
                            }
                            // Did -0 work?
-                            else if(fabsf(err3) <= f->double_ulps && abs_cl_long( iErr3 ) <= maxiError)
+                            else if (fabsf(err3) <= f->double_ulps
                                     && abs_cl_long(iErr3) <= maxiError)
                            {
                                err = err3;
                                iErr = iErr3;
@@ -672,9 +812,17 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
                            }
                            // retry per section 6.5.3.4
-                            if( fail && (IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )) )
+                            if (fail
                                && (IsDoubleResultSubnormal(correct2,
                                                            f->double_ulps)
                                    || IsDoubleResultSubnormal(correct3,
                                                               f->double_ulps)))
                            {
-                                fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) );
+                                fail = fail
                                    && !(test == 0.0f
                                         && (abs_cl_long(iErr2) <= maxiError
                                             || abs_cl_long(iErr3)
                                                 <= maxiError));
                                if (!fail)
                                {
                                    err = 0.0f;
@@ -696,7 +844,11 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
                    if (fail)
                    {
-                        vlog_error( "\nERROR: %sD%s: {%f, %d} ulp error at %.13la: *{%.13la, %d} vs. {%.13la, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] );
+                        vlog_error("\nERROR: %sD%s: {%f, %d} ulp error at "
                                   "%.13la: *{%.13la, %d} vs. {%.13la, %d}\n",
                                   f->name, sizeNames[k], err, (int)iErr,
                                   ((double *)gIn)[j], ((double *)gOut_Ref)[j],
                                   ((int *)gOut_Ref2)[j], test, q2[j]);
                        error = -1;
                        goto exit;
                    }
@@ -708,8 +860,10 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            if (gVerboseBruteForce)
            {
-               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-           } else
+                     bufferSize);
            }
            else
            {
                vlog(".");
            }
@@ -732,7 +886,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        for (j = 0; j < bufferSize / sizeof(double); j++)
            p[j] = DoubleFromUInt32(genrand_int32(d));
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -744,16 +899,33 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
-            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
                                        &gOutBuffer2[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
            for (k = 0; k < PERF_LOOP_COUNT; k++)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
                    vlog_error("FAILED -- could not execute kernel\n");
                    goto exit;
@@ -769,17 +941,17 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sd%s", f->name, sizeNames[j] );
+                / (bufferSize / sizeof(double));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sd%s",
                      f->name, sizeNames[j]);
        }
-        for( ; j < gMaxVectorSizeIndex; j++ )
+        for (; j < gMaxVectorSizeIndex; j++) vlog("\t     -- ");
            vlog( "\t     -- " );
    }
    if (!gSkipCorrectnessTesting)
@@ -796,6 +968,3 @@ exit:
    return error;
 }
--- a/test_conformance/math_brute_force/unary_u.cpp
+++ b/test_conformance/math_brute_force/unary_u.cpp
@@ -33,25 +33,38 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
 static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
                       cl_program *p, bool relaxedMode)
 {
-    const char *c[] = {
+    const char *c[] = { "__kernel void math_kernel",
-                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global uint", sizeNames[vectorSize], "* in)\n"
+                        sizeNames[vectorSize],
                        "( __global float",
                        sizeNames[vectorSize],
                        "* out, __global uint",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i] );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i] );\n"
-    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global uint* in)\n"
+                        "}\n" };
    const char *c3[] = {
        "__kernel void math_kernel",
        sizeNames[vectorSize],
        "( __global float* out, __global uint* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       uint3 u0 = vload3( 0, in + 3 * i );\n"
-                            "       float3 f0 = ", name, "( u0 );\n"
+        "       float3 f0 = ",
        name,
        "( u0 );\n"
        "       vstore3( f0, 0, out + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
-                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       size_t parity = i & 1;   // Figure out how many elements are "
        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
        "buffer size \n"
        "       uint3 u0;\n"
        "       float3 f0;\n"
        "       switch( parity )\n"
@@ -63,7 +76,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
        "               u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n"
        "               break;\n"
        "       }\n"
-                            "       f0 = ", name, "( u0 );\n"
+        "       f0 = ",
        name,
        "( u0 );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
@@ -87,7 +102,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
    }
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
 }
@@ -95,39 +111,55 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
 static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                             cl_program *p, bool relaxedMode)
 {
-    const char *c[] = {
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                            "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel",
-                "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global ulong", sizeNames[vectorSize], "* in)\n"
+                        sizeNames[vectorSize],
                        "( __global double",
                        sizeNames[vectorSize],
                        "* out, __global ulong",
                        sizeNames[vectorSize],
                        "* in)\n"
                        "{\n"
                        "   int i = get_global_id(0);\n"
-                            "   out[i] = ", name, "( in[i] );\n"
+                        "   out[i] = ",
-                            "}\n"
+                        name,
-                        };
+                        "( in[i] );\n"
                        "}\n" };
    const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global ulong* in)\n"
+                         "__kernel void math_kernel",
                         sizeNames[vectorSize],
                         "( __global double* out, __global ulong* in)\n"
                         "{\n"
                         "   size_t i = get_global_id(0);\n"
                         "   if( i + 1 < get_global_size(0) )\n"
                         "   {\n"
                         "       ulong3 u0 = vload3( 0, in + 3 * i );\n"
-                        "       double3 f0 = ", name, "( u0 );\n"
+                         "       double3 f0 = ",
                         name,
                         "( u0 );\n"
                         "       vstore3( f0, 0, out + 3*i );\n"
                         "   }\n"
                         "   else\n"
                         "   {\n"
-                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                         "       size_t parity = i & 1;   // Figure out how "
                         "many elements are left over after BUFFER_SIZE % "
                         "(3*sizeof(float)). Assume power of two buffer size \n"
                         "       ulong3 u0;\n"
                         "       switch( parity )\n"
                         "       {\n"
                         "           case 1:\n"
-                        "               u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, 0xdeaddeaddeaddeadUL ); \n"
+                         "               u0 = (ulong3)( in[3*i], "
                         "0xdeaddeaddeaddeadUL, 0xdeaddeaddeaddeadUL ); \n"
                         "               break;\n"
                         "           case 0:\n"
-                        "               u0 = (ulong3)( in[3*i], in[3*i+1], 0xdeaddeaddeaddeadUL ); \n"
+                         "               u0 = (ulong3)( in[3*i], in[3*i+1], "
                         "0xdeaddeaddeaddeadUL ); \n"
                         "               break;\n"
                         "       }\n"
-                        "       double3 f0 = ", name, "( u0 );\n"
+                         "       double3 f0 = ",
                         name,
                         "( u0 );\n"
                         "       switch( parity )\n"
                         "       {\n"
                         "           case 0:\n"
@@ -138,8 +170,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
                         "               break;\n"
                         "       }\n"
                         "   }\n"
-                        "}\n"
+                         "}\n" };
                    };
    const char **kern = c;
    size_t kernSize = sizeof(c) / sizeof(c[0]);
@@ -152,7 +183,8 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
    char testName[32];
-    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
             sizeNames[vectorSize]);
    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
 }
@@ -166,8 +198,10 @@ typedef struct BuildKernelInfo
    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
 } BuildKernelInfo;
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                  void *p);
 static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                  void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -175,8 +209,10 @@ static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, voi
                       info->programs + i, info->relaxedMode);
 }
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
-static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+                                   void *p);
 static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                   void *p)
 {
    BuildKernelInfo *info = (BuildKernelInfo *)p;
    cl_uint i = info->offset + job_id;
@@ -212,23 +248,29 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
    // Init the kernels
    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                   f->nameInCode, relaxedMode };
-    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+    if ((error = ThreadPool_Do(BuildKernel_FloatFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
        return error;
    /*
        for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i,
-            return error;
+       programs + i) ) ) return error;
    */
    if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
    {
        isRangeLimited = 1;
-        half_sin_cos_tan_limit = 1.0f + float_ulps * (FLT_EPSILON/2.0f);             // out of range results from finite inputs must be in [-1,1]
+        half_sin_cos_tan_limit = 1.0f
            + float_ulps
                * (FLT_EPSILON / 2.0f); // out of range results from finite
                                        // inputs must be in [-1,1]
    }
    else if (0 == strcmp(f->name, "half_tan"))
    {
        isRangeLimited = 1;
-        half_sin_cos_tan_limit = INFINITY;             // out of range resut from finite inputs must be numeric
+        half_sin_cos_tan_limit =
            INFINITY; // out of range resut from finite inputs must be numeric
    }
@@ -246,7 +288,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
            for (j = 0; j < bufferSize / sizeof(float); j++)
                p[j] = (uint32_t)i + j;
        }
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL)))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -257,9 +300,12 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
                                          bufferSize, gOut[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                           error, j);
                goto exit;
            }
        }
@@ -269,10 +315,22 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
-            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)))
+            if ((error =
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
                vlog_error("FAILURE -- could not execute kernel\n");
                goto exit;
@@ -280,8 +338,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        }
        // Get that moving
-        if( (error = clFlush(gQueue) ))
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
            vlog( "clFlush failed\n" );
        // Calculate the correctly rounded reference result
        float *r = (float *)gOut_Ref;
@@ -292,15 +349,16 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        // Read the data back
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         bufferSize, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                goto exit;
            }
        }
-        if( gSkipCorrectnessTesting )
+        if (gSkipCorrectnessTesting) break;
            break;
        // Verify data
@@ -320,7 +378,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
                    int fail = !(fabsf(err) <= float_ulps);
                    // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
-                    if( isRangeLimited && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) && fabsf(s[j]) < INFINITY )
+                    if (isRangeLimited
                        && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16)
                        && fabsf(s[j]) < INFINITY)
                    {
                        if (fabsf(test) <= half_sin_cos_tan_limit)
                        {
@@ -337,8 +397,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
                            if (IsFloatResultSubnormal(correct, float_ulps))
                            {
                                fail = fail && (test != 0.0f);
-                                if( ! fail )
+                                if (!fail) err = 0.0f;
                                    err = 0.0f;
                            }
                        }
                    }
@@ -349,7 +408,10 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
                    }
                    if (fail)
                    {
-                        vlog_error( "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n", f->name, sizeNames[k], err, ((uint32_t*) gIn)[j], ((float*) gOut_Ref)[j], test );
+                        vlog_error(
                            "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n",
                            f->name, sizeNames[k], err, ((uint32_t *)gIn)[j],
                            ((float *)gOut_Ref)[j], test);
                        error = -1;
                        goto exit;
                    }
@@ -361,8 +423,10 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        {
            if (gVerboseBruteForce)
            {
-               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-           } else
+                     bufferSize);
            }
            else
            {
                vlog(".");
            }
@@ -383,7 +447,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
    {
        // Init input array
        uint32_t *p = (uint32_t *)gIn;
-        if( strstr( f->name, "exp" ) || strstr( f->name, "sin" ) || strstr( f->name, "cos" ) || strstr( f->name, "tan" ) )
+        if (strstr(f->name, "exp") || strstr(f->name, "sin")
            || strstr(f->name, "cos") || strstr(f->name, "tan"))
            for (j = 0; j < bufferSize / sizeof(float); j++)
                ((float *)p)[j] = (float)genrand_real1(d);
        else if (strstr(f->name, "log"))
@@ -392,7 +457,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        else
            for (j = 0; j < bufferSize / sizeof(float); j++)
                p[j] = genrand_int32(d);
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -404,15 +470,27 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
            for (k = 0; k < PERF_LOOP_COUNT; k++)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
                    vlog_error("FAILURE -- could not execute kernel\n");
                    goto exit;
@@ -428,19 +506,19 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+                / (bufferSize / sizeof(float));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
                      f->name, sizeNames[j]);
        }
    }
-    if( ! gSkipCorrectnessTesting )
+    if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
        vlog( "\t%8.2f @ %a", maxError, maxErrorVal );
    vlog("\n");
 exit:
@@ -487,18 +565,18 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
    }
    /*
        for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels +
-            return error;
+       i, programs + i) ) ) return error;
    */
    for (i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        cl_ulong *p = (cl_ulong *)gIn;
-        for( j = 0; j < bufferSize / sizeof( cl_ulong ); j++ )
+        for (j = 0; j < bufferSize / sizeof(cl_ulong); j++) p[j] = random64(d);
            p[j] = random64(d);
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL)))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -509,9 +587,12 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, bufferSize);
-            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
                                          bufferSize, gOut[j], 0, NULL, NULL)))
            {
-                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                           error, j);
                goto exit;
            }
        }
@@ -521,10 +602,22 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
-            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)))
+            if ((error =
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
                vlog_error("FAILURE -- could not execute kernel\n");
                goto exit;
@@ -532,8 +625,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        }
        // Get that moving
-        if( (error = clFlush(gQueue) ))
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
            vlog( "clFlush failed\n" );
        // Calculate the correctly rounded reference result
        double *r = (double *)gOut_Ref;
@@ -544,15 +636,16 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        // Read the data back
        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
-            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         bufferSize, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                goto exit;
            }
        }
-        if( gSkipCorrectnessTesting )
+        if (gSkipCorrectnessTesting) break;
            break;
        // Verify data
@@ -577,11 +670,11 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
                        if (ftz)
                        {
                            // retry per section 6.5.3.2
-                            if( IsDoubleResultSubnormal(correct, f->double_ulps) )
+                            if (IsDoubleResultSubnormal(correct,
                                                        f->double_ulps))
                            {
                                fail = fail && (test != 0.0);
-                                if( ! fail )
+                                if (!fail) err = 0.0f;
                                    err = 0.0f;
                            }
                        }
                    }
@@ -592,7 +685,11 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
                    }
                    if (fail)
                    {
-                        vlog_error( "\n%s%sD: %f ulp error at 0x%16.16llx: *%.13la vs. %.13la\n", f->name, sizeNames[k], err, ((uint64_t*) gIn)[j], ((double*) gOut_Ref)[j], test );
+                        vlog_error("\n%s%sD: %f ulp error at 0x%16.16llx: "
                                   "*%.13la vs. %.13la\n",
                                   f->name, sizeNames[k], err,
                                   ((uint64_t *)gIn)[j],
                                   ((double *)gOut_Ref)[j], test);
                        error = -1;
                        goto exit;
                    }
@@ -604,8 +701,10 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        {
            if (gVerboseBruteForce)
            {
-               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-           } else
+                     bufferSize);
            }
            else
            {
                vlog(".");
            }
@@ -626,9 +725,9 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        // Init input array
        double *p = (double *)gIn;
-        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        for (j = 0; j < bufferSize / sizeof(double); j++) p[j] = random64(d);
-            p[j] = random64(d);
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+                                          bufferSize, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
@@ -640,15 +739,27 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+                                        &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            double sum = 0.0;
            double bestTime = INFINITY;
            for (k = 0; k < PERF_LOOP_COUNT; k++)
            {
                uint64_t startTime = GetTime();
-                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                                    &localCount, NULL, 0, NULL,
                                                    NULL)))
                {
                    vlog_error("FAILURE -- could not execute kernel\n");
                    goto exit;
@@ -664,21 +775,20 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
                uint64_t endTime = GetTime();
                double time = SubtractTime(endTime, startTime);
                sum += time;
-                if( time < bestTime )
+                if (time < bestTime) bestTime = time;
                    bestTime = time;
            }
-            if( gReportAverageTimes )
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
-                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+                * gComputeDevices * gSimdSize * 1e6
-            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+                / (bufferSize / sizeof(double));
            vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
                      f->name, sizeNames[j]);
        }
-        for( ; j < gMaxVectorSizeIndex; j++ )
+        for (; j < gMaxVectorSizeIndex; j++) vlog("\t     -- ");
            vlog( "\t     -- " );
    }
-    if( ! gSkipCorrectnessTesting )
+    if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
        vlog( "\t%8.2f @ %a", maxError, maxErrorVal );
    vlog("\n");
 exit:
@@ -691,4 +801,3 @@ exit:
    return error;
 }
`@@ -20,5 +20,3 @@ void PreventSleep( void );`
	`void ResumeSleep(void);`	`void ResumeSleep(void);`

	`#endif /* SLEEP_H */`	`#endif /* SLEEP_H */`
`@@ -234,5 +234,3 @@ long double reference_assignmentl( long double x );`
	`int reference_notl(long double x);`	`int reference_notl(long double x);`

	`#endif`	`#endif`