Reduce scope of variables (#1228)

Make variables local to loops, with appropriate types. These variables are not read after the loop without being reset first, so this patch doesn't change behaviour.

These variables should now be used for one purpose only, making it easier to reason about the code. This will make future refactoring easier.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
2026-03-19 06:09:01 +00:00 · 2021-04-28 09:30:51 +01:00
parent cba7a8a537
commit 01497c402e
26 changed files with 420 additions and 471 deletions
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -286,7 +286,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;
    float maxError = 0.0f;
    double maxErrorVal = 0.0;
    double maxErrorVal2 = 0.0;
@@ -321,7 +320,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -344,7 +343,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_double),
@@ -371,7 +370,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -413,7 +412,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);

        // Accumulate the arithmetic errors
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            if (test_info.tinfo[i].maxError > maxError)
            {
@@ -437,12 +436,12 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -450,12 +449,12 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            free_mtdata(test_info.tinfo[i].d);
            clReleaseMemObject(test_info.tinfo[i].inBuf);
            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -477,7 +476,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    dptr func = job->f->dfunc;
    int ftz = job->ftz;
    MTdata d = tinfo->d;
-    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;

@@ -492,7 +490,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_ulong *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -511,11 +509,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Init input array
    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
-    j = 0;
+    cl_uint idx = 0;
    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

-    if (job_id <= (cl_uint)indx)
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
    { // test edge cases
        cl_double *fp = (cl_double *)p;
        cl_double *fp2 = (cl_double *)p2;
@@ -524,10 +522,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        x = (job_id * buffer_elements) % specialValuesCount;
        y = (job_id * buffer_elements) / specialValuesCount;

-        for (; j < buffer_elements; j++)
+        for (; idx < buffer_elements; idx++)
        {
-            fp[j] = specialValues[x];
-            fp2[j] = specialValues[y];
+            fp[idx] = specialValues[x];
+            fp2[idx] = specialValues[y];
            if (++x >= specialValuesCount)
            {
                x = 0;
@@ -538,10 +536,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    }

    // Init any remaining values.
-    for (; j < buffer_elements; j++)
+    for (; idx < buffer_elements; idx++)
    {
-        p[j] = genrand_int64(d);
-        p2[j] = genrand_int64(d);
+        p[idx] = genrand_int64(d);
+        p2[idx] = genrand_int64(d);
    }

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -558,7 +556,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        goto exit;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -626,12 +624,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
    s = (cl_double *)gIn + thread_id * buffer_elements;
    s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
        r[j] = (cl_double)func.f_ff(s[j], s2[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_ulong *)clEnqueueMapBuffer(
@@ -647,9 +645,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    t = (cl_ulong *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
-        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
        {
            cl_ulong *q = out[k];

@@ -794,7 +792,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -276,7 +276,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;
    float maxError = 0.0f;
    double maxErrorVal = 0.0;
    double maxErrorVal2 = 0.0;
@@ -313,7 +312,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -336,7 +335,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_float),
@@ -363,7 +362,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -405,7 +404,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);

        // Accumulate the arithmetic errors
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            if (test_info.tinfo[i].maxError > maxError)
            {
@@ -429,12 +428,12 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -442,12 +441,12 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            free_mtdata(test_info.tinfo[i].d);
            clReleaseMemObject(test_info.tinfo[i].inBuf);
            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -470,7 +469,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    bool relaxedMode = job->relaxedMode;
    float ulps = getAllowedUlpError(job->f, relaxedMode);
    MTdata d = tinfo->d;
-    cl_uint j, k;
    cl_int error;
    cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
    const char *name = job->f->name;
@@ -498,7 +496,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -517,12 +515,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Init input array
    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    j = 0;
-
+    cl_uint idx = 0;
    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

-    if (job_id <= (cl_uint)indx)
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
    { // test edge cases
        float *fp = (float *)p;
        float *fp2 = (float *)p2;
@@ -531,10 +528,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        x = (job_id * buffer_elements) % specialValuesCount;
        y = (job_id * buffer_elements) / specialValuesCount;

-        for (; j < buffer_elements; j++)
+        for (; idx < buffer_elements; idx++)
        {
-            fp[j] = specialValues[x];
-            fp2[j] = specialValues[y];
+            fp[idx] = specialValues[x];
+            fp2[idx] = specialValues[y];
            ++x;
            if (x >= specialValuesCount)
            {
@@ -546,10 +543,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    }

    // Init any remaining values.
-    for (; j < buffer_elements; j++)
+    for (; idx < buffer_elements; idx++)
    {
-        p[j] = genrand_int32(d);
-        p2[j] = genrand_int32(d);
+        p[idx] = genrand_int32(d);
+        p2[idx] = genrand_int32(d);
    }

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -566,7 +563,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        goto exit;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -661,7 +658,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    s2 = (float *)gIn2 + thread_id * buffer_elements;
    if (skipNanInf)
    {
-        for (j = 0; j < buffer_elements; j++)
+        for (size_t j = 0; j < buffer_elements; j++)
        {
            feclearexcept(FE_OVERFLOW);
            r[j] = (float)ref_func(s[j], s2[j]);
@@ -671,7 +668,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    }
    else
    {
-        for (j = 0; j < buffer_elements; j++)
+        for (size_t j = 0; j < buffer_elements; j++)
            r[j] = (float)ref_func(s[j], s2[j]);
    }

@@ -679,7 +676,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_uint *)clEnqueueMapBuffer(
@@ -697,9 +694,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    {
        // Verify data
        t = (cl_uint *)r;
-        for (j = 0; j < buffer_elements; j++)
+        for (size_t j = 0; j < buffer_elements; j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                cl_uint *q = out[k];

@@ -956,7 +953,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_i_double.cpp
@@ -288,7 +288,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;
    float maxError = 0.0f;
    double maxErrorVal = 0.0;
    cl_int maxErrorVal2 = 0;
@@ -319,7 +318,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -342,7 +341,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_double),
@@ -372,7 +371,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -414,7 +413,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);

        // Accumulate the arithmetic errors
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            if (test_info.tinfo[i].maxError > maxError)
            {
@@ -438,12 +437,12 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -451,12 +450,12 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            free_mtdata(test_info.tinfo[i].d);
            clReleaseMemObject(test_info.tinfo[i].inBuf);
            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -478,7 +477,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    dptr func = job->f->dfunc;
    int ftz = job->ftz;
    MTdata d = tinfo->d;
-    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;
    cl_ulong *t;
@@ -491,7 +489,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_ulong *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -510,11 +508,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Init input array
    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
    cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
-    j = 0;
+    size_t idx = 0;
    int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
-    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

-    if (job_id <= (cl_uint)indx)
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
    { // test edge cases
        cl_double *fp = (cl_double *)p;
        cl_int *ip2 = (cl_int *)p2;
@@ -523,10 +521,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        x = (job_id * buffer_elements) % specialValuesCount;
        y = (job_id * buffer_elements) / specialValuesCount;

-        for (; j < buffer_elements; j++)
+        for (; idx < buffer_elements; idx++)
        {
-            fp[j] = specialValues[x];
-            ip2[j] = specialValuesInt[y];
+            fp[idx] = specialValues[x];
+            ip2[idx] = specialValuesInt[y];
            if (++x >= specialValuesCount)
            {
                x = 0;
@@ -537,10 +535,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    }

    // Init any remaining values.
-    for (; j < buffer_elements; j++)
+    for (; idx < buffer_elements; idx++)
    {
-        p[j] = DoubleFromUInt32(genrand_int32(d));
-        p2[j] = genrand_int32(d);
+        p[idx] = DoubleFromUInt32(genrand_int32(d));
+        p2[idx] = genrand_int32(d);
    }

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -557,7 +555,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        goto exit;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -625,12 +623,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
    s = (cl_double *)gIn + thread_id * buffer_elements;
    s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
        r[j] = (cl_double)func.f_fi(s[j], s2[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_ulong *)clEnqueueMapBuffer(
@@ -646,9 +644,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    t = (cl_ulong *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
-        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
        {
            cl_ulong *q = out[k];

@@ -713,7 +711,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -280,7 +280,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;
    float maxError = 0.0f;
    double maxErrorVal = 0.0;
    cl_int maxErrorVal2 = 0;
@@ -312,7 +311,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -335,7 +334,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_float),
@@ -365,7 +364,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -407,7 +406,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);

        // Accumulate the arithmetic errors
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            if (test_info.tinfo[i].maxError > maxError)
            {
@@ -431,12 +430,12 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -444,12 +443,12 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            free_mtdata(test_info.tinfo[i].d);
            clReleaseMemObject(test_info.tinfo[i].inBuf);
            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -471,7 +470,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    int ftz = job->ftz;
    float ulps = job->ulps;
    MTdata d = tinfo->d;
-    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;
    cl_uint *t = 0;
@@ -482,7 +480,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -501,12 +499,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Init input array
    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    j = 0;
-
+    size_t idx = 0;
    int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
-    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

-    if (job_id <= (cl_uint)indx)
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
    { // test edge cases
        float *fp = (float *)p;
        cl_int *ip2 = (cl_int *)p2;
@@ -515,10 +512,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        x = (job_id * buffer_elements) % specialValuesCount;
        y = (job_id * buffer_elements) / specialValuesCount;

-        for (; j < buffer_elements; j++)
+        for (; idx < buffer_elements; idx++)
        {
-            fp[j] = specialValues[x];
-            ip2[j] = specialValuesInt[y];
+            fp[idx] = specialValues[x];
+            ip2[idx] = specialValuesInt[y];
            ++x;
            if (x >= specialValuesCount)
            {
@@ -530,10 +527,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    }

    // Init any remaining values.
-    for (; j < buffer_elements; j++)
+    for (; idx < buffer_elements; idx++)
    {
-        p[j] = genrand_int32(d);
-        p2[j] = genrand_int32(d);
+        p[idx] = genrand_int32(d);
+        p2[idx] = genrand_int32(d);
    }

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -550,7 +547,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        goto exit;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -618,11 +615,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    r = (float *)gOut_Ref + thread_id * buffer_elements;
    s = (float *)gIn + thread_id * buffer_elements;
    s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++) r[j] = (float)func.f_fi(s[j], s2[j]);
+    for (size_t j = 0; j < buffer_elements; j++)
+        r[j] = (float)func.f_fi(s[j], s2[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_uint *)clEnqueueMapBuffer(
@@ -638,9 +636,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    t = (cl_uint *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
-        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
        {
            cl_uint *q = out[k];

@@ -707,7 +705,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -284,7 +284,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;
    float maxError = 0.0f;
    double maxErrorVal = 0.0;
    double maxErrorVal2 = 0.0;
@@ -315,7 +314,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -338,7 +337,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_double),
@@ -365,7 +364,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -407,7 +406,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);

        // Accumulate the arithmetic errors
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            if (test_info.tinfo[i].maxError > maxError)
            {
@@ -431,12 +430,12 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -444,12 +443,12 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            free_mtdata(test_info.tinfo[i].d);
            clReleaseMemObject(test_info.tinfo[i].inBuf);
            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -472,7 +471,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    int ftz = job->ftz;
    bool relaxedMode = job->relaxedMode;
    MTdata d = tinfo->d;
-    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;
    cl_ulong *t;
@@ -485,7 +483,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_ulong *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -504,11 +502,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Init input array
    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
-    j = 0;
+    cl_uint idx = 0;
    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

-    if (job_id <= (cl_uint)indx)
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
    { // test edge cases
        cl_double *fp = (cl_double *)p;
        cl_double *fp2 = (cl_double *)p2;
@@ -517,10 +515,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        x = (job_id * buffer_elements) % specialValuesCount;
        y = (job_id * buffer_elements) / specialValuesCount;

-        for (; j < buffer_elements; j++)
+        for (; idx < buffer_elements; idx++)
        {
-            fp[j] = specialValues[x];
-            fp2[j] = specialValues[y];
+            fp[idx] = specialValues[x];
+            fp2[idx] = specialValues[y];
            if (++x >= specialValuesCount)
            {
                x = 0;
@@ -531,10 +529,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    }

    // Init any remaining values.
-    for (; j < buffer_elements; j++)
+    for (; idx < buffer_elements; idx++)
    {
-        p[j] = genrand_int64(d);
-        p2[j] = genrand_int64(d);
+        p[idx] = genrand_int64(d);
+        p2[idx] = genrand_int64(d);
    }

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -551,7 +549,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        goto exit;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -619,12 +617,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
    s = (cl_double *)gIn + thread_id * buffer_elements;
    s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
        r[j] = (cl_double)func.f_ff(s[j], s2[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_ulong *)clEnqueueMapBuffer(
@@ -640,9 +638,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    t = (cl_ulong *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
-        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
        {
            cl_ulong *q = out[k];

@@ -763,7 +761,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -274,7 +274,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;
    float maxError = 0.0f;
    double maxErrorVal = 0.0;
    double maxErrorVal2 = 0.0;
@@ -307,7 +306,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -330,7 +329,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_float),
@@ -357,7 +356,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
@@ -399,7 +398,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);

        // Accumulate the arithmetic errors
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            if (test_info.tinfo[i].maxError > maxError)
            {
@@ -423,12 +422,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -436,12 +435,12 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            free_mtdata(test_info.tinfo[i].d);
            clReleaseMemObject(test_info.tinfo[i].inBuf);
            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -464,7 +463,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    bool relaxedMode = job->relaxedMode;
    float ulps = getAllowedUlpError(job->f, relaxedMode);
    MTdata d = tinfo->d;
-    cl_uint j, k;
    cl_int error;
    cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
    const char *name = job->f->name;
@@ -482,7 +480,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -501,12 +499,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Init input array
    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    j = 0;
-
+    cl_uint idx = 0;
    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

-    if (job_id <= (cl_uint)indx)
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
    {
        // Insert special values
        uint32_t x, y;
@@ -514,10 +511,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        x = (job_id * buffer_elements) % specialValuesCount;
        y = (job_id * buffer_elements) / specialValuesCount;

-        for (; j < buffer_elements; j++)
+        for (; idx < buffer_elements; idx++)
        {
-            p[j] = ((cl_uint *)specialValues)[x];
-            p2[j] = ((cl_uint *)specialValues)[y];
+            p[idx] = ((cl_uint *)specialValues)[x];
+            p2[idx] = ((cl_uint *)specialValues)[y];
            ++x;
            if (x >= specialValuesCount)
            {
@@ -527,28 +524,28 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
            }
            if (relaxedMode && strcmp(name, "divide") == 0)
            {
-                cl_uint pj = p[j] & 0x7fffffff;
-                cl_uint p2j = p2[j] & 0x7fffffff;
+                cl_uint pj = p[idx] & 0x7fffffff;
+                cl_uint p2j = p2[idx] & 0x7fffffff;
                // Replace values outside [2^-62, 2^62] with QNaN
-                if (pj < 0x20800000 || pj > 0x5e800000) p[j] = 0x7fc00000;
-                if (p2j < 0x20800000 || p2j > 0x5e800000) p2[j] = 0x7fc00000;
+                if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
+                if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
            }
        }
    }

    // Init any remaining values.
-    for (; j < buffer_elements; j++)
+    for (; idx < buffer_elements; idx++)
    {
-        p[j] = genrand_int32(d);
-        p2[j] = genrand_int32(d);
+        p[idx] = genrand_int32(d);
+        p2[idx] = genrand_int32(d);

        if (relaxedMode && strcmp(name, "divide") == 0)
        {
-            cl_uint pj = p[j] & 0x7fffffff;
-            cl_uint p2j = p2[j] & 0x7fffffff;
+            cl_uint pj = p[idx] & 0x7fffffff;
+            cl_uint p2j = p2[idx] & 0x7fffffff;
            // Replace values outside [2^-62, 2^62] with QNaN
-            if (pj < 0x20800000 || pj > 0x5e800000) p[j] = 0x7fc00000;
-            if (p2j < 0x20800000 || p2j > 0x5e800000) p2[j] = 0x7fc00000;
+            if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
+            if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
        }
    }

@@ -566,7 +563,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        goto exit;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -649,12 +646,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    s2 = (float *)gIn2 + thread_id * buffer_elements;
    if (gInfNanSupport)
    {
-        for (j = 0; j < buffer_elements; j++)
+        for (size_t j = 0; j < buffer_elements; j++)
            r[j] = (float)func.f_ff(s[j], s2[j]);
    }
    else
    {
-        for (j = 0; j < buffer_elements; j++)
+        for (size_t j = 0; j < buffer_elements; j++)
        {
            feclearexcept(FE_OVERFLOW);
            r[j] = (float)func.f_ff(s[j], s2[j]);
@@ -669,7 +666,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_uint *)clEnqueueMapBuffer(
@@ -685,9 +682,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    t = (cl_uint *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
-        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
        {
            cl_uint *q = out[k];

@@ -892,7 +889,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
@@ -154,13 +154,12 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
    double *r = cri->r + off;
    int *i = cri->i + off;
    long double (*f)(long double, long double, int *) = cri->f_ffpI;
-    cl_uint j;

    if (off + count > lim) count = lim - off;

    Force64BitFPUPrecision();

-    for (j = 0; j < count; ++j)
+    for (cl_uint j = 0; j < count; ++j)
        r[j] = (double)f((long double)x[j], (long double)y[j], i + j);

    return CL_SUCCESS;
@@ -168,8 +167,6 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)

 int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -198,12 +195,12 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        double *p = (double *)gIn;
        double *p2 = (double *)gIn2;
-        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
        {
            p[j] = DoubleFromUInt32(genrand_int32(d));
            p2[j] = DoubleFromUInt32(genrand_int32(d));
@@ -224,7 +221,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -249,7 +246,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
@@ -311,12 +308,12 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        {
            double *r = (double *)gOut_Ref;
            int *r2 = (int *)gOut_Ref2;
-            for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
                r[j] = (double)f->dfunc.f_ffpI(s[j], s2[j], r2 + j);
        }

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -339,9 +336,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        // Verify data
        uint64_t *t = (uint64_t *)gOut_Ref;
        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint64_t *q = (uint64_t *)gOut[k];
                int32_t *q2 = (int32_t *)gOut2[k];
@@ -572,7 +569,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
@@ -152,11 +152,10 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
    float *r = cri->r + off;
    int *i = cri->i + off;
    double (*f)(double, double, int *) = cri->f_ffpI;
-    cl_uint j;

    if (off + count > lim) count = lim - off;

-    for (j = 0; j < count; ++j)
+    for (cl_uint j = 0; j < count; ++j)
        r[j] = (float)f((double)x[j], (double)y[j], i + j);

    return CL_SUCCESS;
@@ -164,8 +163,6 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)

 int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;

    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
@@ -199,12 +196,12 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        cl_uint *p = (cl_uint *)gIn;
        cl_uint *p2 = (cl_uint *)gIn2;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
            p[j] = genrand_int32(d);
            p2[j] = genrand_int32(d);
@@ -225,7 +222,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -250,7 +247,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
@@ -312,12 +309,12 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        {
            float *r = (float *)gOut_Ref;
            int *r2 = (int *)gOut_Ref2;
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                r[j] = (float)f->func.f_ffpI(s[j], s2[j], r2 + j);
        }

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -340,9 +337,9 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint32_t *q = (uint32_t *)(gOut[k]);
                int32_t *q2 = (int32_t *)gOut2[k];
@@ -557,7 +554,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -119,8 +119,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)

 int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -148,18 +146,18 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        double *p = (double *)gIn;
        if (gWimpyMode)
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
        }
        else
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j);
        }

@@ -171,7 +169,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -186,7 +184,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
@@ -219,11 +217,11 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
        // Calculate the correctly rounded reference result
        int *r = (int *)gOut_Ref;
        double *s = (double *)gIn;
-        for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
            r[j] = f->dfunc.i_f(s[j]);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -238,9 +236,9 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)

        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint32_t *q = (uint32_t *)(gOut[k]);
                // If we aren't getting the correctly rounded result
@@ -294,7 +292,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 exit:
    RestoreFPState(&oldMode);
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -117,8 +117,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)

 int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -145,18 +143,18 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        cl_uint *p = (cl_uint *)gIn;
        if (gWimpyMode)
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                p[j] = (cl_uint)i + j * scale;
        }
        else
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                p[j] = (uint32_t)i + j;
        }

@@ -168,7 +166,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -183,7 +181,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
@@ -216,11 +214,11 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
        // Calculate the correctly rounded reference result
        int *r = (int *)gOut_Ref;
        float *s = (float *)gIn;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            r[j] = f->func.i_f(s[j]);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -235,9 +233,9 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)

        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint32_t *q = (uint32_t *)(gOut[k]);
                // If we aren't getting the correctly rounded result
@@ -290,7 +288,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 exit:
    RestoreFPState(&oldMode);
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -273,7 +273,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;

    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);

@@ -300,7 +299,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -323,7 +322,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (size_t i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_double),
@@ -350,7 +349,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -403,12 +402,12 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -416,12 +415,12 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            free_mtdata(test_info.tinfo[i].d);
            clReleaseMemObject(test_info.tinfo[i].inBuf);
            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -442,7 +441,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    dptr dfunc = job->f->dfunc;
    int ftz = job->ftz;
    MTdata d = tinfo->d;
-    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;
    cl_long *t;
@@ -455,7 +453,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_long *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_long *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -474,21 +472,21 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Init input array
    double *p = (double *)gIn + thread_id * buffer_elements;
    double *p2 = (double *)gIn2 + thread_id * buffer_elements;
-    j = 0;
+    cl_uint idx = 0;
    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

-    if (job_id <= (cl_uint)indx)
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
    { // test edge cases
        uint32_t x, y;

        x = (job_id * buffer_elements) % specialValuesCount;
        y = (job_id * buffer_elements) / specialValuesCount;

-        for (; j < buffer_elements; j++)
+        for (; idx < buffer_elements; idx++)
        {
-            p[j] = specialValues[x];
-            p2[j] = specialValues[y];
+            p[idx] = specialValues[x];
+            p2[idx] = specialValues[y];
            if (++x >= specialValuesCount)
            {
                x = 0;
@@ -499,10 +497,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    }

    // Init any remaining values.
-    for (; j < buffer_elements; j++)
+    for (; idx < buffer_elements; idx++)
    {
-        ((cl_ulong *)p)[j] = genrand_int64(d);
-        ((cl_ulong *)p2)[j] = genrand_int64(d);
+        ((cl_ulong *)p)[idx] = genrand_int64(d);
+        ((cl_ulong *)p2)[idx] = genrand_int64(d);
    }

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -519,7 +517,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        goto exit;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -587,11 +585,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
    s = (cl_double *)gIn + thread_id * buffer_elements;
    s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++) r[j] = dfunc.i_ff(s[j], s2[j]);
+    for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_ff(s[j], s2[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_long *)clEnqueueMapBuffer(
@@ -607,7 +605,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    t = (cl_long *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
        cl_long *q = out[0];

@@ -656,7 +654,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }


-        for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
+        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
        {
            q = (cl_long *)out[k];
            // If we aren't getting the correctly rounded result
@@ -704,7 +702,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ b/test_conformance/math_brute_force/macro_binary_float.cpp
@@ -263,7 +263,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;

    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);

@@ -291,7 +290,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -314,7 +313,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_float),
@@ -341,7 +340,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -394,12 +393,12 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -407,12 +406,12 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            free_mtdata(test_info.tinfo[i].d);
            clReleaseMemObject(test_info.tinfo[i].inBuf);
            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -433,7 +432,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    fptr func = job->f->func;
    int ftz = job->ftz;
    MTdata d = tinfo->d;
-    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;
    cl_int *t = 0;
@@ -444,7 +442,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_int *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_int *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -463,12 +461,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Init input array
    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    j = 0;
+    cl_uint idx = 0;

    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;

-    if (job_id <= (cl_uint)indx)
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
    { // test edge cases
        float *fp = (float *)p;
        float *fp2 = (float *)p2;
@@ -477,10 +475,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        x = (job_id * buffer_elements) % specialValuesCount;
        y = (job_id * buffer_elements) / specialValuesCount;

-        for (; j < buffer_elements; j++)
+        for (; idx < buffer_elements; idx++)
        {
-            fp[j] = specialValues[x];
-            fp2[j] = specialValues[y];
+            fp[idx] = specialValues[x];
+            fp2[idx] = specialValues[y];
            ++x;
            if (x >= specialValuesCount)
            {
@@ -492,10 +490,10 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    }

    // Init any remaining values.
-    for (; j < buffer_elements; j++)
+    for (; idx < buffer_elements; idx++)
    {
-        p[j] = genrand_int32(d);
-        p2[j] = genrand_int32(d);
+        p[idx] = genrand_int32(d);
+        p2[idx] = genrand_int32(d);
    }

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -512,7 +510,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        goto exit;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -580,11 +578,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
    s = (float *)gIn + thread_id * buffer_elements;
    s2 = (float *)gIn2 + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);
+    for (size_t j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_int *)clEnqueueMapBuffer(
@@ -600,7 +598,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    t = (cl_int *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
        cl_int *q = out[0];

@@ -646,7 +644,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
            goto exit;
        }

-        for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
+        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
        {
            q = out[k];
            // If we aren't getting the correctly rounded result
@@ -693,7 +691,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -151,7 +151,6 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;

    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);

@@ -178,7 +177,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -201,7 +200,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_double),
@@ -218,7 +217,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -269,12 +268,12 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -282,10 +281,10 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -306,7 +305,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    ThreadInfo *tinfo = job->tinfo + thread_id;
    dptr dfunc = job->f->dfunc;
    int ftz = job->ftz;
-    cl_uint j, k;
    cl_int error;
    const char *name = job->f->name;

@@ -315,7 +313,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_long *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_long *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -333,7 +331,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Write the new values to the input array
    cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
        p[j] = DoubleFromUInt32(base + j * scale);

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -343,7 +341,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        return error;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -404,11 +402,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Calculate the correctly rounded reference result
    cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
    cl_double *s = (cl_double *)p;
-    for (j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]);
+    for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_long *)clEnqueueMapBuffer(
@@ -424,7 +422,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    cl_long *t = (cl_long *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
        cl_long *q = out[0];

@@ -450,7 +448,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }


-        for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
+        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
        {
            q = out[k];
            // If we aren't getting the correctly rounded result
@@ -476,7 +474,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -150,7 +150,6 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;

    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);

@@ -178,7 +177,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -201,7 +200,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_float),
@@ -218,7 +217,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -269,12 +268,12 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -282,10 +281,10 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -306,7 +305,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    ThreadInfo *tinfo = job->tinfo + thread_id;
    fptr func = job->f->func;
    int ftz = job->ftz;
-    cl_uint j, k;
    cl_int error = CL_SUCCESS;
    cl_int ret = CL_SUCCESS;
    const char *name = job->f->name;
@@ -319,7 +317,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_int *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_int *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -337,7 +335,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Init input array
    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
+    for (size_t j = 0; j < buffer_elements; j++) p[j] = base + j * scale;

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
                                      buffer_size, p, 0, NULL, NULL)))
@@ -346,7 +344,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        return error;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -407,11 +405,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Calculate the correctly rounded reference result
    cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
    float *s = (float *)p;
-    for (j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]);
+    for (size_t j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_int *)clEnqueueMapBuffer(
@@ -427,9 +425,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    cl_int *t = (cl_int *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
-        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
        {
            cl_int *q = out[0];

@@ -456,7 +454,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
            }


-            for (k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
+            for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
+                 k++)
            {
                q = out[k];
                // If we aren't getting the correctly rounded result
@@ -486,7 +485,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

 exit:
    ret = error;
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/mad_double.cpp
+++ b/test_conformance/math_brute_force/mad_double.cpp
@@ -132,8 +132,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)

 int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -155,13 +153,13 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        double *p = (double *)gIn;
        double *p2 = (double *)gIn2;
        double *p3 = (double *)gIn3;
-        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
        {
            p[j] = DoubleFromUInt32(genrand_int32(d));
            p2[j] = DoubleFromUInt32(genrand_int32(d));
@@ -190,7 +188,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -205,7 +203,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
@@ -252,11 +250,11 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
        double *s = (double *)gIn;
        double *s2 = (double *)gIn2;
        double *s3 = (double *)gIn3;
-        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
            r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -293,7 +291,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/mad_float.cpp
+++ b/test_conformance/math_brute_force/mad_float.cpp
@@ -130,8 +130,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)

 int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;

    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
@@ -154,13 +152,13 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        cl_uint *p = (cl_uint *)gIn;
        cl_uint *p2 = (cl_uint *)gIn2;
        cl_uint *p3 = (cl_uint *)gIn3;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
            p[j] = genrand_int32(d);
            p2[j] = genrand_int32(d);
@@ -189,7 +187,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -204,7 +202,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
@@ -251,11 +249,11 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
        float *s = (float *)gIn;
        float *s2 = (float *)gIn2;
        float *s3 = (float *)gIn3;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            r[j] = (float)f->func.f_fff(s[j], s2[j], s3[j]);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -292,7 +290,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ b/test_conformance/math_brute_force/ternary_double.cpp
@@ -208,8 +208,6 @@ static const size_t specialValuesCount =
 int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
                                         bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -234,22 +232,23 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        double *p = (double *)gIn;
        double *p2 = (double *)gIn2;
        double *p3 = (double *)gIn3;
-        j = 0;
+        size_t idx = 0;
+
        if (i == 0)
        { // test edge cases
            uint32_t x, y, z;
            x = y = z = 0;
-            for (; j < BUFFER_SIZE / sizeof(double); j++)
+            for (; idx < BUFFER_SIZE / sizeof(double); idx++)
            {
-                p[j] = specialValues[x];
-                p2[j] = specialValues[y];
-                p3[j] = specialValues[z];
+                p[idx] = specialValues[x];
+                p2[idx] = specialValues[y];
+                p3[idx] = specialValues[z];
                if (++x >= specialValuesCount)
                {
                    x = 0;
@@ -260,15 +259,15 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
                    }
                }
            }
-            if (j == BUFFER_SIZE / sizeof(double))
+            if (idx == BUFFER_SIZE / sizeof(double))
                vlog_error("Test Error: not all special cases tested!\n");
        }

-        for (; j < BUFFER_SIZE / sizeof(double); j++)
+        for (; idx < BUFFER_SIZE / sizeof(double); idx++)
        {
-            p[j] = DoubleFromUInt32(genrand_int32(d));
-            p2[j] = DoubleFromUInt32(genrand_int32(d));
-            p3[j] = DoubleFromUInt32(genrand_int32(d));
+            p[idx] = DoubleFromUInt32(genrand_int32(d));
+            p2[idx] = DoubleFromUInt32(genrand_int32(d));
+            p3[idx] = DoubleFromUInt32(genrand_int32(d));
        }

        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
@@ -293,7 +292,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -308,7 +307,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
@@ -355,11 +354,11 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
        double *s = (double *)gIn;
        double *s2 = (double *)gIn2;
        double *s3 = (double *)gIn3;
-        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
            r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -374,9 +373,9 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,

        // Verify data
        uint64_t *t = (uint64_t *)gOut_Ref;
-        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint64_t *q = (uint64_t *)(gOut[k]);

@@ -731,7 +730,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ b/test_conformance/math_brute_force/ternary_float.cpp
@@ -215,8 +215,6 @@ static const size_t specialValuesCount =

 int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;

    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
@@ -250,13 +248,14 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        cl_uint *p = (cl_uint *)gIn;
        cl_uint *p2 = (cl_uint *)gIn2;
        cl_uint *p3 = (cl_uint *)gIn3;
-        j = 0;
+        size_t idx = 0;
+
        if (i == 0)
        { // test edge cases
            float *fp = (float *)gIn;
@@ -264,11 +263,11 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            float *fp3 = (float *)gIn3;
            uint32_t x, y, z;
            x = y = z = 0;
-            for (; j < BUFFER_SIZE / sizeof(float); j++)
+            for (; idx < BUFFER_SIZE / sizeof(float); idx++)
            {
-                fp[j] = specialValues[x];
-                fp2[j] = specialValues[y];
-                fp3[j] = specialValues[z];
+                fp[idx] = specialValues[x];
+                fp2[idx] = specialValues[y];
+                fp3[idx] = specialValues[z];

                if (++x >= specialValuesCount)
                {
@@ -280,15 +279,15 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                    }
                }
            }
-            if (j == BUFFER_SIZE / sizeof(float))
+            if (idx == BUFFER_SIZE / sizeof(float))
                vlog_error("Test Error: not all special cases tested!\n");
        }

-        for (; j < BUFFER_SIZE / sizeof(float); j++)
+        for (; idx < BUFFER_SIZE / sizeof(float); idx++)
        {
-            p[j] = genrand_int32(d);
-            p2[j] = genrand_int32(d);
-            p3[j] = genrand_int32(d);
+            p[idx] = genrand_int32(d);
+            p2[idx] = genrand_int32(d);
+            p3[idx] = genrand_int32(d);
        }

        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
@@ -313,7 +312,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -328,7 +327,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
@@ -377,7 +376,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        float *s3 = (float *)gIn3;
        if (skipNanInf)
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            {
                feclearexcept(FE_OVERFLOW);
                r[j] =
@@ -388,13 +387,13 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        }
        else
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                r[j] =
                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
        }

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -409,9 +408,9 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)

        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint32_t *q = (uint32_t *)(gOut[k]);

@@ -866,7 +865,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -159,7 +159,6 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;
    float maxError = 0.0f;
    double maxErrorVal = 0.0;

@@ -189,7 +188,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)

    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -212,7 +211,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_double),
@@ -229,7 +228,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -269,7 +268,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);

        // Accumulate the arithmetic errors
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            if (test_info.tinfo[i].maxError > maxError)
            {
@@ -292,12 +291,12 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -305,10 +304,10 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -329,7 +328,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    ThreadInfo *tinfo = job->tinfo + thread_id;
    float ulps = job->ulps;
    dptr func = job->f->dfunc;
-    cl_uint j, k;
    cl_int error;
    int ftz = job->ftz;

@@ -338,7 +336,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_ulong *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -356,7 +354,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Write the new values to the input array
    cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
        p[j] = DoubleFromUInt32(base + j * scale);

    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -366,7 +364,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        return error;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -428,11 +426,12 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Calculate the correctly rounded reference result
    cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
    cl_double *s = (cl_double *)p;
-    for (j = 0; j < buffer_elements; j++) r[j] = (cl_double)func.f_f(s[j]);
+    for (size_t j = 0; j < buffer_elements; j++)
+        r[j] = (cl_double)func.f_f(s[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_ulong *)clEnqueueMapBuffer(
@@ -448,9 +447,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    cl_ulong *t = (cl_ulong *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
-        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
        {
            cl_ulong *q = out[k];

@@ -516,7 +515,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -157,7 +157,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 {
    TestInfo test_info;
    cl_int error;
-    size_t i, j;
    float maxError = 0.0f;
    double maxErrorVal = 0.0;
    int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
@@ -189,7 +188,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    test_info.relaxedMode = relaxedMode;
    // cl_kernels aren't thread safe, so we make one for each vector size for
    // every thread
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
        test_info.k[i] = (cl_kernel *)malloc(array_size);
@@ -212,7 +211,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
    }
    memset(test_info.tinfo, 0,
           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (i = 0; i < test_info.threadCount; i++)
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
    {
        cl_buffer_region region = {
            i * test_info.subBufferSize * sizeof(cl_float),
@@ -229,7 +228,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
            goto exit;
        }

-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
@@ -287,7 +286,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);

        // Accumulate the arithmetic errors
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            if (test_info.tinfo[i].maxError > maxError)
            {
@@ -316,12 +315,12 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
    {
        clReleaseProgram(test_info.programs[i]);
        if (test_info.k[i])
        {
-            for (j = 0; j < test_info.threadCount; j++)
+            for (cl_uint j = 0; j < test_info.threadCount; j++)
                clReleaseKernel(test_info.k[i][j]);

            free(test_info.k[i]);
@@ -329,10 +328,10 @@ exit:
    }
    if (test_info.tinfo)
    {
-        for (i = 0; i < test_info.threadCount; i++)
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
        {
            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }
@@ -360,7 +359,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        func = job->f->rfunc;
    }

-    cl_uint j, k;
    cl_int error;

    int isRangeLimited = job->isRangeLimited;
@@ -370,7 +368,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // start the map of the output arrays
    cl_event e[VECTOR_SIZE_COUNT];
    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        out[j] = (cl_uint *)clEnqueueMapBuffer(
            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
@@ -388,7 +386,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Write the new values to the input array
    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
        p[j] = base + j * scale;
        if (relaxedMode)
@@ -421,7 +419,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        return error;
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        // Wait for the map to finish
        if ((error = clWaitForEvents(1, e + j)))
@@ -482,11 +480,11 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
    // Calculate the correctly rounded reference result
    float *r = (float *)gOut_Ref + thread_id * buffer_elements;
    float *s = (float *)p;
-    for (j = 0; j < buffer_elements; j++) r[j] = (float)func.f_f(s[j]);
+    for (size_t j = 0; j < buffer_elements; j++) r[j] = (float)func.f_f(s[j]);

    // Read the data back -- no need to wait for the first N-1 buffers but wait
    // for the last buffer. This is an in order queue.
-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
        out[j] = (cl_uint *)clEnqueueMapBuffer(
@@ -502,9 +500,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)

    // Verify data
    uint32_t *t = (uint32_t *)r;
-    for (j = 0; j < buffer_elements; j++)
+    for (size_t j = 0; j < buffer_elements; j++)
    {
-        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
        {
            uint32_t *q = out[k];

@@ -695,7 +693,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
        }
    }

-    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
    {
        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                             out[j], 0, NULL, NULL)))
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_double.cpp
@@ -126,8 +126,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)

 int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -154,18 +152,18 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        double *p = (double *)gIn;
        if (gWimpyMode)
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
        }
        else
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j);
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
@@ -176,7 +174,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -201,7 +199,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
@@ -240,7 +238,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        double *r = (double *)gOut_Ref;
        double *r2 = (double *)gOut_Ref2;
        double *s = (double *)gIn;
-        for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
        {
            long double dd;
            r[j] = (double)f->dfunc.f_fpf(s[j], &dd);
@@ -248,7 +246,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -271,9 +269,9 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
        // Verify data
        uint64_t *t = (uint64_t *)gOut_Ref;
        uint64_t *t2 = (uint64_t *)gOut_Ref2;
-        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint64_t *q = (uint64_t *)(gOut[k]);
                uint64_t *q2 = (uint64_t *)(gOut2[k]);
@@ -438,7 +436,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_float.cpp
@@ -124,8 +124,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)

 int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    uint32_t l;
    int error;
    char const *testing_mode;
@@ -155,13 +153,13 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        uint32_t *p = (uint32_t *)gIn;
        if (gWimpyMode)
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            {
                p[j] = (uint32_t)i + j * scale;
                if (relaxedMode && strcmp(f->name, "sincos") == 0)
@@ -173,7 +171,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        }
        else
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            {
                p[j] = (uint32_t)i + j;
                if (relaxedMode && strcmp(f->name, "sincos") == 0)
@@ -192,7 +190,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -217,7 +215,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
@@ -272,7 +270,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)

        if (skipNanInf)
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            {
                double dd;
                feclearexcept(FE_OVERFLOW);
@@ -289,7 +287,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        }
        else
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            {
                double dd;
                if (relaxedMode)
@@ -304,7 +302,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        if (isFract && ftz) RestoreFPState(&oldMode);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -331,9 +329,9 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
        uint32_t *t2 = (uint32_t *)gOut_Ref2;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint32_t *q = (uint32_t *)gOut[k];
                uint32_t *q2 = (uint32_t *)gOut2[k];
@@ -572,7 +570,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
@@ -133,8 +133,6 @@ static cl_ulong abs_cl_long(cl_long i)

 int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -162,18 +160,18 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        double *p = (double *)gIn;
        if (gWimpyMode)
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
        }
        else
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j);
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
@@ -184,7 +182,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -209,7 +207,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
@@ -248,11 +246,11 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        double *r = (double *)gOut_Ref;
        int *r2 = (int *)gOut_Ref2;
        double *s = (double *)gIn;
-        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
            r[j] = (double)f->dfunc.f_fpI(s[j], r2 + j);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -275,9 +273,9 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
        // Verify data
        uint64_t *t = (uint64_t *)gOut_Ref;
        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint64_t *q = (uint64_t *)(gOut[k]);
                int32_t *q2 = (int32_t *)(gOut2[k]);
@@ -409,7 +407,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
@@ -131,8 +131,6 @@ static cl_ulong abs_cl_long(cl_long i)

 int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -165,18 +163,18 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        uint32_t *p = (uint32_t *)gIn;
        if (gWimpyMode)
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                p[j] = (uint32_t)i + j * scale;
        }
        else
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                p[j] = (uint32_t)i + j;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
@@ -187,7 +185,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -212,7 +210,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
@@ -251,11 +249,11 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        float *r = (float *)gOut_Ref;
        int *r2 = (int *)gOut_Ref2;
        float *s = (float *)gIn;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            r[j] = (float)f->func.f_fpI(s[j], r2 + j);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -278,9 +276,9 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint32_t *q = (uint32_t *)(gOut[k]);
                int32_t *q2 = (int32_t *)(gOut2[k]);
@@ -407,7 +405,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ b/test_conformance/math_brute_force/unary_u_double.cpp
@@ -126,8 +126,6 @@ static cl_ulong random64(MTdata d)

 int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -150,11 +148,12 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
            return error;
    }

-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        cl_ulong *p = (cl_ulong *)gIn;
-        for (j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++) p[j] = random64(d);
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++)
+            p[j] = random64(d);

        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
@@ -164,7 +163,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -179,7 +178,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
@@ -211,11 +210,11 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
        // Calculate the correctly rounded reference result
        double *r = (double *)gOut_Ref;
        cl_ulong *s = (cl_ulong *)gIn;
-        for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
            r[j] = (double)f->dfunc.f_u(s[j]);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -230,9 +229,9 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)

        // Verify data
        uint64_t *t = (uint64_t *)gOut_Ref;
-        for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint64_t *q = (uint64_t *)(gOut[k]);

@@ -306,7 +305,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ b/test_conformance/math_brute_force/unary_u_float.cpp
@@ -118,8 +118,6 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)

 int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
 {
-    uint64_t i;
-    uint32_t j, k;
    int error;
    cl_program programs[VECTOR_SIZE_COUNT];
    cl_kernel kernels[VECTOR_SIZE_COUNT];
@@ -165,18 +163,18 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
    }


-    for (i = 0; i < (1ULL << 32); i += step)
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        uint32_t *p = (uint32_t *)gIn;
        if (gWimpyMode)
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                p[j] = (uint32_t)i + j * scale;
        }
        else
        {
-            for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                p[j] = (uint32_t)i + j;
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
@@ -187,7 +185,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        }

        // write garbage into output arrays
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
@@ -202,7 +200,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        }

        // Run the kernels
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
@@ -234,11 +232,11 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
        // Calculate the correctly rounded reference result
        float *r = (float *)gOut_Ref;
        cl_uint *s = (cl_uint *)gIn;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            r[j] = (float)f->func.f_u(s[j]);

        // Read the data back
-        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
@@ -254,9 +252,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)

        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
+        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
-            for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint32_t *q = (uint32_t *)(gOut[k]);

@@ -339,7 +337,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)

 exit:
    // Release
-    for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
        clReleaseProgram(programs[k]);