From ed839ebf10c5b7334ac16b0fe13e324f3b47799a Mon Sep 17 00:00:00 2001
From: Marco Antognini <marco.antognini@arm.com>
Date: Mon, 24 May 2021 16:34:54 +0100
Subject: [PATCH] Avoid manual memory management (#1260)

* Avoid manual memory management

Prefer std::vector over malloc and free. This will allow removing goto
statements by leveraging RAII.

Use the appropriate type (bool) to store overflow predicates, and
allocate std::vector<bool> of the appropriate size: before this change
the allocation was unnecessarily larger than required.

No longer attempt to catch "out of host memory" issues, given that in
such a situation it is generally not possible to report an error
cleanly. Rely on the std::bad_alloc exception to report such issues.

Introduce a new header for common code in the math_brute_force
component. It currently complements utility.h and is expected to hold
cleaned-up content extracted by future refactoring operations.

List all headers as sources in CMake for better compatibility with IDEs.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>

* Remove manual or unnecessary memset

In order to use non-POD types as fields of TestInfo, memset must be
replaced with a compatible zero-initialisation.

Remove an unnecessary memset in MakeKernels.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
---
 .../math_brute_force/CMakeLists.txt           |  6 ++
 .../math_brute_force/binary_double.cpp        | 76 +++++++-----------
 .../math_brute_force/binary_float.cpp         | 80 +++++++------------
 .../math_brute_force/binary_i_double.cpp      | 76 +++++++-----------
 .../math_brute_force/binary_i_float.cpp       | 76 +++++++-----------
 .../binary_operator_double.cpp                | 76 +++++++-----------
 .../binary_operator_float.cpp                 | 80 +++++++------------
 test_conformance/math_brute_force/common.h    | 27 +++++++
 .../math_brute_force/macro_binary_double.cpp  | 78 +++++++-----------
 .../math_brute_force/macro_binary_float.cpp   | 76 +++++++-----------
 .../math_brute_force/macro_unary_double.cpp   | 72 ++++++-----------
 .../math_brute_force/macro_unary_float.cpp    | 72 ++++++-----------
 test_conformance/math_brute_force/main.cpp    |  8 +-
 .../math_brute_force/unary_double.cpp         | 72 ++++++-----------
 .../math_brute_force/unary_float.cpp          | 72 ++++++-----------
 15 files changed, 366 insertions(+), 581 deletions(-)
 create mode 100644 test_conformance/math_brute_force/common.h
diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt
index d8dfc403..28d2716f 100644
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt
@@ -9,7 +9,9 @@ set(${MODULE_NAME}_SOURCES
     binary_operator_float.cpp
     binary_two_results_i_double.cpp
     binary_two_results_i_float.cpp
+    common.h
     function_list.cpp
+    function_list.h
     i_unary_double.cpp
     i_unary_float.cpp
     macro_binary_double.cpp
@@ -20,9 +22,12 @@ set(${MODULE_NAME}_SOURCES
     mad_float.cpp
     main.cpp
     reference_math.cpp
+    reference_math.h
     sleep.cpp
+    sleep.h
     ternary_double.cpp
     ternary_float.cpp
+    test_functions.h
     unary_double.cpp
     unary_float.cpp
     unary_two_results_double.cpp
@@ -32,6 +37,7 @@ set(${MODULE_NAME}_SOURCES
     unary_u_double.cpp
     unary_u_float.cpp
     utility.cpp
+    utility.h
 )
 
 include(../CMakeCommon.txt)
diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index 9c6b59b4..a2b7d28b 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -115,7 +116,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -126,7 +127,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -149,11 +151,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -284,11 +289,11 @@ constexpr size_t specialValuesCount =
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_double);
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     float ulps = job->ulps;
     dptr func = job->f->dfunc;
     int ftz = job->ftz;
@@ -647,7 +652,7 @@ exit:
 
 int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
     float maxError = 0.0f;
     double maxErrorVal = 0.0;
@@ -656,7 +661,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -685,27 +689,10 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -802,27 +789,20 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        free_mtdata(threadInfo.d);
+        clReleaseMemObject(threadInfo.inBuf);
+        clReleaseMemObject(threadInfo.inBuf2);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
index 9c7081dc..97712ee8 100644
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -113,7 +114,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -124,7 +125,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -147,11 +149,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -274,18 +279,18 @@ constexpr size_t specialValuesCount =
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_float);
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     fptr func = job->f->func;
     int ftz = job->ftz;
     bool relaxedMode = job->relaxedMode;
     float ulps = getAllowedUlpError(job->f, relaxedMode);
     MTdata d = tinfo->d;
     cl_int error;
-    cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
+    std::vector<bool> overflow(buffer_elements, false);
     const char *name = job->f->name;
     int isFDim = job->isFDim;
     int skipNanInf = job->skipNanInf;
@@ -447,7 +452,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             vlog_error("Error: clFinish failed! err: %d\n", error);
             goto exit;
         }
-        free(overflow);
         return CL_SUCCESS;
     }
 
@@ -799,7 +803,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     }
 
 exit:
-    if (overflow) free(overflow);
     return error;
 }
 
@@ -807,7 +810,7 @@ exit:
 
 int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
     float maxError = 0.0f;
     double maxErrorVal = 0.0;
@@ -816,7 +819,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -846,27 +848,10 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -963,27 +948,20 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        free_mtdata(threadInfo.d);
+        clReleaseMemObject(threadInfo.inBuf);
+        clReleaseMemObject(threadInfo.inBuf2);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
index 2fcc8c10..f15c21ed 100644
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_i_double.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -114,7 +115,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -125,7 +126,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -148,11 +150,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -287,11 +292,11 @@ constexpr size_t specialValuesIntCount =
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_double);
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     float ulps = job->ulps;
     dptr func = job->f->dfunc;
     int ftz = job->ftz;
@@ -568,7 +573,7 @@ exit:
 
 int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
     float maxError = 0.0f;
     double maxErrorVal = 0.0;
@@ -577,7 +582,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -602,27 +606,10 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -722,27 +709,20 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        free_mtdata(threadInfo.d);
+        clReleaseMemObject(threadInfo.inBuf);
+        clReleaseMemObject(threadInfo.inBuf2);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
index e1538e3c..9e27b007 100644
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -112,7 +113,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -123,7 +124,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -146,11 +148,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -279,11 +284,11 @@ constexpr size_t specialValuesIntCount =
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_float);
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     fptr func = job->f->func;
     int ftz = job->ftz;
     float ulps = job->ulps;
@@ -561,7 +566,7 @@ exit:
 
 int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
     float maxError = 0.0f;
     double maxErrorVal = 0.0;
@@ -570,7 +575,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -596,27 +600,10 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -716,27 +703,20 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        free_mtdata(threadInfo.d);
+        clReleaseMemObject(threadInfo.inBuf);
+        clReleaseMemObject(threadInfo.inBuf2);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index 605a3144..c407fdaa 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -114,7 +115,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *operator_symbol;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -125,7 +126,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->operator_symbol, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -148,11 +150,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -281,11 +286,11 @@ constexpr size_t specialValuesCount =
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_double);
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     float ulps = job->ulps;
     dptr func = job->f->dfunc;
     int ftz = job->ftz;
@@ -619,7 +624,7 @@ exit:
 int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
                                            bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
     float maxError = 0.0f;
     double maxErrorVal = 0.0;
@@ -628,7 +633,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -653,27 +657,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -770,27 +757,20 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        free_mtdata(threadInfo.d);
+        clReleaseMemObject(threadInfo.inBuf);
+        clReleaseMemObject(threadInfo.inBuf2);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index 8448af54..7fbb07c2 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -112,7 +113,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *operator_symbol;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -123,7 +124,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->operator_symbol, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -146,11 +148,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -271,18 +276,18 @@ constexpr size_t specialValuesCount =
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_float);
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     fptr func = job->f->func;
     int ftz = job->ftz;
     bool relaxedMode = job->relaxedMode;
     float ulps = getAllowedUlpError(job->f, relaxedMode);
     MTdata d = tinfo->d;
     cl_int error;
-    cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
+    std::vector<bool> overflow(buffer_elements, false);
     const char *name = job->f->name;
     cl_uint *t = 0;
     cl_float *r = 0;
@@ -445,7 +450,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     if (gSkipCorrectnessTesting)
     {
-        free(overflow);
         return CL_SUCCESS;
     }
 
@@ -738,7 +742,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     }
 
 exit:
-    if (overflow) free(overflow);
     return error;
 }
 
@@ -747,7 +750,7 @@ exit:
 int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
                                         bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
     float maxError = 0.0f;
     double maxErrorVal = 0.0;
@@ -756,7 +759,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -783,27 +785,10 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -900,27 +885,20 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        free_mtdata(threadInfo.d);
+        clReleaseMemObject(threadInfo.inBuf);
+        clReleaseMemObject(threadInfo.inBuf2);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h
new file mode 100644
index 00000000..3eafb6de
--- /dev/null
+++ b/test_conformance/math_brute_force/common.h
@@ -0,0 +1,27 @@
+//
+// Copyright (c) 2021 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "utility.h"
+
+#include <array>
+#include <vector>
+
+// Array of thread-specific kernels for each vector size.
+using KernelMatrix = std::array<std::vector<cl_kernel>, VECTOR_SIZE_COUNT>;
+
+#endif /* COMMON_H */
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index 11281261..6db6aa56 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -113,7 +114,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -124,7 +125,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -142,11 +144,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -270,11 +275,11 @@ constexpr size_t specialValuesCount =
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_double);
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     dptr dfunc = job->f->dfunc;
     int ftz = job->ftz;
     MTdata d = tinfo->d;
@@ -577,13 +582,12 @@ exit:
 
 int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -607,28 +611,11 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (size_t i = 0; i < test_info.threadCount; i++)
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
             i * test_info.subBufferSize * sizeof(cl_double),
@@ -711,27 +698,20 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        free_mtdata(threadInfo.d);
+        clReleaseMemObject(threadInfo.inBuf);
+        clReleaseMemObject(threadInfo.inBuf2);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
index 6475e4bb..d6d5c8eb 100644
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ b/test_conformance/math_brute_force/macro_binary_float.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -111,7 +112,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -122,7 +123,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -140,11 +142,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -260,11 +265,11 @@ constexpr size_t specialValuesCount =
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_float);
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     fptr func = job->f->func;
     int ftz = job->ftz;
     MTdata d = tinfo->d;
@@ -565,13 +570,12 @@ exit:
 
 int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -596,27 +600,10 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -700,27 +687,20 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        free_mtdata(threadInfo.d);
+        clReleaseMemObject(threadInfo.inBuf);
+        clReleaseMemObject(threadInfo.inBuf2);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index 860e4596..1978c185 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -107,7 +108,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -118,7 +119,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -134,11 +136,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -148,12 +153,12 @@ struct TestInfo
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_double);
     cl_uint scale = job->scale;
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     dptr dfunc = job->f->dfunc;
     int ftz = job->ftz;
     cl_int error;
@@ -362,13 +367,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
 int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -392,27 +396,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -484,25 +471,18 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        clReleaseMemObject(threadInfo.inBuf);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
index 58a2a954..ece5e9b6 100644
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -106,7 +107,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -117,7 +118,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -133,11 +135,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -147,12 +152,12 @@ struct TestInfo
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_float);
     cl_uint scale = job->scale;
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     fptr func = job->f->func;
     int ftz = job->ftz;
     cl_int error = CL_SUCCESS;
@@ -376,13 +381,12 @@ exit:
 
 int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -407,27 +411,10 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -499,25 +486,18 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        clReleaseMemObject(threadInfo.inBuf);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index e52f2f0a..6691f462 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -1055,8 +1055,6 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
                 cl_uint kernel_count, cl_kernel *k, cl_program *p,
                 bool relaxedMode)
 {
-    int error = 0;
-    cl_uint i;
     char options[200] = "";
 
     if (gForceFTZ)
@@ -1074,7 +1072,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
         strcat(options, " -cl-fast-relaxed-math");
     }
 
-    error =
+    int error =
         create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
     if (error != CL_SUCCESS)
     {
@@ -1082,9 +1080,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
         return error;
     }
 
-
-    memset(k, 0, kernel_count * sizeof(*k));
-    for (i = 0; i < kernel_count; i++)
+    for (cl_uint i = 0; i < kernel_count; i++)
     {
         k[i] = clCreateKernel(*p, name, &error);
         if (NULL == k[i] || error)
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index dcd21884..2d455047 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -107,7 +108,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -118,7 +119,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -136,11 +138,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread-specific information, indexed by thread_id
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -157,12 +162,12 @@ struct TestInfo
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_double);
     cl_uint scale = job->scale;
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     float ulps = job->ulps;
     dptr func = job->f->dfunc;
     cl_int error;
@@ -389,14 +394,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
 int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
     float maxError = 0.0f;
     double maxErrorVal = 0.0;
 
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -422,27 +426,10 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -526,25 +513,18 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        clReleaseMemObject(threadInfo.inBuf);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index f176fb95..83d27b0b 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
@@ -105,7 +106,7 @@ struct BuildKernelInfo
 {
     cl_uint offset; // the first vector size to build
     cl_uint kernel_count;
-    cl_kernel **kernels;
+    KernelMatrix &kernels;
     cl_program *programs;
     const char *nameInCode;
     bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
@@ -116,7 +117,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
     BuildKernelInfo *info = (BuildKernelInfo *)p;
     cl_uint i = info->offset + job_id;
     return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+                       info->kernels[i].data(), info->programs + i,
+                       info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
@@ -134,11 +136,14 @@ struct TestInfo
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
     cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread-specific information, indexed by thread_id
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
@@ -155,12 +160,12 @@ struct TestInfo
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_float);
     cl_uint scale = job->scale;
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     fptr func = job->f->func;
     const char *fname = job->f->name;
     bool relaxedMode = job->relaxedMode;
@@ -541,7 +546,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
 int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 {
-    TestInfo test_info;
+    TestInfo test_info{};
     cl_int error;
     float maxError = 0.0f;
     double maxErrorVal = 0.0;
@@ -550,7 +555,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
     test_info.threadCount = GetThreadCount();
     test_info.subBufferSize = BUFFER_SIZE
         / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
@@ -576,27 +580,10 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     // every thread
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
+        test_info.k[i].resize(test_info.threadCount, nullptr);
     }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
+
+    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -704,25 +691,18 @@ exit:
     for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
     {
         clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
+        for (auto &kernel : test_info.k[i])
         {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
+            clReleaseKernel(kernel);
         }
     }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
 
-        free(test_info.tinfo);
+    for (auto &threadInfo : test_info.tinfo)
+    {
+        clReleaseMemObject(threadInfo.inBuf);
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+            clReleaseMemObject(threadInfo.outBuf[j]);
+        clReleaseCommandQueue(threadInfo.tQueue);
     }
 
     return error;