diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp
index e946d744..2fc31d26 100644
--- a/test_common/harness/parseParameters.cpp
+++ b/test_common/harness/parseParameters.cpp
@@ -36,6 +36,7 @@ std::string gCompilationCachePath = ".";
 std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
 bool gDisableSPIRVValidation = false;
 std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR;
+unsigned gNumWorkerThreads; // Set by --num-worker-threads; 0 (default) runs serially
 
 void helpInfo()
 {
@@ -48,6 +49,8 @@ void helpInfo()
             online     Use online compilation (default)
             binary     Use binary offline compilation
             spir-v     Use SPIR-V offline compilation
+    --num-worker-threads <num>
+        Select parallel execution with the specified number of worker threads.
 
 For offline compilation (binary and spir-v modes) only:
     --compilation-cache-mode <cache-mode>
@@ -137,6 +140,23 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
                 return -1;
             }
         }
+        else if (!strcmp(argv[i], "--num-worker-threads"))
+        {
+            delArg++;
+            if ((i + 1) < argc)
+            {
+                delArg++;
+                const char *numthstr = argv[i + 1];
+
+                gNumWorkerThreads = atoi(numthstr);
+            }
+            else
+            {
+                log_error(
+                    "A parameter to --num-worker-threads must be provided!\n");
+                return -1;
+            }
+        }
         else if (!strcmp(argv[i], "--compilation-cache-mode"))
         {
             delArg++;
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index 8fb98b08..95ea8163 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -20,7 +20,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <cassert>
+#include <deque>
+#include <mutex>
 #include <stdexcept>
+#include <thread>
 #include <vector>
 #include "errorHelpers.h"
 #include "kernelHelpers.h"
@@ -583,10 +586,12 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
     FPU_mode_type oldMode;
     DisableFTZ(&oldMode);
 #endif
+    extern unsigned gNumWorkerThreads;
+    test_harness_config config = { forceNoContextCreation, num_elements,
+                                   queueProps, gNumWorkerThreads };
 
     int error = parseAndCallCommandLineTests(argc, argv, device, testNum,
-                                             testList, forceNoContextCreation,
-                                             queueProps, num_elements);
+                                             testList, config);
 
 #if defined(__APPLE__) && defined(__arm__)
     // Restore the old FP mode before leaving.
@@ -677,9 +682,7 @@ static void print_results(int failed, int count, const char *name)
 int parseAndCallCommandLineTests(int argc, const char *argv[],
                                  cl_device_id device, int testNum,
                                  test_definition testList[],
-                                 int forceNoContextCreation,
-                                 cl_command_queue_properties queueProps,
-                                 int num_elements)
+                                 const test_harness_config &config)
 {
     int ret = EXIT_SUCCESS;
 
@@ -725,8 +728,7 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
         std::vector<test_status> resultTestList(testNum, TEST_PASS);
 
         callTestFunctions(testList, selectedTestList, resultTestList.data(),
-                          testNum, device, forceNoContextCreation, num_elements,
-                          queueProps);
+                          testNum, device, config);
 
         print_results(gFailCount, gTestCount, "sub-test");
         print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test");
@@ -754,21 +756,97 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
     return ret;
 }
 
+struct test_harness_state // State handed by pointer to every worker thread
+{
+    test_definition *tests; // Test table, indexed by test ID
+    test_status *results; // Per-test results, indexed by test ID
+    cl_device_id device; // Device passed to callSingleTestFunction
+    test_harness_config config; // Settings passed to callSingleTestFunction
+};
+
+static std::deque<int> gTestQueue; // IDs of selected tests not yet claimed
+static std::mutex gTestStateMutex; // Held while using gTestQueue / results
+
+// Worker thread body. Repeatedly takes the next test ID from gTestQueue,
+// runs that test via callSingleTestFunction and stores the returned status
+// in state->results at the same index. Returns once the queue is empty.
+void test_function_runner(test_harness_state *state)
+{
+    for (;;)
+    {
+        // Hold the shared lock while touching the queue and the test table
+        std::unique_lock<std::mutex> guard(gTestStateMutex);
+
+        // Nothing left to run: this worker is finished
+        if (gTestQueue.empty())
+        {
+            return;
+        }
+
+        // Claim the test at the front of the queue
+        const int claimedID = gTestQueue.front();
+        gTestQueue.pop_front();
+        test_definition claimed = state->tests[claimedID];
+
+        // Run the test with the lock released
+        guard.unlock();
+
+        const test_status outcome =
+            callSingleTestFunction(claimed, state->device, state->config);
+
+        // Re-take the lock to store the result; it is released again when
+        // guard goes out of scope at the end of this iteration
+        guard.lock();
+        state->results[claimedID] = outcome;
+    }
+}
+
 void callTestFunctions(test_definition testList[],
                        unsigned char selectedTestList[],
                        test_status resultTestList[], int testNum,
-                       cl_device_id deviceToUse, int forceNoContextCreation,
-                       int numElementsToUse,
-                       cl_command_queue_properties queueProps)
+                       cl_device_id deviceToUse,
+                       const test_harness_config &config)
 {
-    for (int i = 0; i < testNum; ++i)
+    // Execute tests serially when no worker threads were requested
+    if (config.numWorkerThreads == 0)
     {
-        if (selectedTestList[i])
+        for (int i = 0; i < testNum; ++i)
         {
-            resultTestList[i] = callSingleTestFunction(
-                testList[i], deviceToUse, forceNoContextCreation,
-                numElementsToUse, queueProps);
+            if (selectedTestList[i])
+            {
+                resultTestList[i] =
+                    callSingleTestFunction(testList[i], deviceToUse, config);
+            }
         }
+    }
+    else
+    {
+        // Execute tests in parallel: queue all tests that need to run
+        for (int i = 0; i < testNum; ++i)
+        {
+            if (selectedTestList[i])
+            {
+                gTestQueue.push_back(i);
+            }
+        }
+
+        // Spawn thread pool. The vector owns the std::thread objects, so
+        // nothing is heap-allocated by hand and nothing needs deleting.
+        std::vector<std::thread> threads;
+        test_harness_state state = { testList, resultTestList, deviceToUse,
+                                     config };
+        for (unsigned i = 0; i < config.numWorkerThreads; i++)
+        {
+            log_info("Spawning worker thread %u\n", i);
+            threads.emplace_back(test_function_runner, &state);
+        }
+
+        // Wait for all threads to complete
+        for (auto &th : threads)
+        {
+            th.join();
+        }
+        assert(gTestQueue.empty());
     }
 }
 
@@ -781,9 +859,7 @@ void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info,
 // Actual function execution
 test_status callSingleTestFunction(test_definition test,
                                    cl_device_id deviceToUse,
-                                   int forceNoContextCreation,
-                                   int numElementsToUse,
-                                   const cl_queue_properties queueProps)
+                                   const test_harness_config &config)
 {
     test_status status;
     cl_int error;
@@ -811,7 +887,7 @@ test_status callSingleTestFunction(test_definition test,
     }
 
     /* Create a context to work with, unless we're told not to */
-    if (!forceNoContextCreation)
+    if (!config.forceNoContextCreation)
     {
         context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL,
                                   &error);
@@ -825,15 +901,16 @@ test_status callSingleTestFunction(test_definition test,
 
         if (device_version < Version(2, 0))
         {
-            queue =
-                clCreateCommandQueue(context, deviceToUse, queueProps, &error);
+            queue = clCreateCommandQueue(context, deviceToUse,
+                                         config.queueProps, &error);
         }
         else
         {
             const cl_command_queue_properties cmd_queueProps =
-                (queueProps) ? CL_QUEUE_PROPERTIES : 0;
-            cl_command_queue_properties queueCreateProps[] = { cmd_queueProps,
-                                                               queueProps, 0 };
+                (config.queueProps) ? CL_QUEUE_PROPERTIES : 0;
+            cl_command_queue_properties queueCreateProps[] = {
+                cmd_queueProps, config.queueProps, 0
+            };
             queue = clCreateCommandQueueWithProperties(
                 context, deviceToUse, &queueCreateProps[0], &error);
         }
@@ -858,7 +935,8 @@ test_status callSingleTestFunction(test_definition test,
     }
     else
     {
-        int ret = test.func(deviceToUse, context, queue, numElementsToUse);
+        int ret =
+            test.func(deviceToUse, context, queue, config.numElementsToUse);
         if (ret == TEST_SKIPPED_ITSELF)
         {
             /* Tests can also let us know they're not supported by the
@@ -885,7 +963,7 @@ test_status callSingleTestFunction(test_definition test,
     }
 
     /* Release the context */
-    if (!forceNoContextCreation)
+    if (!config.forceNoContextCreation)
     {
         int error = clFinish(queue);
         if (error)
diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h
index 4e0529d6..b266db80 100644
--- a/test_common/harness/testHarness.h
+++ b/test_common/harness/testHarness.h
@@ -85,6 +85,14 @@ typedef enum test_status
     TEST_SKIPPED_ITSELF = -100,
 } test_status;
 
+struct test_harness_config // Harness settings used when running tests
+{
+    int forceNoContextCreation; // Non-zero: no context is created for tests
+    int numElementsToUse; // Element count passed to each test function
+    cl_command_queue_properties queueProps; // Properties for the test queue
+    unsigned numWorkerThreads; // 0 = run serially, else worker thread count
+};
+
 extern int gFailCount;
 extern int gTestCount;
 extern cl_uint gReSeed;
@@ -117,9 +125,7 @@ extern int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
 extern int parseAndCallCommandLineTests(int argc, const char *argv[],
                                         cl_device_id device, int testNum,
                                         test_definition testList[],
-                                        int forceNoContextCreation,
-                                        cl_command_queue_properties queueProps,
-                                        int num_elements);
+                                        const test_harness_config &config);
 
 // Call this function if you need to do all the setup work yourself, and just
 // need the function list called/ managed.
@@ -131,21 +137,19 @@ extern int parseAndCallCommandLineTests(int argc, const char *argv[],
 //    resultTestList is an array of statuses which contain the result of each
 //    selected test testNum is the number of tests in testList, selectedTestList
 //    and resultTestList contextProps are used to create a testing context for
-//    each test deviceToUse and numElementsToUse are all just passed to each
+//    each test; deviceToUse and config are used when running each selected
 //    test function
 extern void callTestFunctions(test_definition testList[],
                               unsigned char selectedTestList[],
                               test_status resultTestList[], int testNum,
                               cl_device_id deviceToUse,
-                              int forceNoContextCreation, int numElementsToUse,
-                              cl_command_queue_properties queueProps);
+                              const test_harness_config &config);
 
 // This function is called by callTestFunctions, once per function, to do setup,
 // call, logging and cleanup
-extern test_status
-callSingleTestFunction(test_definition test, cl_device_id deviceToUse,
-                       int forceNoContextCreation, int numElementsToUse,
-                       cl_command_queue_properties queueProps);
+extern test_status callSingleTestFunction(test_definition test,
+                                          cl_device_id deviceToUse,
+                                          const test_harness_config &config);
 
 ///// Miscellaneous steps
 
diff --git a/test_conformance/gl/main.cpp b/test_conformance/gl/main.cpp
index 32aafef5..e5d6b65d 100644
--- a/test_conformance/gl/main.cpp
+++ b/test_conformance/gl/main.cpp
@@ -316,8 +316,12 @@ int main(int argc, const char *argv[])
 
             // Note: don't use the entire harness, because we have a different
             // way of obtaining the device (via the context)
-            error = parseAndCallCommandLineTests(
-                argc_, argv, deviceIDs[i], test_num, test_list, true, 0, 1024);
+            test_harness_config config{};
+            config.forceNoContextCreation = true;
+            config.numElementsToUse = 1024;
+            config.queueProps = 0;
+            error = parseAndCallCommandLineTests(argc_, argv, deviceIDs[i],
+                                                 test_num, test_list, config);
             if (error != 0) break;
         }
 
@@ -397,9 +401,12 @@ int main(int argc, const char *argv[])
 
             // Note: don't use the entire harness, because we have a different
             // way of obtaining the device (via the context)
-            error = parseAndCallCommandLineTests(argc_, argv_, deviceIDs[i],
-                                                 test_num32, test_list32, true,
-                                                 0, 1024);
+            test_harness_config config{};
+            config.forceNoContextCreation = true;
+            config.numElementsToUse = 1024;
+            config.queueProps = 0;
+            error = parseAndCallCommandLineTests(
+                argc_, argv_, deviceIDs[i], test_num32, test_list32, config);
             if (error != 0) break;
         }
 
diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp
index 60e020d8..0327b70e 100644
--- a/test_conformance/gles/main.cpp
+++ b/test_conformance/gles/main.cpp
@@ -266,9 +266,13 @@ int main(int argc, const char *argv[])
     }
 
         // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context)
-        error = parseAndCallCommandLineTests( argc_tmp, argv_tmp, deviceIDs[i], test_num, test_list, true, 0, 1024 );
-        if( error != 0 )
-          break;
+    test_harness_config config{};
+    config.forceNoContextCreation = true;
+    config.numElementsToUse = 1024;
+    config.queueProps = 0;
+    error = parseAndCallCommandLineTests(argc_tmp, argv_tmp, deviceIDs[i],
+                                         test_num, test_list, config);
+    if (error != 0) break;
     }
 
     // Clean-up.
@@ -338,7 +342,12 @@ int main(int argc, const char *argv[])
         goto cleanup;
 #else
         // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context)
-        error = parseAndCallCommandLineTests( argc_, argv_, deviceIDs[ i ], test_num32, test_list32, true, 0, 1024 );
+        test_harness_config config{};
+        config.forceNoContextCreation = true;
+        config.numElementsToUse = 1024;
+        config.queueProps = 0;
+        error = parseAndCallCommandLineTests(argc_, argv_, deviceIDs[i],
+                                             test_num32, test_list32, config);
         if( error != 0 )
           break;
 #endif
diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp
index 2eeb0c36..5901420a 100644
--- a/test_conformance/vulkan/main.cpp
+++ b/test_conformance/vulkan/main.cpp
@@ -340,7 +340,11 @@ int main(int argc, const char *argv[])
     // Execute tests.
     // Note: don't use the entire harness, because we have a different way of
     // obtaining the device (via the context)
+    test_harness_config config{};
+    config.forceNoContextCreation = true;
+    config.numElementsToUse = 1024;
+    config.queueProps = 0;
     errNum = parseAndCallCommandLineTests(argCount, argList, devices[device_no],
-                                          test_num, test_list, true, 0, 1024);
+                                          test_num, test_list, config);
     return errNum;
 }