Add basic support to the harness for parallel test execution (#1687)

This change introduces a new command-line parameter to enable
parallel execution by a specified number of worker threads. When
parallel execution is requested, tests are distributed across
the worker threads. This behaviour is disabled by default.

This does not currently work for all suites, as some of them use
global variables to configure tests. For the suites that do not
rely on global state, this change reduces the execution time by
up to 5x on an 8-core machine.

Signed-off-by: Kévin Petit <kpet@free.fr>
This commit is contained in:
Kévin Petit
2023-04-25 19:30:42 +01:00
committed by GitHub
parent 033aa195c5
commit ff1369d94e
6 changed files with 168 additions and 46 deletions

View File

@@ -20,7 +20,10 @@
#include <stdlib.h>
#include <string.h>
#include <cassert>
#include <deque>
#include <mutex>
#include <stdexcept>
#include <thread>
#include <vector>
#include "errorHelpers.h"
#include "kernelHelpers.h"
@@ -583,10 +586,12 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
FPU_mode_type oldMode;
DisableFTZ(&oldMode);
#endif
extern unsigned gNumWorkerThreads;
test_harness_config config = { forceNoContextCreation, num_elements,
queueProps, gNumWorkerThreads };
int error = parseAndCallCommandLineTests(argc, argv, device, testNum,
testList, forceNoContextCreation,
queueProps, num_elements);
testList, config);
#if defined(__APPLE__) && defined(__arm__)
// Restore the old FP mode before leaving.
@@ -677,9 +682,7 @@ static void print_results(int failed, int count, const char *name)
int parseAndCallCommandLineTests(int argc, const char *argv[],
cl_device_id device, int testNum,
test_definition testList[],
int forceNoContextCreation,
cl_command_queue_properties queueProps,
int num_elements)
const test_harness_config &config)
{
int ret = EXIT_SUCCESS;
@@ -725,8 +728,7 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
std::vector<test_status> resultTestList(testNum, TEST_PASS);
callTestFunctions(testList, selectedTestList, resultTestList.data(),
testNum, device, forceNoContextCreation, num_elements,
queueProps);
testNum, device, config);
print_results(gFailCount, gTestCount, "sub-test");
print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test");
@@ -754,21 +756,97 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
return ret;
}
// State handed (by pointer) to every worker thread running
// test_function_runner.
struct test_harness_state
{
// Test definitions, indexed by the test IDs taken from gTestQueue.
test_definition *tests;
// Per-test result slots, indexed by the same test IDs; workers write their
// result here while holding gTestStateMutex.
test_status *results;
// Device passed to callSingleTestFunction for every test.
cl_device_id device;
// Harness configuration passed to callSingleTestFunction for every test.
test_harness_config config;
};
// IDs (indices into the test list) of the tests still waiting to be run in
// parallel mode. Filled by callTestFunctions, drained by the worker threads.
static std::deque<int> gTestQueue;
// Held by worker threads while they pop from gTestQueue and while they store
// a test result.
static std::mutex gTestStateMutex;
// Worker-thread entry point. Repeatedly takes the next test ID from
// gTestQueue, runs that test and records its status in state->results.
// Returns as soon as the queue is found empty.
void test_function_runner(test_harness_state *state)
{
    for (;;)
    {
        // Queue access and the copy of the test definition happen under the
        // shared mutex.
        std::unique_lock<std::mutex> guard(gTestStateMutex);
        if (gTestQueue.empty())
        {
            // Nothing left to run.
            return;
        }
        const int nextID = gTestQueue.front();
        gTestQueue.pop_front();
        const test_definition current = state->tests[nextID];
        guard.unlock();

        // The test itself runs without the mutex held.
        const auto outcome =
            callSingleTestFunction(current, state->device, state->config);

        // Re-take the mutex to publish the result; it is released when
        // `guard` goes out of scope at the end of this iteration.
        guard.lock();
        state->results[nextID] = outcome;
    }
}
void callTestFunctions(test_definition testList[],
unsigned char selectedTestList[],
test_status resultTestList[], int testNum,
cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse,
cl_command_queue_properties queueProps)
cl_device_id deviceToUse,
const test_harness_config &config)
{
for (int i = 0; i < testNum; ++i)
// Execute tests serially
if (config.numWorkerThreads == 0)
{
if (selectedTestList[i])
for (int i = 0; i < testNum; ++i)
{
resultTestList[i] = callSingleTestFunction(
testList[i], deviceToUse, forceNoContextCreation,
numElementsToUse, queueProps);
if (selectedTestList[i])
{
resultTestList[i] =
callSingleTestFunction(testList[i], deviceToUse, config);
}
}
// Execute tests in parallel with the specified number of worker threads
}
else
{
// Queue all tests that need to run
for (int i = 0; i < testNum; ++i)
{
if (selectedTestList[i])
{
gTestQueue.push_back(i);
}
}
// Spawn thread pool
std::vector<std::thread *> threads;
test_harness_state state = { testList, resultTestList, deviceToUse,
config };
for (int i = 0; i < config.numWorkerThreads; i++)
{
log_info("Spawning worker thread %i\n", i);
threads.push_back(new std::thread(test_function_runner, &state));
}
// Wait for all threads to complete
for (auto th : threads)
{
th->join();
}
assert(gTestQueue.size() == 0);
}
}
@@ -781,9 +859,7 @@ void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info,
// Actual function execution
test_status callSingleTestFunction(test_definition test,
cl_device_id deviceToUse,
int forceNoContextCreation,
int numElementsToUse,
const cl_queue_properties queueProps)
const test_harness_config &config)
{
test_status status;
cl_int error;
@@ -811,7 +887,7 @@ test_status callSingleTestFunction(test_definition test,
}
/* Create a context to work with, unless we're told not to */
if (!forceNoContextCreation)
if (!config.forceNoContextCreation)
{
context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL,
&error);
@@ -825,15 +901,16 @@ test_status callSingleTestFunction(test_definition test,
if (device_version < Version(2, 0))
{
queue =
clCreateCommandQueue(context, deviceToUse, queueProps, &error);
queue = clCreateCommandQueue(context, deviceToUse,
config.queueProps, &error);
}
else
{
const cl_command_queue_properties cmd_queueProps =
(queueProps) ? CL_QUEUE_PROPERTIES : 0;
cl_command_queue_properties queueCreateProps[] = { cmd_queueProps,
queueProps, 0 };
(config.queueProps) ? CL_QUEUE_PROPERTIES : 0;
cl_command_queue_properties queueCreateProps[] = {
cmd_queueProps, config.queueProps, 0
};
queue = clCreateCommandQueueWithProperties(
context, deviceToUse, &queueCreateProps[0], &error);
}
@@ -858,7 +935,8 @@ test_status callSingleTestFunction(test_definition test,
}
else
{
int ret = test.func(deviceToUse, context, queue, numElementsToUse);
int ret =
test.func(deviceToUse, context, queue, config.numElementsToUse);
if (ret == TEST_SKIPPED_ITSELF)
{
/* Tests can also let us know they're not supported by the
@@ -885,7 +963,7 @@ test_status callSingleTestFunction(test_definition test,
}
/* Release the context */
if (!forceNoContextCreation)
if (!config.forceNoContextCreation)
{
int error = clFinish(queue);
if (error)