Add basic support to the harness for parallel test execution (#1687)

This change introduces a new command-line parameter to enable
parallel execution by a specified number of worker threads. When
parallel execution is requested, tests are distributed across
the worker threads. This behaviour is disabled by default.

This does not currently work for all suites as some of them are
using global variables to configure tests. For the suites that
do not use global state, this change reduced the execution time
by up to 5x on an 8-core machine.

Signed-off-by: Kévin Petit <kpet@free.fr>
This commit is contained in:
Kévin Petit
2023-04-25 19:30:42 +01:00
committed by GitHub
parent 033aa195c5
commit ff1369d94e
6 changed files with 168 additions and 46 deletions

View File

@@ -36,6 +36,7 @@ std::string gCompilationCachePath = ".";
std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
bool gDisableSPIRVValidation = false;
std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR;
// Number of worker threads requested with --num-worker-threads. As a global
// without an initializer it starts at 0, which callTestFunctions treats as
// "run the tests serially".
unsigned gNumWorkerThreads;
void helpInfo()
{
@@ -48,6 +49,8 @@ void helpInfo()
online Use online compilation (default)
binary Use binary offline compilation
spir-v Use SPIR-V offline compilation
--num-worker-threads <num>
Select parallel execution with the specified number of worker threads.
For offline compilation (binary and spir-v modes) only:
--compilation-cache-mode <cache-mode>
@@ -137,6 +140,23 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
return -1;
}
}
else if (!strcmp(argv[i], "--num-worker-threads"))
{
delArg++;
if ((i + 1) < argc)
{
delArg++;
const char *numthstr = argv[i + 1];
gNumWorkerThreads = atoi(numthstr);
}
else
{
log_error(
"A parameter to --num-worker-threads must be provided!\n");
return -1;
}
}
else if (!strcmp(argv[i], "--compilation-cache-mode"))
{
delArg++;

View File

@@ -20,7 +20,10 @@
#include <stdlib.h>
#include <string.h>
#include <cassert>
#include <deque>
#include <mutex>
#include <stdexcept>
#include <thread>
#include <vector>
#include "errorHelpers.h"
#include "kernelHelpers.h"
@@ -583,10 +586,12 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
FPU_mode_type oldMode;
DisableFTZ(&oldMode);
#endif
extern unsigned gNumWorkerThreads;
test_harness_config config = { forceNoContextCreation, num_elements,
queueProps, gNumWorkerThreads };
int error = parseAndCallCommandLineTests(argc, argv, device, testNum,
testList, forceNoContextCreation,
queueProps, num_elements);
testList, config);
#if defined(__APPLE__) && defined(__arm__)
// Restore the old FP mode before leaving.
@@ -677,9 +682,7 @@ static void print_results(int failed, int count, const char *name)
int parseAndCallCommandLineTests(int argc, const char *argv[],
cl_device_id device, int testNum,
test_definition testList[],
int forceNoContextCreation,
cl_command_queue_properties queueProps,
int num_elements)
const test_harness_config &config)
{
int ret = EXIT_SUCCESS;
@@ -725,8 +728,7 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
std::vector<test_status> resultTestList(testNum, TEST_PASS);
callTestFunctions(testList, selectedTestList, resultTestList.data(),
testNum, device, forceNoContextCreation, num_elements,
queueProps);
testNum, device, config);
print_results(gFailCount, gTestCount, "sub-test");
print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test");
@@ -754,21 +756,97 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
return ret;
}
// State shared by the worker threads used for parallel test execution.
// callTestFunctions builds one instance and hands its address to every
// test_function_runner thread.
struct test_harness_state
{
// Test definitions, indexed by the test IDs taken from gTestQueue.
test_definition *tests;
// Per-test statuses; a worker writes the entry with the same index as the
// test it just ran.
test_status *results;
// Device forwarded to callSingleTestFunction for every test.
cl_device_id device;
// Harness configuration forwarded to callSingleTestFunction for every test.
test_harness_config config;
};
// Indices of the tests that still have to be run by a worker thread. Filled by
// callTestFunctions and drained from the front by test_function_runner.
static std::deque<int> gTestQueue;
// Held by test_function_runner while it pops from gTestQueue and while it
// stores a result into the shared result array.
static std::mutex gTestStateMutex;
// Worker-thread entry point: keeps pulling test indices from gTestQueue and
// running them until the queue is empty, then returns.
//
// state  shared harness state; supplies the test table, the result array, and
//        the device/config forwarded to callSingleTestFunction.
void test_function_runner(test_harness_state *state)
{
    for (;;)
    {
        int nextIndex;
        test_definition nextTest;

        // Claim the next pending test while holding the lock.
        {
            std::lock_guard<std::mutex> guard(gTestStateMutex);

            // Nothing left to run: this worker is finished.
            if (gTestQueue.empty())
            {
                return;
            }

            nextIndex = gTestQueue.front();
            gTestQueue.pop_front();
            nextTest = state->tests[nextIndex];
        }

        // Run the test outside the lock so workers execute concurrently.
        auto outcome =
            callSingleTestFunction(nextTest, state->device, state->config);

        // Publish the outcome; the guard is released at the end of the
        // iteration.
        std::lock_guard<std::mutex> guard(gTestStateMutex);
        state->results[nextIndex] = outcome;
    }
}
void callTestFunctions(test_definition testList[],
unsigned char selectedTestList[],
test_status resultTestList[], int testNum,
cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse,
cl_command_queue_properties queueProps)
cl_device_id deviceToUse,
const test_harness_config &config)
{
for (int i = 0; i < testNum; ++i)
// Execute tests serially
if (config.numWorkerThreads == 0)
{
if (selectedTestList[i])
for (int i = 0; i < testNum; ++i)
{
resultTestList[i] = callSingleTestFunction(
testList[i], deviceToUse, forceNoContextCreation,
numElementsToUse, queueProps);
if (selectedTestList[i])
{
resultTestList[i] =
callSingleTestFunction(testList[i], deviceToUse, config);
}
}
// Execute tests in parallel with the specified number of worker threads
}
else
{
// Queue all tests that need to run
for (int i = 0; i < testNum; ++i)
{
if (selectedTestList[i])
{
gTestQueue.push_back(i);
}
}
// Spawn thread pool
std::vector<std::thread *> threads;
test_harness_state state = { testList, resultTestList, deviceToUse,
config };
for (int i = 0; i < config.numWorkerThreads; i++)
{
log_info("Spawning worker thread %i\n", i);
threads.push_back(new std::thread(test_function_runner, &state));
}
// Wait for all threads to complete
for (auto th : threads)
{
th->join();
}
assert(gTestQueue.size() == 0);
}
}
@@ -781,9 +859,7 @@ void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info,
// Actual function execution
test_status callSingleTestFunction(test_definition test,
cl_device_id deviceToUse,
int forceNoContextCreation,
int numElementsToUse,
const cl_queue_properties queueProps)
const test_harness_config &config)
{
test_status status;
cl_int error;
@@ -811,7 +887,7 @@ test_status callSingleTestFunction(test_definition test,
}
/* Create a context to work with, unless we're told not to */
if (!forceNoContextCreation)
if (!config.forceNoContextCreation)
{
context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL,
&error);
@@ -825,15 +901,16 @@ test_status callSingleTestFunction(test_definition test,
if (device_version < Version(2, 0))
{
queue =
clCreateCommandQueue(context, deviceToUse, queueProps, &error);
queue = clCreateCommandQueue(context, deviceToUse,
config.queueProps, &error);
}
else
{
const cl_command_queue_properties cmd_queueProps =
(queueProps) ? CL_QUEUE_PROPERTIES : 0;
cl_command_queue_properties queueCreateProps[] = { cmd_queueProps,
queueProps, 0 };
(config.queueProps) ? CL_QUEUE_PROPERTIES : 0;
cl_command_queue_properties queueCreateProps[] = {
cmd_queueProps, config.queueProps, 0
};
queue = clCreateCommandQueueWithProperties(
context, deviceToUse, &queueCreateProps[0], &error);
}
@@ -858,7 +935,8 @@ test_status callSingleTestFunction(test_definition test,
}
else
{
int ret = test.func(deviceToUse, context, queue, numElementsToUse);
int ret =
test.func(deviceToUse, context, queue, config.numElementsToUse);
if (ret == TEST_SKIPPED_ITSELF)
{
/* Tests can also let us know they're not supported by the
@@ -885,7 +963,7 @@ test_status callSingleTestFunction(test_definition test,
}
/* Release the context */
if (!forceNoContextCreation)
if (!config.forceNoContextCreation)
{
int error = clFinish(queue);
if (error)

View File

@@ -85,6 +85,14 @@ typedef enum test_status
TEST_SKIPPED_ITSELF = -100,
} test_status;
// Settings the harness passes down, unchanged, from the entry points to
// callTestFunctions and callSingleTestFunction.
struct test_harness_config
{
// When non-zero, callSingleTestFunction does not create a context for the
// test.
int forceNoContextCreation;
// Passed as the last argument to every test function.
int numElementsToUse;
// Properties used when callSingleTestFunction creates the command queue.
cl_command_queue_properties queueProps;
// 0 runs the tests serially; any other value is the number of worker threads
// callTestFunctions spawns to run them in parallel.
unsigned numWorkerThreads;
};
extern int gFailCount;
extern int gTestCount;
extern cl_uint gReSeed;
@@ -117,9 +125,7 @@ extern int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
extern int parseAndCallCommandLineTests(int argc, const char *argv[],
cl_device_id device, int testNum,
test_definition testList[],
int forceNoContextCreation,
cl_command_queue_properties queueProps,
int num_elements);
const test_harness_config &config);
// Call this function if you need to do all the setup work yourself, and just
// need the function list called/ managed.
@@ -131,21 +137,19 @@ extern int parseAndCallCommandLineTests(int argc, const char *argv[],
// resultTestList is an array of statuses which contain the result of each
// selected test testNum is the number of tests in testList, selectedTestList
// and resultTestList contextProps are used to create a testing context for
// each test deviceToUse and numElementsToUse are all just passed to each
// each test deviceToUse and config are all just passed to each
// test function
extern void callTestFunctions(test_definition testList[],
unsigned char selectedTestList[],
test_status resultTestList[], int testNum,
cl_device_id deviceToUse,
int forceNoContextCreation, int numElementsToUse,
cl_command_queue_properties queueProps);
const test_harness_config &config);
// This function is called by callTestFunctions, once per function, to do setup,
// call, logging and cleanup
extern test_status
callSingleTestFunction(test_definition test, cl_device_id deviceToUse,
int forceNoContextCreation, int numElementsToUse,
cl_command_queue_properties queueProps);
extern test_status callSingleTestFunction(test_definition test,
cl_device_id deviceToUse,
const test_harness_config &config);
///// Miscellaneous steps

View File

@@ -316,8 +316,12 @@ int main(int argc, const char *argv[])
// Note: don't use the entire harness, because we have a different
// way of obtaining the device (via the context)
error = parseAndCallCommandLineTests(
argc_, argv, deviceIDs[i], test_num, test_list, true, 0, 1024);
test_harness_config config{};
config.forceNoContextCreation = true;
config.numElementsToUse = 1024;
config.queueProps = 0;
error = parseAndCallCommandLineTests(argc_, argv, deviceIDs[i],
test_num, test_list, config);
if (error != 0) break;
}
@@ -397,9 +401,12 @@ int main(int argc, const char *argv[])
// Note: don't use the entire harness, because we have a different
// way of obtaining the device (via the context)
error = parseAndCallCommandLineTests(argc_, argv_, deviceIDs[i],
test_num32, test_list32, true,
0, 1024);
test_harness_config config{};
config.forceNoContextCreation = true;
config.numElementsToUse = 1024;
config.queueProps = 0;
error = parseAndCallCommandLineTests(
argc_, argv_, deviceIDs[i], test_num32, test_list32, config);
if (error != 0) break;
}

View File

@@ -266,9 +266,13 @@ int main(int argc, const char *argv[])
}
// Note: don't use the entire harness, because we have a different way of obtaining the device (via the context)
error = parseAndCallCommandLineTests( argc_tmp, argv_tmp, deviceIDs[i], test_num, test_list, true, 0, 1024 );
if( error != 0 )
break;
test_harness_config config{};
config.forceNoContextCreation = true;
config.numElementsToUse = 1024;
config.queueProps = 0;
error = parseAndCallCommandLineTests(argc_tmp, argv_tmp, deviceIDs[i],
test_num, test_list, config);
if (error != 0) break;
}
// Clean-up.
@@ -338,7 +342,12 @@ int main(int argc, const char *argv[])
goto cleanup;
#else
// Note: don't use the entire harness, because we have a different way of obtaining the device (via the context)
error = parseAndCallCommandLineTests( argc_, argv_, deviceIDs[ i ], test_num32, test_list32, true, 0, 1024 );
test_harness_config config{};
config.forceNoContextCreation = true;
config.numElementsToUse = 1024;
config.queueProps = 0;
error = parseAndCallCommandLineTests(argc_, argv_, deviceIDs[i],
test_num32, test_list32, config);
if( error != 0 )
break;
#endif

View File

@@ -340,7 +340,11 @@ int main(int argc, const char *argv[])
// Execute tests.
// Note: don't use the entire harness, because we have a different way of
// obtaining the device (via the context)
test_harness_config config{};
config.forceNoContextCreation = true;
config.numElementsToUse = 1024;
config.queueProps = 0;
errNum = parseAndCallCommandLineTests(argCount, argList, devices[device_no],
test_num, test_list, true, 0, 1024);
test_num, test_list, config);
return errNum;
}