mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 14:09:03 +00:00
590 lines
24 KiB
C++
590 lines
24 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "testBase.h"
|
|
#include "../../test_common/harness/typeWrappers.h"
|
|
#include "../../test_common/harness/testHarness.h"
|
|
#include "../../test_common/harness/conversions.h"
|
|
|
|
#include <vector>
|
|
|
|
typedef long long int lld;
|
|
typedef long long unsigned llu;
|
|
|
|
const char *test_kernels[] = {
|
|
"__kernel void kernelA(__global int *dst)\n"
|
|
"{\n"
|
|
"\n"
|
|
" dst[get_global_id(0)]*=3;\n"
|
|
"\n"
|
|
"}\n"
|
|
"__kernel void kernelB(__global int *dst)\n"
|
|
"{\n"
|
|
"\n"
|
|
" dst[get_global_id(0)]++;\n"
|
|
"\n"
|
|
"}\n"
|
|
};
|
|
|
|
#define TEST_SIZE 512
|
|
#define MAX_QUEUES 1000
|
|
|
|
const char *printPartition(cl_device_partition_property partition)
|
|
{
|
|
switch (partition) {
|
|
case (0): return "<NONE>";
|
|
case (CL_DEVICE_PARTITION_EQUALLY): return "CL_DEVICE_PARTITION_EQUALLY";
|
|
case (CL_DEVICE_PARTITION_BY_COUNTS): return "CL_DEVICE_PARTITION_BY_COUNTS";
|
|
case (CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN): return "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN";
|
|
default: return "<unknown>";
|
|
} // switch
|
|
}
|
|
|
|
const char *printAffinity(cl_device_affinity_domain affinity)
|
|
{
|
|
switch (affinity) {
|
|
case (0): return "<NONE>";
|
|
case (CL_DEVICE_AFFINITY_DOMAIN_NUMA): return "CL_DEVICE_AFFINITY_DOMAIN_NUMA";
|
|
case (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE";
|
|
case (CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE";
|
|
case (CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE";
|
|
case (CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE";
|
|
case (CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE): return "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE";
|
|
default: return "<unknown>";
|
|
} // switch
|
|
}
|
|
int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const cl_device_id *parentDevice )
|
|
{
|
|
int error = CL_SUCCESS;
|
|
|
|
/* Create the program object from source */
|
|
*outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error );
|
|
if( *outProgram == NULL || error != CL_SUCCESS)
|
|
{
|
|
print_error( error, "clCreateProgramWithSource failed" );
|
|
return error;
|
|
}
|
|
|
|
/* Compile the program */
|
|
int buildProgramFailed = 0;
|
|
int printedSource = 0;
|
|
error = clBuildProgram( *outProgram, ((parentDevice == NULL) ? 0 : 1), parentDevice, NULL, NULL, NULL );
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
unsigned int i;
|
|
print_error(error, "clBuildProgram failed");
|
|
buildProgramFailed = 1;
|
|
printedSource = 1;
|
|
log_error( "Original source is: ------------\n" );
|
|
for( i = 0; i < numKernelLines; i++ )
|
|
log_error( "%s", kernelProgram[ i ] );
|
|
}
|
|
|
|
// Verify the build status on all devices
|
|
cl_uint deviceCount = 0;
|
|
error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
|
|
if (error != CL_SUCCESS) {
|
|
print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
|
|
return error;
|
|
}
|
|
|
|
if (deviceCount == 0) {
|
|
log_error("No devices found for program.\n");
|
|
return -1;
|
|
}
|
|
|
|
cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
|
|
if( NULL == devices )
|
|
return -1;
|
|
memset( devices, 0, deviceCount * sizeof( cl_device_id ));
|
|
error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
|
|
if (error != CL_SUCCESS) {
|
|
print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
|
|
free( devices );
|
|
return error;
|
|
}
|
|
|
|
cl_uint z;
|
|
for( z = 0; z < deviceCount; z++ )
|
|
{
|
|
char deviceName[4096] = "";
|
|
error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
|
|
if (error != CL_SUCCESS || deviceName[0] == '\0') {
|
|
log_error("Device \"%d\" failed to return a name\n", z);
|
|
print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
|
|
}
|
|
|
|
cl_build_status buildStatus;
|
|
error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
|
|
if (error != CL_SUCCESS) {
|
|
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
|
|
free( devices );
|
|
return error;
|
|
}
|
|
|
|
if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
|
|
char log[10240] = "";
|
|
if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
|
|
|
|
char statusString[64] = "";
|
|
if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
|
|
sprintf(statusString, "CL_BUILD_SUCCESS");
|
|
else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
|
|
sprintf(statusString, "CL_BUILD_NONE");
|
|
else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
|
|
sprintf(statusString, "CL_BUILD_ERROR");
|
|
else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
|
|
sprintf(statusString, "CL_BUILD_IN_PROGRESS");
|
|
else
|
|
sprintf(statusString, "UNKNOWN (%d)", buildStatus);
|
|
|
|
if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
|
|
error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL );
|
|
if (error != CL_SUCCESS || log[0]=='\0'){
|
|
log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
|
|
if (error) {
|
|
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
|
free( devices );
|
|
return error;
|
|
} else {
|
|
log_error("clGetProgramBuildInfo returned an empty log.\n");
|
|
free( devices );
|
|
return -1;
|
|
}
|
|
}
|
|
// In this case we've already printed out the code above.
|
|
if (!printedSource)
|
|
{
|
|
unsigned int i;
|
|
log_error( "Original source is: ------------\n" );
|
|
for( i = 0; i < numKernelLines; i++ )
|
|
log_error( "%s", kernelProgram[ i ] );
|
|
printedSource = 1;
|
|
}
|
|
log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
|
|
log_error( "%s\n", log );
|
|
log_error( "\n----------\n" );
|
|
free( devices );
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* And create a kernel from it */
|
|
*outKernel = clCreateKernel( *outProgram, kernelName, &error );
|
|
if( *outKernel == NULL || error != CL_SUCCESS)
|
|
{
|
|
print_error( error, "Unable to create kernel" );
|
|
free( devices );
|
|
return error;
|
|
}
|
|
|
|
free( devices );
|
|
return 0;
|
|
}
|
|
|
|
template<class T>
|
|
class AutoDestructArray
|
|
{
|
|
public:
|
|
AutoDestructArray(T* arr) : m_arr(arr) {}
|
|
~AutoDestructArray() { if (m_arr) delete [] m_arr; }
|
|
|
|
private:
|
|
T* m_arr;
|
|
};
|
|
|
|
int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices, int num_elements, cl_device_id *parentDevice = NULL)
|
|
{
|
|
int error;
|
|
clContextWrapper context;
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernels[2];
|
|
clMemWrapper stream;
|
|
clCommandQueueWrapper queues[MAX_QUEUES];
|
|
size_t threads[1], localThreads[1];
|
|
int data[TEST_SIZE];
|
|
int outputData[TEST_SIZE];
|
|
int expectedResults[TEST_SIZE];
|
|
int *expectedResultsOneDeviceArray = new int[deviceCount * TEST_SIZE];
|
|
int **expectedResultsOneDevice = (int**)alloca(sizeof(int**) * deviceCount);
|
|
size_t i;
|
|
AutoDestructArray<int> autoDestruct(expectedResultsOneDeviceArray);
|
|
|
|
for (i=0; i<deviceCount; i++) {
|
|
expectedResultsOneDevice[i] = expectedResultsOneDeviceArray + (i * TEST_SIZE);
|
|
}
|
|
|
|
memset(queues, 0, sizeof(queues));
|
|
|
|
RandomSeed seed( gRandomSeed );
|
|
|
|
if (queueCount > MAX_QUEUES) {
|
|
log_error("Number of queues (%ld) is greater than the number for which the test was written (%d).", queueCount, MAX_QUEUES);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Testing with %ld queues on %ld devices, %ld kernel executions.\n", queueCount, deviceCount, queueCount*num_elements/TEST_SIZE);
|
|
|
|
for (i=0; i<deviceCount; i++) {
|
|
size_t deviceNameSize;
|
|
error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, NULL, &deviceNameSize);
|
|
test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
|
|
char *deviceName = (char *)alloca(deviceNameSize * (sizeof(char)));
|
|
error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, deviceNameSize, deviceName, NULL);
|
|
test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
|
|
log_info("Device %ld is \"%s\".\n", i, deviceName);
|
|
}
|
|
|
|
/* Create a context */
|
|
context = clCreateContext( NULL, (cl_uint)deviceCount, devices, notify_callback, NULL, &error );
|
|
test_error( error, "Unable to create testing context" );
|
|
|
|
/* Create our kernels (they all have the same arguments so we don't need multiple ones for each device) */
|
|
if( create_single_kernel_helper( context, &program, &kernels[0], 1, test_kernels, "kernelA", parentDevice ) != 0 )
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
kernels[1] = clCreateKernel(program, "kernelB", &error);
|
|
test_error(error, "clCreateKernel failed");
|
|
|
|
|
|
/* Now create I/O streams */
|
|
for( i = 0; i < TEST_SIZE; i++ )
|
|
data[i] = genrand_int32(seed);
|
|
|
|
stream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * TEST_SIZE, data, &error);
|
|
test_error( error, "Unable to create test array" );
|
|
|
|
// Update the expected results
|
|
for( i = 0; i < TEST_SIZE; i++ ) {
|
|
expectedResults[i] = data[i];
|
|
for (size_t j=0; j<deviceCount; j++)
|
|
expectedResultsOneDevice[j][i] = data[i];
|
|
}
|
|
|
|
|
|
// Set the arguments
|
|
error = clSetKernelArg( kernels[0], 0, sizeof( stream ), &stream);
|
|
test_error( error, "Unable to set kernel arguments" );
|
|
error = clSetKernelArg( kernels[1], 0, sizeof( stream ), &stream);
|
|
test_error( error, "Unable to set kernel arguments" );
|
|
|
|
/* Run the test */
|
|
threads[0] = (size_t)TEST_SIZE;
|
|
|
|
error = get_max_common_work_group_size( context, kernels[0], threads[0], &localThreads[ 0 ] );
|
|
test_error( error, "Unable to calc work group size" );
|
|
|
|
/* Create work queues */
|
|
for( i = 0; i < queueCount; i++ )
|
|
{
|
|
queues[i] = clCreateCommandQueueWithProperties( context, devices[ i % deviceCount ], 0, &error );
|
|
if (error != CL_SUCCESS || queues[i] == NULL) {
|
|
log_info("Could not create queue[%d].\n", (int)i);
|
|
queueCount = i;
|
|
break;
|
|
}
|
|
}
|
|
log_info("Testing with %d queues.\n", (int)queueCount);
|
|
|
|
/* Enqueue executions */
|
|
for( int z = 0; z<num_elements/TEST_SIZE; z++) {
|
|
for( i = 0; i < queueCount; i++ )
|
|
{
|
|
// Randomly choose a kernel to execute.
|
|
int kernel_selection = (int)get_random_float(0, 2, seed);
|
|
error = clEnqueueNDRangeKernel( queues[ i ], kernels[ kernel_selection ], 1, NULL, threads, localThreads, 0, NULL, NULL );
|
|
test_error( error, "Kernel execution failed" );
|
|
|
|
// Update the expected results
|
|
for( int j = 0; j < TEST_SIZE; j++ ) {
|
|
expectedResults[j] = (kernel_selection) ? expectedResults[j]+1 : expectedResults[j]*3;
|
|
expectedResultsOneDevice[i % deviceCount][j] = (kernel_selection) ? expectedResultsOneDevice[i % deviceCount][j]+1 : expectedResultsOneDevice[i % deviceCount][j]*3;
|
|
}
|
|
|
|
// Force the queue to finish so the next one will be in sync
|
|
error = clFinish(queues[i]);
|
|
test_error( error, "clFinish failed");
|
|
}
|
|
}
|
|
|
|
/* Read results */
|
|
int errors = 0;
|
|
for (int q = 0; q<(int)queueCount; q++) {
|
|
error = clEnqueueReadBuffer( queues[ q ], stream, CL_TRUE, 0, sizeof(cl_int)*TEST_SIZE, (char *)outputData, 0, NULL, NULL );
|
|
test_error( error, "Unable to get result data set" );
|
|
|
|
int errorsThisTime = 0;
|
|
/* Verify all of the data now */
|
|
for( i = 0; i < TEST_SIZE; i++ )
|
|
{
|
|
if( expectedResults[ i ] != outputData[ i ] )
|
|
{
|
|
log_error( "ERROR: Sample data did not verify for queue %d on device %ld (sample %d, expected %d, got %d)\n",
|
|
q, q % deviceCount, (int)i, expectedResults[ i ], outputData[ i ] );
|
|
for (size_t j=0; j<deviceCount; j++) {
|
|
if (expectedResultsOneDevice[j][i] == outputData[i])
|
|
log_info("Sample consistent with only device %ld having modified the data.\n", j);
|
|
}
|
|
errorsThisTime++;
|
|
break;
|
|
}
|
|
}
|
|
if (errorsThisTime)
|
|
errors++;
|
|
}
|
|
|
|
/* All done now! */
|
|
if (errors)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
int init_device_partition_test(cl_device_id parentDevice, cl_uint &maxComputeUnits, cl_uint &maxSubDevices)
|
|
{
|
|
int err = clGetDeviceInfo(parentDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(maxComputeUnits), &maxComputeUnits, NULL);
|
|
test_error( err, "Unable to get maximal number of compute units" );
|
|
err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(maxSubDevices), &maxSubDevices, NULL);
|
|
test_error( err, "Unable to get maximal number of sub-devices" );
|
|
|
|
log_info("Maximal number of sub-devices on device %p is %d.\n", parentDevice, maxSubDevices );
|
|
return 0;
|
|
}
|
|
|
|
int test_device_partition_type_support(cl_device_id parentDevice, const cl_device_partition_property partitionType, const cl_device_affinity_domain affinityDomain)
|
|
{
|
|
typedef std::vector< cl_device_partition_property > properties_t;
|
|
properties_t supportedProps( 3 ); // only 3 types defined in the spec (but implementation can define more)
|
|
size_t const propSize = sizeof( cl_device_partition_property ); // Size of one property in bytes.
|
|
size_t size; // size of all properties in bytes.
|
|
cl_int err;
|
|
size = 0;
|
|
err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL, & size );
|
|
if ( err == CL_SUCCESS ) {
|
|
if ( size % propSize != 0 ) {
|
|
log_error( "ERROR: clGetDeviceInfo: Bad size of returned partition properties (%llu), it must me a multiply of partition property size (%llu)\n", llu( size ), llu( propSize ) );
|
|
return -1;
|
|
}
|
|
supportedProps.resize( size / propSize );
|
|
size = 0;
|
|
err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, supportedProps.size() * propSize, & supportedProps.front(), & size );
|
|
test_error_ret( err, "Unable to get device partition properties (2)", -1 );
|
|
} else if ( err == CL_INVALID_VALUE ) {
|
|
log_error( "ERROR: clGetDeviceInfo: CL_DEVICE_PARTITION_PROPERTIES is not supported.\n" );
|
|
return -1;
|
|
} else {
|
|
test_error_ret( err, "Unable to get device partition properties (1)", -1 );
|
|
};
|
|
for ( int i = 0; i < supportedProps.size(); i++)
|
|
{
|
|
if (supportedProps[i] == partitionType)
|
|
{
|
|
if (partitionType == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
|
|
{
|
|
cl_device_affinity_domain supportedAffinityDomain;
|
|
err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(supportedAffinityDomain), &supportedAffinityDomain, NULL);
|
|
test_error( err, "Unable to get supported affinity domains" );
|
|
if (supportedAffinityDomain & affinityDomain)
|
|
return 0;
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
int test_partition_of_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_device_partition_property *partition_type,
|
|
cl_uint starting_property, cl_uint ending_property)
|
|
{
|
|
cl_uint maxComputeUnits;
|
|
cl_uint maxSubDevices; // maximal number of sub-devices that can be created in one call to clCreateSubDevices
|
|
int err = 0;
|
|
|
|
if (init_device_partition_test(deviceID, maxComputeUnits, maxSubDevices) != 0)
|
|
return -1;
|
|
|
|
if (maxComputeUnits <= 1)
|
|
return 0;
|
|
// confirm that this devices reports how it was partitioned
|
|
if (partition_type != NULL)
|
|
{ // if we're not the root device
|
|
size_t psize;
|
|
err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, 0, NULL, &psize);
|
|
test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
|
|
cl_device_partition_property *properties_returned = (cl_device_partition_property *)alloca(psize);
|
|
err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, psize, (void *) properties_returned, NULL);
|
|
test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
|
|
|
|
// test returned type
|
|
for (cl_uint i = 0;i < psize / sizeof(cl_device_partition_property);i++) {
|
|
if (properties_returned[i] != partition_type[i]) {
|
|
if (!(partition_type[0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN &&
|
|
i == 1 && partition_type[1] == CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE &&
|
|
(properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_NUMA ||
|
|
properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE ||
|
|
properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE ||
|
|
properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE ||
|
|
properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE))) {
|
|
log_error("properties_returned[%d] 0x%x != 0x%x partition_type[%d].", i, properties_returned[i], partition_type[i], i);
|
|
return -1;
|
|
}
|
|
}
|
|
} // for
|
|
}
|
|
|
|
#define PROPERTY_TYPES 8
|
|
cl_device_partition_property partitionProp[PROPERTY_TYPES][5] = {
|
|
{ CL_DEVICE_PARTITION_EQUALLY, maxComputeUnits / 2, 0, 0, 0 } ,
|
|
{ CL_DEVICE_PARTITION_BY_COUNTS, 1, maxComputeUnits - 1, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 } ,
|
|
{ CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0, 0, 0 } ,
|
|
{ CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE, 0, 0, 0 } ,
|
|
{ CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE, 0, 0, 0 } ,
|
|
{ CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE, 0, 0, 0 } ,
|
|
{ CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE, 0, 0, 0 } ,
|
|
{ CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0, 0, 0 }
|
|
};
|
|
|
|
// loop thru each type, creating sub-devices for each type
|
|
for (cl_uint i = starting_property;i < ending_property;i++) {
|
|
|
|
if (test_device_partition_type_support(deviceID, partitionProp[i][0], partitionProp[i][1]) != 0)
|
|
{
|
|
if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
|
|
{
|
|
log_info( "Device partition type \"%s\" \"%s\" is not supported on device %p. Skipping test...\n",
|
|
printPartition(partitionProp[i][0]),
|
|
printAffinity(partitionProp[i][1]), deviceID);
|
|
}
|
|
else
|
|
{
|
|
log_info( "Device partition type \"%s\" is not supported on device %p. Skipping test...\n",
|
|
printPartition(partitionProp[i][0]), deviceID);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
|
|
{
|
|
log_info("Testing on device %p partition type \"%s\" \"%s\"\n", deviceID, printPartition(partitionProp[i][0]),
|
|
printAffinity(partitionProp[i][1]));
|
|
}
|
|
else
|
|
{
|
|
log_info("Testing on device %p partition type \"%s\" (%d,%d)\n", deviceID, printPartition(partitionProp[i][0]),
|
|
partitionProp[i][1], partitionProp[i][2]);
|
|
}
|
|
|
|
cl_uint deviceCount;
|
|
|
|
// how many sub-devices can we create?
|
|
err = clCreateSubDevices(deviceID, partitionProp[i], 0, NULL, &deviceCount);
|
|
if ( err == CL_DEVICE_PARTITION_FAILED ) {
|
|
log_info( "The device %p could not be further partitioned.\n", deviceID );
|
|
continue;
|
|
}
|
|
test_error( err, "Failed to get number of sub-devices" );
|
|
|
|
// get the list of subDevices
|
|
// create room for 1 more device_id, so that we can put the parent device in there.
|
|
cl_device_id *subDevices = (cl_device_id*)alloca(sizeof(cl_device_id) * (deviceCount + 1));
|
|
err = clCreateSubDevices(deviceID, partitionProp[i], deviceCount, subDevices, &deviceCount);
|
|
test_error( err, "Actual creation of sub-devices failed" );
|
|
|
|
log_info("Testing on all devices in context\n");
|
|
err = test_device_set(deviceCount, deviceCount, subDevices, num_elements);
|
|
if (err == 0)
|
|
{
|
|
log_info("Testing on a parent device for context\n");
|
|
|
|
// add the parent device
|
|
subDevices[deviceCount] = deviceID;
|
|
err = test_device_set(deviceCount + 1, deviceCount, subDevices, num_elements, &deviceID);
|
|
}
|
|
if (err != 0)
|
|
{
|
|
printf("error! returning %d\n",err);
|
|
return err;
|
|
}
|
|
|
|
// now, recurse and test the FIRST of these sub-devices, to make sure it can be further partitioned
|
|
err = test_partition_of_device(subDevices[0], context, queue, num_elements, partitionProp[i], starting_property, ending_property);
|
|
if (err != 0)
|
|
{
|
|
printf("error! returning %d\n",err);
|
|
return err;
|
|
}
|
|
|
|
for (cl_uint j=0;j < deviceCount;j++)
|
|
{
|
|
err = clReleaseDevice(subDevices[j]);
|
|
test_error( err, "\n Releasing sub-device failed \n" );
|
|
}
|
|
|
|
} // for
|
|
|
|
log_info("Testing on all device %p finished\n", deviceID);
|
|
return 0;
|
|
}
|
|
|
|
|
|
int test_partition_equally(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 1);
|
|
}
|
|
|
|
int test_partition_by_counts(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 1, 2);
|
|
}
|
|
|
|
int test_partition_by_affinity_domain_numa(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 2, 3);
|
|
}
|
|
|
|
int test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 3, 4);
|
|
}
|
|
|
|
int test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 4, 5);
|
|
}
|
|
|
|
int test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 5, 6);
|
|
}
|
|
|
|
int test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 6, 7);
|
|
}
|
|
|
|
int test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 7, 8);
|
|
}
|
|
|
|
int test_partition_all(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 8);
|
|
}
|