switch SVM tests to the new test registration framework (#2168)

Switches the SVM tests to the new test registration framework.

The first commit is the best one to review, as it contains the actual changes.
The second commit contains only formatting changes.

Note that several of these changes were a bit more than mechanical
because many of the SVM tests create a new context vs. using the context
provided by the harness and passed to each test function. The previous
code named the context provided by the harness differently, and hence
could use the name "context" in each test function, but with the new
test registration framework this is no longer possible. Instead, I am
creating the new context using the name "contextWrapper" and then
assigning it to the "context" passed to the test function, which seems
like the best way to avoid using the wrong context unintentionally. I am
open to suggestions to do this differently.

I have verified that the same calls are made before and after these
changes, and specifically that there are no context leaks.
This commit is contained in:
Ben Ashbaugh
2024-12-03 14:51:23 -08:00
committed by GitHub
parent e361b387d9
commit d99b302f90
16 changed files with 1066 additions and 854 deletions

View File

@@ -81,25 +81,6 @@ extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, c
extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists ); extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists );
extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists ); extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists );
extern int test_svm_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements);
extern int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector<std::string> extensions_list = std::vector<std::string>()); extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector<std::string> extensions_list = std::vector<std::string>());
extern const char *linked_list_create_and_verify_kernels[]; extern const char *linked_list_create_and_verify_kernels[];

View File

@@ -260,26 +260,6 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt
return 0; return 0;
} }
test_definition test_list[] = {
ADD_TEST(svm_byte_granularity),
ADD_TEST(svm_set_kernel_exec_info_svm_ptrs),
ADD_TEST(svm_fine_grain_memory_consistency),
ADD_TEST(svm_fine_grain_sync_buffers),
ADD_TEST(svm_shared_address_space_fine_grain),
ADD_TEST(svm_shared_sub_buffers),
ADD_TEST(svm_shared_address_space_fine_grain_buffers),
ADD_TEST(svm_allocate_shared_buffer),
ADD_TEST(svm_allocate_shared_buffer_negative),
ADD_TEST(svm_shared_address_space_coarse_grain_old_api),
ADD_TEST(svm_shared_address_space_coarse_grain_new_api),
ADD_TEST(svm_cross_buffer_pointers_coarse_grain),
ADD_TEST(svm_pointer_passing),
ADD_TEST(svm_enqueue_api),
ADD_TEST_VERSION(svm_migrate, Version(2, 1)),
};
const int test_num = ARRAY_SIZE( test_list );
test_status InitCL(cl_device_id device) { test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device); auto version = get_device_cl_version(device);
auto expected_min_version = Version(2, 0); auto expected_min_version = Version(2, 0);
@@ -310,6 +290,7 @@ test_status InitCL(cl_device_id device) {
int main(int argc, const char *argv[]) int main(int argc, const char *argv[])
{ {
return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL); return runTestHarnessWithCheck(
argc, argv, test_registry::getInstance().num_tests(),
test_registry::getInstance().definitions(), true, 0, InitCL);
} }

View File

@@ -41,71 +41,83 @@ const char* flag_set_names[] = {
}; };
int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_allocate_shared_buffer)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
cl_device_svm_capabilities caps; cl_device_svm_capabilities caps;
err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL); err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES,
test_error(err,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); sizeof(cl_device_svm_capabilities), &caps, NULL);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
// under construction... // under construction...
err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); err =
if(err) return -1; create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (err) return -1;
size_t size = 1024; size_t size = 1024;
// iteration over flag combos // iteration over flag combos
int num_flags = sizeof(flag_set)/sizeof(cl_mem_flags); int num_flags = sizeof(flag_set) / sizeof(cl_mem_flags);
for(int i = 0; i < num_flags; i++) for (int i = 0; i < num_flags; i++)
{
if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
|| ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0))
{ {
log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]); if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0
continue; && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
} || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0
&& (caps & CL_DEVICE_SVM_ATOMICS) == 0))
log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);
cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0);
if(pBufData1 == NULL)
{
log_error("SVMalloc returned NULL");
return -1;
}
{
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err);
test_error(err,"clCreateBuffer failed");
cl_char *pBufData2 = NULL;
cl_uint flags = CL_MAP_READ | CL_MAP_READ;
if(flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE;
if(flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ;
if(!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
{
pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err);
test_error(err, "clEnqueueMapBuffer failed");
if(pBufData2 != pBufData1 || NULL == pBufData1)
{ {
log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc"); log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]);
return -1; continue;
} }
err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL);
test_error(err, "clEnqueueUnmapMemObject failed"); log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);
err = clFinish(queues[0]); cl_char *pBufData1 =
test_error(err, "clFinish failed"); (cl_char *)clSVMAlloc(context, flag_set[i], size, 0);
} if (pBufData1 == NULL)
{
log_error("SVMalloc returned NULL");
return -1;
}
{
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
size, pBufData1, &err);
test_error(err, "clCreateBuffer failed");
cl_char *pBufData2 = NULL;
cl_uint flags = CL_MAP_READ | CL_MAP_READ;
if (flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE;
if (flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ;
if (!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
{
pBufData2 = (cl_char *)clEnqueueMapBuffer(
queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL, NULL,
&err);
test_error(err, "clEnqueueMapBuffer failed");
if (pBufData2 != pBufData1 || NULL == pBufData1)
{
log_error("SVM pointer returned by clEnqueueMapBuffer "
"doesn't match pointer returned by clSVMalloc");
return -1;
}
err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0,
NULL, NULL);
test_error(err, "clEnqueueUnmapMemObject failed");
err = clFinish(queues[0]);
test_error(err, "clFinish failed");
}
}
clSVMFree(context, pBufData1);
} }
clSVMFree(context, pBufData1); return 0;
}
return 0;
} }

View File

@@ -41,12 +41,9 @@ const char* svm_flag_set_names[] = {
}; };
int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID, REGISTER_TEST(svm_allocate_shared_buffer_negative)
cl_context context2,
cl_command_queue queue,
int num_elements)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
@@ -58,8 +55,10 @@ int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
// under construction... // under construction...
err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], err =
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (err) return err; if (err) return err;
size_t size = 1024; size_t size = 1024;

View File

@@ -48,100 +48,115 @@ const char *byte_manipulation_kernels[] = {
}; };
REGISTER_TEST(svm_byte_granularity)
int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{ {
clContextWrapper context; clContextWrapper contextWrapper;
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper k1,k2; clKernelWrapper k1, k2;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER); err = create_cl_objects(deviceID, &byte_manipulation_kernels[0],
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. &contextWrapper, &program, &queues[0], &num_devices,
if(err < 0) return -1; // fail test. CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
context = contextWrapper;
if (err == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (err < 0) return -1; // fail test.
cl_uint num_devices_plus_host = num_devices + 1; cl_uint num_devices_plus_host = num_devices + 1;
k1 = clCreateKernel(program, "write_owned_locations", &err); k1 = clCreateKernel(program, "write_owned_locations", &err);
test_error(err, "clCreateKernel failed"); test_error(err, "clCreateKernel failed");
k2 = clCreateKernel(program, "sum_neighbor_locations", &err); k2 = clCreateKernel(program, "sum_neighbor_locations", &err);
test_error(err, "clCreateKernel failed"); test_error(err, "clCreateKernel failed");
cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0); cl_char *pA = (cl_char *)clSVMAlloc(
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(cl_char) * num_elements, 0);
cl_uint **error_counts = (cl_uint**) malloc(sizeof(void*) * num_devices); cl_uint **error_counts = (cl_uint **)malloc(sizeof(void *) * num_devices);
for(cl_uint i=0; i < num_devices; i++) { for (cl_uint i = 0; i < num_devices; i++)
error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0); {
*error_counts[i] = 0; error_counts[i] = (cl_uint *)clSVMAlloc(
} context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
for(int i=0; i < num_elements; i++) pA[i] = -1; sizeof(cl_uint), 0);
*error_counts[i] = 0;
}
for (int i = 0; i < num_elements; i++) pA[i] = -1;
err |= clSetKernelArgSVMPointer(k1, 0, pA); err |= clSetKernelArgSVMPointer(k1, 0, pA);
err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host); err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host);
test_error(err, "clSetKernelArg failed");
// get all the devices going simultaneously
size_t element_num = num_elements;
for(cl_uint d=0; d < num_devices; d++) // device ids starting at 1.
{
err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
test_error(err, "clSetKernelArg failed");
err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL);
test_error(err,"clEnqueueNDRangeKernel failed");
}
for(cl_uint d=0; d < num_devices; d++) clFlush(queues[d]);
cl_uint host_id = num_devices; // host code will take the id above the devices.
for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id;
for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
// now check that each device can see the byte writes made by the other devices.
err |= clSetKernelArgSVMPointer(k2, 0, pA);
err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
test_error(err, "clSetKernelArg failed");
// adjusted so k2 doesn't read past end of buffer
size_t adjusted_num_elements = num_elements - num_devices;
for(cl_uint id = 0; id < num_devices; id++)
{
err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
test_error(err, "clSetKernelArg failed"); test_error(err, "clSetKernelArg failed");
err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL); // get all the devices going simultaneously
test_error(err,"clEnqueueNDRangeKernel failed"); size_t element_num = num_elements;
} for (cl_uint d = 0; d < num_devices; d++) // device ids starting at 1.
{
err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
test_error(err, "clSetKernelArg failed");
err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL,
0, NULL, NULL);
test_error(err, "clEnqueueNDRangeKernel failed");
}
for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); for (cl_uint d = 0; d < num_devices; d++) clFlush(queues[d]);
bool failed = false; cl_uint host_id =
num_devices; // host code will take the id above the devices.
for (int i = (int)num_devices; i < num_elements; i += num_devices_plus_host)
pA[i] = host_id;
// see if any of the devices found errors for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
for(cl_uint i=0; i < num_devices; i++) {
if(*error_counts[i] > 0)
failed = true;
}
cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2;
// check that host can see the byte writes made by the devices.
for(cl_uint i = 0; i < num_elements - num_devices_plus_host; i++)
{
int sum = 0;
for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j];
if(sum != expected)
failed = true;
}
clSVMFree(context, pA); // now check that each device can see the byte writes made by the other
for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]); // devices.
if(failed) err |= clSetKernelArgSVMPointer(k2, 0, pA);
return -1; err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
return 0; test_error(err, "clSetKernelArg failed");
// adjusted so k2 doesn't read past end of buffer
size_t adjusted_num_elements = num_elements - num_devices;
for (cl_uint id = 0; id < num_devices; id++)
{
err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
test_error(err, "clSetKernelArg failed");
err =
clEnqueueNDRangeKernel(queues[id], k2, 1, NULL,
&adjusted_num_elements, NULL, 0, NULL, NULL);
test_error(err, "clEnqueueNDRangeKernel failed");
}
for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
bool failed = false;
// see if any of the devices found errors
for (cl_uint i = 0; i < num_devices; i++)
{
if (*error_counts[i] > 0) failed = true;
}
cl_uint expected =
(num_devices_plus_host * (num_devices_plus_host - 1)) / 2;
// check that host can see the byte writes made by the devices.
for (cl_uint i = 0; i < num_elements - num_devices_plus_host; i++)
{
int sum = 0;
for (cl_uint j = 0; j < num_devices_plus_host; j++) sum += pA[i + j];
if (sum != expected) failed = true;
}
clSVMFree(context, pA);
for (cl_uint i = 0; i < num_devices; i++)
clSVMFree(context, error_counts[i]);
if (failed) return -1;
return 0;
} }

View File

@@ -128,93 +128,128 @@ cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes,
// on another device or the host. // on another device or the host.
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly. // The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
// This basic test is performed for all combinations of devices and the host. // This basic test is performed for all combinations of devices and the host.
int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_cross_buffer_pointers_coarse_grain)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0],
if(error) return -1; &contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return -1;
size_t numLists = num_elements; size_t numLists = num_elements;
cl_int ListLength = 32; cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); clKernelWrapper kernel_create_lists =
test_error(error, "clCreateKernel failed"); clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); clKernelWrapper kernel_verify_lists =
test_error(error, "clCreateKernel failed"); clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed");
// this buffer holds some of the linked list nodes. // this buffer holds some of the linked list nodes.
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); Node *pNodes = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE,
sizeof(Node) * ListLength * numLists, 0);
// this buffer holds some of the linked list nodes. // this buffer holds some of the linked list nodes.
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); Node *pNodes2 = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE,
sizeof(Node) * ListLength * numLists, 0);
{
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
test_error(error, "clCreateBuffer failed.");
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the index into the nodes buffer that is used for node allocation
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel.
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
//error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes);
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator);
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength);
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes);
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct);
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength);
test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host).
// Do this for all possible combinations of devices and host within the platform.
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
{ {
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on clMemWrapper nodes = clCreateBuffer(
{ context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
if(ci == num_devices) // last device index represents the host, note the num_device+1 above. pNodes, &error);
{ test_error(error, "clCreateBuffer failed.");
error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists);
if(error) return -1;
}
else
{
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
if(error) return -1;
}
if(vi == num_devices) clMemWrapper nodes2 = clCreateBuffer(
{ context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists); pNodes2, &error);
if(error) return -1; test_error(error, "clCreateBuffer failed.");
}
else
{
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
if(error) return -1;
}
} // inner loop, vi
} // outer loop, ci
}
clSVMFree(context, pNodes2); // this buffer holds the index into the nodes buffer that is used for
clSVMFree(context, pNodes); // node allocation
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed.");
return 0; // this buffer holds the count of correct nodes which is computed by the
// verify kernel.
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *),
(void *)&nodes);
// error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *)
// pNodes);
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *),
(void *)&nodes2);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *),
(void *)&allocator);
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int),
(void *)&ListLength);
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *),
(void *)&nodes);
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *),
(void *)&nodes2);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *),
(void *)&num_correct);
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int),
(void *)&ListLength);
test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the
// host). Do this for all possible combinations of devices and host
// within the platform.
for (int ci = 0; ci < (int)num_devices + 1;
ci++) // ci is CreationIndex, index of device/q to create linked
// list on
{
for (int vi = 0; vi < (int)num_devices + 1;
vi++) // vi is VerificationIndex, index of device/q to verify
// linked list on
{
if (ci == num_devices) // last device index represents the host,
// note the num_device+1 above.
{
error = create_linked_lists_on_host(
queues[0], nodes, nodes2, ListLength, numLists);
if (error) return -1;
}
else
{
error = create_linked_lists_on_device(
ci, queues[ci], allocator, kernel_create_lists,
numLists);
if (error) return -1;
}
if (vi == num_devices)
{
error = verify_linked_lists_on_host(
vi, queues[0], nodes, nodes2, ListLength, numLists);
if (error) return -1;
}
else
{
error = verify_linked_lists_on_device(
vi, queues[vi], num_correct, kernel_verify_lists,
ListLength, numLists);
if (error) return -1;
}
} // inner loop, vi
} // outer loop, ci
}
clSVMFree(context, pNodes2);
clSVMFree(context, pNodes);
return 0;
} }

View File

@@ -70,244 +70,282 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point
data->status.store(1, std::memory_order_release); data->status.store(1, std::memory_order_release);
} }
int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_enqueue_api)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
cl_uint num_devices = 0; cl_uint num_devices = 0;
const size_t elementNum = 1024; const size_t elementNum = 1024;
const size_t numSVMBuffers = 32; const size_t numSVMBuffers = 32;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
RandomSeed seed(0); RandomSeed seed(0);
error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, NULL, &contextWrapper, NULL, &queues[0],
if(error) return TEST_FAIL; &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return TEST_FAIL;
queue = queues[0]; queue = queues[0];
//all possible sizes of vectors and scalars // all possible sizes of vectors and scalars
size_t typeSizes[] = { size_t typeSizes[] = {
sizeof(cl_uchar), sizeof(cl_uchar), sizeof(cl_uchar2), sizeof(cl_uchar3),
sizeof(cl_uchar2), sizeof(cl_uchar4), sizeof(cl_uchar8), sizeof(cl_uchar16),
sizeof(cl_uchar3), sizeof(cl_ushort), sizeof(cl_ushort2), sizeof(cl_ushort3),
sizeof(cl_uchar4), sizeof(cl_ushort4), sizeof(cl_ushort8), sizeof(cl_ushort16),
sizeof(cl_uchar8), sizeof(cl_uint), sizeof(cl_uint2), sizeof(cl_uint3),
sizeof(cl_uchar16), sizeof(cl_uint4), sizeof(cl_uint8), sizeof(cl_uint16),
sizeof(cl_ushort), sizeof(cl_ulong), sizeof(cl_ulong2), sizeof(cl_ulong3),
sizeof(cl_ushort2), sizeof(cl_ulong4), sizeof(cl_ulong8), sizeof(cl_ulong16),
sizeof(cl_ushort3), };
sizeof(cl_ushort4),
sizeof(cl_ushort8),
sizeof(cl_ushort16),
sizeof(cl_uint),
sizeof(cl_uint2),
sizeof(cl_uint3),
sizeof(cl_uint4),
sizeof(cl_uint8),
sizeof(cl_uint16),
sizeof(cl_ulong),
sizeof(cl_ulong2),
sizeof(cl_ulong3),
sizeof(cl_ulong4),
sizeof(cl_ulong8),
sizeof(cl_ulong16),
};
enum allocationTypes { enum allocationTypes
host,
svm
};
struct TestType {
allocationTypes srcAlloc;
allocationTypes dstAlloc;
TestType(allocationTypes type1, allocationTypes type2): srcAlloc(type1), dstAlloc(type2){}
};
std::vector<TestType> testTypes;
testTypes.push_back(TestType(host, host));
testTypes.push_back(TestType(host, svm));
testTypes.push_back(TestType(svm, host));
testTypes.push_back(TestType(svm, svm));
for (const auto test_case : testTypes)
{
log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n", test_case.srcAlloc == svm ? "svm" : "host", test_case.dstAlloc == svm ? "svm" : "host");
for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i)
{ {
//generate initial data host,
std::vector<cl_uchar> fillData0(typeSizes[i]), fillData1(typeSizes[i]); svm
generate_data(fillData0, typeSizes[i], seed); };
generate_data(fillData1, typeSizes[i], seed);
size_t data_size = elementNum * typeSizes[i];
std::vector<cl_uchar> srcHostData(data_size, 0);
std::vector<cl_uchar> dstHostData(data_size, 0);
generate_data(srcHostData, srcHostData.size(), seed);
generate_data(dstHostData, dstHostData.size(), seed);
cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0); struct TestType
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0); {
allocationTypes srcAlloc;
allocationTypes dstAlloc;
TestType(allocationTypes type1, allocationTypes type2)
: srcAlloc(type1), dstAlloc(type2)
{}
};
clEventWrapper userEvent = clCreateUserEvent(context, &error); std::vector<TestType> testTypes;
test_error(error, "clCreateUserEvent failed");
clEventWrapper eventMemFillList[2];
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[0]); testTypes.push_back(TestType(host, host));
test_error(error, "clEnqueueSVMMemFill failed"); testTypes.push_back(TestType(host, svm));
error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[1]); testTypes.push_back(TestType(svm, host));
test_error(error, "clEnqueueSVMMemFill failed"); testTypes.push_back(TestType(svm, svm));
error = clSetUserEventStatus(userEvent, CL_COMPLETE); for (const auto test_case : testTypes)
test_error(error, "clSetUserEventStatus failed"); {
log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n",
test_case.srcAlloc == svm ? "svm" : "host",
test_case.dstAlloc == svm ? "svm" : "host");
for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i)
{
// generate initial data
std::vector<cl_uchar> fillData0(typeSizes[i]),
fillData1(typeSizes[i]);
generate_data(fillData0, typeSizes[i], seed);
generate_data(fillData1, typeSizes[i], seed);
size_t data_size = elementNum * typeSizes[i];
std::vector<cl_uchar> srcHostData(data_size, 0);
std::vector<cl_uchar> dstHostData(data_size, 0);
generate_data(srcHostData, srcHostData.size(), seed);
generate_data(dstHostData, dstHostData.size(), seed);
cl_uchar * src_ptr; cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(
cl_uchar * dst_ptr; context, CL_MEM_READ_WRITE, data_size, 0);
if (test_case.srcAlloc == host) { cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(
src_ptr = srcHostData.data(); context, CL_MEM_READ_WRITE, data_size, 0);
} else if (test_case.srcAlloc == svm) {
src_ptr = srcBuffer;
}
if (test_case.dstAlloc == host) {
dst_ptr = dstHostData.data();
} else if (test_case.dstAlloc == svm) {
dst_ptr = dstBuffer;
}
clEventWrapper eventMemcpy;
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size, 2, &eventMemFillList[0], &eventMemcpy);
test_error(error, "clEnqueueSVMMemcpy failed");
//coarse grain only supported. Synchronization required using map clEventWrapper userEvent = clCreateUserEvent(context, &error);
clEventWrapper eventMap[2]; test_error(error, "clCreateUserEvent failed");
clEventWrapper eventMemFillList[2];
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer, data_size, 1, &eventMemcpy, &eventMap[0]); error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0],
test_error(error, "clEnqueueSVMMap srcBuffer failed"); typeSizes[i], data_size, 1, &userEvent,
&eventMemFillList[0]);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0],
typeSizes[i], data_size, 1, &userEvent,
&eventMemFillList[1]);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer, data_size, 1, &eventMemcpy, &eventMap[1]); error = clSetUserEventStatus(userEvent, CL_COMPLETE);
test_error(error, "clEnqueueSVMMap dstBuffer failed"); test_error(error, "clSetUserEventStatus failed");
error = clWaitForEvents(2, &eventMap[0]); cl_uchar *src_ptr;
test_error(error, "clWaitForEvents failed"); cl_uchar *dst_ptr;
if (test_case.srcAlloc == host)
{
src_ptr = srcHostData.data();
}
else if (test_case.srcAlloc == svm)
{
src_ptr = srcBuffer;
}
if (test_case.dstAlloc == host)
{
dst_ptr = dstHostData.data();
}
else if (test_case.dstAlloc == svm)
{
dst_ptr = dstBuffer;
}
clEventWrapper eventMemcpy;
error =
clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size,
2, &eventMemFillList[0], &eventMemcpy);
test_error(error, "clEnqueueSVMMemcpy failed");
//data verification // coarse grain only supported. Synchronization required using map
for (size_t j = 0; j < data_size; ++j) clEventWrapper eventMap[2];
{
if (dst_ptr[j] != src_ptr[j]) { error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer,
log_error("Invalid data at index %zu, dst_ptr %d, src_ptr %d\n", j, data_size, 1, &eventMemcpy, &eventMap[0]);
dst_ptr[j], src_ptr[j]); test_error(error, "clEnqueueSVMMap srcBuffer failed");
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer,
data_size, 1, &eventMemcpy, &eventMap[1]);
test_error(error, "clEnqueueSVMMap dstBuffer failed");
error = clWaitForEvents(2, &eventMap[0]);
test_error(error, "clWaitForEvents failed");
// data verification
for (size_t j = 0; j < data_size; ++j)
{
if (dst_ptr[j] != src_ptr[j])
{
log_error(
"Invalid data at index %zu, dst_ptr %d, src_ptr %d\n",
j, dst_ptr[j], src_ptr[j]);
return TEST_FAIL;
}
}
clEventWrapper eventUnmap[2];
error =
clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]);
test_error(error, "clEnqueueSVMUnmap srcBuffer failed");
error =
clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]);
test_error(error, "clEnqueueSVMUnmap dstBuffer failed");
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0],
typeSizes[i], data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2,
&fillData1[0], typeSizes[i],
data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer,
data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed");
error = clEnqueueSVMMemcpy(
queue, CL_TRUE, dstBuffer + data_size / 2,
srcBuffer + data_size / 2, data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed");
void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer };
clEventWrapper eventFree;
error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree);
test_error(error, "clEnqueueSVMFree failed");
error = clWaitForEvents(1, &eventFree);
test_error(error, "clWaitForEvents failed");
// event info verification for new SVM commands
cl_command_type commandType;
for (auto &check_event : eventMemFillList)
{
error =
clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMFILL)
{
log_error("Invalid command type returned for "
"clEnqueueSVMMemFill\n");
return TEST_FAIL;
}
}
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMCPY)
{
log_error(
"Invalid command type returned for clEnqueueSVMMemcpy\n");
return TEST_FAIL;
}
for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++)
{
error =
clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MAP)
{
log_error(
"Invalid command type returned for clEnqueueSVMMap\n");
return TEST_FAIL;
}
error =
clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_UNMAP)
{
log_error("Invalid command type returned for "
"clEnqueueSVMUnmap\n");
return TEST_FAIL;
}
}
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_FREE)
{
log_error(
"Invalid command type returned for clEnqueueSVMFree\n");
return TEST_FAIL;
}
}
}
std::vector<void *> buffers(numSVMBuffers, 0);
for (size_t i = 0; i < numSVMBuffers; ++i)
buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
// verify if callback is triggered correctly
CallbackData data;
data.status = 0;
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0],
callback_svm_free, &data, 0, 0, 0);
test_error(error, "clEnqueueSVMFree failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
// wait for the callback
while (data.status.load(std::memory_order_acquire) == 0)
{
usleep(1);
}
// check if number of SVM pointers returned in the callback matches with
// expected
if (data.num_svm_pointers != buffers.size())
{
log_error("Invalid number of SVM pointers returned in the callback, "
"expected: %zu, got: %d\n",
buffers.size(), data.num_svm_pointers);
return TEST_FAIL;
}
// check if pointers returned in callback are correct
for (size_t i = 0; i < buffers.size(); ++i)
{
if (data.svm_pointers[i] != buffers[i])
{
log_error(
"Invalid SVM pointer returned in the callback, idx: %zu\n", i);
return TEST_FAIL; return TEST_FAIL;
} }
}
clEventWrapper eventUnmap[2];
error = clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]);
test_error(error, "clEnqueueSVMUnmap srcBuffer failed");
error = clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]);
test_error(error, "clEnqueueSVMUnmap dstBuffer failed");
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed");
error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + data_size / 2, srcBuffer + data_size / 2, data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed");
void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer };
clEventWrapper eventFree;
error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree);
test_error(error, "clEnqueueSVMFree failed");
error = clWaitForEvents(1, &eventFree);
test_error(error, "clWaitForEvents failed");
//event info verification for new SVM commands
cl_command_type commandType;
for (auto &check_event : eventMemFillList) {
error = clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMFILL)
{
log_error("Invalid command type returned for clEnqueueSVMMemFill\n");
return TEST_FAIL;
}
}
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMCPY)
{
log_error("Invalid command type returned for clEnqueueSVMMemcpy\n");
return TEST_FAIL;
}
for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++) {
error = clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MAP)
{
log_error("Invalid command type returned for clEnqueueSVMMap\n");
return TEST_FAIL;
}
error = clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_UNMAP)
{
log_error("Invalid command type returned for clEnqueueSVMUnmap\n");
return TEST_FAIL;
}
}
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_FREE)
{
log_error("Invalid command type returned for clEnqueueSVMFree\n");
return TEST_FAIL;
}
} }
}
std::vector<void *> buffers(numSVMBuffers, 0);
for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
//verify if callback is triggered correctly return 0;
CallbackData data;
data.status = 0;
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0);
test_error(error, "clEnqueueSVMFree failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
//wait for the callback
while(data.status.load(std::memory_order_acquire) == 0) {
usleep(1);
}
//check if number of SVM pointers returned in the callback matches with expected
if (data.num_svm_pointers != buffers.size())
{
log_error("Invalid number of SVM pointers returned in the callback, "
"expected: %zu, got: %d\n",
buffers.size(), data.num_svm_pointers);
return TEST_FAIL;
}
//check if pointers returned in callback are correct
for (size_t i = 0; i < buffers.size(); ++i)
{
if (data.svm_pointers[i] != buffers[i])
{
log_error("Invalid SVM pointer returned in the callback, idx: %zu\n",
i);
return TEST_FAIL;
}
}
return 0;
} }

View File

@@ -139,44 +139,55 @@ int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper*
// Each bin in the hash table is a linked list. Each bin is protected against simultaneous // Each bin in the hash table is a linked list. Each bin is protected against simultaneous
// update using a lock free technique. The correctness of the list is verfied on the host. // update using a lock free technique. The correctness of the list is verfied on the host.
// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering. // This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering.
int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_fine_grain_memory_consistency)
{ {
clContextWrapper context; clContextWrapper contextWrapper;
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
std::vector<std::string> required_extensions; std::vector<std::string> required_extensions;
required_extensions.push_back("cl_khr_int64_base_atomics"); required_extensions.push_back("cl_khr_int64_base_atomics");
required_extensions.push_back("cl_khr_int64_extended_atomics"); required_extensions.push_back("cl_khr_int64_extended_atomics");
// Make pragmas visible for 64-bit addresses // Make pragmas visible for 64-bit addresses
hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0'; hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0';
char *source[] = { hash_table_kernel }; char *source[] = { hash_table_kernel };
err = create_cl_objects(deviceID, (const char**)source, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS, required_extensions); err = create_cl_objects(
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. deviceID, (const char **)source, &contextWrapper, &program, &queues[0],
if(err < 0) return -1; // fail test. &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS,
required_extensions);
context = contextWrapper;
if (err == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (err < 0) return -1; // fail test.
kernel = clCreateKernel(program, "build_hash_table", &err); kernel = clCreateKernel(program, "build_hash_table", &err);
test_error(err, "clCreateKernel failed"); test_error(err, "clCreateKernel failed");
size_t num_pixels = num_elements; size_t num_pixels = num_elements;
int result; int result;
cl_uint numBins = 1; // all work groups in all devices and the host code will hammer on this one lock. cl_uint numBins = 1; // all work groups in all devices and the host code
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); // will hammer on this one lock.
if(result == -1) return result; result = launch_kernels_and_verify(contextWrapper, queues, kernel,
num_devices, numBins, num_pixels);
if (result == -1) return result;
numBins = 2; // 2 locks within in same cache line will get hit from different devices and host. numBins = 2; // 2 locks within in same cache line will get hit from
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); // different devices and host.
if(result == -1) return result; result = launch_kernels_and_verify(contextWrapper, queues, kernel,
num_devices, numBins, num_pixels);
if (result == -1) return result;
numBins = 29; // locks span a few cache lines. numBins = 29; // locks span a few cache lines.
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); result = launch_kernels_and_verify(contextWrapper, queues, kernel,
if(result == -1) return result; num_devices, numBins, num_pixels);
if (result == -1) return result;
return result; return result;
} }

View File

@@ -44,67 +44,88 @@ void spawnAnalysisTask(int location)
// Concept: a device kernel is used to search an input image for regions that match a target pattern. // Concept: a device kernel is used to search an input image for regions that match a target pattern.
// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices). // The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices).
// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets. // The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets.
int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_fine_grain_sync_buffers)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); err = create_cl_objects(deviceID, &find_targets_kernel[0], &contextWrapper,
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. &program, &queues[0], &num_devices,
if(err < 0) return -1; // fail test. CL_DEVICE_SVM_FINE_GRAIN_BUFFER
| CL_DEVICE_SVM_ATOMICS);
context = contextWrapper;
if (err == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (err < 0) return -1; // fail test.
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err); clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
test_error(err, "clCreateKernel failed"); test_error(err, "clCreateKernel failed");
size_t num_pixels = num_elements; size_t num_pixels = num_elements;
//cl_uint num_pixels = 1024*1024*32; // cl_uint num_pixels = 1024*1024*32;
cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0); cl_uint *pInputImage = (cl_uint *)clSVMAlloc(
cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0); context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0); sizeof(cl_uint) * num_pixels, 0);
cl_uint *pNumTargetsFound = (cl_uint *)clSVMAlloc(
context,
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
sizeof(cl_uint), 0);
cl_int *pTargetLocations = (cl_int *)clSVMAlloc(
context,
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
sizeof(cl_int) * MAX_TARGETS, 0);
cl_uint targetDescriptor = 777; cl_uint targetDescriptor = 777;
*pNumTargetsFound = 0; *pNumTargetsFound = 0;
cl_uint i; cl_uint i;
for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1; for (i = 0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1;
for(i=0; i < num_pixels; i++) pInputImage[i] = 0; for (i = 0; i < num_pixels; i++) pInputImage[i] = 0;
pInputImage[0] = targetDescriptor; pInputImage[0] = targetDescriptor;
pInputImage[3] = targetDescriptor; pInputImage[3] = targetDescriptor;
pInputImage[num_pixels - 1] = targetDescriptor; pInputImage[num_pixels - 1] = targetDescriptor;
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage); err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor); err |=
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound); clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&targetDescriptor);
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations); err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
test_error(err, "clSetKernelArg failed"); err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
test_error(err, "clSetKernelArg failed");
cl_event done; cl_event done;
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done); err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL,
test_error(err,"clEnqueueNDRangeKernel failed"); 0, NULL, &done);
clFlush(queues[0]); test_error(err, "clEnqueueNDRangeKernel failed");
clFlush(queues[0]);
i=0; i = 0;
cl_int status; cl_int status;
// check for new targets, if found spawn a task to analyze target. // check for new targets, if found spawn a task to analyze target.
do { do
err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
test_error(err,"clGetEventInfo failed");
if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet.
{ {
spawnAnalysisTask(pTargetLocations[i]); err = clGetEventInfo(done, CL_EVENT_COMMAND_EXECUTION_STATUS,
i++; sizeof(cl_int), &status, NULL);
} test_error(err, "clGetEventInfo failed");
} while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1); if (AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
!= -1) // -1 indicates slot not used yet.
{
spawnAnalysisTask(pTargetLocations[i]);
i++;
}
} while (status != CL_COMPLETE
|| AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
!= -1);
clReleaseEvent(done); clReleaseEvent(done);
clSVMFree(context, pInputImage); clSVMFree(context, pInputImage);
clSVMFree(context, pNumTargetsFound); clSVMFree(context, pNumTargetsFound);
clSVMFree(context, pTargetLocations); clSVMFree(context, pTargetLocations);
if(i != 3) return -1; if (i != 3) return -1;
return 0; return 0;
} }

View File

@@ -75,7 +75,7 @@ wait_and_release(const char* s, cl_event* evs, int n)
return 0; return 0;
} }
int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_migrate)
{ {
std::vector<cl_uint> amem(GLOBAL_SIZE); std::vector<cl_uint> amem(GLOBAL_SIZE);
std::vector<cl_uint> bmem(GLOBAL_SIZE); std::vector<cl_uint> bmem(GLOBAL_SIZE);
@@ -86,15 +86,17 @@ int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue
RandomSeed seed(0); RandomSeed seed(0);
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
cl_uint num_devices = 0; cl_uint num_devices = 0;
clProgramWrapper program; clProgramWrapper program;
cl_int error; cl_int error;
error = create_cl_objects(deviceID, &sources[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, &sources[0], &contextWrapper, &program,
if (error) &queues[0], &num_devices,
return -1; CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return -1;
if (num_devices > 1) { if (num_devices > 1) {
log_info(" Running on two devices.\n"); log_info(" Running on two devices.\n");

View File

@@ -35,81 +35,107 @@ const char *SVMPointerPassing_test_kernel[] = {
// The buffer is initialized to known values at each location. // The buffer is initialized to known values at each location.
// The kernel checks that it finds the expected value at each location. // The kernel checks that it finds the expected value at each location.
// TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--. // TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--.
int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_pointer_passing)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0],
if(error) return -1; &contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return -1;
clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error); clKernelWrapper kernel_verify_char =
test_error(error,"clCreateKernel failed"); clCreateKernel(program, "verify_char", &error);
test_error(error, "clCreateKernel failed");
size_t bufSize = 256; size_t bufSize = 256;
cl_uchar *pbuf_svm_alloc = (cl_uchar*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0); cl_uchar *pbuf_svm_alloc = (cl_uchar *)clSVMAlloc(
context, CL_MEM_READ_WRITE, sizeof(cl_uchar) * bufSize, 0);
cl_int *pNumCorrect = NULL; cl_int *pNumCorrect = NULL;
pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); pNumCorrect =
(cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
{
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf_svm_alloc, &error);
test_error(error, "clCreateBuffer failed.");
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
test_error(error, "clCreateBuffer failed.");
error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct);
test_error(error, "clSetKernelArg failed");
// put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them.
cl_command_queue cmdq = queues[0];
cl_uchar* pbuf_map_buffer = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error);
test_error2(error, pbuf_map_buffer, "clEnqueueMapBuffer failed");
for(int i = 0; i<(int)bufSize; i++)
{ {
pbuf_map_buffer[i]= (cl_uchar)i; clMemWrapper buf =
} clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
error = clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0,NULL,NULL); sizeof(cl_uchar) * bufSize, pbuf_svm_alloc, &error);
test_error(error, "clEnqueueUnmapMemObject failed."); test_error(error, "clCreateBuffer failed.");
for (cl_uint ii = 0; ii<num_devices; ++ii) // iterate over all devices in the platform. clMemWrapper num_correct = clCreateBuffer(
{ context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
cmdq = queues[ii]; test_error(error, "clCreateBuffer failed.");
for(int i = 0; i<(int)bufSize; i++)
{
cl_uchar* pChar = &pbuf_svm_alloc[i];
error = clSetKernelArgSVMPointer(kernel_verify_char, 0, pChar); // pass a pointer to a location within the buffer
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar), (void *) &i ); // pass the expected value at the above location.
test_error(error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL, &bufSize, NULL, 0, NULL, NULL);
test_error(error,"clEnqueueNDRangeKernel failed");
pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error); error = clSetKernelArg(kernel_verify_char, 1, sizeof(void *),
test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed"); (void *)&num_correct);
cl_int correct_count = *pNumCorrect; test_error(error, "clSetKernelArg failed");
error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
// put values into buf so that we can expect to see these values in the
// kernel when we pass a pointer to them.
cl_command_queue cmdq = queues[0];
cl_uchar *pbuf_map_buffer = (cl_uchar *)clEnqueueMapBuffer(
cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
sizeof(cl_uchar) * bufSize, 0, NULL, NULL, &error);
test_error2(error, pbuf_map_buffer, "clEnqueueMapBuffer failed");
for (int i = 0; i < (int)bufSize; i++)
{
pbuf_map_buffer[i] = (cl_uchar)i;
}
error =
clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0, NULL, NULL);
test_error(error, "clEnqueueUnmapMemObject failed."); test_error(error, "clEnqueueUnmapMemObject failed.");
if(correct_count != 1) for (cl_uint ii = 0; ii < num_devices;
++ii) // iterate over all devices in the platform.
{ {
log_error("Passing pointer directly to kernel for byte #%d failed on device %d\n", i, ii); cmdq = queues[ii];
return -1; for (int i = 0; i < (int)bufSize; i++)
{
cl_uchar *pChar = &pbuf_svm_alloc[i];
error = clSetKernelArgSVMPointer(
kernel_verify_char, 0,
pChar); // pass a pointer to a location within the buffer
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar),
(void *)&i); // pass the expected value
// at the above location.
test_error(error, "clSetKernelArg failed");
error =
clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL,
&bufSize, NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
pNumCorrect = (cl_int *)clEnqueueMapBuffer(
cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
sizeof(cl_int), 0, NULL, NULL, &error);
test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
cl_int correct_count = *pNumCorrect;
error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect,
0, NULL, NULL);
test_error(error, "clEnqueueUnmapMemObject failed.");
if (correct_count != 1)
{
log_error("Passing pointer directly to kernel for byte #%d "
"failed on device %d\n",
i, ii);
return -1;
}
}
} }
}
error = clFinish(cmdq);
test_error(error, "clFinish failed");
} }
error = clFinish(cmdq);
test_error(error, "clFinish failed");
}
clSVMFree(context, pbuf_svm_alloc);
clSVMFree(context, pNumCorrect);
clSVMFree(context, pbuf_svm_alloc); return 0;
clSVMFree(context, pNumCorrect);
return 0;
} }

View File

@@ -42,7 +42,7 @@ const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag. // Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
// //
int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_set_kernel_exec_info_svm_ptrs)
{ {
clContextWrapper c = NULL; clContextWrapper c = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;

View File

@@ -272,12 +272,14 @@ int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2
return 0; return 0;
} }
int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_address_space_coarse_grain_old_api)
{ {
return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_FALSE); return shared_address_space_coarse_grain(deviceID, context, queue,
num_elements, CL_FALSE);
} }
int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_address_space_coarse_grain_new_api)
{ {
return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_TRUE); return shared_address_space_coarse_grain(deviceID, context, queue,
num_elements, CL_TRUE);
} }

View File

@@ -20,82 +20,103 @@
// This is done by creating a linked list on a device and then verifying the correctness of the list // This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within // on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
// the platform. The test passes only if every combination passes. // the platform. The test passes only if every combination passes.
int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_address_space_fine_grain)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM); error = create_cl_objects(
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. deviceID, &linked_list_create_and_verify_kernels[0], &contextWrapper,
if(error < 0) return -1; // fail test. &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
context = contextWrapper;
if (error == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (error < 0) return -1; // fail test.
size_t numLists = num_elements; size_t numLists = num_elements;
cl_int ListLength = 32; cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); clKernelWrapper kernel_create_lists =
test_error(error, "clCreateKernel failed"); clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); clKernelWrapper kernel_verify_lists =
test_error(error, "clCreateKernel failed"); clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed");
// this allocation holds the linked list nodes. // this allocation holds the linked list nodes.
// FIXME: remove the alignment once prototype can handle it // FIXME: remove the alignment once prototype can handle it
Node* pNodes = (Node*) align_malloc(numLists*ListLength*sizeof(Node),128); Node *pNodes =
test_error2(error, pNodes, "malloc failed"); (Node *)align_malloc(numLists * ListLength * sizeof(Node), 128);
test_error2(error, pNodes, "malloc failed");
// this allocation holds an index into the nodes buffer, it is used for node allocation // this allocation holds an index into the nodes buffer, it is used for node
size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128); // allocation
test_error2(error, pAllocator, "malloc failed"); size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128);
test_error2(error, pAllocator, "malloc failed");
// this allocation holds the count of correct nodes, which is computed by the verify kernel. // this allocation holds the count of correct nodes, which is computed by
cl_int* pNum_correct = (cl_int*) align_malloc(sizeof(cl_int), 128); // the verify kernel.
test_error2(error, pNum_correct, "malloc failed"); cl_int *pNum_correct = (cl_int *)align_malloc(sizeof(cl_int), 128);
test_error2(error, pNum_correct, "malloc failed");
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes); error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator); error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),(void *) &ListLength); error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),
(void *)&ListLength);
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes); error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct); error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength); error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int),
test_error(error, "clSetKernelArg failed"); (void *)&ListLength);
test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host). // Create linked list on one device and verify on another device (or the
// Do this for all possible combinations of devices and host within the platform. // host). Do this for all possible combinations of devices and host within
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on // the platform.
{ for (int ci = 0; ci < (int)num_devices + 1;
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on ci++) // ci is CreationIndex, index of device/q to create linked list
// on
{ {
if(ci == num_devices) // last device index represents the host, note the num_device+1 above. for (int vi = 0; vi < (int)num_devices + 1;
{ vi++) // vi is VerificationIndex, index of device/q to verify
log_info("creating linked list on host "); // linked list on
create_linked_lists(pNodes, numLists, ListLength); {
} if (ci == num_devices) // last device index represents the host,
else // note the num_device+1 above.
{ {
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists); log_info("creating linked list on host ");
if(error) return -1; create_linked_lists(pNodes, numLists, ListLength);
} }
else
{
error = create_linked_lists_on_device_no_map(
ci, queues[ci], pAllocator, kernel_create_lists, numLists);
if (error) return -1;
}
if(vi == num_devices) if (vi == num_devices)
{ {
error = verify_linked_lists(pNodes, numLists, ListLength); error = verify_linked_lists(pNodes, numLists, ListLength);
if(error) return -1; if (error) return -1;
} }
else else
{ {
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNum_correct, kernel_verify_lists, ListLength, numLists); error = verify_linked_lists_on_device_no_map(
if(error) return -1; vi, queues[vi], pNum_correct, kernel_verify_lists,
} ListLength, numLists);
if (error) return -1;
}
}
} }
}
align_free(pNodes); align_free(pNodes);
align_free(pAllocator); align_free(pAllocator);
align_free(pNum_correct); align_free(pNum_correct);
return 0; return 0;
} }

View File

@@ -61,78 +61,104 @@ cl_int verify_linked_lists_on_device_no_map(int vi, cl_command_queue cmdq,cl_int
// This is done by creating a linked list on a device and then verifying the correctness of the list // This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within // on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
// the platform. The test passes only if every combination passes. // the platform. The test passes only if every combination passes.
int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_address_space_fine_grain_buffers)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER); error = create_cl_objects(
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. deviceID, &linked_list_create_and_verify_kernels[0], &contextWrapper,
if(error < 0) return -1; // fail test. &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
context = contextWrapper;
if (error == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (error < 0) return -1; // fail test.
size_t numLists = num_elements; size_t numLists = num_elements;
cl_int ListLength = 32; cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); clKernelWrapper kernel_create_lists =
test_error(error, "clCreateKernel failed"); clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); clKernelWrapper kernel_verify_lists =
test_error(error, "clCreateKernel failed"); clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed");
// this buffer holds the linked list nodes. // this buffer holds the linked list nodes.
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(Node)*ListLength*numLists, 0); Node *pNodes = (Node *)clSVMAlloc(
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(Node) * ListLength * numLists, 0);
// this buffer holds an index into the nodes buffer, it is used for node allocation // this buffer holds an index into the nodes buffer, it is used for node
size_t *pAllocator = (size_t*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(size_t), 0); // allocation
size_t *pAllocator = (size_t *)clSVMAlloc(
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(size_t), 0);
// this buffer holds the count of correct nodes, which is computed by the verify kernel. // this buffer holds the count of correct nodes, which is computed by the
cl_int *pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_int), 0); // verify kernel.
cl_int *pNumCorrect = (cl_int *)clSVMAlloc(
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(cl_int), 0);
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes); error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator); error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength); error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),
(void *)&ListLength);
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes); error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect); error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength); error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int),
test_error(error, "clSetKernelArg failed"); (void *)&ListLength);
test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host). // Create linked list on one device and verify on another device (or the
// Do this for all possible combinations of devices and host within the platform. // host). Do this for all possible combinations of devices and host within
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on // the platform.
{ for (int ci = 0; ci < (int)num_devices + 1;
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on ci++) // ci is CreationIndex, index of device/q to create linked list
// on
{ {
if(ci == num_devices) // last device index represents the host, note the num_device+1 above. for (int vi = 0; vi < (int)num_devices + 1;
{ vi++) // vi is VerificationIndex, index of device/q to verify
log_info("SVM: creating linked list on host "); // linked list on
create_linked_lists(pNodes, numLists, ListLength); {
} if (ci == num_devices) // last device index represents the host,
else // note the num_device+1 above.
{ {
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists); log_info("SVM: creating linked list on host ");
if(error) return -1; create_linked_lists(pNodes, numLists, ListLength);
} }
else
{
error = create_linked_lists_on_device_no_map(
ci, queues[ci], pAllocator, kernel_create_lists, numLists);
if (error) return -1;
}
if(vi == num_devices) if (vi == num_devices)
{ {
error = verify_linked_lists(pNodes, numLists, ListLength); error = verify_linked_lists(pNodes, numLists, ListLength);
if(error) return -1; if (error) return -1;
} }
else else
{ {
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNumCorrect, kernel_verify_lists, ListLength, numLists); error = verify_linked_lists_on_device_no_map(
if(error) return -1; vi, queues[vi], pNumCorrect, kernel_verify_lists,
} ListLength, numLists);
if (error) return -1;
}
}
} }
}
clSVMFree(context, pNodes); clSVMFree(context, pNodes);
clSVMFree(context, pAllocator); clSVMFree(context, pAllocator);
clSVMFree(context, pNumCorrect); clSVMFree(context, pNumCorrect);
return 0; return 0;
} }

View File

@@ -125,118 +125,160 @@ cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem node
// on another device or the host. // on another device or the host.
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly. // The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
// This basic test is performed for all combinations of devices and the host. // This basic test is performed for all combinations of devices and the host.
int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_sub_buffers)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0],
if(error) return -1; &contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return -1;
size_t numLists = num_elements; size_t numLists = num_elements;
if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size. if (numLists & 0x1)
numLists++; // force even size, so we can easily create two sub-buffers
// of same size.
cl_int ListLength = 32; cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); clKernelWrapper kernel_create_lists =
test_error(error, "clCreateKernel failed"); clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); clKernelWrapper kernel_verify_lists =
test_error(error, "clCreateKernel failed"); clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed");
size_t nodes_bufsize = sizeof(Node)*ListLength*numLists; size_t nodes_bufsize = sizeof(Node) * ListLength * numLists;
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0); Node *pNodes =
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0); (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
Node *pNodes2 =
(Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
{
// this buffer holds some of the linked list nodes.
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error);
test_error(error, "clCreateBuffer failed.");
cl_buffer_region r;
r.origin = 0;
r.size = nodes_bufsize / 2;
// this should inherit the flag settings from nodes buffer
clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
test_error(error, "clCreateSubBuffer");
r.origin = nodes_bufsize / 2;
clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
test_error(error, "clCreateSubBuffer");
// this buffer holds some of the linked list nodes.
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
test_error(error, "clCreateBuffer failed.");
r.origin = 0;
r.size = nodes_bufsize / 2;
// this should inherit the flag settings from nodes buffer
clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
test_error(error, "clCreateSubBuffer");
r.origin = nodes_bufsize / 2;
clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error);
test_error(error, "clCreateSubBuffer");
// this buffer holds the index into the nodes buffer that is used for node allocation
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel.
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1);
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2);
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator);
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength);
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1);
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2);
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct);
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength);
test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host).
// Do this for all possible combinations of devices and host within the platform.
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
{ {
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on // this buffer holds some of the linked list nodes.
{ clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
if(ci == num_devices) // last device index represents the host, note the num_device+1 above. nodes_bufsize, pNodes, &error);
{ test_error(error, "clCreateBuffer failed.");
error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists);
if(error) return -1;
}
else
{
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
if(error) return -1;
}
if(vi == num_devices) cl_buffer_region r;
{ r.origin = 0;
error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists); r.size = nodes_bufsize / 2;
if(error) return -1; // this should inherit the flag settings from nodes buffer
} clMemWrapper nodes_sb1 = clCreateSubBuffer(
else nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
{ test_error(error, "clCreateSubBuffer");
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists); r.origin = nodes_bufsize / 2;
if(error) return -1; clMemWrapper nodes_sb2 = clCreateSubBuffer(
} nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
} // inner loop, vi test_error(error, "clCreateSubBuffer");
} // outer loop, ci
}
clSVMFree(context, pNodes2);
clSVMFree(context, pNodes);
return 0;
// this buffer holds some of the linked list nodes.
clMemWrapper nodes2 = clCreateBuffer(
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
pNodes2, &error);
test_error(error, "clCreateBuffer failed.");
r.origin = 0;
r.size = nodes_bufsize / 2;
// this should inherit the flag settings from nodes buffer
clMemWrapper nodes2_sb1 = clCreateSubBuffer(
nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
test_error(error, "clCreateSubBuffer");
r.origin = nodes_bufsize / 2;
clMemWrapper nodes2_sb2 = clCreateSubBuffer(
nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
test_error(error, "clCreateSubBuffer");
// this buffer holds the index into the nodes buffer that is used for
// node allocation
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the
// verify kernel.
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *),
(void *)&nodes_sb1);
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *),
(void *)&nodes2_sb1);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *),
(void *)&nodes_sb2);
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void *),
(void *)&nodes2_sb2);
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void *),
(void *)&allocator);
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),
(void *)&ListLength);
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *),
(void *)&nodes_sb1);
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *),
(void *)&nodes2_sb1);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *),
(void *)&nodes_sb2);
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void *),
(void *)&nodes2_sb2);
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void *),
(void *)&num_correct);
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),
(void *)&ListLength);
test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the
// host). Do this for all possible combinations of devices and host
// within the platform.
for (int ci = 0; ci < (int)num_devices + 1;
ci++) // ci is CreationIndex, index of device/q to create linked
// list on
{
for (int vi = 0; vi < (int)num_devices + 1;
vi++) // vi is VerificationIndex, index of device/q to verify
// linked list on
{
if (ci == num_devices) // last device index represents the host,
// note the num_device+1 above.
{
error = create_linked_lists_on_host_sb(
queues[0], nodes, nodes2, ListLength, numLists);
if (error) return -1;
}
else
{
error = create_linked_lists_on_device(
ci, queues[ci], allocator, kernel_create_lists,
numLists);
if (error) return -1;
}
if (vi == num_devices)
{
error = verify_linked_lists_on_host_sb(
vi, queues[0], nodes, nodes2, ListLength, numLists);
if (error) return -1;
}
else
{
error = verify_linked_lists_on_device(
vi, queues[vi], num_correct, kernel_verify_lists,
ListLength, numLists);
if (error) return -1;
}
} // inner loop, vi
} // outer loop, ci
}
clSVMFree(context, pNodes2);
clSVMFree(context, pNodes);
return 0;
} }