switch SVM tests to the new test registration framework (#2168)

Switches the SVM tests to the new test registration framework.

The first commit is the best to review and contains the actual changes.
The second commit purely has formatting changes.

Note that several of these changes were a bit more than mechanical
because many of the SVM tests create a new context vs. using the context
provided by the harness and passed to each test function. The previous
code named the context provided by the harness differently, and hence
could use the name "context" in each test function, but with the new
test registration framework this is no longer possible. Instead, I am
creating the new context using the name "contextWrapper" and then
assigning it to the "context" passed to the test function, which seems
like the best way to avoid using the wrong context unintentionally. I am
open to suggestions to do this differently.

I have verified that the same calls are made before and after these
changes, and specifically that there are no context leaks.
This commit is contained in:
Ben Ashbaugh
2024-12-03 14:51:23 -08:00
committed by GitHub
parent e361b387d9
commit d99b302f90
16 changed files with 1066 additions and 854 deletions

View File

@@ -81,25 +81,6 @@ extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, c
extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists ); extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists );
extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists ); extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists );
extern int test_svm_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements);
extern int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector<std::string> extensions_list = std::vector<std::string>()); extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector<std::string> extensions_list = std::vector<std::string>());
extern const char *linked_list_create_and_verify_kernels[]; extern const char *linked_list_create_and_verify_kernels[];

View File

@@ -260,26 +260,6 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt
return 0; return 0;
} }
test_definition test_list[] = {
ADD_TEST(svm_byte_granularity),
ADD_TEST(svm_set_kernel_exec_info_svm_ptrs),
ADD_TEST(svm_fine_grain_memory_consistency),
ADD_TEST(svm_fine_grain_sync_buffers),
ADD_TEST(svm_shared_address_space_fine_grain),
ADD_TEST(svm_shared_sub_buffers),
ADD_TEST(svm_shared_address_space_fine_grain_buffers),
ADD_TEST(svm_allocate_shared_buffer),
ADD_TEST(svm_allocate_shared_buffer_negative),
ADD_TEST(svm_shared_address_space_coarse_grain_old_api),
ADD_TEST(svm_shared_address_space_coarse_grain_new_api),
ADD_TEST(svm_cross_buffer_pointers_coarse_grain),
ADD_TEST(svm_pointer_passing),
ADD_TEST(svm_enqueue_api),
ADD_TEST_VERSION(svm_migrate, Version(2, 1)),
};
const int test_num = ARRAY_SIZE( test_list );
test_status InitCL(cl_device_id device) { test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device); auto version = get_device_cl_version(device);
auto expected_min_version = Version(2, 0); auto expected_min_version = Version(2, 0);
@@ -310,6 +290,7 @@ test_status InitCL(cl_device_id device) {
int main(int argc, const char *argv[]) int main(int argc, const char *argv[])
{ {
return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL); return runTestHarnessWithCheck(
argc, argv, test_registry::getInstance().num_tests(),
test_registry::getInstance().definitions(), true, 0, InitCL);
} }

View File

@@ -41,20 +41,24 @@ const char* flag_set_names[] = {
}; };
int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_allocate_shared_buffer)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
cl_device_svm_capabilities caps; cl_device_svm_capabilities caps;
err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL); err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES,
sizeof(cl_device_svm_capabilities), &caps, NULL);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
// under construction... // under construction...
err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); err =
create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (err) return -1; if (err) return -1;
size_t size = 1024; size_t size = 1024;
@@ -63,15 +67,18 @@ int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2,
int num_flags = sizeof(flag_set) / sizeof(cl_mem_flags); int num_flags = sizeof(flag_set) / sizeof(cl_mem_flags);
for (int i = 0; i < num_flags; i++) for (int i = 0; i < num_flags; i++)
{ {
if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0) if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0
|| ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0)) && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
|| ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0
&& (caps & CL_DEVICE_SVM_ATOMICS) == 0))
{ {
log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]); log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]);
continue; continue;
} }
log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]); log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);
cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0); cl_char *pBufData1 =
(cl_char *)clSVMAlloc(context, flag_set[i], size, 0);
if (pBufData1 == NULL) if (pBufData1 == NULL)
{ {
log_error("SVMalloc returned NULL"); log_error("SVMalloc returned NULL");
@@ -79,7 +86,8 @@ int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2,
} }
{ {
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err); clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
size, pBufData1, &err);
test_error(err, "clCreateBuffer failed"); test_error(err, "clCreateBuffer failed");
cl_char *pBufData2 = NULL; cl_char *pBufData2 = NULL;
@@ -89,15 +97,19 @@ int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2,
if (!(flag_set[i] & CL_MEM_HOST_NO_ACCESS)) if (!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
{ {
pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err); pBufData2 = (cl_char *)clEnqueueMapBuffer(
queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL, NULL,
&err);
test_error(err, "clEnqueueMapBuffer failed"); test_error(err, "clEnqueueMapBuffer failed");
if (pBufData2 != pBufData1 || NULL == pBufData1) if (pBufData2 != pBufData1 || NULL == pBufData1)
{ {
log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc"); log_error("SVM pointer returned by clEnqueueMapBuffer "
"doesn't match pointer returned by clSVMalloc");
return -1; return -1;
} }
err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL); err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0,
NULL, NULL);
test_error(err, "clEnqueueUnmapMemObject failed"); test_error(err, "clEnqueueUnmapMemObject failed");
err = clFinish(queues[0]); err = clFinish(queues[0]);
test_error(err, "clFinish failed"); test_error(err, "clFinish failed");

View File

@@ -41,12 +41,9 @@ const char* svm_flag_set_names[] = {
}; };
int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID, REGISTER_TEST(svm_allocate_shared_buffer_negative)
cl_context context2,
cl_command_queue queue,
int num_elements)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
@@ -58,8 +55,10 @@ int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
// under construction... // under construction...
err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], err =
create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (err) return err; if (err) return err;
size_t size = 1024; size_t size = 1024;

View File

@@ -48,10 +48,9 @@ const char *byte_manipulation_kernels[] = {
}; };
REGISTER_TEST(svm_byte_granularity)
int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{ {
clContextWrapper context; clContextWrapper contextWrapper;
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper k1, k2; clKernelWrapper k1, k2;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
@@ -59,8 +58,13 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER); err = create_cl_objects(deviceID, &byte_manipulation_kernels[0],
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. &contextWrapper, &program, &queues[0], &num_devices,
CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
context = contextWrapper;
if (err == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (err < 0) return -1; // fail test. if (err < 0) return -1; // fail test.
cl_uint num_devices_plus_host = num_devices + 1; cl_uint num_devices_plus_host = num_devices + 1;
@@ -71,12 +75,17 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
test_error(err, "clCreateKernel failed"); test_error(err, "clCreateKernel failed");
cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0); cl_char *pA = (cl_char *)clSVMAlloc(
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(cl_char) * num_elements, 0);
cl_uint **error_counts = (cl_uint **)malloc(sizeof(void *) * num_devices); cl_uint **error_counts = (cl_uint **)malloc(sizeof(void *) * num_devices);
for(cl_uint i=0; i < num_devices; i++) { for (cl_uint i = 0; i < num_devices; i++)
error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0); {
error_counts[i] = (cl_uint *)clSVMAlloc(
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(cl_uint), 0);
*error_counts[i] = 0; *error_counts[i] = 0;
} }
for (int i = 0; i < num_elements; i++) pA[i] = -1; for (int i = 0; i < num_elements; i++) pA[i] = -1;
@@ -91,18 +100,22 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
{ {
err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d); err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
test_error(err, "clSetKernelArg failed"); test_error(err, "clSetKernelArg failed");
err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL); err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL,
0, NULL, NULL);
test_error(err, "clEnqueueNDRangeKernel failed"); test_error(err, "clEnqueueNDRangeKernel failed");
} }
for (cl_uint d = 0; d < num_devices; d++) clFlush(queues[d]); for (cl_uint d = 0; d < num_devices; d++) clFlush(queues[d]);
cl_uint host_id = num_devices; // host code will take the id above the devices. cl_uint host_id =
for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id; num_devices; // host code will take the id above the devices.
for (int i = (int)num_devices; i < num_elements; i += num_devices_plus_host)
pA[i] = host_id;
for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
// now check that each device can see the byte writes made by the other devices. // now check that each device can see the byte writes made by the other
// devices.
err |= clSetKernelArgSVMPointer(k2, 0, pA); err |= clSetKernelArgSVMPointer(k2, 0, pA);
err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host); err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
@@ -115,7 +128,9 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]); err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
test_error(err, "clSetKernelArg failed"); test_error(err, "clSetKernelArg failed");
err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL); err =
clEnqueueNDRangeKernel(queues[id], k2, 1, NULL,
&adjusted_num_elements, NULL, 0, NULL, NULL);
test_error(err, "clEnqueueNDRangeKernel failed"); test_error(err, "clEnqueueNDRangeKernel failed");
} }
@@ -124,24 +139,24 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
bool failed = false; bool failed = false;
// see if any of the devices found errors // see if any of the devices found errors
for(cl_uint i=0; i < num_devices; i++) { for (cl_uint i = 0; i < num_devices; i++)
if(*error_counts[i] > 0) {
failed = true; if (*error_counts[i] > 0) failed = true;
} }
cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2; cl_uint expected =
(num_devices_plus_host * (num_devices_plus_host - 1)) / 2;
// check that host can see the byte writes made by the devices. // check that host can see the byte writes made by the devices.
for (cl_uint i = 0; i < num_elements - num_devices_plus_host; i++) for (cl_uint i = 0; i < num_elements - num_devices_plus_host; i++)
{ {
int sum = 0; int sum = 0;
for (cl_uint j = 0; j < num_devices_plus_host; j++) sum += pA[i + j]; for (cl_uint j = 0; j < num_devices_plus_host; j++) sum += pA[i + j];
if(sum != expected) if (sum != expected) failed = true;
failed = true;
} }
clSVMFree(context, pA); clSVMFree(context, pA);
for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]); for (cl_uint i = 0; i < num_devices; i++)
clSVMFree(context, error_counts[i]);
if(failed) if (failed) return -1;
return -1;
return 0; return 0;
} }

View File

@@ -128,85 +128,120 @@ cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes,
// on another device or the host. // on another device or the host.
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly. // The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
// This basic test is performed for all combinations of devices and the host. // This basic test is performed for all combinations of devices and the host.
int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_cross_buffer_pointers_coarse_grain)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0],
&contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return -1; if (error) return -1;
size_t numLists = num_elements; size_t numLists = num_elements;
cl_int ListLength = 32; cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); clKernelWrapper kernel_create_lists =
clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed"); test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); clKernelWrapper kernel_verify_lists =
clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed"); test_error(error, "clCreateKernel failed");
// this buffer holds some of the linked list nodes. // this buffer holds some of the linked list nodes.
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); Node *pNodes = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE,
sizeof(Node) * ListLength * numLists, 0);
// this buffer holds some of the linked list nodes. // this buffer holds some of the linked list nodes.
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); Node *pNodes2 = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE,
sizeof(Node) * ListLength * numLists, 0);
{ {
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error); clMemWrapper nodes = clCreateBuffer(
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
pNodes, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error); clMemWrapper nodes2 = clCreateBuffer(
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
pNodes2, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
// this buffer holds the index into the nodes buffer that is used for node allocation // this buffer holds the index into the nodes buffer that is used for
// node allocation
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(size_t), NULL, &error); sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel. // this buffer holds the count of correct nodes which is computed by the
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); // verify kernel.
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes); error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *),
//error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes); (void *)&nodes);
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2); // error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *)
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator); // pNodes);
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength); error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *),
(void *)&nodes2);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *),
(void *)&allocator);
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int),
(void *)&ListLength);
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes); error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *),
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2); (void *)&nodes);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct); error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *),
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength); (void *)&nodes2);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *),
(void *)&num_correct);
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int),
(void *)&ListLength);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host). // Create linked list on one device and verify on another device (or the
// Do this for all possible combinations of devices and host within the platform. // host). Do this for all possible combinations of devices and host
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on // within the platform.
for (int ci = 0; ci < (int)num_devices + 1;
ci++) // ci is CreationIndex, index of device/q to create linked
// list on
{ {
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on for (int vi = 0; vi < (int)num_devices + 1;
vi++) // vi is VerificationIndex, index of device/q to verify
// linked list on
{ {
if(ci == num_devices) // last device index represents the host, note the num_device+1 above. if (ci == num_devices) // last device index represents the host,
// note the num_device+1 above.
{ {
error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists); error = create_linked_lists_on_host(
queues[0], nodes, nodes2, ListLength, numLists);
if (error) return -1; if (error) return -1;
} }
else else
{ {
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists); error = create_linked_lists_on_device(
ci, queues[ci], allocator, kernel_create_lists,
numLists);
if (error) return -1; if (error) return -1;
} }
if (vi == num_devices) if (vi == num_devices)
{ {
error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists); error = verify_linked_lists_on_host(
vi, queues[0], nodes, nodes2, ListLength, numLists);
if (error) return -1; if (error) return -1;
} }
else else
{ {
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists); error = verify_linked_lists_on_device(
vi, queues[vi], num_correct, kernel_verify_lists,
ListLength, numLists);
if (error) return -1; if (error) return -1;
} }
} // inner loop, vi } // inner loop, vi

View File

@@ -70,9 +70,9 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point
data->status.store(1, std::memory_order_release); data->status.store(1, std::memory_order_release);
} }
int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_enqueue_api)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
cl_uint num_devices = 0; cl_uint num_devices = 0;
const size_t elementNum = 1024; const size_t elementNum = 1024;
@@ -80,48 +80,38 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
RandomSeed seed(0); RandomSeed seed(0);
error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, NULL, &contextWrapper, NULL, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return TEST_FAIL; if (error) return TEST_FAIL;
queue = queues[0]; queue = queues[0];
// all possible sizes of vectors and scalars // all possible sizes of vectors and scalars
size_t typeSizes[] = { size_t typeSizes[] = {
sizeof(cl_uchar), sizeof(cl_uchar), sizeof(cl_uchar2), sizeof(cl_uchar3),
sizeof(cl_uchar2), sizeof(cl_uchar4), sizeof(cl_uchar8), sizeof(cl_uchar16),
sizeof(cl_uchar3), sizeof(cl_ushort), sizeof(cl_ushort2), sizeof(cl_ushort3),
sizeof(cl_uchar4), sizeof(cl_ushort4), sizeof(cl_ushort8), sizeof(cl_ushort16),
sizeof(cl_uchar8), sizeof(cl_uint), sizeof(cl_uint2), sizeof(cl_uint3),
sizeof(cl_uchar16), sizeof(cl_uint4), sizeof(cl_uint8), sizeof(cl_uint16),
sizeof(cl_ushort), sizeof(cl_ulong), sizeof(cl_ulong2), sizeof(cl_ulong3),
sizeof(cl_ushort2), sizeof(cl_ulong4), sizeof(cl_ulong8), sizeof(cl_ulong16),
sizeof(cl_ushort3),
sizeof(cl_ushort4),
sizeof(cl_ushort8),
sizeof(cl_ushort16),
sizeof(cl_uint),
sizeof(cl_uint2),
sizeof(cl_uint3),
sizeof(cl_uint4),
sizeof(cl_uint8),
sizeof(cl_uint16),
sizeof(cl_ulong),
sizeof(cl_ulong2),
sizeof(cl_ulong3),
sizeof(cl_ulong4),
sizeof(cl_ulong8),
sizeof(cl_ulong16),
}; };
enum allocationTypes { enum allocationTypes
{
host, host,
svm svm
}; };
struct TestType { struct TestType
{
allocationTypes srcAlloc; allocationTypes srcAlloc;
allocationTypes dstAlloc; allocationTypes dstAlloc;
TestType(allocationTypes type1, allocationTypes type2): srcAlloc(type1), dstAlloc(type2){} TestType(allocationTypes type1, allocationTypes type2)
: srcAlloc(type1), dstAlloc(type2)
{}
}; };
std::vector<TestType> testTypes; std::vector<TestType> testTypes;
@@ -133,11 +123,14 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
for (const auto test_case : testTypes) for (const auto test_case : testTypes)
{ {
log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n", test_case.srcAlloc == svm ? "svm" : "host", test_case.dstAlloc == svm ? "svm" : "host"); log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n",
test_case.srcAlloc == svm ? "svm" : "host",
test_case.dstAlloc == svm ? "svm" : "host");
for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i) for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i)
{ {
// generate initial data // generate initial data
std::vector<cl_uchar> fillData0(typeSizes[i]), fillData1(typeSizes[i]); std::vector<cl_uchar> fillData0(typeSizes[i]),
fillData1(typeSizes[i]);
generate_data(fillData0, typeSizes[i], seed); generate_data(fillData0, typeSizes[i], seed);
generate_data(fillData1, typeSizes[i], seed); generate_data(fillData1, typeSizes[i], seed);
size_t data_size = elementNum * typeSizes[i]; size_t data_size = elementNum * typeSizes[i];
@@ -146,16 +139,22 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
generate_data(srcHostData, srcHostData.size(), seed); generate_data(srcHostData, srcHostData.size(), seed);
generate_data(dstHostData, dstHostData.size(), seed); generate_data(dstHostData, dstHostData.size(), seed);
cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0); cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0); context, CL_MEM_READ_WRITE, data_size, 0);
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(
context, CL_MEM_READ_WRITE, data_size, 0);
clEventWrapper userEvent = clCreateUserEvent(context, &error); clEventWrapper userEvent = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed"); test_error(error, "clCreateUserEvent failed");
clEventWrapper eventMemFillList[2]; clEventWrapper eventMemFillList[2];
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[0]); error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0],
typeSizes[i], data_size, 1, &userEvent,
&eventMemFillList[0]);
test_error(error, "clEnqueueSVMMemFill failed"); test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[1]); error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0],
typeSizes[i], data_size, 1, &userEvent,
&eventMemFillList[1]);
test_error(error, "clEnqueueSVMMemFill failed"); test_error(error, "clEnqueueSVMMemFill failed");
error = clSetUserEventStatus(userEvent, CL_COMPLETE); error = clSetUserEventStatus(userEvent, CL_COMPLETE);
@@ -163,27 +162,37 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
cl_uchar *src_ptr; cl_uchar *src_ptr;
cl_uchar *dst_ptr; cl_uchar *dst_ptr;
if (test_case.srcAlloc == host) { if (test_case.srcAlloc == host)
{
src_ptr = srcHostData.data(); src_ptr = srcHostData.data();
} else if (test_case.srcAlloc == svm) { }
else if (test_case.srcAlloc == svm)
{
src_ptr = srcBuffer; src_ptr = srcBuffer;
} }
if (test_case.dstAlloc == host) { if (test_case.dstAlloc == host)
{
dst_ptr = dstHostData.data(); dst_ptr = dstHostData.data();
} else if (test_case.dstAlloc == svm) { }
else if (test_case.dstAlloc == svm)
{
dst_ptr = dstBuffer; dst_ptr = dstBuffer;
} }
clEventWrapper eventMemcpy; clEventWrapper eventMemcpy;
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size, 2, &eventMemFillList[0], &eventMemcpy); error =
clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size,
2, &eventMemFillList[0], &eventMemcpy);
test_error(error, "clEnqueueSVMMemcpy failed"); test_error(error, "clEnqueueSVMMemcpy failed");
// coarse grain only supported. Synchronization required using map // coarse grain only supported. Synchronization required using map
clEventWrapper eventMap[2]; clEventWrapper eventMap[2];
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer, data_size, 1, &eventMemcpy, &eventMap[0]); error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer,
data_size, 1, &eventMemcpy, &eventMap[0]);
test_error(error, "clEnqueueSVMMap srcBuffer failed"); test_error(error, "clEnqueueSVMMap srcBuffer failed");
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer, data_size, 1, &eventMemcpy, &eventMap[1]); error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer,
data_size, 1, &eventMemcpy, &eventMap[1]);
test_error(error, "clEnqueueSVMMap dstBuffer failed"); test_error(error, "clEnqueueSVMMap dstBuffer failed");
error = clWaitForEvents(2, &eventMap[0]); error = clWaitForEvents(2, &eventMap[0]);
@@ -192,29 +201,39 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
// data verification // data verification
for (size_t j = 0; j < data_size; ++j) for (size_t j = 0; j < data_size; ++j)
{ {
if (dst_ptr[j] != src_ptr[j]) { if (dst_ptr[j] != src_ptr[j])
log_error("Invalid data at index %zu, dst_ptr %d, src_ptr %d\n", j, {
dst_ptr[j], src_ptr[j]); log_error(
"Invalid data at index %zu, dst_ptr %d, src_ptr %d\n",
j, dst_ptr[j], src_ptr[j]);
return TEST_FAIL; return TEST_FAIL;
} }
} }
clEventWrapper eventUnmap[2]; clEventWrapper eventUnmap[2];
error = clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]); error =
clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]);
test_error(error, "clEnqueueSVMUnmap srcBuffer failed"); test_error(error, "clEnqueueSVMUnmap srcBuffer failed");
error = clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]); error =
clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]);
test_error(error, "clEnqueueSVMUnmap dstBuffer failed"); test_error(error, "clEnqueueSVMUnmap dstBuffer failed");
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0); error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0],
typeSizes[i], data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed"); test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0); error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2,
&fillData1[0], typeSizes[i],
data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed"); test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, data_size / 2, 0, 0, 0); error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer,
data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed"); test_error(error, "clEnqueueSVMMemcpy failed");
error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + data_size / 2, srcBuffer + data_size / 2, data_size / 2, 0, 0, 0); error = clEnqueueSVMMemcpy(
queue, CL_TRUE, dstBuffer + data_size / 2,
srcBuffer + data_size / 2, data_size / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed"); test_error(error, "clEnqueueSVMMemcpy failed");
void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer }; void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer };
@@ -228,68 +247,87 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
// event info verification for new SVM commands // event info verification for new SVM commands
cl_command_type commandType; cl_command_type commandType;
for (auto &check_event : eventMemFillList) { for (auto &check_event : eventMemFillList)
error = clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); {
error =
clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed"); test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMFILL) if (commandType != CL_COMMAND_SVM_MEMFILL)
{ {
log_error("Invalid command type returned for clEnqueueSVMMemFill\n"); log_error("Invalid command type returned for "
"clEnqueueSVMMemFill\n");
return TEST_FAIL; return TEST_FAIL;
} }
} }
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed"); test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMCPY) if (commandType != CL_COMMAND_SVM_MEMCPY)
{ {
log_error("Invalid command type returned for clEnqueueSVMMemcpy\n"); log_error(
"Invalid command type returned for clEnqueueSVMMemcpy\n");
return TEST_FAIL; return TEST_FAIL;
} }
for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++) { for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++)
error = clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); {
error =
clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed"); test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MAP) if (commandType != CL_COMMAND_SVM_MAP)
{ {
log_error("Invalid command type returned for clEnqueueSVMMap\n"); log_error(
"Invalid command type returned for clEnqueueSVMMap\n");
return TEST_FAIL; return TEST_FAIL;
} }
error = clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); error =
clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed"); test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_UNMAP) if (commandType != CL_COMMAND_SVM_UNMAP)
{ {
log_error("Invalid command type returned for clEnqueueSVMUnmap\n"); log_error("Invalid command type returned for "
"clEnqueueSVMUnmap\n");
return TEST_FAIL; return TEST_FAIL;
} }
} }
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE,
sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed"); test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_FREE) if (commandType != CL_COMMAND_SVM_FREE)
{ {
log_error("Invalid command type returned for clEnqueueSVMFree\n"); log_error(
"Invalid command type returned for clEnqueueSVMFree\n");
return TEST_FAIL; return TEST_FAIL;
} }
} }
} }
std::vector<void *> buffers(numSVMBuffers, 0); std::vector<void *> buffers(numSVMBuffers, 0);
for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0); for (size_t i = 0; i < numSVMBuffers; ++i)
buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
// verify if callback is triggered correctly // verify if callback is triggered correctly
CallbackData data; CallbackData data;
data.status = 0; data.status = 0;
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0); error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0],
callback_svm_free, &data, 0, 0, 0);
test_error(error, "clEnqueueSVMFree failed"); test_error(error, "clEnqueueSVMFree failed");
error = clFinish(queue); error = clFinish(queue);
test_error(error, "clFinish failed"); test_error(error, "clFinish failed");
// wait for the callback // wait for the callback
while(data.status.load(std::memory_order_acquire) == 0) { while (data.status.load(std::memory_order_acquire) == 0)
{
usleep(1); usleep(1);
} }
//check if number of SVM pointers returned in the callback matches with expected // check if number of SVM pointers returned in the callback matches with
// expected
if (data.num_svm_pointers != buffers.size()) if (data.num_svm_pointers != buffers.size())
{ {
log_error("Invalid number of SVM pointers returned in the callback, " log_error("Invalid number of SVM pointers returned in the callback, "
@@ -303,8 +341,8 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
{ {
if (data.svm_pointers[i] != buffers[i]) if (data.svm_pointers[i] != buffers[i])
{ {
log_error("Invalid SVM pointer returned in the callback, idx: %zu\n", log_error(
i); "Invalid SVM pointer returned in the callback, idx: %zu\n", i);
return TEST_FAIL; return TEST_FAIL;
} }
} }

View File

@@ -139,9 +139,9 @@ int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper*
// Each bin in the hash table is a linked list. Each bin is protected against simultaneous // Each bin in the hash table is a linked list. Each bin is protected against simultaneous
// update using a lock free technique. The correctness of the list is verfied on the host. // update using a lock free technique. The correctness of the list is verfied on the host.
// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering. // This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering.
int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_fine_grain_memory_consistency)
{ {
clContextWrapper context; clContextWrapper contextWrapper;
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
@@ -157,8 +157,14 @@ int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c,
char *source[] = { hash_table_kernel }; char *source[] = { hash_table_kernel };
err = create_cl_objects(deviceID, (const char**)source, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS, required_extensions); err = create_cl_objects(
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. deviceID, (const char **)source, &contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS,
required_extensions);
context = contextWrapper;
if (err == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (err < 0) return -1; // fail test. if (err < 0) return -1; // fail test.
kernel = clCreateKernel(program, "build_hash_table", &err); kernel = clCreateKernel(program, "build_hash_table", &err);
@@ -166,16 +172,21 @@ int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c,
size_t num_pixels = num_elements; size_t num_pixels = num_elements;
int result; int result;
cl_uint numBins = 1; // all work groups in all devices and the host code will hammer on this one lock. cl_uint numBins = 1; // all work groups in all devices and the host code
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); // will hammer on this one lock.
result = launch_kernels_and_verify(contextWrapper, queues, kernel,
num_devices, numBins, num_pixels);
if (result == -1) return result; if (result == -1) return result;
numBins = 2; // 2 locks within in same cache line will get hit from different devices and host. numBins = 2; // 2 locks within in same cache line will get hit from
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); // different devices and host.
result = launch_kernels_and_verify(contextWrapper, queues, kernel,
num_devices, numBins, num_pixels);
if (result == -1) return result; if (result == -1) return result;
numBins = 29; // locks span a few cache lines. numBins = 29; // locks span a few cache lines.
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); result = launch_kernels_and_verify(contextWrapper, queues, kernel,
num_devices, numBins, num_pixels);
if (result == -1) return result; if (result == -1) return result;
return result; return result;

View File

@@ -44,16 +44,22 @@ void spawnAnalysisTask(int location)
// Concept: a device kernel is used to search an input image for regions that match a target pattern. // Concept: a device kernel is used to search an input image for regions that match a target pattern.
// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices). // The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices).
// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets. // The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets.
int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_fine_grain_sync_buffers)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int err = CL_SUCCESS; cl_int err = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); err = create_cl_objects(deviceID, &find_targets_kernel[0], &contextWrapper,
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. &program, &queues[0], &num_devices,
CL_DEVICE_SVM_FINE_GRAIN_BUFFER
| CL_DEVICE_SVM_ATOMICS);
context = contextWrapper;
if (err == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (err < 0) return -1; // fail test. if (err < 0) return -1; // fail test.
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err); clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
@@ -62,9 +68,17 @@ int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_com
size_t num_pixels = num_elements; size_t num_pixels = num_elements;
// cl_uint num_pixels = 1024*1024*32; // cl_uint num_pixels = 1024*1024*32;
cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0); cl_uint *pInputImage = (cl_uint *)clSVMAlloc(
cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0); context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0); sizeof(cl_uint) * num_pixels, 0);
cl_uint *pNumTargetsFound = (cl_uint *)clSVMAlloc(
context,
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
sizeof(cl_uint), 0);
cl_int *pTargetLocations = (cl_int *)clSVMAlloc(
context,
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
sizeof(cl_int) * MAX_TARGETS, 0);
cl_uint targetDescriptor = 777; cl_uint targetDescriptor = 777;
*pNumTargetsFound = 0; *pNumTargetsFound = 0;
@@ -76,13 +90,15 @@ int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_com
pInputImage[num_pixels - 1] = targetDescriptor; pInputImage[num_pixels - 1] = targetDescriptor;
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage); err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor); err |=
clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&targetDescriptor);
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound); err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations); err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
test_error(err, "clSetKernelArg failed"); test_error(err, "clSetKernelArg failed");
cl_event done; cl_event done;
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done); err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL,
0, NULL, &done);
test_error(err, "clEnqueueNDRangeKernel failed"); test_error(err, "clEnqueueNDRangeKernel failed");
clFlush(queues[0]); clFlush(queues[0]);
@@ -90,15 +106,20 @@ int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_com
i = 0; i = 0;
cl_int status; cl_int status;
// check for new targets, if found spawn a task to analyze target. // check for new targets, if found spawn a task to analyze target.
do { do
err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL); {
err = clGetEventInfo(done, CL_EVENT_COMMAND_EXECUTION_STATUS,
sizeof(cl_int), &status, NULL);
test_error(err, "clGetEventInfo failed"); test_error(err, "clGetEventInfo failed");
if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet. if (AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
!= -1) // -1 indicates slot not used yet.
{ {
spawnAnalysisTask(pTargetLocations[i]); spawnAnalysisTask(pTargetLocations[i]);
i++; i++;
} }
} while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1); } while (status != CL_COMPLETE
|| AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
!= -1);
clReleaseEvent(done); clReleaseEvent(done);
clSVMFree(context, pInputImage); clSVMFree(context, pInputImage);

View File

@@ -75,7 +75,7 @@ wait_and_release(const char* s, cl_event* evs, int n)
return 0; return 0;
} }
int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_migrate)
{ {
std::vector<cl_uint> amem(GLOBAL_SIZE); std::vector<cl_uint> amem(GLOBAL_SIZE);
std::vector<cl_uint> bmem(GLOBAL_SIZE); std::vector<cl_uint> bmem(GLOBAL_SIZE);
@@ -86,15 +86,17 @@ int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue
RandomSeed seed(0); RandomSeed seed(0);
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
cl_uint num_devices = 0; cl_uint num_devices = 0;
clProgramWrapper program; clProgramWrapper program;
cl_int error; cl_int error;
error = create_cl_objects(deviceID, &sources[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, &sources[0], &contextWrapper, &program,
if (error) &queues[0], &num_devices,
return -1; CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return -1;
if (num_devices > 1) { if (num_devices > 1) {
log_info(" Running on two devices.\n"); log_info(" Running on two devices.\n");

View File

@@ -35,69 +35,95 @@ const char *SVMPointerPassing_test_kernel[] = {
// The buffer is initialized to known values at each location. // The buffer is initialized to known values at each location.
// The kernel checks that it finds the expected value at each location. // The kernel checks that it finds the expected value at each location.
// TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--. // TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--.
int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_pointer_passing)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0],
&contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return -1; if (error) return -1;
clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error); clKernelWrapper kernel_verify_char =
clCreateKernel(program, "verify_char", &error);
test_error(error, "clCreateKernel failed"); test_error(error, "clCreateKernel failed");
size_t bufSize = 256; size_t bufSize = 256;
cl_uchar *pbuf_svm_alloc = (cl_uchar*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0); cl_uchar *pbuf_svm_alloc = (cl_uchar *)clSVMAlloc(
context, CL_MEM_READ_WRITE, sizeof(cl_uchar) * bufSize, 0);
cl_int *pNumCorrect = NULL; cl_int *pNumCorrect = NULL;
pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); pNumCorrect =
(cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
{ {
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf_svm_alloc, &error); clMemWrapper buf =
clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
sizeof(cl_uchar) * bufSize, pbuf_svm_alloc, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error); clMemWrapper num_correct = clCreateBuffer(
context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct); error = clSetKernelArg(kernel_verify_char, 1, sizeof(void *),
(void *)&num_correct);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
// put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them. // put values into buf so that we can expect to see these values in the
// kernel when we pass a pointer to them.
cl_command_queue cmdq = queues[0]; cl_command_queue cmdq = queues[0];
cl_uchar* pbuf_map_buffer = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error); cl_uchar *pbuf_map_buffer = (cl_uchar *)clEnqueueMapBuffer(
cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
sizeof(cl_uchar) * bufSize, 0, NULL, NULL, &error);
test_error2(error, pbuf_map_buffer, "clEnqueueMapBuffer failed"); test_error2(error, pbuf_map_buffer, "clEnqueueMapBuffer failed");
for (int i = 0; i < (int)bufSize; i++) for (int i = 0; i < (int)bufSize; i++)
{ {
pbuf_map_buffer[i] = (cl_uchar)i; pbuf_map_buffer[i] = (cl_uchar)i;
} }
error = clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0,NULL,NULL); error =
clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0, NULL, NULL);
test_error(error, "clEnqueueUnmapMemObject failed."); test_error(error, "clEnqueueUnmapMemObject failed.");
for (cl_uint ii = 0; ii<num_devices; ++ii) // iterate over all devices in the platform. for (cl_uint ii = 0; ii < num_devices;
++ii) // iterate over all devices in the platform.
{ {
cmdq = queues[ii]; cmdq = queues[ii];
for (int i = 0; i < (int)bufSize; i++) for (int i = 0; i < (int)bufSize; i++)
{ {
cl_uchar *pChar = &pbuf_svm_alloc[i]; cl_uchar *pChar = &pbuf_svm_alloc[i];
error = clSetKernelArgSVMPointer(kernel_verify_char, 0, pChar); // pass a pointer to a location within the buffer error = clSetKernelArgSVMPointer(
kernel_verify_char, 0,
pChar); // pass a pointer to a location within the buffer
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar), (void *) &i ); // pass the expected value at the above location. error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar),
(void *)&i); // pass the expected value
// at the above location.
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL, &bufSize, NULL, 0, NULL, NULL); error =
clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL,
&bufSize, NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed"); test_error(error, "clEnqueueNDRangeKernel failed");
pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error); pNumCorrect = (cl_int *)clEnqueueMapBuffer(
cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
sizeof(cl_int), 0, NULL, NULL, &error);
test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed"); test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
cl_int correct_count = *pNumCorrect; cl_int correct_count = *pNumCorrect;
error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL); error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect,
0, NULL, NULL);
test_error(error, "clEnqueueUnmapMemObject failed."); test_error(error, "clEnqueueUnmapMemObject failed.");
if (correct_count != 1) if (correct_count != 1)
{ {
log_error("Passing pointer directly to kernel for byte #%d failed on device %d\n", i, ii); log_error("Passing pointer directly to kernel for byte #%d "
"failed on device %d\n",
i, ii);
return -1; return -1;
} }
} }

View File

@@ -42,7 +42,7 @@ const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag. // Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
// //
int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_set_kernel_exec_info_svm_ptrs)
{ {
clContextWrapper c = NULL; clContextWrapper c = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;

View File

@@ -272,12 +272,14 @@ int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2
return 0; return 0;
} }
int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_address_space_coarse_grain_old_api)
{ {
return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_FALSE); return shared_address_space_coarse_grain(deviceID, context, queue,
num_elements, CL_FALSE);
} }
int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_address_space_coarse_grain_new_api)
{ {
return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_TRUE); return shared_address_space_coarse_grain(deviceID, context, queue,
num_elements, CL_TRUE);
} }

View File

@@ -20,64 +20,83 @@
// This is done by creating a linked list on a device and then verifying the correctness of the list // This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within // on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
// the platform. The test passes only if every combination passes. // the platform. The test passes only if every combination passes.
int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_address_space_fine_grain)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM); error = create_cl_objects(
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. deviceID, &linked_list_create_and_verify_kernels[0], &contextWrapper,
&program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
context = contextWrapper;
if (error == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (error < 0) return -1; // fail test. if (error < 0) return -1; // fail test.
size_t numLists = num_elements; size_t numLists = num_elements;
cl_int ListLength = 32; cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); clKernelWrapper kernel_create_lists =
clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed"); test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); clKernelWrapper kernel_verify_lists =
clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed"); test_error(error, "clCreateKernel failed");
// this allocation holds the linked list nodes. // this allocation holds the linked list nodes.
// FIXME: remove the alignment once prototype can handle it // FIXME: remove the alignment once prototype can handle it
Node* pNodes = (Node*) align_malloc(numLists*ListLength*sizeof(Node),128); Node *pNodes =
(Node *)align_malloc(numLists * ListLength * sizeof(Node), 128);
test_error2(error, pNodes, "malloc failed"); test_error2(error, pNodes, "malloc failed");
// this allocation holds an index into the nodes buffer, it is used for node allocation // this allocation holds an index into the nodes buffer, it is used for node
// allocation
size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128); size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128);
test_error2(error, pAllocator, "malloc failed"); test_error2(error, pAllocator, "malloc failed");
// this allocation holds the count of correct nodes, which is computed by the verify kernel. // this allocation holds the count of correct nodes, which is computed by
// the verify kernel.
cl_int *pNum_correct = (cl_int *)align_malloc(sizeof(cl_int), 128); cl_int *pNum_correct = (cl_int *)align_malloc(sizeof(cl_int), 128);
test_error2(error, pNum_correct, "malloc failed"); test_error2(error, pNum_correct, "malloc failed");
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes); error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator); error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),(void *) &ListLength); error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),
(void *)&ListLength);
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes); error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct); error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength); error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int),
(void *)&ListLength);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host). // Create linked list on one device and verify on another device (or the
// Do this for all possible combinations of devices and host within the platform. // host). Do this for all possible combinations of devices and host within
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on // the platform.
for (int ci = 0; ci < (int)num_devices + 1;
ci++) // ci is CreationIndex, index of device/q to create linked list
// on
{ {
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on for (int vi = 0; vi < (int)num_devices + 1;
vi++) // vi is VerificationIndex, index of device/q to verify
// linked list on
{ {
if(ci == num_devices) // last device index represents the host, note the num_device+1 above. if (ci == num_devices) // last device index represents the host,
// note the num_device+1 above.
{ {
log_info("creating linked list on host "); log_info("creating linked list on host ");
create_linked_lists(pNodes, numLists, ListLength); create_linked_lists(pNodes, numLists, ListLength);
} }
else else
{ {
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists); error = create_linked_lists_on_device_no_map(
ci, queues[ci], pAllocator, kernel_create_lists, numLists);
if (error) return -1; if (error) return -1;
} }
@@ -88,7 +107,9 @@ int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context c
} }
else else
{ {
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNum_correct, kernel_verify_lists, ListLength, numLists); error = verify_linked_lists_on_device_no_map(
vi, queues[vi], pNum_correct, kernel_verify_lists,
ListLength, numLists);
if (error) return -1; if (error) return -1;
} }
} }

View File

@@ -61,59 +61,83 @@ cl_int verify_linked_lists_on_device_no_map(int vi, cl_command_queue cmdq,cl_int
// This is done by creating a linked list on a device and then verifying the correctness of the list // This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within // on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
// the platform. The test passes only if every combination passes. // the platform. The test passes only if every combination passes.
int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_address_space_fine_grain_buffers)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER); error = create_cl_objects(
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. deviceID, &linked_list_create_and_verify_kernels[0], &contextWrapper,
&program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
context = contextWrapper;
if (error == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (error < 0) return -1; // fail test. if (error < 0) return -1; // fail test.
size_t numLists = num_elements; size_t numLists = num_elements;
cl_int ListLength = 32; cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); clKernelWrapper kernel_create_lists =
clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed"); test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); clKernelWrapper kernel_verify_lists =
clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed"); test_error(error, "clCreateKernel failed");
// this buffer holds the linked list nodes. // this buffer holds the linked list nodes.
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(Node)*ListLength*numLists, 0); Node *pNodes = (Node *)clSVMAlloc(
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(Node) * ListLength * numLists, 0);
// this buffer holds an index into the nodes buffer, it is used for node allocation // this buffer holds an index into the nodes buffer, it is used for node
size_t *pAllocator = (size_t*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(size_t), 0); // allocation
size_t *pAllocator = (size_t *)clSVMAlloc(
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(size_t), 0);
// this buffer holds the count of correct nodes, which is computed by the verify kernel. // this buffer holds the count of correct nodes, which is computed by the
cl_int *pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_int), 0); // verify kernel.
cl_int *pNumCorrect = (cl_int *)clSVMAlloc(
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(cl_int), 0);
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes); error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator); error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength); error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),
(void *)&ListLength);
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes); error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect); error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength); error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int),
(void *)&ListLength);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host). // Create linked list on one device and verify on another device (or the
// Do this for all possible combinations of devices and host within the platform. // host). Do this for all possible combinations of devices and host within
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on // the platform.
for (int ci = 0; ci < (int)num_devices + 1;
ci++) // ci is CreationIndex, index of device/q to create linked list
// on
{ {
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on for (int vi = 0; vi < (int)num_devices + 1;
vi++) // vi is VerificationIndex, index of device/q to verify
// linked list on
{ {
if(ci == num_devices) // last device index represents the host, note the num_device+1 above. if (ci == num_devices) // last device index represents the host,
// note the num_device+1 above.
{ {
log_info("SVM: creating linked list on host "); log_info("SVM: creating linked list on host ");
create_linked_lists(pNodes, numLists, ListLength); create_linked_lists(pNodes, numLists, ListLength);
} }
else else
{ {
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists); error = create_linked_lists_on_device_no_map(
ci, queues[ci], pAllocator, kernel_create_lists, numLists);
if (error) return -1; if (error) return -1;
} }
@@ -124,7 +148,9 @@ int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_c
} }
else else
{ {
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNumCorrect, kernel_verify_lists, ListLength, numLists); error = verify_linked_lists_on_device_no_map(
vi, queues[vi], pNumCorrect, kernel_verify_lists,
ListLength, numLists);
if (error) return -1; if (error) return -1;
} }
} }

View File

@@ -125,111 +125,153 @@ cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem node
// on another device or the host. // on another device or the host.
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly. // The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
// This basic test is performed for all combinations of devices and the host. // This basic test is performed for all combinations of devices and the host.
int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) REGISTER_TEST(svm_shared_sub_buffers)
{ {
clContextWrapper context = NULL; clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL; clProgramWrapper program = NULL;
cl_uint num_devices = 0; cl_uint num_devices = 0;
cl_int error = CL_SUCCESS; cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ]; clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0],
&contextWrapper, &program, &queues[0],
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
context = contextWrapper;
if (error) return -1; if (error) return -1;
size_t numLists = num_elements; size_t numLists = num_elements;
if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size. if (numLists & 0x1)
numLists++; // force even size, so we can easily create two sub-buffers
// of same size.
cl_int ListLength = 32; cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); clKernelWrapper kernel_create_lists =
clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed"); test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); clKernelWrapper kernel_verify_lists =
clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed"); test_error(error, "clCreateKernel failed");
size_t nodes_bufsize = sizeof(Node) * ListLength * numLists; size_t nodes_bufsize = sizeof(Node) * ListLength * numLists;
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0); Node *pNodes =
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0); (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
Node *pNodes2 =
(Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
{ {
// this buffer holds some of the linked list nodes. // this buffer holds some of the linked list nodes.
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error); clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
nodes_bufsize, pNodes, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
cl_buffer_region r; cl_buffer_region r;
r.origin = 0; r.origin = 0;
r.size = nodes_bufsize / 2; r.size = nodes_bufsize / 2;
// this should inherit the flag settings from nodes buffer // this should inherit the flag settings from nodes buffer
clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error); clMemWrapper nodes_sb1 = clCreateSubBuffer(
nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
test_error(error, "clCreateSubBuffer"); test_error(error, "clCreateSubBuffer");
r.origin = nodes_bufsize / 2; r.origin = nodes_bufsize / 2;
clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error); clMemWrapper nodes_sb2 = clCreateSubBuffer(
nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
test_error(error, "clCreateSubBuffer"); test_error(error, "clCreateSubBuffer");
// this buffer holds some of the linked list nodes. // this buffer holds some of the linked list nodes.
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error); clMemWrapper nodes2 = clCreateBuffer(
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
pNodes2, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
r.origin = 0; r.origin = 0;
r.size = nodes_bufsize / 2; r.size = nodes_bufsize / 2;
// this should inherit the flag settings from nodes buffer // this should inherit the flag settings from nodes buffer
clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error); clMemWrapper nodes2_sb1 = clCreateSubBuffer(
nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
test_error(error, "clCreateSubBuffer"); test_error(error, "clCreateSubBuffer");
r.origin = nodes_bufsize / 2; r.origin = nodes_bufsize / 2;
clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error); clMemWrapper nodes2_sb2 = clCreateSubBuffer(
nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
test_error(error, "clCreateSubBuffer"); test_error(error, "clCreateSubBuffer");
// this buffer holds the index into the nodes buffer that is used for
// this buffer holds the index into the nodes buffer that is used for node allocation // node allocation
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(size_t), NULL, &error); sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel. // this buffer holds the count of correct nodes which is computed by the
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); // verify kernel.
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed."); test_error(error, "clCreateBuffer failed.");
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1); error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *),
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1); (void *)&nodes_sb1);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2); error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *),
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2); (void *)&nodes2_sb1);
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator); error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *),
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength); (void *)&nodes_sb2);
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void *),
(void *)&nodes2_sb2);
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void *),
(void *)&allocator);
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),
(void *)&ListLength);
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1); error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *),
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1); (void *)&nodes_sb1);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2); error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *),
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2); (void *)&nodes2_sb1);
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct); error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *),
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength); (void *)&nodes_sb2);
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void *),
(void *)&nodes2_sb2);
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void *),
(void *)&num_correct);
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),
(void *)&ListLength);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host). // Create linked list on one device and verify on another device (or the
// Do this for all possible combinations of devices and host within the platform. // host). Do this for all possible combinations of devices and host
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on // within the platform.
for (int ci = 0; ci < (int)num_devices + 1;
ci++) // ci is CreationIndex, index of device/q to create linked
// list on
{ {
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on for (int vi = 0; vi < (int)num_devices + 1;
vi++) // vi is VerificationIndex, index of device/q to verify
// linked list on
{ {
if(ci == num_devices) // last device index represents the host, note the num_device+1 above. if (ci == num_devices) // last device index represents the host,
// note the num_device+1 above.
{ {
error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists); error = create_linked_lists_on_host_sb(
queues[0], nodes, nodes2, ListLength, numLists);
if (error) return -1; if (error) return -1;
} }
else else
{ {
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists); error = create_linked_lists_on_device(
ci, queues[ci], allocator, kernel_create_lists,
numLists);
if (error) return -1; if (error) return -1;
} }
if (vi == num_devices) if (vi == num_devices)
{ {
error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists); error = verify_linked_lists_on_host_sb(
vi, queues[0], nodes, nodes2, ListLength, numLists);
if (error) return -1; if (error) return -1;
} }
else else
{ {
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists); error = verify_linked_lists_on_device(
vi, queues[vi], num_correct, kernel_verify_lists,
ListLength, numLists);
if (error) return -1; if (error) return -1;
} }
} // inner loop, vi } // inner loop, vi