mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-26 08:49:02 +00:00
switch SVM tests to the new test registration framework (#2168)
Switches the SVM tests to the new test registration framework. The first commit is the best to review and contains the actual changes. The second commit purely has formatting changes. Note that several of these changes were a bit more than mechanical because many of the SVM tests create a new context vs. using the context provided by the harness and passed to each test function. The previous code named the context provided by the harness differently, and hence could use the name "context" in each test function, but with the new test registration framework this is no longer possible. Instead, I am creating the new context using the name "contextWrapper" and then assigning it to the "context" passed to the test function, which seems like the best way to avoid using the wrong context unintentionally. I am open to suggestions to do this differently. I have verified that the same calls are made before and after these changes, and specifically that there are no context leaks.
This commit is contained in:
@@ -81,25 +81,6 @@ extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, c
|
||||
extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists );
|
||||
extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists );
|
||||
|
||||
extern int test_svm_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector<std::string> extensions_list = std::vector<std::string>());
|
||||
|
||||
extern const char *linked_list_create_and_verify_kernels[];
|
||||
|
||||
@@ -260,26 +260,6 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt
|
||||
return 0;
|
||||
}
|
||||
|
||||
test_definition test_list[] = {
|
||||
ADD_TEST(svm_byte_granularity),
|
||||
ADD_TEST(svm_set_kernel_exec_info_svm_ptrs),
|
||||
ADD_TEST(svm_fine_grain_memory_consistency),
|
||||
ADD_TEST(svm_fine_grain_sync_buffers),
|
||||
ADD_TEST(svm_shared_address_space_fine_grain),
|
||||
ADD_TEST(svm_shared_sub_buffers),
|
||||
ADD_TEST(svm_shared_address_space_fine_grain_buffers),
|
||||
ADD_TEST(svm_allocate_shared_buffer),
|
||||
ADD_TEST(svm_allocate_shared_buffer_negative),
|
||||
ADD_TEST(svm_shared_address_space_coarse_grain_old_api),
|
||||
ADD_TEST(svm_shared_address_space_coarse_grain_new_api),
|
||||
ADD_TEST(svm_cross_buffer_pointers_coarse_grain),
|
||||
ADD_TEST(svm_pointer_passing),
|
||||
ADD_TEST(svm_enqueue_api),
|
||||
ADD_TEST_VERSION(svm_migrate, Version(2, 1)),
|
||||
};
|
||||
|
||||
const int test_num = ARRAY_SIZE( test_list );
|
||||
|
||||
test_status InitCL(cl_device_id device) {
|
||||
auto version = get_device_cl_version(device);
|
||||
auto expected_min_version = Version(2, 0);
|
||||
@@ -310,6 +290,7 @@ test_status InitCL(cl_device_id device) {
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL);
|
||||
return runTestHarnessWithCheck(
|
||||
argc, argv, test_registry::getInstance().num_tests(),
|
||||
test_registry::getInstance().definitions(), true, 0, InitCL);
|
||||
}
|
||||
|
||||
|
||||
@@ -41,20 +41,24 @@ const char* flag_set_names[] = {
|
||||
};
|
||||
|
||||
|
||||
int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_allocate_shared_buffer)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
cl_device_svm_capabilities caps;
|
||||
err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
|
||||
err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES,
|
||||
sizeof(cl_device_svm_capabilities), &caps, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
|
||||
|
||||
// under construction...
|
||||
err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
err =
|
||||
create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (err) return -1;
|
||||
|
||||
size_t size = 1024;
|
||||
@@ -63,15 +67,18 @@ int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2,
|
||||
int num_flags = sizeof(flag_set) / sizeof(cl_mem_flags);
|
||||
for (int i = 0; i < num_flags; i++)
|
||||
{
|
||||
if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
|
||||
|| ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0))
|
||||
if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0
|
||||
&& (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
|
||||
|| ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0
|
||||
&& (caps & CL_DEVICE_SVM_ATOMICS) == 0))
|
||||
{
|
||||
log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);
|
||||
cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0);
|
||||
cl_char *pBufData1 =
|
||||
(cl_char *)clSVMAlloc(context, flag_set[i], size, 0);
|
||||
if (pBufData1 == NULL)
|
||||
{
|
||||
log_error("SVMalloc returned NULL");
|
||||
@@ -79,7 +86,8 @@ int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2,
|
||||
}
|
||||
|
||||
{
|
||||
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err);
|
||||
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
|
||||
size, pBufData1, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
cl_char *pBufData2 = NULL;
|
||||
@@ -89,15 +97,19 @@ int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2,
|
||||
|
||||
if (!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
|
||||
{
|
||||
pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err);
|
||||
pBufData2 = (cl_char *)clEnqueueMapBuffer(
|
||||
queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL, NULL,
|
||||
&err);
|
||||
test_error(err, "clEnqueueMapBuffer failed");
|
||||
|
||||
if (pBufData2 != pBufData1 || NULL == pBufData1)
|
||||
{
|
||||
log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc");
|
||||
log_error("SVM pointer returned by clEnqueueMapBuffer "
|
||||
"doesn't match pointer returned by clSVMalloc");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL);
|
||||
err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0,
|
||||
NULL, NULL);
|
||||
test_error(err, "clEnqueueUnmapMemObject failed");
|
||||
err = clFinish(queues[0]);
|
||||
test_error(err, "clFinish failed");
|
||||
|
||||
@@ -41,12 +41,9 @@ const char* svm_flag_set_names[] = {
|
||||
};
|
||||
|
||||
|
||||
int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
|
||||
cl_context context2,
|
||||
cl_command_queue queue,
|
||||
int num_elements)
|
||||
REGISTER_TEST(svm_allocate_shared_buffer_negative)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
@@ -58,8 +55,10 @@ int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
|
||||
|
||||
// under construction...
|
||||
err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0],
|
||||
err =
|
||||
create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (err) return err;
|
||||
|
||||
size_t size = 1024;
|
||||
|
||||
@@ -48,10 +48,9 @@ const char *byte_manipulation_kernels[] = {
|
||||
};
|
||||
|
||||
|
||||
|
||||
int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_byte_granularity)
|
||||
{
|
||||
clContextWrapper context;
|
||||
clContextWrapper contextWrapper;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper k1, k2;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
@@ -59,8 +58,13 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
err = create_cl_objects(deviceID, &byte_manipulation_kernels[0],
|
||||
&contextWrapper, &program, &queues[0], &num_devices,
|
||||
CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (err == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (err < 0) return -1; // fail test.
|
||||
|
||||
cl_uint num_devices_plus_host = num_devices + 1;
|
||||
@@ -71,12 +75,17 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
|
||||
test_error(err, "clCreateKernel failed");
|
||||
|
||||
|
||||
cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0);
|
||||
cl_char *pA = (cl_char *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(cl_char) * num_elements, 0);
|
||||
|
||||
cl_uint **error_counts = (cl_uint **)malloc(sizeof(void *) * num_devices);
|
||||
|
||||
for(cl_uint i=0; i < num_devices; i++) {
|
||||
error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0);
|
||||
for (cl_uint i = 0; i < num_devices; i++)
|
||||
{
|
||||
error_counts[i] = (cl_uint *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(cl_uint), 0);
|
||||
*error_counts[i] = 0;
|
||||
}
|
||||
for (int i = 0; i < num_elements; i++) pA[i] = -1;
|
||||
@@ -91,18 +100,22 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
|
||||
{
|
||||
err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL);
|
||||
err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL,
|
||||
0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
for (cl_uint d = 0; d < num_devices; d++) clFlush(queues[d]);
|
||||
|
||||
cl_uint host_id = num_devices; // host code will take the id above the devices.
|
||||
for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id;
|
||||
cl_uint host_id =
|
||||
num_devices; // host code will take the id above the devices.
|
||||
for (int i = (int)num_devices; i < num_elements; i += num_devices_plus_host)
|
||||
pA[i] = host_id;
|
||||
|
||||
for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
|
||||
|
||||
// now check that each device can see the byte writes made by the other devices.
|
||||
// now check that each device can see the byte writes made by the other
|
||||
// devices.
|
||||
|
||||
err |= clSetKernelArgSVMPointer(k2, 0, pA);
|
||||
err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
|
||||
@@ -115,7 +128,9 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
|
||||
err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL);
|
||||
err =
|
||||
clEnqueueNDRangeKernel(queues[id], k2, 1, NULL,
|
||||
&adjusted_num_elements, NULL, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
@@ -124,24 +139,24 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
|
||||
bool failed = false;
|
||||
|
||||
// see if any of the devices found errors
|
||||
for(cl_uint i=0; i < num_devices; i++) {
|
||||
if(*error_counts[i] > 0)
|
||||
failed = true;
|
||||
for (cl_uint i = 0; i < num_devices; i++)
|
||||
{
|
||||
if (*error_counts[i] > 0) failed = true;
|
||||
}
|
||||
cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2;
|
||||
cl_uint expected =
|
||||
(num_devices_plus_host * (num_devices_plus_host - 1)) / 2;
|
||||
// check that host can see the byte writes made by the devices.
|
||||
for (cl_uint i = 0; i < num_elements - num_devices_plus_host; i++)
|
||||
{
|
||||
int sum = 0;
|
||||
for (cl_uint j = 0; j < num_devices_plus_host; j++) sum += pA[i + j];
|
||||
if(sum != expected)
|
||||
failed = true;
|
||||
if (sum != expected) failed = true;
|
||||
}
|
||||
|
||||
clSVMFree(context, pA);
|
||||
for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]);
|
||||
for (cl_uint i = 0; i < num_devices; i++)
|
||||
clSVMFree(context, error_counts[i]);
|
||||
|
||||
if(failed)
|
||||
return -1;
|
||||
if (failed) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -128,85 +128,120 @@ cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes,
|
||||
// on another device or the host.
|
||||
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
|
||||
// This basic test is performed for all combinations of devices and the host.
|
||||
int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_cross_buffer_pointers_coarse_grain)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0],
|
||||
&contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return -1;
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
clKernelWrapper kernel_create_lists =
|
||||
clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
clKernelWrapper kernel_verify_lists =
|
||||
clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
|
||||
Node *pNodes = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE,
|
||||
sizeof(Node) * ListLength * numLists, 0);
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
|
||||
Node *pNodes2 = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE,
|
||||
sizeof(Node) * ListLength * numLists, 0);
|
||||
|
||||
{
|
||||
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
|
||||
clMemWrapper nodes = clCreateBuffer(
|
||||
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
|
||||
pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
|
||||
clMemWrapper nodes2 = clCreateBuffer(
|
||||
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
|
||||
pNodes2, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the index into the nodes buffer that is used for node allocation
|
||||
// this buffer holds the index into the nodes buffer that is used for
|
||||
// node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(size_t), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the count of correct nodes which is computed by the verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
// this buffer holds the count of correct nodes which is computed by the
|
||||
// verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
|
||||
//error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength);
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *),
|
||||
(void *)&nodes);
|
||||
// error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *)
|
||||
// pNodes);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *),
|
||||
(void *)&nodes2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *),
|
||||
(void *)&allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *),
|
||||
(void *)&nodes);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *),
|
||||
(void *)&nodes2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *),
|
||||
(void *)&num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
// Create linked list on one device and verify on another device (or the
|
||||
// host). Do this for all possible combinations of devices and host
|
||||
// within the platform.
|
||||
for (int ci = 0; ci < (int)num_devices + 1;
|
||||
ci++) // ci is CreationIndex, index of device/q to create linked
|
||||
// list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
for (int vi = 0; vi < (int)num_devices + 1;
|
||||
vi++) // vi is VerificationIndex, index of device/q to verify
|
||||
// linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
if (ci == num_devices) // last device index represents the host,
|
||||
// note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists);
|
||||
error = create_linked_lists_on_host(
|
||||
queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
|
||||
error = create_linked_lists_on_device(
|
||||
ci, queues[ci], allocator, kernel_create_lists,
|
||||
numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
|
||||
if (vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
error = verify_linked_lists_on_host(
|
||||
vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
|
||||
error = verify_linked_lists_on_device(
|
||||
vi, queues[vi], num_correct, kernel_verify_lists,
|
||||
ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
} // inner loop, vi
|
||||
|
||||
@@ -70,9 +70,9 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point
|
||||
data->status.store(1, std::memory_order_release);
|
||||
}
|
||||
|
||||
int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_enqueue_api)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
cl_uint num_devices = 0;
|
||||
const size_t elementNum = 1024;
|
||||
@@ -80,48 +80,38 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
|
||||
cl_int error = CL_SUCCESS;
|
||||
RandomSeed seed(0);
|
||||
|
||||
error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
error = create_cl_objects(deviceID, NULL, &contextWrapper, NULL, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return TEST_FAIL;
|
||||
|
||||
queue = queues[0];
|
||||
|
||||
// all possible sizes of vectors and scalars
|
||||
size_t typeSizes[] = {
|
||||
sizeof(cl_uchar),
|
||||
sizeof(cl_uchar2),
|
||||
sizeof(cl_uchar3),
|
||||
sizeof(cl_uchar4),
|
||||
sizeof(cl_uchar8),
|
||||
sizeof(cl_uchar16),
|
||||
sizeof(cl_ushort),
|
||||
sizeof(cl_ushort2),
|
||||
sizeof(cl_ushort3),
|
||||
sizeof(cl_ushort4),
|
||||
sizeof(cl_ushort8),
|
||||
sizeof(cl_ushort16),
|
||||
sizeof(cl_uint),
|
||||
sizeof(cl_uint2),
|
||||
sizeof(cl_uint3),
|
||||
sizeof(cl_uint4),
|
||||
sizeof(cl_uint8),
|
||||
sizeof(cl_uint16),
|
||||
sizeof(cl_ulong),
|
||||
sizeof(cl_ulong2),
|
||||
sizeof(cl_ulong3),
|
||||
sizeof(cl_ulong4),
|
||||
sizeof(cl_ulong8),
|
||||
sizeof(cl_ulong16),
|
||||
sizeof(cl_uchar), sizeof(cl_uchar2), sizeof(cl_uchar3),
|
||||
sizeof(cl_uchar4), sizeof(cl_uchar8), sizeof(cl_uchar16),
|
||||
sizeof(cl_ushort), sizeof(cl_ushort2), sizeof(cl_ushort3),
|
||||
sizeof(cl_ushort4), sizeof(cl_ushort8), sizeof(cl_ushort16),
|
||||
sizeof(cl_uint), sizeof(cl_uint2), sizeof(cl_uint3),
|
||||
sizeof(cl_uint4), sizeof(cl_uint8), sizeof(cl_uint16),
|
||||
sizeof(cl_ulong), sizeof(cl_ulong2), sizeof(cl_ulong3),
|
||||
sizeof(cl_ulong4), sizeof(cl_ulong8), sizeof(cl_ulong16),
|
||||
};
|
||||
|
||||
enum allocationTypes {
|
||||
enum allocationTypes
|
||||
{
|
||||
host,
|
||||
svm
|
||||
};
|
||||
|
||||
struct TestType {
|
||||
struct TestType
|
||||
{
|
||||
allocationTypes srcAlloc;
|
||||
allocationTypes dstAlloc;
|
||||
TestType(allocationTypes type1, allocationTypes type2): srcAlloc(type1), dstAlloc(type2){}
|
||||
TestType(allocationTypes type1, allocationTypes type2)
|
||||
: srcAlloc(type1), dstAlloc(type2)
|
||||
{}
|
||||
};
|
||||
|
||||
std::vector<TestType> testTypes;
|
||||
@@ -133,11 +123,14 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
|
||||
|
||||
for (const auto test_case : testTypes)
|
||||
{
|
||||
log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n", test_case.srcAlloc == svm ? "svm" : "host", test_case.dstAlloc == svm ? "svm" : "host");
|
||||
log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n",
|
||||
test_case.srcAlloc == svm ? "svm" : "host",
|
||||
test_case.dstAlloc == svm ? "svm" : "host");
|
||||
for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i)
|
||||
{
|
||||
// generate initial data
|
||||
std::vector<cl_uchar> fillData0(typeSizes[i]), fillData1(typeSizes[i]);
|
||||
std::vector<cl_uchar> fillData0(typeSizes[i]),
|
||||
fillData1(typeSizes[i]);
|
||||
generate_data(fillData0, typeSizes[i], seed);
|
||||
generate_data(fillData1, typeSizes[i], seed);
|
||||
size_t data_size = elementNum * typeSizes[i];
|
||||
@@ -146,16 +139,22 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
|
||||
generate_data(srcHostData, srcHostData.size(), seed);
|
||||
generate_data(dstHostData, dstHostData.size(), seed);
|
||||
|
||||
cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0);
|
||||
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0);
|
||||
cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE, data_size, 0);
|
||||
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE, data_size, 0);
|
||||
|
||||
clEventWrapper userEvent = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
clEventWrapper eventMemFillList[2];
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[0]);
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0],
|
||||
typeSizes[i], data_size, 1, &userEvent,
|
||||
&eventMemFillList[0]);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[1]);
|
||||
error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0],
|
||||
typeSizes[i], data_size, 1, &userEvent,
|
||||
&eventMemFillList[1]);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clSetUserEventStatus(userEvent, CL_COMPLETE);
|
||||
@@ -163,27 +162,37 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
|
||||
|
||||
cl_uchar *src_ptr;
|
||||
cl_uchar *dst_ptr;
|
||||
if (test_case.srcAlloc == host) {
|
||||
if (test_case.srcAlloc == host)
|
||||
{
|
||||
src_ptr = srcHostData.data();
|
||||
} else if (test_case.srcAlloc == svm) {
|
||||
}
|
||||
else if (test_case.srcAlloc == svm)
|
||||
{
|
||||
src_ptr = srcBuffer;
|
||||
}
|
||||
if (test_case.dstAlloc == host) {
|
||||
if (test_case.dstAlloc == host)
|
||||
{
|
||||
dst_ptr = dstHostData.data();
|
||||
} else if (test_case.dstAlloc == svm) {
|
||||
}
|
||||
else if (test_case.dstAlloc == svm)
|
||||
{
|
||||
dst_ptr = dstBuffer;
|
||||
}
|
||||
clEventWrapper eventMemcpy;
|
||||
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size, 2, &eventMemFillList[0], &eventMemcpy);
|
||||
error =
|
||||
clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size,
|
||||
2, &eventMemFillList[0], &eventMemcpy);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
// coarse grain only supported. Synchronization required using map
|
||||
clEventWrapper eventMap[2];
|
||||
|
||||
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer, data_size, 1, &eventMemcpy, &eventMap[0]);
|
||||
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer,
|
||||
data_size, 1, &eventMemcpy, &eventMap[0]);
|
||||
test_error(error, "clEnqueueSVMMap srcBuffer failed");
|
||||
|
||||
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer, data_size, 1, &eventMemcpy, &eventMap[1]);
|
||||
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer,
|
||||
data_size, 1, &eventMemcpy, &eventMap[1]);
|
||||
test_error(error, "clEnqueueSVMMap dstBuffer failed");
|
||||
|
||||
error = clWaitForEvents(2, &eventMap[0]);
|
||||
@@ -192,29 +201,39 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
|
||||
// data verification
|
||||
for (size_t j = 0; j < data_size; ++j)
|
||||
{
|
||||
if (dst_ptr[j] != src_ptr[j]) {
|
||||
log_error("Invalid data at index %zu, dst_ptr %d, src_ptr %d\n", j,
|
||||
dst_ptr[j], src_ptr[j]);
|
||||
if (dst_ptr[j] != src_ptr[j])
|
||||
{
|
||||
log_error(
|
||||
"Invalid data at index %zu, dst_ptr %d, src_ptr %d\n",
|
||||
j, dst_ptr[j], src_ptr[j]);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
clEventWrapper eventUnmap[2];
|
||||
error = clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]);
|
||||
error =
|
||||
clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]);
|
||||
test_error(error, "clEnqueueSVMUnmap srcBuffer failed");
|
||||
|
||||
error = clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]);
|
||||
error =
|
||||
clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]);
|
||||
test_error(error, "clEnqueueSVMUnmap dstBuffer failed");
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0);
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0],
|
||||
typeSizes[i], data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0);
|
||||
error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2,
|
||||
&fillData1[0], typeSizes[i],
|
||||
data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, data_size / 2, 0, 0, 0);
|
||||
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer,
|
||||
data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + data_size / 2, srcBuffer + data_size / 2, data_size / 2, 0, 0, 0);
|
||||
error = clEnqueueSVMMemcpy(
|
||||
queue, CL_TRUE, dstBuffer + data_size / 2,
|
||||
srcBuffer + data_size / 2, data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer };
|
||||
@@ -228,68 +247,87 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
|
||||
|
||||
// event info verification for new SVM commands
|
||||
cl_command_type commandType;
|
||||
for (auto &check_event : eventMemFillList) {
|
||||
error = clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
for (auto &check_event : eventMemFillList)
|
||||
{
|
||||
error =
|
||||
clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MEMFILL)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMMemFill\n");
|
||||
log_error("Invalid command type returned for "
|
||||
"clEnqueueSVMMemFill\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MEMCPY)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMMemcpy\n");
|
||||
log_error(
|
||||
"Invalid command type returned for clEnqueueSVMMemcpy\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++) {
|
||||
error = clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++)
|
||||
{
|
||||
error =
|
||||
clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MAP)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMMap\n");
|
||||
log_error(
|
||||
"Invalid command type returned for clEnqueueSVMMap\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
error = clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
error =
|
||||
clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_UNMAP)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMUnmap\n");
|
||||
log_error("Invalid command type returned for "
|
||||
"clEnqueueSVMUnmap\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_FREE)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMFree\n");
|
||||
log_error(
|
||||
"Invalid command type returned for clEnqueueSVMFree\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<void *> buffers(numSVMBuffers, 0);
|
||||
for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
|
||||
for (size_t i = 0; i < numSVMBuffers; ++i)
|
||||
buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
|
||||
|
||||
// verify if callback is triggered correctly
|
||||
CallbackData data;
|
||||
data.status = 0;
|
||||
|
||||
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0);
|
||||
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0],
|
||||
callback_svm_free, &data, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMFree failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
// wait for the callback
|
||||
while(data.status.load(std::memory_order_acquire) == 0) {
|
||||
while (data.status.load(std::memory_order_acquire) == 0)
|
||||
{
|
||||
usleep(1);
|
||||
}
|
||||
|
||||
//check if number of SVM pointers returned in the callback matches with expected
|
||||
// check if number of SVM pointers returned in the callback matches with
|
||||
// expected
|
||||
if (data.num_svm_pointers != buffers.size())
|
||||
{
|
||||
log_error("Invalid number of SVM pointers returned in the callback, "
|
||||
@@ -303,8 +341,8 @@ int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue q
|
||||
{
|
||||
if (data.svm_pointers[i] != buffers[i])
|
||||
{
|
||||
log_error("Invalid SVM pointer returned in the callback, idx: %zu\n",
|
||||
i);
|
||||
log_error(
|
||||
"Invalid SVM pointer returned in the callback, idx: %zu\n", i);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,9 +139,9 @@ int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper*
|
||||
// Each bin in the hash table is a linked list. Each bin is protected against simultaneous
|
||||
// update using a lock free technique. The correctness of the list is verfied on the host.
|
||||
// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering.
|
||||
int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_fine_grain_memory_consistency)
|
||||
{
|
||||
clContextWrapper context;
|
||||
clContextWrapper contextWrapper;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
@@ -157,8 +157,14 @@ int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c,
|
||||
|
||||
char *source[] = { hash_table_kernel };
|
||||
|
||||
err = create_cl_objects(deviceID, (const char**)source, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS, required_extensions);
|
||||
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
err = create_cl_objects(
|
||||
deviceID, (const char **)source, &contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS,
|
||||
required_extensions);
|
||||
context = contextWrapper;
|
||||
if (err == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (err < 0) return -1; // fail test.
|
||||
|
||||
kernel = clCreateKernel(program, "build_hash_table", &err);
|
||||
@@ -166,16 +172,21 @@ int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c,
|
||||
size_t num_pixels = num_elements;
|
||||
|
||||
int result;
|
||||
cl_uint numBins = 1; // all work groups in all devices and the host code will hammer on this one lock.
|
||||
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels);
|
||||
cl_uint numBins = 1; // all work groups in all devices and the host code
|
||||
// will hammer on this one lock.
|
||||
result = launch_kernels_and_verify(contextWrapper, queues, kernel,
|
||||
num_devices, numBins, num_pixels);
|
||||
if (result == -1) return result;
|
||||
|
||||
numBins = 2; // 2 locks within in same cache line will get hit from different devices and host.
|
||||
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels);
|
||||
numBins = 2; // 2 locks within in same cache line will get hit from
|
||||
// different devices and host.
|
||||
result = launch_kernels_and_verify(contextWrapper, queues, kernel,
|
||||
num_devices, numBins, num_pixels);
|
||||
if (result == -1) return result;
|
||||
|
||||
numBins = 29; // locks span a few cache lines.
|
||||
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels);
|
||||
result = launch_kernels_and_verify(contextWrapper, queues, kernel,
|
||||
num_devices, numBins, num_pixels);
|
||||
if (result == -1) return result;
|
||||
|
||||
return result;
|
||||
|
||||
@@ -44,16 +44,22 @@ void spawnAnalysisTask(int location)
|
||||
// Concept: a device kernel is used to search an input image for regions that match a target pattern.
|
||||
// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices).
|
||||
// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets.
|
||||
int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_fine_grain_sync_buffers)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
|
||||
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
err = create_cl_objects(deviceID, &find_targets_kernel[0], &contextWrapper,
|
||||
&program, &queues[0], &num_devices,
|
||||
CL_DEVICE_SVM_FINE_GRAIN_BUFFER
|
||||
| CL_DEVICE_SVM_ATOMICS);
|
||||
context = contextWrapper;
|
||||
if (err == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (err < 0) return -1; // fail test.
|
||||
|
||||
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
|
||||
@@ -62,9 +68,17 @@ int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_com
|
||||
size_t num_pixels = num_elements;
|
||||
// cl_uint num_pixels = 1024*1024*32;
|
||||
|
||||
cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
|
||||
cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0);
|
||||
cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0);
|
||||
cl_uint *pInputImage = (cl_uint *)clSVMAlloc(
|
||||
context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(cl_uint) * num_pixels, 0);
|
||||
cl_uint *pNumTargetsFound = (cl_uint *)clSVMAlloc(
|
||||
context,
|
||||
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
|
||||
sizeof(cl_uint), 0);
|
||||
cl_int *pTargetLocations = (cl_int *)clSVMAlloc(
|
||||
context,
|
||||
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
|
||||
sizeof(cl_int) * MAX_TARGETS, 0);
|
||||
|
||||
cl_uint targetDescriptor = 777;
|
||||
*pNumTargetsFound = 0;
|
||||
@@ -76,13 +90,15 @@ int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_com
|
||||
pInputImage[num_pixels - 1] = targetDescriptor;
|
||||
|
||||
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor);
|
||||
err |=
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&targetDescriptor);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
cl_event done;
|
||||
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done);
|
||||
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL,
|
||||
0, NULL, &done);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
clFlush(queues[0]);
|
||||
|
||||
@@ -90,15 +106,20 @@ int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_com
|
||||
i = 0;
|
||||
cl_int status;
|
||||
// check for new targets, if found spawn a task to analyze target.
|
||||
do {
|
||||
err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
|
||||
do
|
||||
{
|
||||
err = clGetEventInfo(done, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &status, NULL);
|
||||
test_error(err, "clGetEventInfo failed");
|
||||
if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet.
|
||||
if (AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
|
||||
!= -1) // -1 indicates slot not used yet.
|
||||
{
|
||||
spawnAnalysisTask(pTargetLocations[i]);
|
||||
i++;
|
||||
}
|
||||
} while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1);
|
||||
} while (status != CL_COMPLETE
|
||||
|| AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
|
||||
!= -1);
|
||||
|
||||
clReleaseEvent(done);
|
||||
clSVMFree(context, pInputImage);
|
||||
|
||||
@@ -75,7 +75,7 @@ wait_and_release(const char* s, cl_event* evs, int n)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_migrate)
|
||||
{
|
||||
std::vector<cl_uint> amem(GLOBAL_SIZE);
|
||||
std::vector<cl_uint> bmem(GLOBAL_SIZE);
|
||||
@@ -86,15 +86,17 @@ int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue
|
||||
|
||||
RandomSeed seed(0);
|
||||
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
cl_uint num_devices = 0;
|
||||
clProgramWrapper program;
|
||||
cl_int error;
|
||||
|
||||
error = create_cl_objects(deviceID, &sources[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if (error)
|
||||
return -1;
|
||||
error = create_cl_objects(deviceID, &sources[0], &contextWrapper, &program,
|
||||
&queues[0], &num_devices,
|
||||
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return -1;
|
||||
|
||||
if (num_devices > 1) {
|
||||
log_info(" Running on two devices.\n");
|
||||
|
||||
@@ -35,69 +35,95 @@ const char *SVMPointerPassing_test_kernel[] = {
|
||||
// The buffer is initialized to known values at each location.
|
||||
// The kernel checks that it finds the expected value at each location.
|
||||
// TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--.
|
||||
int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_pointer_passing)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0],
|
||||
&contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return -1;
|
||||
|
||||
clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error);
|
||||
clKernelWrapper kernel_verify_char =
|
||||
clCreateKernel(program, "verify_char", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
size_t bufSize = 256;
|
||||
cl_uchar *pbuf_svm_alloc = (cl_uchar*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0);
|
||||
cl_uchar *pbuf_svm_alloc = (cl_uchar *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE, sizeof(cl_uchar) * bufSize, 0);
|
||||
|
||||
cl_int *pNumCorrect = NULL;
|
||||
pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
|
||||
pNumCorrect =
|
||||
(cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
|
||||
|
||||
{
|
||||
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf_svm_alloc, &error);
|
||||
clMemWrapper buf =
|
||||
clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
|
||||
sizeof(cl_uchar) * bufSize, pbuf_svm_alloc, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
|
||||
clMemWrapper num_correct = clCreateBuffer(
|
||||
context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct);
|
||||
error = clSetKernelArg(kernel_verify_char, 1, sizeof(void *),
|
||||
(void *)&num_correct);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them.
|
||||
// put values into buf so that we can expect to see these values in the
|
||||
// kernel when we pass a pointer to them.
|
||||
cl_command_queue cmdq = queues[0];
|
||||
cl_uchar* pbuf_map_buffer = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error);
|
||||
cl_uchar *pbuf_map_buffer = (cl_uchar *)clEnqueueMapBuffer(
|
||||
cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
|
||||
sizeof(cl_uchar) * bufSize, 0, NULL, NULL, &error);
|
||||
test_error2(error, pbuf_map_buffer, "clEnqueueMapBuffer failed");
|
||||
for (int i = 0; i < (int)bufSize; i++)
|
||||
{
|
||||
pbuf_map_buffer[i] = (cl_uchar)i;
|
||||
}
|
||||
error = clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0,NULL,NULL);
|
||||
error =
|
||||
clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
|
||||
for (cl_uint ii = 0; ii<num_devices; ++ii) // iterate over all devices in the platform.
|
||||
for (cl_uint ii = 0; ii < num_devices;
|
||||
++ii) // iterate over all devices in the platform.
|
||||
{
|
||||
cmdq = queues[ii];
|
||||
for (int i = 0; i < (int)bufSize; i++)
|
||||
{
|
||||
cl_uchar *pChar = &pbuf_svm_alloc[i];
|
||||
error = clSetKernelArgSVMPointer(kernel_verify_char, 0, pChar); // pass a pointer to a location within the buffer
|
||||
error = clSetKernelArgSVMPointer(
|
||||
kernel_verify_char, 0,
|
||||
pChar); // pass a pointer to a location within the buffer
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar), (void *) &i ); // pass the expected value at the above location.
|
||||
error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar),
|
||||
(void *)&i); // pass the expected value
|
||||
// at the above location.
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL, &bufSize, NULL, 0, NULL, NULL);
|
||||
error =
|
||||
clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL,
|
||||
&bufSize, NULL, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
|
||||
pNumCorrect = (cl_int *)clEnqueueMapBuffer(
|
||||
cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
|
||||
sizeof(cl_int), 0, NULL, NULL, &error);
|
||||
test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
|
||||
cl_int correct_count = *pNumCorrect;
|
||||
error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
|
||||
error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect,
|
||||
0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
|
||||
if (correct_count != 1)
|
||||
{
|
||||
log_error("Passing pointer directly to kernel for byte #%d failed on device %d\n", i, ii);
|
||||
log_error("Passing pointer directly to kernel for byte #%d "
|
||||
"failed on device %d\n",
|
||||
i, ii);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
|
||||
|
||||
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
|
||||
//
|
||||
int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_set_kernel_exec_info_svm_ptrs)
|
||||
{
|
||||
clContextWrapper c = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
|
||||
@@ -272,12 +272,14 @@ int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_address_space_coarse_grain_old_api)
|
||||
{
|
||||
return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_FALSE);
|
||||
return shared_address_space_coarse_grain(deviceID, context, queue,
|
||||
num_elements, CL_FALSE);
|
||||
}
|
||||
|
||||
int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_address_space_coarse_grain_new_api)
|
||||
{
|
||||
return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_TRUE);
|
||||
return shared_address_space_coarse_grain(deviceID, context, queue,
|
||||
num_elements, CL_TRUE);
|
||||
}
|
||||
|
||||
@@ -20,64 +20,83 @@
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
|
||||
// the platform. The test passes only if every combination passes.
|
||||
int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_address_space_fine_grain)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
error = create_cl_objects(
|
||||
deviceID, &linked_list_create_and_verify_kernels[0], &contextWrapper,
|
||||
&program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
|
||||
context = contextWrapper;
|
||||
if (error == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (error < 0) return -1; // fail test.
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
clKernelWrapper kernel_create_lists =
|
||||
clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
clKernelWrapper kernel_verify_lists =
|
||||
clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this allocation holds the linked list nodes.
|
||||
// FIXME: remove the alignment once prototype can handle it
|
||||
Node* pNodes = (Node*) align_malloc(numLists*ListLength*sizeof(Node),128);
|
||||
Node *pNodes =
|
||||
(Node *)align_malloc(numLists * ListLength * sizeof(Node), 128);
|
||||
test_error2(error, pNodes, "malloc failed");
|
||||
|
||||
// this allocation holds an index into the nodes buffer, it is used for node allocation
|
||||
// this allocation holds an index into the nodes buffer, it is used for node
|
||||
// allocation
|
||||
size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128);
|
||||
test_error2(error, pAllocator, "malloc failed");
|
||||
|
||||
// this allocation holds the count of correct nodes, which is computed by the verify kernel.
|
||||
// this allocation holds the count of correct nodes, which is computed by
|
||||
// the verify kernel.
|
||||
cl_int *pNum_correct = (cl_int *)align_malloc(sizeof(cl_int), 128);
|
||||
test_error2(error, pNum_correct, "malloc failed");
|
||||
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),(void *) &ListLength);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
// Create linked list on one device and verify on another device (or the
|
||||
// host). Do this for all possible combinations of devices and host within
|
||||
// the platform.
|
||||
for (int ci = 0; ci < (int)num_devices + 1;
|
||||
ci++) // ci is CreationIndex, index of device/q to create linked list
|
||||
// on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
for (int vi = 0; vi < (int)num_devices + 1;
|
||||
vi++) // vi is VerificationIndex, index of device/q to verify
|
||||
// linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
if (ci == num_devices) // last device index represents the host,
|
||||
// note the num_device+1 above.
|
||||
{
|
||||
log_info("creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
error = create_linked_lists_on_device_no_map(
|
||||
ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
|
||||
@@ -88,7 +107,9 @@ int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context c
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNum_correct, kernel_verify_lists, ListLength, numLists);
|
||||
error = verify_linked_lists_on_device_no_map(
|
||||
vi, queues[vi], pNum_correct, kernel_verify_lists,
|
||||
ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,59 +61,83 @@ cl_int verify_linked_lists_on_device_no_map(int vi, cl_command_queue cmdq,cl_int
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
|
||||
// the platform. The test passes only if every combination passes.
|
||||
int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_address_space_fine_grain_buffers)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
error = create_cl_objects(
|
||||
deviceID, &linked_list_create_and_verify_kernels[0], &contextWrapper,
|
||||
&program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (error < 0) return -1; // fail test.
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
clKernelWrapper kernel_create_lists =
|
||||
clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
clKernelWrapper kernel_verify_lists =
|
||||
clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this buffer holds the linked list nodes.
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(Node)*ListLength*numLists, 0);
|
||||
Node *pNodes = (Node *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(Node) * ListLength * numLists, 0);
|
||||
|
||||
// this buffer holds an index into the nodes buffer, it is used for node allocation
|
||||
size_t *pAllocator = (size_t*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(size_t), 0);
|
||||
// this buffer holds an index into the nodes buffer, it is used for node
|
||||
// allocation
|
||||
size_t *pAllocator = (size_t *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(size_t), 0);
|
||||
|
||||
// this buffer holds the count of correct nodes, which is computed by the verify kernel.
|
||||
cl_int *pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_int), 0);
|
||||
// this buffer holds the count of correct nodes, which is computed by the
|
||||
// verify kernel.
|
||||
cl_int *pNumCorrect = (cl_int *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(cl_int), 0);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
// Create linked list on one device and verify on another device (or the
|
||||
// host). Do this for all possible combinations of devices and host within
|
||||
// the platform.
|
||||
for (int ci = 0; ci < (int)num_devices + 1;
|
||||
ci++) // ci is CreationIndex, index of device/q to create linked list
|
||||
// on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
for (int vi = 0; vi < (int)num_devices + 1;
|
||||
vi++) // vi is VerificationIndex, index of device/q to verify
|
||||
// linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
if (ci == num_devices) // last device index represents the host,
|
||||
// note the num_device+1 above.
|
||||
{
|
||||
log_info("SVM: creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
error = create_linked_lists_on_device_no_map(
|
||||
ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
|
||||
@@ -124,7 +148,9 @@ int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_c
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNumCorrect, kernel_verify_lists, ListLength, numLists);
|
||||
error = verify_linked_lists_on_device_no_map(
|
||||
vi, queues[vi], pNumCorrect, kernel_verify_lists,
|
||||
ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,111 +125,153 @@ cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem node
|
||||
// on another device or the host.
|
||||
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
|
||||
// This basic test is performed for all combinations of devices and the host.
|
||||
int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_sub_buffers)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0],
|
||||
&contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return -1;
|
||||
|
||||
size_t numLists = num_elements;
|
||||
if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size.
|
||||
if (numLists & 0x1)
|
||||
numLists++; // force even size, so we can easily create two sub-buffers
|
||||
// of same size.
|
||||
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
clKernelWrapper kernel_create_lists =
|
||||
clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
clKernelWrapper kernel_verify_lists =
|
||||
clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
size_t nodes_bufsize = sizeof(Node) * ListLength * numLists;
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
Node *pNodes =
|
||||
(Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
Node *pNodes2 =
|
||||
(Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
|
||||
{
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error);
|
||||
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
|
||||
nodes_bufsize, pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
cl_buffer_region r;
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
clMemWrapper nodes_sb1 = clCreateSubBuffer(
|
||||
nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
clMemWrapper nodes_sb2 = clCreateSubBuffer(
|
||||
nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
|
||||
clMemWrapper nodes2 = clCreateBuffer(
|
||||
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
|
||||
pNodes2, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
clMemWrapper nodes2_sb1 = clCreateSubBuffer(
|
||||
nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error);
|
||||
clMemWrapper nodes2_sb2 = clCreateSubBuffer(
|
||||
nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
|
||||
|
||||
// this buffer holds the index into the nodes buffer that is used for node allocation
|
||||
// this buffer holds the index into the nodes buffer that is used for
|
||||
// node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(size_t), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the count of correct nodes which is computed by the verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
// this buffer holds the count of correct nodes which is computed by the
|
||||
// verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength);
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *),
|
||||
(void *)&nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *),
|
||||
(void *)&nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *),
|
||||
(void *)&nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void *),
|
||||
(void *)&nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void *),
|
||||
(void *)&allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *),
|
||||
(void *)&nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *),
|
||||
(void *)&nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *),
|
||||
(void *)&nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void *),
|
||||
(void *)&nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void *),
|
||||
(void *)&num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
// Create linked list on one device and verify on another device (or the
|
||||
// host). Do this for all possible combinations of devices and host
|
||||
// within the platform.
|
||||
for (int ci = 0; ci < (int)num_devices + 1;
|
||||
ci++) // ci is CreationIndex, index of device/q to create linked
|
||||
// list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
for (int vi = 0; vi < (int)num_devices + 1;
|
||||
vi++) // vi is VerificationIndex, index of device/q to verify
|
||||
// linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
if (ci == num_devices) // last device index represents the host,
|
||||
// note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists);
|
||||
error = create_linked_lists_on_host_sb(
|
||||
queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
|
||||
error = create_linked_lists_on_device(
|
||||
ci, queues[ci], allocator, kernel_create_lists,
|
||||
numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
|
||||
if (vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
error = verify_linked_lists_on_host_sb(
|
||||
vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
|
||||
error = verify_linked_lists_on_device(
|
||||
vi, queues[vi], num_correct, kernel_verify_lists,
|
||||
ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
} // inner loop, vi
|
||||
|
||||
Reference in New Issue
Block a user