mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
switch SVM tests to the new test registration framework (#2168)
Switches the SVM tests to the new test registration framework. The first commit is the best to review and contains the actual changes. The second commit purely has formatting changes. Note that several of these changes were a bit more than mechanical because many of the SVM tests create a new context vs. using the context provided by the harness and passed to each test function. The previous code named the context provided by the harness differently, and hence could use the name "context" in each test function, but with the new test registration framework this is no longer possible. Instead, I am creating the new context using the name "contextWrapper" and then assigning it to the "context" passed to the test function, which seems like the best way to avoid using the wrong context unintentionally. I am open to suggestions to do this differently. I have verified that the same calls are made before and after these changes, and specifically that there are no context leaks.
This commit is contained in:
@@ -81,25 +81,6 @@ extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, c
|
||||
extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists );
|
||||
extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists );
|
||||
|
||||
extern int test_svm_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector<std::string> extensions_list = std::vector<std::string>());
|
||||
|
||||
extern const char *linked_list_create_and_verify_kernels[];
|
||||
|
||||
@@ -260,26 +260,6 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt
|
||||
return 0;
|
||||
}
|
||||
|
||||
test_definition test_list[] = {
|
||||
ADD_TEST(svm_byte_granularity),
|
||||
ADD_TEST(svm_set_kernel_exec_info_svm_ptrs),
|
||||
ADD_TEST(svm_fine_grain_memory_consistency),
|
||||
ADD_TEST(svm_fine_grain_sync_buffers),
|
||||
ADD_TEST(svm_shared_address_space_fine_grain),
|
||||
ADD_TEST(svm_shared_sub_buffers),
|
||||
ADD_TEST(svm_shared_address_space_fine_grain_buffers),
|
||||
ADD_TEST(svm_allocate_shared_buffer),
|
||||
ADD_TEST(svm_allocate_shared_buffer_negative),
|
||||
ADD_TEST(svm_shared_address_space_coarse_grain_old_api),
|
||||
ADD_TEST(svm_shared_address_space_coarse_grain_new_api),
|
||||
ADD_TEST(svm_cross_buffer_pointers_coarse_grain),
|
||||
ADD_TEST(svm_pointer_passing),
|
||||
ADD_TEST(svm_enqueue_api),
|
||||
ADD_TEST_VERSION(svm_migrate, Version(2, 1)),
|
||||
};
|
||||
|
||||
const int test_num = ARRAY_SIZE( test_list );
|
||||
|
||||
test_status InitCL(cl_device_id device) {
|
||||
auto version = get_device_cl_version(device);
|
||||
auto expected_min_version = Version(2, 0);
|
||||
@@ -310,6 +290,7 @@ test_status InitCL(cl_device_id device) {
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL);
|
||||
return runTestHarnessWithCheck(
|
||||
argc, argv, test_registry::getInstance().num_tests(),
|
||||
test_registry::getInstance().definitions(), true, 0, InitCL);
|
||||
}
|
||||
|
||||
|
||||
@@ -41,71 +41,83 @@ const char* flag_set_names[] = {
|
||||
};
|
||||
|
||||
|
||||
int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_allocate_shared_buffer)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
cl_device_svm_capabilities caps;
|
||||
err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
|
||||
test_error(err,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
|
||||
cl_device_svm_capabilities caps;
|
||||
err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES,
|
||||
sizeof(cl_device_svm_capabilities), &caps, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
|
||||
|
||||
// under construction...
|
||||
err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(err) return -1;
|
||||
// under construction...
|
||||
err =
|
||||
create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (err) return -1;
|
||||
|
||||
size_t size = 1024;
|
||||
size_t size = 1024;
|
||||
|
||||
// iteration over flag combos
|
||||
int num_flags = sizeof(flag_set)/sizeof(cl_mem_flags);
|
||||
for(int i = 0; i < num_flags; i++)
|
||||
{
|
||||
if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
|
||||
|| ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0))
|
||||
// iteration over flag combos
|
||||
int num_flags = sizeof(flag_set) / sizeof(cl_mem_flags);
|
||||
for (int i = 0; i < num_flags; i++)
|
||||
{
|
||||
log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);
|
||||
cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0);
|
||||
if(pBufData1 == NULL)
|
||||
{
|
||||
log_error("SVMalloc returned NULL");
|
||||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err);
|
||||
test_error(err,"clCreateBuffer failed");
|
||||
|
||||
cl_char *pBufData2 = NULL;
|
||||
cl_uint flags = CL_MAP_READ | CL_MAP_READ;
|
||||
if(flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE;
|
||||
if(flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ;
|
||||
|
||||
if(!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
|
||||
{
|
||||
pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err);
|
||||
test_error(err, "clEnqueueMapBuffer failed");
|
||||
|
||||
if(pBufData2 != pBufData1 || NULL == pBufData1)
|
||||
if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0
|
||||
&& (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
|
||||
|| ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0
|
||||
&& (caps & CL_DEVICE_SVM_ATOMICS) == 0))
|
||||
{
|
||||
log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc");
|
||||
return -1;
|
||||
log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]);
|
||||
continue;
|
||||
}
|
||||
err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueUnmapMemObject failed");
|
||||
err = clFinish(queues[0]);
|
||||
test_error(err, "clFinish failed");
|
||||
}
|
||||
|
||||
log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);
|
||||
cl_char *pBufData1 =
|
||||
(cl_char *)clSVMAlloc(context, flag_set[i], size, 0);
|
||||
if (pBufData1 == NULL)
|
||||
{
|
||||
log_error("SVMalloc returned NULL");
|
||||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
|
||||
size, pBufData1, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
cl_char *pBufData2 = NULL;
|
||||
cl_uint flags = CL_MAP_READ | CL_MAP_READ;
|
||||
if (flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE;
|
||||
if (flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ;
|
||||
|
||||
if (!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
|
||||
{
|
||||
pBufData2 = (cl_char *)clEnqueueMapBuffer(
|
||||
queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL, NULL,
|
||||
&err);
|
||||
test_error(err, "clEnqueueMapBuffer failed");
|
||||
|
||||
if (pBufData2 != pBufData1 || NULL == pBufData1)
|
||||
{
|
||||
log_error("SVM pointer returned by clEnqueueMapBuffer "
|
||||
"doesn't match pointer returned by clSVMalloc");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0,
|
||||
NULL, NULL);
|
||||
test_error(err, "clEnqueueUnmapMemObject failed");
|
||||
err = clFinish(queues[0]);
|
||||
test_error(err, "clFinish failed");
|
||||
}
|
||||
}
|
||||
|
||||
clSVMFree(context, pBufData1);
|
||||
}
|
||||
|
||||
clSVMFree(context, pBufData1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -41,12 +41,9 @@ const char* svm_flag_set_names[] = {
|
||||
};
|
||||
|
||||
|
||||
int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
|
||||
cl_context context2,
|
||||
cl_command_queue queue,
|
||||
int num_elements)
|
||||
REGISTER_TEST(svm_allocate_shared_buffer_negative)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
@@ -58,8 +55,10 @@ int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
|
||||
|
||||
// under construction...
|
||||
err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
err =
|
||||
create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (err) return err;
|
||||
|
||||
size_t size = 1024;
|
||||
|
||||
@@ -48,100 +48,115 @@ const char *byte_manipulation_kernels[] = {
|
||||
};
|
||||
|
||||
|
||||
|
||||
int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_byte_granularity)
|
||||
{
|
||||
clContextWrapper context;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper k1,k2;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper contextWrapper;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper k1, k2;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(err < 0) return -1; // fail test.
|
||||
err = create_cl_objects(deviceID, &byte_manipulation_kernels[0],
|
||||
&contextWrapper, &program, &queues[0], &num_devices,
|
||||
CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (err == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (err < 0) return -1; // fail test.
|
||||
|
||||
cl_uint num_devices_plus_host = num_devices + 1;
|
||||
cl_uint num_devices_plus_host = num_devices + 1;
|
||||
|
||||
k1 = clCreateKernel(program, "write_owned_locations", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
k2 = clCreateKernel(program, "sum_neighbor_locations", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
k1 = clCreateKernel(program, "write_owned_locations", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
k2 = clCreateKernel(program, "sum_neighbor_locations", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
|
||||
|
||||
cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0);
|
||||
cl_char *pA = (cl_char *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(cl_char) * num_elements, 0);
|
||||
|
||||
cl_uint **error_counts = (cl_uint**) malloc(sizeof(void*) * num_devices);
|
||||
cl_uint **error_counts = (cl_uint **)malloc(sizeof(void *) * num_devices);
|
||||
|
||||
for(cl_uint i=0; i < num_devices; i++) {
|
||||
error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0);
|
||||
*error_counts[i] = 0;
|
||||
}
|
||||
for(int i=0; i < num_elements; i++) pA[i] = -1;
|
||||
for (cl_uint i = 0; i < num_devices; i++)
|
||||
{
|
||||
error_counts[i] = (cl_uint *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(cl_uint), 0);
|
||||
*error_counts[i] = 0;
|
||||
}
|
||||
for (int i = 0; i < num_elements; i++) pA[i] = -1;
|
||||
|
||||
err |= clSetKernelArgSVMPointer(k1, 0, pA);
|
||||
err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
// get all the devices going simultaneously
|
||||
size_t element_num = num_elements;
|
||||
for(cl_uint d=0; d < num_devices; d++) // device ids starting at 1.
|
||||
{
|
||||
err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL);
|
||||
test_error(err,"clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
for(cl_uint d=0; d < num_devices; d++) clFlush(queues[d]);
|
||||
|
||||
cl_uint host_id = num_devices; // host code will take the id above the devices.
|
||||
for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id;
|
||||
|
||||
for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
|
||||
|
||||
// now check that each device can see the byte writes made by the other devices.
|
||||
|
||||
err |= clSetKernelArgSVMPointer(k2, 0, pA);
|
||||
err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
// adjusted so k2 doesn't read past end of buffer
|
||||
size_t adjusted_num_elements = num_elements - num_devices;
|
||||
for(cl_uint id = 0; id < num_devices; id++)
|
||||
{
|
||||
err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
|
||||
err |= clSetKernelArgSVMPointer(k1, 0, pA);
|
||||
err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL);
|
||||
test_error(err,"clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
// get all the devices going simultaneously
|
||||
size_t element_num = num_elements;
|
||||
for (cl_uint d = 0; d < num_devices; d++) // device ids starting at 1.
|
||||
{
|
||||
err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL,
|
||||
0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
|
||||
for (cl_uint d = 0; d < num_devices; d++) clFlush(queues[d]);
|
||||
|
||||
bool failed = false;
|
||||
cl_uint host_id =
|
||||
num_devices; // host code will take the id above the devices.
|
||||
for (int i = (int)num_devices; i < num_elements; i += num_devices_plus_host)
|
||||
pA[i] = host_id;
|
||||
|
||||
// see if any of the devices found errors
|
||||
for(cl_uint i=0; i < num_devices; i++) {
|
||||
if(*error_counts[i] > 0)
|
||||
failed = true;
|
||||
}
|
||||
cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2;
|
||||
// check that host can see the byte writes made by the devices.
|
||||
for(cl_uint i = 0; i < num_elements - num_devices_plus_host; i++)
|
||||
{
|
||||
int sum = 0;
|
||||
for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j];
|
||||
if(sum != expected)
|
||||
failed = true;
|
||||
}
|
||||
for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
|
||||
|
||||
clSVMFree(context, pA);
|
||||
for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]);
|
||||
// now check that each device can see the byte writes made by the other
|
||||
// devices.
|
||||
|
||||
if(failed)
|
||||
return -1;
|
||||
return 0;
|
||||
err |= clSetKernelArgSVMPointer(k2, 0, pA);
|
||||
err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
// adjusted so k2 doesn't read past end of buffer
|
||||
size_t adjusted_num_elements = num_elements - num_devices;
|
||||
for (cl_uint id = 0; id < num_devices; id++)
|
||||
{
|
||||
err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
err =
|
||||
clEnqueueNDRangeKernel(queues[id], k2, 1, NULL,
|
||||
&adjusted_num_elements, NULL, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
|
||||
|
||||
bool failed = false;
|
||||
|
||||
// see if any of the devices found errors
|
||||
for (cl_uint i = 0; i < num_devices; i++)
|
||||
{
|
||||
if (*error_counts[i] > 0) failed = true;
|
||||
}
|
||||
cl_uint expected =
|
||||
(num_devices_plus_host * (num_devices_plus_host - 1)) / 2;
|
||||
// check that host can see the byte writes made by the devices.
|
||||
for (cl_uint i = 0; i < num_elements - num_devices_plus_host; i++)
|
||||
{
|
||||
int sum = 0;
|
||||
for (cl_uint j = 0; j < num_devices_plus_host; j++) sum += pA[i + j];
|
||||
if (sum != expected) failed = true;
|
||||
}
|
||||
|
||||
clSVMFree(context, pA);
|
||||
for (cl_uint i = 0; i < num_devices; i++)
|
||||
clSVMFree(context, error_counts[i]);
|
||||
|
||||
if (failed) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -128,93 +128,128 @@ cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes,
|
||||
// on another device or the host.
|
||||
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
|
||||
// This basic test is performed for all combinations of devices and the host.
|
||||
int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_cross_buffer_pointers_coarse_grain)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0],
|
||||
&contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return -1;
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
clKernelWrapper kernel_create_lists =
|
||||
clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
clKernelWrapper kernel_verify_lists =
|
||||
clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
|
||||
// this buffer holds some of the linked list nodes.
|
||||
Node *pNodes = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE,
|
||||
sizeof(Node) * ListLength * numLists, 0);
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
|
||||
// this buffer holds some of the linked list nodes.
|
||||
Node *pNodes2 = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE,
|
||||
sizeof(Node) * ListLength * numLists, 0);
|
||||
|
||||
{
|
||||
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the index into the nodes buffer that is used for node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(size_t), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the count of correct nodes which is computed by the verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
|
||||
//error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
clMemWrapper nodes = clCreateBuffer(
|
||||
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
|
||||
pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
} // inner loop, vi
|
||||
} // outer loop, ci
|
||||
}
|
||||
clMemWrapper nodes2 = clCreateBuffer(
|
||||
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
|
||||
pNodes2, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
clSVMFree(context, pNodes2);
|
||||
clSVMFree(context, pNodes);
|
||||
// this buffer holds the index into the nodes buffer that is used for
|
||||
// node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(size_t), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
return 0;
|
||||
// this buffer holds the count of correct nodes which is computed by the
|
||||
// verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *),
|
||||
(void *)&nodes);
|
||||
// error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *)
|
||||
// pNodes);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *),
|
||||
(void *)&nodes2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *),
|
||||
(void *)&allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *),
|
||||
(void *)&nodes);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *),
|
||||
(void *)&nodes2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *),
|
||||
(void *)&num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the
|
||||
// host). Do this for all possible combinations of devices and host
|
||||
// within the platform.
|
||||
for (int ci = 0; ci < (int)num_devices + 1;
|
||||
ci++) // ci is CreationIndex, index of device/q to create linked
|
||||
// list on
|
||||
{
|
||||
for (int vi = 0; vi < (int)num_devices + 1;
|
||||
vi++) // vi is VerificationIndex, index of device/q to verify
|
||||
// linked list on
|
||||
{
|
||||
if (ci == num_devices) // last device index represents the host,
|
||||
// note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host(
|
||||
queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(
|
||||
ci, queues[ci], allocator, kernel_create_lists,
|
||||
numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
|
||||
if (vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host(
|
||||
vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(
|
||||
vi, queues[vi], num_correct, kernel_verify_lists,
|
||||
ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
} // inner loop, vi
|
||||
} // outer loop, ci
|
||||
}
|
||||
|
||||
clSVMFree(context, pNodes2);
|
||||
clSVMFree(context, pNodes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -70,244 +70,282 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point
|
||||
data->status.store(1, std::memory_order_release);
|
||||
}
|
||||
|
||||
int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_enqueue_api)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
cl_uint num_devices = 0;
|
||||
const size_t elementNum = 1024;
|
||||
const size_t numSVMBuffers = 32;
|
||||
cl_int error = CL_SUCCESS;
|
||||
RandomSeed seed(0);
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
cl_uint num_devices = 0;
|
||||
const size_t elementNum = 1024;
|
||||
const size_t numSVMBuffers = 32;
|
||||
cl_int error = CL_SUCCESS;
|
||||
RandomSeed seed(0);
|
||||
|
||||
error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return TEST_FAIL;
|
||||
error = create_cl_objects(deviceID, NULL, &contextWrapper, NULL, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return TEST_FAIL;
|
||||
|
||||
queue = queues[0];
|
||||
queue = queues[0];
|
||||
|
||||
//all possible sizes of vectors and scalars
|
||||
size_t typeSizes[] = {
|
||||
sizeof(cl_uchar),
|
||||
sizeof(cl_uchar2),
|
||||
sizeof(cl_uchar3),
|
||||
sizeof(cl_uchar4),
|
||||
sizeof(cl_uchar8),
|
||||
sizeof(cl_uchar16),
|
||||
sizeof(cl_ushort),
|
||||
sizeof(cl_ushort2),
|
||||
sizeof(cl_ushort3),
|
||||
sizeof(cl_ushort4),
|
||||
sizeof(cl_ushort8),
|
||||
sizeof(cl_ushort16),
|
||||
sizeof(cl_uint),
|
||||
sizeof(cl_uint2),
|
||||
sizeof(cl_uint3),
|
||||
sizeof(cl_uint4),
|
||||
sizeof(cl_uint8),
|
||||
sizeof(cl_uint16),
|
||||
sizeof(cl_ulong),
|
||||
sizeof(cl_ulong2),
|
||||
sizeof(cl_ulong3),
|
||||
sizeof(cl_ulong4),
|
||||
sizeof(cl_ulong8),
|
||||
sizeof(cl_ulong16),
|
||||
};
|
||||
// all possible sizes of vectors and scalars
|
||||
size_t typeSizes[] = {
|
||||
sizeof(cl_uchar), sizeof(cl_uchar2), sizeof(cl_uchar3),
|
||||
sizeof(cl_uchar4), sizeof(cl_uchar8), sizeof(cl_uchar16),
|
||||
sizeof(cl_ushort), sizeof(cl_ushort2), sizeof(cl_ushort3),
|
||||
sizeof(cl_ushort4), sizeof(cl_ushort8), sizeof(cl_ushort16),
|
||||
sizeof(cl_uint), sizeof(cl_uint2), sizeof(cl_uint3),
|
||||
sizeof(cl_uint4), sizeof(cl_uint8), sizeof(cl_uint16),
|
||||
sizeof(cl_ulong), sizeof(cl_ulong2), sizeof(cl_ulong3),
|
||||
sizeof(cl_ulong4), sizeof(cl_ulong8), sizeof(cl_ulong16),
|
||||
};
|
||||
|
||||
enum allocationTypes {
|
||||
host,
|
||||
svm
|
||||
};
|
||||
|
||||
struct TestType {
|
||||
allocationTypes srcAlloc;
|
||||
allocationTypes dstAlloc;
|
||||
TestType(allocationTypes type1, allocationTypes type2): srcAlloc(type1), dstAlloc(type2){}
|
||||
};
|
||||
|
||||
std::vector<TestType> testTypes;
|
||||
|
||||
testTypes.push_back(TestType(host, host));
|
||||
testTypes.push_back(TestType(host, svm));
|
||||
testTypes.push_back(TestType(svm, host));
|
||||
testTypes.push_back(TestType(svm, svm));
|
||||
|
||||
for (const auto test_case : testTypes)
|
||||
{
|
||||
log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n", test_case.srcAlloc == svm ? "svm" : "host", test_case.dstAlloc == svm ? "svm" : "host");
|
||||
for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i)
|
||||
enum allocationTypes
|
||||
{
|
||||
//generate initial data
|
||||
std::vector<cl_uchar> fillData0(typeSizes[i]), fillData1(typeSizes[i]);
|
||||
generate_data(fillData0, typeSizes[i], seed);
|
||||
generate_data(fillData1, typeSizes[i], seed);
|
||||
size_t data_size = elementNum * typeSizes[i];
|
||||
std::vector<cl_uchar> srcHostData(data_size, 0);
|
||||
std::vector<cl_uchar> dstHostData(data_size, 0);
|
||||
generate_data(srcHostData, srcHostData.size(), seed);
|
||||
generate_data(dstHostData, dstHostData.size(), seed);
|
||||
host,
|
||||
svm
|
||||
};
|
||||
|
||||
cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0);
|
||||
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0);
|
||||
struct TestType
|
||||
{
|
||||
allocationTypes srcAlloc;
|
||||
allocationTypes dstAlloc;
|
||||
TestType(allocationTypes type1, allocationTypes type2)
|
||||
: srcAlloc(type1), dstAlloc(type2)
|
||||
{}
|
||||
};
|
||||
|
||||
clEventWrapper userEvent = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
clEventWrapper eventMemFillList[2];
|
||||
std::vector<TestType> testTypes;
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[0]);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[1]);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
testTypes.push_back(TestType(host, host));
|
||||
testTypes.push_back(TestType(host, svm));
|
||||
testTypes.push_back(TestType(svm, host));
|
||||
testTypes.push_back(TestType(svm, svm));
|
||||
|
||||
error = clSetUserEventStatus(userEvent, CL_COMPLETE);
|
||||
test_error(error, "clSetUserEventStatus failed");
|
||||
for (const auto test_case : testTypes)
|
||||
{
|
||||
log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n",
|
||||
test_case.srcAlloc == svm ? "svm" : "host",
|
||||
test_case.dstAlloc == svm ? "svm" : "host");
|
||||
for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i)
|
||||
{
|
||||
// generate initial data
|
||||
std::vector<cl_uchar> fillData0(typeSizes[i]),
|
||||
fillData1(typeSizes[i]);
|
||||
generate_data(fillData0, typeSizes[i], seed);
|
||||
generate_data(fillData1, typeSizes[i], seed);
|
||||
size_t data_size = elementNum * typeSizes[i];
|
||||
std::vector<cl_uchar> srcHostData(data_size, 0);
|
||||
std::vector<cl_uchar> dstHostData(data_size, 0);
|
||||
generate_data(srcHostData, srcHostData.size(), seed);
|
||||
generate_data(dstHostData, dstHostData.size(), seed);
|
||||
|
||||
cl_uchar * src_ptr;
|
||||
cl_uchar * dst_ptr;
|
||||
if (test_case.srcAlloc == host) {
|
||||
src_ptr = srcHostData.data();
|
||||
} else if (test_case.srcAlloc == svm) {
|
||||
src_ptr = srcBuffer;
|
||||
}
|
||||
if (test_case.dstAlloc == host) {
|
||||
dst_ptr = dstHostData.data();
|
||||
} else if (test_case.dstAlloc == svm) {
|
||||
dst_ptr = dstBuffer;
|
||||
}
|
||||
clEventWrapper eventMemcpy;
|
||||
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size, 2, &eventMemFillList[0], &eventMemcpy);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE, data_size, 0);
|
||||
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE, data_size, 0);
|
||||
|
||||
//coarse grain only supported. Synchronization required using map
|
||||
clEventWrapper eventMap[2];
|
||||
clEventWrapper userEvent = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
clEventWrapper eventMemFillList[2];
|
||||
|
||||
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer, data_size, 1, &eventMemcpy, &eventMap[0]);
|
||||
test_error(error, "clEnqueueSVMMap srcBuffer failed");
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0],
|
||||
typeSizes[i], data_size, 1, &userEvent,
|
||||
&eventMemFillList[0]);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0],
|
||||
typeSizes[i], data_size, 1, &userEvent,
|
||||
&eventMemFillList[1]);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer, data_size, 1, &eventMemcpy, &eventMap[1]);
|
||||
test_error(error, "clEnqueueSVMMap dstBuffer failed");
|
||||
error = clSetUserEventStatus(userEvent, CL_COMPLETE);
|
||||
test_error(error, "clSetUserEventStatus failed");
|
||||
|
||||
error = clWaitForEvents(2, &eventMap[0]);
|
||||
test_error(error, "clWaitForEvents failed");
|
||||
cl_uchar *src_ptr;
|
||||
cl_uchar *dst_ptr;
|
||||
if (test_case.srcAlloc == host)
|
||||
{
|
||||
src_ptr = srcHostData.data();
|
||||
}
|
||||
else if (test_case.srcAlloc == svm)
|
||||
{
|
||||
src_ptr = srcBuffer;
|
||||
}
|
||||
if (test_case.dstAlloc == host)
|
||||
{
|
||||
dst_ptr = dstHostData.data();
|
||||
}
|
||||
else if (test_case.dstAlloc == svm)
|
||||
{
|
||||
dst_ptr = dstBuffer;
|
||||
}
|
||||
clEventWrapper eventMemcpy;
|
||||
error =
|
||||
clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size,
|
||||
2, &eventMemFillList[0], &eventMemcpy);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
//data verification
|
||||
for (size_t j = 0; j < data_size; ++j)
|
||||
{
|
||||
if (dst_ptr[j] != src_ptr[j]) {
|
||||
log_error("Invalid data at index %zu, dst_ptr %d, src_ptr %d\n", j,
|
||||
dst_ptr[j], src_ptr[j]);
|
||||
// coarse grain only supported. Synchronization required using map
|
||||
clEventWrapper eventMap[2];
|
||||
|
||||
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer,
|
||||
data_size, 1, &eventMemcpy, &eventMap[0]);
|
||||
test_error(error, "clEnqueueSVMMap srcBuffer failed");
|
||||
|
||||
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer,
|
||||
data_size, 1, &eventMemcpy, &eventMap[1]);
|
||||
test_error(error, "clEnqueueSVMMap dstBuffer failed");
|
||||
|
||||
error = clWaitForEvents(2, &eventMap[0]);
|
||||
test_error(error, "clWaitForEvents failed");
|
||||
|
||||
// data verification
|
||||
for (size_t j = 0; j < data_size; ++j)
|
||||
{
|
||||
if (dst_ptr[j] != src_ptr[j])
|
||||
{
|
||||
log_error(
|
||||
"Invalid data at index %zu, dst_ptr %d, src_ptr %d\n",
|
||||
j, dst_ptr[j], src_ptr[j]);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
clEventWrapper eventUnmap[2];
|
||||
error =
|
||||
clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]);
|
||||
test_error(error, "clEnqueueSVMUnmap srcBuffer failed");
|
||||
|
||||
error =
|
||||
clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]);
|
||||
test_error(error, "clEnqueueSVMUnmap dstBuffer failed");
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0],
|
||||
typeSizes[i], data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2,
|
||||
&fillData1[0], typeSizes[i],
|
||||
data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer,
|
||||
data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
error = clEnqueueSVMMemcpy(
|
||||
queue, CL_TRUE, dstBuffer + data_size / 2,
|
||||
srcBuffer + data_size / 2, data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer };
|
||||
|
||||
clEventWrapper eventFree;
|
||||
error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree);
|
||||
test_error(error, "clEnqueueSVMFree failed");
|
||||
|
||||
error = clWaitForEvents(1, &eventFree);
|
||||
test_error(error, "clWaitForEvents failed");
|
||||
|
||||
// event info verification for new SVM commands
|
||||
cl_command_type commandType;
|
||||
for (auto &check_event : eventMemFillList)
|
||||
{
|
||||
error =
|
||||
clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MEMFILL)
|
||||
{
|
||||
log_error("Invalid command type returned for "
|
||||
"clEnqueueSVMMemFill\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MEMCPY)
|
||||
{
|
||||
log_error(
|
||||
"Invalid command type returned for clEnqueueSVMMemcpy\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++)
|
||||
{
|
||||
error =
|
||||
clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MAP)
|
||||
{
|
||||
log_error(
|
||||
"Invalid command type returned for clEnqueueSVMMap\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
error =
|
||||
clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_UNMAP)
|
||||
{
|
||||
log_error("Invalid command type returned for "
|
||||
"clEnqueueSVMUnmap\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE,
|
||||
sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_FREE)
|
||||
{
|
||||
log_error(
|
||||
"Invalid command type returned for clEnqueueSVMFree\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<void *> buffers(numSVMBuffers, 0);
|
||||
for (size_t i = 0; i < numSVMBuffers; ++i)
|
||||
buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
|
||||
|
||||
// verify if callback is triggered correctly
|
||||
CallbackData data;
|
||||
data.status = 0;
|
||||
|
||||
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0],
|
||||
callback_svm_free, &data, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMFree failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
// wait for the callback
|
||||
while (data.status.load(std::memory_order_acquire) == 0)
|
||||
{
|
||||
usleep(1);
|
||||
}
|
||||
|
||||
// check if number of SVM pointers returned in the callback matches with
|
||||
// expected
|
||||
if (data.num_svm_pointers != buffers.size())
|
||||
{
|
||||
log_error("Invalid number of SVM pointers returned in the callback, "
|
||||
"expected: %zu, got: %d\n",
|
||||
buffers.size(), data.num_svm_pointers);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
// check if pointers returned in callback are correct
|
||||
for (size_t i = 0; i < buffers.size(); ++i)
|
||||
{
|
||||
if (data.svm_pointers[i] != buffers[i])
|
||||
{
|
||||
log_error(
|
||||
"Invalid SVM pointer returned in the callback, idx: %zu\n", i);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
clEventWrapper eventUnmap[2];
|
||||
error = clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]);
|
||||
test_error(error, "clEnqueueSVMUnmap srcBuffer failed");
|
||||
|
||||
error = clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]);
|
||||
test_error(error, "clEnqueueSVMUnmap dstBuffer failed");
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + data_size / 2, srcBuffer + data_size / 2, data_size / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer };
|
||||
|
||||
clEventWrapper eventFree;
|
||||
error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree);
|
||||
test_error(error, "clEnqueueSVMFree failed");
|
||||
|
||||
error = clWaitForEvents(1, &eventFree);
|
||||
test_error(error, "clWaitForEvents failed");
|
||||
|
||||
//event info verification for new SVM commands
|
||||
cl_command_type commandType;
|
||||
for (auto &check_event : eventMemFillList) {
|
||||
error = clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MEMFILL)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMMemFill\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MEMCPY)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMMemcpy\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++) {
|
||||
error = clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MAP)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMMap\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
error = clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_UNMAP)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMUnmap\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_FREE)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMFree\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<void *> buffers(numSVMBuffers, 0);
|
||||
for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
|
||||
|
||||
//verify if callback is triggered correctly
|
||||
CallbackData data;
|
||||
data.status = 0;
|
||||
|
||||
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMFree failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
//wait for the callback
|
||||
while(data.status.load(std::memory_order_acquire) == 0) {
|
||||
usleep(1);
|
||||
}
|
||||
|
||||
//check if number of SVM pointers returned in the callback matches with expected
|
||||
if (data.num_svm_pointers != buffers.size())
|
||||
{
|
||||
log_error("Invalid number of SVM pointers returned in the callback, "
|
||||
"expected: %zu, got: %d\n",
|
||||
buffers.size(), data.num_svm_pointers);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
//check if pointers returned in callback are correct
|
||||
for (size_t i = 0; i < buffers.size(); ++i)
|
||||
{
|
||||
if (data.svm_pointers[i] != buffers[i])
|
||||
{
|
||||
log_error("Invalid SVM pointer returned in the callback, idx: %zu\n",
|
||||
i);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -139,44 +139,55 @@ int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper*
|
||||
// Each bin in the hash table is a linked list. Each bin is protected against simultaneous
|
||||
// update using a lock free technique. The correctness of the list is verfied on the host.
|
||||
// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering.
|
||||
int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_fine_grain_memory_consistency)
|
||||
{
|
||||
clContextWrapper context;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper contextWrapper;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
std::vector<std::string> required_extensions;
|
||||
required_extensions.push_back("cl_khr_int64_base_atomics");
|
||||
required_extensions.push_back("cl_khr_int64_extended_atomics");
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
std::vector<std::string> required_extensions;
|
||||
required_extensions.push_back("cl_khr_int64_base_atomics");
|
||||
required_extensions.push_back("cl_khr_int64_extended_atomics");
|
||||
|
||||
// Make pragmas visible for 64-bit addresses
|
||||
hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0';
|
||||
// Make pragmas visible for 64-bit addresses
|
||||
hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0';
|
||||
|
||||
char *source[] = { hash_table_kernel };
|
||||
char *source[] = { hash_table_kernel };
|
||||
|
||||
err = create_cl_objects(deviceID, (const char**)source, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS, required_extensions);
|
||||
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(err < 0) return -1; // fail test.
|
||||
err = create_cl_objects(
|
||||
deviceID, (const char **)source, &contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS,
|
||||
required_extensions);
|
||||
context = contextWrapper;
|
||||
if (err == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (err < 0) return -1; // fail test.
|
||||
|
||||
kernel = clCreateKernel(program, "build_hash_table", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
size_t num_pixels = num_elements;
|
||||
kernel = clCreateKernel(program, "build_hash_table", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
size_t num_pixels = num_elements;
|
||||
|
||||
int result;
|
||||
cl_uint numBins = 1; // all work groups in all devices and the host code will hammer on this one lock.
|
||||
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels);
|
||||
if(result == -1) return result;
|
||||
int result;
|
||||
cl_uint numBins = 1; // all work groups in all devices and the host code
|
||||
// will hammer on this one lock.
|
||||
result = launch_kernels_and_verify(contextWrapper, queues, kernel,
|
||||
num_devices, numBins, num_pixels);
|
||||
if (result == -1) return result;
|
||||
|
||||
numBins = 2; // 2 locks within in same cache line will get hit from different devices and host.
|
||||
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels);
|
||||
if(result == -1) return result;
|
||||
numBins = 2; // 2 locks within in same cache line will get hit from
|
||||
// different devices and host.
|
||||
result = launch_kernels_and_verify(contextWrapper, queues, kernel,
|
||||
num_devices, numBins, num_pixels);
|
||||
if (result == -1) return result;
|
||||
|
||||
numBins = 29; // locks span a few cache lines.
|
||||
result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels);
|
||||
if(result == -1) return result;
|
||||
numBins = 29; // locks span a few cache lines.
|
||||
result = launch_kernels_and_verify(contextWrapper, queues, kernel,
|
||||
num_devices, numBins, num_pixels);
|
||||
if (result == -1) return result;
|
||||
|
||||
return result;
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -44,67 +44,88 @@ void spawnAnalysisTask(int location)
|
||||
// Concept: a device kernel is used to search an input image for regions that match a target pattern.
|
||||
// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices).
|
||||
// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets.
|
||||
int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_fine_grain_sync_buffers)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
|
||||
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(err < 0) return -1; // fail test.
|
||||
err = create_cl_objects(deviceID, &find_targets_kernel[0], &contextWrapper,
|
||||
&program, &queues[0], &num_devices,
|
||||
CL_DEVICE_SVM_FINE_GRAIN_BUFFER
|
||||
| CL_DEVICE_SVM_ATOMICS);
|
||||
context = contextWrapper;
|
||||
if (err == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (err < 0) return -1; // fail test.
|
||||
|
||||
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
|
||||
size_t num_pixels = num_elements;
|
||||
//cl_uint num_pixels = 1024*1024*32;
|
||||
size_t num_pixels = num_elements;
|
||||
// cl_uint num_pixels = 1024*1024*32;
|
||||
|
||||
cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
|
||||
cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0);
|
||||
cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0);
|
||||
cl_uint *pInputImage = (cl_uint *)clSVMAlloc(
|
||||
context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(cl_uint) * num_pixels, 0);
|
||||
cl_uint *pNumTargetsFound = (cl_uint *)clSVMAlloc(
|
||||
context,
|
||||
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
|
||||
sizeof(cl_uint), 0);
|
||||
cl_int *pTargetLocations = (cl_int *)clSVMAlloc(
|
||||
context,
|
||||
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
|
||||
sizeof(cl_int) * MAX_TARGETS, 0);
|
||||
|
||||
cl_uint targetDescriptor = 777;
|
||||
*pNumTargetsFound = 0;
|
||||
cl_uint i;
|
||||
for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1;
|
||||
for(i=0; i < num_pixels; i++) pInputImage[i] = 0;
|
||||
pInputImage[0] = targetDescriptor;
|
||||
pInputImage[3] = targetDescriptor;
|
||||
pInputImage[num_pixels - 1] = targetDescriptor;
|
||||
cl_uint targetDescriptor = 777;
|
||||
*pNumTargetsFound = 0;
|
||||
cl_uint i;
|
||||
for (i = 0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1;
|
||||
for (i = 0; i < num_pixels; i++) pInputImage[i] = 0;
|
||||
pInputImage[0] = targetDescriptor;
|
||||
pInputImage[3] = targetDescriptor;
|
||||
pInputImage[num_pixels - 1] = targetDescriptor;
|
||||
|
||||
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
|
||||
err |=
|
||||
clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&targetDescriptor);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
cl_event done;
|
||||
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done);
|
||||
test_error(err,"clEnqueueNDRangeKernel failed");
|
||||
clFlush(queues[0]);
|
||||
cl_event done;
|
||||
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL,
|
||||
0, NULL, &done);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
clFlush(queues[0]);
|
||||
|
||||
|
||||
i=0;
|
||||
cl_int status;
|
||||
// check for new targets, if found spawn a task to analyze target.
|
||||
do {
|
||||
err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
|
||||
test_error(err,"clGetEventInfo failed");
|
||||
if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet.
|
||||
i = 0;
|
||||
cl_int status;
|
||||
// check for new targets, if found spawn a task to analyze target.
|
||||
do
|
||||
{
|
||||
spawnAnalysisTask(pTargetLocations[i]);
|
||||
i++;
|
||||
}
|
||||
} while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1);
|
||||
err = clGetEventInfo(done, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &status, NULL);
|
||||
test_error(err, "clGetEventInfo failed");
|
||||
if (AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
|
||||
!= -1) // -1 indicates slot not used yet.
|
||||
{
|
||||
spawnAnalysisTask(pTargetLocations[i]);
|
||||
i++;
|
||||
}
|
||||
} while (status != CL_COMPLETE
|
||||
|| AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
|
||||
!= -1);
|
||||
|
||||
clReleaseEvent(done);
|
||||
clSVMFree(context, pInputImage);
|
||||
clSVMFree(context, pNumTargetsFound);
|
||||
clSVMFree(context, pTargetLocations);
|
||||
clReleaseEvent(done);
|
||||
clSVMFree(context, pInputImage);
|
||||
clSVMFree(context, pNumTargetsFound);
|
||||
clSVMFree(context, pTargetLocations);
|
||||
|
||||
if(i != 3) return -1;
|
||||
return 0;
|
||||
if (i != 3) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ wait_and_release(const char* s, cl_event* evs, int n)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_migrate)
|
||||
{
|
||||
std::vector<cl_uint> amem(GLOBAL_SIZE);
|
||||
std::vector<cl_uint> bmem(GLOBAL_SIZE);
|
||||
@@ -86,15 +86,17 @@ int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue
|
||||
|
||||
RandomSeed seed(0);
|
||||
|
||||
clContextWrapper context = NULL;
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
cl_uint num_devices = 0;
|
||||
clProgramWrapper program;
|
||||
cl_int error;
|
||||
|
||||
error = create_cl_objects(deviceID, &sources[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if (error)
|
||||
return -1;
|
||||
error = create_cl_objects(deviceID, &sources[0], &contextWrapper, &program,
|
||||
&queues[0], &num_devices,
|
||||
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return -1;
|
||||
|
||||
if (num_devices > 1) {
|
||||
log_info(" Running on two devices.\n");
|
||||
|
||||
@@ -35,81 +35,107 @@ const char *SVMPointerPassing_test_kernel[] = {
|
||||
// The buffer is initialized to known values at each location.
|
||||
// The kernel checks that it finds the expected value at each location.
|
||||
// TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--.
|
||||
int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_pointer_passing)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0],
|
||||
&contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return -1;
|
||||
|
||||
clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error);
|
||||
test_error(error,"clCreateKernel failed");
|
||||
clKernelWrapper kernel_verify_char =
|
||||
clCreateKernel(program, "verify_char", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
size_t bufSize = 256;
|
||||
cl_uchar *pbuf_svm_alloc = (cl_uchar*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0);
|
||||
size_t bufSize = 256;
|
||||
cl_uchar *pbuf_svm_alloc = (cl_uchar *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE, sizeof(cl_uchar) * bufSize, 0);
|
||||
|
||||
cl_int *pNumCorrect = NULL;
|
||||
pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
|
||||
cl_int *pNumCorrect = NULL;
|
||||
pNumCorrect =
|
||||
(cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
|
||||
|
||||
{
|
||||
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf_svm_alloc, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them.
|
||||
cl_command_queue cmdq = queues[0];
|
||||
cl_uchar* pbuf_map_buffer = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error);
|
||||
test_error2(error, pbuf_map_buffer, "clEnqueueMapBuffer failed");
|
||||
for(int i = 0; i<(int)bufSize; i++)
|
||||
{
|
||||
pbuf_map_buffer[i]= (cl_uchar)i;
|
||||
}
|
||||
error = clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
clMemWrapper buf =
|
||||
clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
|
||||
sizeof(cl_uchar) * bufSize, pbuf_svm_alloc, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
for (cl_uint ii = 0; ii<num_devices; ++ii) // iterate over all devices in the platform.
|
||||
{
|
||||
cmdq = queues[ii];
|
||||
for(int i = 0; i<(int)bufSize; i++)
|
||||
{
|
||||
cl_uchar* pChar = &pbuf_svm_alloc[i];
|
||||
error = clSetKernelArgSVMPointer(kernel_verify_char, 0, pChar); // pass a pointer to a location within the buffer
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar), (void *) &i ); // pass the expected value at the above location.
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL, &bufSize, NULL, 0, NULL, NULL);
|
||||
test_error(error,"clEnqueueNDRangeKernel failed");
|
||||
clMemWrapper num_correct = clCreateBuffer(
|
||||
context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
|
||||
test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
|
||||
cl_int correct_count = *pNumCorrect;
|
||||
error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
|
||||
error = clSetKernelArg(kernel_verify_char, 1, sizeof(void *),
|
||||
(void *)&num_correct);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// put values into buf so that we can expect to see these values in the
|
||||
// kernel when we pass a pointer to them.
|
||||
cl_command_queue cmdq = queues[0];
|
||||
cl_uchar *pbuf_map_buffer = (cl_uchar *)clEnqueueMapBuffer(
|
||||
cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
|
||||
sizeof(cl_uchar) * bufSize, 0, NULL, NULL, &error);
|
||||
test_error2(error, pbuf_map_buffer, "clEnqueueMapBuffer failed");
|
||||
for (int i = 0; i < (int)bufSize; i++)
|
||||
{
|
||||
pbuf_map_buffer[i] = (cl_uchar)i;
|
||||
}
|
||||
error =
|
||||
clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
|
||||
if(correct_count != 1)
|
||||
for (cl_uint ii = 0; ii < num_devices;
|
||||
++ii) // iterate over all devices in the platform.
|
||||
{
|
||||
log_error("Passing pointer directly to kernel for byte #%d failed on device %d\n", i, ii);
|
||||
return -1;
|
||||
cmdq = queues[ii];
|
||||
for (int i = 0; i < (int)bufSize; i++)
|
||||
{
|
||||
cl_uchar *pChar = &pbuf_svm_alloc[i];
|
||||
error = clSetKernelArgSVMPointer(
|
||||
kernel_verify_char, 0,
|
||||
pChar); // pass a pointer to a location within the buffer
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar),
|
||||
(void *)&i); // pass the expected value
|
||||
// at the above location.
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error =
|
||||
clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL,
|
||||
&bufSize, NULL, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
pNumCorrect = (cl_int *)clEnqueueMapBuffer(
|
||||
cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
|
||||
sizeof(cl_int), 0, NULL, NULL, &error);
|
||||
test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
|
||||
cl_int correct_count = *pNumCorrect;
|
||||
error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect,
|
||||
0, NULL, NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
|
||||
if (correct_count != 1)
|
||||
{
|
||||
log_error("Passing pointer directly to kernel for byte #%d "
|
||||
"failed on device %d\n",
|
||||
i, ii);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
}
|
||||
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
}
|
||||
|
||||
clSVMFree(context, pbuf_svm_alloc);
|
||||
clSVMFree(context, pNumCorrect);
|
||||
|
||||
clSVMFree(context, pbuf_svm_alloc);
|
||||
clSVMFree(context, pNumCorrect);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
|
||||
|
||||
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
|
||||
//
|
||||
int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_set_kernel_exec_info_svm_ptrs)
|
||||
{
|
||||
clContextWrapper c = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
|
||||
@@ -272,12 +272,14 @@ int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_address_space_coarse_grain_old_api)
|
||||
{
|
||||
return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_FALSE);
|
||||
return shared_address_space_coarse_grain(deviceID, context, queue,
|
||||
num_elements, CL_FALSE);
|
||||
}
|
||||
|
||||
int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_address_space_coarse_grain_new_api)
|
||||
{
|
||||
return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_TRUE);
|
||||
return shared_address_space_coarse_grain(deviceID, context, queue,
|
||||
num_elements, CL_TRUE);
|
||||
}
|
||||
|
||||
@@ -20,82 +20,103 @@
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
|
||||
// the platform. The test passes only if every combination passes.
|
||||
int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_address_space_fine_grain)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(error < 0) return -1; // fail test.
|
||||
error = create_cl_objects(
|
||||
deviceID, &linked_list_create_and_verify_kernels[0], &contextWrapper,
|
||||
&program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
|
||||
context = contextWrapper;
|
||||
if (error == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (error < 0) return -1; // fail test.
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
clKernelWrapper kernel_create_lists =
|
||||
clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
clKernelWrapper kernel_verify_lists =
|
||||
clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this allocation holds the linked list nodes.
|
||||
// FIXME: remove the alignment once prototype can handle it
|
||||
Node* pNodes = (Node*) align_malloc(numLists*ListLength*sizeof(Node),128);
|
||||
test_error2(error, pNodes, "malloc failed");
|
||||
// this allocation holds the linked list nodes.
|
||||
// FIXME: remove the alignment once prototype can handle it
|
||||
Node *pNodes =
|
||||
(Node *)align_malloc(numLists * ListLength * sizeof(Node), 128);
|
||||
test_error2(error, pNodes, "malloc failed");
|
||||
|
||||
// this allocation holds an index into the nodes buffer, it is used for node allocation
|
||||
size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128);
|
||||
test_error2(error, pAllocator, "malloc failed");
|
||||
// this allocation holds an index into the nodes buffer, it is used for node
|
||||
// allocation
|
||||
size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128);
|
||||
test_error2(error, pAllocator, "malloc failed");
|
||||
|
||||
// this allocation holds the count of correct nodes, which is computed by the verify kernel.
|
||||
cl_int* pNum_correct = (cl_int*) align_malloc(sizeof(cl_int), 128);
|
||||
test_error2(error, pNum_correct, "malloc failed");
|
||||
// this allocation holds the count of correct nodes, which is computed by
|
||||
// the verify kernel.
|
||||
cl_int *pNum_correct = (cl_int *)align_malloc(sizeof(cl_int), 128);
|
||||
test_error2(error, pNum_correct, "malloc failed");
|
||||
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),(void *) &ListLength);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
// Create linked list on one device and verify on another device (or the
|
||||
// host). Do this for all possible combinations of devices and host within
|
||||
// the platform.
|
||||
for (int ci = 0; ci < (int)num_devices + 1;
|
||||
ci++) // ci is CreationIndex, index of device/q to create linked list
|
||||
// on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
log_info("creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
for (int vi = 0; vi < (int)num_devices + 1;
|
||||
vi++) // vi is VerificationIndex, index of device/q to verify
|
||||
// linked list on
|
||||
{
|
||||
if (ci == num_devices) // last device index represents the host,
|
||||
// note the num_device+1 above.
|
||||
{
|
||||
log_info("creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(
|
||||
ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNum_correct, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
if (vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(
|
||||
vi, queues[vi], pNum_correct, kernel_verify_lists,
|
||||
ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
align_free(pNodes);
|
||||
align_free(pAllocator);
|
||||
align_free(pNum_correct);
|
||||
return 0;
|
||||
align_free(pNodes);
|
||||
align_free(pAllocator);
|
||||
align_free(pNum_correct);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -61,78 +61,104 @@ cl_int verify_linked_lists_on_device_no_map(int vi, cl_command_queue cmdq,cl_int
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
|
||||
// the platform. The test passes only if every combination passes.
|
||||
int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_address_space_fine_grain_buffers)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(error < 0) return -1; // fail test.
|
||||
error = create_cl_objects(
|
||||
deviceID, &linked_list_create_and_verify_kernels[0], &contextWrapper,
|
||||
&program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error == 1)
|
||||
return 0; // no devices capable of requested SVM level, so don't execute
|
||||
// but count test as passing.
|
||||
if (error < 0) return -1; // fail test.
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
clKernelWrapper kernel_create_lists =
|
||||
clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
clKernelWrapper kernel_verify_lists =
|
||||
clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this buffer holds the linked list nodes.
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(Node)*ListLength*numLists, 0);
|
||||
// this buffer holds the linked list nodes.
|
||||
Node *pNodes = (Node *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(Node) * ListLength * numLists, 0);
|
||||
|
||||
// this buffer holds an index into the nodes buffer, it is used for node allocation
|
||||
size_t *pAllocator = (size_t*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(size_t), 0);
|
||||
// this buffer holds an index into the nodes buffer, it is used for node
|
||||
// allocation
|
||||
size_t *pAllocator = (size_t *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(size_t), 0);
|
||||
|
||||
// this buffer holds the count of correct nodes, which is computed by the verify kernel.
|
||||
cl_int *pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_int), 0);
|
||||
// this buffer holds the count of correct nodes, which is computed by the
|
||||
// verify kernel.
|
||||
cl_int *pNumCorrect = (cl_int *)clSVMAlloc(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
|
||||
sizeof(cl_int), 0);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
// Create linked list on one device and verify on another device (or the
|
||||
// host). Do this for all possible combinations of devices and host within
|
||||
// the platform.
|
||||
for (int ci = 0; ci < (int)num_devices + 1;
|
||||
ci++) // ci is CreationIndex, index of device/q to create linked list
|
||||
// on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
log_info("SVM: creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
for (int vi = 0; vi < (int)num_devices + 1;
|
||||
vi++) // vi is VerificationIndex, index of device/q to verify
|
||||
// linked list on
|
||||
{
|
||||
if (ci == num_devices) // last device index represents the host,
|
||||
// note the num_device+1 above.
|
||||
{
|
||||
log_info("SVM: creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(
|
||||
ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNumCorrect, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
if (vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(
|
||||
vi, queues[vi], pNumCorrect, kernel_verify_lists,
|
||||
ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clSVMFree(context, pNodes);
|
||||
clSVMFree(context, pAllocator);
|
||||
clSVMFree(context, pNumCorrect);
|
||||
clSVMFree(context, pNodes);
|
||||
clSVMFree(context, pAllocator);
|
||||
clSVMFree(context, pNumCorrect);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -125,118 +125,160 @@ cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem node
|
||||
// on another device or the host.
|
||||
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
|
||||
// This basic test is performed for all combinations of devices and the host.
|
||||
int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
REGISTER_TEST(svm_shared_sub_buffers)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
clContextWrapper contextWrapper = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0],
|
||||
&contextWrapper, &program, &queues[0],
|
||||
&num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
context = contextWrapper;
|
||||
if (error) return -1;
|
||||
|
||||
size_t numLists = num_elements;
|
||||
if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size.
|
||||
size_t numLists = num_elements;
|
||||
if (numLists & 0x1)
|
||||
numLists++; // force even size, so we can easily create two sub-buffers
|
||||
// of same size.
|
||||
|
||||
cl_int ListLength = 32;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
clKernelWrapper kernel_create_lists =
|
||||
clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
clKernelWrapper kernel_verify_lists =
|
||||
clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
size_t nodes_bufsize = sizeof(Node)*ListLength*numLists;
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
size_t nodes_bufsize = sizeof(Node) * ListLength * numLists;
|
||||
Node *pNodes =
|
||||
(Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
Node *pNodes2 =
|
||||
(Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
|
||||
{
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
cl_buffer_region r;
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
|
||||
|
||||
// this buffer holds the index into the nodes buffer that is used for node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(size_t), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the count of correct nodes which is computed by the verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
|
||||
nodes_bufsize, pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
} // inner loop, vi
|
||||
} // outer loop, ci
|
||||
}
|
||||
clSVMFree(context, pNodes2);
|
||||
clSVMFree(context, pNodes);
|
||||
cl_buffer_region r;
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes_sb1 = clCreateSubBuffer(
|
||||
nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes_sb2 = clCreateSubBuffer(
|
||||
nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
return 0;
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes2 = clCreateBuffer(
|
||||
context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists,
|
||||
pNodes2, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes2_sb1 = clCreateSubBuffer(
|
||||
nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes2_sb2 = clCreateSubBuffer(
|
||||
nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void *)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
|
||||
// this buffer holds the index into the nodes buffer that is used for
|
||||
// node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(size_t), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the count of correct nodes which is computed by the
|
||||
// verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *),
|
||||
(void *)&nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *),
|
||||
(void *)&nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *),
|
||||
(void *)&nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void *),
|
||||
(void *)&nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void *),
|
||||
(void *)&allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *),
|
||||
(void *)&nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *),
|
||||
(void *)&nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *),
|
||||
(void *)&nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void *),
|
||||
(void *)&nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void *),
|
||||
(void *)&num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),
|
||||
(void *)&ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the
|
||||
// host). Do this for all possible combinations of devices and host
|
||||
// within the platform.
|
||||
for (int ci = 0; ci < (int)num_devices + 1;
|
||||
ci++) // ci is CreationIndex, index of device/q to create linked
|
||||
// list on
|
||||
{
|
||||
for (int vi = 0; vi < (int)num_devices + 1;
|
||||
vi++) // vi is VerificationIndex, index of device/q to verify
|
||||
// linked list on
|
||||
{
|
||||
if (ci == num_devices) // last device index represents the host,
|
||||
// note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host_sb(
|
||||
queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(
|
||||
ci, queues[ci], allocator, kernel_create_lists,
|
||||
numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
|
||||
if (vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host_sb(
|
||||
vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(
|
||||
vi, queues[vi], num_correct, kernel_verify_lists,
|
||||
ListLength, numLists);
|
||||
if (error) return -1;
|
||||
}
|
||||
} // inner loop, vi
|
||||
} // outer loop, ci
|
||||
}
|
||||
clSVMFree(context, pNodes2);
|
||||
clSVMFree(context, pNodes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user