diff --git a/test_conformance/SVM/common.h b/test_conformance/SVM/common.h index f6a937c7..d2b5cfd2 100644 --- a/test_conformance/SVM/common.h +++ b/test_conformance/SVM/common.h @@ -81,25 +81,6 @@ extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, c extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists ); extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists ); -extern int test_svm_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID, - cl_context context, - cl_command_queue queue, - int num_elements); -extern int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); - extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector extensions_list = std::vector()); extern const char *linked_list_create_and_verify_kernels[]; diff --git a/test_conformance/SVM/main.cpp b/test_conformance/SVM/main.cpp index 819901a3..91a14e5b 100644 --- a/test_conformance/SVM/main.cpp +++ b/test_conformance/SVM/main.cpp @@ -260,26 +260,6 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt return 0; } -test_definition test_list[] = { - ADD_TEST(svm_byte_granularity), - ADD_TEST(svm_set_kernel_exec_info_svm_ptrs), - ADD_TEST(svm_fine_grain_memory_consistency), - ADD_TEST(svm_fine_grain_sync_buffers), - ADD_TEST(svm_shared_address_space_fine_grain), - ADD_TEST(svm_shared_sub_buffers), - ADD_TEST(svm_shared_address_space_fine_grain_buffers), - ADD_TEST(svm_allocate_shared_buffer), - ADD_TEST(svm_allocate_shared_buffer_negative), - ADD_TEST(svm_shared_address_space_coarse_grain_old_api), - ADD_TEST(svm_shared_address_space_coarse_grain_new_api), - ADD_TEST(svm_cross_buffer_pointers_coarse_grain), - ADD_TEST(svm_pointer_passing), - ADD_TEST(svm_enqueue_api), - ADD_TEST_VERSION(svm_migrate, Version(2, 1)), -}; - -const int test_num = ARRAY_SIZE( test_list ); - test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); auto expected_min_version = Version(2, 0); @@ -310,6 +290,7 @@ test_status InitCL(cl_device_id device) { int main(int argc, const char *argv[]) { - return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL); + return runTestHarnessWithCheck( + argc, argv, test_registry::getInstance().num_tests(), + test_registry::getInstance().definitions(), true, 0, InitCL); } - diff --git a/test_conformance/SVM/test_allocate_shared_buffer.cpp b/test_conformance/SVM/test_allocate_shared_buffer.cpp index bf94698c..e4dcffda 100644 --- a/test_conformance/SVM/test_allocate_shared_buffer.cpp +++ b/test_conformance/SVM/test_allocate_shared_buffer.cpp @@ -41,71 +41,83 @@ const char* flag_set_names[] = { }; -int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_allocate_shared_buffer) { - clContextWrapper context = NULL; - clProgramWrapper program = NULL; - cl_uint num_devices = 0; - cl_int err = CL_SUCCESS; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; - cl_device_svm_capabilities caps; - err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL); - test_error(err,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); + cl_device_svm_capabilities caps; + err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, + sizeof(cl_device_svm_capabilities), &caps, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); - // under construction... - err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if(err) return -1; + // under construction... + err = + create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (err) return -1; - size_t size = 1024; + size_t size = 1024; - // iteration over flag combos - int num_flags = sizeof(flag_set)/sizeof(cl_mem_flags); - for(int i = 0; i < num_flags; i++) - { - if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0) - || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0)) + // iteration over flag combos + int num_flags = sizeof(flag_set) / sizeof(cl_mem_flags); + for (int i = 0; i < num_flags; i++) { - log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]); - continue; - } - - log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]); - cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0); - if(pBufData1 == NULL) - { - log_error("SVMalloc returned NULL"); - return -1; - } - - { - clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err); - test_error(err,"clCreateBuffer failed"); - - cl_char *pBufData2 = NULL; - cl_uint flags = CL_MAP_READ | CL_MAP_READ; - if(flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE; - if(flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ; - - if(!(flag_set[i] & CL_MEM_HOST_NO_ACCESS)) - { - pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err); - test_error(err, "clEnqueueMapBuffer failed"); - - if(pBufData2 != pBufData1 || NULL == pBufData1) + if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 + && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0) + || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 + && (caps & CL_DEVICE_SVM_ATOMICS) == 0)) { - log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc"); - return -1; + log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]); + continue; } - err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL); - test_error(err, "clEnqueueUnmapMemObject failed"); - err = clFinish(queues[0]); - test_error(err, "clFinish failed"); - } + + log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]); + cl_char *pBufData1 = + (cl_char *)clSVMAlloc(context, flag_set[i], size, 0); + if (pBufData1 == NULL) + { + log_error("SVMalloc returned NULL"); + return -1; + } + + { + clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + size, pBufData1, &err); + test_error(err, "clCreateBuffer failed"); + + cl_char *pBufData2 = NULL; + cl_uint flags = CL_MAP_READ | CL_MAP_READ; + if (flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE; + if (flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ; + + if (!(flag_set[i] & CL_MEM_HOST_NO_ACCESS)) + { + pBufData2 = (cl_char *)clEnqueueMapBuffer( + queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL, NULL, + &err); + test_error(err, "clEnqueueMapBuffer failed"); + + if (pBufData2 != pBufData1 || NULL == pBufData1) + { + log_error("SVM pointer returned by clEnqueueMapBuffer " + "doesn't match pointer returned by clSVMalloc"); + return -1; + } + err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, + NULL, NULL); + test_error(err, "clEnqueueUnmapMemObject failed"); + err = clFinish(queues[0]); + test_error(err, "clFinish failed"); + } + } + + clSVMFree(context, pBufData1); } - clSVMFree(context, pBufData1); - } - - return 0; + return 0; } diff --git a/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp b/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp index 852242dd..5d8513bb 100644 --- a/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp +++ b/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp @@ -41,12 +41,9 @@ const char* svm_flag_set_names[] = { }; -int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID, - cl_context context2, - cl_command_queue queue, - int num_elements) +REGISTER_TEST(svm_allocate_shared_buffer_negative) { - clContextWrapper context = NULL; + clContextWrapper contextWrapper = NULL; clProgramWrapper program = NULL; cl_uint num_devices = 0; cl_int err = CL_SUCCESS; @@ -58,8 +55,10 @@ int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID, test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); // under construction... - err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], - &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + err = + create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; if (err) return err; size_t size = 1024; diff --git a/test_conformance/SVM/test_byte_granularity.cpp b/test_conformance/SVM/test_byte_granularity.cpp index 6dbb3649..d9d12221 100644 --- a/test_conformance/SVM/test_byte_granularity.cpp +++ b/test_conformance/SVM/test_byte_granularity.cpp @@ -48,100 +48,115 @@ const char *byte_manipulation_kernels[] = { }; - -int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_byte_granularity) { - clContextWrapper context; - clProgramWrapper program; - clKernelWrapper k1,k2; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper; + clProgramWrapper program; + clKernelWrapper k1, k2; + clCommandQueueWrapper queues[MAXQ]; - cl_uint num_devices = 0; - cl_int err = CL_SUCCESS; + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; - err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER); - if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. - if(err < 0) return -1; // fail test. + err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], + &contextWrapper, &program, &queues[0], &num_devices, + CL_DEVICE_SVM_FINE_GRAIN_BUFFER); + context = contextWrapper; + if (err == 1) + return 0; // no devices capable of requested SVM level, so don't execute + // but count test as passing. + if (err < 0) return -1; // fail test. - cl_uint num_devices_plus_host = num_devices + 1; + cl_uint num_devices_plus_host = num_devices + 1; - k1 = clCreateKernel(program, "write_owned_locations", &err); - test_error(err, "clCreateKernel failed"); - k2 = clCreateKernel(program, "sum_neighbor_locations", &err); - test_error(err, "clCreateKernel failed"); + k1 = clCreateKernel(program, "write_owned_locations", &err); + test_error(err, "clCreateKernel failed"); + k2 = clCreateKernel(program, "sum_neighbor_locations", &err); + test_error(err, "clCreateKernel failed"); - cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0); + cl_char *pA = (cl_char *)clSVMAlloc( + context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, + sizeof(cl_char) * num_elements, 0); - cl_uint **error_counts = (cl_uint**) malloc(sizeof(void*) * num_devices); + cl_uint **error_counts = (cl_uint **)malloc(sizeof(void *) * num_devices); - for(cl_uint i=0; i < num_devices; i++) { - error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0); - *error_counts[i] = 0; - } - for(int i=0; i < num_elements; i++) pA[i] = -1; + for (cl_uint i = 0; i < num_devices; i++) + { + error_counts[i] = (cl_uint *)clSVMAlloc( + context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, + sizeof(cl_uint), 0); + *error_counts[i] = 0; + } + for (int i = 0; i < num_elements; i++) pA[i] = -1; - err |= clSetKernelArgSVMPointer(k1, 0, pA); - err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host); - test_error(err, "clSetKernelArg failed"); - - // get all the devices going simultaneously - size_t element_num = num_elements; - for(cl_uint d=0; d < num_devices; d++) // device ids starting at 1. - { - err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d); - test_error(err, "clSetKernelArg failed"); - err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL); - test_error(err,"clEnqueueNDRangeKernel failed"); - } - - for(cl_uint d=0; d < num_devices; d++) clFlush(queues[d]); - - cl_uint host_id = num_devices; // host code will take the id above the devices. - for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id; - - for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); - - // now check that each device can see the byte writes made by the other devices. - - err |= clSetKernelArgSVMPointer(k2, 0, pA); - err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host); - test_error(err, "clSetKernelArg failed"); - - // adjusted so k2 doesn't read past end of buffer - size_t adjusted_num_elements = num_elements - num_devices; - for(cl_uint id = 0; id < num_devices; id++) - { - err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]); + err |= clSetKernelArgSVMPointer(k1, 0, pA); + err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host); test_error(err, "clSetKernelArg failed"); - err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL); - test_error(err,"clEnqueueNDRangeKernel failed"); - } + // get all the devices going simultaneously + size_t element_num = num_elements; + for (cl_uint d = 0; d < num_devices; d++) // device ids starting at 1. + { + err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d); + test_error(err, "clSetKernelArg failed"); + err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, + 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed"); + } - for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); + for (cl_uint d = 0; d < num_devices; d++) clFlush(queues[d]); - bool failed = false; + cl_uint host_id = + num_devices; // host code will take the id above the devices. + for (int i = (int)num_devices; i < num_elements; i += num_devices_plus_host) + pA[i] = host_id; - // see if any of the devices found errors - for(cl_uint i=0; i < num_devices; i++) { - if(*error_counts[i] > 0) - failed = true; - } - cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2; - // check that host can see the byte writes made by the devices. - for(cl_uint i = 0; i < num_elements - num_devices_plus_host; i++) - { - int sum = 0; - for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j]; - if(sum != expected) - failed = true; - } + for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); - clSVMFree(context, pA); - for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]); + // now check that each device can see the byte writes made by the other + // devices. - if(failed) - return -1; - return 0; + err |= clSetKernelArgSVMPointer(k2, 0, pA); + err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host); + test_error(err, "clSetKernelArg failed"); + + // adjusted so k2 doesn't read past end of buffer + size_t adjusted_num_elements = num_elements - num_devices; + for (cl_uint id = 0; id < num_devices; id++) + { + err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]); + test_error(err, "clSetKernelArg failed"); + + err = + clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, + &adjusted_num_elements, NULL, 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed"); + } + + for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); + + bool failed = false; + + // see if any of the devices found errors + for (cl_uint i = 0; i < num_devices; i++) + { + if (*error_counts[i] > 0) failed = true; + } + cl_uint expected = + (num_devices_plus_host * (num_devices_plus_host - 1)) / 2; + // check that host can see the byte writes made by the devices. + for (cl_uint i = 0; i < num_elements - num_devices_plus_host; i++) + { + int sum = 0; + for (cl_uint j = 0; j < num_devices_plus_host; j++) sum += pA[i + j]; + if (sum != expected) failed = true; + } + + clSVMFree(context, pA); + for (cl_uint i = 0; i < num_devices; i++) + clSVMFree(context, error_counts[i]); + + if (failed) return -1; + return 0; } diff --git a/test_conformance/SVM/test_cross_buffer_pointers.cpp b/test_conformance/SVM/test_cross_buffer_pointers.cpp index 2baa7ad7..cd2b168c 100644 --- a/test_conformance/SVM/test_cross_buffer_pointers.cpp +++ b/test_conformance/SVM/test_cross_buffer_pointers.cpp @@ -128,93 +128,128 @@ cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, // on another device or the host. // The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly. // This basic test is performed for all combinations of devices and the host. -int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_cross_buffer_pointers_coarse_grain) { - clContextWrapper context = NULL; - clProgramWrapper program = NULL; - cl_uint num_devices = 0; - cl_int error = CL_SUCCESS; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; - error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if(error) return -1; + error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], + &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (error) return -1; - size_t numLists = num_elements; - cl_int ListLength = 32; + size_t numLists = num_elements; + cl_int ListLength = 32; - clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); - test_error(error, "clCreateKernel failed"); + clKernelWrapper kernel_create_lists = + clCreateKernel(program, "create_linked_lists", &error); + test_error(error, "clCreateKernel failed"); - clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); - test_error(error, "clCreateKernel failed"); + clKernelWrapper kernel_verify_lists = + clCreateKernel(program, "verify_linked_lists", &error); + test_error(error, "clCreateKernel failed"); - // this buffer holds some of the linked list nodes. - Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); + // this buffer holds some of the linked list nodes. + Node *pNodes = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, + sizeof(Node) * ListLength * numLists, 0); - // this buffer holds some of the linked list nodes. - Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); + // this buffer holds some of the linked list nodes. + Node *pNodes2 = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, + sizeof(Node) * ListLength * numLists, 0); - { - clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error); - test_error(error, "clCreateBuffer failed."); - - clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error); - test_error(error, "clCreateBuffer failed."); - - // this buffer holds the index into the nodes buffer that is used for node allocation - clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(size_t), NULL, &error); - test_error(error, "clCreateBuffer failed."); - - // this buffer holds the count of correct nodes which is computed by the verify kernel. - clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); - test_error(error, "clCreateBuffer failed."); - - error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes); - //error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes); - error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2); - error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator); - error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength); - - error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes); - error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2); - error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct); - error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength); - test_error(error, "clSetKernelArg failed"); - - // Create linked list on one device and verify on another device (or the host). - // Do this for all possible combinations of devices and host within the platform. - for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on { - for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on - { - if(ci == num_devices) // last device index represents the host, note the num_device+1 above. - { - error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists); - if(error) return -1; - } - else - { - error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists); - if(error) return -1; - } + clMemWrapper nodes = clCreateBuffer( + context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists, + pNodes, &error); + test_error(error, "clCreateBuffer failed."); - if(vi == num_devices) - { - error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists); - if(error) return -1; - } - else - { - error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists); - if(error) return -1; - } - } // inner loop, vi - } // outer loop, ci - } + clMemWrapper nodes2 = clCreateBuffer( + context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists, + pNodes2, &error); + test_error(error, "clCreateBuffer failed."); - clSVMFree(context, pNodes2); - clSVMFree(context, pNodes); + // this buffer holds the index into the nodes buffer that is used for + // node allocation + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(size_t), NULL, &error); + test_error(error, "clCreateBuffer failed."); - return 0; + // this buffer holds the count of correct nodes which is computed by the + // verify kernel. + clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed."); + + error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *), + (void *)&nodes); + // error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) + // pNodes); + error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *), + (void *)&nodes2); + error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *), + (void *)&allocator); + error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), + (void *)&ListLength); + + error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *), + (void *)&nodes); + error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *), + (void *)&nodes2); + error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *), + (void *)&num_correct); + error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), + (void *)&ListLength); + test_error(error, "clSetKernelArg failed"); + + // Create linked list on one device and verify on another device (or the + // host). Do this for all possible combinations of devices and host + // within the platform. + for (int ci = 0; ci < (int)num_devices + 1; + ci++) // ci is CreationIndex, index of device/q to create linked + // list on + { + for (int vi = 0; vi < (int)num_devices + 1; + vi++) // vi is VerificationIndex, index of device/q to verify + // linked list on + { + if (ci == num_devices) // last device index represents the host, + // note the num_device+1 above. + { + error = create_linked_lists_on_host( + queues[0], nodes, nodes2, ListLength, numLists); + if (error) return -1; + } + else + { + error = create_linked_lists_on_device( + ci, queues[ci], allocator, kernel_create_lists, + numLists); + if (error) return -1; + } + + if (vi == num_devices) + { + error = verify_linked_lists_on_host( + vi, queues[0], nodes, nodes2, ListLength, numLists); + if (error) return -1; + } + else + { + error = verify_linked_lists_on_device( + vi, queues[vi], num_correct, kernel_verify_lists, + ListLength, numLists); + if (error) return -1; + } + } // inner loop, vi + } // outer loop, ci + } + + clSVMFree(context, pNodes2); + clSVMFree(context, pNodes); + + return 0; } diff --git a/test_conformance/SVM/test_enqueue_api.cpp b/test_conformance/SVM/test_enqueue_api.cpp index 256df43f..27b483fa 100644 --- a/test_conformance/SVM/test_enqueue_api.cpp +++ b/test_conformance/SVM/test_enqueue_api.cpp @@ -70,244 +70,282 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point data->status.store(1, std::memory_order_release); } -int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_enqueue_api) { - clContextWrapper context = NULL; - clCommandQueueWrapper queues[MAXQ]; - cl_uint num_devices = 0; - const size_t elementNum = 1024; - const size_t numSVMBuffers = 32; - cl_int error = CL_SUCCESS; - RandomSeed seed(0); + clContextWrapper contextWrapper = NULL; + clCommandQueueWrapper queues[MAXQ]; + cl_uint num_devices = 0; + const size_t elementNum = 1024; + const size_t numSVMBuffers = 32; + cl_int error = CL_SUCCESS; + RandomSeed seed(0); - error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if(error) return TEST_FAIL; + error = create_cl_objects(deviceID, NULL, &contextWrapper, NULL, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (error) return TEST_FAIL; - queue = queues[0]; + queue = queues[0]; - //all possible sizes of vectors and scalars - size_t typeSizes[] = { - sizeof(cl_uchar), - sizeof(cl_uchar2), - sizeof(cl_uchar3), - sizeof(cl_uchar4), - sizeof(cl_uchar8), - sizeof(cl_uchar16), - sizeof(cl_ushort), - sizeof(cl_ushort2), - sizeof(cl_ushort3), - sizeof(cl_ushort4), - sizeof(cl_ushort8), - sizeof(cl_ushort16), - sizeof(cl_uint), - sizeof(cl_uint2), - sizeof(cl_uint3), - sizeof(cl_uint4), - sizeof(cl_uint8), - sizeof(cl_uint16), - sizeof(cl_ulong), - sizeof(cl_ulong2), - sizeof(cl_ulong3), - sizeof(cl_ulong4), - sizeof(cl_ulong8), - sizeof(cl_ulong16), - }; + // all possible sizes of vectors and scalars + size_t typeSizes[] = { + sizeof(cl_uchar), sizeof(cl_uchar2), sizeof(cl_uchar3), + sizeof(cl_uchar4), sizeof(cl_uchar8), sizeof(cl_uchar16), + sizeof(cl_ushort), sizeof(cl_ushort2), sizeof(cl_ushort3), + sizeof(cl_ushort4), sizeof(cl_ushort8), sizeof(cl_ushort16), + sizeof(cl_uint), sizeof(cl_uint2), sizeof(cl_uint3), + sizeof(cl_uint4), sizeof(cl_uint8), sizeof(cl_uint16), + sizeof(cl_ulong), sizeof(cl_ulong2), sizeof(cl_ulong3), + sizeof(cl_ulong4), sizeof(cl_ulong8), sizeof(cl_ulong16), + }; - enum allocationTypes { - host, - svm - }; - - struct TestType { - allocationTypes srcAlloc; - allocationTypes dstAlloc; - TestType(allocationTypes type1, allocationTypes type2): srcAlloc(type1), dstAlloc(type2){} - }; - - std::vector testTypes; - - testTypes.push_back(TestType(host, host)); - testTypes.push_back(TestType(host, svm)); - testTypes.push_back(TestType(svm, host)); - testTypes.push_back(TestType(svm, svm)); - - for (const auto test_case : testTypes) - { - log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n", test_case.srcAlloc == svm ? "svm" : "host", test_case.dstAlloc == svm ? "svm" : "host"); - for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i) + enum allocationTypes { - //generate initial data - std::vector fillData0(typeSizes[i]), fillData1(typeSizes[i]); - generate_data(fillData0, typeSizes[i], seed); - generate_data(fillData1, typeSizes[i], seed); - size_t data_size = elementNum * typeSizes[i]; - std::vector srcHostData(data_size, 0); - std::vector dstHostData(data_size, 0); - generate_data(srcHostData, srcHostData.size(), seed); - generate_data(dstHostData, dstHostData.size(), seed); + host, + svm + }; - cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0); - cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0); + struct TestType + { + allocationTypes srcAlloc; + allocationTypes dstAlloc; + TestType(allocationTypes type1, allocationTypes type2) + : srcAlloc(type1), dstAlloc(type2) + {} + }; - clEventWrapper userEvent = clCreateUserEvent(context, &error); - test_error(error, "clCreateUserEvent failed"); - clEventWrapper eventMemFillList[2]; + std::vector testTypes; - error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[0]); - test_error(error, "clEnqueueSVMMemFill failed"); - error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[1]); - test_error(error, "clEnqueueSVMMemFill failed"); + testTypes.push_back(TestType(host, host)); + testTypes.push_back(TestType(host, svm)); + testTypes.push_back(TestType(svm, host)); + testTypes.push_back(TestType(svm, svm)); - error = clSetUserEventStatus(userEvent, CL_COMPLETE); - test_error(error, "clSetUserEventStatus failed"); + for (const auto test_case : testTypes) + { + log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n", + test_case.srcAlloc == svm ? "svm" : "host", + test_case.dstAlloc == svm ? "svm" : "host"); + for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i) + { + // generate initial data + std::vector fillData0(typeSizes[i]), + fillData1(typeSizes[i]); + generate_data(fillData0, typeSizes[i], seed); + generate_data(fillData1, typeSizes[i], seed); + size_t data_size = elementNum * typeSizes[i]; + std::vector srcHostData(data_size, 0); + std::vector dstHostData(data_size, 0); + generate_data(srcHostData, srcHostData.size(), seed); + generate_data(dstHostData, dstHostData.size(), seed); - cl_uchar * src_ptr; - cl_uchar * dst_ptr; - if (test_case.srcAlloc == host) { - src_ptr = srcHostData.data(); - } else if (test_case.srcAlloc == svm) { - src_ptr = srcBuffer; - } - if (test_case.dstAlloc == host) { - dst_ptr = dstHostData.data(); - } else if (test_case.dstAlloc == svm) { - dst_ptr = dstBuffer; - } - clEventWrapper eventMemcpy; - error = clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size, 2, &eventMemFillList[0], &eventMemcpy); - test_error(error, "clEnqueueSVMMemcpy failed"); + cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc( + context, CL_MEM_READ_WRITE, data_size, 0); + cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc( + context, CL_MEM_READ_WRITE, data_size, 0); - //coarse grain only supported. Synchronization required using map - clEventWrapper eventMap[2]; + clEventWrapper userEvent = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + clEventWrapper eventMemFillList[2]; - error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer, data_size, 1, &eventMemcpy, &eventMap[0]); - test_error(error, "clEnqueueSVMMap srcBuffer failed"); + error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], + typeSizes[i], data_size, 1, &userEvent, + &eventMemFillList[0]); + test_error(error, "clEnqueueSVMMemFill failed"); + error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0], + typeSizes[i], data_size, 1, &userEvent, + &eventMemFillList[1]); + test_error(error, "clEnqueueSVMMemFill failed"); - error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer, data_size, 1, &eventMemcpy, &eventMap[1]); - test_error(error, "clEnqueueSVMMap dstBuffer failed"); + error = clSetUserEventStatus(userEvent, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); - error = clWaitForEvents(2, &eventMap[0]); - test_error(error, "clWaitForEvents failed"); + cl_uchar *src_ptr; + cl_uchar *dst_ptr; + if (test_case.srcAlloc == host) + { + src_ptr = srcHostData.data(); + } + else if (test_case.srcAlloc == svm) + { + src_ptr = srcBuffer; + } + if (test_case.dstAlloc == host) + { + dst_ptr = dstHostData.data(); + } + else if (test_case.dstAlloc == svm) + { + dst_ptr = dstBuffer; + } + clEventWrapper eventMemcpy; + error = + clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size, + 2, &eventMemFillList[0], &eventMemcpy); + test_error(error, "clEnqueueSVMMemcpy failed"); - //data verification - for (size_t j = 0; j < data_size; ++j) - { - if (dst_ptr[j] != src_ptr[j]) { - log_error("Invalid data at index %zu, dst_ptr %d, src_ptr %d\n", j, - dst_ptr[j], src_ptr[j]); + // coarse grain only supported. Synchronization required using map + clEventWrapper eventMap[2]; + + error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer, + data_size, 1, &eventMemcpy, &eventMap[0]); + test_error(error, "clEnqueueSVMMap srcBuffer failed"); + + error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer, + data_size, 1, &eventMemcpy, &eventMap[1]); + test_error(error, "clEnqueueSVMMap dstBuffer failed"); + + error = clWaitForEvents(2, &eventMap[0]); + test_error(error, "clWaitForEvents failed"); + + // data verification + for (size_t j = 0; j < data_size; ++j) + { + if (dst_ptr[j] != src_ptr[j]) + { + log_error( + "Invalid data at index %zu, dst_ptr %d, src_ptr %d\n", + j, dst_ptr[j], src_ptr[j]); + return TEST_FAIL; + } + } + clEventWrapper eventUnmap[2]; + error = + clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]); + test_error(error, "clEnqueueSVMUnmap srcBuffer failed"); + + error = + clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]); + test_error(error, "clEnqueueSVMUnmap dstBuffer failed"); + + error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0], + typeSizes[i], data_size / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2, + &fillData1[0], typeSizes[i], + data_size / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, + data_size / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemcpy failed"); + + error = clEnqueueSVMMemcpy( + queue, CL_TRUE, dstBuffer + data_size / 2, + srcBuffer + data_size / 2, data_size / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemcpy failed"); + + void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer }; + + clEventWrapper eventFree; + error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree); + test_error(error, "clEnqueueSVMFree failed"); + + error = clWaitForEvents(1, &eventFree); + test_error(error, "clWaitForEvents failed"); + + // event info verification for new SVM commands + cl_command_type commandType; + for (auto &check_event : eventMemFillList) + { + error = + clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_MEMFILL) + { + log_error("Invalid command type returned for " + "clEnqueueSVMMemFill\n"); + return TEST_FAIL; + } + } + + error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_MEMCPY) + { + log_error( + "Invalid command type returned for clEnqueueSVMMemcpy\n"); + return TEST_FAIL; + } + for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++) + { + error = + clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_MAP) + { + log_error( + "Invalid command type returned for clEnqueueSVMMap\n"); + return TEST_FAIL; + } + + error = + clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_UNMAP) + { + log_error("Invalid command type returned for " + "clEnqueueSVMUnmap\n"); + return TEST_FAIL; + } + } + error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_FREE) + { + log_error( + "Invalid command type returned for clEnqueueSVMFree\n"); + return TEST_FAIL; + } + } + } + std::vector buffers(numSVMBuffers, 0); + for (size_t i = 0; i < numSVMBuffers; ++i) + buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0); + + // verify if callback is triggered correctly + CallbackData data; + data.status = 0; + + error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], + callback_svm_free, &data, 0, 0, 0); + test_error(error, "clEnqueueSVMFree failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // wait for the callback + while (data.status.load(std::memory_order_acquire) == 0) + { + usleep(1); + } + + // check if number of SVM pointers returned in the callback matches with + // expected + if (data.num_svm_pointers != buffers.size()) + { + log_error("Invalid number of SVM pointers returned in the callback, " + "expected: %zu, got: %d\n", + buffers.size(), data.num_svm_pointers); + return TEST_FAIL; + } + + // check if pointers returned in callback are correct + for (size_t i = 0; i < buffers.size(); ++i) + { + if (data.svm_pointers[i] != buffers[i]) + { + log_error( + "Invalid SVM pointer returned in the callback, idx: %zu\n", i); return TEST_FAIL; } - } - clEventWrapper eventUnmap[2]; - error = clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]); - test_error(error, "clEnqueueSVMUnmap srcBuffer failed"); - - error = clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]); - test_error(error, "clEnqueueSVMUnmap dstBuffer failed"); - - error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0); - test_error(error, "clEnqueueSVMMemFill failed"); - - error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0); - test_error(error, "clEnqueueSVMMemFill failed"); - - error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, data_size / 2, 0, 0, 0); - test_error(error, "clEnqueueSVMMemcpy failed"); - - error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + data_size / 2, srcBuffer + data_size / 2, data_size / 2, 0, 0, 0); - test_error(error, "clEnqueueSVMMemcpy failed"); - - void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer }; - - clEventWrapper eventFree; - error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree); - test_error(error, "clEnqueueSVMFree failed"); - - error = clWaitForEvents(1, &eventFree); - test_error(error, "clWaitForEvents failed"); - - //event info verification for new SVM commands - cl_command_type commandType; - for (auto &check_event : eventMemFillList) { - error = clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_MEMFILL) - { - log_error("Invalid command type returned for clEnqueueSVMMemFill\n"); - return TEST_FAIL; - } - } - - error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_MEMCPY) - { - log_error("Invalid command type returned for clEnqueueSVMMemcpy\n"); - return TEST_FAIL; - } - for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++) { - error = clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_MAP) - { - log_error("Invalid command type returned for clEnqueueSVMMap\n"); - return TEST_FAIL; - } - - error = clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_UNMAP) - { - log_error("Invalid command type returned for clEnqueueSVMUnmap\n"); - return TEST_FAIL; - } - } - error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_FREE) - { - log_error("Invalid command type returned for clEnqueueSVMFree\n"); - return TEST_FAIL; - } } - } - std::vector buffers(numSVMBuffers, 0); - for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0); - //verify if callback is triggered correctly - CallbackData data; - data.status = 0; - - error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0); - test_error(error, "clEnqueueSVMFree failed"); - - error = clFinish(queue); - test_error(error, "clFinish failed"); - - //wait for the callback - while(data.status.load(std::memory_order_acquire) == 0) { - usleep(1); - } - - //check if number of SVM pointers returned in the callback matches with expected - if (data.num_svm_pointers != buffers.size()) - { - log_error("Invalid number of SVM pointers returned in the callback, " - "expected: %zu, got: %d\n", - buffers.size(), data.num_svm_pointers); - return TEST_FAIL; - } - - //check if pointers returned in callback are correct - for (size_t i = 0; i < buffers.size(); ++i) - { - if (data.svm_pointers[i] != buffers[i]) - { - log_error("Invalid SVM pointer returned in the callback, idx: %zu\n", - i); - return TEST_FAIL; - } - } - - return 0; + return 0; } diff --git a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp index 50c57061..1c39e33f 100644 --- a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp +++ b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp @@ -139,44 +139,55 @@ int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper* // Each bin in the hash table is a linked list. Each bin is protected against simultaneous // update using a lock free technique. The correctness of the list is verfied on the host. // This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering. -int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_fine_grain_memory_consistency) { - clContextWrapper context; - clProgramWrapper program; - clKernelWrapper kernel; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper; + clProgramWrapper program; + clKernelWrapper kernel; + clCommandQueueWrapper queues[MAXQ]; - cl_uint num_devices = 0; - cl_int err = CL_SUCCESS; - std::vector required_extensions; - required_extensions.push_back("cl_khr_int64_base_atomics"); - required_extensions.push_back("cl_khr_int64_extended_atomics"); + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; + std::vector required_extensions; + required_extensions.push_back("cl_khr_int64_base_atomics"); + required_extensions.push_back("cl_khr_int64_extended_atomics"); - // Make pragmas visible for 64-bit addresses - hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0'; + // Make pragmas visible for 64-bit addresses + hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0'; - char *source[] = { hash_table_kernel }; + char *source[] = { hash_table_kernel }; - err = create_cl_objects(deviceID, (const char**)source, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS, required_extensions); - if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. - if(err < 0) return -1; // fail test. + err = create_cl_objects( + deviceID, (const char **)source, &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS, + required_extensions); + context = contextWrapper; + if (err == 1) + return 0; // no devices capable of requested SVM level, so don't execute + // but count test as passing. + if (err < 0) return -1; // fail test. - kernel = clCreateKernel(program, "build_hash_table", &err); - test_error(err, "clCreateKernel failed"); - size_t num_pixels = num_elements; + kernel = clCreateKernel(program, "build_hash_table", &err); + test_error(err, "clCreateKernel failed"); + size_t num_pixels = num_elements; - int result; - cl_uint numBins = 1; // all work groups in all devices and the host code will hammer on this one lock. - result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); - if(result == -1) return result; + int result; + cl_uint numBins = 1; // all work groups in all devices and the host code + // will hammer on this one lock. + result = launch_kernels_and_verify(contextWrapper, queues, kernel, + num_devices, numBins, num_pixels); + if (result == -1) return result; - numBins = 2; // 2 locks within in same cache line will get hit from different devices and host. - result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); - if(result == -1) return result; + numBins = 2; // 2 locks within in same cache line will get hit from + // different devices and host. + result = launch_kernels_and_verify(contextWrapper, queues, kernel, + num_devices, numBins, num_pixels); + if (result == -1) return result; - numBins = 29; // locks span a few cache lines. - result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); - if(result == -1) return result; + numBins = 29; // locks span a few cache lines. + result = launch_kernels_and_verify(contextWrapper, queues, kernel, + num_devices, numBins, num_pixels); + if (result == -1) return result; - return result; + return result; } diff --git a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp index 0b94cbf2..f2572a8a 100644 --- a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp +++ b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp @@ -44,67 +44,88 @@ void spawnAnalysisTask(int location) // Concept: a device kernel is used to search an input image for regions that match a target pattern. // The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices). // The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets. -int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_fine_grain_sync_buffers) { - clContextWrapper context = NULL; - clProgramWrapper program = NULL; - cl_uint num_devices = 0; - cl_int err = CL_SUCCESS; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; - err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); - if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. - if(err < 0) return -1; // fail test. + err = create_cl_objects(deviceID, &find_targets_kernel[0], &contextWrapper, + &program, &queues[0], &num_devices, + CL_DEVICE_SVM_FINE_GRAIN_BUFFER + | CL_DEVICE_SVM_ATOMICS); + context = contextWrapper; + if (err == 1) + return 0; // no devices capable of requested SVM level, so don't execute + // but count test as passing. + if (err < 0) return -1; // fail test. - clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err); - test_error(err, "clCreateKernel failed"); + clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err); + test_error(err, "clCreateKernel failed"); - size_t num_pixels = num_elements; - //cl_uint num_pixels = 1024*1024*32; + size_t num_pixels = num_elements; + // cl_uint num_pixels = 1024*1024*32; - cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0); - cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0); - cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0); + cl_uint *pInputImage = (cl_uint *)clSVMAlloc( + context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, + sizeof(cl_uint) * num_pixels, 0); + cl_uint *pNumTargetsFound = (cl_uint *)clSVMAlloc( + context, + CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + sizeof(cl_uint), 0); + cl_int *pTargetLocations = (cl_int *)clSVMAlloc( + context, + CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + sizeof(cl_int) * MAX_TARGETS, 0); - cl_uint targetDescriptor = 777; - *pNumTargetsFound = 0; - cl_uint i; - for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1; - for(i=0; i < num_pixels; i++) pInputImage[i] = 0; - pInputImage[0] = targetDescriptor; - pInputImage[3] = targetDescriptor; - pInputImage[num_pixels - 1] = targetDescriptor; + cl_uint targetDescriptor = 777; + *pNumTargetsFound = 0; + cl_uint i; + for (i = 0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1; + for (i = 0; i < num_pixels; i++) pInputImage[i] = 0; + pInputImage[0] = targetDescriptor; + pInputImage[3] = targetDescriptor; + pInputImage[num_pixels - 1] = targetDescriptor; - err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage); - err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor); - err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound); - err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations); - test_error(err, "clSetKernelArg failed"); + err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage); + err |= + clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&targetDescriptor); + err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound); + err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations); + test_error(err, "clSetKernelArg failed"); - cl_event done; - err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done); - test_error(err,"clEnqueueNDRangeKernel failed"); - clFlush(queues[0]); + cl_event done; + err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, + 0, NULL, &done); + test_error(err, "clEnqueueNDRangeKernel failed"); + clFlush(queues[0]); - i=0; - cl_int status; - // check for new targets, if found spawn a task to analyze target. - do { - err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL); - test_error(err,"clGetEventInfo failed"); - if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet. + i = 0; + cl_int status; + // check for new targets, if found spawn a task to analyze target. + do { - spawnAnalysisTask(pTargetLocations[i]); - i++; - } - } while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1); + err = clGetEventInfo(done, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(cl_int), &status, NULL); + test_error(err, "clGetEventInfo failed"); + if (AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) + != -1) // -1 indicates slot not used yet. + { + spawnAnalysisTask(pTargetLocations[i]); + i++; + } + } while (status != CL_COMPLETE + || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) + != -1); - clReleaseEvent(done); - clSVMFree(context, pInputImage); - clSVMFree(context, pNumTargetsFound); - clSVMFree(context, pTargetLocations); + clReleaseEvent(done); + clSVMFree(context, pInputImage); + clSVMFree(context, pNumTargetsFound); + clSVMFree(context, pTargetLocations); - if(i != 3) return -1; - return 0; + if (i != 3) return -1; + return 0; } diff --git a/test_conformance/SVM/test_migrate.cpp b/test_conformance/SVM/test_migrate.cpp index b767a70a..b697b48a 100644 --- a/test_conformance/SVM/test_migrate.cpp +++ b/test_conformance/SVM/test_migrate.cpp @@ -75,7 +75,7 @@ wait_and_release(const char* s, cl_event* evs, int n) return 0; } -int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_migrate) { std::vector amem(GLOBAL_SIZE); std::vector bmem(GLOBAL_SIZE); @@ -86,15 +86,17 @@ int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue RandomSeed seed(0); - clContextWrapper context = NULL; + clContextWrapper contextWrapper = NULL; clCommandQueueWrapper queues[MAXQ]; cl_uint num_devices = 0; clProgramWrapper program; cl_int error; - error = create_cl_objects(deviceID, &sources[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if (error) - return -1; + error = create_cl_objects(deviceID, &sources[0], &contextWrapper, &program, + &queues[0], &num_devices, + CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (error) return -1; if (num_devices > 1) { log_info(" Running on two devices.\n"); diff --git a/test_conformance/SVM/test_pointer_passing.cpp b/test_conformance/SVM/test_pointer_passing.cpp index 42baa76a..9493edfc 100644 --- a/test_conformance/SVM/test_pointer_passing.cpp +++ b/test_conformance/SVM/test_pointer_passing.cpp @@ -35,81 +35,107 @@ const char *SVMPointerPassing_test_kernel[] = { // The buffer is initialized to known values at each location. // The kernel checks that it finds the expected value at each location. // TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--. -int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_pointer_passing) { - clContextWrapper context = NULL; - clProgramWrapper program = NULL; - cl_uint num_devices = 0; - cl_int error = CL_SUCCESS; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; - error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if(error) return -1; + error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], + &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (error) return -1; - clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error); - test_error(error,"clCreateKernel failed"); + clKernelWrapper kernel_verify_char = + clCreateKernel(program, "verify_char", &error); + test_error(error, "clCreateKernel failed"); - size_t bufSize = 256; - cl_uchar *pbuf_svm_alloc = (cl_uchar*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0); + size_t bufSize = 256; + cl_uchar *pbuf_svm_alloc = (cl_uchar *)clSVMAlloc( + context, CL_MEM_READ_WRITE, sizeof(cl_uchar) * bufSize, 0); - cl_int *pNumCorrect = NULL; - pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); + cl_int *pNumCorrect = NULL; + pNumCorrect = + (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); - { - clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf_svm_alloc, &error); - test_error(error, "clCreateBuffer failed."); - - clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error); - test_error(error, "clCreateBuffer failed."); - - error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct); - test_error(error, "clSetKernelArg failed"); - - // put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them. - cl_command_queue cmdq = queues[0]; - cl_uchar* pbuf_map_buffer = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error); - test_error2(error, pbuf_map_buffer, "clEnqueueMapBuffer failed"); - for(int i = 0; i<(int)bufSize; i++) { - pbuf_map_buffer[i]= (cl_uchar)i; - } - error = clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0,NULL,NULL); - test_error(error, "clEnqueueUnmapMemObject failed."); + clMemWrapper buf = + clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + sizeof(cl_uchar) * bufSize, pbuf_svm_alloc, &error); + test_error(error, "clCreateBuffer failed."); - for (cl_uint ii = 0; ii