From d99b302f901867fc1e5b196f5f6071c1dd0a10c1 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 3 Dec 2024 14:51:23 -0800 Subject: [PATCH] switch SVM tests to the new test registration framework (#2168) Switches the SVM tests to the new test registration framework. The first commit is the best to review and contains the actual changes. The second commit purely has formatting changes. Note that several of these changes were a bit more than mechanical because many of the SVM tests create a new context vs. using the context provided by the harness and passed to each test function. The previous code named the context provided by the harness differently, and hence could use the name "context" in each test function, but with the new test registration framework this is no longer possible. Instead, I am creating the new context using the name "contextWrapper" and then assigning it to the "context" passed to the test function, which seems like the best way to avoid using the wrong context unintentionally. I am open to suggestions to do this differently. I have verified that the same calls are made before and after these changes, and specifically that there are no context leaks. --- test_conformance/SVM/common.h | 19 - test_conformance/SVM/main.cpp | 25 +- .../SVM/test_allocate_shared_buffer.cpp | 126 ++--- .../test_allocate_shared_buffer_negative.cpp | 13 +- .../SVM/test_byte_granularity.cpp | 173 ++++--- .../SVM/test_cross_buffer_pointers.cpp | 189 ++++--- test_conformance/SVM/test_enqueue_api.cpp | 478 ++++++++++-------- .../test_fine_grain_memory_consistency.cpp | 71 +-- .../SVM/test_fine_grain_sync_buffers.cpp | 121 +++-- test_conformance/SVM/test_migrate.cpp | 12 +- test_conformance/SVM/test_pointer_passing.cpp | 146 +++--- .../test_set_kernel_exec_info_svm_ptrs.cpp | 2 +- ...test_shared_address_space_coarse_grain.cpp | 10 +- .../test_shared_address_space_fine_grain.cpp | 145 +++--- ...hared_address_space_fine_grain_buffers.cpp | 142 +++--- .../SVM/test_shared_sub_buffers.cpp | 248 +++++---- 16 files changed, 1066 insertions(+), 854 deletions(-) diff --git a/test_conformance/SVM/common.h b/test_conformance/SVM/common.h index f6a937c7..d2b5cfd2 100644 --- a/test_conformance/SVM/common.h +++ b/test_conformance/SVM/common.h @@ -81,25 +81,6 @@ extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, c extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists ); extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists ); -extern int test_svm_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID, - cl_context context, - cl_command_queue queue, - int num_elements); -extern int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_svm_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); - extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector extensions_list = std::vector()); extern const char *linked_list_create_and_verify_kernels[]; diff --git a/test_conformance/SVM/main.cpp b/test_conformance/SVM/main.cpp index 819901a3..91a14e5b 100644 --- a/test_conformance/SVM/main.cpp +++ b/test_conformance/SVM/main.cpp @@ -260,26 +260,6 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt return 0; } -test_definition test_list[] = { - ADD_TEST(svm_byte_granularity), - ADD_TEST(svm_set_kernel_exec_info_svm_ptrs), - ADD_TEST(svm_fine_grain_memory_consistency), - ADD_TEST(svm_fine_grain_sync_buffers), - ADD_TEST(svm_shared_address_space_fine_grain), - ADD_TEST(svm_shared_sub_buffers), - ADD_TEST(svm_shared_address_space_fine_grain_buffers), - ADD_TEST(svm_allocate_shared_buffer), - ADD_TEST(svm_allocate_shared_buffer_negative), - ADD_TEST(svm_shared_address_space_coarse_grain_old_api), - ADD_TEST(svm_shared_address_space_coarse_grain_new_api), - ADD_TEST(svm_cross_buffer_pointers_coarse_grain), - ADD_TEST(svm_pointer_passing), - ADD_TEST(svm_enqueue_api), - ADD_TEST_VERSION(svm_migrate, Version(2, 1)), -}; - -const int test_num = ARRAY_SIZE( test_list ); - test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); auto expected_min_version = Version(2, 0); @@ -310,6 +290,7 @@ test_status InitCL(cl_device_id device) { int main(int argc, const char *argv[]) { - return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL); + return runTestHarnessWithCheck( + argc, argv, test_registry::getInstance().num_tests(), + test_registry::getInstance().definitions(), true, 0, InitCL); } - diff --git a/test_conformance/SVM/test_allocate_shared_buffer.cpp b/test_conformance/SVM/test_allocate_shared_buffer.cpp index bf94698c..e4dcffda 100644 --- a/test_conformance/SVM/test_allocate_shared_buffer.cpp +++ b/test_conformance/SVM/test_allocate_shared_buffer.cpp @@ -41,71 +41,83 @@ const char* flag_set_names[] = { }; -int test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_allocate_shared_buffer) { - clContextWrapper context = NULL; - clProgramWrapper program = NULL; - cl_uint num_devices = 0; - cl_int err = CL_SUCCESS; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; - cl_device_svm_capabilities caps; - err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL); - test_error(err,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); + cl_device_svm_capabilities caps; + err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, + sizeof(cl_device_svm_capabilities), &caps, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); - // under construction... - err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if(err) return -1; + // under construction... + err = + create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (err) return -1; - size_t size = 1024; + size_t size = 1024; - // iteration over flag combos - int num_flags = sizeof(flag_set)/sizeof(cl_mem_flags); - for(int i = 0; i < num_flags; i++) - { - if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0) - || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0)) + // iteration over flag combos + int num_flags = sizeof(flag_set) / sizeof(cl_mem_flags); + for (int i = 0; i < num_flags; i++) { - log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]); - continue; - } - - log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]); - cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0); - if(pBufData1 == NULL) - { - log_error("SVMalloc returned NULL"); - return -1; - } - - { - clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err); - test_error(err,"clCreateBuffer failed"); - - cl_char *pBufData2 = NULL; - cl_uint flags = CL_MAP_READ | CL_MAP_READ; - if(flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE; - if(flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ; - - if(!(flag_set[i] & CL_MEM_HOST_NO_ACCESS)) - { - pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err); - test_error(err, "clEnqueueMapBuffer failed"); - - if(pBufData2 != pBufData1 || NULL == pBufData1) + if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 + && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0) + || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 + && (caps & CL_DEVICE_SVM_ATOMICS) == 0)) { - log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc"); - return -1; + log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]); + continue; } - err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL); - test_error(err, "clEnqueueUnmapMemObject failed"); - err = clFinish(queues[0]); - test_error(err, "clFinish failed"); - } + + log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]); + cl_char *pBufData1 = + (cl_char *)clSVMAlloc(context, flag_set[i], size, 0); + if (pBufData1 == NULL) + { + log_error("SVMalloc returned NULL"); + return -1; + } + + { + clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + size, pBufData1, &err); + test_error(err, "clCreateBuffer failed"); + + cl_char *pBufData2 = NULL; + cl_uint flags = CL_MAP_READ | CL_MAP_READ; + if (flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE; + if (flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ; + + if (!(flag_set[i] & CL_MEM_HOST_NO_ACCESS)) + { + pBufData2 = (cl_char *)clEnqueueMapBuffer( + queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL, NULL, + &err); + test_error(err, "clEnqueueMapBuffer failed"); + + if (pBufData2 != pBufData1 || NULL == pBufData1) + { + log_error("SVM pointer returned by clEnqueueMapBuffer " + "doesn't match pointer returned by clSVMalloc"); + return -1; + } + err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, + NULL, NULL); + test_error(err, "clEnqueueUnmapMemObject failed"); + err = clFinish(queues[0]); + test_error(err, "clFinish failed"); + } + } + + clSVMFree(context, pBufData1); } - clSVMFree(context, pBufData1); - } - - return 0; + return 0; } diff --git a/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp b/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp index 852242dd..5d8513bb 100644 --- a/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp +++ b/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp @@ -41,12 +41,9 @@ const char* svm_flag_set_names[] = { }; -int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID, - cl_context context2, - cl_command_queue queue, - int num_elements) +REGISTER_TEST(svm_allocate_shared_buffer_negative) { - clContextWrapper context = NULL; + clContextWrapper contextWrapper = NULL; clProgramWrapper program = NULL; cl_uint num_devices = 0; cl_int err = CL_SUCCESS; @@ -58,8 +55,10 @@ int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID, test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); // under construction... - err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], - &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + err = + create_cl_objects(deviceID, NULL, &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; if (err) return err; size_t size = 1024; diff --git a/test_conformance/SVM/test_byte_granularity.cpp b/test_conformance/SVM/test_byte_granularity.cpp index 6dbb3649..d9d12221 100644 --- a/test_conformance/SVM/test_byte_granularity.cpp +++ b/test_conformance/SVM/test_byte_granularity.cpp @@ -48,100 +48,115 @@ const char *byte_manipulation_kernels[] = { }; - -int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_byte_granularity) { - clContextWrapper context; - clProgramWrapper program; - clKernelWrapper k1,k2; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper; + clProgramWrapper program; + clKernelWrapper k1, k2; + clCommandQueueWrapper queues[MAXQ]; - cl_uint num_devices = 0; - cl_int err = CL_SUCCESS; + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; - err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER); - if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. - if(err < 0) return -1; // fail test. + err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], + &contextWrapper, &program, &queues[0], &num_devices, + CL_DEVICE_SVM_FINE_GRAIN_BUFFER); + context = contextWrapper; + if (err == 1) + return 0; // no devices capable of requested SVM level, so don't execute + // but count test as passing. + if (err < 0) return -1; // fail test. - cl_uint num_devices_plus_host = num_devices + 1; + cl_uint num_devices_plus_host = num_devices + 1; - k1 = clCreateKernel(program, "write_owned_locations", &err); - test_error(err, "clCreateKernel failed"); - k2 = clCreateKernel(program, "sum_neighbor_locations", &err); - test_error(err, "clCreateKernel failed"); + k1 = clCreateKernel(program, "write_owned_locations", &err); + test_error(err, "clCreateKernel failed"); + k2 = clCreateKernel(program, "sum_neighbor_locations", &err); + test_error(err, "clCreateKernel failed"); - cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0); + cl_char *pA = (cl_char *)clSVMAlloc( + context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, + sizeof(cl_char) * num_elements, 0); - cl_uint **error_counts = (cl_uint**) malloc(sizeof(void*) * num_devices); + cl_uint **error_counts = (cl_uint **)malloc(sizeof(void *) * num_devices); - for(cl_uint i=0; i < num_devices; i++) { - error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0); - *error_counts[i] = 0; - } - for(int i=0; i < num_elements; i++) pA[i] = -1; + for (cl_uint i = 0; i < num_devices; i++) + { + error_counts[i] = (cl_uint *)clSVMAlloc( + context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, + sizeof(cl_uint), 0); + *error_counts[i] = 0; + } + for (int i = 0; i < num_elements; i++) pA[i] = -1; - err |= clSetKernelArgSVMPointer(k1, 0, pA); - err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host); - test_error(err, "clSetKernelArg failed"); - - // get all the devices going simultaneously - size_t element_num = num_elements; - for(cl_uint d=0; d < num_devices; d++) // device ids starting at 1. - { - err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d); - test_error(err, "clSetKernelArg failed"); - err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL); - test_error(err,"clEnqueueNDRangeKernel failed"); - } - - for(cl_uint d=0; d < num_devices; d++) clFlush(queues[d]); - - cl_uint host_id = num_devices; // host code will take the id above the devices. - for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id; - - for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); - - // now check that each device can see the byte writes made by the other devices. - - err |= clSetKernelArgSVMPointer(k2, 0, pA); - err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host); - test_error(err, "clSetKernelArg failed"); - - // adjusted so k2 doesn't read past end of buffer - size_t adjusted_num_elements = num_elements - num_devices; - for(cl_uint id = 0; id < num_devices; id++) - { - err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]); + err |= clSetKernelArgSVMPointer(k1, 0, pA); + err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host); test_error(err, "clSetKernelArg failed"); - err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL); - test_error(err,"clEnqueueNDRangeKernel failed"); - } + // get all the devices going simultaneously + size_t element_num = num_elements; + for (cl_uint d = 0; d < num_devices; d++) // device ids starting at 1. + { + err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d); + test_error(err, "clSetKernelArg failed"); + err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, + 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed"); + } - for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); + for (cl_uint d = 0; d < num_devices; d++) clFlush(queues[d]); - bool failed = false; + cl_uint host_id = + num_devices; // host code will take the id above the devices. + for (int i = (int)num_devices; i < num_elements; i += num_devices_plus_host) + pA[i] = host_id; - // see if any of the devices found errors - for(cl_uint i=0; i < num_devices; i++) { - if(*error_counts[i] > 0) - failed = true; - } - cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2; - // check that host can see the byte writes made by the devices. - for(cl_uint i = 0; i < num_elements - num_devices_plus_host; i++) - { - int sum = 0; - for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j]; - if(sum != expected) - failed = true; - } + for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); - clSVMFree(context, pA); - for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]); + // now check that each device can see the byte writes made by the other + // devices. - if(failed) - return -1; - return 0; + err |= clSetKernelArgSVMPointer(k2, 0, pA); + err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host); + test_error(err, "clSetKernelArg failed"); + + // adjusted so k2 doesn't read past end of buffer + size_t adjusted_num_elements = num_elements - num_devices; + for (cl_uint id = 0; id < num_devices; id++) + { + err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]); + test_error(err, "clSetKernelArg failed"); + + err = + clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, + &adjusted_num_elements, NULL, 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed"); + } + + for (cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]); + + bool failed = false; + + // see if any of the devices found errors + for (cl_uint i = 0; i < num_devices; i++) + { + if (*error_counts[i] > 0) failed = true; + } + cl_uint expected = + (num_devices_plus_host * (num_devices_plus_host - 1)) / 2; + // check that host can see the byte writes made by the devices. + for (cl_uint i = 0; i < num_elements - num_devices_plus_host; i++) + { + int sum = 0; + for (cl_uint j = 0; j < num_devices_plus_host; j++) sum += pA[i + j]; + if (sum != expected) failed = true; + } + + clSVMFree(context, pA); + for (cl_uint i = 0; i < num_devices; i++) + clSVMFree(context, error_counts[i]); + + if (failed) return -1; + return 0; } diff --git a/test_conformance/SVM/test_cross_buffer_pointers.cpp b/test_conformance/SVM/test_cross_buffer_pointers.cpp index 2baa7ad7..cd2b168c 100644 --- a/test_conformance/SVM/test_cross_buffer_pointers.cpp +++ b/test_conformance/SVM/test_cross_buffer_pointers.cpp @@ -128,93 +128,128 @@ cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, // on another device or the host. // The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly. // This basic test is performed for all combinations of devices and the host. -int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_cross_buffer_pointers_coarse_grain) { - clContextWrapper context = NULL; - clProgramWrapper program = NULL; - cl_uint num_devices = 0; - cl_int error = CL_SUCCESS; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; - error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if(error) return -1; + error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], + &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (error) return -1; - size_t numLists = num_elements; - cl_int ListLength = 32; + size_t numLists = num_elements; + cl_int ListLength = 32; - clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); - test_error(error, "clCreateKernel failed"); + clKernelWrapper kernel_create_lists = + clCreateKernel(program, "create_linked_lists", &error); + test_error(error, "clCreateKernel failed"); - clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); - test_error(error, "clCreateKernel failed"); + clKernelWrapper kernel_verify_lists = + clCreateKernel(program, "verify_linked_lists", &error); + test_error(error, "clCreateKernel failed"); - // this buffer holds some of the linked list nodes. - Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); + // this buffer holds some of the linked list nodes. + Node *pNodes = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, + sizeof(Node) * ListLength * numLists, 0); - // this buffer holds some of the linked list nodes. - Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); + // this buffer holds some of the linked list nodes. + Node *pNodes2 = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE, + sizeof(Node) * ListLength * numLists, 0); - { - clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error); - test_error(error, "clCreateBuffer failed."); - - clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error); - test_error(error, "clCreateBuffer failed."); - - // this buffer holds the index into the nodes buffer that is used for node allocation - clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(size_t), NULL, &error); - test_error(error, "clCreateBuffer failed."); - - // this buffer holds the count of correct nodes which is computed by the verify kernel. - clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); - test_error(error, "clCreateBuffer failed."); - - error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes); - //error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes); - error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2); - error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator); - error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength); - - error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes); - error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2); - error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct); - error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength); - test_error(error, "clSetKernelArg failed"); - - // Create linked list on one device and verify on another device (or the host). - // Do this for all possible combinations of devices and host within the platform. - for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on { - for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on - { - if(ci == num_devices) // last device index represents the host, note the num_device+1 above. - { - error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists); - if(error) return -1; - } - else - { - error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists); - if(error) return -1; - } + clMemWrapper nodes = clCreateBuffer( + context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists, + pNodes, &error); + test_error(error, "clCreateBuffer failed."); - if(vi == num_devices) - { - error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists); - if(error) return -1; - } - else - { - error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists); - if(error) return -1; - } - } // inner loop, vi - } // outer loop, ci - } + clMemWrapper nodes2 = clCreateBuffer( + context, CL_MEM_USE_HOST_PTR, sizeof(Node) * ListLength * numLists, + pNodes2, &error); + test_error(error, "clCreateBuffer failed."); - clSVMFree(context, pNodes2); - clSVMFree(context, pNodes); + // this buffer holds the index into the nodes buffer that is used for + // node allocation + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(size_t), NULL, &error); + test_error(error, "clCreateBuffer failed."); - return 0; + // this buffer holds the count of correct nodes which is computed by the + // verify kernel. + clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed."); + + error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void *), + (void *)&nodes); + // error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) + // pNodes); + error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void *), + (void *)&nodes2); + error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void *), + (void *)&allocator); + error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), + (void *)&ListLength); + + error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void *), + (void *)&nodes); + error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void *), + (void *)&nodes2); + error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void *), + (void *)&num_correct); + error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), + (void *)&ListLength); + test_error(error, "clSetKernelArg failed"); + + // Create linked list on one device and verify on another device (or the + // host). Do this for all possible combinations of devices and host + // within the platform. + for (int ci = 0; ci < (int)num_devices + 1; + ci++) // ci is CreationIndex, index of device/q to create linked + // list on + { + for (int vi = 0; vi < (int)num_devices + 1; + vi++) // vi is VerificationIndex, index of device/q to verify + // linked list on + { + if (ci == num_devices) // last device index represents the host, + // note the num_device+1 above. + { + error = create_linked_lists_on_host( + queues[0], nodes, nodes2, ListLength, numLists); + if (error) return -1; + } + else + { + error = create_linked_lists_on_device( + ci, queues[ci], allocator, kernel_create_lists, + numLists); + if (error) return -1; + } + + if (vi == num_devices) + { + error = verify_linked_lists_on_host( + vi, queues[0], nodes, nodes2, ListLength, numLists); + if (error) return -1; + } + else + { + error = verify_linked_lists_on_device( + vi, queues[vi], num_correct, kernel_verify_lists, + ListLength, numLists); + if (error) return -1; + } + } // inner loop, vi + } // outer loop, ci + } + + clSVMFree(context, pNodes2); + clSVMFree(context, pNodes); + + return 0; } diff --git a/test_conformance/SVM/test_enqueue_api.cpp b/test_conformance/SVM/test_enqueue_api.cpp index 256df43f..27b483fa 100644 --- a/test_conformance/SVM/test_enqueue_api.cpp +++ b/test_conformance/SVM/test_enqueue_api.cpp @@ -70,244 +70,282 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point data->status.store(1, std::memory_order_release); } -int test_svm_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_enqueue_api) { - clContextWrapper context = NULL; - clCommandQueueWrapper queues[MAXQ]; - cl_uint num_devices = 0; - const size_t elementNum = 1024; - const size_t numSVMBuffers = 32; - cl_int error = CL_SUCCESS; - RandomSeed seed(0); + clContextWrapper contextWrapper = NULL; + clCommandQueueWrapper queues[MAXQ]; + cl_uint num_devices = 0; + const size_t elementNum = 1024; + const size_t numSVMBuffers = 32; + cl_int error = CL_SUCCESS; + RandomSeed seed(0); - error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if(error) return TEST_FAIL; + error = create_cl_objects(deviceID, NULL, &contextWrapper, NULL, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (error) return TEST_FAIL; - queue = queues[0]; + queue = queues[0]; - //all possible sizes of vectors and scalars - size_t typeSizes[] = { - sizeof(cl_uchar), - sizeof(cl_uchar2), - sizeof(cl_uchar3), - sizeof(cl_uchar4), - sizeof(cl_uchar8), - sizeof(cl_uchar16), - sizeof(cl_ushort), - sizeof(cl_ushort2), - sizeof(cl_ushort3), - sizeof(cl_ushort4), - sizeof(cl_ushort8), - sizeof(cl_ushort16), - sizeof(cl_uint), - sizeof(cl_uint2), - sizeof(cl_uint3), - sizeof(cl_uint4), - sizeof(cl_uint8), - sizeof(cl_uint16), - sizeof(cl_ulong), - sizeof(cl_ulong2), - sizeof(cl_ulong3), - sizeof(cl_ulong4), - sizeof(cl_ulong8), - sizeof(cl_ulong16), - }; + // all possible sizes of vectors and scalars + size_t typeSizes[] = { + sizeof(cl_uchar), sizeof(cl_uchar2), sizeof(cl_uchar3), + sizeof(cl_uchar4), sizeof(cl_uchar8), sizeof(cl_uchar16), + sizeof(cl_ushort), sizeof(cl_ushort2), sizeof(cl_ushort3), + sizeof(cl_ushort4), sizeof(cl_ushort8), sizeof(cl_ushort16), + sizeof(cl_uint), sizeof(cl_uint2), sizeof(cl_uint3), + sizeof(cl_uint4), sizeof(cl_uint8), sizeof(cl_uint16), + sizeof(cl_ulong), sizeof(cl_ulong2), sizeof(cl_ulong3), + sizeof(cl_ulong4), sizeof(cl_ulong8), sizeof(cl_ulong16), + }; - enum allocationTypes { - host, - svm - }; - - struct TestType { - allocationTypes srcAlloc; - allocationTypes dstAlloc; - TestType(allocationTypes type1, allocationTypes type2): srcAlloc(type1), dstAlloc(type2){} - }; - - std::vector testTypes; - - testTypes.push_back(TestType(host, host)); - testTypes.push_back(TestType(host, svm)); - testTypes.push_back(TestType(svm, host)); - testTypes.push_back(TestType(svm, svm)); - - for (const auto test_case : testTypes) - { - log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n", test_case.srcAlloc == svm ? "svm" : "host", test_case.dstAlloc == svm ? "svm" : "host"); - for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i) + enum allocationTypes { - //generate initial data - std::vector fillData0(typeSizes[i]), fillData1(typeSizes[i]); - generate_data(fillData0, typeSizes[i], seed); - generate_data(fillData1, typeSizes[i], seed); - size_t data_size = elementNum * typeSizes[i]; - std::vector srcHostData(data_size, 0); - std::vector dstHostData(data_size, 0); - generate_data(srcHostData, srcHostData.size(), seed); - generate_data(dstHostData, dstHostData.size(), seed); + host, + svm + }; - cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0); - cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, data_size, 0); + struct TestType + { + allocationTypes srcAlloc; + allocationTypes dstAlloc; + TestType(allocationTypes type1, allocationTypes type2) + : srcAlloc(type1), dstAlloc(type2) + {} + }; - clEventWrapper userEvent = clCreateUserEvent(context, &error); - test_error(error, "clCreateUserEvent failed"); - clEventWrapper eventMemFillList[2]; + std::vector testTypes; - error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[0]); - test_error(error, "clEnqueueSVMMemFill failed"); - error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0], typeSizes[i], data_size, 1, &userEvent, &eventMemFillList[1]); - test_error(error, "clEnqueueSVMMemFill failed"); + testTypes.push_back(TestType(host, host)); + testTypes.push_back(TestType(host, svm)); + testTypes.push_back(TestType(svm, host)); + testTypes.push_back(TestType(svm, svm)); - error = clSetUserEventStatus(userEvent, CL_COMPLETE); - test_error(error, "clSetUserEventStatus failed"); + for (const auto test_case : testTypes) + { + log_info("clEnqueueSVMMemcpy case: src_alloc = %s, dst_alloc = %s\n", + test_case.srcAlloc == svm ? "svm" : "host", + test_case.dstAlloc == svm ? "svm" : "host"); + for (size_t i = 0; i < ARRAY_SIZE(typeSizes); ++i) + { + // generate initial data + std::vector fillData0(typeSizes[i]), + fillData1(typeSizes[i]); + generate_data(fillData0, typeSizes[i], seed); + generate_data(fillData1, typeSizes[i], seed); + size_t data_size = elementNum * typeSizes[i]; + std::vector srcHostData(data_size, 0); + std::vector dstHostData(data_size, 0); + generate_data(srcHostData, srcHostData.size(), seed); + generate_data(dstHostData, dstHostData.size(), seed); - cl_uchar * src_ptr; - cl_uchar * dst_ptr; - if (test_case.srcAlloc == host) { - src_ptr = srcHostData.data(); - } else if (test_case.srcAlloc == svm) { - src_ptr = srcBuffer; - } - if (test_case.dstAlloc == host) { - dst_ptr = dstHostData.data(); - } else if (test_case.dstAlloc == svm) { - dst_ptr = dstBuffer; - } - clEventWrapper eventMemcpy; - error = clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size, 2, &eventMemFillList[0], &eventMemcpy); - test_error(error, "clEnqueueSVMMemcpy failed"); + cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc( + context, CL_MEM_READ_WRITE, data_size, 0); + cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc( + context, CL_MEM_READ_WRITE, data_size, 0); - //coarse grain only supported. Synchronization required using map - clEventWrapper eventMap[2]; + clEventWrapper userEvent = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + clEventWrapper eventMemFillList[2]; - error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer, data_size, 1, &eventMemcpy, &eventMap[0]); - test_error(error, "clEnqueueSVMMap srcBuffer failed"); + error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], + typeSizes[i], data_size, 1, &userEvent, + &eventMemFillList[0]); + test_error(error, "clEnqueueSVMMemFill failed"); + error = clEnqueueSVMMemFill(queue, dstBuffer, &fillData1[0], + typeSizes[i], data_size, 1, &userEvent, + &eventMemFillList[1]); + test_error(error, "clEnqueueSVMMemFill failed"); - error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer, data_size, 1, &eventMemcpy, &eventMap[1]); - test_error(error, "clEnqueueSVMMap dstBuffer failed"); + error = clSetUserEventStatus(userEvent, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); - error = clWaitForEvents(2, &eventMap[0]); - test_error(error, "clWaitForEvents failed"); + cl_uchar *src_ptr; + cl_uchar *dst_ptr; + if (test_case.srcAlloc == host) + { + src_ptr = srcHostData.data(); + } + else if (test_case.srcAlloc == svm) + { + src_ptr = srcBuffer; + } + if (test_case.dstAlloc == host) + { + dst_ptr = dstHostData.data(); + } + else if (test_case.dstAlloc == svm) + { + dst_ptr = dstBuffer; + } + clEventWrapper eventMemcpy; + error = + clEnqueueSVMMemcpy(queue, CL_FALSE, dst_ptr, src_ptr, data_size, + 2, &eventMemFillList[0], &eventMemcpy); + test_error(error, "clEnqueueSVMMemcpy failed"); - //data verification - for (size_t j = 0; j < data_size; ++j) - { - if (dst_ptr[j] != src_ptr[j]) { - log_error("Invalid data at index %zu, dst_ptr %d, src_ptr %d\n", j, - dst_ptr[j], src_ptr[j]); + // coarse grain only supported. Synchronization required using map + clEventWrapper eventMap[2]; + + error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, srcBuffer, + data_size, 1, &eventMemcpy, &eventMap[0]); + test_error(error, "clEnqueueSVMMap srcBuffer failed"); + + error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ, dstBuffer, + data_size, 1, &eventMemcpy, &eventMap[1]); + test_error(error, "clEnqueueSVMMap dstBuffer failed"); + + error = clWaitForEvents(2, &eventMap[0]); + test_error(error, "clWaitForEvents failed"); + + // data verification + for (size_t j = 0; j < data_size; ++j) + { + if (dst_ptr[j] != src_ptr[j]) + { + log_error( + "Invalid data at index %zu, dst_ptr %d, src_ptr %d\n", + j, dst_ptr[j], src_ptr[j]); + return TEST_FAIL; + } + } + clEventWrapper eventUnmap[2]; + error = + clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]); + test_error(error, "clEnqueueSVMUnmap srcBuffer failed"); + + error = + clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]); + test_error(error, "clEnqueueSVMUnmap dstBuffer failed"); + + error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0], + typeSizes[i], data_size / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2, + &fillData1[0], typeSizes[i], + data_size / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, + data_size / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemcpy failed"); + + error = clEnqueueSVMMemcpy( + queue, CL_TRUE, dstBuffer + data_size / 2, + srcBuffer + data_size / 2, data_size / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemcpy failed"); + + void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer }; + + clEventWrapper eventFree; + error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree); + test_error(error, "clEnqueueSVMFree failed"); + + error = clWaitForEvents(1, &eventFree); + test_error(error, "clWaitForEvents failed"); + + // event info verification for new SVM commands + cl_command_type commandType; + for (auto &check_event : eventMemFillList) + { + error = + clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_MEMFILL) + { + log_error("Invalid command type returned for " + "clEnqueueSVMMemFill\n"); + return TEST_FAIL; + } + } + + error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_MEMCPY) + { + log_error( + "Invalid command type returned for clEnqueueSVMMemcpy\n"); + return TEST_FAIL; + } + for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++) + { + error = + clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_MAP) + { + log_error( + "Invalid command type returned for clEnqueueSVMMap\n"); + return TEST_FAIL; + } + + error = + clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_UNMAP) + { + log_error("Invalid command type returned for " + "clEnqueueSVMUnmap\n"); + return TEST_FAIL; + } + } + error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, + sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_FREE) + { + log_error( + "Invalid command type returned for clEnqueueSVMFree\n"); + return TEST_FAIL; + } + } + } + std::vector buffers(numSVMBuffers, 0); + for (size_t i = 0; i < numSVMBuffers; ++i) + buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0); + + // verify if callback is triggered correctly + CallbackData data; + data.status = 0; + + error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], + callback_svm_free, &data, 0, 0, 0); + test_error(error, "clEnqueueSVMFree failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // wait for the callback + while (data.status.load(std::memory_order_acquire) == 0) + { + usleep(1); + } + + // check if number of SVM pointers returned in the callback matches with + // expected + if (data.num_svm_pointers != buffers.size()) + { + log_error("Invalid number of SVM pointers returned in the callback, " + "expected: %zu, got: %d\n", + buffers.size(), data.num_svm_pointers); + return TEST_FAIL; + } + + // check if pointers returned in callback are correct + for (size_t i = 0; i < buffers.size(); ++i) + { + if (data.svm_pointers[i] != buffers[i]) + { + log_error( + "Invalid SVM pointer returned in the callback, idx: %zu\n", i); return TEST_FAIL; } - } - clEventWrapper eventUnmap[2]; - error = clEnqueueSVMUnmap(queue, srcBuffer, 0, nullptr, &eventUnmap[0]); - test_error(error, "clEnqueueSVMUnmap srcBuffer failed"); - - error = clEnqueueSVMUnmap(queue, dstBuffer, 0, nullptr, &eventUnmap[1]); - test_error(error, "clEnqueueSVMUnmap dstBuffer failed"); - - error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0); - test_error(error, "clEnqueueSVMMemFill failed"); - - error = clEnqueueSVMMemFill(queue, dstBuffer + data_size / 2, &fillData1[0], typeSizes[i], data_size / 2, 0, 0, 0); - test_error(error, "clEnqueueSVMMemFill failed"); - - error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, data_size / 2, 0, 0, 0); - test_error(error, "clEnqueueSVMMemcpy failed"); - - error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + data_size / 2, srcBuffer + data_size / 2, data_size / 2, 0, 0, 0); - test_error(error, "clEnqueueSVMMemcpy failed"); - - void *ptrs[] = { (void *)srcBuffer, (void *)dstBuffer }; - - clEventWrapper eventFree; - error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree); - test_error(error, "clEnqueueSVMFree failed"); - - error = clWaitForEvents(1, &eventFree); - test_error(error, "clWaitForEvents failed"); - - //event info verification for new SVM commands - cl_command_type commandType; - for (auto &check_event : eventMemFillList) { - error = clGetEventInfo(check_event, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_MEMFILL) - { - log_error("Invalid command type returned for clEnqueueSVMMemFill\n"); - return TEST_FAIL; - } - } - - error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_MEMCPY) - { - log_error("Invalid command type returned for clEnqueueSVMMemcpy\n"); - return TEST_FAIL; - } - for (size_t map_id = 0; map_id < ARRAY_SIZE(eventMap); map_id++) { - error = clGetEventInfo(eventMap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_MAP) - { - log_error("Invalid command type returned for clEnqueueSVMMap\n"); - return TEST_FAIL; - } - - error = clGetEventInfo(eventUnmap[map_id], CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_UNMAP) - { - log_error("Invalid command type returned for clEnqueueSVMUnmap\n"); - return TEST_FAIL; - } - } - error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); - test_error(error, "clGetEventInfo failed"); - if (commandType != CL_COMMAND_SVM_FREE) - { - log_error("Invalid command type returned for clEnqueueSVMFree\n"); - return TEST_FAIL; - } } - } - std::vector buffers(numSVMBuffers, 0); - for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0); - //verify if callback is triggered correctly - CallbackData data; - data.status = 0; - - error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0); - test_error(error, "clEnqueueSVMFree failed"); - - error = clFinish(queue); - test_error(error, "clFinish failed"); - - //wait for the callback - while(data.status.load(std::memory_order_acquire) == 0) { - usleep(1); - } - - //check if number of SVM pointers returned in the callback matches with expected - if (data.num_svm_pointers != buffers.size()) - { - log_error("Invalid number of SVM pointers returned in the callback, " - "expected: %zu, got: %d\n", - buffers.size(), data.num_svm_pointers); - return TEST_FAIL; - } - - //check if pointers returned in callback are correct - for (size_t i = 0; i < buffers.size(); ++i) - { - if (data.svm_pointers[i] != buffers[i]) - { - log_error("Invalid SVM pointer returned in the callback, idx: %zu\n", - i); - return TEST_FAIL; - } - } - - return 0; + return 0; } diff --git a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp index 50c57061..1c39e33f 100644 --- a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp +++ b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp @@ -139,44 +139,55 @@ int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper* // Each bin in the hash table is a linked list. Each bin is protected against simultaneous // update using a lock free technique. The correctness of the list is verfied on the host. // This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering. -int test_svm_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_fine_grain_memory_consistency) { - clContextWrapper context; - clProgramWrapper program; - clKernelWrapper kernel; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper; + clProgramWrapper program; + clKernelWrapper kernel; + clCommandQueueWrapper queues[MAXQ]; - cl_uint num_devices = 0; - cl_int err = CL_SUCCESS; - std::vector required_extensions; - required_extensions.push_back("cl_khr_int64_base_atomics"); - required_extensions.push_back("cl_khr_int64_extended_atomics"); + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; + std::vector required_extensions; + required_extensions.push_back("cl_khr_int64_base_atomics"); + required_extensions.push_back("cl_khr_int64_extended_atomics"); - // Make pragmas visible for 64-bit addresses - hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0'; + // Make pragmas visible for 64-bit addresses + hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0'; - char *source[] = { hash_table_kernel }; + char *source[] = { hash_table_kernel }; - err = create_cl_objects(deviceID, (const char**)source, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS, required_extensions); - if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. - if(err < 0) return -1; // fail test. + err = create_cl_objects( + deviceID, (const char **)source, &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS, + required_extensions); + context = contextWrapper; + if (err == 1) + return 0; // no devices capable of requested SVM level, so don't execute + // but count test as passing. + if (err < 0) return -1; // fail test. - kernel = clCreateKernel(program, "build_hash_table", &err); - test_error(err, "clCreateKernel failed"); - size_t num_pixels = num_elements; + kernel = clCreateKernel(program, "build_hash_table", &err); + test_error(err, "clCreateKernel failed"); + size_t num_pixels = num_elements; - int result; - cl_uint numBins = 1; // all work groups in all devices and the host code will hammer on this one lock. - result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); - if(result == -1) return result; + int result; + cl_uint numBins = 1; // all work groups in all devices and the host code + // will hammer on this one lock. + result = launch_kernels_and_verify(contextWrapper, queues, kernel, + num_devices, numBins, num_pixels); + if (result == -1) return result; - numBins = 2; // 2 locks within in same cache line will get hit from different devices and host. - result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); - if(result == -1) return result; + numBins = 2; // 2 locks within in same cache line will get hit from + // different devices and host. + result = launch_kernels_and_verify(contextWrapper, queues, kernel, + num_devices, numBins, num_pixels); + if (result == -1) return result; - numBins = 29; // locks span a few cache lines. - result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); - if(result == -1) return result; + numBins = 29; // locks span a few cache lines. + result = launch_kernels_and_verify(contextWrapper, queues, kernel, + num_devices, numBins, num_pixels); + if (result == -1) return result; - return result; + return result; } diff --git a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp index 0b94cbf2..f2572a8a 100644 --- a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp +++ b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp @@ -44,67 +44,88 @@ void spawnAnalysisTask(int location) // Concept: a device kernel is used to search an input image for regions that match a target pattern. // The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices). // The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets. -int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_fine_grain_sync_buffers) { - clContextWrapper context = NULL; - clProgramWrapper program = NULL; - cl_uint num_devices = 0; - cl_int err = CL_SUCCESS; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; - err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); - if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. - if(err < 0) return -1; // fail test. + err = create_cl_objects(deviceID, &find_targets_kernel[0], &contextWrapper, + &program, &queues[0], &num_devices, + CL_DEVICE_SVM_FINE_GRAIN_BUFFER + | CL_DEVICE_SVM_ATOMICS); + context = contextWrapper; + if (err == 1) + return 0; // no devices capable of requested SVM level, so don't execute + // but count test as passing. + if (err < 0) return -1; // fail test. - clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err); - test_error(err, "clCreateKernel failed"); + clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err); + test_error(err, "clCreateKernel failed"); - size_t num_pixels = num_elements; - //cl_uint num_pixels = 1024*1024*32; + size_t num_pixels = num_elements; + // cl_uint num_pixels = 1024*1024*32; - cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0); - cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0); - cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0); + cl_uint *pInputImage = (cl_uint *)clSVMAlloc( + context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, + sizeof(cl_uint) * num_pixels, 0); + cl_uint *pNumTargetsFound = (cl_uint *)clSVMAlloc( + context, + CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + sizeof(cl_uint), 0); + cl_int *pTargetLocations = (cl_int *)clSVMAlloc( + context, + CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + sizeof(cl_int) * MAX_TARGETS, 0); - cl_uint targetDescriptor = 777; - *pNumTargetsFound = 0; - cl_uint i; - for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1; - for(i=0; i < num_pixels; i++) pInputImage[i] = 0; - pInputImage[0] = targetDescriptor; - pInputImage[3] = targetDescriptor; - pInputImage[num_pixels - 1] = targetDescriptor; + cl_uint targetDescriptor = 777; + *pNumTargetsFound = 0; + cl_uint i; + for (i = 0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1; + for (i = 0; i < num_pixels; i++) pInputImage[i] = 0; + pInputImage[0] = targetDescriptor; + pInputImage[3] = targetDescriptor; + pInputImage[num_pixels - 1] = targetDescriptor; - err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage); - err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor); - err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound); - err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations); - test_error(err, "clSetKernelArg failed"); + err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage); + err |= + clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&targetDescriptor); + err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound); + err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations); + test_error(err, "clSetKernelArg failed"); - cl_event done; - err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done); - test_error(err,"clEnqueueNDRangeKernel failed"); - clFlush(queues[0]); + cl_event done; + err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, + 0, NULL, &done); + test_error(err, "clEnqueueNDRangeKernel failed"); + clFlush(queues[0]); - i=0; - cl_int status; - // check for new targets, if found spawn a task to analyze target. - do { - err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL); - test_error(err,"clGetEventInfo failed"); - if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet. + i = 0; + cl_int status; + // check for new targets, if found spawn a task to analyze target. + do { - spawnAnalysisTask(pTargetLocations[i]); - i++; - } - } while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1); + err = clGetEventInfo(done, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(cl_int), &status, NULL); + test_error(err, "clGetEventInfo failed"); + if (AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) + != -1) // -1 indicates slot not used yet. + { + spawnAnalysisTask(pTargetLocations[i]); + i++; + } + } while (status != CL_COMPLETE + || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) + != -1); - clReleaseEvent(done); - clSVMFree(context, pInputImage); - clSVMFree(context, pNumTargetsFound); - clSVMFree(context, pTargetLocations); + clReleaseEvent(done); + clSVMFree(context, pInputImage); + clSVMFree(context, pNumTargetsFound); + clSVMFree(context, pTargetLocations); - if(i != 3) return -1; - return 0; + if (i != 3) return -1; + return 0; } diff --git a/test_conformance/SVM/test_migrate.cpp b/test_conformance/SVM/test_migrate.cpp index b767a70a..b697b48a 100644 --- a/test_conformance/SVM/test_migrate.cpp +++ b/test_conformance/SVM/test_migrate.cpp @@ -75,7 +75,7 @@ wait_and_release(const char* s, cl_event* evs, int n) return 0; } -int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_migrate) { std::vector amem(GLOBAL_SIZE); std::vector bmem(GLOBAL_SIZE); @@ -86,15 +86,17 @@ int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue RandomSeed seed(0); - clContextWrapper context = NULL; + clContextWrapper contextWrapper = NULL; clCommandQueueWrapper queues[MAXQ]; cl_uint num_devices = 0; clProgramWrapper program; cl_int error; - error = create_cl_objects(deviceID, &sources[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if (error) - return -1; + error = create_cl_objects(deviceID, &sources[0], &contextWrapper, &program, + &queues[0], &num_devices, + CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (error) return -1; if (num_devices > 1) { log_info(" Running on two devices.\n"); diff --git a/test_conformance/SVM/test_pointer_passing.cpp b/test_conformance/SVM/test_pointer_passing.cpp index 42baa76a..9493edfc 100644 --- a/test_conformance/SVM/test_pointer_passing.cpp +++ b/test_conformance/SVM/test_pointer_passing.cpp @@ -35,81 +35,107 @@ const char *SVMPointerPassing_test_kernel[] = { // The buffer is initialized to known values at each location. // The kernel checks that it finds the expected value at each location. // TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--. -int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +REGISTER_TEST(svm_pointer_passing) { - clContextWrapper context = NULL; - clProgramWrapper program = NULL; - cl_uint num_devices = 0; - cl_int error = CL_SUCCESS; - clCommandQueueWrapper queues[MAXQ]; + clContextWrapper contextWrapper = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; - error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); - if(error) return -1; + error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], + &contextWrapper, &program, &queues[0], + &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + context = contextWrapper; + if (error) return -1; - clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error); - test_error(error,"clCreateKernel failed"); + clKernelWrapper kernel_verify_char = + clCreateKernel(program, "verify_char", &error); + test_error(error, "clCreateKernel failed"); - size_t bufSize = 256; - cl_uchar *pbuf_svm_alloc = (cl_uchar*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0); + size_t bufSize = 256; + cl_uchar *pbuf_svm_alloc = (cl_uchar *)clSVMAlloc( + context, CL_MEM_READ_WRITE, sizeof(cl_uchar) * bufSize, 0); - cl_int *pNumCorrect = NULL; - pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); + cl_int *pNumCorrect = NULL; + pNumCorrect = + (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); - { - clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf_svm_alloc, &error); - test_error(error, "clCreateBuffer failed."); - - clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error); - test_error(error, "clCreateBuffer failed."); - - error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct); - test_error(error, "clSetKernelArg failed"); - - // put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them. - cl_command_queue cmdq = queues[0]; - cl_uchar* pbuf_map_buffer = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error); - test_error2(error, pbuf_map_buffer, "clEnqueueMapBuffer failed"); - for(int i = 0; i<(int)bufSize; i++) { - pbuf_map_buffer[i]= (cl_uchar)i; - } - error = clEnqueueUnmapMemObject(cmdq, buf, pbuf_map_buffer, 0,NULL,NULL); - test_error(error, "clEnqueueUnmapMemObject failed."); + clMemWrapper buf = + clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + sizeof(cl_uchar) * bufSize, pbuf_svm_alloc, &error); + test_error(error, "clCreateBuffer failed."); - for (cl_uint ii = 0; ii