From cd5c1659469862b9a927f88e6e8d0112e774620f Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Wed, 19 Jul 2023 13:51:30 +0100 Subject: [PATCH] Deduplicate test_barrier (#1542) Merge test_barrier and test_wg_barrier. Reformat using clang-format kernel source code. Signed-off-by: John Kesapides --- test_conformance/basic/CMakeLists.txt | 2 - test_conformance/basic/test_barrier.cpp | 193 ++++++++++----------- test_conformance/basic/test_wg_barrier.cpp | 159 ----------------- 3 files changed, 93 insertions(+), 261 deletions(-) delete mode 100644 test_conformance/basic/test_wg_barrier.cpp diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index 47c1c980..9dcf1d5a 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -52,14 +52,12 @@ set(${MODULE_NAME}_SOURCES test_kernel_call_kernel_function.cpp test_local_kernel_scope.cpp test_progvar.cpp - test_wg_barrier.cpp test_global_linear_id.cpp test_local_linear_id.cpp test_enqueued_local_size.cpp test_simple_image_pitch.cpp test_get_linear_ids.cpp test_rw_image_access_qualifier.cpp - test_wg_barrier.cpp test_enqueued_local_size.cpp test_global_linear_id.cpp test_local_linear_id.cpp diff --git a/test_conformance/basic/test_barrier.cpp b/test_conformance/basic/test_barrier.cpp index d20af14a..6352b42f 100644 --- a/test_conformance/basic/test_barrier.cpp +++ b/test_conformance/basic/test_barrier.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -21,143 +21,136 @@ #include #include +#include +#include +#include #include "procs.h" -const char *barrier_kernel_code = -"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i1; i = hadd(i,1))\n" -" {\n" -" barrier(CLK_GLOBAL_MEM_FENCE);\n" -" if (tid + i < lsize)\n" -" tmp_sum[tid] += tmp_sum[tid + i];\n" -" lsize = i; \n" -" }\n" -"\n" -" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" -" if (tid == 0)\n" -" *sum = tmp_sum[0];\n" -"}\n"; - - -static int -verify_sum(int *inptr, int *outptr, int n) +namespace { +const char *barrier_kernel_code = R"( +__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, + __global int *sum) { - int r = 0; - int i; + int tid = get_local_id(0); + int lsize = get_local_size(0); + int i; - for (i=0; i 1; i = hadd(i, 1)) { - log_error("BARRIER test failed\n"); - return -1; + BARRIER(CLK_GLOBAL_MEM_FENCE); + if (tid + i < lsize) tmp_sum[tid] += tmp_sum[tid + i]; + lsize = i; } - log_info("BARRIER test passed\n"); - return 0; + // no barrier is required here because last person to write to tmp_sum[0] + // was tid 0 + if (tid == 0) *sum = tmp_sum[0]; +} +)"; + + +void generate_random_inputs(std::vector &v) +{ + RandomSeed seed(gRandomSeed); + + auto random_generator = [&seed]() { + return static_cast( + get_random_float(-0x01000000, 0x01000000, seed)); + }; + + std::generate(v.begin(), v.end(), random_generator); } - -int -test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +int test_barrier_common(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + std::string barrier_str) { - cl_mem streams[3]; - cl_int *input_ptr = NULL, *output_ptr = NULL; - cl_program program; - cl_kernel kernel; - size_t global_threads[3]; - size_t local_threads[3]; - int err; - int i; - size_t max_local_workgroup_size[3]; - size_t max_threadgroup_size = 0; - MTdata d; + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; - err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" ); + cl_int output; + int err; + + size_t max_threadgroup_size = 0; + std::string build_options = std::string("-DBARRIER=") + barrier_str; + err = create_single_kernel_helper(context, &program, &kernel, 1, + &barrier_kernel_code, "compute_sum", + build_options.c_str()); test_error(err, "Failed to build kernel/program."); - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(max_threadgroup_size), &max_threadgroup_size, NULL); - test_error(err, "clGetKernelWorkgroupInfo failed."); - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); - test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); - - // Pick the minimum of the device and the kernel - if (max_threadgroup_size > max_local_workgroup_size[0]) - max_threadgroup_size = max_local_workgroup_size[0]; + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, + &max_threadgroup_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed."); // work group size must divide evenly into the global size - while( num_elements % max_threadgroup_size ) - max_threadgroup_size--; + while (num_elements % max_threadgroup_size) max_threadgroup_size--; - input_ptr = (int*)malloc(sizeof(int) * num_elements); - output_ptr = (int*)malloc(sizeof(int)); + std::vector input(num_elements); streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, &err); + sizeof(cl_int) * num_elements, nullptr, &err); test_error(err, "clCreateBuffer failed."); - streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), + nullptr, &err); test_error(err, "clCreateBuffer failed."); streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * max_threadgroup_size, NULL, &err); + sizeof(cl_int) * max_threadgroup_size, nullptr, &err); test_error(err, "clCreateBuffer failed."); - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include -#include - - -#include "procs.h" - -const char *wg_barrier_kernel_code = -"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i1; i = hadd(i,1))\n" -" {\n" -" work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" -" if (tid + i < lsize)\n" -" tmp_sum[tid] += tmp_sum[tid + i];\n" -" lsize = i; \n" -" }\n" -"\n" -" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" -" if (tid == 0)\n" -" *sum = tmp_sum[0];\n" -"}\n"; - - -static int -verify_sum(int *inptr, int *tmpptr, int *outptr, int n) -{ - int i; - int reference = 0; - - for (i=0; i max_local_workgroup_size[0]) - max_threadgroup_size = max_local_workgroup_size[0]; - - // work group size must divide evenly into the global size - while( num_elements % max_threadgroup_size ) - max_threadgroup_size--; - - input_ptr = (int*)malloc(sizeof(int) * num_elements); - output_ptr = (int*)malloc(sizeof(int)); - - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, &err); - test_error(err, "clCreateBuffer failed."); - streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err); - test_error(err, "clCreateBuffer failed."); - streams[2] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * max_threadgroup_size, NULL, &err); - test_error(err, "clCreateBuffer failed."); - - d = init_genrand( gRandomSeed ); - for (i=0; i