From 1c19a4cbdbcaa9d8a683fed26d883735742b41c9 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 28 Jun 2022 17:05:11 +0100 Subject: [PATCH] Add tests for cl_khr_subgroup_rotate (#1439) Signed-off-by: Stuart Brady --- test_conformance/subgroups/CMakeLists.txt | 1 + test_conformance/subgroups/main.cpp | 3 +- test_conformance/subgroups/procs.h | 4 + .../subgroups/subgroup_common_templates.h | 35 +++++- test_conformance/subgroups/subhelpers.h | 6 +- .../subgroups/test_subgroup_rotate.cpp | 109 ++++++++++++++++++ 6 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 test_conformance/subgroups/test_subgroup_rotate.cpp diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt index d48af9cc..1ff249cf 100644 --- a/test_conformance/subgroups/CMakeLists.txt +++ b/test_conformance/subgroups/CMakeLists.txt @@ -15,6 +15,7 @@ set(${MODULE_NAME}_SOURCES test_subgroup_clustered_reduce.cpp test_subgroup_shuffle.cpp test_subgroup_shuffle_relative.cpp + test_subgroup_rotate.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index ebe94558..a3ae910d 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -41,7 +41,8 @@ test_definition test_list[] = { ADD_TEST(subgroup_functions_ballot), ADD_TEST(subgroup_functions_clustered_reduce), ADD_TEST(subgroup_functions_shuffle), - ADD_TEST(subgroup_functions_shuffle_relative) + ADD_TEST(subgroup_functions_shuffle_relative), + ADD_TEST(subgroup_functions_rotate), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h index d09e8242..d4f51bec 100644 --- a/test_conformance/subgroups/procs.h +++ b/test_conformance/subgroups/procs.h @@ -81,4 +81,8 @@ extern int test_subgroup_functions_shuffle_relative(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_subgroup_functions_rotate(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); #endif /*_procs_h*/ diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 0ffa46c8..5051f2e9 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -501,7 +501,31 @@ template struct SHF l = (((cl_uint)(genrand_int32(gMTdata) & 0x7fffffff) + 1) % (ns * 2 + 1)) - 1; - m[midx] = l; + switch (operation) + { + case ShuffleOp::shuffle: + case ShuffleOp::shuffle_xor: + case ShuffleOp::shuffle_up: + case ShuffleOp::shuffle_down: + // storing information about shuffle index/delta + m[midx] = (cl_int)l; + break; + case ShuffleOp::rotate: + case ShuffleOp::clustered_rotate: + // Storing information about rotate delta. + // The delta must be the same for each thread in + // the subgroup. + if (i == 0) + { + m[midx] = (cl_int)l; + } + else + { + m[midx] = m[midx - 4]; + } + break; + default: break; + } cl_ulong number = genrand_int64(gMTdata); set_value(t[ii + i], number); } @@ -565,6 +589,15 @@ template struct SHF if (l >= ns) skip = true; tr_idx = i + l; break; + // rotate - treat l as delta + case ShuffleOp::rotate: + tr_idx = (i + l) % test_params.subgroup_size; + break; + case ShuffleOp::clustered_rotate: { + tr_idx = ((i & ~(test_params.cluster_size - 1)) + + ((i + l) % test_params.cluster_size)); + break; + } default: break; } diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 12704db8..a305639a 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -251,7 +251,9 @@ enum class ShuffleOp shuffle, shuffle_up, shuffle_down, - shuffle_xor + shuffle_xor, + rotate, + clustered_rotate, }; enum class ArithmeticOp @@ -317,6 +319,8 @@ static const char *const operation_names(ShuffleOp operation) case ShuffleOp::shuffle_up: return "shuffle_up"; case ShuffleOp::shuffle_down: return "shuffle_down"; case ShuffleOp::shuffle_xor: return "shuffle_xor"; + case ShuffleOp::rotate: return "rotate"; + case ShuffleOp::clustered_rotate: return "clustered_rotate"; default: log_error("Unknown operation request"); break; } return ""; diff --git a/test_conformance/subgroups/test_subgroup_rotate.cpp b/test_conformance/subgroups/test_subgroup_rotate.cpp new file mode 100644 index 00000000..db0f48eb --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_rotate.cpp @@ -0,0 +1,109 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "subhelpers.h" +#include "subgroup_common_kernels.h" +#include "subgroup_common_templates.h" +#include "harness/conversions.h" +#include "harness/typeWrappers.h" + +namespace { + +template int run_rotate_for_type(RunTestForType rft) +{ + int error = rft.run_impl>("sub_group_rotate"); + return error; +} + +std::string sub_group_clustered_rotate_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, + uint cluster_size) { + Type r; + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + int delta = xy[gid].z; + switch (cluster_size) { + case 1: r = %s(x, delta, 1); break; + case 2: r = %s(x, delta, 2); break; + case 4: r = %s(x, delta, 4); break; + case 8: r = %s(x, delta, 8); break; + case 16: r = %s(x, delta, 16); break; + case 32: r = %s(x, delta, 32); break; + case 64: r = %s(x, delta, 64); break; + case 128: r = %s(x, delta, 128); break; + } + out[gid] = r; + } +)"; + +template int run_clustered_rotate_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_clustered_rotate"); + return error; +} + +} + +int test_subgroup_functions_rotate(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (!is_extension_available(device, "cl_khr_subgroup_rotate")) + { + log_info("cl_khr_subgroup_rotate is not supported on this device, " + "skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_generic_source); + RunTestForType rft(device, context, queue, num_elements, test_params); + + int error = run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + + WorkGroupParams test_params_clustered(global_work_size, local_work_size, -1, + 3); + test_params_clustered.save_kernel_source(sub_group_clustered_rotate_source); + RunTestForType rft_clustered(device, context, queue, num_elements, + test_params_clustered); + + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + + return error; +}