Extended subgroups - use 128bit masks (#1215)

* Extended subgroups - use 128bit masks

* Refactoring to avoid kernels code duplication

* unification kernel names as test_ prefix +subgroups function name
* use string literals that improve readability
* use kernel templates that limit code duplication
* WorkGroupParams allows define default kernel - kernel template for multiple functions
* WorkGroupParams allows define  kernel for specific one subgroup function

Co-authored-by: Stuart Brady <stuart.brady@arm.com>
This commit is contained in:
Grzegorz Wawiorko
2021-10-01 12:28:37 +02:00
committed by GitHub
parent 903f1bf65d
commit 92844bead1
12 changed files with 592 additions and 1054 deletions

View File

@@ -15,37 +15,19 @@
//
#include "procs.h"
#include "subhelpers.h"
#include "subgroup_common_kernels.h"
#include "subgroup_common_templates.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
namespace {
static const char* shuffle_down_source =
"__kernel void test_sub_group_shuffle_down(const __global Type *in, "
"__global int4 *xy, __global Type *out)\n"
"{\n"
" int gid = get_global_id(0);\n"
" XY(xy,gid);\n"
" Type x = in[gid];\n"
" out[gid] = sub_group_shuffle_down(x, xy[gid].z);"
"}\n";
static const char* shuffle_up_source =
"__kernel void test_sub_group_shuffle_up(const __global Type *in, __global "
"int4 *xy, __global Type *out)\n"
"{\n"
" int gid = get_global_id(0);\n"
" XY(xy,gid);\n"
" Type x = in[gid];\n"
" out[gid] = sub_group_shuffle_up(x, xy[gid].z);"
"}\n";
template <typename T> int run_shuffle_relative_for_type(RunTestForType rft)
{
int error = rft.run_impl<T, SHF<T, ShuffleOp::shuffle_up>>(
"test_sub_group_shuffle_up", shuffle_up_source);
int error =
rft.run_impl<T, SHF<T, ShuffleOp::shuffle_up>>("sub_group_shuffle_up");
error |= rft.run_impl<T, SHF<T, ShuffleOp::shuffle_down>>(
"test_sub_group_shuffle_down", shuffle_down_source);
"sub_group_shuffle_down");
return error;
}
@@ -62,9 +44,11 @@ int test_subgroup_functions_shuffle_relative(cl_device_id device,
"device, skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
constexpr size_t global_work_size = 2000;
constexpr size_t local_work_size = 200;
WorkGroupParams test_params(global_work_size, local_work_size);
test_params.save_kernel_source(sub_group_generic_source);
RunTestForType rft(device, context, queue, num_elements, test_params);
int error = run_shuffle_relative_for_type<cl_int>(rft);