mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
New subgroups - full changes set (#1074)
* Extended subgroups - extended types types * Extended subgroups - non uniform vote tests * Extended subgroups - non uniform arithmetic tests * Extended subgroups - ballot tests * Extended subgroups - clustered reduce tests * Extended subgroups - shuffle tests * Extended subgroups - formating issues * Extended subgroups - review fixes * Extended subgroups - review fixes Fixed: removed additional brakes, kernel_sstr * Extended subgroups - fix macos build error * Extended subgroups - review fixes Fixed: mac os build error * Extended subgroups - data type verification example * Extended subgroups - error unification * Extended subgroups - fix header years * Extended subgroups - use is_half_nan * Extended subgroups - compare half as float * Review fixes mostly for ballot functions. - Modify kernels for better handling active/inactive workitems - Modify gen/chk functions for handling non uniform workgroup sizes - Introduce new variables naming convention - minor fixes * Extended subgroups - simplification data generation for ballot lsb/msb functions * Extended subgroups - minor fixes * Extended subgroups - move common code to function * Extended subgroups - formatting errors fix * Extended subgroups - fix build error * Extended subgroups - sub_group_elect more sophisticated Define mask which is 4bytes pattern where bit 1 means work item is active. If workitem in subgroup matches pattern then run sub_group_elect() * Extended subgroups - fix Ubuntu build error * Extended subgroups - voting function review fixes * adjust all function for using masks * remove calculate templates * merge code to one common template * check results only in active workitems * normalize values on host side * minor fixes * Extended subgroups - fix typos * Set of fixes and improvements after review * define WorkGroupParams to stop extended parameters list in function * better workitems mask handing (WorkGroupParams) * narrow values of data input generation to avoid overflows (arithmetic func) * implement work item masks for arithmetic functions * enable half type testing for reduction/scan/broadcast * minor fixes * Extended subgroups - fix Linux issues * Extended subgroups - fix sub_group_local_id data type * Extended subgroups - use vector instead of array. * Extended subgroups - change names to subgroup * Extended subgroups - uncomment code, fix build * Extended subgroups - build fix, use cl_half_from_float func * Extended subgroups - remove is_half_nan * Extended subgroups - do no use undef min/max * Extended subgroups - use parenthesis, fix formatting
This commit is contained in:
committed by
GitHub
parent
c5e4ca6c91
commit
71bef8563e
@@ -5,8 +5,16 @@ set(${MODULE_NAME}_SOURCES
|
||||
test_barrier.cpp
|
||||
test_queries.cpp
|
||||
test_workitem.cpp
|
||||
test_workgroup.cpp
|
||||
test_subgroup.cpp
|
||||
test_ifp.cpp
|
||||
test_subgroup_extended_types.cpp
|
||||
subgroup_common_kernels.cpp
|
||||
test_subgroup_non_uniform_vote.cpp
|
||||
test_subgroup_non_uniform_arithmetic.cpp
|
||||
test_subgroup_ballot.cpp
|
||||
test_subgroup_clustered_reduce.cpp
|
||||
test_subgroup_shuffle.cpp
|
||||
test_subgroup_shuffle_relative.cpp
|
||||
)
|
||||
|
||||
include(../CMakeCommon.txt)
|
||||
|
||||
@@ -27,12 +27,19 @@ test_definition test_list[] = {
|
||||
ADD_TEST_VERSION(sub_group_info_core, Version(2, 1)),
|
||||
ADD_TEST_VERSION(work_item_functions_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(work_item_functions_core, Version(2, 1)),
|
||||
ADD_TEST_VERSION(work_group_functions_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(work_group_functions_core, Version(2, 1)),
|
||||
ADD_TEST_VERSION(subgroup_functions_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(subgroup_functions_core, Version(2, 1)),
|
||||
ADD_TEST_VERSION(barrier_functions_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(barrier_functions_core, Version(2, 1)),
|
||||
ADD_TEST_VERSION(ifp_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(ifp_core, Version(2, 1))
|
||||
ADD_TEST_VERSION(ifp_core, Version(2, 1)),
|
||||
ADD_TEST(subgroup_functions_extended_types),
|
||||
ADD_TEST(subgroup_functions_non_uniform_vote),
|
||||
ADD_TEST(subgroup_functions_non_uniform_arithmetic),
|
||||
ADD_TEST(subgroup_functions_ballot),
|
||||
ADD_TEST(subgroup_functions_clustered_reduce),
|
||||
ADD_TEST(subgroup_functions_shuffle),
|
||||
ADD_TEST(subgroup_functions_shuffle_relative)
|
||||
};
|
||||
|
||||
const int test_num = ARRAY_SIZE(test_list);
|
||||
|
||||
@@ -37,14 +37,12 @@ extern int test_work_item_functions_core(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_work_group_functions_ext(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_work_group_functions_core(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_subgroup_functions_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_subgroup_functions_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_barrier_functions_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_barrier_functions_core(cl_device_id device, cl_context context,
|
||||
@@ -56,5 +54,31 @@ extern int test_ifp_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_ifp_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_subgroup_functions_extended_types(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_subgroup_functions_non_uniform_vote(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_subgroup_functions_non_uniform_arithmetic(
|
||||
cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_subgroup_functions_ballot(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_subgroup_functions_clustered_reduce(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_subgroup_functions_shuffle(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_subgroup_functions_shuffle_relative(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
#endif /*_procs_h*/
|
||||
|
||||
106
test_conformance/subgroups/subgroup_common_kernels.cpp
Normal file
106
test_conformance/subgroups/subgroup_common_kernels.cpp
Normal file
@@ -0,0 +1,106 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "subgroup_common_kernels.h"
|
||||
|
||||
const char* bcast_source =
|
||||
"__kernel void test_bcast(const __global Type *in, "
|
||||
"__global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" Type x = in[gid];\n"
|
||||
" uint which_sub_group_local_id = xy[gid].z;\n"
|
||||
" out[gid] = sub_group_broadcast(x, which_sub_group_local_id);\n"
|
||||
|
||||
"}\n";
|
||||
|
||||
const char* redadd_source = "__kernel void test_redadd(const __global Type "
|
||||
"*in, __global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_reduce_add(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
const char* redmax_source = "__kernel void test_redmax(const __global Type "
|
||||
"*in, __global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_reduce_max(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
const char* redmin_source = "__kernel void test_redmin(const __global Type "
|
||||
"*in, __global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_reduce_min(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
const char* scinadd_source =
|
||||
"__kernel void test_scinadd(const __global Type *in, __global int4 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_inclusive_add(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
const char* scinmax_source =
|
||||
"__kernel void test_scinmax(const __global Type *in, __global int4 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_inclusive_max(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
const char* scinmin_source =
|
||||
"__kernel void test_scinmin(const __global Type *in, __global int4 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_inclusive_min(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
const char* scexadd_source =
|
||||
"__kernel void test_scexadd(const __global Type *in, __global int4 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_exclusive_add(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
const char* scexmax_source =
|
||||
"__kernel void test_scexmax(const __global Type *in, __global int4 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_exclusive_max(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
const char* scexmin_source =
|
||||
"__kernel void test_scexmin(const __global Type *in, __global int4 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
|
||||
"}\n";
|
||||
32
test_conformance/subgroups/subgroup_common_kernels.h
Normal file
32
test_conformance/subgroups/subgroup_common_kernels.h
Normal file
@@ -0,0 +1,32 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef SUBGROUPKERNELSOURCES_H
|
||||
#define SUBGROUPKERNELSOURCES_H
|
||||
#include "subhelpers.h"
|
||||
|
||||
|
||||
extern const char* bcast_source;
|
||||
extern const char* redadd_source;
|
||||
extern const char* redmax_source;
|
||||
extern const char* redmin_source;
|
||||
extern const char* scinadd_source;
|
||||
extern const char* scinmax_source;
|
||||
extern const char* scinmin_source;
|
||||
extern const char* scexadd_source;
|
||||
extern const char* scexmax_source;
|
||||
extern const char* scexmin_source;
|
||||
|
||||
#endif
|
||||
911
test_conformance/subgroups/subgroup_common_templates.h
Normal file
911
test_conformance/subgroups/subgroup_common_templates.h
Normal file
@@ -0,0 +1,911 @@
|
||||
//
|
||||
// Copyright (c) 2020 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef SUBGROUPCOMMONTEMPLATES_H
|
||||
#define SUBGROUPCOMMONTEMPLATES_H
|
||||
|
||||
#include "typeWrappers.h"
|
||||
#include <bitset>
|
||||
#include "CL/cl_half.h"
|
||||
#include "subhelpers.h"
|
||||
|
||||
#include <set>
|
||||
|
||||
typedef std::bitset<128> bs128;
|
||||
static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
|
||||
const std::string &mask_type,
|
||||
cl_uint max_sub_group_size)
|
||||
{
|
||||
bs128 mask128;
|
||||
cl_uint4 mask;
|
||||
cl_uint pos = subgroup_local_id;
|
||||
if (mask_type == "eq") mask128.set(pos);
|
||||
if (mask_type == "le" || mask_type == "lt")
|
||||
{
|
||||
for (cl_uint i = 0; i <= pos; i++) mask128.set(i);
|
||||
if (mask_type == "lt") mask128.reset(pos);
|
||||
}
|
||||
if (mask_type == "ge" || mask_type == "gt")
|
||||
{
|
||||
for (cl_uint i = pos; i < max_sub_group_size; i++) mask128.set(i);
|
||||
if (mask_type == "gt") mask128.reset(pos);
|
||||
}
|
||||
|
||||
// convert std::bitset<128> to uint4
|
||||
auto const uint_mask = bs128{ static_cast<unsigned long>(-1) };
|
||||
mask.s0 = (mask128 & uint_mask).to_ulong();
|
||||
mask128 >>= 32;
|
||||
mask.s1 = (mask128 & uint_mask).to_ulong();
|
||||
mask128 >>= 32;
|
||||
mask.s2 = (mask128 & uint_mask).to_ulong();
|
||||
mask128 >>= 32;
|
||||
mask.s3 = (mask128 & uint_mask).to_ulong();
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
// DESCRIPTION :
|
||||
// sub_group_broadcast - each work_item registers it's own value.
|
||||
// All work_items in subgroup takes one value from only one (any) work_item
|
||||
// sub_group_broadcast_first - same as type 0. All work_items in
|
||||
// subgroup takes only one value from only one chosen (the smallest subgroup ID)
|
||||
// work_item
|
||||
// sub_group_non_uniform_broadcast - same as type 0 but
|
||||
// only 4 work_items from subgroup enter the code (are active)
|
||||
template <typename Ty, SubgroupsBroadcastOp operation> struct BC
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
int d = ns > 100 ? 100 : ns;
|
||||
int non_uniform_size = ng % nw;
|
||||
ng = ng / nw;
|
||||
int last_subgroup_size = 0;
|
||||
ii = 0;
|
||||
|
||||
log_info(" sub_group_%s(%s)...\n", operation_names(operation),
|
||||
TypeManager<Ty>::name());
|
||||
if (non_uniform_size)
|
||||
{
|
||||
log_info(" non uniform work group size mode ON\n");
|
||||
ng++;
|
||||
}
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
if (non_uniform_size && k == ng - 1)
|
||||
{
|
||||
set_last_workgroup_params(non_uniform_size, nj, ns, nw,
|
||||
last_subgroup_size);
|
||||
}
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
if (last_subgroup_size && j == nj - 1)
|
||||
{
|
||||
n = last_subgroup_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
}
|
||||
int bcast_if = 0;
|
||||
int bcast_elseif = 0;
|
||||
int bcast_index = (int)(genrand_int32(gMTdata) & 0x7fffffff)
|
||||
% (d > n ? n : d);
|
||||
// l - calculate subgroup local id from which value will be
|
||||
// broadcasted (one the same value for whole subgroup)
|
||||
if (operation != SubgroupsBroadcastOp::broadcast)
|
||||
{
|
||||
// reduce brodcasting index in case of non_uniform and
|
||||
// last workgroup last subgroup
|
||||
if (last_subgroup_size && j == nj - 1
|
||||
&& last_subgroup_size < NR_OF_ACTIVE_WORK_ITEMS)
|
||||
{
|
||||
bcast_if = bcast_index % last_subgroup_size;
|
||||
bcast_elseif = bcast_if;
|
||||
}
|
||||
else
|
||||
{
|
||||
bcast_if = bcast_index % NR_OF_ACTIVE_WORK_ITEMS;
|
||||
bcast_elseif = NR_OF_ACTIVE_WORK_ITEMS
|
||||
+ bcast_index % (n - NR_OF_ACTIVE_WORK_ITEMS);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
if (operation == SubgroupsBroadcastOp::broadcast)
|
||||
{
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
m[midx] = (cl_int)bcast_index;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (i < NR_OF_ACTIVE_WORK_ITEMS)
|
||||
{
|
||||
// index of the third
|
||||
// element int the vector.
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
// storing information about
|
||||
// broadcasting index -
|
||||
// earlier calculated
|
||||
m[midx] = (cl_int)bcast_if;
|
||||
}
|
||||
else
|
||||
{ // index of the third
|
||||
// element int the vector.
|
||||
int midx = 4 * ii + 4 * i + 3;
|
||||
m[midx] = (cl_int)bcast_elseif;
|
||||
}
|
||||
}
|
||||
|
||||
// calculate value for broadcasting
|
||||
cl_ulong number = genrand_int64(gMTdata);
|
||||
set_value(t[ii + i], number);
|
||||
}
|
||||
}
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // for each element in work_group
|
||||
// calculate index as number of subgroup
|
||||
// plus subgroup local id
|
||||
x[j] = t[j];
|
||||
}
|
||||
x += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, l, n;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
int non_uniform_size = ng % nw;
|
||||
ng = ng / nw;
|
||||
int last_subgroup_size = 0;
|
||||
if (non_uniform_size) ng++;
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
if (non_uniform_size && k == ng - 1)
|
||||
{
|
||||
set_last_workgroup_params(non_uniform_size, nj, ns, nw,
|
||||
last_subgroup_size);
|
||||
}
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // inside the work_group
|
||||
mx[j] = x[j]; // read host inputs for work_group
|
||||
my[j] = y[j]; // read device outputs for work_group
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
if (last_subgroup_size && j == nj - 1)
|
||||
{
|
||||
n = last_subgroup_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
}
|
||||
|
||||
// Check result
|
||||
if (operation == SubgroupsBroadcastOp::broadcast_first)
|
||||
{
|
||||
int lowest_active_id = -1;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
|
||||
lowest_active_id = i < NR_OF_ACTIVE_WORK_ITEMS
|
||||
? 0
|
||||
: NR_OF_ACTIVE_WORK_ITEMS;
|
||||
// findout if broadcasted
|
||||
// value is the same
|
||||
tr = mx[ii + lowest_active_id];
|
||||
// findout if broadcasted to all
|
||||
rr = my[ii + i];
|
||||
|
||||
if (!compare(rr, tr))
|
||||
{
|
||||
log_error(
|
||||
"ERROR: sub_group_broadcast_first(%s) "
|
||||
"mismatch "
|
||||
"for local id %d in sub group %d in group "
|
||||
"%d\n",
|
||||
TypeManager<Ty>::name(), i, j, k);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
if (operation == SubgroupsBroadcastOp::broadcast)
|
||||
{
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
l = (int)m[midx];
|
||||
tr = mx[ii + l];
|
||||
}
|
||||
else
|
||||
{
|
||||
if (i < NR_OF_ACTIVE_WORK_ITEMS)
|
||||
{ // take index of array where info
|
||||
// which work_item will be
|
||||
// broadcast its value is stored
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
// take subgroup local id of
|
||||
// this work_item
|
||||
l = (int)m[midx];
|
||||
// take value generated on host
|
||||
// for this work_item
|
||||
tr = mx[ii + l];
|
||||
}
|
||||
else
|
||||
{
|
||||
int midx = 4 * ii + 4 * i + 3;
|
||||
l = (int)m[midx];
|
||||
tr = mx[ii + l];
|
||||
}
|
||||
}
|
||||
rr = my[ii + i]; // read device outputs for
|
||||
// work_item in the subgroup
|
||||
|
||||
if (!compare(rr, tr))
|
||||
{
|
||||
log_error("ERROR: sub_group_%s(%s) "
|
||||
"mismatch for local id %d in sub "
|
||||
"group %d in group %d - got %lu "
|
||||
"expected %lu\n",
|
||||
operation_names(operation),
|
||||
TypeManager<Ty>::name(), i, j, k, rr, tr);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
log_info(" sub_group_%s(%s)... passed\n", operation_names(operation),
|
||||
TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
static float to_float(subgroups::cl_half x) { return cl_half_to_float(x.data); }
|
||||
|
||||
static subgroups::cl_half to_half(float x)
|
||||
{
|
||||
subgroups::cl_half value;
|
||||
value.data = cl_half_from_float(x, CL_HALF_RTE);
|
||||
return value;
|
||||
}
|
||||
|
||||
// for integer types
|
||||
template <typename Ty> inline Ty calculate(Ty a, Ty b, ArithmeticOp operation)
|
||||
{
|
||||
switch (operation)
|
||||
{
|
||||
case ArithmeticOp::add_: return a + b;
|
||||
case ArithmeticOp::max_: return a > b ? a : b;
|
||||
case ArithmeticOp::min_: return a < b ? a : b;
|
||||
case ArithmeticOp::mul_: return a * b;
|
||||
case ArithmeticOp::and_: return a & b;
|
||||
case ArithmeticOp::or_: return a | b;
|
||||
case ArithmeticOp::xor_: return a ^ b;
|
||||
case ArithmeticOp::logical_and: return a && b;
|
||||
case ArithmeticOp::logical_or: return a || b;
|
||||
case ArithmeticOp::logical_xor: return !a ^ !b;
|
||||
default: log_error("Unknown operation request"); break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
// Specialize for floating points.
|
||||
template <>
|
||||
inline cl_double calculate(cl_double a, cl_double b, ArithmeticOp operation)
|
||||
{
|
||||
switch (operation)
|
||||
{
|
||||
case ArithmeticOp::add_: {
|
||||
return a + b;
|
||||
}
|
||||
case ArithmeticOp::max_: {
|
||||
return a > b ? a : b;
|
||||
}
|
||||
case ArithmeticOp::min_: {
|
||||
return a < b ? a : b;
|
||||
}
|
||||
case ArithmeticOp::mul_: {
|
||||
return a * b;
|
||||
}
|
||||
default: log_error("Unknown operation request"); break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline cl_float calculate(cl_float a, cl_float b, ArithmeticOp operation)
|
||||
{
|
||||
switch (operation)
|
||||
{
|
||||
case ArithmeticOp::add_: {
|
||||
return a + b;
|
||||
}
|
||||
case ArithmeticOp::max_: {
|
||||
return a > b ? a : b;
|
||||
}
|
||||
case ArithmeticOp::min_: {
|
||||
return a < b ? a : b;
|
||||
}
|
||||
case ArithmeticOp::mul_: {
|
||||
return a * b;
|
||||
}
|
||||
default: log_error("Unknown operation request"); break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline subgroups::cl_half calculate(subgroups::cl_half a, subgroups::cl_half b,
|
||||
ArithmeticOp operation)
|
||||
{
|
||||
switch (operation)
|
||||
{
|
||||
case ArithmeticOp::add_: return to_half(to_float(a) + to_float(b));
|
||||
case ArithmeticOp::max_:
|
||||
return to_float(a) > to_float(b) || is_half_nan(b.data) ? a : b;
|
||||
case ArithmeticOp::min_:
|
||||
return to_float(a) < to_float(b) || is_half_nan(b.data) ? a : b;
|
||||
case ArithmeticOp::mul_: return to_half(to_float(a) * to_float(b));
|
||||
default: log_error("Unknown operation request"); break;
|
||||
}
|
||||
return to_half(0);
|
||||
}
|
||||
|
||||
template <typename Ty> bool is_floating_point()
|
||||
{
|
||||
return std::is_floating_point<Ty>::value
|
||||
|| std::is_same<Ty, subgroups::cl_half>::value;
|
||||
}
|
||||
|
||||
template <typename Ty, ArithmeticOp operation>
|
||||
void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
for (int k = 0; k < ng; ++k)
|
||||
{
|
||||
for (int j = 0; j < nj; ++j)
|
||||
{
|
||||
int ii = j * ns;
|
||||
int n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
cl_ulong out_value;
|
||||
double y;
|
||||
if (operation == ArithmeticOp::mul_
|
||||
|| operation == ArithmeticOp::add_)
|
||||
{
|
||||
// work around to avoid overflow, do not use 0 for
|
||||
// multiplication
|
||||
out_value = (genrand_int32(gMTdata) % 4) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
out_value = genrand_int64(gMTdata) % (32 * n);
|
||||
if ((operation == ArithmeticOp::logical_and
|
||||
|| operation == ArithmeticOp::logical_or
|
||||
|| operation == ArithmeticOp::logical_xor)
|
||||
&& ((out_value >> 32) & 1) == 0)
|
||||
out_value = 0; // increase probability of false
|
||||
}
|
||||
set_value(t[ii + i], out_value);
|
||||
}
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (int j = 0; j < nw; ++j)
|
||||
{
|
||||
x[j] = t[j];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Ty, ShuffleOp operation> struct SHF
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int i, ii, j, k, l, n, delta;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
int d = ns > 100 ? 100 : ns;
|
||||
ii = 0;
|
||||
ng = ng / nw;
|
||||
log_info(" sub_group_%s(%s)...\n", operation_names(operation),
|
||||
TypeManager<Ty>::name());
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
l = (int)(genrand_int32(gMTdata) & 0x7fffffff)
|
||||
% (d > n ? n : d);
|
||||
switch (operation)
|
||||
{
|
||||
case ShuffleOp::shuffle:
|
||||
case ShuffleOp::shuffle_xor:
|
||||
// storing information about shuffle index
|
||||
m[midx] = (cl_int)l;
|
||||
break;
|
||||
case ShuffleOp::shuffle_up:
|
||||
delta = l; // calculate delta for shuffle up
|
||||
if (i - delta < 0)
|
||||
{
|
||||
delta = i;
|
||||
}
|
||||
m[midx] = (cl_int)delta;
|
||||
break;
|
||||
case ShuffleOp::shuffle_down:
|
||||
delta = l; // calculate delta for shuffle down
|
||||
if (i + delta >= n)
|
||||
{
|
||||
delta = n - 1 - i;
|
||||
}
|
||||
m[midx] = (cl_int)delta;
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
cl_ulong number = genrand_int64(gMTdata);
|
||||
set_value(t[ii + i], number);
|
||||
}
|
||||
}
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // for each element in work_group
|
||||
x[j] = t[j];
|
||||
}
|
||||
x += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, l, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
ng = ng / nw;
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // inside the work_group
|
||||
mx[j] = x[j]; // read host inputs for work_group
|
||||
my[j] = y[j]; // read device outputs for work_group
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{ // inside the subgroup
|
||||
// shuffle index storage
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
l = (int)m[midx];
|
||||
rr = my[ii + i];
|
||||
switch (operation)
|
||||
{
|
||||
// shuffle basic - treat l as index
|
||||
case ShuffleOp::shuffle: tr = mx[ii + l]; break;
|
||||
// shuffle up - treat l as delta
|
||||
case ShuffleOp::shuffle_up: tr = mx[ii + i - l]; break;
|
||||
// shuffle up - treat l as delta
|
||||
case ShuffleOp::shuffle_down:
|
||||
tr = mx[ii + i + l];
|
||||
break;
|
||||
// shuffle xor - treat l as mask
|
||||
case ShuffleOp::shuffle_xor:
|
||||
tr = mx[ii + (i ^ l)];
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if (!compare(rr, tr))
|
||||
{
|
||||
log_error("ERROR: sub_group_%s(%s) mismatch for "
|
||||
"local id %d in sub group %d in group %d\n",
|
||||
operation_names(operation),
|
||||
TypeManager<Ty>::name(), i, j, k);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
log_info(" sub_group_%s(%s)... passed\n", operation_names(operation),
|
||||
TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Ty, ArithmeticOp operation> struct SCEX_NU
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
ng = ng / nw;
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
|
||||
: func_name = "sub_group_scan_exclusive";
|
||||
log_info(" %s_%s(%s)...\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
log_info(" test params: global size = %d local size = %d subgroups "
|
||||
"size = %d work item mask = 0x%x \n",
|
||||
test_params.global_workgroup_size, nw, ns, work_items_mask);
|
||||
genrand<Ty, operation>(x, t, m, ns, nw, ng);
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
ng = ng / nw;
|
||||
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
|
||||
: func_name = "sub_group_scan_exclusive";
|
||||
|
||||
uint32_t use_work_items_mask;
|
||||
// for uniform case take into consideration all workitems
|
||||
use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // inside the work_group
|
||||
mx[j] = x[j]; // read host inputs for work_group
|
||||
my[j] = y[j]; // read device outputs for work_group
|
||||
}
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
std::set<int> active_work_items;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
uint32_t check_work_item = 1 << (i % 32);
|
||||
if (use_work_items_mask & check_work_item)
|
||||
{
|
||||
active_work_items.insert(i);
|
||||
}
|
||||
}
|
||||
if (active_work_items.empty())
|
||||
{
|
||||
log_info(" No acitve workitems in workgroup id = %d "
|
||||
"subgroup id = %d - no calculation\n",
|
||||
k, j);
|
||||
continue;
|
||||
}
|
||||
else if (active_work_items.size() == 1)
|
||||
{
|
||||
log_info(" One active workitem in workgroup id = %d "
|
||||
"subgroup id = %d - no calculation\n",
|
||||
k, j);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
tr = TypeManager<Ty>::identify_limits(operation);
|
||||
int idx = 0;
|
||||
for (const int &active_work_item : active_work_items)
|
||||
{
|
||||
rr = my[ii + active_work_item];
|
||||
if (idx == 0) continue;
|
||||
|
||||
if (!compare_ordered(rr, tr))
|
||||
{
|
||||
log_error(
|
||||
"ERROR: %s_%s(%s) "
|
||||
"mismatch for local id %d in sub group %d in "
|
||||
"group %d Expected: %d Obtained: %d\n",
|
||||
func_name.c_str(), operation_names(operation),
|
||||
TypeManager<Ty>::name(), i, j, k, tr, rr);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
tr = calculate<Ty>(tr, mx[ii + active_work_item],
|
||||
operation);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
|
||||
log_info(" %s_%s(%s)... passed\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
// Test for scan inclusive non uniform functions
|
||||
template <typename Ty, ArithmeticOp operation> struct SCIN_NU
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
ng = ng / nw;
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
|
||||
: func_name = "sub_group_scan_inclusive";
|
||||
|
||||
genrand<Ty, operation>(x, t, m, ns, nw, ng);
|
||||
log_info(" %s_%s(%s)...\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
log_info(" test params: global size = %d local size = %d subgroups "
|
||||
"size = %d work item mask = 0x%x \n",
|
||||
test_params.global_workgroup_size, nw, ns, work_items_mask);
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
ng = ng / nw;
|
||||
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
|
||||
: func_name = "sub_group_scan_inclusive";
|
||||
|
||||
uint32_t use_work_items_mask;
|
||||
// for uniform case take into consideration all workitems
|
||||
use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
|
||||
// std::bitset<32> mask32(use_work_items_mask);
|
||||
// for (int k) mask32.count();
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // inside the work_group
|
||||
mx[j] = x[j]; // read host inputs for work_group
|
||||
my[j] = y[j]; // read device outputs for work_group
|
||||
}
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
std::set<int> active_work_items;
|
||||
int catch_frist_active = -1;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
uint32_t check_work_item = 1 << (i % 32);
|
||||
if (use_work_items_mask & check_work_item)
|
||||
{
|
||||
if (catch_frist_active == -1)
|
||||
{
|
||||
catch_frist_active = i;
|
||||
}
|
||||
active_work_items.insert(i);
|
||||
}
|
||||
}
|
||||
if (active_work_items.empty())
|
||||
{
|
||||
log_info(" No acitve workitems in workgroup id = %d "
|
||||
"subgroup id = %d - no calculation\n",
|
||||
k, j);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
tr = TypeManager<Ty>::identify_limits(operation);
|
||||
for (const int &active_work_item : active_work_items)
|
||||
{
|
||||
rr = my[ii + active_work_item];
|
||||
if (active_work_items.size() == 1)
|
||||
{
|
||||
tr = mx[ii + catch_frist_active];
|
||||
}
|
||||
else
|
||||
{
|
||||
tr = calculate<Ty>(tr, mx[ii + active_work_item],
|
||||
operation);
|
||||
}
|
||||
if (!compare_ordered<Ty>(rr, tr))
|
||||
{
|
||||
log_error(
|
||||
"ERROR: %s_%s(%s) "
|
||||
"mismatch for local id %d in sub group %d "
|
||||
"in "
|
||||
"group %d Expected: %d Obtained: %d\n",
|
||||
func_name.c_str(), operation_names(operation),
|
||||
TypeManager<Ty>::name(), active_work_item, j, k,
|
||||
tr, rr);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
|
||||
log_info(" %s_%s(%s)... passed\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
// Test for reduce non uniform functions
|
||||
template <typename Ty, ArithmeticOp operation> struct RED_NU
|
||||
{
|
||||
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
ng = ng / nw;
|
||||
std::string func_name;
|
||||
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_reduce"
|
||||
: func_name = "sub_group_reduce";
|
||||
log_info(" %s_%s(%s)...\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
log_info(" test params: global size = %d local size = %d subgroups "
|
||||
"size = %d work item mask = 0x%x \n",
|
||||
test_params.global_workgroup_size, nw, ns, work_items_mask);
|
||||
genrand<Ty, operation>(x, t, m, ns, nw, ng);
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
ng = ng / nw;
|
||||
Ty tr, rr;
|
||||
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_reduce"
|
||||
: func_name = "sub_group_reduce";
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
// Map to array indexed to array indexed by local ID and sub
|
||||
// group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
mx[j] = x[j];
|
||||
my[j] = y[j];
|
||||
}
|
||||
|
||||
uint32_t use_work_items_mask;
|
||||
use_work_items_mask =
|
||||
!work_items_mask ? 0xFFFFFFFF : work_items_mask;
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
std::set<int> active_work_items;
|
||||
int catch_frist_active = -1;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
uint32_t check_work_item = 1 << (i % 32);
|
||||
if (use_work_items_mask & check_work_item)
|
||||
{
|
||||
if (catch_frist_active == -1)
|
||||
{
|
||||
catch_frist_active = i;
|
||||
tr = mx[ii + i];
|
||||
active_work_items.insert(i);
|
||||
continue;
|
||||
}
|
||||
active_work_items.insert(i);
|
||||
tr = calculate<Ty>(tr, mx[ii + i], operation);
|
||||
}
|
||||
}
|
||||
|
||||
if (active_work_items.empty())
|
||||
{
|
||||
log_info(" No acitve workitems in workgroup id = %d "
|
||||
"subgroup id = %d - no calculation\n",
|
||||
k, j);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const int &active_work_item : active_work_items)
|
||||
{
|
||||
rr = my[ii + active_work_item];
|
||||
if (!compare_ordered<Ty>(rr, tr))
|
||||
{
|
||||
log_error("ERROR: %s_%s(%s) "
|
||||
"mismatch for local id %d in sub group %d in "
|
||||
"group %d Expected: %d Obtained: %d\n",
|
||||
func_name.c_str(), operation_names(operation),
|
||||
TypeManager<Ty>::name(), active_work_item, j,
|
||||
k, tr, rr);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
|
||||
log_info(" %s_%s(%s)... passed\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -59,10 +59,15 @@ static const char *gbar_source =
|
||||
// barrier test functions
|
||||
template <int Which> struct BAR
|
||||
{
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng)
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
ng = ng / nw;
|
||||
int e;
|
||||
|
||||
ii = 0;
|
||||
@@ -79,8 +84,7 @@ template <int Which> struct BAR
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
x[j] = t[i];
|
||||
x[j] = t[j];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
@@ -89,10 +93,14 @@ template <int Which> struct BAR
|
||||
}
|
||||
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
|
||||
int ns, int nw, int ng)
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
ng = ng / nw;
|
||||
cl_int tr, rr;
|
||||
|
||||
if (Which == 0)
|
||||
@@ -105,9 +113,8 @@ template <int Which> struct BAR
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
mx[j] = x[j];
|
||||
my[j] = y[j];
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
@@ -123,8 +130,9 @@ template <int Which> struct BAR
|
||||
if (tr != rr)
|
||||
{
|
||||
log_error("ERROR: sub_group_barrier mismatch for local "
|
||||
"id %d in sub group %d in group %d\n",
|
||||
i, j, k);
|
||||
"id %d in sub group %d in group %d expected "
|
||||
"%d got %d\n",
|
||||
i, j, k, tr, rr);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -144,18 +152,18 @@ int test_barrier_functions(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements,
|
||||
bool useCoreSubgroups)
|
||||
{
|
||||
int error;
|
||||
int error = TEST_PASS;
|
||||
|
||||
// Adjust these individually below if desired/needed
|
||||
#define G 2000
|
||||
#define L 200
|
||||
|
||||
error = test<cl_int, BAR<0>, G, L>::run(device, context, queue,
|
||||
num_elements, "test_lbar",
|
||||
lbar_source, 0, useCoreSubgroups);
|
||||
error = test<cl_int, BAR<1>, G, L, G>::run(
|
||||
device, context, queue, num_elements, "test_gbar", gbar_source, 0,
|
||||
useCoreSubgroups);
|
||||
constexpr size_t global_work_size = 2000;
|
||||
constexpr size_t local_work_size = 200;
|
||||
WorkGroupParams test_params(global_work_size, local_work_size);
|
||||
test_params.use_core_subgroups = useCoreSubgroups;
|
||||
error = test<cl_int, BAR<0>>::run(device, context, queue, num_elements,
|
||||
"test_lbar", lbar_source, test_params);
|
||||
error |= test<cl_int, BAR<1>, global_work_size>::run(
|
||||
device, context, queue, num_elements, "test_gbar", gbar_source,
|
||||
test_params);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ static const char *ifp_source =
|
||||
"#define INST_COUNT 0x3\n"
|
||||
"\n"
|
||||
"__kernel void\n"
|
||||
"test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
|
||||
"test_ifp(const __global int *in, __global int4 *xy, __global int *out)\n"
|
||||
"{\n"
|
||||
" __local atomic_int loc[NUM_LOC];\n"
|
||||
"\n"
|
||||
@@ -225,10 +225,15 @@ void run_insts(cl_int *x, cl_int *p, int n)
|
||||
|
||||
struct IFP
|
||||
{
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int k;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
ng = ng / nw;
|
||||
|
||||
// We need at least 2 sub groups per group for this test
|
||||
if (nj == 1) return;
|
||||
@@ -240,11 +245,15 @@ struct IFP
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns,
|
||||
int nw, int ng)
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int i, k;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
ng = ng / nw;
|
||||
|
||||
// We need at least 2 sub groups per group for this tes
|
||||
if (nj == 1) return 0;
|
||||
@@ -275,14 +284,17 @@ struct IFP
|
||||
int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
int num_elements, bool useCoreSubgroups)
|
||||
{
|
||||
int error;
|
||||
int error = TEST_PASS;
|
||||
|
||||
// Global/local work group sizes
|
||||
// Adjust these individually below if desired/needed
|
||||
#define G 2000
|
||||
#define L 200
|
||||
error = test<cl_int, IFP, G, L>::run(device, context, queue, num_elements,
|
||||
"test_ifp", ifp_source, NUM_LOC + 1,
|
||||
useCoreSubgroups);
|
||||
constexpr size_t global_work_size = 2000;
|
||||
constexpr size_t local_work_size = 200;
|
||||
WorkGroupParams test_params(global_work_size, local_work_size);
|
||||
test_params.use_core_subgroups = useCoreSubgroups;
|
||||
test_params.dynsc = NUM_LOC + 1;
|
||||
error = test<cl_int, IFP>::run(device, context, queue, num_elements,
|
||||
"test_ifp", ifp_source, test_params);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
217
test_conformance/subgroups/test_subgroup.cpp
Normal file
217
test_conformance/subgroups/test_subgroup.cpp
Normal file
@@ -0,0 +1,217 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "subgroup_common_kernels.h"
|
||||
#include "subgroup_common_templates.h"
|
||||
#include "harness/conversions.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
namespace {
|
||||
// Any/All test functions
|
||||
template <NonUniformVoteOp operation> struct AA
|
||||
{
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
int e;
|
||||
ng = ng / nw;
|
||||
ii = 0;
|
||||
log_info(" sub_group_%s...\n", operation_names(operation));
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
e = (int)(genrand_int32(gMTdata) % 3);
|
||||
|
||||
// Initialize data matrix indexed by local id and sub group id
|
||||
switch (e)
|
||||
{
|
||||
case 0: memset(&t[ii], 0, n * sizeof(cl_int)); break;
|
||||
case 1:
|
||||
memset(&t[ii], 0, n * sizeof(cl_int));
|
||||
i = (int)(genrand_int32(gMTdata) % (cl_uint)n);
|
||||
t[ii + i] = 41;
|
||||
break;
|
||||
case 2: memset(&t[ii], 0xff, n * sizeof(cl_int)); break;
|
||||
}
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
x[j] = t[j];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
cl_int taa, raa;
|
||||
ng = ng / nw;
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
mx[j] = x[j];
|
||||
my[j] = y[j];
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
// Compute target
|
||||
if (operation == NonUniformVoteOp::any)
|
||||
{
|
||||
taa = 0;
|
||||
for (i = 0; i < n; ++i) taa |= mx[ii + i] != 0;
|
||||
}
|
||||
|
||||
if (operation == NonUniformVoteOp::all)
|
||||
{
|
||||
taa = 1;
|
||||
for (i = 0; i < n; ++i) taa &= mx[ii + i] != 0;
|
||||
}
|
||||
|
||||
// Check result
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
raa = my[ii + i] != 0;
|
||||
if (raa != taa)
|
||||
{
|
||||
log_error("ERROR: sub_group_%s mismatch for local id "
|
||||
"%d in sub group %d in group %d\n",
|
||||
operation_names(operation), i, j, k);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
log_info(" sub_group_%s... passed\n", operation_names(operation));
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
static const char *any_source = "__kernel void test_any(const __global Type "
|
||||
"*in, __global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_any(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *all_source = "__kernel void test_all(const __global Type "
|
||||
"*in, __global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_all(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
template <typename T>
|
||||
int run_broadcast_scan_reduction_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, BC<T, SubgroupsBroadcastOp::broadcast>>(
|
||||
"test_bcast", bcast_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("test_redadd",
|
||||
redadd_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("test_redmax",
|
||||
redmax_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("test_redmin",
|
||||
redmin_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>("test_scinadd",
|
||||
scinadd_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>("test_scinmax",
|
||||
scinmax_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>("test_scinmin",
|
||||
scinmin_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>("test_scexadd",
|
||||
scexadd_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>("test_scexmax",
|
||||
scexmax_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>("test_scexmin",
|
||||
scexmin_source);
|
||||
return error;
|
||||
}
|
||||
|
||||
}
|
||||
// Entry point from main
|
||||
int test_subgroup_functions(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements,
|
||||
bool useCoreSubgroups)
|
||||
{
|
||||
constexpr size_t global_work_size = 2000;
|
||||
constexpr size_t local_work_size = 200;
|
||||
WorkGroupParams test_params(global_work_size, local_work_size);
|
||||
RunTestForType rft(device, context, queue, num_elements, test_params);
|
||||
int error =
|
||||
rft.run_impl<cl_int, AA<NonUniformVoteOp::any>>("test_any", any_source);
|
||||
error |=
|
||||
rft.run_impl<cl_int, AA<NonUniformVoteOp::all>>("test_all", all_source);
|
||||
error |= run_broadcast_scan_reduction_for_type<cl_int>(rft);
|
||||
error |= run_broadcast_scan_reduction_for_type<cl_uint>(rft);
|
||||
error |= run_broadcast_scan_reduction_for_type<cl_long>(rft);
|
||||
error |= run_broadcast_scan_reduction_for_type<cl_ulong>(rft);
|
||||
error |= run_broadcast_scan_reduction_for_type<cl_float>(rft);
|
||||
error |= run_broadcast_scan_reduction_for_type<cl_double>(rft);
|
||||
error |= run_broadcast_scan_reduction_for_type<subgroups::cl_half>(rft);
|
||||
return error;
|
||||
}
|
||||
|
||||
int test_subgroup_functions_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
return test_subgroup_functions(device, context, queue, num_elements, true);
|
||||
}
|
||||
|
||||
int test_subgroup_functions_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
|
||||
|
||||
if (!hasExtension)
|
||||
{
|
||||
log_info(
|
||||
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
return test_subgroup_functions(device, context, queue, num_elements, false);
|
||||
}
|
||||
1089
test_conformance/subgroups/test_subgroup_ballot.cpp
Normal file
1089
test_conformance/subgroups/test_subgroup_ballot.cpp
Normal file
File diff suppressed because it is too large
Load Diff
340
test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
Normal file
340
test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
Normal file
@@ -0,0 +1,340 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "subgroup_common_templates.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
#define CLUSTER_SIZE 4
|
||||
#define CLUSTER_SIZE_STR "4"
|
||||
|
||||
namespace {
|
||||
static const char *redadd_clustered_source =
|
||||
"__kernel void test_redadd_clustered(const __global Type *in, __global "
|
||||
"int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if (sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR ")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR
|
||||
");\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redmax_clustered_source =
|
||||
"__kernel void test_redmax_clustered(const __global Type *in, __global "
|
||||
"int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if (sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR ")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR
|
||||
");\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redmin_clustered_source =
|
||||
"__kernel void test_redmin_clustered(const __global Type *in, __global "
|
||||
"int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if (sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR ")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR
|
||||
");\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redmul_clustered_source =
|
||||
"__kernel void test_redmul_clustered(const __global Type *in, __global "
|
||||
"int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if (sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR ")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR
|
||||
");\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redand_clustered_source =
|
||||
"__kernel void test_redand_clustered(const __global Type *in, __global "
|
||||
"int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if (sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR ")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR
|
||||
");\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redor_clustered_source =
|
||||
"__kernel void test_redor_clustered(const __global Type *in, __global int4 "
|
||||
"*xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if (sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR ")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR
|
||||
");\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redxor_clustered_source =
|
||||
"__kernel void test_redxor_clustered(const __global Type *in, __global "
|
||||
"int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if (sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR ")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR
|
||||
");\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redand_clustered_logical_source =
|
||||
"__kernel void test_redand_clustered_logical(const __global Type *in, "
|
||||
"__global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if (sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR
|
||||
")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = "
|
||||
"sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR ");\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redor_clustered_logical_source =
|
||||
"__kernel void test_redor_clustered_logical(const __global Type *in, "
|
||||
"__global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if (sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR
|
||||
")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = "
|
||||
"sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR ");\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redxor_clustered_logical_source =
|
||||
"__kernel void test_redxor_clustered_logical(const __global Type *in, "
|
||||
"__global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" xy[gid].w = 0;\n"
|
||||
" if ( sizeof(in[gid]) == "
|
||||
"sizeof(sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR
|
||||
")))\n"
|
||||
" {xy[gid].w = sizeof(in[gid]);}\n"
|
||||
" out[gid] = "
|
||||
"sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR ");\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
// DESCRIPTION:
|
||||
// Test for reduce cluster functions
|
||||
template <typename Ty, ArithmeticOp operation> struct RED_CLU
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
ng = ng / nw;
|
||||
log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ...\n",
|
||||
operation_names(operation), TypeManager<Ty>::name(),
|
||||
sizeof(Ty));
|
||||
genrand<Ty, operation>(x, t, m, ns, nw, ng);
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
ng = ng / nw;
|
||||
|
||||
for (int k = 0; k < ng; ++k)
|
||||
{
|
||||
std::vector<cl_int> data_type_sizes;
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (int j = 0; j < nw; ++j)
|
||||
{
|
||||
mx[j] = x[j];
|
||||
my[j] = y[j];
|
||||
data_type_sizes.push_back(m[4 * j + 3]);
|
||||
}
|
||||
|
||||
for (cl_int dts : data_type_sizes)
|
||||
{
|
||||
if (dts != sizeof(Ty))
|
||||
{
|
||||
log_error("ERROR: sub_group_clustered_reduce_%s(%s) "
|
||||
"wrong data type size detected, expected: %d, "
|
||||
"used by device %d, in group %d\n",
|
||||
operation_names(operation),
|
||||
TypeManager<Ty>::name(), sizeof(Ty), dts, k);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < nj; ++j)
|
||||
{
|
||||
int ii = j * ns;
|
||||
int n = ii + ns > nw ? nw - ii : ns;
|
||||
int midx = 4 * ii + 2;
|
||||
std::vector<Ty> clusters_results;
|
||||
int clusters_counter = ns / CLUSTER_SIZE;
|
||||
clusters_results.resize(clusters_counter);
|
||||
|
||||
// Compute target
|
||||
Ty tr = mx[ii];
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
if (i % CLUSTER_SIZE == 0)
|
||||
tr = mx[ii + i];
|
||||
else
|
||||
tr = calculate<Ty>(tr, mx[ii + i], operation);
|
||||
clusters_results[i / CLUSTER_SIZE] = tr;
|
||||
}
|
||||
|
||||
// Check result
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
Ty rr = my[ii + i];
|
||||
tr = clusters_results[i / CLUSTER_SIZE];
|
||||
if (!compare(rr, tr))
|
||||
{
|
||||
log_error(
|
||||
"ERROR: sub_group_clustered_reduce_%s(%s) mismatch "
|
||||
"for local id %d in sub group %d in group %d\n",
|
||||
operation_names(operation), TypeManager<Ty>::name(),
|
||||
i, j, k);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ... passed\n",
|
||||
operation_names(operation), TypeManager<Ty>::name(),
|
||||
sizeof(Ty));
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::add_>>(
|
||||
"test_redadd_clustered", redadd_clustered_source);
|
||||
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::max_>>(
|
||||
"test_redmax_clustered", redmax_clustered_source);
|
||||
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::min_>>(
|
||||
"test_redmin_clustered", redmin_clustered_source);
|
||||
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::mul_>>(
|
||||
"test_redmul_clustered", redmul_clustered_source);
|
||||
return error;
|
||||
}
|
||||
template <typename T> int run_cluster_and_or_xor_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::and_>>(
|
||||
"test_redand_clustered", redand_clustered_source);
|
||||
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::or_>>(
|
||||
"test_redor_clustered", redor_clustered_source);
|
||||
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::xor_>>(
|
||||
"test_redxor_clustered", redxor_clustered_source);
|
||||
return error;
|
||||
}
|
||||
template <typename T>
|
||||
int run_cluster_logical_and_or_xor_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_and>>(
|
||||
"test_redand_clustered_logical", redand_clustered_logical_source);
|
||||
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_or>>(
|
||||
"test_redor_clustered_logical", redor_clustered_logical_source);
|
||||
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_xor>>(
|
||||
"test_redxor_clustered_logical", redxor_clustered_logical_source);
|
||||
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
int test_subgroup_functions_clustered_reduce(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements)
|
||||
{
|
||||
std::vector<std::string> required_extensions = {
|
||||
"cl_khr_subgroup_clustered_reduce"
|
||||
};
|
||||
constexpr size_t global_work_size = 2000;
|
||||
constexpr size_t local_work_size = 200;
|
||||
WorkGroupParams test_params(global_work_size, local_work_size,
|
||||
required_extensions);
|
||||
RunTestForType rft(device, context, queue, num_elements, test_params);
|
||||
|
||||
int error = run_cluster_red_add_max_min_mul_for_type<cl_int>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<cl_uint>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<cl_long>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<cl_ulong>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<cl_short>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<cl_ushort>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<cl_char>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<cl_uchar>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<cl_float>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<cl_double>(rft);
|
||||
error |= run_cluster_red_add_max_min_mul_for_type<subgroups::cl_half>(rft);
|
||||
|
||||
error |= run_cluster_and_or_xor_for_type<cl_int>(rft);
|
||||
error |= run_cluster_and_or_xor_for_type<cl_uint>(rft);
|
||||
error |= run_cluster_and_or_xor_for_type<cl_long>(rft);
|
||||
error |= run_cluster_and_or_xor_for_type<cl_ulong>(rft);
|
||||
error |= run_cluster_and_or_xor_for_type<cl_short>(rft);
|
||||
error |= run_cluster_and_or_xor_for_type<cl_ushort>(rft);
|
||||
error |= run_cluster_and_or_xor_for_type<cl_char>(rft);
|
||||
error |= run_cluster_and_or_xor_for_type<cl_uchar>(rft);
|
||||
|
||||
error |= run_cluster_logical_and_or_xor_for_type<cl_int>(rft);
|
||||
return error;
|
||||
}
|
||||
138
test_conformance/subgroups/test_subgroup_extended_types.cpp
Normal file
138
test_conformance/subgroups/test_subgroup_extended_types.cpp
Normal file
@@ -0,0 +1,138 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "subgroup_common_kernels.h"
|
||||
#include "subgroup_common_templates.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T> int run_broadcast_for_extended_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, BC<T, SubgroupsBroadcastOp::broadcast>>(
|
||||
"test_bcast", bcast_source);
|
||||
return error;
|
||||
}
|
||||
|
||||
template <typename T> int run_scan_reduction_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("test_redadd",
|
||||
redadd_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("test_redmax",
|
||||
redmax_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("test_redmin",
|
||||
redmin_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>("test_scinadd",
|
||||
scinadd_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>("test_scinmax",
|
||||
scinmax_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>("test_scinmin",
|
||||
scinmin_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>("test_scexadd",
|
||||
scexadd_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>("test_scexmax",
|
||||
scexmax_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>("test_scexmin",
|
||||
scexmin_source);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
int test_subgroup_functions_extended_types(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements)
|
||||
{
|
||||
std::vector<std::string> required_extensions = {
|
||||
"cl_khr_subgroup_extended_types"
|
||||
};
|
||||
constexpr size_t global_work_size = 2000;
|
||||
constexpr size_t local_work_size = 200;
|
||||
WorkGroupParams test_params(global_work_size, local_work_size,
|
||||
required_extensions);
|
||||
RunTestForType rft(device, context, queue, num_elements, test_params);
|
||||
|
||||
int error = run_broadcast_for_extended_type<cl_uint2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_uint3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_uint4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_uint8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_uint16>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_int2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_int3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_int4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_int8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_int16>(rft);
|
||||
|
||||
error |= run_broadcast_for_extended_type<cl_ulong2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_ulong3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_ulong4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_ulong8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_ulong16>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_long2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_long3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_long4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_long8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_long16>(rft);
|
||||
|
||||
error |= run_broadcast_for_extended_type<cl_float2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_float3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_float4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_float8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_float16>(rft);
|
||||
|
||||
error |= run_broadcast_for_extended_type<cl_double2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_double3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_double4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_double8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_double16>(rft);
|
||||
|
||||
error |= run_broadcast_for_extended_type<cl_ushort2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_ushort3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_ushort4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_ushort8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_ushort16>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_short2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_short3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_short4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_short8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_short16>(rft);
|
||||
|
||||
error |= run_broadcast_for_extended_type<cl_uchar2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_uchar3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_uchar4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_uchar8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_uchar16>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_char2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_char3>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_char4>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_char8>(rft);
|
||||
error |= run_broadcast_for_extended_type<cl_char16>(rft);
|
||||
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_half2>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_half3>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_half4>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_half8>(rft);
|
||||
error |= run_broadcast_for_extended_type<subgroups::cl_half16>(rft);
|
||||
|
||||
error |= run_scan_reduction_for_type<cl_uchar>(rft);
|
||||
error |= run_scan_reduction_for_type<cl_char>(rft);
|
||||
error |= run_scan_reduction_for_type<cl_ushort>(rft);
|
||||
error |= run_scan_reduction_for_type<cl_short>(rft);
|
||||
return error;
|
||||
}
|
||||
@@ -0,0 +1,473 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "subgroup_common_templates.h"
|
||||
|
||||
namespace {
|
||||
|
||||
static const char *scinadd_non_uniform_source = R"(
|
||||
__kernel void test_scinadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_add(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scinmax_non_uniform_source = R"(
|
||||
__kernel void test_scinmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_max(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scinmin_non_uniform_source = R"(
|
||||
__kernel void test_scinmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_min(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scinmul_non_uniform_source = R"(
|
||||
__kernel void test_scinmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_mul(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scinand_non_uniform_source = R"(
|
||||
__kernel void test_scinand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_and(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scinor_non_uniform_source = R"(
|
||||
__kernel void test_scinor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_or(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scinxor_non_uniform_source = R"(
|
||||
__kernel void test_scinxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_xor(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scinand_non_uniform_logical_source = R"(
|
||||
__kernel void test_scinand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_logical_and(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scinor_non_uniform_logical_source = R"(
|
||||
__kernel void test_scinor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_logical_or(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scinxor_non_uniform_logical_source = R"(
|
||||
__kernel void test_scinxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_inclusive_logical_xor(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexadd_non_uniform_source = R"(
|
||||
__kernel void test_scexadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_add(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexmax_non_uniform_source = R"(
|
||||
__kernel void test_scexmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_max(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexmin_non_uniform_source = R"(
|
||||
__kernel void test_scexmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_min(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexmul_non_uniform_source = R"(
|
||||
__kernel void test_scexmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_mul(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexand_non_uniform_source = R"(
|
||||
__kernel void test_scexand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_and(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexor_non_uniform_source = R"(
|
||||
__kernel void test_scexor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_or(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexxor_non_uniform_source = R"(
|
||||
__kernel void test_scexxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_xor(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexand_non_uniform_logical_source = R"(
|
||||
__kernel void test_scexand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_logical_and(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexor_non_uniform_logical_source = R"(
|
||||
__kernel void test_scexor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_logical_or(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *scexxor_non_uniform_logical_source = R"(
|
||||
__kernel void test_scexxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_scan_exclusive_logical_xor(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redadd_non_uniform_source = R"(
|
||||
__kernel void test_redadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_add(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redmax_non_uniform_source = R"(
|
||||
__kernel void test_redmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_max(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redmin_non_uniform_source = R"(
|
||||
__kernel void test_redmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_min(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redmul_non_uniform_source = R"(
|
||||
__kernel void test_redmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_mul(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redand_non_uniform_source = R"(
|
||||
__kernel void test_redand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_and(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redor_non_uniform_source = R"(
|
||||
__kernel void test_redor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_or(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redxor_non_uniform_source = R"(
|
||||
__kernel void test_redxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_xor(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redand_non_uniform_logical_source = R"(
|
||||
__kernel void test_redand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_logical_and(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redor_non_uniform_logical_source = R"(
|
||||
__kernel void test_redor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_logical_or(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *redxor_non_uniform_logical_source = R"(
|
||||
__kernel void test_redxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
int elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_reduce_logical_xor(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
template <typename T>
|
||||
int run_functions_add_mul_max_min_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>(
|
||||
"test_scinadd_non_uniform", scinadd_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::mul_>>(
|
||||
"test_scinmul_non_uniform", scinmul_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>(
|
||||
"test_scinmax_non_uniform", scinmax_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>(
|
||||
"test_scinmin_non_uniform", scinmin_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>(
|
||||
"test_scexadd_non_uniform", scexadd_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::mul_>>(
|
||||
"test_scexmul_non_uniform", scexmul_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>(
|
||||
"test_scexmax_non_uniform", scexmax_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>(
|
||||
"test_scexmin_non_uniform", scexmin_non_uniform_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>(
|
||||
"test_redadd_non_uniform", redadd_non_uniform_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::mul_>>(
|
||||
"test_redmul_non_uniform", redmul_non_uniform_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>(
|
||||
"test_redmax_non_uniform", redmax_non_uniform_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>(
|
||||
"test_redmin_non_uniform", redmin_non_uniform_source);
|
||||
return error;
|
||||
}
|
||||
|
||||
template <typename T> int run_functions_and_or_xor_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::and_>>(
|
||||
"test_scinand_non_uniform", scinand_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::or_>>(
|
||||
"test_scinor_non_uniform", scinor_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::xor_>>(
|
||||
"test_scinxor_non_uniform", scinxor_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::and_>>(
|
||||
"test_scexand_non_uniform", scexand_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::or_>>(
|
||||
"test_scexor_non_uniform", scexor_non_uniform_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::xor_>>(
|
||||
"test_scexxor_non_uniform", scexxor_non_uniform_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::and_>>(
|
||||
"test_redand_non_uniform", redand_non_uniform_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::or_>>(
|
||||
"test_redor_non_uniform", redor_non_uniform_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::xor_>>(
|
||||
"test_redxor_non_uniform", redxor_non_uniform_source);
|
||||
return error;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int run_functions_logical_and_or_xor_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_and>>(
|
||||
"test_scinand_non_uniform_logical", scinand_non_uniform_logical_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_or>>(
|
||||
"test_scinor_non_uniform_logical", scinor_non_uniform_logical_source);
|
||||
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_xor>>(
|
||||
"test_scinxor_non_uniform_logical", scinxor_non_uniform_logical_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_and>>(
|
||||
"test_scexand_non_uniform_logical", scexand_non_uniform_logical_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_or>>(
|
||||
"test_scexor_non_uniform_logical", scexor_non_uniform_logical_source);
|
||||
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_xor>>(
|
||||
"test_scexxor_non_uniform_logical", scexxor_non_uniform_logical_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_and>>(
|
||||
"test_redand_non_uniform_logical", redand_non_uniform_logical_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_or>>(
|
||||
"test_redor_non_uniform_logical", redor_non_uniform_logical_source);
|
||||
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_xor>>(
|
||||
"test_redxor_non_uniform_logical", redxor_non_uniform_logical_source);
|
||||
return error;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements)
|
||||
{
|
||||
std::vector<std::string> required_extensions = {
|
||||
"cl_khr_subgroup_non_uniform_arithmetic"
|
||||
};
|
||||
std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
|
||||
0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
|
||||
0x00ffff00, 0x80000000, 0xaaaaaaaa };
|
||||
|
||||
constexpr size_t global_work_size = 2000;
|
||||
constexpr size_t local_work_size = 200;
|
||||
WorkGroupParams test_params(global_work_size, local_work_size,
|
||||
required_extensions, masks);
|
||||
RunTestForType rft(device, context, queue, num_elements, test_params);
|
||||
|
||||
int error = run_functions_add_mul_max_min_for_type<cl_int>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<cl_uint>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<cl_long>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<cl_ulong>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<cl_short>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<cl_ushort>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<cl_char>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<cl_uchar>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<cl_float>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<cl_double>(rft);
|
||||
error |= run_functions_add_mul_max_min_for_type<subgroups::cl_half>(rft);
|
||||
|
||||
error |= run_functions_and_or_xor_for_type<cl_int>(rft);
|
||||
error |= run_functions_and_or_xor_for_type<cl_uint>(rft);
|
||||
error |= run_functions_and_or_xor_for_type<cl_long>(rft);
|
||||
error |= run_functions_and_or_xor_for_type<cl_ulong>(rft);
|
||||
error |= run_functions_and_or_xor_for_type<cl_short>(rft);
|
||||
error |= run_functions_and_or_xor_for_type<cl_ushort>(rft);
|
||||
error |= run_functions_and_or_xor_for_type<cl_char>(rft);
|
||||
error |= run_functions_and_or_xor_for_type<cl_uchar>(rft);
|
||||
|
||||
error |= run_functions_logical_and_or_xor_for_type<cl_int>(rft);
|
||||
return error;
|
||||
}
|
||||
303
test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp
Normal file
303
test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp
Normal file
@@ -0,0 +1,303 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include <set>
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T, NonUniformVoteOp operation> struct VOTE
|
||||
{
|
||||
static void gen(T *x, T *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
int non_uniform_size = ng % nw;
|
||||
ng = ng / nw;
|
||||
int last_subgroup_size = 0;
|
||||
ii = 0;
|
||||
|
||||
log_info(" sub_group_%s%s... \n",
|
||||
(operation == NonUniformVoteOp::elect) ? "" : "non_uniform_",
|
||||
operation_names(operation));
|
||||
|
||||
log_info(" test params: global size = %d local size = %d subgroups "
|
||||
"size = %d work item mask = 0x%x data type (%s)\n",
|
||||
test_params.global_workgroup_size, nw, ns, work_items_mask,
|
||||
TypeManager<T>::name());
|
||||
if (non_uniform_size)
|
||||
{
|
||||
log_info(" non uniform work group size mode ON\n");
|
||||
}
|
||||
if (operation == NonUniformVoteOp::elect) return;
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
if (non_uniform_size && k == ng - 1)
|
||||
{
|
||||
set_last_workgroup_params(non_uniform_size, nj, ns, nw,
|
||||
last_subgroup_size);
|
||||
}
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
if (last_subgroup_size && j == nj - 1)
|
||||
{
|
||||
n = last_subgroup_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
}
|
||||
int e = genrand_int32(gMTdata) % 3;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (e == 2)
|
||||
{ // set once 0 and once 1 alternately
|
||||
int value = i % 2;
|
||||
set_value(t[ii + i], value);
|
||||
}
|
||||
else
|
||||
{ // set 0/1 for all work items in subgroup
|
||||
set_value(t[ii + i], e);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
x[j] = t[j];
|
||||
}
|
||||
x += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(T *x, T *y, T *mx, T *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
cl_int tr, rr;
|
||||
int non_uniform_size = ng % nw;
|
||||
ng = ng / nw;
|
||||
if (non_uniform_size) ng++;
|
||||
int last_subgroup_size = 0;
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
if (non_uniform_size && k == ng - 1)
|
||||
{
|
||||
set_last_workgroup_params(non_uniform_size, nj, ns, nw,
|
||||
last_subgroup_size);
|
||||
}
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // inside the work_group
|
||||
mx[j] = x[j]; // read host inputs for work_group
|
||||
my[j] = y[j]; // read device outputs for work_group
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
if (last_subgroup_size && j == nj - 1)
|
||||
{
|
||||
n = last_subgroup_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
}
|
||||
|
||||
rr = 0;
|
||||
if (operation == NonUniformVoteOp::all
|
||||
|| operation == NonUniformVoteOp::all_equal)
|
||||
tr = 1;
|
||||
if (operation == NonUniformVoteOp::any) tr = 0;
|
||||
|
||||
std::set<int> active_work_items;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
uint32_t check_work_item = 1 << (i % 32);
|
||||
if (work_items_mask & check_work_item)
|
||||
{
|
||||
active_work_items.insert(i);
|
||||
switch (operation)
|
||||
{
|
||||
case NonUniformVoteOp::elect: break;
|
||||
|
||||
case NonUniformVoteOp::all:
|
||||
tr &=
|
||||
!compare_ordered<T>(mx[ii + i], 0) ? 1 : 0;
|
||||
break;
|
||||
case NonUniformVoteOp::any:
|
||||
tr |=
|
||||
!compare_ordered<T>(mx[ii + i], 0) ? 1 : 0;
|
||||
break;
|
||||
case NonUniformVoteOp::all_equal:
|
||||
tr &= compare_ordered<T>(
|
||||
mx[ii + i],
|
||||
mx[ii + *active_work_items.begin()])
|
||||
? 1
|
||||
: 0;
|
||||
break;
|
||||
default:
|
||||
log_error("Unknown operation\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (active_work_items.empty())
|
||||
{
|
||||
log_info(" no one workitem acitve... in workgroup id = %d "
|
||||
"subgroup id = %d\n",
|
||||
k, j);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto lowest_active = active_work_items.begin();
|
||||
for (const int &active_work_item : active_work_items)
|
||||
{
|
||||
i = active_work_item;
|
||||
if (operation == NonUniformVoteOp::elect)
|
||||
{
|
||||
i == *lowest_active ? tr = 1 : tr = 0;
|
||||
}
|
||||
|
||||
// normalize device values on host, non zero set 1.
|
||||
rr = compare_ordered<T>(my[ii + i], 0) ? 0 : 1;
|
||||
|
||||
if (rr != tr)
|
||||
{
|
||||
log_error("ERROR: sub_group_%s() \n",
|
||||
operation_names(operation));
|
||||
log_error(
|
||||
"mismatch for work item %d sub group %d in "
|
||||
"work group %d. Expected: %d Obtained: %d\n",
|
||||
i, j, k, tr, rr);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
|
||||
log_info(" sub_group_%s%s... passed\n",
|
||||
(operation == NonUniformVoteOp::elect) ? "" : "non_uniform_",
|
||||
operation_names(operation));
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
static const char *elect_source = R"(
|
||||
__kernel void test_elect(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_elect();
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *non_uniform_any_source = R"(
|
||||
__kernel void test_non_uniform_any(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_any(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *non_uniform_all_source = R"(
|
||||
__kernel void test_non_uniform_all(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_all(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
static const char *non_uniform_all_equal_source = R"(
|
||||
__kernel void test_non_uniform_all_equal(const __global Type *in, __global int4 *xy, __global Type *out) {
|
||||
int gid = get_global_id(0);
|
||||
XY(xy,gid);
|
||||
uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
|
||||
if (elect_work_item & WORK_ITEMS_MASK){
|
||||
out[gid] = sub_group_non_uniform_all_equal(in[gid]);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
template <typename T> int run_vote_all_equal_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, VOTE<T, NonUniformVoteOp::all_equal>>(
|
||||
"test_non_uniform_all_equal", non_uniform_all_equal_source);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
int test_subgroup_functions_non_uniform_vote(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements)
|
||||
{
|
||||
std::vector<std::string> required_extensions = {
|
||||
"cl_khr_subgroup_non_uniform_vote"
|
||||
};
|
||||
|
||||
std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
|
||||
0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
|
||||
0x00ffff00, 0x80000000 };
|
||||
constexpr size_t global_work_size = 170;
|
||||
constexpr size_t local_work_size = 64;
|
||||
WorkGroupParams test_params(global_work_size, local_work_size,
|
||||
required_extensions, masks);
|
||||
RunTestForType rft(device, context, queue, num_elements, test_params);
|
||||
|
||||
int error = run_vote_all_equal_for_type<cl_int>(rft);
|
||||
error |= run_vote_all_equal_for_type<cl_uint>(rft);
|
||||
error |= run_vote_all_equal_for_type<cl_long>(rft);
|
||||
error |= run_vote_all_equal_for_type<cl_ulong>(rft);
|
||||
error |= run_vote_all_equal_for_type<cl_float>(rft);
|
||||
error |= run_vote_all_equal_for_type<cl_double>(rft);
|
||||
error |= run_vote_all_equal_for_type<subgroups::cl_half>(rft);
|
||||
|
||||
error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::all>>(
|
||||
"test_non_uniform_all", non_uniform_all_source);
|
||||
error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::elect>>(
|
||||
"test_elect", elect_source);
|
||||
error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::any>>(
|
||||
"test_non_uniform_any", non_uniform_any_source);
|
||||
return error;
|
||||
}
|
||||
78
test_conformance/subgroups/test_subgroup_shuffle.cpp
Normal file
78
test_conformance/subgroups/test_subgroup_shuffle.cpp
Normal file
@@ -0,0 +1,78 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "subgroup_common_templates.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include <bitset>
|
||||
|
||||
namespace {
|
||||
|
||||
static const char* shuffle_xor_source =
|
||||
"__kernel void test_sub_group_shuffle_xor(const __global Type *in, "
|
||||
"__global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" Type x = in[gid];\n"
|
||||
" out[gid] = sub_group_shuffle_xor(x, xy[gid].z);"
|
||||
"}\n";
|
||||
|
||||
static const char* shuffle_source =
|
||||
"__kernel void test_sub_group_shuffle(const __global Type *in, __global "
|
||||
"int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" Type x = in[gid];\n"
|
||||
" out[gid] = sub_group_shuffle(x, xy[gid].z);"
|
||||
"}\n";
|
||||
|
||||
template <typename T> int run_shuffle_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, SHF<T, ShuffleOp::shuffle>>(
|
||||
"test_sub_group_shuffle", shuffle_source);
|
||||
error |= rft.run_impl<T, SHF<T, ShuffleOp::shuffle_xor>>(
|
||||
"test_sub_group_shuffle_xor", shuffle_xor_source);
|
||||
return error;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int test_subgroup_functions_shuffle(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
std::vector<std::string> required_extensions{ "cl_khr_subgroup_shuffle" };
|
||||
constexpr size_t global_work_size = 2000;
|
||||
constexpr size_t local_work_size = 200;
|
||||
WorkGroupParams test_params(global_work_size, local_work_size,
|
||||
required_extensions);
|
||||
RunTestForType rft(device, context, queue, num_elements, test_params);
|
||||
|
||||
int error = run_shuffle_for_type<cl_int>(rft);
|
||||
error |= run_shuffle_for_type<cl_uint>(rft);
|
||||
error |= run_shuffle_for_type<cl_long>(rft);
|
||||
error |= run_shuffle_for_type<cl_ulong>(rft);
|
||||
error |= run_shuffle_for_type<cl_short>(rft);
|
||||
error |= run_shuffle_for_type<cl_ushort>(rft);
|
||||
error |= run_shuffle_for_type<cl_char>(rft);
|
||||
error |= run_shuffle_for_type<cl_uchar>(rft);
|
||||
error |= run_shuffle_for_type<cl_float>(rft);
|
||||
error |= run_shuffle_for_type<cl_double>(rft);
|
||||
error |= run_shuffle_for_type<subgroups::cl_half>(rft);
|
||||
|
||||
return error;
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "subgroup_common_templates.h"
|
||||
#include "harness/conversions.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
namespace {
|
||||
|
||||
static const char* shuffle_down_source =
|
||||
"__kernel void test_sub_group_shuffle_down(const __global Type *in, "
|
||||
"__global int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" Type x = in[gid];\n"
|
||||
" out[gid] = sub_group_shuffle_down(x, xy[gid].z);"
|
||||
"}\n";
|
||||
static const char* shuffle_up_source =
|
||||
"__kernel void test_sub_group_shuffle_up(const __global Type *in, __global "
|
||||
"int4 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" Type x = in[gid];\n"
|
||||
" out[gid] = sub_group_shuffle_up(x, xy[gid].z);"
|
||||
"}\n";
|
||||
|
||||
template <typename T> int run_shuffle_relative_for_type(RunTestForType rft)
|
||||
{
|
||||
int error = rft.run_impl<T, SHF<T, ShuffleOp::shuffle_up>>(
|
||||
"test_sub_group_shuffle_up", shuffle_up_source);
|
||||
error |= rft.run_impl<T, SHF<T, ShuffleOp::shuffle_down>>(
|
||||
"test_sub_group_shuffle_down", shuffle_down_source);
|
||||
return error;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int test_subgroup_functions_shuffle_relative(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements)
|
||||
{
|
||||
std::vector<std::string> required_extensions = {
|
||||
"cl_khr_subgroup_shuffle_relative"
|
||||
};
|
||||
constexpr size_t global_work_size = 2000;
|
||||
constexpr size_t local_work_size = 200;
|
||||
WorkGroupParams test_params(global_work_size, local_work_size,
|
||||
required_extensions);
|
||||
RunTestForType rft(device, context, queue, num_elements, test_params);
|
||||
|
||||
int error = run_shuffle_relative_for_type<cl_int>(rft);
|
||||
error |= run_shuffle_relative_for_type<cl_uint>(rft);
|
||||
error |= run_shuffle_relative_for_type<cl_long>(rft);
|
||||
error |= run_shuffle_relative_for_type<cl_ulong>(rft);
|
||||
error |= run_shuffle_relative_for_type<cl_short>(rft);
|
||||
error |= run_shuffle_relative_for_type<cl_ushort>(rft);
|
||||
error |= run_shuffle_relative_for_type<cl_char>(rft);
|
||||
error |= run_shuffle_relative_for_type<cl_uchar>(rft);
|
||||
error |= run_shuffle_relative_for_type<cl_float>(rft);
|
||||
error |= run_shuffle_relative_for_type<cl_double>(rft);
|
||||
error |= run_shuffle_relative_for_type<subgroups::cl_half>(rft);
|
||||
|
||||
return error;
|
||||
}
|
||||
@@ -1,727 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "harness/conversions.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
static const char *any_source = "__kernel void test_any(const __global Type "
|
||||
"*in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_any(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *all_source = "__kernel void test_all(const __global Type "
|
||||
"*in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_all(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *bcast_source =
|
||||
"__kernel void test_bcast(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" Type x = in[gid];\n"
|
||||
" size_t loid = (size_t)((int)x % 100);\n"
|
||||
" out[gid] = sub_group_broadcast(x, loid);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redadd_source =
|
||||
"__kernel void test_redadd(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_reduce_add(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redmax_source =
|
||||
"__kernel void test_redmax(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_reduce_max(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *redmin_source =
|
||||
"__kernel void test_redmin(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_reduce_min(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *scinadd_source =
|
||||
"__kernel void test_scinadd(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_inclusive_add(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *scinmax_source =
|
||||
"__kernel void test_scinmax(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_inclusive_max(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *scinmin_source =
|
||||
"__kernel void test_scinmin(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_inclusive_min(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *scexadd_source =
|
||||
"__kernel void test_scexadd(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_exclusive_add(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *scexmax_source =
|
||||
"__kernel void test_scexmax(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_exclusive_max(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char *scexmin_source =
|
||||
"__kernel void test_scexmin(const __global Type *in, __global int2 *xy, "
|
||||
"__global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
// Any/All test functions
|
||||
template <int Which> struct AA
|
||||
{
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
int e;
|
||||
|
||||
ii = 0;
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
e = (int)(genrand_int32(gMTdata) % 3);
|
||||
|
||||
// Initialize data matrix indexed by local id and sub group id
|
||||
switch (e)
|
||||
{
|
||||
case 0: memset(&t[ii], 0, n * sizeof(cl_int)); break;
|
||||
case 1:
|
||||
memset(&t[ii], 0, n * sizeof(cl_int));
|
||||
i = (int)(genrand_int32(gMTdata) % (cl_uint)n);
|
||||
t[ii + i] = 41;
|
||||
break;
|
||||
case 2: memset(&t[ii], 0xff, n * sizeof(cl_int)); break;
|
||||
}
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
|
||||
int ns, int nw, int ng)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
cl_int taa, raa;
|
||||
|
||||
log_info(" sub_group_%s...\n", Which == 0 ? "any" : "all");
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
// Compute target
|
||||
if (Which == 0)
|
||||
{
|
||||
taa = 0;
|
||||
for (i = 0; i < n; ++i) taa |= mx[ii + i] != 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
taa = 1;
|
||||
for (i = 0; i < n; ++i) taa &= mx[ii + i] != 0;
|
||||
}
|
||||
|
||||
// Check result
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
raa = my[ii + i] != 0;
|
||||
if (raa != taa)
|
||||
{
|
||||
log_error("ERROR: sub_group_%s mismatch for local id "
|
||||
"%d in sub group %d in group %d\n",
|
||||
Which == 0 ? "any" : "all", i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Reduce functions
|
||||
template <typename Ty, int Which> struct RED
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
ii = 0;
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
t[ii + i] = (Ty)(
|
||||
(int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1);
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
|
||||
int ng)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
|
||||
log_info(" sub_group_reduce_%s(%s)...\n",
|
||||
Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
|
||||
TypeName<Ty>::val());
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
// Compute target
|
||||
if (Which == 0)
|
||||
{
|
||||
// add
|
||||
tr = mx[ii];
|
||||
for (i = 1; i < n; ++i) tr += mx[ii + i];
|
||||
}
|
||||
else if (Which == 1)
|
||||
{
|
||||
// max
|
||||
tr = mx[ii];
|
||||
for (i = 1; i < n; ++i)
|
||||
tr = tr > mx[ii + i] ? tr : mx[ii + i];
|
||||
}
|
||||
else if (Which == 2)
|
||||
{
|
||||
// min
|
||||
tr = mx[ii];
|
||||
for (i = 1; i < n; ++i)
|
||||
tr = tr > mx[ii + i] ? mx[ii + i] : tr;
|
||||
}
|
||||
|
||||
// Check result
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
rr = my[ii + i];
|
||||
if (rr != tr)
|
||||
{
|
||||
log_error("ERROR: sub_group_reduce_%s(%s) mismatch for "
|
||||
"local id %d in sub group %d in group %d\n",
|
||||
Which == 0 ? "add"
|
||||
: (Which == 1 ? "max" : "min"),
|
||||
TypeName<Ty>::val(), i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Scan Inclusive functions
|
||||
template <typename Ty, int Which> struct SCIN
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
ii = 0;
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
// t[ii+i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff)
|
||||
// % ns + 1);
|
||||
t[ii + i] = (Ty)i;
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
|
||||
int ng)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
|
||||
log_info(" sub_group_scan_inclusive_%s(%s)...\n",
|
||||
Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
|
||||
TypeName<Ty>::val());
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
// Check result
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
if (Which == 0)
|
||||
{
|
||||
tr = i == 0 ? mx[ii] : tr + mx[ii + i];
|
||||
}
|
||||
else if (Which == 1)
|
||||
{
|
||||
tr = i == 0 ? mx[ii]
|
||||
: (tr > mx[ii + i] ? tr : mx[ii + i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
tr = i == 0 ? mx[ii]
|
||||
: (tr > mx[ii + i] ? mx[ii + i] : tr);
|
||||
}
|
||||
|
||||
rr = my[ii + i];
|
||||
if (rr != tr)
|
||||
{
|
||||
log_error(
|
||||
"ERROR: sub_group_scan_inclusive_%s(%s) mismatch "
|
||||
"for local id %d in sub group %d in group %d\n",
|
||||
Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
|
||||
TypeName<Ty>::val(), i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Scan Exclusive functions
|
||||
template <typename Ty, int Which> struct SCEX
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
ii = 0;
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
t[ii + i] = (Ty)(
|
||||
(int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1);
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
|
||||
int ng)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, trt, rr;
|
||||
|
||||
log_info(" sub_group_scan_exclusive_%s(%s)...\n",
|
||||
Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
|
||||
TypeName<Ty>::val());
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
// Check result
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
if (Which == 0)
|
||||
{
|
||||
tr = i == 0 ? TypeIdentity<Ty, Which>::val() : tr + trt;
|
||||
}
|
||||
else if (Which == 1)
|
||||
{
|
||||
tr = i == 0 ? TypeIdentity<Ty, Which>::val()
|
||||
: (trt > tr ? trt : tr);
|
||||
}
|
||||
else
|
||||
{
|
||||
tr = i == 0 ? TypeIdentity<Ty, Which>::val()
|
||||
: (trt > tr ? tr : trt);
|
||||
}
|
||||
trt = mx[ii + i];
|
||||
rr = my[ii + i];
|
||||
|
||||
if (rr != tr)
|
||||
{
|
||||
log_error(
|
||||
"ERROR: sub_group_scan_exclusive_%s(%s) mismatch "
|
||||
"for local id %d in sub group %d in group %d\n",
|
||||
Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
|
||||
TypeName<Ty>::val(), i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Broadcast functios
|
||||
template <typename Ty> struct BC
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, l, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
int d = ns > 100 ? 100 : ns;
|
||||
|
||||
ii = 0;
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
l = (int)(genrand_int32(gMTdata) & 0x7fffffff)
|
||||
% (d > n ? n : d);
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
t[ii + i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff)
|
||||
% 100 * 100
|
||||
+ l);
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
|
||||
int ng)
|
||||
{
|
||||
int ii, i, j, k, l, n;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
|
||||
log_info(" sub_group_broadcast(%s)...\n", TypeName<Ty>::val());
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
i = m[2 * j + 1] * ns + m[2 * j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
l = (int)mx[ii] % 100;
|
||||
tr = mx[ii + l];
|
||||
|
||||
// Check result
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
rr = my[ii + i];
|
||||
if (rr != tr)
|
||||
{
|
||||
log_error("ERROR: sub_group_broadcast(%s) mismatch for "
|
||||
"local id %d in sub group %d in group %d\n",
|
||||
TypeName<Ty>::val(), i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2 * nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
#define G 2000
|
||||
#define L 200
|
||||
struct run_for_type
|
||||
{
|
||||
run_for_type(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements,
|
||||
bool useCoreSubgroups)
|
||||
{
|
||||
device_ = device;
|
||||
context_ = context;
|
||||
queue_ = queue;
|
||||
num_elements_ = num_elements;
|
||||
useCoreSubgroups_ = useCoreSubgroups;
|
||||
}
|
||||
|
||||
template <typename T> cl_int run()
|
||||
{
|
||||
cl_int error;
|
||||
error = test<T, BC<T>, G, L>::run(device_, context_, queue_,
|
||||
num_elements_, "test_bcast",
|
||||
bcast_source, 0, useCoreSubgroups_);
|
||||
error |= test<T, RED<T, 0>, G, L>::run(
|
||||
device_, context_, queue_, num_elements_, "test_redadd",
|
||||
redadd_source, 0, useCoreSubgroups_);
|
||||
error |= test<T, RED<T, 1>, G, L>::run(
|
||||
device_, context_, queue_, num_elements_, "test_redmax",
|
||||
redmax_source, 0, useCoreSubgroups_);
|
||||
error |= test<T, RED<T, 2>, G, L>::run(
|
||||
device_, context_, queue_, num_elements_, "test_redmin",
|
||||
redmin_source, 0, useCoreSubgroups_);
|
||||
error |= test<T, SCIN<T, 0>, G, L>::run(
|
||||
device_, context_, queue_, num_elements_, "test_scinadd",
|
||||
scinadd_source, 0, useCoreSubgroups_);
|
||||
error |= test<T, SCIN<T, 1>, G, L>::run(
|
||||
device_, context_, queue_, num_elements_, "test_scinmax",
|
||||
scinmax_source, 0, useCoreSubgroups_);
|
||||
error |= test<T, SCIN<T, 2>, G, L>::run(
|
||||
device_, context_, queue_, num_elements_, "test_scinmin",
|
||||
scinmin_source, 0, useCoreSubgroups_);
|
||||
error |= test<T, SCEX<T, 0>, G, L>::run(
|
||||
device_, context_, queue_, num_elements_, "test_scexadd",
|
||||
scexadd_source, 0, useCoreSubgroups_);
|
||||
error |= test<T, SCEX<T, 1>, G, L>::run(
|
||||
device_, context_, queue_, num_elements_, "test_scexmax",
|
||||
scexmax_source, 0, useCoreSubgroups_);
|
||||
error |= test<T, SCEX<T, 2>, G, L>::run(
|
||||
device_, context_, queue_, num_elements_, "test_scexmin",
|
||||
scexmin_source, 0, useCoreSubgroups_);
|
||||
return error;
|
||||
}
|
||||
|
||||
private:
|
||||
cl_device_id device_;
|
||||
cl_context context_;
|
||||
cl_command_queue queue_;
|
||||
int num_elements_;
|
||||
bool useCoreSubgroups_;
|
||||
};
|
||||
|
||||
// Entry point from main
|
||||
int test_work_group_functions(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements,
|
||||
bool useCoreSubgroups)
|
||||
{
|
||||
int error;
|
||||
error = test<int, AA<0>, G, L>::run(device, context, queue, num_elements,
|
||||
"test_any", any_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<int, AA<1>, G, L>::run(device, context, queue, num_elements,
|
||||
"test_all", all_source, 0,
|
||||
useCoreSubgroups);
|
||||
run_for_type rft(device, context, queue, num_elements, useCoreSubgroups);
|
||||
error |= rft.run<cl_uint>();
|
||||
error |= rft.run<cl_int>();
|
||||
error |= rft.run<cl_ulong>();
|
||||
error |= rft.run<cl_long>();
|
||||
error |= rft.run<float>();
|
||||
error |= rft.run<double>();
|
||||
// error |= rft.run<cl_half>();
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
int test_work_group_functions_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
return test_work_group_functions(device, context, queue, num_elements,
|
||||
true);
|
||||
}
|
||||
|
||||
int test_work_group_functions_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
|
||||
|
||||
if (!hasExtension)
|
||||
{
|
||||
log_info(
|
||||
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
return test_work_group_functions(device, context, queue, num_elements,
|
||||
false);
|
||||
}
|
||||
Reference in New Issue
Block a user