mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-23 23:49:02 +00:00
New subgroups - full changes set (#1074)
* Extended subgroups - extended types types * Extended subgroups - non uniform vote tests * Extended subgroups - non uniform arithmetic tests * Extended subgroups - ballot tests * Extended subgroups - clustered reduce tests * Extended subgroups - shuffle tests * Extended subgroups - formating issues * Extended subgroups - review fixes * Extended subgroups - review fixes Fixed: removed additional brakes, kernel_sstr * Extended subgroups - fix macos build error * Extended subgroups - review fixes Fixed: mac os build error * Extended subgroups - data type verification example * Extended subgroups - error unification * Extended subgroups - fix header years * Extended subgroups - use is_half_nan * Extended subgroups - compare half as float * Review fixes mostly for ballot functions. - Modify kernels for better handling active/inactive workitems - Modify gen/chk functions for handling non uniform workgroup sizes - Introduce new variables naming convention - minor fixes * Extended subgroups - simplification data generation for ballot lsb/msb functions * Extended subgroups - minor fixes * Extended subgroups - move common code to function * Extended subgroups - formatting errors fix * Extended subgroups - fix build error * Extended subgroups - sub_group_elect more sophisticated Define mask which is 4bytes pattern where bit 1 means work item is active. If workitem in subgroup matches pattern then run sub_group_elect() * Extended subgroups - fix Ubuntu build error * Extended subgroups - voting function review fixes * adjust all function for using masks * remove calculate templates * merge code to one common template * check results only in active workitems * normalize values on host side * minor fixes * Extended subgroups - fix typos * Set of fixes and improvements after review * define WorkGroupParams to stop extended parameters list in function * better workitems mask handing (WorkGroupParams) * narrow values of data input generation to avoid overflows (arithmetic func) * implement work item masks for arithmetic functions * enable half type testing for reduction/scan/broadcast * minor fixes * Extended subgroups - fix Linux issues * Extended subgroups - fix sub_group_local_id data type * Extended subgroups - use vector instead of array. * Extended subgroups - change names to subgroup * Extended subgroups - uncomment code, fix build * Extended subgroups - build fix, use cl_half_from_float func * Extended subgroups - remove is_half_nan * Extended subgroups - do no use undef min/max * Extended subgroups - use parenthesis, fix formatting
This commit is contained in:
committed by
GitHub
parent
c5e4ca6c91
commit
71bef8563e
911
test_conformance/subgroups/subgroup_common_templates.h
Normal file
911
test_conformance/subgroups/subgroup_common_templates.h
Normal file
@@ -0,0 +1,911 @@
|
||||
//
|
||||
// Copyright (c) 2020 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef SUBGROUPCOMMONTEMPLATES_H
|
||||
#define SUBGROUPCOMMONTEMPLATES_H
|
||||
|
||||
#include "typeWrappers.h"
|
||||
#include <bitset>
|
||||
#include "CL/cl_half.h"
|
||||
#include "subhelpers.h"
|
||||
|
||||
#include <set>
|
||||
|
||||
typedef std::bitset<128> bs128;
|
||||
static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
|
||||
const std::string &mask_type,
|
||||
cl_uint max_sub_group_size)
|
||||
{
|
||||
bs128 mask128;
|
||||
cl_uint4 mask;
|
||||
cl_uint pos = subgroup_local_id;
|
||||
if (mask_type == "eq") mask128.set(pos);
|
||||
if (mask_type == "le" || mask_type == "lt")
|
||||
{
|
||||
for (cl_uint i = 0; i <= pos; i++) mask128.set(i);
|
||||
if (mask_type == "lt") mask128.reset(pos);
|
||||
}
|
||||
if (mask_type == "ge" || mask_type == "gt")
|
||||
{
|
||||
for (cl_uint i = pos; i < max_sub_group_size; i++) mask128.set(i);
|
||||
if (mask_type == "gt") mask128.reset(pos);
|
||||
}
|
||||
|
||||
// convert std::bitset<128> to uint4
|
||||
auto const uint_mask = bs128{ static_cast<unsigned long>(-1) };
|
||||
mask.s0 = (mask128 & uint_mask).to_ulong();
|
||||
mask128 >>= 32;
|
||||
mask.s1 = (mask128 & uint_mask).to_ulong();
|
||||
mask128 >>= 32;
|
||||
mask.s2 = (mask128 & uint_mask).to_ulong();
|
||||
mask128 >>= 32;
|
||||
mask.s3 = (mask128 & uint_mask).to_ulong();
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
// DESCRIPTION :
|
||||
// sub_group_broadcast - each work_item registers it's own value.
|
||||
// All work_items in subgroup takes one value from only one (any) work_item
|
||||
// sub_group_broadcast_first - same as type 0. All work_items in
|
||||
// subgroup takes only one value from only one chosen (the smallest subgroup ID)
|
||||
// work_item
|
||||
// sub_group_non_uniform_broadcast - same as type 0 but
|
||||
// only 4 work_items from subgroup enter the code (are active)
|
||||
template <typename Ty, SubgroupsBroadcastOp operation> struct BC
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
int d = ns > 100 ? 100 : ns;
|
||||
int non_uniform_size = ng % nw;
|
||||
ng = ng / nw;
|
||||
int last_subgroup_size = 0;
|
||||
ii = 0;
|
||||
|
||||
log_info(" sub_group_%s(%s)...\n", operation_names(operation),
|
||||
TypeManager<Ty>::name());
|
||||
if (non_uniform_size)
|
||||
{
|
||||
log_info(" non uniform work group size mode ON\n");
|
||||
ng++;
|
||||
}
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
if (non_uniform_size && k == ng - 1)
|
||||
{
|
||||
set_last_workgroup_params(non_uniform_size, nj, ns, nw,
|
||||
last_subgroup_size);
|
||||
}
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
if (last_subgroup_size && j == nj - 1)
|
||||
{
|
||||
n = last_subgroup_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
}
|
||||
int bcast_if = 0;
|
||||
int bcast_elseif = 0;
|
||||
int bcast_index = (int)(genrand_int32(gMTdata) & 0x7fffffff)
|
||||
% (d > n ? n : d);
|
||||
// l - calculate subgroup local id from which value will be
|
||||
// broadcasted (one the same value for whole subgroup)
|
||||
if (operation != SubgroupsBroadcastOp::broadcast)
|
||||
{
|
||||
// reduce brodcasting index in case of non_uniform and
|
||||
// last workgroup last subgroup
|
||||
if (last_subgroup_size && j == nj - 1
|
||||
&& last_subgroup_size < NR_OF_ACTIVE_WORK_ITEMS)
|
||||
{
|
||||
bcast_if = bcast_index % last_subgroup_size;
|
||||
bcast_elseif = bcast_if;
|
||||
}
|
||||
else
|
||||
{
|
||||
bcast_if = bcast_index % NR_OF_ACTIVE_WORK_ITEMS;
|
||||
bcast_elseif = NR_OF_ACTIVE_WORK_ITEMS
|
||||
+ bcast_index % (n - NR_OF_ACTIVE_WORK_ITEMS);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
if (operation == SubgroupsBroadcastOp::broadcast)
|
||||
{
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
m[midx] = (cl_int)bcast_index;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (i < NR_OF_ACTIVE_WORK_ITEMS)
|
||||
{
|
||||
// index of the third
|
||||
// element int the vector.
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
// storing information about
|
||||
// broadcasting index -
|
||||
// earlier calculated
|
||||
m[midx] = (cl_int)bcast_if;
|
||||
}
|
||||
else
|
||||
{ // index of the third
|
||||
// element int the vector.
|
||||
int midx = 4 * ii + 4 * i + 3;
|
||||
m[midx] = (cl_int)bcast_elseif;
|
||||
}
|
||||
}
|
||||
|
||||
// calculate value for broadcasting
|
||||
cl_ulong number = genrand_int64(gMTdata);
|
||||
set_value(t[ii + i], number);
|
||||
}
|
||||
}
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // for each element in work_group
|
||||
// calculate index as number of subgroup
|
||||
// plus subgroup local id
|
||||
x[j] = t[j];
|
||||
}
|
||||
x += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, l, n;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
int non_uniform_size = ng % nw;
|
||||
ng = ng / nw;
|
||||
int last_subgroup_size = 0;
|
||||
if (non_uniform_size) ng++;
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
if (non_uniform_size && k == ng - 1)
|
||||
{
|
||||
set_last_workgroup_params(non_uniform_size, nj, ns, nw,
|
||||
last_subgroup_size);
|
||||
}
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // inside the work_group
|
||||
mx[j] = x[j]; // read host inputs for work_group
|
||||
my[j] = y[j]; // read device outputs for work_group
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
if (last_subgroup_size && j == nj - 1)
|
||||
{
|
||||
n = last_subgroup_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
}
|
||||
|
||||
// Check result
|
||||
if (operation == SubgroupsBroadcastOp::broadcast_first)
|
||||
{
|
||||
int lowest_active_id = -1;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
|
||||
lowest_active_id = i < NR_OF_ACTIVE_WORK_ITEMS
|
||||
? 0
|
||||
: NR_OF_ACTIVE_WORK_ITEMS;
|
||||
// findout if broadcasted
|
||||
// value is the same
|
||||
tr = mx[ii + lowest_active_id];
|
||||
// findout if broadcasted to all
|
||||
rr = my[ii + i];
|
||||
|
||||
if (!compare(rr, tr))
|
||||
{
|
||||
log_error(
|
||||
"ERROR: sub_group_broadcast_first(%s) "
|
||||
"mismatch "
|
||||
"for local id %d in sub group %d in group "
|
||||
"%d\n",
|
||||
TypeManager<Ty>::name(), i, j, k);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
if (operation == SubgroupsBroadcastOp::broadcast)
|
||||
{
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
l = (int)m[midx];
|
||||
tr = mx[ii + l];
|
||||
}
|
||||
else
|
||||
{
|
||||
if (i < NR_OF_ACTIVE_WORK_ITEMS)
|
||||
{ // take index of array where info
|
||||
// which work_item will be
|
||||
// broadcast its value is stored
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
// take subgroup local id of
|
||||
// this work_item
|
||||
l = (int)m[midx];
|
||||
// take value generated on host
|
||||
// for this work_item
|
||||
tr = mx[ii + l];
|
||||
}
|
||||
else
|
||||
{
|
||||
int midx = 4 * ii + 4 * i + 3;
|
||||
l = (int)m[midx];
|
||||
tr = mx[ii + l];
|
||||
}
|
||||
}
|
||||
rr = my[ii + i]; // read device outputs for
|
||||
// work_item in the subgroup
|
||||
|
||||
if (!compare(rr, tr))
|
||||
{
|
||||
log_error("ERROR: sub_group_%s(%s) "
|
||||
"mismatch for local id %d in sub "
|
||||
"group %d in group %d - got %lu "
|
||||
"expected %lu\n",
|
||||
operation_names(operation),
|
||||
TypeManager<Ty>::name(), i, j, k, rr, tr);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
log_info(" sub_group_%s(%s)... passed\n", operation_names(operation),
|
||||
TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
static float to_float(subgroups::cl_half x) { return cl_half_to_float(x.data); }
|
||||
|
||||
static subgroups::cl_half to_half(float x)
|
||||
{
|
||||
subgroups::cl_half value;
|
||||
value.data = cl_half_from_float(x, CL_HALF_RTE);
|
||||
return value;
|
||||
}
|
||||
|
||||
// for integer types
|
||||
template <typename Ty> inline Ty calculate(Ty a, Ty b, ArithmeticOp operation)
|
||||
{
|
||||
switch (operation)
|
||||
{
|
||||
case ArithmeticOp::add_: return a + b;
|
||||
case ArithmeticOp::max_: return a > b ? a : b;
|
||||
case ArithmeticOp::min_: return a < b ? a : b;
|
||||
case ArithmeticOp::mul_: return a * b;
|
||||
case ArithmeticOp::and_: return a & b;
|
||||
case ArithmeticOp::or_: return a | b;
|
||||
case ArithmeticOp::xor_: return a ^ b;
|
||||
case ArithmeticOp::logical_and: return a && b;
|
||||
case ArithmeticOp::logical_or: return a || b;
|
||||
case ArithmeticOp::logical_xor: return !a ^ !b;
|
||||
default: log_error("Unknown operation request"); break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
// Specialize for floating points.
|
||||
template <>
|
||||
inline cl_double calculate(cl_double a, cl_double b, ArithmeticOp operation)
|
||||
{
|
||||
switch (operation)
|
||||
{
|
||||
case ArithmeticOp::add_: {
|
||||
return a + b;
|
||||
}
|
||||
case ArithmeticOp::max_: {
|
||||
return a > b ? a : b;
|
||||
}
|
||||
case ArithmeticOp::min_: {
|
||||
return a < b ? a : b;
|
||||
}
|
||||
case ArithmeticOp::mul_: {
|
||||
return a * b;
|
||||
}
|
||||
default: log_error("Unknown operation request"); break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline cl_float calculate(cl_float a, cl_float b, ArithmeticOp operation)
|
||||
{
|
||||
switch (operation)
|
||||
{
|
||||
case ArithmeticOp::add_: {
|
||||
return a + b;
|
||||
}
|
||||
case ArithmeticOp::max_: {
|
||||
return a > b ? a : b;
|
||||
}
|
||||
case ArithmeticOp::min_: {
|
||||
return a < b ? a : b;
|
||||
}
|
||||
case ArithmeticOp::mul_: {
|
||||
return a * b;
|
||||
}
|
||||
default: log_error("Unknown operation request"); break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline subgroups::cl_half calculate(subgroups::cl_half a, subgroups::cl_half b,
|
||||
ArithmeticOp operation)
|
||||
{
|
||||
switch (operation)
|
||||
{
|
||||
case ArithmeticOp::add_: return to_half(to_float(a) + to_float(b));
|
||||
case ArithmeticOp::max_:
|
||||
return to_float(a) > to_float(b) || is_half_nan(b.data) ? a : b;
|
||||
case ArithmeticOp::min_:
|
||||
return to_float(a) < to_float(b) || is_half_nan(b.data) ? a : b;
|
||||
case ArithmeticOp::mul_: return to_half(to_float(a) * to_float(b));
|
||||
default: log_error("Unknown operation request"); break;
|
||||
}
|
||||
return to_half(0);
|
||||
}
|
||||
|
||||
template <typename Ty> bool is_floating_point()
|
||||
{
|
||||
return std::is_floating_point<Ty>::value
|
||||
|| std::is_same<Ty, subgroups::cl_half>::value;
|
||||
}
|
||||
|
||||
template <typename Ty, ArithmeticOp operation>
|
||||
void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
for (int k = 0; k < ng; ++k)
|
||||
{
|
||||
for (int j = 0; j < nj; ++j)
|
||||
{
|
||||
int ii = j * ns;
|
||||
int n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
cl_ulong out_value;
|
||||
double y;
|
||||
if (operation == ArithmeticOp::mul_
|
||||
|| operation == ArithmeticOp::add_)
|
||||
{
|
||||
// work around to avoid overflow, do not use 0 for
|
||||
// multiplication
|
||||
out_value = (genrand_int32(gMTdata) % 4) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
out_value = genrand_int64(gMTdata) % (32 * n);
|
||||
if ((operation == ArithmeticOp::logical_and
|
||||
|| operation == ArithmeticOp::logical_or
|
||||
|| operation == ArithmeticOp::logical_xor)
|
||||
&& ((out_value >> 32) & 1) == 0)
|
||||
out_value = 0; // increase probability of false
|
||||
}
|
||||
set_value(t[ii + i], out_value);
|
||||
}
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (int j = 0; j < nw; ++j)
|
||||
{
|
||||
x[j] = t[j];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Ty, ShuffleOp operation> struct SHF
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int i, ii, j, k, l, n, delta;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
int d = ns > 100 ? 100 : ns;
|
||||
ii = 0;
|
||||
ng = ng / nw;
|
||||
log_info(" sub_group_%s(%s)...\n", operation_names(operation),
|
||||
TypeManager<Ty>::name());
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
l = (int)(genrand_int32(gMTdata) & 0x7fffffff)
|
||||
% (d > n ? n : d);
|
||||
switch (operation)
|
||||
{
|
||||
case ShuffleOp::shuffle:
|
||||
case ShuffleOp::shuffle_xor:
|
||||
// storing information about shuffle index
|
||||
m[midx] = (cl_int)l;
|
||||
break;
|
||||
case ShuffleOp::shuffle_up:
|
||||
delta = l; // calculate delta for shuffle up
|
||||
if (i - delta < 0)
|
||||
{
|
||||
delta = i;
|
||||
}
|
||||
m[midx] = (cl_int)delta;
|
||||
break;
|
||||
case ShuffleOp::shuffle_down:
|
||||
delta = l; // calculate delta for shuffle down
|
||||
if (i + delta >= n)
|
||||
{
|
||||
delta = n - 1 - i;
|
||||
}
|
||||
m[midx] = (cl_int)delta;
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
cl_ulong number = genrand_int64(gMTdata);
|
||||
set_value(t[ii + i], number);
|
||||
}
|
||||
}
|
||||
// Now map into work group using map from device
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // for each element in work_group
|
||||
x[j] = t[j];
|
||||
}
|
||||
x += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, l, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
ng = ng / nw;
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // inside the work_group
|
||||
mx[j] = x[j]; // read host inputs for work_group
|
||||
my[j] = y[j]; // read device outputs for work_group
|
||||
}
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{ // for each subgroup
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{ // inside the subgroup
|
||||
// shuffle index storage
|
||||
int midx = 4 * ii + 4 * i + 2;
|
||||
l = (int)m[midx];
|
||||
rr = my[ii + i];
|
||||
switch (operation)
|
||||
{
|
||||
// shuffle basic - treat l as index
|
||||
case ShuffleOp::shuffle: tr = mx[ii + l]; break;
|
||||
// shuffle up - treat l as delta
|
||||
case ShuffleOp::shuffle_up: tr = mx[ii + i - l]; break;
|
||||
// shuffle up - treat l as delta
|
||||
case ShuffleOp::shuffle_down:
|
||||
tr = mx[ii + i + l];
|
||||
break;
|
||||
// shuffle xor - treat l as mask
|
||||
case ShuffleOp::shuffle_xor:
|
||||
tr = mx[ii + (i ^ l)];
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if (!compare(rr, tr))
|
||||
{
|
||||
log_error("ERROR: sub_group_%s(%s) mismatch for "
|
||||
"local id %d in sub group %d in group %d\n",
|
||||
operation_names(operation),
|
||||
TypeManager<Ty>::name(), i, j, k);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
log_info(" sub_group_%s(%s)... passed\n", operation_names(operation),
|
||||
TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Ty, ArithmeticOp operation> struct SCEX_NU
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
ng = ng / nw;
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
|
||||
: func_name = "sub_group_scan_exclusive";
|
||||
log_info(" %s_%s(%s)...\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
log_info(" test params: global size = %d local size = %d subgroups "
|
||||
"size = %d work item mask = 0x%x \n",
|
||||
test_params.global_workgroup_size, nw, ns, work_items_mask);
|
||||
genrand<Ty, operation>(x, t, m, ns, nw, ng);
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
ng = ng / nw;
|
||||
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
|
||||
: func_name = "sub_group_scan_exclusive";
|
||||
|
||||
uint32_t use_work_items_mask;
|
||||
// for uniform case take into consideration all workitems
|
||||
use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // inside the work_group
|
||||
mx[j] = x[j]; // read host inputs for work_group
|
||||
my[j] = y[j]; // read device outputs for work_group
|
||||
}
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
std::set<int> active_work_items;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
uint32_t check_work_item = 1 << (i % 32);
|
||||
if (use_work_items_mask & check_work_item)
|
||||
{
|
||||
active_work_items.insert(i);
|
||||
}
|
||||
}
|
||||
if (active_work_items.empty())
|
||||
{
|
||||
log_info(" No acitve workitems in workgroup id = %d "
|
||||
"subgroup id = %d - no calculation\n",
|
||||
k, j);
|
||||
continue;
|
||||
}
|
||||
else if (active_work_items.size() == 1)
|
||||
{
|
||||
log_info(" One active workitem in workgroup id = %d "
|
||||
"subgroup id = %d - no calculation\n",
|
||||
k, j);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
tr = TypeManager<Ty>::identify_limits(operation);
|
||||
int idx = 0;
|
||||
for (const int &active_work_item : active_work_items)
|
||||
{
|
||||
rr = my[ii + active_work_item];
|
||||
if (idx == 0) continue;
|
||||
|
||||
if (!compare_ordered(rr, tr))
|
||||
{
|
||||
log_error(
|
||||
"ERROR: %s_%s(%s) "
|
||||
"mismatch for local id %d in sub group %d in "
|
||||
"group %d Expected: %d Obtained: %d\n",
|
||||
func_name.c_str(), operation_names(operation),
|
||||
TypeManager<Ty>::name(), i, j, k, tr, rr);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
tr = calculate<Ty>(tr, mx[ii + active_work_item],
|
||||
operation);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
|
||||
log_info(" %s_%s(%s)... passed\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
// Test for scan inclusive non uniform functions
|
||||
template <typename Ty, ArithmeticOp operation> struct SCIN_NU
|
||||
{
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
ng = ng / nw;
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
|
||||
: func_name = "sub_group_scan_inclusive";
|
||||
|
||||
genrand<Ty, operation>(x, t, m, ns, nw, ng);
|
||||
log_info(" %s_%s(%s)...\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
log_info(" test params: global size = %d local size = %d subgroups "
|
||||
"size = %d work item mask = 0x%x \n",
|
||||
test_params.global_workgroup_size, nw, ns, work_items_mask);
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
Ty tr, rr;
|
||||
ng = ng / nw;
|
||||
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
|
||||
: func_name = "sub_group_scan_inclusive";
|
||||
|
||||
uint32_t use_work_items_mask;
|
||||
// for uniform case take into consideration all workitems
|
||||
use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
|
||||
// std::bitset<32> mask32(use_work_items_mask);
|
||||
// for (int k) mask32.count();
|
||||
for (k = 0; k < ng; ++k)
|
||||
{ // for each work_group
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{ // inside the work_group
|
||||
mx[j] = x[j]; // read host inputs for work_group
|
||||
my[j] = y[j]; // read device outputs for work_group
|
||||
}
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
std::set<int> active_work_items;
|
||||
int catch_frist_active = -1;
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
uint32_t check_work_item = 1 << (i % 32);
|
||||
if (use_work_items_mask & check_work_item)
|
||||
{
|
||||
if (catch_frist_active == -1)
|
||||
{
|
||||
catch_frist_active = i;
|
||||
}
|
||||
active_work_items.insert(i);
|
||||
}
|
||||
}
|
||||
if (active_work_items.empty())
|
||||
{
|
||||
log_info(" No acitve workitems in workgroup id = %d "
|
||||
"subgroup id = %d - no calculation\n",
|
||||
k, j);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
tr = TypeManager<Ty>::identify_limits(operation);
|
||||
for (const int &active_work_item : active_work_items)
|
||||
{
|
||||
rr = my[ii + active_work_item];
|
||||
if (active_work_items.size() == 1)
|
||||
{
|
||||
tr = mx[ii + catch_frist_active];
|
||||
}
|
||||
else
|
||||
{
|
||||
tr = calculate<Ty>(tr, mx[ii + active_work_item],
|
||||
operation);
|
||||
}
|
||||
if (!compare_ordered<Ty>(rr, tr))
|
||||
{
|
||||
log_error(
|
||||
"ERROR: %s_%s(%s) "
|
||||
"mismatch for local id %d in sub group %d "
|
||||
"in "
|
||||
"group %d Expected: %d Obtained: %d\n",
|
||||
func_name.c_str(), operation_names(operation),
|
||||
TypeManager<Ty>::name(), active_work_item, j, k,
|
||||
tr, rr);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
|
||||
log_info(" %s_%s(%s)... passed\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
// Test for reduce non uniform functions
|
||||
template <typename Ty, ArithmeticOp operation> struct RED_NU
|
||||
{
|
||||
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||
{
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
ng = ng / nw;
|
||||
std::string func_name;
|
||||
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_reduce"
|
||||
: func_name = "sub_group_reduce";
|
||||
log_info(" %s_%s(%s)...\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
log_info(" test params: global size = %d local size = %d subgroups "
|
||||
"size = %d work item mask = 0x%x \n",
|
||||
test_params.global_workgroup_size, nw, ns, work_items_mask);
|
||||
genrand<Ty, operation>(x, t, m, ns, nw, ng);
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||
const WorkGroupParams &test_params)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nw = test_params.local_workgroup_size;
|
||||
int ns = test_params.subgroup_size;
|
||||
int ng = test_params.global_workgroup_size;
|
||||
uint32_t work_items_mask = test_params.work_items_mask;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
ng = ng / nw;
|
||||
Ty tr, rr;
|
||||
|
||||
std::string func_name;
|
||||
work_items_mask ? func_name = "sub_group_non_uniform_reduce"
|
||||
: func_name = "sub_group_reduce";
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
// Map to array indexed to array indexed by local ID and sub
|
||||
// group
|
||||
for (j = 0; j < nw; ++j)
|
||||
{
|
||||
mx[j] = x[j];
|
||||
my[j] = y[j];
|
||||
}
|
||||
|
||||
uint32_t use_work_items_mask;
|
||||
use_work_items_mask =
|
||||
!work_items_mask ? 0xFFFFFFFF : work_items_mask;
|
||||
|
||||
for (j = 0; j < nj; ++j)
|
||||
{
|
||||
ii = j * ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
std::set<int> active_work_items;
|
||||
int catch_frist_active = -1;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
uint32_t check_work_item = 1 << (i % 32);
|
||||
if (use_work_items_mask & check_work_item)
|
||||
{
|
||||
if (catch_frist_active == -1)
|
||||
{
|
||||
catch_frist_active = i;
|
||||
tr = mx[ii + i];
|
||||
active_work_items.insert(i);
|
||||
continue;
|
||||
}
|
||||
active_work_items.insert(i);
|
||||
tr = calculate<Ty>(tr, mx[ii + i], operation);
|
||||
}
|
||||
}
|
||||
|
||||
if (active_work_items.empty())
|
||||
{
|
||||
log_info(" No acitve workitems in workgroup id = %d "
|
||||
"subgroup id = %d - no calculation\n",
|
||||
k, j);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const int &active_work_item : active_work_items)
|
||||
{
|
||||
rr = my[ii + active_work_item];
|
||||
if (!compare_ordered<Ty>(rr, tr))
|
||||
{
|
||||
log_error("ERROR: %s_%s(%s) "
|
||||
"mismatch for local id %d in sub group %d in "
|
||||
"group %d Expected: %d Obtained: %d\n",
|
||||
func_name.c_str(), operation_names(operation),
|
||||
TypeManager<Ty>::name(), active_work_item, j,
|
||||
k, tr, rr);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
}
|
||||
}
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 4 * nw;
|
||||
}
|
||||
|
||||
log_info(" %s_%s(%s)... passed\n", func_name.c_str(),
|
||||
operation_names(operation), TypeManager<Ty>::name());
|
||||
return TEST_PASS;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user