mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-26 00:39:03 +00:00
Improve testing of sub_group_ballot (#1382)
Signed-off-by: Stuart Brady <stuart.brady@arm.com>
This commit is contained in:
@@ -277,3 +277,5 @@ double genrand_res53(MTdata d)
|
|||||||
unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6;
|
unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6;
|
||||||
return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
|
return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool genrand_bool(MTdata d) { return ((cl_uint)genrand_int32(d) & 1); }
|
||||||
|
|||||||
@@ -90,6 +90,9 @@ double genrand_res53(MTdata /*data*/);
|
|||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
||||||
|
/* generates a random boolean */
|
||||||
|
bool genrand_bool(MTdata /*data*/);
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
struct MTdataHolder
|
struct MTdataHolder
|
||||||
|
|||||||
@@ -34,6 +34,12 @@ extern MTdata gMTdata;
|
|||||||
typedef std::bitset<128> bs128;
|
typedef std::bitset<128> bs128;
|
||||||
extern cl_half_rounding_mode g_rounding_mode;
|
extern cl_half_rounding_mode g_rounding_mode;
|
||||||
|
|
||||||
|
static bs128 cl_uint4_to_bs128(cl_uint4 v)
|
||||||
|
{
|
||||||
|
return bs128(v.s0) | (bs128(v.s1) << 32) | (bs128(v.s2) << 64)
|
||||||
|
| (bs128(v.s3) << 96);
|
||||||
|
}
|
||||||
|
|
||||||
static cl_uint4 bs128_to_cl_uint4(bs128 v)
|
static cl_uint4 bs128_to_cl_uint4(bs128 v)
|
||||||
{
|
{
|
||||||
bs128 bs128_ffffffff = 0xffffffffU;
|
bs128 bs128_ffffffff = 0xffffffffU;
|
||||||
|
|||||||
@@ -31,45 +31,26 @@ template <typename Ty> struct BALLOT
|
|||||||
|
|
||||||
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
|
||||||
{
|
{
|
||||||
// no work here
|
|
||||||
int gws = test_params.global_workgroup_size;
|
|
||||||
int lws = test_params.local_workgroup_size;
|
|
||||||
int sbs = test_params.subgroup_size;
|
|
||||||
int non_uniform_size = gws % lws;
|
|
||||||
}
|
|
||||||
|
|
||||||
static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
|
||||||
const WorkGroupParams &test_params)
|
|
||||||
{
|
|
||||||
int wi_id, wg_id, sb_id;
|
|
||||||
int gws = test_params.global_workgroup_size;
|
int gws = test_params.global_workgroup_size;
|
||||||
int lws = test_params.local_workgroup_size;
|
int lws = test_params.local_workgroup_size;
|
||||||
int sbs = test_params.subgroup_size;
|
int sbs = test_params.subgroup_size;
|
||||||
int sb_number = (lws + sbs - 1) / sbs;
|
int sb_number = (lws + sbs - 1) / sbs;
|
||||||
int current_sbs = 0;
|
|
||||||
cl_uint expected_result, device_result;
|
|
||||||
int non_uniform_size = gws % lws;
|
int non_uniform_size = gws % lws;
|
||||||
int wg_number = gws / lws;
|
int wg_number = gws / lws;
|
||||||
wg_number = non_uniform_size ? wg_number + 1 : wg_number;
|
wg_number = non_uniform_size ? wg_number + 1 : wg_number;
|
||||||
int last_subgroup_size = 0;
|
int last_subgroup_size = 0;
|
||||||
|
|
||||||
for (wg_id = 0; wg_id < wg_number; ++wg_id)
|
for (int wg_id = 0; wg_id < wg_number; ++wg_id)
|
||||||
{ // for each work_group
|
{ // for each work_group
|
||||||
if (non_uniform_size && wg_id == wg_number - 1)
|
if (non_uniform_size && wg_id == wg_number - 1)
|
||||||
{
|
{
|
||||||
set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
|
set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
|
||||||
last_subgroup_size);
|
last_subgroup_size);
|
||||||
}
|
}
|
||||||
|
for (int sb_id = 0; sb_id < sb_number; ++sb_id)
|
||||||
for (wi_id = 0; wi_id < lws; ++wi_id)
|
|
||||||
{ // inside the work_group
|
|
||||||
// read device outputs for work_group
|
|
||||||
my[wi_id] = y[wi_id];
|
|
||||||
}
|
|
||||||
|
|
||||||
for (sb_id = 0; sb_id < sb_number; ++sb_id)
|
|
||||||
{ // for each subgroup
|
{ // for each subgroup
|
||||||
int wg_offset = sb_id * sbs;
|
int wg_offset = sb_id * sbs;
|
||||||
|
int current_sbs;
|
||||||
if (last_subgroup_size && sb_id == sb_number - 1)
|
if (last_subgroup_size && sb_id == sb_number - 1)
|
||||||
{
|
{
|
||||||
current_sbs = last_subgroup_size;
|
current_sbs = last_subgroup_size;
|
||||||
@@ -78,25 +59,121 @@ template <typename Ty> struct BALLOT
|
|||||||
{
|
{
|
||||||
current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
|
current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
|
||||||
}
|
}
|
||||||
for (wi_id = 0; wi_id < current_sbs; ++wi_id)
|
|
||||||
|
for (int wi_id = 0; wi_id < current_sbs; wi_id++)
|
||||||
{
|
{
|
||||||
device_result = my[wg_offset + wi_id];
|
cl_uint v;
|
||||||
expected_result = 1;
|
if (genrand_bool(gMTdata))
|
||||||
if (!compare(device_result, expected_result))
|
{
|
||||||
|
v = genrand_bool(gMTdata);
|
||||||
|
}
|
||||||
|
else if (genrand_bool(gMTdata))
|
||||||
|
{
|
||||||
|
v = 1U << ((genrand_int32(gMTdata) % 31) + 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
v = genrand_int32(gMTdata);
|
||||||
|
}
|
||||||
|
cl_uint4 v4 = { v, 0, 0, 0 };
|
||||||
|
t[wi_id + wg_offset] = v4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Now map into work group using map from device
|
||||||
|
for (int wi_id = 0; wi_id < lws; ++wi_id)
|
||||||
|
{
|
||||||
|
x[wi_id] = t[wi_id];
|
||||||
|
}
|
||||||
|
x += lws;
|
||||||
|
m += 4 * lws;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
|
||||||
|
const WorkGroupParams &test_params)
|
||||||
|
{
|
||||||
|
int gws = test_params.global_workgroup_size;
|
||||||
|
int lws = test_params.local_workgroup_size;
|
||||||
|
int sbs = test_params.subgroup_size;
|
||||||
|
int sb_number = (lws + sbs - 1) / sbs;
|
||||||
|
int non_uniform_size = gws % lws;
|
||||||
|
int wg_number = gws / lws;
|
||||||
|
wg_number = non_uniform_size ? wg_number + 1 : wg_number;
|
||||||
|
int last_subgroup_size = 0;
|
||||||
|
|
||||||
|
for (int wg_id = 0; wg_id < wg_number; ++wg_id)
|
||||||
|
{ // for each work_group
|
||||||
|
if (non_uniform_size && wg_id == wg_number - 1)
|
||||||
|
{
|
||||||
|
set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
|
||||||
|
last_subgroup_size);
|
||||||
|
}
|
||||||
|
for (int wi_id = 0; wi_id < lws; ++wi_id)
|
||||||
|
{ // inside the work_group
|
||||||
|
mx[wi_id] = x[wi_id]; // read host inputs for work_group
|
||||||
|
my[wi_id] = y[wi_id]; // read device outputs for work_group
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int sb_id = 0; sb_id < sb_number; ++sb_id)
|
||||||
|
{ // for each subgroup
|
||||||
|
int wg_offset = sb_id * sbs;
|
||||||
|
int current_sbs;
|
||||||
|
if (last_subgroup_size && sb_id == sb_number - 1)
|
||||||
|
{
|
||||||
|
current_sbs = last_subgroup_size;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
|
||||||
|
}
|
||||||
|
|
||||||
|
bs128 expected_result_bs = 0;
|
||||||
|
|
||||||
|
std::set<int> active_work_items;
|
||||||
|
for (int wi_id = 0; wi_id < current_sbs; ++wi_id)
|
||||||
|
{
|
||||||
|
if (test_params.work_items_mask.test(wi_id))
|
||||||
|
{
|
||||||
|
bool predicate = (mx[wg_offset + wi_id].s0 != 0);
|
||||||
|
expected_result_bs |= (bs128(predicate) << wi_id);
|
||||||
|
active_work_items.insert(wi_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (active_work_items.empty())
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_uint4 expected_result =
|
||||||
|
bs128_to_cl_uint4(expected_result_bs);
|
||||||
|
for (const int &active_work_item : active_work_items)
|
||||||
|
{
|
||||||
|
int wi_id = active_work_item;
|
||||||
|
|
||||||
|
cl_uint4 device_result = my[wg_offset + wi_id];
|
||||||
|
bs128 device_result_bs = cl_uint4_to_bs128(device_result);
|
||||||
|
|
||||||
|
if (device_result_bs != expected_result_bs)
|
||||||
{
|
{
|
||||||
log_error(
|
log_error(
|
||||||
"ERROR: sub_group_ballot mismatch for local id "
|
"ERROR: sub_group_ballot mismatch for local id "
|
||||||
"%d in sub group %d in group %d obtained %d, "
|
"%d in sub group %d in group %d obtained {%d, %d, "
|
||||||
"expected %d\n",
|
"%d, %d}, expected {%d, %d, %d, %d}\n",
|
||||||
wi_id, sb_id, wg_id, device_result,
|
wi_id, sb_id, wg_id, device_result.s0,
|
||||||
expected_result);
|
device_result.s1, device_result.s2,
|
||||||
|
device_result.s3, expected_result.s0,
|
||||||
|
expected_result.s1, expected_result.s2,
|
||||||
|
expected_result.s3);
|
||||||
return TEST_FAIL;
|
return TEST_FAIL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
x += lws;
|
||||||
y += lws;
|
y += lws;
|
||||||
m += 4 * lws;
|
m += 4 * lws;
|
||||||
}
|
}
|
||||||
|
|
||||||
return TEST_PASS;
|
return TEST_PASS;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -724,27 +801,26 @@ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type
|
|||||||
}
|
}
|
||||||
)";
|
)";
|
||||||
std::string sub_group_ballot_source = R"(
|
std::string sub_group_ballot_source = R"(
|
||||||
__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out) {
|
__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) {
|
||||||
uint4 full_ballot = sub_group_ballot(1);
|
|
||||||
uint divergence_mask;
|
|
||||||
uint4 partial_ballot;
|
|
||||||
uint gid = get_global_id(0);
|
uint gid = get_global_id(0);
|
||||||
XY(xy,gid);
|
XY(xy,gid);
|
||||||
if (get_sub_group_local_id() & 1) {
|
uint subgroup_local_id = get_sub_group_local_id();
|
||||||
divergence_mask = 0xaaaaaaaa;
|
uint elect_work_item = 1 << (subgroup_local_id % 32);
|
||||||
partial_ballot = sub_group_ballot(1);
|
uint work_item_mask;
|
||||||
} else {
|
if (subgroup_local_id < 32) {
|
||||||
divergence_mask = 0x55555555;
|
work_item_mask = work_item_mask_vector.x;
|
||||||
partial_ballot = sub_group_ballot(1);
|
} else if(subgroup_local_id < 64) {
|
||||||
|
work_item_mask = work_item_mask_vector.y;
|
||||||
|
} else if(subgroup_local_id < 96) {
|
||||||
|
work_item_mask = work_item_mask_vector.z;
|
||||||
|
} else if(subgroup_local_id < 128) {
|
||||||
|
work_item_mask = work_item_mask_vector.w;
|
||||||
}
|
}
|
||||||
size_t lws = get_local_size(0);
|
uint4 value = (uint4)(0, 0, 0, 0);
|
||||||
uint4 masked_ballot = full_ballot;
|
if (elect_work_item & work_item_mask) {
|
||||||
masked_ballot.x &= divergence_mask;
|
value = sub_group_ballot(in[gid].s0);
|
||||||
masked_ballot.y &= divergence_mask;
|
}
|
||||||
masked_ballot.z &= divergence_mask;
|
out[gid] = value;
|
||||||
masked_ballot.w &= divergence_mask;
|
|
||||||
out[gid] = all(masked_ballot == partial_ballot);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
)";
|
)";
|
||||||
std::string sub_group_inverse_ballot_source = R"(
|
std::string sub_group_inverse_ballot_source = R"(
|
||||||
@@ -952,42 +1028,47 @@ int test_subgroup_functions_ballot(cl_device_id device, cl_context context,
|
|||||||
error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::lt_mask>>(
|
error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::lt_mask>>(
|
||||||
"get_sub_group_lt_mask");
|
"get_sub_group_lt_mask");
|
||||||
|
|
||||||
// ballot functions
|
// sub_group_ballot function
|
||||||
WorkGroupParams test_params_ballot(global_work_size, local_work_size);
|
WorkGroupParams test_params_ballot(global_work_size, local_work_size, 3);
|
||||||
test_params_ballot.save_kernel_source(
|
test_params_ballot.save_kernel_source(sub_group_ballot_source);
|
||||||
sub_group_ballot_bit_scan_find_source);
|
|
||||||
test_params_ballot.save_kernel_source(sub_group_ballot_source,
|
|
||||||
"sub_group_ballot");
|
|
||||||
test_params_ballot.save_kernel_source(sub_group_inverse_ballot_source,
|
|
||||||
"sub_group_inverse_ballot");
|
|
||||||
test_params_ballot.save_kernel_source(sub_group_ballot_bit_extract_source,
|
|
||||||
"sub_group_ballot_bit_extract");
|
|
||||||
RunTestForType rft_ballot(device, context, queue, num_elements,
|
RunTestForType rft_ballot(device, context, queue, num_elements,
|
||||||
test_params_ballot);
|
test_params_ballot);
|
||||||
error |= rft_ballot.run_impl<cl_uint, BALLOT<cl_uint>>("sub_group_ballot");
|
|
||||||
error |=
|
error |=
|
||||||
rft_ballot.run_impl<cl_uint4,
|
rft_ballot.run_impl<cl_uint4, BALLOT<cl_uint4>>("sub_group_ballot");
|
||||||
BALLOT_INVERSE<cl_uint4, BallotOp::inverse_ballot>>(
|
|
||||||
|
// ballot arithmetic functions
|
||||||
|
WorkGroupParams test_params_arith(global_work_size, local_work_size);
|
||||||
|
test_params_arith.save_kernel_source(sub_group_ballot_bit_scan_find_source);
|
||||||
|
test_params_arith.save_kernel_source(sub_group_inverse_ballot_source,
|
||||||
|
"sub_group_inverse_ballot");
|
||||||
|
test_params_arith.save_kernel_source(sub_group_ballot_bit_extract_source,
|
||||||
|
"sub_group_ballot_bit_extract");
|
||||||
|
RunTestForType rft_arith(device, context, queue, num_elements,
|
||||||
|
test_params_arith);
|
||||||
|
error |=
|
||||||
|
rft_arith.run_impl<cl_uint4,
|
||||||
|
BALLOT_INVERSE<cl_uint4, BallotOp::inverse_ballot>>(
|
||||||
"sub_group_inverse_ballot");
|
"sub_group_inverse_ballot");
|
||||||
error |= rft_ballot.run_impl<
|
error |= rft_arith.run_impl<
|
||||||
cl_uint4, BALLOT_BIT_EXTRACT<cl_uint4, BallotOp::ballot_bit_extract>>(
|
cl_uint4, BALLOT_BIT_EXTRACT<cl_uint4, BallotOp::ballot_bit_extract>>(
|
||||||
"sub_group_ballot_bit_extract");
|
"sub_group_ballot_bit_extract");
|
||||||
error |= rft_ballot.run_impl<
|
error |= rft_arith.run_impl<
|
||||||
cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_bit_count>>(
|
cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_bit_count>>(
|
||||||
"sub_group_ballot_bit_count");
|
"sub_group_ballot_bit_count");
|
||||||
error |= rft_ballot.run_impl<
|
error |= rft_arith.run_impl<
|
||||||
cl_uint4,
|
cl_uint4,
|
||||||
BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_inclusive_scan>>(
|
BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_inclusive_scan>>(
|
||||||
"sub_group_ballot_inclusive_scan");
|
"sub_group_ballot_inclusive_scan");
|
||||||
error |= rft_ballot.run_impl<
|
error |= rft_arith.run_impl<
|
||||||
cl_uint4,
|
cl_uint4,
|
||||||
BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_exclusive_scan>>(
|
BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_exclusive_scan>>(
|
||||||
"sub_group_ballot_exclusive_scan");
|
"sub_group_ballot_exclusive_scan");
|
||||||
error |= rft_ballot.run_impl<
|
error |= rft_arith.run_impl<
|
||||||
cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_lsb>>(
|
cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_lsb>>(
|
||||||
"sub_group_ballot_find_lsb");
|
"sub_group_ballot_find_lsb");
|
||||||
error |= rft_ballot.run_impl<
|
error |= rft_arith.run_impl<
|
||||||
cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_msb>>(
|
cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_msb>>(
|
||||||
"sub_group_ballot_find_msb");
|
"sub_group_ballot_find_msb");
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user