mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
[subgroups][non_uniform_broadcast] Fix broadcasting index generation (#1680)
* [subgroups][non_uniform_broadcast] Fix broadcasting index generation
The subgroup size may not be greater than `NR_OF_ACTIVE_WORK_ITEMS`.
Broadcasting index needs to be reduced in that case.
Otherwise, if subgroup size == `NR_OF_ACTIVE_WORK_ITEMS` == 4, then we
will encounter "divide-by-zero" error when evaluating `bcast_index %
(n - NR_OF_ACTIVE_WORK_ITEMS)`.
* Revert "[subgroups][non_uniform_broadcast] Fix broadcasting index generation"
This reverts commit 9bbab539de.
* [subgroups][non_uniform_broadcast] Fix broadcasting index generation
Dynamically activate half of the work items in the current subgroup
instead of hardcoding as `NR_OF_ACTIVE_WORK_ITEMS`.
* Apply suggestion
This commit is contained in:
@@ -29,7 +29,7 @@
|
|||||||
// subgroup takes only one value from only one chosen (the smallest subgroup ID)
|
// subgroup takes only one value from only one chosen (the smallest subgroup ID)
|
||||||
// work_item
|
// work_item
|
||||||
// sub_group_non_uniform_broadcast - same as type 0 but
|
// sub_group_non_uniform_broadcast - same as type 0 but
|
||||||
// only 4 work_items from subgroup enter the code (are active)
|
// only half of work_items from subgroup enter the code (are active)
|
||||||
template <typename Ty, SubgroupsBroadcastOp operation> struct BC
|
template <typename Ty, SubgroupsBroadcastOp operation> struct BC
|
||||||
{
|
{
|
||||||
static void log_test(const WorkGroupParams &test_params,
|
static void log_test(const WorkGroupParams &test_params,
|
||||||
@@ -78,24 +78,16 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
|
|||||||
int bcast_elseif = 0;
|
int bcast_elseif = 0;
|
||||||
int bcast_index = (int)(genrand_int32(gMTdata) & 0x7fffffff)
|
int bcast_index = (int)(genrand_int32(gMTdata) & 0x7fffffff)
|
||||||
% (d > n ? n : d);
|
% (d > n ? n : d);
|
||||||
|
int num_of_active_items = n >> 1;
|
||||||
// l - calculate subgroup local id from which value will be
|
// l - calculate subgroup local id from which value will be
|
||||||
// broadcasted (one the same value for whole subgroup)
|
// broadcasted (one the same value for whole subgroup)
|
||||||
if (operation != SubgroupsBroadcastOp::broadcast)
|
if (operation != SubgroupsBroadcastOp::broadcast)
|
||||||
{
|
{
|
||||||
// reduce brodcasting index in case of non_uniform and
|
if (num_of_active_items != 0)
|
||||||
// last workgroup last subgroup
|
bcast_if = bcast_index % num_of_active_items;
|
||||||
if (last_subgroup_size && j == nj - 1
|
if (num_of_active_items != n)
|
||||||
&& last_subgroup_size < NR_OF_ACTIVE_WORK_ITEMS)
|
bcast_elseif = num_of_active_items
|
||||||
{
|
+ bcast_index % (n - num_of_active_items);
|
||||||
bcast_if = bcast_index % last_subgroup_size;
|
|
||||||
bcast_elseif = bcast_if;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
bcast_if = bcast_index % NR_OF_ACTIVE_WORK_ITEMS;
|
|
||||||
bcast_elseif = NR_OF_ACTIVE_WORK_ITEMS
|
|
||||||
+ bcast_index % (n - NR_OF_ACTIVE_WORK_ITEMS);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < n; ++i)
|
for (i = 0; i < n; ++i)
|
||||||
@@ -107,7 +99,7 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (i < NR_OF_ACTIVE_WORK_ITEMS)
|
if (i < num_of_active_items)
|
||||||
{
|
{
|
||||||
// index of the third
|
// index of the third
|
||||||
// element int the vector.
|
// element int the vector.
|
||||||
@@ -182,15 +174,15 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check result
|
// Check result
|
||||||
|
int num_of_active_items = n >> 1;
|
||||||
if (operation == SubgroupsBroadcastOp::broadcast_first)
|
if (operation == SubgroupsBroadcastOp::broadcast_first)
|
||||||
{
|
{
|
||||||
int lowest_active_id = -1;
|
int lowest_active_id = -1;
|
||||||
for (i = 0; i < n; ++i)
|
for (i = 0; i < n; ++i)
|
||||||
{
|
{
|
||||||
|
|
||||||
lowest_active_id = i < NR_OF_ACTIVE_WORK_ITEMS
|
lowest_active_id =
|
||||||
? 0
|
i < num_of_active_items ? 0 : num_of_active_items;
|
||||||
: NR_OF_ACTIVE_WORK_ITEMS;
|
|
||||||
// findout if broadcasted
|
// findout if broadcasted
|
||||||
// value is the same
|
// value is the same
|
||||||
tr = mx[ii + lowest_active_id];
|
tr = mx[ii + lowest_active_id];
|
||||||
@@ -221,7 +213,7 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (i < NR_OF_ACTIVE_WORK_ITEMS)
|
if (i < num_of_active_items)
|
||||||
{ // take index of array where info
|
{ // take index of array where info
|
||||||
// which work_item will be
|
// which work_item will be
|
||||||
// broadcast its value is stored
|
// broadcast its value is stored
|
||||||
|
|||||||
@@ -28,8 +28,6 @@
|
|||||||
#include <regex>
|
#include <regex>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
#define NR_OF_ACTIVE_WORK_ITEMS 4
|
|
||||||
|
|
||||||
extern MTdata gMTdata;
|
extern MTdata gMTdata;
|
||||||
typedef std::bitset<128> bs128;
|
typedef std::bitset<128> bs128;
|
||||||
extern cl_half_rounding_mode g_rounding_mode;
|
extern cl_half_rounding_mode g_rounding_mode;
|
||||||
@@ -1474,8 +1472,6 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
|
|||||||
|
|
||||||
Fns::log_test(test_params, "");
|
Fns::log_test(test_params, "");
|
||||||
|
|
||||||
kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS ";
|
|
||||||
kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n";
|
|
||||||
// Make sure a test of type Ty is supported by the device
|
// Make sure a test of type Ty is supported by the device
|
||||||
if (!TypeManager<Ty>::type_supported(device))
|
if (!TypeManager<Ty>::type_supported(device))
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -767,7 +767,7 @@ __kernel void test_sub_group_non_uniform_broadcast(const __global Type *in, __gl
|
|||||||
int gid = get_global_id(0);
|
int gid = get_global_id(0);
|
||||||
XY(xy,gid);
|
XY(xy,gid);
|
||||||
Type x = in[gid];
|
Type x = in[gid];
|
||||||
if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {
|
if (xy[gid].x < (get_sub_group_size() >> 1)) {
|
||||||
out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z);
|
out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z);
|
||||||
} else {
|
} else {
|
||||||
out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w);
|
out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w);
|
||||||
@@ -779,7 +779,7 @@ __kernel void test_sub_group_broadcast_first(const __global Type *in, __global i
|
|||||||
int gid = get_global_id(0);
|
int gid = get_global_id(0);
|
||||||
XY(xy,gid);
|
XY(xy,gid);
|
||||||
Type x = in[gid];
|
Type x = in[gid];
|
||||||
if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {
|
if (xy[gid].x < (get_sub_group_size() >> 1)) {
|
||||||
out[gid] = sub_group_broadcast_first(x);;
|
out[gid] = sub_group_broadcast_first(x);;
|
||||||
} else {
|
} else {
|
||||||
out[gid] = sub_group_broadcast_first(x);;
|
out[gid] = sub_group_broadcast_first(x);;
|
||||||
|
|||||||
Reference in New Issue
Block a user