mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Use maximum subgroup size in sub_group_ballot tests (#1344)
sub_group_ballot_bit_count() and sub_group_ballot_find_msb() mask their input according to a subgroup size, which is assumed to be the maximum subgroup size, and not the actual subgroup size excluding non-existent work-items in the "remainder" subgroup. Fix this as per the clarification made to the OpenCL C specification in revision 3.0.9 for issue KhronosGroup/OpenCL-Docs#626 by pull request KhronosGroup/OpenCL-Docs#689. Signed-off-by: Stuart Brady <stuart.brady@arm.com>
This commit is contained in:
@@ -496,7 +496,7 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
|
||||
| (bs128(mx[wg_offset + wi_id].s1) << 32)
|
||||
| (bs128(mx[wg_offset + wi_id].s2) << 64)
|
||||
| (bs128(mx[wg_offset + wi_id].s3) << 96);
|
||||
bs &= getImportantBits(wi_id, current_sbs);
|
||||
bs &= getImportantBits(wi_id, sbs);
|
||||
device_result = my[wg_offset + wi_id].s0;
|
||||
if (operation == BallotOp::ballot_inclusive_scan
|
||||
|| operation == BallotOp::ballot_exclusive_scan
|
||||
@@ -516,7 +516,7 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
|
||||
}
|
||||
else if (operation == BallotOp::ballot_find_lsb)
|
||||
{
|
||||
for (int id = 0; id < current_sbs; ++id)
|
||||
for (int id = 0; id < sbs; ++id)
|
||||
{
|
||||
if (bs.test(id))
|
||||
{
|
||||
@@ -537,7 +537,7 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
|
||||
}
|
||||
else if (operation == BallotOp::ballot_find_msb)
|
||||
{
|
||||
for (int id = current_sbs - 1; id >= 0; --id)
|
||||
for (int id = sbs - 1; id >= 0; --id)
|
||||
{
|
||||
if (bs.test(id))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user