Files
OpenCL-CTS/test_conformance/subgroups/test_workitem.cpp
2019-08-01 18:41:12 +01:00

253 lines
8.7 KiB
C++

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
struct get_test_data {
cl_uint subGroupSize;
cl_uint maxSubGroupSize;
cl_uint numSubGroups;
cl_uint enqNumSubGroups;
cl_uint subGroupId;
cl_uint subGroupLocalId;
bool operator==(get_test_data x) {
return subGroupSize == x.subGroupSize &&
maxSubGroupSize == x.maxSubGroupSize &&
numSubGroups == x.numSubGroups &&
subGroupId == x.subGroupId &&
subGroupLocalId == x.subGroupLocalId;
}
};
static const char * get_test_source =
"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
"\n"
"typedef struct {\n"
" uint subGroupSize;\n"
" uint maxSubGroupSize;\n"
" uint numSubGroups;\n"
" uint enqNumSubGroups;\n"
" uint subGroupId;\n"
" uint subGroupLocalId;\n"
"} get_test_data;\n"
"\n"
"__kernel void get_test( __global get_test_data *outData )\n"
"{\n"
" int gid = get_global_id( 0 );\n"
" outData[gid].subGroupSize = get_sub_group_size();\n"
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
" outData[gid].numSubGroups = get_num_sub_groups();\n"
" outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
" outData[gid].subGroupId = get_sub_group_id();\n"
" outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
"}";
static int
check_group(const get_test_data *result, int nw, cl_uint ensg, int maxwgs)
{
int first = -1;
int last = -1;
int i, j;
cl_uint hit[32];
for (i=0; i<nw; ++i) {
if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0)
first = i;
if (result[i].subGroupId == result[0].numSubGroups-1 && result[i].subGroupLocalId == 0)
last = i;
if (first != -1 && last != -1)
break;
}
if (first == -1 || last == -1) {
log_error("ERROR: expected sub group id's are missing\n");
return -1;
}
// Check them
if (result[first].subGroupSize == 0) {
log_error("ERROR: get_sub_group_size() returned 0\n");
return -1;
}
if (result[first].maxSubGroupSize == 0 || result[first].maxSubGroupSize > maxwgs) {
log_error("ERROR: get_max_subgroup_size() returned incorrect result: %u\n", result[first].maxSubGroupSize);
return -1;
}
if (result[first].subGroupSize > result[first].maxSubGroupSize) {
log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n");
return -1;
}
if (result[last].subGroupSize > result[first].subGroupSize) {
log_error("ERROR: last sub group larger than first sub group\n");
return -1;
}
if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg) {
log_error("ERROR: get_num_sub_groups() returned incorrect result: %u \n", result[first].numSubGroups);
return -1;
}
memset(hit, 0, sizeof(hit));
for (i=0; i<nw; ++i) {
if (result[i].maxSubGroupSize != result[first].maxSubGroupSize ||
result[i].numSubGroups != result[first].numSubGroups) {
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
return -1;
}
if (result[i].subGroupId >= result[first].numSubGroups) {
log_error("ERROR: get_sub_group_id() returned out of range value: %u\n", result[i].subGroupId);
return -1;
}
if (result[i].enqNumSubGroups != ensg) {
log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect value: %u\n", result[i].enqNumSubGroups);
return -1;
}
if (result[first].numSubGroups > 1) {
if (result[i].subGroupId < result[first].numSubGroups-1) {
if (result[i].subGroupSize != result[first].subGroupSize) {
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
return -1;
}
if (result[i].subGroupLocalId >= result[first].subGroupSize) {
log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId);
return -1;
}
} else {
if (result[i].subGroupSize != result[last].subGroupSize) {
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
return -1;
}
if (result[i].subGroupLocalId >= result[last].subGroupSize) {
log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId);
return -1;
}
}
} else {
if (result[i].subGroupSize != result[first].subGroupSize) {
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
return -1;
}
if (result[i].subGroupLocalId >= result[first].subGroupSize) {
log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId);
return -1;
}
}
j = (result[first].subGroupSize + 31)/32 * result[i].subGroupId + (result[i].subGroupLocalId >> 5);
if (j < sizeof(hit)/4) {
cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU);
if ((hit[j] & b) != 0) {
log_error("ERROR: get_sub_group_local_id() repeated a result in the same sub group\n");
return -1;
}
hit[j] |= b;
}
}
return 0;
}
int
test_work_item_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
static const size_t lsize = 200;
int error;
int i, j, k, q, r, nw;
int maxwgs;
cl_uint ensg;
size_t global;
size_t local;
get_test_data result[lsize*6];
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper out;
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &get_test_source, "get_test", "-cl-std=CL2.0");
if (error != 0)
return error;
error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
if (error != 0)
return error;
maxwgs = (int)local;
// Limit it a bit so we have muliple work groups
// Ideally this will still be large enough to give us multiple subgroups
if (local > lsize)
local = lsize;
// Create our buffer
out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL, &error);
test_error(error, "clCreateBuffer failed");
// Set argument
error = clSetKernelArg(kernel, 0, sizeof(out), &out);
test_error(error, "clSetKernelArg failed");
global = local * 5;
// Make sure we have a flexible range
global += 3 * local / 4;
// Collect the data
memset((void *)&result, 0xf0, sizeof(result));
error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
nw = (int)local;
ensg = result[0].enqNumSubGroups;
// Check the first group
error = check_group(result, nw, ensg, maxwgs);
if (error)
return error;
q = (int)global / nw;
r = (int)global % nw;
// Check the remaining work groups including the last if it is the same size
for (k=1; k<q; ++k) {
for (j=0; j<nw; ++j) {
i = k*nw + j;
if (!(result[i] == result[i-nw])) {
log_error("ERROR: sub group mapping is not identical for all work groups\n");
return -1;
}
}
}
// Check the last group if it wasn't the same size
if (r != 0) {
error = check_group(result + q*nw, r, ensg, maxwgs);
if (error)
return error;
}
return 0;
}