Format subgroups tests according to clang-format (#745)

* Format subgroups tests to clang-format

* Format issue - fix do/while issue
This commit is contained in:
Grzegorz Wawiorko
2020-05-19 11:16:06 +02:00
committed by GitHub
parent a6c3d921ae
commit ec32bd9b5e
7 changed files with 1260 additions and 821 deletions

View File

@@ -23,13 +23,13 @@
MTdata gMTdata; MTdata gMTdata;
test_definition test_list[] = { test_definition test_list[] = {
ADD_TEST( sub_group_info ), ADD_TEST(sub_group_info),
ADD_TEST( work_item_functions ), ADD_TEST(work_item_functions),
ADD_TEST( work_group_functions ), ADD_TEST(work_group_functions),
ADD_TEST( barrier_functions ), ADD_TEST(barrier_functions),
}; };
const int test_num = ARRAY_SIZE( test_list ); const int test_num = ARRAY_SIZE(test_list);
static test_status checkSubGroupsExtension(cl_device_id device) static test_status checkSubGroupsExtension(cl_device_id device)
{ {
@@ -38,19 +38,24 @@ static test_status checkSubGroupsExtension(cl_device_id device)
auto version = get_device_cl_version(device); auto version = get_device_cl_version(device);
auto expected_min_version = Version(2, 0); auto expected_min_version = Version(2, 0);
if (version < expected_min_version) { if (version < expected_min_version)
version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str()); {
version_expected_info("Test", expected_min_version.to_string().c_str(),
version.to_string().c_str());
return TEST_SKIP; return TEST_SKIP;
} }
bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
if ((version == expected_min_version) && !hasExtension) { if ((version == expected_min_version) && !hasExtension)
log_info("Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); {
log_info(
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
return TEST_SKIP; return TEST_SKIP;
} }
if ((version > expected_min_version) && !hasExtension) { if ((version > expected_min_version) && !hasExtension)
{
log_error("'cl_khr_subgroups' is a required extension, failing.\n"); log_error("'cl_khr_subgroups' is a required extension, failing.\n");
return TEST_FAIL; return TEST_FAIL;
} }
@@ -58,7 +63,8 @@ static test_status checkSubGroupsExtension(cl_device_id device)
return TEST_PASS; return TEST_PASS;
} }
static test_status InitCL(cl_device_id device) { static test_status InitCL(cl_device_id device)
{
auto version = get_device_cl_version(device); auto version = get_device_cl_version(device);
test_status ret = TEST_PASS; test_status ret = TEST_PASS;
@@ -68,13 +74,15 @@ static test_status InitCL(cl_device_id device) {
int error; int error;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS,
sizeof(max_sub_groups), &max_sub_groups, NULL); sizeof(max_sub_groups), &max_sub_groups, NULL);
if (error != CL_SUCCESS) { if (error != CL_SUCCESS)
{
print_error(error, "Unable to get max number of subgroups"); print_error(error, "Unable to get max number of subgroups");
return TEST_FAIL; return TEST_FAIL;
} }
if (max_sub_groups == 0) { if (max_sub_groups == 0)
{
ret = TEST_SKIP; ret = TEST_SKIP;
} }
} }
@@ -83,12 +91,11 @@ static test_status InitCL(cl_device_id device) {
ret = checkSubGroupsExtension(device); ret = checkSubGroupsExtension(device);
} }
return ret; return ret;
} }
int main(int argc, const char *argv[]) int main(int argc, const char *argv[])
{ {
gMTdata = init_genrand(0); gMTdata = init_genrand(0);
return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, InitCL); return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0,
InitCL);
} }

View File

@@ -30,11 +30,16 @@ extern MTdata gMTdata;
extern "C" { extern "C" {
#endif #endif
extern int test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_sub_group_info(cl_device_id device, cl_context context,
extern int test_work_item_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); cl_command_queue queue, int num_elements);
extern int test_work_group_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_work_item_functions(cl_device_id device, cl_context context,
extern int test_barrier_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); cl_command_queue queue, int num_elements);
extern int test_pipe_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_work_group_functions(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
extern int test_barrier_functions(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
extern int test_pipe_functions(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@@ -25,89 +25,212 @@
// Some template helpers // Some template helpers
template <typename Ty> struct TypeName; template <typename Ty> struct TypeName;
template <> struct TypeName<cl_half> { static const char * val() { return "half"; } }; template <> struct TypeName<cl_half>
template <> struct TypeName<cl_uint> { static const char * val() { return "uint"; } }; {
template <> struct TypeName<cl_int> { static const char * val() { return "int"; } }; static const char *val() { return "half"; }
template <> struct TypeName<cl_ulong> { static const char * val() { return "ulong"; } }; };
template <> struct TypeName<cl_long> { static const char * val() { return "long"; } }; template <> struct TypeName<cl_uint>
template <> struct TypeName<float> { static const char * val() { return "float"; } }; {
template <> struct TypeName<double> { static const char * val() { return "double"; } }; static const char *val() { return "uint"; }
};
template <> struct TypeName<cl_int>
{
static const char *val() { return "int"; }
};
template <> struct TypeName<cl_ulong>
{
static const char *val() { return "ulong"; }
};
template <> struct TypeName<cl_long>
{
static const char *val() { return "long"; }
};
template <> struct TypeName<float>
{
static const char *val() { return "float"; }
};
template <> struct TypeName<double>
{
static const char *val() { return "double"; }
};
template <typename Ty> struct TypeDef; template <typename Ty> struct TypeDef;
template <> struct TypeDef<cl_half> { static const char * val() { return "typedef half Type;\n"; } }; template <> struct TypeDef<cl_half>
template <> struct TypeDef<cl_uint> { static const char * val() { return "typedef uint Type;\n"; } }; {
template <> struct TypeDef<cl_int> { static const char * val() { return "typedef int Type;\n"; } }; static const char *val() { return "typedef half Type;\n"; }
template <> struct TypeDef<cl_ulong> { static const char * val() { return "typedef ulong Type;\n"; } }; };
template <> struct TypeDef<cl_long> { static const char * val() { return "typedef long Type;\n"; } }; template <> struct TypeDef<cl_uint>
template <> struct TypeDef<float> { static const char * val() { return "typedef float Type;\n"; } }; {
template <> struct TypeDef<double> { static const char * val() { return "typedef double Type;\n"; } }; static const char *val() { return "typedef uint Type;\n"; }
};
template <> struct TypeDef<cl_int>
{
static const char *val() { return "typedef int Type;\n"; }
};
template <> struct TypeDef<cl_ulong>
{
static const char *val() { return "typedef ulong Type;\n"; }
};
template <> struct TypeDef<cl_long>
{
static const char *val() { return "typedef long Type;\n"; }
};
template <> struct TypeDef<float>
{
static const char *val() { return "typedef float Type;\n"; }
};
template <> struct TypeDef<double>
{
static const char *val() { return "typedef double Type;\n"; }
};
template <typename Ty, int Which> struct TypeIdentity; template <typename Ty, int Which> struct TypeIdentity;
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)0.0; } }; // template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return -(cl_half)65536.0; } }; // (cl_half)0.0; } }; template <> struct TypeIdentity<cl_half,0> { static
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)65536.0; } }; // cl_half val() { return -(cl_half)65536.0; } }; template <> struct
// TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)65536.0; }
// };
template <> struct TypeIdentity<cl_uint,0> { static cl_uint val() { return (cl_uint)0; } }; template <> struct TypeIdentity<cl_uint, 0>
template <> struct TypeIdentity<cl_uint,1> { static cl_uint val() { return (cl_uint)0; } }; {
template <> struct TypeIdentity<cl_uint,2> { static cl_uint val() { return (cl_uint)0xffffffff; } }; static cl_uint val() { return (cl_uint)0; }
};
template <> struct TypeIdentity<cl_uint, 1>
{
static cl_uint val() { return (cl_uint)0; }
};
template <> struct TypeIdentity<cl_uint, 2>
{
static cl_uint val() { return (cl_uint)0xffffffff; }
};
template <> struct TypeIdentity<cl_int,0> { static cl_int val() { return (cl_int)0 ; } }; template <> struct TypeIdentity<cl_int, 0>
template <> struct TypeIdentity<cl_int,1> { static cl_int val() { return (cl_int)0x80000000; } }; {
template <> struct TypeIdentity<cl_int,2> { static cl_int val() { return (cl_int)0x7fffffff; } }; static cl_int val() { return (cl_int)0; }
};
template <> struct TypeIdentity<cl_int, 1>
{
static cl_int val() { return (cl_int)0x80000000; }
};
template <> struct TypeIdentity<cl_int, 2>
{
static cl_int val() { return (cl_int)0x7fffffff; }
};
template <> struct TypeIdentity<cl_ulong,0> { static cl_ulong val() { return (cl_ulong)0 ; } }; template <> struct TypeIdentity<cl_ulong, 0>
template <> struct TypeIdentity<cl_ulong,1> { static cl_ulong val() { return (cl_ulong)0 ; } }; {
template <> struct TypeIdentity<cl_ulong,2> { static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL ; } }; static cl_ulong val() { return (cl_ulong)0; }
};
template <> struct TypeIdentity<cl_ulong, 1>
{
static cl_ulong val() { return (cl_ulong)0; }
};
template <> struct TypeIdentity<cl_ulong, 2>
{
static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL; }
};
template <> struct TypeIdentity<cl_long,0> { static cl_long val() { return (cl_long)0; } }; template <> struct TypeIdentity<cl_long, 0>
template <> struct TypeIdentity<cl_long,1> { static cl_long val() { return (cl_long)0x8000000000000000ULL; } }; {
template <> struct TypeIdentity<cl_long,2> { static cl_long val() { return (cl_long)0x7fffffffffffffffULL; } }; static cl_long val() { return (cl_long)0; }
};
template <> struct TypeIdentity<cl_long, 1>
{
static cl_long val() { return (cl_long)0x8000000000000000ULL; }
};
template <> struct TypeIdentity<cl_long, 2>
{
static cl_long val() { return (cl_long)0x7fffffffffffffffULL; }
};
template <> struct TypeIdentity<float,0> { static float val() { return 0.F; } }; template <> struct TypeIdentity<float, 0>
template <> struct TypeIdentity<float,1> { static float val() { return -std::numeric_limits<float>::infinity(); } }; {
template <> struct TypeIdentity<float,2> { static float val() { return std::numeric_limits<float>::infinity(); } }; static float val() { return 0.F; }
};
template <> struct TypeIdentity<float, 1>
{
static float val() { return -std::numeric_limits<float>::infinity(); }
};
template <> struct TypeIdentity<float, 2>
{
static float val() { return std::numeric_limits<float>::infinity(); }
};
template <> struct TypeIdentity<double,0> { static double val() { return 0.L; } }; template <> struct TypeIdentity<double, 0>
{
static double val() { return 0.L; }
};
template <> struct TypeIdentity<double,1> { static double val() { return -std::numeric_limits<double>::infinity(); } }; template <> struct TypeIdentity<double, 1>
template <> struct TypeIdentity<double,2> { static double val() { return std::numeric_limits<double>::infinity(); } }; {
static double val() { return -std::numeric_limits<double>::infinity(); }
};
template <> struct TypeIdentity<double, 2>
{
static double val() { return std::numeric_limits<double>::infinity(); }
};
template <typename Ty> struct TypeCheck; template <typename Ty> struct TypeCheck;
template <> struct TypeCheck<cl_uint> { static bool val(cl_device_id) { return true; } }; template <> struct TypeCheck<cl_uint>
template <> struct TypeCheck<cl_int> { static bool val(cl_device_id) { return true; } }; {
static bool val(cl_device_id) { return true; }
};
template <> struct TypeCheck<cl_int>
{
static bool val(cl_device_id) { return true; }
};
static bool static bool int64_ok(cl_device_id device)
int64_ok(cl_device_id device)
{ {
char profile[128]; char profile[128];
int error; int error;
error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
if (error) { (void *)&profile, NULL);
if (error)
{
log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n"); log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n");
return false; return false;
} }
if (strcmp(profile, "EMBEDDED_PROFILE") == 0) if (strcmp(profile, "EMBEDDED_PROFILE") == 0)
return is_extension_available(device, "cles_khr_int64"); return is_extension_available(device, "cles_khr_int64");
return true; return true;
} }
template <> struct TypeCheck<cl_ulong> { static bool val(cl_device_id device) { return int64_ok(device); } }; template <> struct TypeCheck<cl_ulong>
template <> struct TypeCheck<cl_long> { static bool val(cl_device_id device) { return int64_ok(device); } }; {
template <> struct TypeCheck<cl_float> { static bool val(cl_device_id) { return true; } }; static bool val(cl_device_id device) { return int64_ok(device); }
template <> struct TypeCheck<cl_half> {
static bool val(cl_device_id device) { return is_extension_available(device, "cl_khr_fp16"); }
}; };
template <> struct TypeCheck<double> { template <> struct TypeCheck<cl_long>
static bool val(cl_device_id device) { {
static bool val(cl_device_id device) { return int64_ok(device); }
};
template <> struct TypeCheck<cl_float>
{
static bool val(cl_device_id) { return true; }
};
template <> struct TypeCheck<cl_half>
{
static bool val(cl_device_id device)
{
return is_extension_available(device, "cl_khr_fp16");
}
};
template <> struct TypeCheck<double>
{
static bool val(cl_device_id device)
{
int error; int error;
cl_device_fp_config c; cl_device_fp_config c;
error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), (void *)&c, NULL); error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c),
if (error) { (void *)&c, NULL);
log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n"); if (error)
{
log_info(
"clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
return false; return false;
} }
return c != 0; return c != 0;
@@ -116,10 +239,10 @@ template <> struct TypeCheck<double> {
// Run a test kernel to compute the result of a built-in on an input // Run a test kernel to compute the result of a built-in on an input
static int static int run_kernel(cl_context context, cl_command_queue queue,
run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t global, size_t local, cl_kernel kernel, size_t global, size_t local,
void *idata, size_t isize, void *mdata, size_t msize, void *idata, size_t isize, void *mdata, size_t msize,
void *odata, size_t osize, size_t tsize=0) void *odata, size_t osize, size_t tsize = 0)
{ {
clMemWrapper in; clMemWrapper in;
clMemWrapper xy; clMemWrapper xy;
@@ -136,8 +259,10 @@ run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t
out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error); out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error);
test_error(error, "clCreateBuffer failed"); test_error(error, "clCreateBuffer failed");
if (tsize) { if (tsize)
tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, tsize, NULL, &error); {
tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS,
tsize, NULL, &error);
test_error(error, "clCreateBuffer failed"); test_error(error, "clCreateBuffer failed");
} }
@@ -150,21 +275,26 @@ run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t
error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out); error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
if (tsize) { if (tsize)
{
error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp); error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
} }
error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, NULL); error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL,
NULL);
test_error(error, "clEnqueueWriteBuffer failed"); test_error(error, "clEnqueueWriteBuffer failed");
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed"); test_error(error, "clEnqueueNDRangeKernel failed");
error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, NULL); error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL,
NULL);
test_error(error, "clEnqueueReadBuffer failed"); test_error(error, "clEnqueueReadBuffer failed");
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, NULL); error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL,
NULL);
test_error(error, "clEnqueueReadBuffer failed"); test_error(error, "clEnqueueReadBuffer failed");
error = clFinish(queue); error = clFinish(queue);
@@ -174,10 +304,13 @@ run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t
} }
// Driver for testing a single built in function // Driver for testing a single built in function
template <typename Ty, typename Fns, size_t GSIZE, size_t LSIZE, size_t TSIZE=0> template <typename Ty, typename Fns, size_t GSIZE, size_t LSIZE,
struct test { size_t TSIZE = 0>
static int struct test
run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, const char *kname, const char *src, int dynscl=0) {
static int run(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements, const char *kname,
const char *src, int dynscl = 0)
{ {
size_t tmp; size_t tmp;
int error; int error;
@@ -189,24 +322,25 @@ struct test {
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
cl_platform_id platform; cl_platform_id platform;
cl_int sgmap[2*GSIZE]; cl_int sgmap[2 * GSIZE];
Ty mapin[LSIZE]; Ty mapin[LSIZE];
Ty mapout[LSIZE]; Ty mapout[LSIZE];
// Make sure a test of type Ty is supported by the device // Make sure a test of type Ty is supported by the device
if (!TypeCheck<Ty>::val(device)) if (!TypeCheck<Ty>::val(device)) return 0;
return 0;
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
(void *)&platform, NULL);
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
kstrings[0] = "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" kstrings[0] = "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
"#define XY(M,I) M[I].x = get_sub_group_local_id(); M[I].y = get_sub_group_id();\n"; "#define XY(M,I) M[I].x = get_sub_group_local_id(); "
"M[I].y = get_sub_group_id();\n";
kstrings[1] = TypeDef<Ty>::val(); kstrings[1] = TypeDef<Ty>::val();
kstrings[2] = src; kstrings[2] = src;
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0"); error = create_single_kernel_helper_with_build_options(
if (error != 0) context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0");
return error; if (error != 0) return error;
// Determine some local dimensions to use for the test. // Determine some local dimensions to use for the test.
global = GSIZE; global = GSIZE;
@@ -215,31 +349,42 @@ struct test {
// Limit it a bit so we have muliple work groups // Limit it a bit so we have muliple work groups
// Ideally this will still be large enough to give us multiple subgroups // Ideally this will still be large enough to give us multiple subgroups
if (local > LSIZE) if (local > LSIZE) local = LSIZE;
local = LSIZE;
// Get the sub group info // Get the sub group info
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr; clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr;
clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)
"clGetKernelSubGroupInfoKHR"); clGetExtensionFunctionAddressForPlatform(
if (clGetKernelSubGroupInfoKHR_ptr == NULL) { platform, "clGetKernelSubGroupInfoKHR");
log_error("ERROR: clGetKernelSubGroupInfoKHR function not available"); if (clGetKernelSubGroupInfoKHR_ptr == NULL)
{
log_error(
"ERROR: clGetKernelSubGroupInfoKHR function not available");
return -1; return -1;
} }
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, error = clGetKernelSubGroupInfoKHR_ptr(
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
test_error(error,
"clGetKernelSubGroupInfoKHR failed for "
"CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
subgroup_size = (int)tmp; subgroup_size = (int)tmp;
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, error = clGetKernelSubGroupInfoKHR_ptr(
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
test_error(error,
"clGetKernelSubGroupInfoKHR failed for "
"CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
num_subgroups = (int)tmp; num_subgroups = (int)tmp;
// Make sure the number of sub groups is what we expect // Make sure the number of sub groups is what we expect
if (num_subgroups != (local + subgroup_size - 1)/ subgroup_size) { if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
log_error("ERROR: unexpected number of subgroups (%d) returned by clGetKernelSubGroupInfoKHR\n", num_subgroups); {
log_error("ERROR: unexpected number of subgroups (%d) returned by "
"clGetKernelSubGroupInfoKHR\n",
num_subgroups);
return -1; return -1;
} }
@@ -248,38 +393,38 @@ struct test {
size_t input_array_size = GSIZE; size_t input_array_size = GSIZE;
size_t output_array_size = GSIZE; size_t output_array_size = GSIZE;
if (dynscl != 0) { if (dynscl != 0)
input_array_size = (int)global / (int)local * num_subgroups * dynscl; {
output_array_size = (int)global / (int)local * dynscl; input_array_size =
(int)global / (int)local * num_subgroups * dynscl;
output_array_size = (int)global / (int)local * dynscl;
} }
idata.resize(input_array_size); idata.resize(input_array_size);
odata.resize(output_array_size); odata.resize(output_array_size);
// Run the kernel once on zeroes to get the map // Run the kernel once on zeroes to get the map
memset(&idata[0], 0, input_array_size * sizeof(Ty)); memset(&idata[0], 0, input_array_size * sizeof(Ty));
error = run_kernel(context, queue, kernel, global, local, error = run_kernel(context, queue, kernel, global, local, &idata[0],
&idata[0], input_array_size * sizeof(Ty), input_array_size * sizeof(Ty), sgmap,
sgmap, global*sizeof(cl_int)*2, global * sizeof(cl_int) * 2, &odata[0],
&odata[0], output_array_size * sizeof(Ty), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
TSIZE*sizeof(Ty)); if (error) return error;
if (error)
return error;
// Generate the desired input for the kernel // Generate the desired input for the kernel
Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local, (int)global / (int)local); Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local,
(int)global / (int)local);
error = run_kernel(context, queue, kernel, global, local, error = run_kernel(context, queue, kernel, global, local, &idata[0],
&idata[0], input_array_size * sizeof(Ty), input_array_size * sizeof(Ty), sgmap,
sgmap, global*sizeof(cl_int)*2, global * sizeof(cl_int) * 2, &odata[0],
&odata[0], output_array_size * sizeof(Ty), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
TSIZE*sizeof(Ty)); if (error) return error;
if (error)
return error;
// Check the result // Check the result
return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap, subgroup_size, (int)local, (int)global / (int)local); return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap,
subgroup_size, (int)local, (int)global / (int)local);
} }
}; };

View File

@@ -18,102 +18,113 @@
#include "harness/conversions.h" #include "harness/conversions.h"
#include "harness/typeWrappers.h" #include "harness/typeWrappers.h"
static const char * lbar_source = static const char *lbar_source =
"__kernel void test_lbar(const __global Type *in, __global int2 *xy, __global Type *out)\n" "__kernel void test_lbar(const __global Type *in, __global int2 *xy, "
"{\n" "__global Type *out)\n"
" __local int tmp[200];\n" "{\n"
" int gid = get_global_id(0);\n" " __local int tmp[200];\n"
" int nid = get_sub_group_size();\n" " int gid = get_global_id(0);\n"
" int lid = get_sub_group_local_id();\n" " int nid = get_sub_group_size();\n"
" xy[gid].x = lid;\n" " int lid = get_sub_group_local_id();\n"
" xy[gid].y = get_sub_group_id();\n" " xy[gid].x = lid;\n"
" if (get_sub_group_id() == 0) {\n" " xy[gid].y = get_sub_group_id();\n"
" tmp[lid] = in[gid];\n" " if (get_sub_group_id() == 0) {\n"
" sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n" " tmp[lid] = in[gid];\n"
" out[gid] = tmp[nid-1-lid];\n" " sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
" } else {\n" " out[gid] = tmp[nid-1-lid];\n"
" out[gid] = -in[gid];\n" " } else {\n"
" }\n" " out[gid] = -in[gid];\n"
"}\n"; " }\n"
"}\n";
static const char * gbar_source = static const char *gbar_source =
"__kernel void test_gbar(const __global Type *in, __global int2 *xy, __global Type *out, __global Type *tmp)\n" "__kernel void test_gbar(const __global Type *in, __global int2 *xy, "
"{\n" "__global Type *out, __global Type *tmp)\n"
" int gid = get_global_id(0);\n" "{\n"
" int nid = get_sub_group_size();\n" " int gid = get_global_id(0);\n"
" int lid = get_sub_group_local_id();\n" " int nid = get_sub_group_size();\n"
" int tof = get_group_id(0)*get_max_sub_group_size();\n" " int lid = get_sub_group_local_id();\n"
" xy[gid].x = lid;\n" " int tof = get_group_id(0)*get_max_sub_group_size();\n"
" xy[gid].y = get_sub_group_id();\n" " xy[gid].x = lid;\n"
" if (get_sub_group_id() == 0) {\n" " xy[gid].y = get_sub_group_id();\n"
" tmp[tof+lid] = in[gid];\n" " if (get_sub_group_id() == 0) {\n"
" sub_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" " tmp[tof+lid] = in[gid];\n"
" out[gid] = tmp[tof+nid-1-lid];\n" " sub_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
" } else {\n" " out[gid] = tmp[tof+nid-1-lid];\n"
" out[gid] = -in[gid];\n" " } else {\n"
" }\n" " out[gid] = -in[gid];\n"
"}\n"; " }\n"
"}\n";
// barrier test functions // barrier test functions
template <int Which> template <int Which> struct BAR
struct BAR { {
static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng) static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng)
{ {
int i, ii, j, k, n; int i, ii, j, k, n;
int nj = (nw + ns - 1)/ns; int nj = (nw + ns - 1) / ns;
int e; int e;
ii = 0; ii = 0;
for (k=0; k<ng; ++k) { for (k = 0; k < ng; ++k)
for (j=0; j<nj; ++j) { {
ii = j*ns; for (j = 0; j < nj; ++j)
{
ii = j * ns;
n = ii + ns > nw ? nw - ii : ns; n = ii + ns > nw ? nw - ii : ns;
for (i=0;i<n;++i) for (i = 0; i < n; ++i) t[ii + i] = genrand_int32(gMTdata);
t[ii+i] = genrand_int32(gMTdata);
} }
// Now map into work group using map from device // Now map into work group using map from device
for (j=0;j<nw;++j) { for (j = 0; j < nw; ++j)
i = m[2*j+1]*ns + m[2*j]; {
i = m[2 * j + 1] * ns + m[2 * j];
x[j] = t[i]; x[j] = t[i];
} }
x += nw; x += nw;
m += 2*nw; m += 2 * nw;
} }
} }
static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, int ns, int nw, int ng) static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
int ns, int nw, int ng)
{ {
int ii, i, j, k, n; int ii, i, j, k, n;
int nj = (nw + ns - 1)/ns; int nj = (nw + ns - 1) / ns;
cl_int tr, rr; cl_int tr, rr;
if (Which == 0) if (Which == 0)
log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...\n"); log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...\n");
else else
log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...\n"); log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...\n");
for (k=0; k<ng; ++k) { for (k = 0; k < ng; ++k)
{
// Map to array indexed to array indexed by local ID and sub group // Map to array indexed to array indexed by local ID and sub group
for (j=0; j<nw; ++j) { for (j = 0; j < nw; ++j)
i = m[2*j+1]*ns + m[2*j]; {
i = m[2 * j + 1] * ns + m[2 * j];
mx[i] = x[j]; mx[i] = x[j];
my[i] = y[j]; my[i] = y[j];
} }
for (j=0; j<nj; ++j) { for (j = 0; j < nj; ++j)
ii = j*ns; {
ii = j * ns;
n = ii + ns > nw ? nw - ii : ns; n = ii + ns > nw ? nw - ii : ns;
for (i=0; i<n; ++i) { for (i = 0; i < n; ++i)
tr = j == 0 ? mx[ii + n - 1 - i] : -mx[ii + i]; {
tr = j == 0 ? mx[ii + n - 1 - i] : -mx[ii + i];
rr = my[ii + i]; rr = my[ii + i];
if (tr != rr) { if (tr != rr)
log_error("ERROR: sub_group_barrier mismatch for local id %d in sub group %d in group %d\n", {
i, j, k); log_error("ERROR: sub_group_barrier mismatch for local "
"id %d in sub group %d in group %d\n",
i, j, k);
return -1; return -1;
} }
} }
@@ -121,7 +132,7 @@ struct BAR {
x += nw; x += nw;
y += nw; y += nw;
m += 2*nw; m += 2 * nw;
} }
return 0; return 0;
@@ -130,8 +141,8 @@ struct BAR {
// Entry point from main // Entry point from main
int int test_barrier_functions(cl_device_id device, cl_context context,
test_barrier_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) cl_command_queue queue, int num_elements)
{ {
int error; int error;
@@ -139,9 +150,10 @@ test_barrier_functions(cl_device_id device, cl_context context, cl_command_queue
#define G 2000 #define G 2000
#define L 200 #define L 200
error = test<cl_int, BAR<0>, G, L>::run(device, context, queue, num_elements, "test_lbar", lbar_source); error = test<cl_int, BAR<0>, G, L>::run(
error = test<cl_int, BAR<1>, G, L, G>::run(device, context, queue, num_elements, "test_gbar", gbar_source); device, context, queue, num_elements, "test_lbar", lbar_source);
error = test<cl_int, BAR<1>, G, L, G>::run(
device, context, queue, num_elements, "test_gbar", gbar_source);
return error; return error;
} }

View File

@@ -15,117 +15,160 @@
// //
#include "procs.h" #include "procs.h"
typedef struct { typedef struct
{
cl_uint maxSubGroupSize; cl_uint maxSubGroupSize;
cl_uint numSubGroups; cl_uint numSubGroups;
} result_data; } result_data;
static const char * query_kernel_source = static const char *query_kernel_source =
"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
"\n" "\n"
"typedef struct {\n" "typedef struct {\n"
" uint maxSubGroupSize;\n" " uint maxSubGroupSize;\n"
" uint numSubGroups;\n" " uint numSubGroups;\n"
"} result_data;\n" "} result_data;\n"
"\n" "\n"
"__kernel void query_kernel( __global result_data *outData )\n" "__kernel void query_kernel( __global result_data *outData )\n"
"{\n" "{\n"
" int gid = get_global_id( 0 );\n" " int gid = get_global_id( 0 );\n"
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
" outData[gid].numSubGroups = get_num_sub_groups();\n" " outData[gid].numSubGroups = get_num_sub_groups();\n"
"}"; "}";
int int test_sub_group_info(cl_device_id device, cl_context context,
test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) cl_command_queue queue, int num_elements)
{ {
static const size_t gsize0 = 80; static const size_t gsize0 = 80;
int i, error; int i, error;
size_t realSize; size_t realSize;
size_t kernel_max_subgroup_size, kernel_subgroup_count; size_t kernel_max_subgroup_size, kernel_subgroup_count;
size_t global[] = {gsize0,14,10}; size_t global[] = { gsize0, 14, 10 };
size_t local[] = {0,0,0}; size_t local[] = { 0, 0, 0 };
result_data result[gsize0]; result_data result[gsize0];
cl_uint max_dimensions; cl_uint max_dimensions;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL); error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); sizeof(max_dimensions), &max_dimensions, NULL);
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
cl_platform_id platform; cl_platform_id platform;
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
clMemWrapper out; clMemWrapper out;
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &query_kernel_source, "query_kernel", "-cl-std=CL2.0"); error = create_single_kernel_helper_with_build_options(
if (error != 0) context, &program, &kernel, 1, &query_kernel_source, "query_kernel",
return error; "-cl-std=CL2.0");
if (error != 0) return error;
// Determine some local dimensions to use for the test. // Determine some local dimensions to use for the test.
if (max_dimensions == 1) { if (max_dimensions == 1)
error = get_max_common_work_group_size(context, kernel, global[0], &local[0]); {
error = get_max_common_work_group_size(context, kernel, global[0],
&local[0]);
test_error(error, "get_max_common_work_group_size failed"); test_error(error, "get_max_common_work_group_size failed");
} else if (max_dimensions == 2) { }
error = get_max_common_2D_work_group_size(context, kernel, global, local); else if (max_dimensions == 2)
{
error =
get_max_common_2D_work_group_size(context, kernel, global, local);
test_error(error, "get_max_common_2D_work_group_size failed"); test_error(error, "get_max_common_2D_work_group_size failed");
} else { }
error = get_max_common_3D_work_group_size(context, kernel, global, local); else
{
error =
get_max_common_3D_work_group_size(context, kernel, global, local);
test_error(error, "get_max_common_3D_work_group_size failed"); test_error(error, "get_max_common_3D_work_group_size failed");
} }
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
(void *)&platform, NULL);
test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM"); test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM");
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr; clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr;
clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clGetKernelSubGroupInfoKHR"); clGetKernelSubGroupInfoKHR_ptr =
if (clGetKernelSubGroupInfoKHR_ptr == NULL) { (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(
platform, "clGetKernelSubGroupInfoKHR");
if (clGetKernelSubGroupInfoKHR_ptr == NULL)
{
log_error("ERROR: clGetKernelSubGroupInfoKHR function not available"); log_error("ERROR: clGetKernelSubGroupInfoKHR function not available");
return -1; return -1;
} }
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, error = clGetKernelSubGroupInfoKHR_ptr(
sizeof(local), (void *)&local, sizeof(kernel_max_subgroup_size), (void *)&kernel_max_subgroup_size, &realSize); kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); sizeof(local), (void *)&local, sizeof(kernel_max_subgroup_size),
log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR for the kernel is %d.\n", (int)kernel_max_subgroup_size); (void *)&kernel_max_subgroup_size, &realSize);
test_error(error,
"clGetKernelSubGroupInfoKHR failed for "
"CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR for the kernel "
"is %d.\n",
(int)kernel_max_subgroup_size);
if (realSize != sizeof(kernel_max_subgroup_size)) { if (realSize != sizeof(kernel_max_subgroup_size))
log_error( "ERROR: Returned size of max sub group size not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_max_subgroup_size), (int)realSize ); {
log_error("ERROR: Returned size of max sub group size not valid! "
"(Expected %d, got %d)\n",
(int)sizeof(kernel_max_subgroup_size), (int)realSize);
return -1; return -1;
} }
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, error = clGetKernelSubGroupInfoKHR_ptr(
sizeof(local), (void *)&local, sizeof(kernel_subgroup_count), (void *)&kernel_subgroup_count, &realSize); kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); sizeof(local), (void *)&local, sizeof(kernel_subgroup_count),
log_info("The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR for the kernel is %d.\n", (int)kernel_subgroup_count); (void *)&kernel_subgroup_count, &realSize);
test_error(error,
"clGetKernelSubGroupInfoKHR failed for "
"CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
log_info(
"The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR for the kernel is %d.\n",
(int)kernel_subgroup_count);
if (realSize != sizeof(kernel_subgroup_count)) { if (realSize != sizeof(kernel_subgroup_count))
log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize ); {
log_error("ERROR: Returned size of sub group count not valid! "
"(Expected %d, got %d)\n",
(int)sizeof(kernel_subgroup_count), (int)realSize);
return -1; return -1;
} }
// Verify that the kernel gets the same max_subgroup_size and subgroup_count // Verify that the kernel gets the same max_subgroup_size and subgroup_count
out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(result), NULL, &error); out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(result), NULL,
&error);
test_error(error, "clCreateBuffer failed"); test_error(error, "clCreateBuffer failed");
error = clSetKernelArg(kernel, 0, sizeof(out), &out); error = clSetKernelArg(kernel, 0, sizeof(out), &out);
test_error(error, "clSetKernelArg failed"); test_error(error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(queue, kernel, max_dimensions, NULL, global, local, 0, NULL, NULL); error = clEnqueueNDRangeKernel(queue, kernel, max_dimensions, NULL, global,
local, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed"); test_error(error, "clEnqueueNDRangeKernel failed");
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), &result, 0, NULL, NULL); error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result),
&result, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed"); test_error(error, "clEnqueueReadBuffer failed");
error = clFinish(queue); error = clFinish(queue);
test_error(error, "clFinish failed"); test_error(error, "clFinish failed");
for (i=0; i<(int)gsize0; ++i) { for (i = 0; i < (int)gsize0; ++i)
if (result[i].maxSubGroupSize != (cl_uint)kernel_max_subgroup_size) { {
log_error("ERROR: get_max_subgroup_size() doesn't match result from clGetKernelSubGroupInfoKHR, %u vs %u\n", if (result[i].maxSubGroupSize != (cl_uint)kernel_max_subgroup_size)
result[i].maxSubGroupSize, (cl_uint)kernel_max_subgroup_size); {
log_error("ERROR: get_max_subgroup_size() doesn't match result "
"from clGetKernelSubGroupInfoKHR, %u vs %u\n",
result[i].maxSubGroupSize,
(cl_uint)kernel_max_subgroup_size);
return -1; return -1;
} }
if (result[i].numSubGroups != (cl_uint)kernel_subgroup_count) { if (result[i].numSubGroups != (cl_uint)kernel_subgroup_count)
log_error("ERROR: get_num_sub_groups() doesn't match result from clGetKernelSubGroupInfoKHR, %u vs %u\n", {
log_error("ERROR: get_num_sub_groups() doesn't match result from "
"clGetKernelSubGroupInfoKHR, %u vs %u\n",
result[i].numSubGroups, (cl_uint)kernel_subgroup_count); result[i].numSubGroups, (cl_uint)kernel_subgroup_count);
return -1; return -1;
} }
@@ -133,4 +176,3 @@ test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue qu
return 0; return 0;
} }

File diff suppressed because it is too large Load Diff

View File

@@ -17,140 +17,186 @@
#include "harness/conversions.h" #include "harness/conversions.h"
#include "harness/typeWrappers.h" #include "harness/typeWrappers.h"
struct get_test_data { struct get_test_data
{
cl_uint subGroupSize; cl_uint subGroupSize;
cl_uint maxSubGroupSize; cl_uint maxSubGroupSize;
cl_uint numSubGroups; cl_uint numSubGroups;
cl_uint enqNumSubGroups; cl_uint enqNumSubGroups;
cl_uint subGroupId; cl_uint subGroupId;
cl_uint subGroupLocalId; cl_uint subGroupLocalId;
bool operator==(get_test_data x) { bool operator==(get_test_data x)
return subGroupSize == x.subGroupSize && {
maxSubGroupSize == x.maxSubGroupSize && return subGroupSize == x.subGroupSize
numSubGroups == x.numSubGroups && && maxSubGroupSize == x.maxSubGroupSize
subGroupId == x.subGroupId && && numSubGroups == x.numSubGroups && subGroupId == x.subGroupId
subGroupLocalId == x.subGroupLocalId; && subGroupLocalId == x.subGroupLocalId;
} }
}; };
static const char * get_test_source = static const char *get_test_source =
"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
"\n" "\n"
"typedef struct {\n" "typedef struct {\n"
" uint subGroupSize;\n" " uint subGroupSize;\n"
" uint maxSubGroupSize;\n" " uint maxSubGroupSize;\n"
" uint numSubGroups;\n" " uint numSubGroups;\n"
" uint enqNumSubGroups;\n" " uint enqNumSubGroups;\n"
" uint subGroupId;\n" " uint subGroupId;\n"
" uint subGroupLocalId;\n" " uint subGroupLocalId;\n"
"} get_test_data;\n" "} get_test_data;\n"
"\n" "\n"
"__kernel void get_test( __global get_test_data *outData )\n" "__kernel void get_test( __global get_test_data *outData )\n"
"{\n" "{\n"
" int gid = get_global_id( 0 );\n" " int gid = get_global_id( 0 );\n"
" outData[gid].subGroupSize = get_sub_group_size();\n" " outData[gid].subGroupSize = get_sub_group_size();\n"
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
" outData[gid].numSubGroups = get_num_sub_groups();\n" " outData[gid].numSubGroups = get_num_sub_groups();\n"
" outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n" " outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
" outData[gid].subGroupId = get_sub_group_id();\n" " outData[gid].subGroupId = get_sub_group_id();\n"
" outData[gid].subGroupLocalId = get_sub_group_local_id();\n" " outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
"}"; "}";
static int static int check_group(const get_test_data *result, int nw, cl_uint ensg,
check_group(const get_test_data *result, int nw, cl_uint ensg, int maxwgs) int maxwgs)
{ {
int first = -1; int first = -1;
int last = -1; int last = -1;
int i, j; int i, j;
cl_uint hit[32]; cl_uint hit[32];
for (i=0; i<nw; ++i) { for (i = 0; i < nw; ++i)
{
if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0) if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0)
first = i; first = i;
if (result[i].subGroupId == result[0].numSubGroups-1 && result[i].subGroupLocalId == 0) if (result[i].subGroupId == result[0].numSubGroups - 1
&& result[i].subGroupLocalId == 0)
last = i; last = i;
if (first != -1 && last != -1) if (first != -1 && last != -1) break;
break;
} }
if (first == -1 || last == -1) { if (first == -1 || last == -1)
{
log_error("ERROR: expected sub group id's are missing\n"); log_error("ERROR: expected sub group id's are missing\n");
return -1; return -1;
} }
// Check them // Check them
if (result[first].subGroupSize == 0) { if (result[first].subGroupSize == 0)
{
log_error("ERROR: get_sub_group_size() returned 0\n"); log_error("ERROR: get_sub_group_size() returned 0\n");
return -1; return -1;
} }
if (result[first].maxSubGroupSize == 0 || result[first].maxSubGroupSize > maxwgs) { if (result[first].maxSubGroupSize == 0
log_error("ERROR: get_max_subgroup_size() returned incorrect result: %u\n", result[first].maxSubGroupSize); || result[first].maxSubGroupSize > maxwgs)
{
log_error(
"ERROR: get_max_subgroup_size() returned incorrect result: %u\n",
result[first].maxSubGroupSize);
return -1; return -1;
} }
if (result[first].subGroupSize > result[first].maxSubGroupSize) { if (result[first].subGroupSize > result[first].maxSubGroupSize)
{
log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n"); log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n");
return -1; return -1;
} }
if (result[last].subGroupSize > result[first].subGroupSize) { if (result[last].subGroupSize > result[first].subGroupSize)
{
log_error("ERROR: last sub group larger than first sub group\n"); log_error("ERROR: last sub group larger than first sub group\n");
return -1; return -1;
} }
if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg) { if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg)
log_error("ERROR: get_num_sub_groups() returned incorrect result: %u \n", result[first].numSubGroups); {
log_error(
"ERROR: get_num_sub_groups() returned incorrect result: %u \n",
result[first].numSubGroups);
return -1; return -1;
} }
memset(hit, 0, sizeof(hit)); memset(hit, 0, sizeof(hit));
for (i=0; i<nw; ++i) { for (i = 0; i < nw; ++i)
if (result[i].maxSubGroupSize != result[first].maxSubGroupSize || {
result[i].numSubGroups != result[first].numSubGroups) { if (result[i].maxSubGroupSize != result[first].maxSubGroupSize
|| result[i].numSubGroups != result[first].numSubGroups)
{
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
return -1; return -1;
} }
if (result[i].subGroupId >= result[first].numSubGroups) { if (result[i].subGroupId >= result[first].numSubGroups)
log_error("ERROR: get_sub_group_id() returned out of range value: %u\n", result[i].subGroupId); {
log_error(
"ERROR: get_sub_group_id() returned out of range value: %u\n",
result[i].subGroupId);
return -1; return -1;
} }
if (result[i].enqNumSubGroups != ensg) { if (result[i].enqNumSubGroups != ensg)
log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect value: %u\n", result[i].enqNumSubGroups); {
log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect "
"value: %u\n",
result[i].enqNumSubGroups);
return -1; return -1;
} }
if (result[first].numSubGroups > 1) { if (result[first].numSubGroups > 1)
if (result[i].subGroupId < result[first].numSubGroups-1) { {
if (result[i].subGroupSize != result[first].subGroupSize) { if (result[i].subGroupId < result[first].numSubGroups - 1)
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); {
if (result[i].subGroupSize != result[first].subGroupSize)
{
log_error(
"ERROR: unexpected variation in get_*_sub_group_*()\n");
return -1; return -1;
} }
if (result[i].subGroupLocalId >= result[first].subGroupSize) { if (result[i].subGroupLocalId >= result[first].subGroupSize)
log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId); {
return -1; log_error("ERROR: get_sub_group_local_id() returned out of "
} "bounds value: %u \n",
} else { result[i].subGroupLocalId);
if (result[i].subGroupSize != result[last].subGroupSize) {
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
return -1;
}
if (result[i].subGroupLocalId >= result[last].subGroupSize) {
log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId);
return -1; return -1;
} }
} }
} else { else
if (result[i].subGroupSize != result[first].subGroupSize) { {
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); if (result[i].subGroupSize != result[last].subGroupSize)
{
log_error(
"ERROR: unexpected variation in get_*_sub_group_*()\n");
return -1;
}
if (result[i].subGroupLocalId >= result[last].subGroupSize)
{
log_error("ERROR: get_sub_group_local_id() returned out of "
"bounds value: %u \n",
result[i].subGroupLocalId);
return -1;
}
}
}
else
{
if (result[i].subGroupSize != result[first].subGroupSize)
{
log_error(
"ERROR: unexpected variation in get_*_sub_group_*()\n");
return -1; return -1;
} }
if (result[i].subGroupLocalId >= result[first].subGroupSize) { if (result[i].subGroupLocalId >= result[first].subGroupSize)
log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId); {
log_error("ERROR: get_sub_group_local_id() returned out of "
"bounds value: %u \n",
result[i].subGroupLocalId);
return -1; return -1;
} }
} }
j = (result[first].subGroupSize + 31)/32 * result[i].subGroupId + (result[i].subGroupLocalId >> 5); j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId
if (j < sizeof(hit)/4) { + (result[i].subGroupLocalId >> 5);
if (j < sizeof(hit) / 4)
{
cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU); cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU);
if ((hit[j] & b) != 0) { if ((hit[j] & b) != 0)
log_error("ERROR: get_sub_group_local_id() repeated a result in the same sub group\n"); {
log_error("ERROR: get_sub_group_local_id() repeated a result "
"in the same sub group\n");
return -1; return -1;
} }
hit[j] |= b; hit[j] |= b;
@@ -160,8 +206,8 @@ check_group(const get_test_data *result, int nw, cl_uint ensg, int maxwgs)
return 0; return 0;
} }
int int test_work_item_functions(cl_device_id device, cl_context context,
test_work_item_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) cl_command_queue queue, int num_elements)
{ {
static const size_t lsize = 200; static const size_t lsize = 200;
int error; int error;
@@ -170,28 +216,28 @@ test_work_item_functions(cl_device_id device, cl_context context, cl_command_que
cl_uint ensg; cl_uint ensg;
size_t global; size_t global;
size_t local; size_t local;
get_test_data result[lsize*6]; get_test_data result[lsize * 6];
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
clMemWrapper out; clMemWrapper out;
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &get_test_source, "get_test", "-cl-std=CL2.0"); error = create_single_kernel_helper_with_build_options(
if (error != 0) context, &program, &kernel, 1, &get_test_source, "get_test",
return error; "-cl-std=CL2.0");
if (error != 0) return error;
error = get_max_allowed_work_group_size(context, kernel, &local, NULL); error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
if (error != 0) if (error != 0) return error;
return error;
maxwgs = (int)local; maxwgs = (int)local;
// Limit it a bit so we have muliple work groups // Limit it a bit so we have muliple work groups
// Ideally this will still be large enough to give us multiple subgroups // Ideally this will still be large enough to give us multiple subgroups
if (local > lsize) if (local > lsize) local = lsize;
local = lsize;
// Create our buffer // Create our buffer
out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL, &error); out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL,
&error);
test_error(error, "clCreateBuffer failed"); test_error(error, "clCreateBuffer failed");
// Set argument // Set argument
@@ -206,13 +252,16 @@ test_work_item_functions(cl_device_id device, cl_context context, cl_command_que
// Collect the data // Collect the data
memset((void *)&result, 0xf0, sizeof(result)); memset((void *)&result, 0xf0, sizeof(result));
error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL); error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result),
(void *)&result, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed"); test_error(error, "clEnqueueWriteBuffer failed");
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed"); test_error(error, "clEnqueueNDRangeKernel failed");
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL); error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result),
(void *)&result, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed"); test_error(error, "clEnqueueReadBuffer failed");
error = clFinish(queue); error = clFinish(queue);
@@ -223,30 +272,32 @@ test_work_item_functions(cl_device_id device, cl_context context, cl_command_que
// Check the first group // Check the first group
error = check_group(result, nw, ensg, maxwgs); error = check_group(result, nw, ensg, maxwgs);
if (error) if (error) return error;
return error;
q = (int)global / nw; q = (int)global / nw;
r = (int)global % nw; r = (int)global % nw;
// Check the remaining work groups including the last if it is the same size // Check the remaining work groups including the last if it is the same size
for (k=1; k<q; ++k) { for (k = 1; k < q; ++k)
for (j=0; j<nw; ++j) { {
i = k*nw + j; for (j = 0; j < nw; ++j)
if (!(result[i] == result[i-nw])) { {
log_error("ERROR: sub group mapping is not identical for all work groups\n"); i = k * nw + j;
if (!(result[i] == result[i - nw]))
{
log_error("ERROR: sub group mapping is not identical for all "
"work groups\n");
return -1; return -1;
} }
} }
} }
// Check the last group if it wasn't the same size // Check the last group if it wasn't the same size
if (r != 0) { if (r != 0)
error = check_group(result + q*nw, r, ensg, maxwgs); {
if (error) error = check_group(result + q * nw, r, ensg, maxwgs);
return error; if (error) return error;
} }
return 0; return 0;
} }