mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Fix test_subgroups - test as core feature. (#682)
* Fix test_subgroups - test as core feature. * Fix kernels pragma in case of OpenCL 2.1 core subgroups * Change global variable names to gVariable convention * Allow subgroups tests execute 2 paths - as core feature - as extension feature * Fix code formatting. * Set correct OpenCL version * Fix code format * test_subgroups review fixes: * do not use global variables * make IFP as separate tests * use stringstream data type * use special class to load function pointer for specific API * Remove not not used variable * Test subgroups - use common enums * Test subgroups - set TEST_SKIPPED_ITSELF
This commit is contained in:
committed by
GitHub
parent
944b0a8178
commit
58cf793fdb
@@ -6,6 +6,7 @@ set(${MODULE_NAME}_SOURCES
|
||||
test_queries.cpp
|
||||
test_workitem.cpp
|
||||
test_workgroup.cpp
|
||||
test_ifp.cpp
|
||||
)
|
||||
|
||||
include(../CMakeCommon.txt)
|
||||
|
||||
@@ -23,49 +23,22 @@
|
||||
MTdata gMTdata;
|
||||
|
||||
test_definition test_list[] = {
|
||||
ADD_TEST(sub_group_info),
|
||||
ADD_TEST(work_item_functions),
|
||||
ADD_TEST(work_group_functions),
|
||||
ADD_TEST(barrier_functions),
|
||||
ADD_TEST_VERSION(sub_group_info_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(sub_group_info_core, Version(2, 1)),
|
||||
ADD_TEST_VERSION(work_item_functions_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(work_item_functions_core, Version(2, 1)),
|
||||
ADD_TEST_VERSION(work_group_functions_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(work_group_functions_core, Version(2, 1)),
|
||||
ADD_TEST_VERSION(barrier_functions_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(barrier_functions_core, Version(2, 1)),
|
||||
ADD_TEST_VERSION(ifp_ext, Version(2, 0)),
|
||||
ADD_TEST_VERSION(ifp_core, Version(2, 1))
|
||||
};
|
||||
|
||||
const int test_num = ARRAY_SIZE(test_list);
|
||||
|
||||
static test_status checkSubGroupsExtension(cl_device_id device)
|
||||
{
|
||||
// The extension is optional in OpenCL 2.0 (minimum required version) and
|
||||
// required in later versions.
|
||||
auto version = get_device_cl_version(device);
|
||||
auto expected_min_version = Version(2, 0);
|
||||
|
||||
if (version < expected_min_version)
|
||||
{
|
||||
version_expected_info("Test", expected_min_version.to_string().c_str(),
|
||||
version.to_string().c_str());
|
||||
return TEST_SKIP;
|
||||
}
|
||||
|
||||
bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
|
||||
|
||||
if ((version == expected_min_version) && !hasExtension)
|
||||
{
|
||||
log_info(
|
||||
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
||||
return TEST_SKIP;
|
||||
}
|
||||
|
||||
if ((version > expected_min_version) && !hasExtension)
|
||||
{
|
||||
log_error("'cl_khr_subgroups' is a required extension, failing.\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
static test_status InitCL(cl_device_id device)
|
||||
{
|
||||
|
||||
auto version = get_device_cl_version(device);
|
||||
test_status ret = TEST_PASS;
|
||||
if (version >= Version(3, 0))
|
||||
@@ -86,10 +59,6 @@ static test_status InitCL(cl_device_id device)
|
||||
ret = TEST_SKIP;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = checkSubGroupsExtension(device);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -26,15 +26,35 @@
|
||||
|
||||
extern MTdata gMTdata;
|
||||
|
||||
extern int test_sub_group_info(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_work_item_functions(cl_device_id device, cl_context context,
|
||||
extern int test_sub_group_info_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_sub_group_info_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_work_group_functions(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_barrier_functions(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_work_item_functions_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_work_item_functions_core(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_work_group_functions_ext(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_work_group_functions_core(cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_barrier_functions_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_barrier_functions_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue,
|
||||
int num_elements);
|
||||
extern int test_pipe_functions(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_ifp_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
extern int test_ifp_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
|
||||
#endif /*_procs_h*/
|
||||
|
||||
@@ -23,6 +23,39 @@
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
class subgroupsAPI {
|
||||
public:
|
||||
subgroupsAPI(cl_platform_id platform, bool useCoreSubgroups)
|
||||
{
|
||||
static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE
|
||||
== CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
|
||||
"Enums have to be the same");
|
||||
static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
|
||||
== CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
|
||||
"Enums have to be the same");
|
||||
if (useCoreSubgroups)
|
||||
{
|
||||
_clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo;
|
||||
clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo";
|
||||
}
|
||||
else
|
||||
{
|
||||
_clGetKernelSubGroupInfo_ptr = (clGetKernelSubGroupInfoKHR_fn)
|
||||
clGetExtensionFunctionAddressForPlatform(
|
||||
platform, "clGetKernelSubGroupInfoKHR");
|
||||
clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfoKHR";
|
||||
}
|
||||
}
|
||||
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr()
|
||||
{
|
||||
return _clGetKernelSubGroupInfo_ptr;
|
||||
}
|
||||
const char *clGetKernelSubGroupInfo_name;
|
||||
|
||||
private:
|
||||
clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr;
|
||||
};
|
||||
|
||||
// Some template helpers
|
||||
template <typename Ty> struct TypeName;
|
||||
template <> struct TypeName<cl_half>
|
||||
@@ -310,7 +343,7 @@ struct test
|
||||
{
|
||||
static int run(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements, const char *kname,
|
||||
const char *src, int dynscl = 0)
|
||||
const char *src, int dynscl, bool useCoreSubgroups)
|
||||
{
|
||||
size_t tmp;
|
||||
int error;
|
||||
@@ -318,7 +351,6 @@ struct test
|
||||
size_t realSize;
|
||||
size_t global;
|
||||
size_t local;
|
||||
const char *kstrings[3];
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
cl_platform_id platform;
|
||||
@@ -332,14 +364,21 @@ struct test
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
|
||||
(void *)&platform, NULL);
|
||||
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
|
||||
std::stringstream kernel_sstr;
|
||||
if (useCoreSubgroups)
|
||||
{
|
||||
kernel_sstr
|
||||
<< "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
|
||||
}
|
||||
kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); "
|
||||
"M[I].y = get_sub_group_id();\n";
|
||||
kernel_sstr << TypeDef<Ty>::val();
|
||||
kernel_sstr << src;
|
||||
const std::string &kernel_str = kernel_sstr.str();
|
||||
const char *kernel_src = kernel_str.c_str();
|
||||
|
||||
kstrings[0] = "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
|
||||
"#define XY(M,I) M[I].x = get_sub_group_local_id(); "
|
||||
"M[I].y = get_sub_group_id();\n";
|
||||
kstrings[1] = TypeDef<Ty>::val();
|
||||
kstrings[2] = src;
|
||||
error = create_single_kernel_helper_with_build_options(
|
||||
context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0");
|
||||
context, &program, &kernel, 1, &kernel_src, kname, "-cl-std=CL2.0");
|
||||
if (error != 0) return error;
|
||||
|
||||
// Determine some local dimensions to use for the test.
|
||||
@@ -352,40 +391,46 @@ struct test
|
||||
if (local > LSIZE) local = LSIZE;
|
||||
|
||||
// Get the sub group info
|
||||
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr;
|
||||
clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)
|
||||
clGetExtensionFunctionAddressForPlatform(
|
||||
platform, "clGetKernelSubGroupInfoKHR");
|
||||
if (clGetKernelSubGroupInfoKHR_ptr == NULL)
|
||||
subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups);
|
||||
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
|
||||
subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
|
||||
if (clGetKernelSubGroupInfo_ptr == NULL)
|
||||
{
|
||||
log_error(
|
||||
"ERROR: clGetKernelSubGroupInfoKHR function not available");
|
||||
return -1;
|
||||
log_error("ERROR: %s function not available",
|
||||
subgroupsApiSet.clGetKernelSubGroupInfo_name);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
error = clGetKernelSubGroupInfo_ptr(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
|
||||
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
log_error("ERROR: %s function error for "
|
||||
"CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE",
|
||||
subgroupsApiSet.clGetKernelSubGroupInfo_name);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
|
||||
test_error(error,
|
||||
"clGetKernelSubGroupInfoKHR failed for "
|
||||
"CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
|
||||
subgroup_size = (int)tmp;
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(
|
||||
kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
|
||||
error = clGetKernelSubGroupInfo_ptr(
|
||||
kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
|
||||
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
|
||||
test_error(error,
|
||||
"clGetKernelSubGroupInfoKHR failed for "
|
||||
"CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
|
||||
num_subgroups = (int)tmp;
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
log_error("ERROR: %s function error for "
|
||||
"CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE",
|
||||
subgroupsApiSet.clGetKernelSubGroupInfo_name);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
num_subgroups = (int)tmp;
|
||||
// Make sure the number of sub groups is what we expect
|
||||
if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
|
||||
{
|
||||
log_error("ERROR: unexpected number of subgroups (%d) returned by "
|
||||
"clGetKernelSubGroupInfoKHR\n",
|
||||
log_error("ERROR: unexpected number of subgroups (%d) returned\n",
|
||||
num_subgroups);
|
||||
return -1;
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
std::vector<Ty> idata;
|
||||
|
||||
@@ -139,10 +139,10 @@ template <int Which> struct BAR
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Entry point from main
|
||||
int test_barrier_functions(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
cl_command_queue queue, int num_elements,
|
||||
bool useCoreSubgroups)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -150,10 +150,33 @@ int test_barrier_functions(cl_device_id device, cl_context context,
|
||||
#define G 2000
|
||||
#define L 200
|
||||
|
||||
error = test<cl_int, BAR<0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_lbar", lbar_source);
|
||||
error = test<cl_int, BAR<0>, G, L>::run(device, context, queue,
|
||||
num_elements, "test_lbar",
|
||||
lbar_source, 0, useCoreSubgroups);
|
||||
error = test<cl_int, BAR<1>, G, L, G>::run(
|
||||
device, context, queue, num_elements, "test_gbar", gbar_source);
|
||||
device, context, queue, num_elements, "test_gbar", gbar_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
int test_barrier_functions_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
return test_barrier_functions(device, context, queue, num_elements, true);
|
||||
}
|
||||
|
||||
int test_barrier_functions_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
|
||||
|
||||
if (!hasExtension)
|
||||
{
|
||||
log_info(
|
||||
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
return test_barrier_functions(device, context, queue, num_elements, false);
|
||||
}
|
||||
364
test_conformance/subgroups/test_ifp.cpp
Normal file
364
test_conformance/subgroups/test_ifp.cpp
Normal file
@@ -0,0 +1,364 @@
|
||||
//
|
||||
// Copyright (c) 2020 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "harness/conversions.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
|
||||
// These need to stay in sync with the kernel source below
|
||||
#define NUM_LOC 49
|
||||
#define INST_LOC_MASK 0x7f
|
||||
#define INST_OP_SHIFT 0
|
||||
#define INST_OP_MASK 0xf
|
||||
#define INST_LOC_SHIFT 4
|
||||
#define INST_VAL_SHIFT 12
|
||||
#define INST_VAL_MASK 0x7ffff
|
||||
#define INST_END 0x0
|
||||
#define INST_STORE 0x1
|
||||
#define INST_WAIT 0x2
|
||||
#define INST_COUNT 0x3
|
||||
|
||||
static const char *ifp_source =
|
||||
"#define NUM_LOC 49\n"
|
||||
"#define INST_LOC_MASK 0x7f\n"
|
||||
"#define INST_OP_SHIFT 0\n"
|
||||
"#define INST_OP_MASK 0xf\n"
|
||||
"#define INST_LOC_SHIFT 4\n"
|
||||
"#define INST_VAL_SHIFT 12\n"
|
||||
"#define INST_VAL_MASK 0x7ffff\n"
|
||||
"#define INST_END 0x0\n"
|
||||
"#define INST_STORE 0x1\n"
|
||||
"#define INST_WAIT 0x2\n"
|
||||
"#define INST_COUNT 0x3\n"
|
||||
"\n"
|
||||
"__kernel void\n"
|
||||
"test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
|
||||
"{\n"
|
||||
" __local atomic_int loc[NUM_LOC];\n"
|
||||
"\n"
|
||||
" // Don't run if there is only one sub group\n"
|
||||
" if (get_num_sub_groups() == 1)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" // First initialize loc[]\n"
|
||||
" int lid = (int)get_local_id(0);\n"
|
||||
"\n"
|
||||
" if (lid < NUM_LOC)\n"
|
||||
" atomic_init(loc+lid, 0);\n"
|
||||
"\n"
|
||||
" work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Compute pointer to this sub group's \"instructions\"\n"
|
||||
" const __global int *pc = in +\n"
|
||||
" ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n"
|
||||
" (int)get_sub_group_id()) *\n"
|
||||
" (NUM_LOC+1);\n"
|
||||
"\n"
|
||||
" // Set up to \"run\"\n"
|
||||
" bool ok = (int)get_sub_group_local_id() == 0;\n"
|
||||
" bool run = true;\n"
|
||||
"\n"
|
||||
" while (run) {\n"
|
||||
" int inst = *pc++;\n"
|
||||
" int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n"
|
||||
" int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n"
|
||||
" int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n"
|
||||
"\n"
|
||||
" switch (iop) {\n"
|
||||
" case INST_STORE:\n"
|
||||
" if (ok)\n"
|
||||
" atomic_store(loc+iloc, ival);\n"
|
||||
" break;\n"
|
||||
" case INST_WAIT:\n"
|
||||
" if (ok) {\n"
|
||||
" while (atomic_load(loc+iloc) != ival)\n"
|
||||
" ;\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" case INST_COUNT:\n"
|
||||
" if (ok) {\n"
|
||||
" int i;\n"
|
||||
" for (i=0;i<ival;++i)\n"
|
||||
" atomic_fetch_add(loc+iloc, 1);\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" case INST_END:\n"
|
||||
" run = false;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Save this group's result\n"
|
||||
" __global int *op = out + (int)get_group_id(0)*NUM_LOC;\n"
|
||||
" if (lid < NUM_LOC)\n"
|
||||
" op[lid] = atomic_load(loc+lid);\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
// Independent forward progress stuff
|
||||
// Note:
|
||||
// Output needs num_groups * NUM_LOC elements
|
||||
// local_size must be > NUM_LOC
|
||||
// Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements
|
||||
|
||||
static inline int inst(int op, int loc, int val)
|
||||
{
|
||||
return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT)
|
||||
| (op << INST_OP_SHIFT);
|
||||
}
|
||||
|
||||
void gen_insts(cl_int *x, cl_int *p, int n)
|
||||
{
|
||||
int i, j0, j1;
|
||||
int val;
|
||||
int ii[NUM_LOC];
|
||||
|
||||
// Create a random permutation of 0...NUM_LOC-1
|
||||
ii[0] = 0;
|
||||
for (i = 1; i < NUM_LOC; ++i)
|
||||
{
|
||||
j0 = random_in_range(0, i, gMTdata);
|
||||
if (j0 != i) ii[i] = ii[j0];
|
||||
ii[j0] = i;
|
||||
}
|
||||
|
||||
// Initialize "instruction pointers"
|
||||
memset(p, 0, n * 4);
|
||||
|
||||
for (i = 0; i < NUM_LOC; ++i)
|
||||
{
|
||||
// Randomly choose 2 different sub groups
|
||||
// One does a random amount of work, and the other waits for it
|
||||
j0 = random_in_range(0, n - 1, gMTdata);
|
||||
|
||||
do
|
||||
{
|
||||
j1 = random_in_range(0, n - 1, gMTdata);
|
||||
} while (j1 == j0);
|
||||
|
||||
// Randomly choose a wait value and assign "instructions"
|
||||
val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata);
|
||||
x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val);
|
||||
x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val);
|
||||
++p[j0];
|
||||
++p[j1];
|
||||
}
|
||||
|
||||
// Last "inst" for each sub group is END
|
||||
for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0);
|
||||
}
|
||||
|
||||
// Execute one group's "instructions"
|
||||
void run_insts(cl_int *x, cl_int *p, int n)
|
||||
{
|
||||
int i, nend;
|
||||
bool scont;
|
||||
cl_int loc[NUM_LOC];
|
||||
|
||||
// Initialize result and "instruction pointers"
|
||||
memset(loc, 0, sizeof(loc));
|
||||
memset(p, 0, 4 * n);
|
||||
|
||||
// Repetitively loop over subgroups with each executing "instructions" until
|
||||
// blocked The loop terminates when all subgroups have hit the "END
|
||||
// instruction"
|
||||
do
|
||||
{
|
||||
nend = 0;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
do
|
||||
{
|
||||
cl_int inst = x[i * (NUM_LOC + 1) + p[i]];
|
||||
cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;
|
||||
cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;
|
||||
cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;
|
||||
scont = false;
|
||||
|
||||
switch (iop)
|
||||
{
|
||||
case INST_STORE:
|
||||
loc[iloc] = ival;
|
||||
++p[i];
|
||||
scont = true;
|
||||
break;
|
||||
case INST_WAIT:
|
||||
if (loc[iloc] == ival)
|
||||
{
|
||||
++p[i];
|
||||
scont = true;
|
||||
}
|
||||
break;
|
||||
case INST_COUNT:
|
||||
loc[iloc] += ival;
|
||||
++p[i];
|
||||
scont = true;
|
||||
break;
|
||||
case INST_END: ++nend; break;
|
||||
}
|
||||
} while (scont);
|
||||
}
|
||||
} while (nend < n);
|
||||
|
||||
// Return result, reusing "p"
|
||||
memcpy(p, loc, sizeof(loc));
|
||||
}
|
||||
|
||||
|
||||
struct IFP
|
||||
{
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
|
||||
{
|
||||
int k;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
// We need at least 2 sub groups per group for this test
|
||||
if (nj == 1) return;
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
gen_insts(x, t, nj);
|
||||
x += nj * (NUM_LOC + 1);
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns,
|
||||
int nw, int ng)
|
||||
{
|
||||
int i, k;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
// We need at least 2 sub groups per group for this tes
|
||||
if (nj == 1) return 0;
|
||||
|
||||
log_info(" independent forward progress...\n");
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
run_insts(x, t, nj);
|
||||
for (i = 0; i < NUM_LOC; ++i)
|
||||
{
|
||||
if (t[i] != y[i])
|
||||
{
|
||||
log_error(
|
||||
"ERROR: mismatch at element %d in work group %d\n", i,
|
||||
k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
x += nj * (NUM_LOC + 1);
|
||||
y += NUM_LOC;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
int num_elements, bool useCoreSubgroups)
|
||||
{
|
||||
int error;
|
||||
|
||||
// Adjust these individually below if desired/needed
|
||||
#define G 2000
|
||||
#define L 200
|
||||
error = test<cl_int, IFP, G, L>::run(device, context, queue, num_elements,
|
||||
"test_ifp", ifp_source, NUM_LOC + 1,
|
||||
useCoreSubgroups);
|
||||
return error;
|
||||
}
|
||||
|
||||
static test_status checkIFPSupport(cl_device_id device, bool &ifpSupport)
|
||||
{
|
||||
cl_uint ifp_supported;
|
||||
cl_uint error;
|
||||
error = clGetDeviceInfo(device,
|
||||
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
|
||||
sizeof(ifp_supported), &ifp_supported, NULL);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
print_error(
|
||||
error,
|
||||
"Unable to get CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS "
|
||||
"capability");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
// skip testing ifp
|
||||
if (ifp_supported != 1)
|
||||
{
|
||||
log_info("INDEPENDENT FORWARD PROGRESS not supported...\n");
|
||||
ifpSupport = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("INDEPENDENT FORWARD PROGRESS supported...\n");
|
||||
ifpSupport = true;
|
||||
}
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
int test_ifp_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
bool ifpSupport = true;
|
||||
test_status error;
|
||||
error = checkIFPSupport(device, ifpSupport);
|
||||
if (error != TEST_PASS)
|
||||
{
|
||||
return error;
|
||||
}
|
||||
if (ifpSupport == false)
|
||||
{
|
||||
log_info("Independed forward progress skipped.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
return test_ifp(device, context, queue, num_elements, true);
|
||||
}
|
||||
|
||||
int test_ifp_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
|
||||
bool ifpSupport = true;
|
||||
|
||||
if (!hasExtension)
|
||||
{
|
||||
log_info(
|
||||
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
// ifp only in subgroup functions tests:
|
||||
test_status error;
|
||||
error = checkIFPSupport(device, ifpSupport);
|
||||
if (error != TEST_PASS)
|
||||
{
|
||||
return error;
|
||||
}
|
||||
if (ifpSupport == false)
|
||||
{
|
||||
log_info(
|
||||
"Error reason: the extension cl_khr_subgroups requires that "
|
||||
"Independed forward progress has to be supported by device.\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
return test_ifp(device, context, queue, num_elements, false);
|
||||
}
|
||||
@@ -14,6 +14,7 @@
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -21,23 +22,10 @@ typedef struct
|
||||
cl_uint numSubGroups;
|
||||
} result_data;
|
||||
|
||||
static const char *query_kernel_source =
|
||||
"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
|
||||
"\n"
|
||||
"typedef struct {\n"
|
||||
" uint maxSubGroupSize;\n"
|
||||
" uint numSubGroups;\n"
|
||||
"} result_data;\n"
|
||||
"\n"
|
||||
"__kernel void query_kernel( __global result_data *outData )\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id( 0 );\n"
|
||||
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
|
||||
" outData[gid].numSubGroups = get_num_sub_groups();\n"
|
||||
"}";
|
||||
|
||||
int test_sub_group_info(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
cl_command_queue queue, int num_elements,
|
||||
bool useCoreSubgroups)
|
||||
{
|
||||
static const size_t gsize0 = 80;
|
||||
int i, error;
|
||||
@@ -58,9 +46,29 @@ int test_sub_group_info(cl_device_id device, cl_context context,
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper out;
|
||||
std::stringstream kernel_sstr;
|
||||
if (useCoreSubgroups)
|
||||
{
|
||||
kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
|
||||
}
|
||||
kernel_sstr
|
||||
<< "\n"
|
||||
"typedef struct {\n"
|
||||
" uint maxSubGroupSize;\n"
|
||||
" uint numSubGroups;\n"
|
||||
"} result_data;\n"
|
||||
"\n"
|
||||
"__kernel void query_kernel( __global result_data *outData )\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id( 0 );\n"
|
||||
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
|
||||
" outData[gid].numSubGroups = get_num_sub_groups();\n"
|
||||
"}";
|
||||
|
||||
const std::string &kernel_str = kernel_sstr.str();
|
||||
const char *kernel_src = kernel_str.c_str();
|
||||
error = create_single_kernel_helper_with_build_options(
|
||||
context, &program, &kernel, 1, &query_kernel_source, "query_kernel",
|
||||
context, &program, &kernel, 1, &kernel_src, "query_kernel",
|
||||
"-cl-std=CL2.0");
|
||||
if (error != 0) return error;
|
||||
|
||||
@@ -88,44 +96,50 @@ int test_sub_group_info(cl_device_id device, cl_context context,
|
||||
(void *)&platform, NULL);
|
||||
test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM");
|
||||
|
||||
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr;
|
||||
clGetKernelSubGroupInfoKHR_ptr =
|
||||
(clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(
|
||||
platform, "clGetKernelSubGroupInfoKHR");
|
||||
if (clGetKernelSubGroupInfoKHR_ptr == NULL)
|
||||
subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups);
|
||||
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
|
||||
subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
|
||||
if (clGetKernelSubGroupInfo_ptr == NULL)
|
||||
{
|
||||
log_error("ERROR: clGetKernelSubGroupInfoKHR function not available");
|
||||
return -1;
|
||||
log_error("ERROR: %s function not available",
|
||||
subgroupsApiSet.clGetKernelSubGroupInfo_name);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(kernel_max_subgroup_size),
|
||||
error = clGetKernelSubGroupInfo_ptr(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(local),
|
||||
(void *)&local, sizeof(kernel_max_subgroup_size),
|
||||
(void *)&kernel_max_subgroup_size, &realSize);
|
||||
test_error(error,
|
||||
"clGetKernelSubGroupInfoKHR failed for "
|
||||
"CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
|
||||
log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR for the kernel "
|
||||
"is %d.\n",
|
||||
(int)kernel_max_subgroup_size);
|
||||
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
log_error("ERROR: %s function error for "
|
||||
"CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE",
|
||||
subgroupsApiSet.clGetKernelSubGroupInfo_name);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
log_info(
|
||||
"The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n",
|
||||
(int)kernel_max_subgroup_size);
|
||||
if (realSize != sizeof(kernel_max_subgroup_size))
|
||||
{
|
||||
log_error("ERROR: Returned size of max sub group size not valid! "
|
||||
"(Expected %d, got %d)\n",
|
||||
(int)sizeof(kernel_max_subgroup_size), (int)realSize);
|
||||
return -1;
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(
|
||||
kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(kernel_subgroup_count),
|
||||
error = clGetKernelSubGroupInfo_ptr(
|
||||
kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(local),
|
||||
(void *)&local, sizeof(kernel_subgroup_count),
|
||||
(void *)&kernel_subgroup_count, &realSize);
|
||||
test_error(error,
|
||||
"clGetKernelSubGroupInfoKHR failed for "
|
||||
"CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
log_error("ERROR: %s function error "
|
||||
"for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE",
|
||||
subgroupsApiSet.clGetKernelSubGroupInfo_name);
|
||||
return TEST_FAIL;
|
||||
}
|
||||
log_info(
|
||||
"The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR for the kernel is %d.\n",
|
||||
"The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n",
|
||||
(int)kernel_subgroup_count);
|
||||
|
||||
if (realSize != sizeof(kernel_subgroup_count))
|
||||
@@ -133,7 +147,7 @@ int test_sub_group_info(cl_device_id device, cl_context context,
|
||||
log_error("ERROR: Returned size of sub group count not valid! "
|
||||
"(Expected %d, got %d)\n",
|
||||
(int)sizeof(kernel_subgroup_count), (int)realSize);
|
||||
return -1;
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
// Verify that the kernel gets the same max_subgroup_size and subgroup_count
|
||||
@@ -176,3 +190,24 @@ int test_sub_group_info(cl_device_id device, cl_context context,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_sub_group_info_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
return test_sub_group_info(device, context, queue, num_elements, true);
|
||||
}
|
||||
|
||||
int test_sub_group_info_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
|
||||
|
||||
if (!hasExtension)
|
||||
{
|
||||
log_info(
|
||||
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
return test_sub_group_info(device, context, queue, num_elements, false);
|
||||
}
|
||||
@@ -126,98 +126,6 @@ static const char *scexmin_source =
|
||||
" out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
// These need to stay in sync with the kernel source below
|
||||
#define NUM_LOC 49
|
||||
#define INST_LOC_MASK 0x7f
|
||||
#define INST_OP_SHIFT 0
|
||||
#define INST_OP_MASK 0xf
|
||||
#define INST_LOC_SHIFT 4
|
||||
#define INST_VAL_SHIFT 12
|
||||
#define INST_VAL_MASK 0x7ffff
|
||||
#define INST_END 0x0
|
||||
#define INST_STORE 0x1
|
||||
#define INST_WAIT 0x2
|
||||
#define INST_COUNT 0x3
|
||||
|
||||
static const char *ifp_source =
|
||||
"#define NUM_LOC 49\n"
|
||||
"#define INST_LOC_MASK 0x7f\n"
|
||||
"#define INST_OP_SHIFT 0\n"
|
||||
"#define INST_OP_MASK 0xf\n"
|
||||
"#define INST_LOC_SHIFT 4\n"
|
||||
"#define INST_VAL_SHIFT 12\n"
|
||||
"#define INST_VAL_MASK 0x7ffff\n"
|
||||
"#define INST_END 0x0\n"
|
||||
"#define INST_STORE 0x1\n"
|
||||
"#define INST_WAIT 0x2\n"
|
||||
"#define INST_COUNT 0x3\n"
|
||||
"\n"
|
||||
"__kernel void\n"
|
||||
"test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
|
||||
"{\n"
|
||||
" __local atomic_int loc[NUM_LOC];\n"
|
||||
"\n"
|
||||
" // Don't run if there is only one sub group\n"
|
||||
" if (get_num_sub_groups() == 1)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" // First initialize loc[]\n"
|
||||
" int lid = (int)get_local_id(0);\n"
|
||||
"\n"
|
||||
" if (lid < NUM_LOC)\n"
|
||||
" atomic_init(loc+lid, 0);\n"
|
||||
"\n"
|
||||
" work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Compute pointer to this sub group's \"instructions\"\n"
|
||||
" const __global int *pc = in +\n"
|
||||
" ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n"
|
||||
" (int)get_sub_group_id()) *\n"
|
||||
" (NUM_LOC+1);\n"
|
||||
"\n"
|
||||
" // Set up to \"run\"\n"
|
||||
" bool ok = (int)get_sub_group_local_id() == 0;\n"
|
||||
" bool run = true;\n"
|
||||
"\n"
|
||||
" while (run) {\n"
|
||||
" int inst = *pc++;\n"
|
||||
" int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n"
|
||||
" int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n"
|
||||
" int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n"
|
||||
"\n"
|
||||
" switch (iop) {\n"
|
||||
" case INST_STORE:\n"
|
||||
" if (ok)\n"
|
||||
" atomic_store(loc+iloc, ival);\n"
|
||||
" break;\n"
|
||||
" case INST_WAIT:\n"
|
||||
" if (ok) {\n"
|
||||
" while (atomic_load(loc+iloc) != ival)\n"
|
||||
" ;\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" case INST_COUNT:\n"
|
||||
" if (ok) {\n"
|
||||
" int i;\n"
|
||||
" for (i=0;i<ival;++i)\n"
|
||||
" atomic_fetch_add(loc+iloc, 1);\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" case INST_END:\n"
|
||||
" run = false;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Save this group's result\n"
|
||||
" __global int *op = out + (int)get_group_id(0)*NUM_LOC;\n"
|
||||
" if (lid < NUM_LOC)\n"
|
||||
" op[lid] = atomic_load(loc+lid);\n"
|
||||
"}\n";
|
||||
|
||||
// Any/All test functions
|
||||
template <int Which> struct AA
|
||||
@@ -713,169 +621,11 @@ template <typename Ty> struct BC
|
||||
}
|
||||
};
|
||||
|
||||
// Independent forward progress stuff
|
||||
// Note:
|
||||
// Output needs num_groups * NUM_LOC elements
|
||||
// local_size must be > NUM_LOC
|
||||
// Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements
|
||||
|
||||
static inline int inst(int op, int loc, int val)
|
||||
{
|
||||
return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT)
|
||||
| (op << INST_OP_SHIFT);
|
||||
}
|
||||
|
||||
void gen_insts(cl_int *x, cl_int *p, int n)
|
||||
{
|
||||
int i, j0, j1;
|
||||
int val;
|
||||
int ii[NUM_LOC];
|
||||
|
||||
// Create a random permutation of 0...NUM_LOC-1
|
||||
ii[0] = 0;
|
||||
for (i = 1; i < NUM_LOC; ++i)
|
||||
{
|
||||
j0 = random_in_range(0, i, gMTdata);
|
||||
if (j0 != i) ii[i] = ii[j0];
|
||||
ii[j0] = i;
|
||||
}
|
||||
|
||||
// Initialize "instruction pointers"
|
||||
memset(p, 0, n * 4);
|
||||
|
||||
for (i = 0; i < NUM_LOC; ++i)
|
||||
{
|
||||
// Randomly choose 2 different sub groups
|
||||
// One does a random amount of work, and the other waits for it
|
||||
j0 = random_in_range(0, n - 1, gMTdata);
|
||||
|
||||
do
|
||||
{
|
||||
j1 = random_in_range(0, n - 1, gMTdata);
|
||||
} while (j1 == j0);
|
||||
|
||||
// Randomly choose a wait value and assign "instructions"
|
||||
val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata);
|
||||
x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val);
|
||||
x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val);
|
||||
++p[j0];
|
||||
++p[j1];
|
||||
}
|
||||
|
||||
// Last "inst" for each sub group is END
|
||||
for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0);
|
||||
}
|
||||
|
||||
// Execute one group's "instructions"
|
||||
void run_insts(cl_int *x, cl_int *p, int n)
|
||||
{
|
||||
int i, nend;
|
||||
bool scont;
|
||||
cl_int loc[NUM_LOC];
|
||||
|
||||
// Initialize result and "instruction pointers"
|
||||
memset(loc, 0, sizeof(loc));
|
||||
memset(p, 0, 4 * n);
|
||||
|
||||
// Repetitively loop over subgroups with each executing "instructions" until
|
||||
// blocked The loop terminates when all subgroups have hit the "END
|
||||
// instruction"
|
||||
do
|
||||
{
|
||||
nend = 0;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
do
|
||||
{
|
||||
cl_int inst = x[i * (NUM_LOC + 1) + p[i]];
|
||||
cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;
|
||||
cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;
|
||||
cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;
|
||||
scont = false;
|
||||
|
||||
switch (iop)
|
||||
{
|
||||
case INST_STORE:
|
||||
loc[iloc] = ival;
|
||||
++p[i];
|
||||
scont = true;
|
||||
break;
|
||||
case INST_WAIT:
|
||||
if (loc[iloc] == ival)
|
||||
{
|
||||
++p[i];
|
||||
scont = true;
|
||||
}
|
||||
break;
|
||||
case INST_COUNT:
|
||||
loc[iloc] += ival;
|
||||
++p[i];
|
||||
scont = true;
|
||||
break;
|
||||
case INST_END: ++nend; break;
|
||||
}
|
||||
} while (scont);
|
||||
}
|
||||
} while (nend < n);
|
||||
|
||||
// Return result, reusing "p"
|
||||
memcpy(p, loc, sizeof(loc));
|
||||
}
|
||||
|
||||
|
||||
struct IFP
|
||||
{
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
|
||||
{
|
||||
int k;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
// We need at least 2 sub groups per group for this test
|
||||
if (nj == 1) return;
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
gen_insts(x, t, nj);
|
||||
x += nj * (NUM_LOC + 1);
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns,
|
||||
int nw, int ng)
|
||||
{
|
||||
int i, k;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
// We need at least 2 sub groups per group for this tes
|
||||
if (nj == 1) return 0;
|
||||
|
||||
log_info(" independent forward progress...\n");
|
||||
|
||||
for (k = 0; k < ng; ++k)
|
||||
{
|
||||
run_insts(x, t, nj);
|
||||
for (i = 0; i < NUM_LOC; ++i)
|
||||
{
|
||||
if (t[i] != y[i])
|
||||
{
|
||||
log_error(
|
||||
"ERROR: mismatch at element %d in work group %d\n", i,
|
||||
k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
x += nj * (NUM_LOC + 1);
|
||||
y += NUM_LOC;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Entry point from main
|
||||
int test_work_group_functions(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
cl_command_queue queue, int num_elements,
|
||||
bool useCoreSubgroups)
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -884,161 +634,242 @@ int test_work_group_functions(cl_device_id device, cl_context context,
|
||||
#define L 200
|
||||
|
||||
error = test<int, AA<0>, G, L>::run(device, context, queue, num_elements,
|
||||
"test_any", any_source);
|
||||
"test_any", any_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<int, AA<1>, G, L>::run(device, context, queue, num_elements,
|
||||
"test_all", all_source);
|
||||
"test_all", all_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, BC<cl_half>, G, L>::run(device, context, queue,
|
||||
// num_elements, "test_bcast", bcast_source);
|
||||
error |= test<cl_uint, BC<cl_uint>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, BC<cl_int>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, BC<cl_ulong>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, BC<cl_long>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, BC<float>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, BC<double>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
device, context, queue, num_elements, "test_bcast", bcast_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, RED<cl_half,0>, G, L>::run(device, context, queue,
|
||||
// num_elements, "test_redadd", redadd_source);
|
||||
error |= test<cl_uint, RED<cl_uint, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, RED<cl_int, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, RED<cl_ulong, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, RED<cl_long, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, RED<float, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, RED<double, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
device, context, queue, num_elements, "test_redadd", redadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, RED<cl_half,1>, G, L>::run(device, context, queue,
|
||||
// num_elements, "test_redmax", redmax_source);
|
||||
error |= test<cl_uint, RED<cl_uint, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, RED<cl_int, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, RED<cl_ulong, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, RED<cl_long, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, RED<float, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, RED<double, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
device, context, queue, num_elements, "test_redmax", redmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, RED<cl_half,2>, G, L>::run(device, context, queue,
|
||||
// num_elements, "test_redmin", redmin_source);
|
||||
error |= test<cl_uint, RED<cl_uint, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, RED<cl_int, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, RED<cl_ulong, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, RED<cl_long, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, RED<float, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, RED<double, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
device, context, queue, num_elements, "test_redmin", redmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, SCIN<cl_half,0>, G, L>::run(device, context,
|
||||
// queue, num_elements, "test_scinadd", scinadd_source);
|
||||
error |= test<cl_uint, SCIN<cl_uint, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, SCIN<cl_int, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, SCIN<cl_ulong, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, SCIN<cl_long, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, SCIN<float, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, SCIN<double, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, SCIN<cl_half,1>, G, L>::run(device, context,
|
||||
// queue, num_elements, "test_scinmax", scinmax_source);
|
||||
error |= test<cl_uint, SCIN<cl_uint, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, SCIN<cl_int, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, SCIN<cl_ulong, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, SCIN<cl_long, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, SCIN<float, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, SCIN<double, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, SCIN<cl_half,2>, G, L>::run(device, context,
|
||||
// queue, num_elements, "test_scinmin", scinmin_source);
|
||||
error |= test<cl_uint, SCIN<cl_uint, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, SCIN<cl_int, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, SCIN<cl_ulong, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, SCIN<cl_long, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, SCIN<float, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, SCIN<double, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, SCEX<cl_half,0>, G, L>::run(device, context,
|
||||
// queue, num_elements, "test_scexadd", scexadd_source);
|
||||
error |= test<cl_uint, SCEX<cl_uint, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, SCEX<cl_int, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, SCEX<cl_ulong, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, SCEX<cl_long, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, SCEX<float, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, SCEX<double, 0>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, SCEX<cl_half,1>, G, L>::run(device, context,
|
||||
// queue, num_elements, "test_scexmax", scexmax_source);
|
||||
error |= test<cl_uint, SCEX<cl_uint, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, SCEX<cl_int, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, SCEX<cl_ulong, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, SCEX<cl_long, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, SCEX<float, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, SCEX<double, 1>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
|
||||
useCoreSubgroups);
|
||||
|
||||
// error |= test<cl_half, SCEX<cl_half,2>, G, L>::run(device, context,
|
||||
// queue, num_elements, "test_scexmin", scexmin_source);
|
||||
error |= test<cl_uint, SCEX<cl_uint, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_int, SCEX<cl_int, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_ulong, SCEX<cl_ulong, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<cl_long, SCEX<cl_long, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<float, SCEX<float, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
error |= test<double, SCEX<double, 2>, G, L>::run(
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
|
||||
error |= test<cl_int, IFP, G, L>::run(device, context, queue, num_elements,
|
||||
"test_ifp", ifp_source, NUM_LOC + 1);
|
||||
device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
|
||||
useCoreSubgroups);
|
||||
return error;
|
||||
}
|
||||
|
||||
int test_work_group_functions_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
return test_work_group_functions(device, context, queue, num_elements,
|
||||
true);
|
||||
}
|
||||
|
||||
int test_work_group_functions_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
|
||||
|
||||
if (!hasExtension)
|
||||
{
|
||||
log_info(
|
||||
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
return test_work_group_functions(device, context, queue, num_elements,
|
||||
false);
|
||||
}
|
||||
|
||||
@@ -34,29 +34,6 @@ struct get_test_data
|
||||
}
|
||||
};
|
||||
|
||||
static const char *get_test_source =
|
||||
"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
|
||||
"\n"
|
||||
"typedef struct {\n"
|
||||
" uint subGroupSize;\n"
|
||||
" uint maxSubGroupSize;\n"
|
||||
" uint numSubGroups;\n"
|
||||
" uint enqNumSubGroups;\n"
|
||||
" uint subGroupId;\n"
|
||||
" uint subGroupLocalId;\n"
|
||||
"} get_test_data;\n"
|
||||
"\n"
|
||||
"__kernel void get_test( __global get_test_data *outData )\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id( 0 );\n"
|
||||
" outData[gid].subGroupSize = get_sub_group_size();\n"
|
||||
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
|
||||
" outData[gid].numSubGroups = get_num_sub_groups();\n"
|
||||
" outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
|
||||
" outData[gid].subGroupId = get_sub_group_id();\n"
|
||||
" outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
|
||||
"}";
|
||||
|
||||
static int check_group(const get_test_data *result, int nw, cl_uint ensg,
|
||||
int maxwgs)
|
||||
{
|
||||
@@ -207,7 +184,8 @@ static int check_group(const get_test_data *result, int nw, cl_uint ensg,
|
||||
}
|
||||
|
||||
int test_work_item_functions(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
cl_command_queue queue, int num_elements,
|
||||
bool useCoreSubgroups)
|
||||
{
|
||||
static const size_t lsize = 200;
|
||||
int error;
|
||||
@@ -220,9 +198,37 @@ int test_work_item_functions(cl_device_id device, cl_context context,
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper out;
|
||||
|
||||
std::stringstream kernel_sstr;
|
||||
if (useCoreSubgroups)
|
||||
{
|
||||
kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
|
||||
}
|
||||
kernel_sstr
|
||||
<< "\n"
|
||||
"\n"
|
||||
"typedef struct {\n"
|
||||
" uint subGroupSize;\n"
|
||||
" uint maxSubGroupSize;\n"
|
||||
" uint numSubGroups;\n"
|
||||
" uint enqNumSubGroups;\n"
|
||||
" uint subGroupId;\n"
|
||||
" uint subGroupLocalId;\n"
|
||||
"} get_test_data;\n"
|
||||
"\n"
|
||||
"__kernel void get_test( __global get_test_data *outData )\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id( 0 );\n"
|
||||
" outData[gid].subGroupSize = get_sub_group_size();\n"
|
||||
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
|
||||
" outData[gid].numSubGroups = get_num_sub_groups();\n"
|
||||
" outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
|
||||
" outData[gid].subGroupId = get_sub_group_id();\n"
|
||||
" outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
|
||||
"}";
|
||||
const std::string &kernel_str = kernel_sstr.str();
|
||||
const char *kernel_src = kernel_str.c_str();
|
||||
error = create_single_kernel_helper_with_build_options(
|
||||
context, &program, &kernel, 1, &get_test_source, "get_test",
|
||||
context, &program, &kernel, 1, &kernel_src, "get_test",
|
||||
"-cl-std=CL2.0");
|
||||
if (error != 0) return error;
|
||||
|
||||
@@ -301,3 +307,25 @@ int test_work_item_functions(cl_device_id device, cl_context context,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_work_item_functions_core(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
return test_work_item_functions(device, context, queue, num_elements, true);
|
||||
}
|
||||
|
||||
int test_work_item_functions_ext(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
|
||||
|
||||
if (!hasExtension)
|
||||
{
|
||||
log_info(
|
||||
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
return test_work_item_functions(device, context, queue, num_elements,
|
||||
false);
|
||||
}
|
||||
Reference in New Issue
Block a user