diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt index 174a9fe7..eb6a6079 100644 --- a/test_conformance/subgroups/CMakeLists.txt +++ b/test_conformance/subgroups/CMakeLists.txt @@ -6,6 +6,7 @@ set(${MODULE_NAME}_SOURCES test_queries.cpp test_workitem.cpp test_workgroup.cpp + test_ifp.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index dd53292b..f9a9a9d5 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -23,49 +23,22 @@ MTdata gMTdata; test_definition test_list[] = { - ADD_TEST(sub_group_info), - ADD_TEST(work_item_functions), - ADD_TEST(work_group_functions), - ADD_TEST(barrier_functions), + ADD_TEST_VERSION(sub_group_info_ext, Version(2, 0)), + ADD_TEST_VERSION(sub_group_info_core, Version(2, 1)), + ADD_TEST_VERSION(work_item_functions_ext, Version(2, 0)), + ADD_TEST_VERSION(work_item_functions_core, Version(2, 1)), + ADD_TEST_VERSION(work_group_functions_ext, Version(2, 0)), + ADD_TEST_VERSION(work_group_functions_core, Version(2, 1)), + ADD_TEST_VERSION(barrier_functions_ext, Version(2, 0)), + ADD_TEST_VERSION(barrier_functions_core, Version(2, 1)), + ADD_TEST_VERSION(ifp_ext, Version(2, 0)), + ADD_TEST_VERSION(ifp_core, Version(2, 1)) }; const int test_num = ARRAY_SIZE(test_list); -static test_status checkSubGroupsExtension(cl_device_id device) -{ - // The extension is optional in OpenCL 2.0 (minimum required version) and - // required in later versions. - auto version = get_device_cl_version(device); - auto expected_min_version = Version(2, 0); - - if (version < expected_min_version) - { - version_expected_info("Test", expected_min_version.to_string().c_str(), - version.to_string().c_str()); - return TEST_SKIP; - } - - bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); - - if ((version == expected_min_version) && !hasExtension) - { - log_info( - "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); - return TEST_SKIP; - } - - if ((version > expected_min_version) && !hasExtension) - { - log_error("'cl_khr_subgroups' is a required extension, failing.\n"); - return TEST_FAIL; - } - - return TEST_PASS; -} - static test_status InitCL(cl_device_id device) { - auto version = get_device_cl_version(device); test_status ret = TEST_PASS; if (version >= Version(3, 0)) @@ -86,10 +59,6 @@ static test_status InitCL(cl_device_id device) ret = TEST_SKIP; } } - else - { - ret = checkSubGroupsExtension(device); - } return ret; } diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h index e3713770..3ebb13b5 100644 --- a/test_conformance/subgroups/procs.h +++ b/test_conformance/subgroups/procs.h @@ -26,15 +26,35 @@ extern MTdata gMTdata; -extern int test_sub_group_info(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements); -extern int test_work_item_functions(cl_device_id device, cl_context context, +extern int test_sub_group_info_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_sub_group_info_core(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -extern int test_work_group_functions(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements); -extern int test_barrier_functions(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements); +extern int test_work_item_functions_ext(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_work_item_functions_core(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_work_group_functions_ext(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_work_group_functions_core(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_barrier_functions_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_barrier_functions_core(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements); extern int test_pipe_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ifp_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_ifp_core(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); #endif /*_procs_h*/ diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 9a57e466..6e84ccb3 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -23,6 +23,39 @@ #include #include +class subgroupsAPI { +public: + subgroupsAPI(cl_platform_id platform, bool useCoreSubgroups) + { + static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE + == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, + "Enums have to be the same"); + static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE + == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, + "Enums have to be the same"); + if (useCoreSubgroups) + { + _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo; + clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo"; + } + else + { + _clGetKernelSubGroupInfo_ptr = (clGetKernelSubGroupInfoKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clGetKernelSubGroupInfoKHR"); + clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfoKHR"; + } + } + clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr() + { + return _clGetKernelSubGroupInfo_ptr; + } + const char *clGetKernelSubGroupInfo_name; + +private: + clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr; +}; + // Some template helpers template struct TypeName; template <> struct TypeName @@ -310,7 +343,7 @@ struct test { static int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, const char *kname, - const char *src, int dynscl = 0) + const char *src, int dynscl, bool useCoreSubgroups) { size_t tmp; int error; @@ -318,7 +351,6 @@ struct test size_t realSize; size_t global; size_t local; - const char *kstrings[3]; clProgramWrapper program; clKernelWrapper kernel; cl_platform_id platform; @@ -332,14 +364,21 @@ struct test error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); + std::stringstream kernel_sstr; + if (useCoreSubgroups) + { + kernel_sstr + << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; + } + kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); " + "M[I].y = get_sub_group_id();\n"; + kernel_sstr << TypeDef::val(); + kernel_sstr << src; + const std::string &kernel_str = kernel_sstr.str(); + const char *kernel_src = kernel_str.c_str(); - kstrings[0] = "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" - "#define XY(M,I) M[I].x = get_sub_group_local_id(); " - "M[I].y = get_sub_group_id();\n"; - kstrings[1] = TypeDef::val(); - kstrings[2] = src; error = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0"); + context, &program, &kernel, 1, &kernel_src, kname, "-cl-std=CL2.0"); if (error != 0) return error; // Determine some local dimensions to use for the test. @@ -352,40 +391,46 @@ struct test if (local > LSIZE) local = LSIZE; // Get the sub group info - clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr; - clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn) - clGetExtensionFunctionAddressForPlatform( - platform, "clGetKernelSubGroupInfoKHR"); - if (clGetKernelSubGroupInfoKHR_ptr == NULL) + subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups); + clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr = + subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); + if (clGetKernelSubGroupInfo_ptr == NULL) { - log_error( - "ERROR: clGetKernelSubGroupInfoKHR function not available"); - return -1; + log_error("ERROR: %s function not available", + subgroupsApiSet.clGetKernelSubGroupInfo_name); + return TEST_FAIL; + } + error = clGetKernelSubGroupInfo_ptr( + kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); + if (error != CL_SUCCESS) + { + log_error("ERROR: %s function error for " + "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE", + subgroupsApiSet.clGetKernelSubGroupInfo_name); + return TEST_FAIL; } - error = clGetKernelSubGroupInfoKHR_ptr( - kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, - sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); - test_error(error, - "clGetKernelSubGroupInfoKHR failed for " - "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); subgroup_size = (int)tmp; - error = clGetKernelSubGroupInfoKHR_ptr( - kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, + error = clGetKernelSubGroupInfo_ptr( + kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); - test_error(error, - "clGetKernelSubGroupInfoKHR failed for " - "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); - num_subgroups = (int)tmp; + if (error != CL_SUCCESS) + { + log_error("ERROR: %s function error for " + "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE", + subgroupsApiSet.clGetKernelSubGroupInfo_name); + return TEST_FAIL; + } + num_subgroups = (int)tmp; // Make sure the number of sub groups is what we expect if (num_subgroups != (local + subgroup_size - 1) / subgroup_size) { - log_error("ERROR: unexpected number of subgroups (%d) returned by " - "clGetKernelSubGroupInfoKHR\n", + log_error("ERROR: unexpected number of subgroups (%d) returned\n", num_subgroups); - return -1; + return TEST_FAIL; } std::vector idata; diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp index b85f4d81..e6ce1d2e 100644 --- a/test_conformance/subgroups/test_barrier.cpp +++ b/test_conformance/subgroups/test_barrier.cpp @@ -139,10 +139,10 @@ template struct BAR } }; - // Entry point from main int test_barrier_functions(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) + cl_command_queue queue, int num_elements, + bool useCoreSubgroups) { int error; @@ -150,10 +150,33 @@ int test_barrier_functions(cl_device_id device, cl_context context, #define G 2000 #define L 200 - error = test, G, L>::run( - device, context, queue, num_elements, "test_lbar", lbar_source); + error = test, G, L>::run(device, context, queue, + num_elements, "test_lbar", + lbar_source, 0, useCoreSubgroups); error = test, G, L, G>::run( - device, context, queue, num_elements, "test_gbar", gbar_source); + device, context, queue, num_elements, "test_gbar", gbar_source, 0, + useCoreSubgroups); return error; } + +int test_barrier_functions_core(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_barrier_functions(device, context, queue, num_elements, true); +} + +int test_barrier_functions_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); + + if (!hasExtension) + { + log_info( + "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + + return test_barrier_functions(device, context, queue, num_elements, false); +} \ No newline at end of file diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp new file mode 100644 index 00000000..02850e5f --- /dev/null +++ b/test_conformance/subgroups/test_ifp.cpp @@ -0,0 +1,364 @@ +// +// Copyright (c) 2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "subhelpers.h" +#include "harness/conversions.h" +#include "harness/typeWrappers.h" + + +// These need to stay in sync with the kernel source below +#define NUM_LOC 49 +#define INST_LOC_MASK 0x7f +#define INST_OP_SHIFT 0 +#define INST_OP_MASK 0xf +#define INST_LOC_SHIFT 4 +#define INST_VAL_SHIFT 12 +#define INST_VAL_MASK 0x7ffff +#define INST_END 0x0 +#define INST_STORE 0x1 +#define INST_WAIT 0x2 +#define INST_COUNT 0x3 + +static const char *ifp_source = + "#define NUM_LOC 49\n" + "#define INST_LOC_MASK 0x7f\n" + "#define INST_OP_SHIFT 0\n" + "#define INST_OP_MASK 0xf\n" + "#define INST_LOC_SHIFT 4\n" + "#define INST_VAL_SHIFT 12\n" + "#define INST_VAL_MASK 0x7ffff\n" + "#define INST_END 0x0\n" + "#define INST_STORE 0x1\n" + "#define INST_WAIT 0x2\n" + "#define INST_COUNT 0x3\n" + "\n" + "__kernel void\n" + "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n" + "{\n" + " __local atomic_int loc[NUM_LOC];\n" + "\n" + " // Don't run if there is only one sub group\n" + " if (get_num_sub_groups() == 1)\n" + " return;\n" + "\n" + " // First initialize loc[]\n" + " int lid = (int)get_local_id(0);\n" + "\n" + " if (lid < NUM_LOC)\n" + " atomic_init(loc+lid, 0);\n" + "\n" + " work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n" + " // Compute pointer to this sub group's \"instructions\"\n" + " const __global int *pc = in +\n" + " ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n" + " (int)get_sub_group_id()) *\n" + " (NUM_LOC+1);\n" + "\n" + " // Set up to \"run\"\n" + " bool ok = (int)get_sub_group_local_id() == 0;\n" + " bool run = true;\n" + "\n" + " while (run) {\n" + " int inst = *pc++;\n" + " int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n" + " int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n" + " int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n" + "\n" + " switch (iop) {\n" + " case INST_STORE:\n" + " if (ok)\n" + " atomic_store(loc+iloc, ival);\n" + " break;\n" + " case INST_WAIT:\n" + " if (ok) {\n" + " while (atomic_load(loc+iloc) != ival)\n" + " ;\n" + " }\n" + " break;\n" + " case INST_COUNT:\n" + " if (ok) {\n" + " int i;\n" + " for (i=0;i NUM_LOC +// Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements + +static inline int inst(int op, int loc, int val) +{ + return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT) + | (op << INST_OP_SHIFT); +} + +void gen_insts(cl_int *x, cl_int *p, int n) +{ + int i, j0, j1; + int val; + int ii[NUM_LOC]; + + // Create a random permutation of 0...NUM_LOC-1 + ii[0] = 0; + for (i = 1; i < NUM_LOC; ++i) + { + j0 = random_in_range(0, i, gMTdata); + if (j0 != i) ii[i] = ii[j0]; + ii[j0] = i; + } + + // Initialize "instruction pointers" + memset(p, 0, n * 4); + + for (i = 0; i < NUM_LOC; ++i) + { + // Randomly choose 2 different sub groups + // One does a random amount of work, and the other waits for it + j0 = random_in_range(0, n - 1, gMTdata); + + do + { + j1 = random_in_range(0, n - 1, gMTdata); + } while (j1 == j0); + + // Randomly choose a wait value and assign "instructions" + val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata); + x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val); + x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val); + ++p[j0]; + ++p[j1]; + } + + // Last "inst" for each sub group is END + for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0); +} + +// Execute one group's "instructions" +void run_insts(cl_int *x, cl_int *p, int n) +{ + int i, nend; + bool scont; + cl_int loc[NUM_LOC]; + + // Initialize result and "instruction pointers" + memset(loc, 0, sizeof(loc)); + memset(p, 0, 4 * n); + + // Repetitively loop over subgroups with each executing "instructions" until + // blocked The loop terminates when all subgroups have hit the "END + // instruction" + do + { + nend = 0; + for (i = 0; i < n; ++i) + { + do + { + cl_int inst = x[i * (NUM_LOC + 1) + p[i]]; + cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK; + cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK; + cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK; + scont = false; + + switch (iop) + { + case INST_STORE: + loc[iloc] = ival; + ++p[i]; + scont = true; + break; + case INST_WAIT: + if (loc[iloc] == ival) + { + ++p[i]; + scont = true; + } + break; + case INST_COUNT: + loc[iloc] += ival; + ++p[i]; + scont = true; + break; + case INST_END: ++nend; break; + } + } while (scont); + } + } while (nend < n); + + // Return result, reusing "p" + memcpy(p, loc, sizeof(loc)); +} + + +struct IFP +{ + static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng) + { + int k; + int nj = (nw + ns - 1) / ns; + + // We need at least 2 sub groups per group for this test + if (nj == 1) return; + + for (k = 0; k < ng; ++k) + { + gen_insts(x, t, nj); + x += nj * (NUM_LOC + 1); + } + } + + static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns, + int nw, int ng) + { + int i, k; + int nj = (nw + ns - 1) / ns; + + // We need at least 2 sub groups per group for this tes + if (nj == 1) return 0; + + log_info(" independent forward progress...\n"); + + for (k = 0; k < ng; ++k) + { + run_insts(x, t, nj); + for (i = 0; i < NUM_LOC; ++i) + { + if (t[i] != y[i]) + { + log_error( + "ERROR: mismatch at element %d in work group %d\n", i, + k); + return -1; + } + } + x += nj * (NUM_LOC + 1); + y += NUM_LOC; + } + + return 0; + } +}; + +int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements, bool useCoreSubgroups) +{ + int error; + + // Adjust these individually below if desired/needed +#define G 2000 +#define L 200 + error = test::run(device, context, queue, num_elements, + "test_ifp", ifp_source, NUM_LOC + 1, + useCoreSubgroups); + return error; +} + +static test_status checkIFPSupport(cl_device_id device, bool &ifpSupport) +{ + cl_uint ifp_supported; + cl_uint error; + error = clGetDeviceInfo(device, + CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, + sizeof(ifp_supported), &ifp_supported, NULL); + if (error != CL_SUCCESS) + { + print_error( + error, + "Unable to get CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS " + "capability"); + return TEST_FAIL; + } + // skip testing ifp + if (ifp_supported != 1) + { + log_info("INDEPENDENT FORWARD PROGRESS not supported...\n"); + ifpSupport = false; + } + else + { + log_info("INDEPENDENT FORWARD PROGRESS supported...\n"); + ifpSupport = true; + } + return TEST_PASS; +} + +int test_ifp_core(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + bool ifpSupport = true; + test_status error; + error = checkIFPSupport(device, ifpSupport); + if (error != TEST_PASS) + { + return error; + } + if (ifpSupport == false) + { + log_info("Independed forward progress skipped.\n"); + return TEST_SKIPPED_ITSELF; + } + + return test_ifp(device, context, queue, num_elements, true); +} + +int test_ifp_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); + bool ifpSupport = true; + + if (!hasExtension) + { + log_info( + "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + // ifp only in subgroup functions tests: + test_status error; + error = checkIFPSupport(device, ifpSupport); + if (error != TEST_PASS) + { + return error; + } + if (ifpSupport == false) + { + log_info( + "Error reason: the extension cl_khr_subgroups requires that " + "Independed forward progress has to be supported by device.\n"); + return TEST_FAIL; + } + return test_ifp(device, context, queue, num_elements, false); +} \ No newline at end of file diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp index 79929295..2ad3d7fa 100644 --- a/test_conformance/subgroups/test_queries.cpp +++ b/test_conformance/subgroups/test_queries.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "procs.h" +#include "subhelpers.h" typedef struct { @@ -21,23 +22,10 @@ typedef struct cl_uint numSubGroups; } result_data; -static const char *query_kernel_source = - "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" - "\n" - "typedef struct {\n" - " uint maxSubGroupSize;\n" - " uint numSubGroups;\n" - "} result_data;\n" - "\n" - "__kernel void query_kernel( __global result_data *outData )\n" - "{\n" - " int gid = get_global_id( 0 );\n" - " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" - " outData[gid].numSubGroups = get_num_sub_groups();\n" - "}"; int test_sub_group_info(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) + cl_command_queue queue, int num_elements, + bool useCoreSubgroups) { static const size_t gsize0 = 80; int i, error; @@ -58,9 +46,29 @@ int test_sub_group_info(cl_device_id device, cl_context context, clProgramWrapper program; clKernelWrapper kernel; clMemWrapper out; + std::stringstream kernel_sstr; + if (useCoreSubgroups) + { + kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; + } + kernel_sstr + << "\n" + "typedef struct {\n" + " uint maxSubGroupSize;\n" + " uint numSubGroups;\n" + "} result_data;\n" + "\n" + "__kernel void query_kernel( __global result_data *outData )\n" + "{\n" + " int gid = get_global_id( 0 );\n" + " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" + " outData[gid].numSubGroups = get_num_sub_groups();\n" + "}"; + const std::string &kernel_str = kernel_sstr.str(); + const char *kernel_src = kernel_str.c_str(); error = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &query_kernel_source, "query_kernel", + context, &program, &kernel, 1, &kernel_src, "query_kernel", "-cl-std=CL2.0"); if (error != 0) return error; @@ -88,44 +96,50 @@ int test_sub_group_info(cl_device_id device, cl_context context, (void *)&platform, NULL); test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM"); - clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr; - clGetKernelSubGroupInfoKHR_ptr = - (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform( - platform, "clGetKernelSubGroupInfoKHR"); - if (clGetKernelSubGroupInfoKHR_ptr == NULL) + subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups); + clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr = + subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); + if (clGetKernelSubGroupInfo_ptr == NULL) { - log_error("ERROR: clGetKernelSubGroupInfoKHR function not available"); - return -1; + log_error("ERROR: %s function not available", + subgroupsApiSet.clGetKernelSubGroupInfo_name); + return TEST_FAIL; } - error = clGetKernelSubGroupInfoKHR_ptr( - kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, - sizeof(local), (void *)&local, sizeof(kernel_max_subgroup_size), + error = clGetKernelSubGroupInfo_ptr( + kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(local), + (void *)&local, sizeof(kernel_max_subgroup_size), (void *)&kernel_max_subgroup_size, &realSize); - test_error(error, - "clGetKernelSubGroupInfoKHR failed for " - "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); - log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR for the kernel " - "is %d.\n", - (int)kernel_max_subgroup_size); - + if (error != CL_SUCCESS) + { + log_error("ERROR: %s function error for " + "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE", + subgroupsApiSet.clGetKernelSubGroupInfo_name); + return TEST_FAIL; + } + log_info( + "The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n", + (int)kernel_max_subgroup_size); if (realSize != sizeof(kernel_max_subgroup_size)) { log_error("ERROR: Returned size of max sub group size not valid! " "(Expected %d, got %d)\n", (int)sizeof(kernel_max_subgroup_size), (int)realSize); - return -1; + return TEST_FAIL; } - - error = clGetKernelSubGroupInfoKHR_ptr( - kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, - sizeof(local), (void *)&local, sizeof(kernel_subgroup_count), + error = clGetKernelSubGroupInfo_ptr( + kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(local), + (void *)&local, sizeof(kernel_subgroup_count), (void *)&kernel_subgroup_count, &realSize); - test_error(error, - "clGetKernelSubGroupInfoKHR failed for " - "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); + if (error != CL_SUCCESS) + { + log_error("ERROR: %s function error " + "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE", + subgroupsApiSet.clGetKernelSubGroupInfo_name); + return TEST_FAIL; + } log_info( - "The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR for the kernel is %d.\n", + "The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_subgroup_count); if (realSize != sizeof(kernel_subgroup_count)) @@ -133,7 +147,7 @@ int test_sub_group_info(cl_device_id device, cl_context context, log_error("ERROR: Returned size of sub group count not valid! " "(Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize); - return -1; + return TEST_FAIL; } // Verify that the kernel gets the same max_subgroup_size and subgroup_count @@ -176,3 +190,24 @@ int test_sub_group_info(cl_device_id device, cl_context context, return 0; } + +int test_sub_group_info_core(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_sub_group_info(device, context, queue, num_elements, true); +} + +int test_sub_group_info_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); + + if (!hasExtension) + { + log_info( + "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + + return test_sub_group_info(device, context, queue, num_elements, false); +} \ No newline at end of file diff --git a/test_conformance/subgroups/test_workgroup.cpp b/test_conformance/subgroups/test_workgroup.cpp index 572220f7..bc9d6190 100644 --- a/test_conformance/subgroups/test_workgroup.cpp +++ b/test_conformance/subgroups/test_workgroup.cpp @@ -126,98 +126,6 @@ static const char *scexmin_source = " out[gid] = sub_group_scan_exclusive_min(in[gid]);\n" "}\n"; -// These need to stay in sync with the kernel source below -#define NUM_LOC 49 -#define INST_LOC_MASK 0x7f -#define INST_OP_SHIFT 0 -#define INST_OP_MASK 0xf -#define INST_LOC_SHIFT 4 -#define INST_VAL_SHIFT 12 -#define INST_VAL_MASK 0x7ffff -#define INST_END 0x0 -#define INST_STORE 0x1 -#define INST_WAIT 0x2 -#define INST_COUNT 0x3 - -static const char *ifp_source = - "#define NUM_LOC 49\n" - "#define INST_LOC_MASK 0x7f\n" - "#define INST_OP_SHIFT 0\n" - "#define INST_OP_MASK 0xf\n" - "#define INST_LOC_SHIFT 4\n" - "#define INST_VAL_SHIFT 12\n" - "#define INST_VAL_MASK 0x7ffff\n" - "#define INST_END 0x0\n" - "#define INST_STORE 0x1\n" - "#define INST_WAIT 0x2\n" - "#define INST_COUNT 0x3\n" - "\n" - "__kernel void\n" - "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n" - "{\n" - " __local atomic_int loc[NUM_LOC];\n" - "\n" - " // Don't run if there is only one sub group\n" - " if (get_num_sub_groups() == 1)\n" - " return;\n" - "\n" - " // First initialize loc[]\n" - " int lid = (int)get_local_id(0);\n" - "\n" - " if (lid < NUM_LOC)\n" - " atomic_init(loc+lid, 0);\n" - "\n" - " work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" - "\n" - " // Compute pointer to this sub group's \"instructions\"\n" - " const __global int *pc = in +\n" - " ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n" - " (int)get_sub_group_id()) *\n" - " (NUM_LOC+1);\n" - "\n" - " // Set up to \"run\"\n" - " bool ok = (int)get_sub_group_local_id() == 0;\n" - " bool run = true;\n" - "\n" - " while (run) {\n" - " int inst = *pc++;\n" - " int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n" - " int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n" - " int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n" - "\n" - " switch (iop) {\n" - " case INST_STORE:\n" - " if (ok)\n" - " atomic_store(loc+iloc, ival);\n" - " break;\n" - " case INST_WAIT:\n" - " if (ok) {\n" - " while (atomic_load(loc+iloc) != ival)\n" - " ;\n" - " }\n" - " break;\n" - " case INST_COUNT:\n" - " if (ok) {\n" - " int i;\n" - " for (i=0;i struct AA @@ -713,169 +621,11 @@ template struct BC } }; -// Independent forward progress stuff -// Note: -// Output needs num_groups * NUM_LOC elements -// local_size must be > NUM_LOC -// Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements - -static inline int inst(int op, int loc, int val) -{ - return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT) - | (op << INST_OP_SHIFT); -} - -void gen_insts(cl_int *x, cl_int *p, int n) -{ - int i, j0, j1; - int val; - int ii[NUM_LOC]; - - // Create a random permutation of 0...NUM_LOC-1 - ii[0] = 0; - for (i = 1; i < NUM_LOC; ++i) - { - j0 = random_in_range(0, i, gMTdata); - if (j0 != i) ii[i] = ii[j0]; - ii[j0] = i; - } - - // Initialize "instruction pointers" - memset(p, 0, n * 4); - - for (i = 0; i < NUM_LOC; ++i) - { - // Randomly choose 2 different sub groups - // One does a random amount of work, and the other waits for it - j0 = random_in_range(0, n - 1, gMTdata); - - do - { - j1 = random_in_range(0, n - 1, gMTdata); - } while (j1 == j0); - - // Randomly choose a wait value and assign "instructions" - val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata); - x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val); - x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val); - ++p[j0]; - ++p[j1]; - } - - // Last "inst" for each sub group is END - for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0); -} - -// Execute one group's "instructions" -void run_insts(cl_int *x, cl_int *p, int n) -{ - int i, nend; - bool scont; - cl_int loc[NUM_LOC]; - - // Initialize result and "instruction pointers" - memset(loc, 0, sizeof(loc)); - memset(p, 0, 4 * n); - - // Repetitively loop over subgroups with each executing "instructions" until - // blocked The loop terminates when all subgroups have hit the "END - // instruction" - do - { - nend = 0; - for (i = 0; i < n; ++i) - { - do - { - cl_int inst = x[i * (NUM_LOC + 1) + p[i]]; - cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK; - cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK; - cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK; - scont = false; - - switch (iop) - { - case INST_STORE: - loc[iloc] = ival; - ++p[i]; - scont = true; - break; - case INST_WAIT: - if (loc[iloc] == ival) - { - ++p[i]; - scont = true; - } - break; - case INST_COUNT: - loc[iloc] += ival; - ++p[i]; - scont = true; - break; - case INST_END: ++nend; break; - } - } while (scont); - } - } while (nend < n); - - // Return result, reusing "p" - memcpy(p, loc, sizeof(loc)); -} - - -struct IFP -{ - static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng) - { - int k; - int nj = (nw + ns - 1) / ns; - - // We need at least 2 sub groups per group for this test - if (nj == 1) return; - - for (k = 0; k < ng; ++k) - { - gen_insts(x, t, nj); - x += nj * (NUM_LOC + 1); - } - } - - static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns, - int nw, int ng) - { - int i, k; - int nj = (nw + ns - 1) / ns; - - // We need at least 2 sub groups per group for this tes - if (nj == 1) return 0; - - log_info(" independent forward progress...\n"); - - for (k = 0; k < ng; ++k) - { - run_insts(x, t, nj); - for (i = 0; i < NUM_LOC; ++i) - { - if (t[i] != y[i]) - { - log_error( - "ERROR: mismatch at element %d in work group %d\n", i, - k); - return -1; - } - } - x += nj * (NUM_LOC + 1); - y += NUM_LOC; - } - - return 0; - } -}; - // Entry point from main int test_work_group_functions(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) + cl_command_queue queue, int num_elements, + bool useCoreSubgroups) { int error; @@ -884,161 +634,242 @@ int test_work_group_functions(cl_device_id device, cl_context context, #define L 200 error = test, G, L>::run(device, context, queue, num_elements, - "test_any", any_source); + "test_any", any_source, 0, + useCoreSubgroups); error |= test, G, L>::run(device, context, queue, num_elements, - "test_all", all_source); + "test_all", all_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, queue, // num_elements, "test_bcast", bcast_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_bcast", bcast_source); + device, context, queue, num_elements, "test_bcast", bcast_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_bcast", bcast_source); + device, context, queue, num_elements, "test_bcast", bcast_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_bcast", bcast_source); + device, context, queue, num_elements, "test_bcast", bcast_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_bcast", bcast_source); + device, context, queue, num_elements, "test_bcast", bcast_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_bcast", bcast_source); + device, context, queue, num_elements, "test_bcast", bcast_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_bcast", bcast_source); + device, context, queue, num_elements, "test_bcast", bcast_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, queue, // num_elements, "test_redadd", redadd_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redadd", redadd_source); + device, context, queue, num_elements, "test_redadd", redadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redadd", redadd_source); + device, context, queue, num_elements, "test_redadd", redadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redadd", redadd_source); + device, context, queue, num_elements, "test_redadd", redadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redadd", redadd_source); + device, context, queue, num_elements, "test_redadd", redadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redadd", redadd_source); + device, context, queue, num_elements, "test_redadd", redadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redadd", redadd_source); + device, context, queue, num_elements, "test_redadd", redadd_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, queue, // num_elements, "test_redmax", redmax_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmax", redmax_source); + device, context, queue, num_elements, "test_redmax", redmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmax", redmax_source); + device, context, queue, num_elements, "test_redmax", redmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmax", redmax_source); + device, context, queue, num_elements, "test_redmax", redmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmax", redmax_source); + device, context, queue, num_elements, "test_redmax", redmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmax", redmax_source); + device, context, queue, num_elements, "test_redmax", redmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmax", redmax_source); + device, context, queue, num_elements, "test_redmax", redmax_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, queue, // num_elements, "test_redmin", redmin_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmin", redmin_source); + device, context, queue, num_elements, "test_redmin", redmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmin", redmin_source); + device, context, queue, num_elements, "test_redmin", redmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmin", redmin_source); + device, context, queue, num_elements, "test_redmin", redmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmin", redmin_source); + device, context, queue, num_elements, "test_redmin", redmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmin", redmin_source); + device, context, queue, num_elements, "test_redmin", redmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_redmin", redmin_source); + device, context, queue, num_elements, "test_redmin", redmin_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, // queue, num_elements, "test_scinadd", scinadd_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinadd", scinadd_source); + device, context, queue, num_elements, "test_scinadd", scinadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinadd", scinadd_source); + device, context, queue, num_elements, "test_scinadd", scinadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinadd", scinadd_source); + device, context, queue, num_elements, "test_scinadd", scinadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinadd", scinadd_source); + device, context, queue, num_elements, "test_scinadd", scinadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinadd", scinadd_source); + device, context, queue, num_elements, "test_scinadd", scinadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinadd", scinadd_source); + device, context, queue, num_elements, "test_scinadd", scinadd_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, // queue, num_elements, "test_scinmax", scinmax_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmax", scinmax_source); + device, context, queue, num_elements, "test_scinmax", scinmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmax", scinmax_source); + device, context, queue, num_elements, "test_scinmax", scinmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmax", scinmax_source); + device, context, queue, num_elements, "test_scinmax", scinmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmax", scinmax_source); + device, context, queue, num_elements, "test_scinmax", scinmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmax", scinmax_source); + device, context, queue, num_elements, "test_scinmax", scinmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmax", scinmax_source); + device, context, queue, num_elements, "test_scinmax", scinmax_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, // queue, num_elements, "test_scinmin", scinmin_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmin", scinmin_source); + device, context, queue, num_elements, "test_scinmin", scinmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmin", scinmin_source); + device, context, queue, num_elements, "test_scinmin", scinmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmin", scinmin_source); + device, context, queue, num_elements, "test_scinmin", scinmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmin", scinmin_source); + device, context, queue, num_elements, "test_scinmin", scinmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmin", scinmin_source); + device, context, queue, num_elements, "test_scinmin", scinmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scinmin", scinmin_source); + device, context, queue, num_elements, "test_scinmin", scinmin_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, // queue, num_elements, "test_scexadd", scexadd_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexadd", scexadd_source); + device, context, queue, num_elements, "test_scexadd", scexadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexadd", scexadd_source); + device, context, queue, num_elements, "test_scexadd", scexadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexadd", scexadd_source); + device, context, queue, num_elements, "test_scexadd", scexadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexadd", scexadd_source); + device, context, queue, num_elements, "test_scexadd", scexadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexadd", scexadd_source); + device, context, queue, num_elements, "test_scexadd", scexadd_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexadd", scexadd_source); + device, context, queue, num_elements, "test_scexadd", scexadd_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, // queue, num_elements, "test_scexmax", scexmax_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmax", scexmax_source); + device, context, queue, num_elements, "test_scexmax", scexmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmax", scexmax_source); + device, context, queue, num_elements, "test_scexmax", scexmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmax", scexmax_source); + device, context, queue, num_elements, "test_scexmax", scexmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmax", scexmax_source); + device, context, queue, num_elements, "test_scexmax", scexmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmax", scexmax_source); + device, context, queue, num_elements, "test_scexmax", scexmax_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmax", scexmax_source); + device, context, queue, num_elements, "test_scexmax", scexmax_source, 0, + useCoreSubgroups); // error |= test, G, L>::run(device, context, // queue, num_elements, "test_scexmin", scexmin_source); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmin", scexmin_source); + device, context, queue, num_elements, "test_scexmin", scexmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmin", scexmin_source); + device, context, queue, num_elements, "test_scexmin", scexmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmin", scexmin_source); + device, context, queue, num_elements, "test_scexmin", scexmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmin", scexmin_source); + device, context, queue, num_elements, "test_scexmin", scexmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmin", scexmin_source); + device, context, queue, num_elements, "test_scexmin", scexmin_source, 0, + useCoreSubgroups); error |= test, G, L>::run( - device, context, queue, num_elements, "test_scexmin", scexmin_source); - - error |= test::run(device, context, queue, num_elements, - "test_ifp", ifp_source, NUM_LOC + 1); + device, context, queue, num_elements, "test_scexmin", scexmin_source, 0, + useCoreSubgroups); return error; } + +int test_work_group_functions_core(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_work_group_functions(device, context, queue, num_elements, + true); +} + +int test_work_group_functions_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); + + if (!hasExtension) + { + log_info( + "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + return test_work_group_functions(device, context, queue, num_elements, + false); +} diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp index 125ad9e9..b77bfe1a 100644 --- a/test_conformance/subgroups/test_workitem.cpp +++ b/test_conformance/subgroups/test_workitem.cpp @@ -34,29 +34,6 @@ struct get_test_data } }; -static const char *get_test_source = - "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" - "\n" - "typedef struct {\n" - " uint subGroupSize;\n" - " uint maxSubGroupSize;\n" - " uint numSubGroups;\n" - " uint enqNumSubGroups;\n" - " uint subGroupId;\n" - " uint subGroupLocalId;\n" - "} get_test_data;\n" - "\n" - "__kernel void get_test( __global get_test_data *outData )\n" - "{\n" - " int gid = get_global_id( 0 );\n" - " outData[gid].subGroupSize = get_sub_group_size();\n" - " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" - " outData[gid].numSubGroups = get_num_sub_groups();\n" - " outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n" - " outData[gid].subGroupId = get_sub_group_id();\n" - " outData[gid].subGroupLocalId = get_sub_group_local_id();\n" - "}"; - static int check_group(const get_test_data *result, int nw, cl_uint ensg, int maxwgs) { @@ -207,7 +184,8 @@ static int check_group(const get_test_data *result, int nw, cl_uint ensg, } int test_work_item_functions(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) + cl_command_queue queue, int num_elements, + bool useCoreSubgroups) { static const size_t lsize = 200; int error; @@ -220,9 +198,37 @@ int test_work_item_functions(cl_device_id device, cl_context context, clProgramWrapper program; clKernelWrapper kernel; clMemWrapper out; - + std::stringstream kernel_sstr; + if (useCoreSubgroups) + { + kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; + } + kernel_sstr + << "\n" + "\n" + "typedef struct {\n" + " uint subGroupSize;\n" + " uint maxSubGroupSize;\n" + " uint numSubGroups;\n" + " uint enqNumSubGroups;\n" + " uint subGroupId;\n" + " uint subGroupLocalId;\n" + "} get_test_data;\n" + "\n" + "__kernel void get_test( __global get_test_data *outData )\n" + "{\n" + " int gid = get_global_id( 0 );\n" + " outData[gid].subGroupSize = get_sub_group_size();\n" + " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" + " outData[gid].numSubGroups = get_num_sub_groups();\n" + " outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n" + " outData[gid].subGroupId = get_sub_group_id();\n" + " outData[gid].subGroupLocalId = get_sub_group_local_id();\n" + "}"; + const std::string &kernel_str = kernel_sstr.str(); + const char *kernel_src = kernel_str.c_str(); error = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &get_test_source, "get_test", + context, &program, &kernel, 1, &kernel_src, "get_test", "-cl-std=CL2.0"); if (error != 0) return error; @@ -301,3 +307,25 @@ int test_work_item_functions(cl_device_id device, cl_context context, return 0; } + +int test_work_item_functions_core(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_work_item_functions(device, context, queue, num_elements, true); +} + +int test_work_item_functions_ext(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); + + if (!hasExtension) + { + log_info( + "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + + return test_work_item_functions(device, context, queue, num_elements, + false); +} \ No newline at end of file