mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Initial open source release of OpenCL 2.2 CTS.
This commit is contained in:
20
test_conformance/subgroups/CMakeLists.txt
Normal file
20
test_conformance/subgroups/CMakeLists.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
set(MODULE_NAME SUBGROUPS)
|
||||
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
main.cpp
|
||||
test_barrier.cpp
|
||||
test_queries.cpp
|
||||
test_workitem.cpp
|
||||
test_workgroup.cpp
|
||||
../../test_common/harness/errorHelpers.c
|
||||
../../test_common/harness/testHarness.c
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
../../test_common/harness/typeWrappers.cpp
|
||||
../../test_common/harness/mt19937.c
|
||||
../../test_common/harness/msvc9.c
|
||||
../../test_common/harness/ThreadPool.c
|
||||
../../test_common/harness/conversions.c
|
||||
../../test_common/harness/parseParameters.cpp
|
||||
)
|
||||
|
||||
include(../CMakeCommon.txt)
|
||||
26
test_conformance/subgroups/Jamfile
Normal file
26
test_conformance/subgroups/Jamfile
Normal file
@@ -0,0 +1,26 @@
|
||||
project
|
||||
: requirements
|
||||
<toolset>gcc:<cflags>-xc++
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
exe test_subgroups
|
||||
: main.cpp
|
||||
test_queries.cpp
|
||||
test_workitem.cpp
|
||||
test_workgroup.cpp
|
||||
test_barrier.cpp
|
||||
../../test_common/harness/errorHelpers.c
|
||||
../../test_common/harness/threadTesting.c
|
||||
../../test_common/harness/testHarness.c
|
||||
../../test_common/harness/mt19937.c
|
||||
../../test_common/harness/conversions.c
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
../../test_common/harness/mt19937.c
|
||||
: <target-os>windows:<source>../../test_common/harness/msvc9.c
|
||||
;
|
||||
install dist
|
||||
: test_subgroups
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/subgroups
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/subgroups
|
||||
;
|
||||
|
||||
60
test_conformance/subgroups/main.cpp
Normal file
60
test_conformance/subgroups/main.cpp
Normal file
@@ -0,0 +1,60 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
MTdata gMTdata;
|
||||
|
||||
basefn basefn_list[] = {
|
||||
test_sub_group_info,
|
||||
test_work_item_functions,
|
||||
test_work_group_functions,
|
||||
test_barrier_functions,
|
||||
};
|
||||
|
||||
const char *basefn_names[] = {
|
||||
"sub_group_info",
|
||||
"work_item_functions",
|
||||
"work_group_functions",
|
||||
"barrier_functions",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
static const int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
static int
|
||||
checkSubGroupsExtension(cl_device_id device)
|
||||
{
|
||||
if (!is_extension_available(device, "cl_khr_subgroups")) {
|
||||
log_info("Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
||||
return CL_INVALID_DEVICE;
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, const char *argv[])
|
||||
{
|
||||
gMTdata = init_genrand(0);
|
||||
return runTestHarnessWithCheck(argc, argv, num_fns, basefn_list, basefn_names, false, false, NULL, checkSubGroupsExtension);
|
||||
}
|
||||
|
||||
43
test_conformance/subgroups/procs.h
Normal file
43
test_conformance/subgroups/procs.h
Normal file
@@ -0,0 +1,43 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _procs_h
|
||||
#define _procs_h
|
||||
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/threadTesting.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
extern MTdata gMTdata;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern int test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_work_item_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_work_group_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_barrier_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_pipe_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /*_procs_h*/
|
||||
286
test_conformance/subgroups/subhelpers.h
Normal file
286
test_conformance/subgroups/subhelpers.h
Normal file
@@ -0,0 +1,286 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef SUBHELPERS_H
|
||||
#define SUBHELPERS_H
|
||||
|
||||
#include "testHarness.h"
|
||||
#include "kernelHelpers.h"
|
||||
#include "typeWrappers.h"
|
||||
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
// Some template helpers
|
||||
template <typename Ty> struct TypeName;
|
||||
template <> struct TypeName<cl_half> { static const char * val() { return "half"; } };
|
||||
template <> struct TypeName<cl_uint> { static const char * val() { return "uint"; } };
|
||||
template <> struct TypeName<cl_int> { static const char * val() { return "int"; } };
|
||||
template <> struct TypeName<cl_ulong> { static const char * val() { return "ulong"; } };
|
||||
template <> struct TypeName<cl_long> { static const char * val() { return "long"; } };
|
||||
template <> struct TypeName<float> { static const char * val() { return "float"; } };
|
||||
template <> struct TypeName<double> { static const char * val() { return "double"; } };
|
||||
|
||||
template <typename Ty> struct TypeDef;
|
||||
template <> struct TypeDef<cl_half> { static const char * val() { return "typedef half Type;\n"; } };
|
||||
template <> struct TypeDef<cl_uint> { static const char * val() { return "typedef uint Type;\n"; } };
|
||||
template <> struct TypeDef<cl_int> { static const char * val() { return "typedef int Type;\n"; } };
|
||||
template <> struct TypeDef<cl_ulong> { static const char * val() { return "typedef ulong Type;\n"; } };
|
||||
template <> struct TypeDef<cl_long> { static const char * val() { return "typedef long Type;\n"; } };
|
||||
template <> struct TypeDef<float> { static const char * val() { return "typedef float Type;\n"; } };
|
||||
template <> struct TypeDef<double> { static const char * val() { return "typedef double Type;\n"; } };
|
||||
|
||||
template <typename Ty, int Which> struct TypeIdentity;
|
||||
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)0.0; } };
|
||||
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return -(cl_half)65536.0; } };
|
||||
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)65536.0; } };
|
||||
|
||||
template <> struct TypeIdentity<cl_uint,0> { static cl_uint val() { return (cl_uint)0; } };
|
||||
template <> struct TypeIdentity<cl_uint,1> { static cl_uint val() { return (cl_uint)0; } };
|
||||
template <> struct TypeIdentity<cl_uint,2> { static cl_uint val() { return (cl_uint)0xffffffff; } };
|
||||
|
||||
template <> struct TypeIdentity<cl_int,0> { static cl_int val() { return (cl_int)0 ; } };
|
||||
template <> struct TypeIdentity<cl_int,1> { static cl_int val() { return (cl_int)0x80000000; } };
|
||||
template <> struct TypeIdentity<cl_int,2> { static cl_int val() { return (cl_int)0x7fffffff; } };
|
||||
|
||||
template <> struct TypeIdentity<cl_ulong,0> { static cl_ulong val() { return (cl_ulong)0 ; } };
|
||||
template <> struct TypeIdentity<cl_ulong,1> { static cl_ulong val() { return (cl_ulong)0 ; } };
|
||||
template <> struct TypeIdentity<cl_ulong,2> { static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL ; } };
|
||||
|
||||
template <> struct TypeIdentity<cl_long,0> { static cl_long val() { return (cl_long)0; } };
|
||||
template <> struct TypeIdentity<cl_long,1> { static cl_long val() { return (cl_long)0x8000000000000000ULL; } };
|
||||
template <> struct TypeIdentity<cl_long,2> { static cl_long val() { return (cl_long)0x7fffffffffffffffULL; } };
|
||||
|
||||
|
||||
template <> struct TypeIdentity<float,0> { static float val() { return 0.F; } };
|
||||
template <> struct TypeIdentity<float,1> { static float val() { return -std::numeric_limits<float>::infinity(); } };
|
||||
template <> struct TypeIdentity<float,2> { static float val() { return std::numeric_limits<float>::infinity(); } };
|
||||
|
||||
template <> struct TypeIdentity<double,0> { static double val() { return 0.L; } };
|
||||
|
||||
template <> struct TypeIdentity<double,1> { static double val() { return -std::numeric_limits<double>::infinity(); } };
|
||||
template <> struct TypeIdentity<double,2> { static double val() { return std::numeric_limits<double>::infinity(); } };
|
||||
|
||||
template <typename Ty> struct TypeCheck;
|
||||
template <> struct TypeCheck<cl_uint> { static bool val(cl_device_id) { return true; } };
|
||||
template <> struct TypeCheck<cl_int> { static bool val(cl_device_id) { return true; } };
|
||||
|
||||
static bool
|
||||
int64_ok(cl_device_id device)
|
||||
{
|
||||
char profile[128];
|
||||
int error;
|
||||
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
|
||||
if (error) {
|
||||
log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (strcmp(profile, "EMBEDDED_PROFILE") == 0)
|
||||
return is_extension_available(device, "cles_khr_int64");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <> struct TypeCheck<cl_ulong> { static bool val(cl_device_id device) { return int64_ok(device); } };
|
||||
template <> struct TypeCheck<cl_long> { static bool val(cl_device_id device) { return int64_ok(device); } };
|
||||
template <> struct TypeCheck<cl_float> { static bool val(cl_device_id) { return true; } };
|
||||
template <> struct TypeCheck<cl_half> {
|
||||
static bool val(cl_device_id device) { return is_extension_available(device, "cl_khr_fp16"); }
|
||||
};
|
||||
template <> struct TypeCheck<double> {
|
||||
static bool val(cl_device_id device) {
|
||||
int error;
|
||||
cl_device_fp_config c;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), (void *)&c, NULL);
|
||||
if (error) {
|
||||
log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
|
||||
return false;
|
||||
}
|
||||
return c != 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Run a test kernel to compute the result of a built-in on an input
|
||||
static int
|
||||
run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t global, size_t local,
|
||||
void *idata, size_t isize, void *mdata, size_t msize,
|
||||
void *odata, size_t osize, size_t tsize=0)
|
||||
{
|
||||
clMemWrapper in;
|
||||
clMemWrapper xy;
|
||||
clMemWrapper out;
|
||||
clMemWrapper tmp;
|
||||
int error;
|
||||
|
||||
in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
if (tsize) {
|
||||
tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, tsize, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
}
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
if (tsize) {
|
||||
error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
}
|
||||
|
||||
error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
// Driver for testing a single built in function
|
||||
template <typename Ty, typename Fns, size_t GSIZE, size_t LSIZE, size_t TSIZE=0>
|
||||
struct test {
|
||||
static int
|
||||
run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, const char *kname, const char *src, int dynscl=0)
|
||||
{
|
||||
size_t tmp;
|
||||
int error;
|
||||
int subgroup_size, num_subgroups;
|
||||
size_t realSize;
|
||||
size_t global;
|
||||
size_t local;
|
||||
const char *kstrings[3];
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
cl_platform_id platform;
|
||||
cl_int sgmap[2*GSIZE];
|
||||
Ty mapin[LSIZE];
|
||||
Ty mapout[LSIZE];
|
||||
|
||||
// Make sure a test of type Ty is supported by the device
|
||||
if (!TypeCheck<Ty>::val(device))
|
||||
return 0;
|
||||
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL);
|
||||
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
|
||||
|
||||
kstrings[0] = "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
|
||||
"#define XY(M,I) M[I].x = get_sub_group_local_id(); M[I].y = get_sub_group_id();\n";
|
||||
kstrings[1] = TypeDef<Ty>::val();
|
||||
kstrings[2] = src;
|
||||
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0");
|
||||
if (error != 0)
|
||||
return error;
|
||||
|
||||
// Determine some local dimensions to use for the test.
|
||||
global = GSIZE;
|
||||
error = get_max_common_work_group_size(context, kernel, GSIZE, &local);
|
||||
test_error(error, "get_max_common_work_group_size failed");
|
||||
|
||||
// Limit it a bit so we have muliple work groups
|
||||
// Ideally this will still be large enough to give us multiple subgroups
|
||||
if (local > LSIZE)
|
||||
local = LSIZE;
|
||||
|
||||
// Get the sub group info
|
||||
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr;
|
||||
clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platform,
|
||||
"clGetKernelSubGroupInfoKHR");
|
||||
if (clGetKernelSubGroupInfoKHR_ptr == NULL) {
|
||||
log_error("ERROR: clGetKernelSubGroupInfoKHR function not available");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
|
||||
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
|
||||
subgroup_size = (int)tmp;
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
|
||||
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
|
||||
num_subgroups = (int)tmp;
|
||||
|
||||
// Make sure the number of sub groups is what we expect
|
||||
if (num_subgroups != (local + subgroup_size - 1)/ subgroup_size) {
|
||||
log_error("ERROR: unexpected number of subgroups (%d) returned by clGetKernelSubGroupInfoKHR\n", num_subgroups);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<Ty> idata;
|
||||
std::vector<Ty> odata;
|
||||
size_t input_array_size = GSIZE;
|
||||
size_t output_array_size = GSIZE;
|
||||
|
||||
if (dynscl != 0) {
|
||||
input_array_size = (int)global / (int)local * num_subgroups * dynscl;
|
||||
output_array_size = (int)global / (int)local * dynscl;
|
||||
}
|
||||
|
||||
idata.resize(input_array_size);
|
||||
odata.resize(output_array_size);
|
||||
|
||||
// Run the kernel once on zeroes to get the map
|
||||
memset(&idata[0], 0, input_array_size * sizeof(Ty));
|
||||
error = run_kernel(context, queue, kernel, global, local,
|
||||
&idata[0], input_array_size * sizeof(Ty),
|
||||
sgmap, global*sizeof(cl_int)*2,
|
||||
&odata[0], output_array_size * sizeof(Ty),
|
||||
TSIZE*sizeof(Ty));
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
// Generate the desired input for the kernel
|
||||
Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local, (int)global / (int)local);
|
||||
|
||||
error = run_kernel(context, queue, kernel, global, local,
|
||||
&idata[0], input_array_size * sizeof(Ty),
|
||||
sgmap, global*sizeof(cl_int)*2,
|
||||
&odata[0], output_array_size * sizeof(Ty),
|
||||
TSIZE*sizeof(Ty));
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
|
||||
// Check the result
|
||||
return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap, subgroup_size, (int)local, (int)global / (int)local);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
147
test_conformance/subgroups/test_barrier.cpp
Normal file
147
test_conformance/subgroups/test_barrier.cpp
Normal file
@@ -0,0 +1,147 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
static const char * lbar_source =
|
||||
"__kernel void test_lbar(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" __local int tmp[200];\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" int nid = get_sub_group_size();\n"
|
||||
" int lid = get_sub_group_local_id();\n"
|
||||
" xy[gid].x = lid;\n"
|
||||
" xy[gid].y = get_sub_group_id();\n"
|
||||
" if (get_sub_group_id() == 0) {\n"
|
||||
" tmp[lid] = in[gid];\n"
|
||||
" sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
" out[gid] = tmp[nid-1-lid];\n"
|
||||
" } else {\n"
|
||||
" out[gid] = -in[gid];\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
|
||||
static const char * gbar_source =
|
||||
"__kernel void test_gbar(const __global Type *in, __global int2 *xy, __global Type *out, __global Type *tmp)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" int nid = get_sub_group_size();\n"
|
||||
" int lid = get_sub_group_local_id();\n"
|
||||
" int tof = get_group_id(0)*get_max_sub_group_size();\n"
|
||||
" xy[gid].x = lid;\n"
|
||||
" xy[gid].y = get_sub_group_id();\n"
|
||||
" if (get_sub_group_id() == 0) {\n"
|
||||
" tmp[tof+lid] = in[gid];\n"
|
||||
" sub_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
" out[gid] = tmp[tof+nid-1-lid];\n"
|
||||
" } else {\n"
|
||||
" out[gid] = -in[gid];\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
|
||||
// barrier test functions
|
||||
template <int Which>
|
||||
struct BAR {
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
int e;
|
||||
|
||||
ii = 0;
|
||||
for (k=0; k<ng; ++k) {
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i=0;i<n;++i)
|
||||
t[ii+i] = genrand_int32(gMTdata);
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j=0;j<nw;++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
cl_int tr, rr;
|
||||
|
||||
if (Which == 0)
|
||||
log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...\n");
|
||||
else
|
||||
log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...\n");
|
||||
|
||||
for (k=0; k<ng; ++k) {
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j=0; j<nw; ++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i=0; i<n; ++i) {
|
||||
tr = j == 0 ? mx[ii + n - 1 - i] : -mx[ii + i];
|
||||
rr = my[ii + i];
|
||||
|
||||
if (tr != rr) {
|
||||
log_error("ERROR: sub_group_barrier mismatch for local id %d in sub group %d in group %d\n",
|
||||
i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Entry point from main
|
||||
int
|
||||
test_barrier_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
|
||||
// Adjust these individually below if desired/needed
|
||||
#define G 2000
|
||||
#define L 200
|
||||
|
||||
error = test<cl_int, BAR<0>, G, L>::run(device, context, queue, num_elements, "test_lbar", lbar_source);
|
||||
error = test<cl_int, BAR<1>, G, L, G>::run(device, context, queue, num_elements, "test_gbar", gbar_source);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
136
test_conformance/subgroups/test_queries.cpp
Normal file
136
test_conformance/subgroups/test_queries.cpp
Normal file
@@ -0,0 +1,136 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
|
||||
typedef struct {
|
||||
cl_uint maxSubGroupSize;
|
||||
cl_uint numSubGroups;
|
||||
} result_data;
|
||||
|
||||
static const char * query_kernel_source =
|
||||
"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
|
||||
"\n"
|
||||
"typedef struct {\n"
|
||||
" uint maxSubGroupSize;\n"
|
||||
" uint numSubGroups;\n"
|
||||
"} result_data;\n"
|
||||
"\n"
|
||||
"__kernel void query_kernel( __global result_data *outData )\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id( 0 );\n"
|
||||
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
|
||||
" outData[gid].numSubGroups = get_num_sub_groups();\n"
|
||||
"}";
|
||||
|
||||
int
|
||||
test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
static const size_t gsize0 = 80;
|
||||
int i, error;
|
||||
size_t realSize;
|
||||
size_t kernel_max_subgroup_size, kernel_subgroup_count;
|
||||
size_t global[] = {gsize0,14,10};
|
||||
size_t local[] = {0,0,0};
|
||||
result_data result[gsize0];
|
||||
|
||||
cl_uint max_dimensions;
|
||||
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL);
|
||||
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
|
||||
|
||||
cl_platform_id platform;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper out;
|
||||
|
||||
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &query_kernel_source, "query_kernel", "-cl-std=CL2.0");
|
||||
if (error != 0)
|
||||
return error;
|
||||
|
||||
// Determine some local dimensions to use for the test.
|
||||
if (max_dimensions == 1) {
|
||||
error = get_max_common_work_group_size(context, kernel, global[0], &local[0]);
|
||||
test_error(error, "get_max_common_work_group_size failed");
|
||||
} else if (max_dimensions == 2) {
|
||||
error = get_max_common_2D_work_group_size(context, kernel, global, local);
|
||||
test_error(error, "get_max_common_2D_work_group_size failed");
|
||||
} else {
|
||||
error = get_max_common_3D_work_group_size(context, kernel, global, local);
|
||||
test_error(error, "get_max_common_3D_work_group_size failed");
|
||||
}
|
||||
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL);
|
||||
test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM");
|
||||
|
||||
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr;
|
||||
clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clGetKernelSubGroupInfoKHR");
|
||||
if (clGetKernelSubGroupInfoKHR_ptr == NULL) {
|
||||
log_error("ERROR: clGetKernelSubGroupInfoKHR function not available");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(kernel_max_subgroup_size), (void *)&kernel_max_subgroup_size, &realSize);
|
||||
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
|
||||
log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR for the kernel is %d.\n", (int)kernel_max_subgroup_size);
|
||||
|
||||
if (realSize != sizeof(kernel_max_subgroup_size)) {
|
||||
log_error( "ERROR: Returned size of max sub group size not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_max_subgroup_size), (int)realSize );
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(kernel_subgroup_count), (void *)&kernel_subgroup_count, &realSize);
|
||||
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
|
||||
log_info("The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR for the kernel is %d.\n", (int)kernel_subgroup_count);
|
||||
|
||||
if (realSize != sizeof(kernel_subgroup_count)) {
|
||||
log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Verify that the kernel gets the same max_subgroup_size and subgroup_count
|
||||
out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(result), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(out), &out);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, max_dimensions, NULL, global, local, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), &result, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
for (i=0; i<(int)gsize0; ++i) {
|
||||
if (result[i].maxSubGroupSize != (cl_uint)kernel_max_subgroup_size) {
|
||||
log_error("ERROR: get_max_subgroup_size() doesn't match result from clGetKernelSubGroupInfoKHR, %u vs %u\n",
|
||||
result[i].maxSubGroupSize, (cl_uint)kernel_max_subgroup_size);
|
||||
return -1;
|
||||
}
|
||||
if (result[i].numSubGroups != (cl_uint)kernel_subgroup_count) {
|
||||
log_error("ERROR: get_num_sub_groups() doesn't match result from clGetKernelSubGroupInfoKHR, %u vs %u\n",
|
||||
result[i].numSubGroups, (cl_uint)kernel_subgroup_count);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
867
test_conformance/subgroups/test_workgroup.cpp
Normal file
867
test_conformance/subgroups/test_workgroup.cpp
Normal file
@@ -0,0 +1,867 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "subhelpers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
static const char * any_source =
|
||||
"__kernel void test_any(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_any(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * all_source =
|
||||
"__kernel void test_all(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_all(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * bcast_source =
|
||||
"__kernel void test_bcast(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" Type x = in[gid];\n"
|
||||
" size_t loid = (size_t)((int)x % 100);\n"
|
||||
" out[gid] = sub_group_broadcast(x, loid);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * redadd_source =
|
||||
"__kernel void test_redadd(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_reduce_add(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * redmax_source =
|
||||
"__kernel void test_redmax(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_reduce_max(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * redmin_source =
|
||||
"__kernel void test_redmin(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_reduce_min(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * scinadd_source =
|
||||
"__kernel void test_scinadd(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_inclusive_add(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * scinmax_source =
|
||||
"__kernel void test_scinmax(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_inclusive_max(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * scinmin_source =
|
||||
"__kernel void test_scinmin(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_inclusive_min(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * scexadd_source =
|
||||
"__kernel void test_scexadd(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_exclusive_add(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * scexmax_source =
|
||||
"__kernel void test_scexmax(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_exclusive_max(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
static const char * scexmin_source =
|
||||
"__kernel void test_scexmin(const __global Type *in, __global int2 *xy, __global Type *out)\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id(0);\n"
|
||||
" XY(xy,gid);\n"
|
||||
" out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
|
||||
"}\n";
|
||||
|
||||
// These need to stay in sync with the kernel source below
|
||||
#define NUM_LOC 49
|
||||
#define INST_LOC_MASK 0x7f
|
||||
#define INST_OP_SHIFT 0
|
||||
#define INST_OP_MASK 0xf
|
||||
#define INST_LOC_SHIFT 4
|
||||
#define INST_VAL_SHIFT 12
|
||||
#define INST_VAL_MASK 0x7ffff
|
||||
#define INST_END 0x0
|
||||
#define INST_STORE 0x1
|
||||
#define INST_WAIT 0x2
|
||||
#define INST_COUNT 0x3
|
||||
|
||||
static const char * ifp_source =
|
||||
"#define NUM_LOC 49\n"
|
||||
"#define INST_LOC_MASK 0x7f\n"
|
||||
"#define INST_OP_SHIFT 0\n"
|
||||
"#define INST_OP_MASK 0xf\n"
|
||||
"#define INST_LOC_SHIFT 4\n"
|
||||
"#define INST_VAL_SHIFT 12\n"
|
||||
"#define INST_VAL_MASK 0x7ffff\n"
|
||||
"#define INST_END 0x0\n"
|
||||
"#define INST_STORE 0x1\n"
|
||||
"#define INST_WAIT 0x2\n"
|
||||
"#define INST_COUNT 0x3\n"
|
||||
"\n"
|
||||
"__kernel void\n"
|
||||
"test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
|
||||
"{\n"
|
||||
" __local atomic_int loc[NUM_LOC];\n"
|
||||
"\n"
|
||||
" // Don't run if there is only one sub group\n"
|
||||
" if (get_num_sub_groups() == 1)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" // First initialize loc[]\n"
|
||||
" int lid = (int)get_local_id(0);\n"
|
||||
"\n"
|
||||
" if (lid < NUM_LOC)\n"
|
||||
" atomic_init(loc+lid, 0);\n"
|
||||
"\n"
|
||||
" work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Compute pointer to this sub group's \"instructions\"\n"
|
||||
" const __global int *pc = in +\n"
|
||||
" ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n"
|
||||
" (int)get_sub_group_id()) *\n"
|
||||
" (NUM_LOC+1);\n"
|
||||
"\n"
|
||||
" // Set up to \"run\"\n"
|
||||
" bool ok = (int)get_sub_group_local_id() == 0;\n"
|
||||
" bool run = true;\n"
|
||||
"\n"
|
||||
" while (run) {\n"
|
||||
" int inst = *pc++;\n"
|
||||
" int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n"
|
||||
" int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n"
|
||||
" int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n"
|
||||
"\n"
|
||||
" switch (iop) {\n"
|
||||
" case INST_STORE:\n"
|
||||
" if (ok)\n"
|
||||
" atomic_store(loc+iloc, ival);\n"
|
||||
" break;\n"
|
||||
" case INST_WAIT:\n"
|
||||
" if (ok) {\n"
|
||||
" while (atomic_load(loc+iloc) != ival)\n"
|
||||
" ;\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" case INST_COUNT:\n"
|
||||
" if (ok) {\n"
|
||||
" int i;\n"
|
||||
" for (i=0;i<ival;++i)\n"
|
||||
" atomic_fetch_add(loc+iloc, 1);\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" case INST_END:\n"
|
||||
" run = false;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Save this group's result\n"
|
||||
" __global int *op = out + (int)get_group_id(0)*NUM_LOC;\n"
|
||||
" if (lid < NUM_LOC)\n"
|
||||
" op[lid] = atomic_load(loc+lid);\n"
|
||||
"}\n";
|
||||
|
||||
// Any/All test functions
|
||||
template <int Which>
|
||||
struct AA {
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
int e;
|
||||
|
||||
ii = 0;
|
||||
for (k=0; k<ng; ++k) {
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
e = (int)(genrand_int32(gMTdata) % 3);
|
||||
|
||||
// Initialize data matrix indexed by local id and sub group id
|
||||
switch (e) {
|
||||
case 0:
|
||||
memset(&t[ii], 0, n*sizeof(cl_int));
|
||||
break;
|
||||
case 1:
|
||||
memset(&t[ii], 0, n*sizeof(cl_int));
|
||||
i = (int)(genrand_int32(gMTdata) % (cl_uint)n);
|
||||
t[ii + i] = 41;
|
||||
break;
|
||||
case 2:
|
||||
memset(&t[ii], 0xff, n*sizeof(cl_int));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j=0;j<nw;++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
cl_int taa, raa;
|
||||
|
||||
log_info(" sub_group_%s...\n", Which == 0 ? "any" : "all");
|
||||
|
||||
for (k=0; k<ng; ++k) {
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j=0; j<nw; ++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
// Compute target
|
||||
if (Which == 0) {
|
||||
taa = 0;
|
||||
for (i=0; i<n; ++i)
|
||||
taa |= mx[ii + i] != 0;
|
||||
} else {
|
||||
taa = 1;
|
||||
for (i=0; i<n; ++i)
|
||||
taa &= mx[ii + i] != 0;
|
||||
}
|
||||
|
||||
// Check result
|
||||
for (i=0; i<n; ++i) {
|
||||
raa = my[ii+i] != 0;
|
||||
if (raa != taa) {
|
||||
log_error("ERROR: sub_group_%s mismatch for local id %d in sub group %d in group %d\n",
|
||||
Which == 0 ? "any" : "all", i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Reduce functions
|
||||
template <typename Ty, int Which>
|
||||
struct RED {
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
|
||||
ii = 0;
|
||||
for (k=0; k<ng; ++k) {
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i=0; i<n; ++i)
|
||||
t[ii+i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1);
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j=0;j<nw;++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
Ty tr, rr;
|
||||
|
||||
log_info(" sub_group_reduce_%s(%s)...\n", Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName<Ty>::val());
|
||||
|
||||
for (k=0; k<ng; ++k) {
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j=0; j<nw; ++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
// Compute target
|
||||
if (Which == 0) {
|
||||
// add
|
||||
tr = mx[ii];
|
||||
for (i=1; i<n; ++i)
|
||||
tr += mx[ii + i];
|
||||
} else if (Which == 1) {
|
||||
// max
|
||||
tr = mx[ii];
|
||||
for (i=1; i<n; ++i)
|
||||
tr = tr > mx[ii + i] ? tr : mx[ii + i];
|
||||
} else if (Which == 2) {
|
||||
// min
|
||||
tr = mx[ii];
|
||||
for (i=1; i<n; ++i)
|
||||
tr = tr > mx[ii + i] ? mx[ii + i] : tr;
|
||||
}
|
||||
|
||||
// Check result
|
||||
for (i=0; i<n; ++i) {
|
||||
rr = my[ii+i];
|
||||
if (rr != tr) {
|
||||
log_error("ERROR: sub_group_reduce_%s(%s) mismatch for local id %d in sub group %d in group %d\n",
|
||||
Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName<Ty>::val(), i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Scan Inclusive functions
|
||||
template <typename Ty, int Which>
|
||||
struct SCIN {
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
|
||||
ii = 0;
|
||||
for (k=0; k<ng; ++k) {
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i=0; i<n; ++i)
|
||||
// t[ii+i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1);
|
||||
t[ii+i] = (Ty)i;
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j=0;j<nw;++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
Ty tr, rr;
|
||||
|
||||
log_info(" sub_group_scan_inclusive_%s(%s)...\n", Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName<Ty>::val());
|
||||
|
||||
for (k=0; k<ng; ++k) {
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j=0; j<nw; ++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
// Check result
|
||||
for (i=0; i<n; ++i) {
|
||||
if (Which == 0) {
|
||||
tr = i == 0 ? mx[ii] : tr + mx[ii + i];
|
||||
} else if (Which == 1) {
|
||||
tr = i == 0 ? mx[ii] : (tr > mx[ii + i] ? tr : mx[ii + i]);
|
||||
} else {
|
||||
tr = i == 0 ? mx[ii] : (tr > mx[ii + i] ? mx[ii + i] : tr);
|
||||
}
|
||||
|
||||
rr = my[ii+i];
|
||||
if (rr != tr) {
|
||||
log_error("ERROR: sub_group_scan_inclusive_%s(%s) mismatch for local id %d in sub group %d in group %d\n",
|
||||
Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName<Ty>::val(), i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Scan Exclusive functions
|
||||
template <typename Ty, int Which>
|
||||
struct SCEX {
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
|
||||
ii = 0;
|
||||
for (k=0; k<ng; ++k) {
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
for (i=0; i<n; ++i)
|
||||
t[ii+i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1);
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j=0;j<nw;++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int ii, i, j, k, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
Ty tr, trt, rr;
|
||||
|
||||
log_info(" sub_group_scan_exclusive_%s(%s)...\n", Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName<Ty>::val());
|
||||
|
||||
for (k=0; k<ng; ++k) {
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j=0; j<nw; ++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
|
||||
// Check result
|
||||
for (i=0; i<n; ++i) {
|
||||
if (Which == 0) {
|
||||
tr = i == 0 ? TypeIdentity<Ty,Which>::val() : tr + trt;
|
||||
} else if (Which == 1) {
|
||||
tr = i == 0 ? TypeIdentity<Ty,Which>::val() : (trt > tr ? trt : tr);
|
||||
} else {
|
||||
tr = i == 0 ? TypeIdentity<Ty,Which>::val() : (trt > tr ? tr : trt);
|
||||
}
|
||||
trt = mx[ii+i];
|
||||
rr = my[ii+i];
|
||||
|
||||
if (rr != tr) {
|
||||
log_error("ERROR: sub_group_scan_exclusive_%s(%s) mismatch for local id %d in sub group %d in group %d\n",
|
||||
Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName<Ty>::val(), i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Broadcast functios
|
||||
template <typename Ty>
|
||||
struct BC {
|
||||
static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int i, ii, j, k, l, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
int d = ns > 100 ? 100 : ns;
|
||||
|
||||
ii = 0;
|
||||
for (k=0; k<ng; ++k) {
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
l = (int)(genrand_int32(gMTdata) & 0x7fffffff) % (d > n ? n : d);
|
||||
|
||||
for (i=0; i<n; ++i)
|
||||
t[ii+i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff) % 100 * 100 + l);
|
||||
}
|
||||
|
||||
// Now map into work group using map from device
|
||||
for (j=0;j<nw;++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
x[j] = t[i];
|
||||
}
|
||||
|
||||
x += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw, int ng)
|
||||
{
|
||||
int ii, i, j, k, l, n;
|
||||
int nj = (nw + ns - 1)/ns;
|
||||
Ty tr, rr;
|
||||
|
||||
log_info(" sub_group_broadcast(%s)...\n", TypeName<Ty>::val());
|
||||
|
||||
for (k=0; k<ng; ++k) {
|
||||
// Map to array indexed to array indexed by local ID and sub group
|
||||
for (j=0; j<nw; ++j) {
|
||||
i = m[2*j+1]*ns + m[2*j];
|
||||
mx[i] = x[j];
|
||||
my[i] = y[j];
|
||||
}
|
||||
|
||||
for (j=0; j<nj; ++j) {
|
||||
ii = j*ns;
|
||||
n = ii + ns > nw ? nw - ii : ns;
|
||||
l = (int)mx[ii] % 100;
|
||||
tr = mx[ii+l];
|
||||
|
||||
// Check result
|
||||
for (i=0; i<n; ++i) {
|
||||
rr = my[ii+i];
|
||||
if (rr != tr) {
|
||||
log_error("ERROR: sub_group_broadcast(%s) mismatch for local id %d in sub group %d in group %d\n",
|
||||
TypeName<Ty>::val(), i, j, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
x += nw;
|
||||
y += nw;
|
||||
m += 2*nw;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Independent forward progress stuff
|
||||
// Note:
|
||||
// Output needs num_groups * NUM_LOC elements
|
||||
// local_size must be > NUM_LOC
|
||||
// Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements
|
||||
|
||||
static inline int
|
||||
inst(int op, int loc, int val)
|
||||
{
|
||||
return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT) | (op << INST_OP_SHIFT);
|
||||
}
|
||||
|
||||
void gen_insts(cl_int *x, cl_int *p, int n)
|
||||
{
|
||||
int i, j0, j1;
|
||||
int val;
|
||||
int ii[NUM_LOC];
|
||||
|
||||
// Create a random permutation of 0...NUM_LOC-1
|
||||
ii[0] = 0;
|
||||
for (i=1; i<NUM_LOC;++i) {
|
||||
j0 = random_in_range(0, i, gMTdata);
|
||||
if (j0 != i)
|
||||
ii[i] = ii[j0];
|
||||
ii[j0] = i;
|
||||
}
|
||||
|
||||
// Initialize "instruction pointers"
|
||||
memset(p, 0, n*4);
|
||||
|
||||
for (i=0; i<NUM_LOC; ++i) {
|
||||
// Randomly choose 2 different sub groups
|
||||
// One does a random amount of work, and the other waits for it
|
||||
j0 = random_in_range(0, n-1, gMTdata);
|
||||
|
||||
do
|
||||
j1 = random_in_range(0, n-1, gMTdata);
|
||||
while (j1 == j0);
|
||||
|
||||
// Randomly choose a wait value and assign "instructions"
|
||||
val = random_in_range(100, 200 + 10*NUM_LOC, gMTdata);
|
||||
x[j0*(NUM_LOC+1) + p[j0]] = inst(INST_COUNT, ii[i], val);
|
||||
x[j1*(NUM_LOC+1) + p[j1]] = inst(INST_WAIT, ii[i], val);
|
||||
++p[j0];
|
||||
++p[j1];
|
||||
}
|
||||
|
||||
// Last "inst" for each sub group is END
|
||||
for (i=0; i<n; ++i)
|
||||
x[i*(NUM_LOC+1) + p[i]] = inst(INST_END, 0, 0);
|
||||
}
|
||||
|
||||
// Execute one group's "instructions"
|
||||
void run_insts(cl_int *x, cl_int *p, int n)
|
||||
{
|
||||
int i, nend;
|
||||
bool scont;
|
||||
cl_int loc[NUM_LOC];
|
||||
|
||||
// Initialize result and "instruction pointers"
|
||||
memset(loc, 0, sizeof(loc));
|
||||
memset(p, 0, 4*n);
|
||||
|
||||
// Repetitively loop over subgroups with each executing "instructions" until blocked
|
||||
// The loop terminates when all subgroups have hit the "END instruction"
|
||||
do {
|
||||
nend = 0;
|
||||
for (i=0; i<n; ++i) {
|
||||
do {
|
||||
cl_int inst = x[i*(NUM_LOC+1) + p[i]];
|
||||
cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;
|
||||
cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;
|
||||
cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;
|
||||
scont = false;
|
||||
|
||||
switch (iop) {
|
||||
case INST_STORE:
|
||||
loc[iloc] = ival;
|
||||
++p[i];
|
||||
scont = true;
|
||||
break;
|
||||
case INST_WAIT:
|
||||
if (loc[iloc] == ival) {
|
||||
++p[i];
|
||||
scont = true;
|
||||
}
|
||||
break;
|
||||
case INST_COUNT:
|
||||
loc[iloc] += ival;
|
||||
++p[i];
|
||||
scont = true;
|
||||
break;
|
||||
case INST_END:
|
||||
++nend;
|
||||
break;
|
||||
}
|
||||
} while (scont);
|
||||
}
|
||||
} while (nend < n);
|
||||
|
||||
// Return result, reusing "p"
|
||||
memcpy(p, loc, sizeof(loc));
|
||||
}
|
||||
|
||||
|
||||
struct IFP {
|
||||
static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
|
||||
{
|
||||
int k;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
// We need at least 2 sub groups per group for this test
|
||||
if (nj == 1)
|
||||
return;
|
||||
|
||||
for (k=0; k<ng; ++k) {
|
||||
gen_insts(x, t, nj);
|
||||
x += nj * (NUM_LOC+1);
|
||||
}
|
||||
}
|
||||
|
||||
static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns, int nw, int ng)
|
||||
{
|
||||
int i, k;
|
||||
int nj = (nw + ns - 1) / ns;
|
||||
|
||||
// We need at least 2 sub groups per group for this tes
|
||||
if (nj == 1)
|
||||
return 0;
|
||||
|
||||
log_info(" independent forward progress...\n");
|
||||
|
||||
for (k=0; k<ng; ++k) {
|
||||
run_insts(x, t, nj);
|
||||
for (i=0; i<NUM_LOC; ++i) {
|
||||
if (t[i] != y[i]) {
|
||||
log_error("ERROR: mismatch at element %d in work group %d\n", i, k);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
x += nj * (NUM_LOC+1);
|
||||
y += NUM_LOC;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Entry point from main
|
||||
int
|
||||
test_work_group_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
|
||||
// Adjust these individually below if desired/needed
|
||||
#define G 2000
|
||||
#define L 200
|
||||
|
||||
error = test<int, AA<0>, G, L>::run(device, context, queue, num_elements, "test_any", any_source);
|
||||
error |= test<int, AA<1>, G, L>::run(device, context, queue, num_elements, "test_all", all_source);
|
||||
|
||||
// error |= test<cl_half, BC<cl_half>, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
error |= test<cl_uint, BC<cl_uint>, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
error |= test<cl_int, BC<cl_int>, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
error |= test<cl_ulong, BC<cl_ulong>, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
error |= test<cl_long, BC<cl_long>, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
error |= test<float, BC<float>, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
error |= test<double, BC<double>, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source);
|
||||
|
||||
// error |= test<cl_half, RED<cl_half,0>, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
error |= test<cl_uint, RED<cl_uint,0>, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
error |= test<cl_int, RED<cl_int,0>, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
error |= test<cl_ulong, RED<cl_ulong,0>, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
error |= test<cl_long, RED<cl_long,0>, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
error |= test<float, RED<float,0>, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
error |= test<double, RED<double,0>, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source);
|
||||
|
||||
// error |= test<cl_half, RED<cl_half,1>, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
error |= test<cl_uint, RED<cl_uint,1>, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
error |= test<cl_int, RED<cl_int,1>, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
error |= test<cl_ulong, RED<cl_ulong,1>, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
error |= test<cl_long, RED<cl_long,1>, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
error |= test<float, RED<float,1>, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
error |= test<double, RED<double,1>, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source);
|
||||
|
||||
// error |= test<cl_half, RED<cl_half,2>, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
error |= test<cl_uint, RED<cl_uint,2>, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
error |= test<cl_int, RED<cl_int,2>, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
error |= test<cl_ulong, RED<cl_ulong,2>, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
error |= test<cl_long, RED<cl_long,2>, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
error |= test<float, RED<float,2>, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
error |= test<double, RED<double,2>, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source);
|
||||
|
||||
// error |= test<cl_half, SCIN<cl_half,0>, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
error |= test<cl_uint, SCIN<cl_uint,0>, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
error |= test<cl_int, SCIN<cl_int,0>, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
error |= test<cl_ulong, SCIN<cl_ulong,0>, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
error |= test<cl_long, SCIN<cl_long,0>, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
error |= test<float, SCIN<float,0>, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
error |= test<double, SCIN<double,0>, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source);
|
||||
|
||||
// error |= test<cl_half, SCIN<cl_half,1>, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
error |= test<cl_uint, SCIN<cl_uint,1>, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
error |= test<cl_int, SCIN<cl_int,1>, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
error |= test<cl_ulong, SCIN<cl_ulong,1>, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
error |= test<cl_long, SCIN<cl_long,1>, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
error |= test<float, SCIN<float,1>, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
error |= test<double, SCIN<double,1>, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source);
|
||||
|
||||
// error |= test<cl_half, SCIN<cl_half,2>, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
error |= test<cl_uint, SCIN<cl_uint,2>, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
error |= test<cl_int, SCIN<cl_int,2>, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
error |= test<cl_ulong, SCIN<cl_ulong,2>, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
error |= test<cl_long, SCIN<cl_long,2>, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
error |= test<float, SCIN<float,2>, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
error |= test<double, SCIN<double,2>, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source);
|
||||
|
||||
// error |= test<cl_half, SCEX<cl_half,0>, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
error |= test<cl_uint, SCEX<cl_uint,0>, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
error |= test<cl_int, SCEX<cl_int,0>, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
error |= test<cl_ulong, SCEX<cl_ulong,0>, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
error |= test<cl_long, SCEX<cl_long,0>, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
error |= test<float, SCEX<float,0>, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
error |= test<double, SCEX<double,0>, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source);
|
||||
|
||||
// error |= test<cl_half, SCEX<cl_half,1>, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
error |= test<cl_uint, SCEX<cl_uint,1>, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
error |= test<cl_int, SCEX<cl_int,1>, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
error |= test<cl_ulong, SCEX<cl_ulong,1>, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
error |= test<cl_long, SCEX<cl_long,1>, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
error |= test<float, SCEX<float,1>, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
error |= test<double, SCEX<double,1>, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source);
|
||||
|
||||
// error |= test<cl_half, SCEX<cl_half,2>, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
error |= test<cl_uint, SCEX<cl_uint,2>, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
error |= test<cl_int, SCEX<cl_int,2>, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
error |= test<cl_ulong, SCEX<cl_ulong,2>, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
error |= test<cl_long, SCEX<cl_long,2>, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
error |= test<float, SCEX<float,2>, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
error |= test<double, SCEX<double,2>, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source);
|
||||
|
||||
error |= test<cl_int, IFP, G, L>::run(device, context, queue, num_elements, "test_ifp", ifp_source, NUM_LOC + 1);
|
||||
return error;
|
||||
}
|
||||
|
||||
252
test_conformance/subgroups/test_workitem.cpp
Normal file
252
test_conformance/subgroups/test_workitem.cpp
Normal file
@@ -0,0 +1,252 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
struct get_test_data {
|
||||
cl_uint subGroupSize;
|
||||
cl_uint maxSubGroupSize;
|
||||
cl_uint numSubGroups;
|
||||
cl_uint enqNumSubGroups;
|
||||
cl_uint subGroupId;
|
||||
cl_uint subGroupLocalId;
|
||||
bool operator==(get_test_data x) {
|
||||
return subGroupSize == x.subGroupSize &&
|
||||
maxSubGroupSize == x.maxSubGroupSize &&
|
||||
numSubGroups == x.numSubGroups &&
|
||||
subGroupId == x.subGroupId &&
|
||||
subGroupLocalId == x.subGroupLocalId;
|
||||
}
|
||||
};
|
||||
|
||||
static const char * get_test_source =
|
||||
"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
|
||||
"\n"
|
||||
"typedef struct {\n"
|
||||
" uint subGroupSize;\n"
|
||||
" uint maxSubGroupSize;\n"
|
||||
" uint numSubGroups;\n"
|
||||
" uint enqNumSubGroups;\n"
|
||||
" uint subGroupId;\n"
|
||||
" uint subGroupLocalId;\n"
|
||||
"} get_test_data;\n"
|
||||
"\n"
|
||||
"__kernel void get_test( __global get_test_data *outData )\n"
|
||||
"{\n"
|
||||
" int gid = get_global_id( 0 );\n"
|
||||
" outData[gid].subGroupSize = get_sub_group_size();\n"
|
||||
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
|
||||
" outData[gid].numSubGroups = get_num_sub_groups();\n"
|
||||
" outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
|
||||
" outData[gid].subGroupId = get_sub_group_id();\n"
|
||||
" outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
|
||||
"}";
|
||||
|
||||
static int
|
||||
check_group(const get_test_data *result, int nw, cl_uint ensg, int maxwgs)
|
||||
{
|
||||
int first = -1;
|
||||
int last = -1;
|
||||
int i, j;
|
||||
cl_uint hit[32];
|
||||
|
||||
for (i=0; i<nw; ++i) {
|
||||
if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0)
|
||||
first = i;
|
||||
if (result[i].subGroupId == result[0].numSubGroups-1 && result[i].subGroupLocalId == 0)
|
||||
last = i;
|
||||
if (first != -1 && last != -1)
|
||||
break;
|
||||
}
|
||||
|
||||
if (first == -1 || last == -1) {
|
||||
log_error("ERROR: expected sub group id's are missing\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check them
|
||||
if (result[first].subGroupSize == 0) {
|
||||
log_error("ERROR: get_sub_group_size() returned 0\n");
|
||||
return -1;
|
||||
}
|
||||
if (result[first].maxSubGroupSize == 0 || result[first].maxSubGroupSize > maxwgs) {
|
||||
log_error("ERROR: get_max_subgroup_size() returned incorrect result: %u\n", result[first].maxSubGroupSize);
|
||||
return -1;
|
||||
}
|
||||
if (result[first].subGroupSize > result[first].maxSubGroupSize) {
|
||||
log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n");
|
||||
return -1;
|
||||
}
|
||||
if (result[last].subGroupSize > result[first].subGroupSize) {
|
||||
log_error("ERROR: last sub group larger than first sub group\n");
|
||||
return -1;
|
||||
}
|
||||
if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg) {
|
||||
log_error("ERROR: get_num_sub_groups() returned incorrect result: %u \n", result[first].numSubGroups);
|
||||
return -1;
|
||||
}
|
||||
|
||||
memset(hit, 0, sizeof(hit));
|
||||
for (i=0; i<nw; ++i) {
|
||||
if (result[i].maxSubGroupSize != result[first].maxSubGroupSize ||
|
||||
result[i].numSubGroups != result[first].numSubGroups) {
|
||||
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
|
||||
return -1;
|
||||
}
|
||||
if (result[i].subGroupId >= result[first].numSubGroups) {
|
||||
log_error("ERROR: get_sub_group_id() returned out of range value: %u\n", result[i].subGroupId);
|
||||
return -1;
|
||||
}
|
||||
if (result[i].enqNumSubGroups != ensg) {
|
||||
log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect value: %u\n", result[i].enqNumSubGroups);
|
||||
return -1;
|
||||
}
|
||||
if (result[first].numSubGroups > 1) {
|
||||
if (result[i].subGroupId < result[first].numSubGroups-1) {
|
||||
if (result[i].subGroupSize != result[first].subGroupSize) {
|
||||
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
|
||||
return -1;
|
||||
}
|
||||
if (result[i].subGroupLocalId >= result[first].subGroupSize) {
|
||||
log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
if (result[i].subGroupSize != result[last].subGroupSize) {
|
||||
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
|
||||
return -1;
|
||||
}
|
||||
if (result[i].subGroupLocalId >= result[last].subGroupSize) {
|
||||
log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (result[i].subGroupSize != result[first].subGroupSize) {
|
||||
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
|
||||
return -1;
|
||||
}
|
||||
if (result[i].subGroupLocalId >= result[first].subGroupSize) {
|
||||
log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
j = (result[first].subGroupSize + 31)/32 * result[i].subGroupId + (result[i].subGroupLocalId >> 5);
|
||||
if (j < sizeof(hit)/4) {
|
||||
cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU);
|
||||
if ((hit[j] & b) != 0) {
|
||||
log_error("ERROR: get_sub_group_local_id() repeated a result in the same sub group\n");
|
||||
return -1;
|
||||
}
|
||||
hit[j] |= b;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
test_work_item_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
static const size_t lsize = 200;
|
||||
int error;
|
||||
int i, j, k, q, r, nw;
|
||||
int maxwgs;
|
||||
cl_uint ensg;
|
||||
size_t global;
|
||||
size_t local;
|
||||
get_test_data result[lsize*6];
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper out;
|
||||
|
||||
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &get_test_source, "get_test", "-cl-std=CL2.0");
|
||||
if (error != 0)
|
||||
return error;
|
||||
|
||||
error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
|
||||
if (error != 0)
|
||||
return error;
|
||||
|
||||
maxwgs = (int)local;
|
||||
|
||||
// Limit it a bit so we have muliple work groups
|
||||
// Ideally this will still be large enough to give us multiple subgroups
|
||||
if (local > lsize)
|
||||
local = lsize;
|
||||
|
||||
// Create our buffer
|
||||
out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
// Set argument
|
||||
error = clSetKernelArg(kernel, 0, sizeof(out), &out);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
global = local * 5;
|
||||
|
||||
// Make sure we have a flexible range
|
||||
global += 3 * local / 4;
|
||||
|
||||
// Collect the data
|
||||
memset((void *)&result, 0xf0, sizeof(result));
|
||||
|
||||
error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
nw = (int)local;
|
||||
ensg = result[0].enqNumSubGroups;
|
||||
|
||||
// Check the first group
|
||||
error = check_group(result, nw, ensg, maxwgs);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
q = (int)global / nw;
|
||||
r = (int)global % nw;
|
||||
|
||||
// Check the remaining work groups including the last if it is the same size
|
||||
for (k=1; k<q; ++k) {
|
||||
for (j=0; j<nw; ++j) {
|
||||
i = k*nw + j;
|
||||
if (!(result[i] == result[i-nw])) {
|
||||
log_error("ERROR: sub group mapping is not identical for all work groups\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check the last group if it wasn't the same size
|
||||
if (r != 0) {
|
||||
error = check_group(result + q*nw, r, ensg, maxwgs);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user