mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-24 15:59:03 +00:00
Initial open source release of OpenCL 2.2 CTS.
This commit is contained in:
@@ -0,0 +1,342 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
|
||||
|
||||
namespace test_sub_group_barrier {
|
||||
|
||||
enum class barrier_type
|
||||
{
|
||||
local,
|
||||
global
|
||||
};
|
||||
|
||||
struct test_options
|
||||
{
|
||||
barrier_type barrier;
|
||||
size_t max_count;
|
||||
size_t num_tests;
|
||||
};
|
||||
|
||||
const std::string source_common = R"(
|
||||
// Circular shift of sub-group local ids
|
||||
size_t get_shifted_local_id(int sub_group_local_id_delta)
|
||||
{
|
||||
const int sub_group_size = (int)get_sub_group_size();
|
||||
return (get_local_id(0) - get_sub_group_local_id()) +
|
||||
(((int)get_sub_group_local_id() + sub_group_local_id_delta) % sub_group_size + sub_group_size) % sub_group_size;
|
||||
}
|
||||
|
||||
// Get global ids from shifted local ids
|
||||
size_t get_shifted_global_id(int sub_group_local_id_delta)
|
||||
{
|
||||
return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(sub_group_local_id_delta);
|
||||
}
|
||||
)";
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
|
||||
)";
|
||||
s << source_common;
|
||||
if (options.barrier == barrier_type::global)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(const int iter_lo, const int iter_hi, global long *output)
|
||||
{
|
||||
const size_t gid = get_shifted_global_id(0);
|
||||
|
||||
output[gid] = gid;
|
||||
sub_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
for (int i = iter_lo; i < iter_hi; i++)
|
||||
{
|
||||
const size_t other_gid = get_shifted_global_id(i);
|
||||
|
||||
output[other_gid] += other_gid;
|
||||
sub_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
output[gid] += gid;
|
||||
sub_group_barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.barrier == barrier_type::local)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values)
|
||||
{
|
||||
const size_t gid = get_shifted_global_id(0);
|
||||
const size_t lid = get_shifted_local_id(0);
|
||||
|
||||
values[lid] = gid;
|
||||
sub_group_barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int i = iter_lo; i < iter_hi; i++)
|
||||
{
|
||||
const size_t other_lid = get_shifted_local_id(i);
|
||||
const size_t other_gid = get_shifted_global_id(i);
|
||||
|
||||
values[other_lid] += other_gid;
|
||||
sub_group_barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
values[lid] += gid;
|
||||
sub_group_barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
output[gid] = values[lid];
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#else
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
#include <opencl_memory>
|
||||
#include <opencl_work_item>
|
||||
#include <opencl_synchronization>
|
||||
|
||||
using namespace cl;
|
||||
|
||||
)";
|
||||
s << source_common;
|
||||
|
||||
if (options.barrier == barrier_type::global)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output)
|
||||
{
|
||||
const size_t gid = get_shifted_global_id(0);
|
||||
|
||||
output[gid] = gid;
|
||||
sub_group_barrier(mem_fence::global);
|
||||
|
||||
for (int i = iter_lo; i < iter_hi; i++)
|
||||
{
|
||||
const size_t other_gid = get_shifted_global_id(i);
|
||||
|
||||
output[other_gid] += other_gid;
|
||||
sub_group_barrier(mem_fence::global);
|
||||
|
||||
output[gid] += gid;
|
||||
sub_group_barrier(mem_fence::global);
|
||||
}
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.barrier == barrier_type::local)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output, local_ptr<long[]> values)
|
||||
{
|
||||
const size_t gid = get_shifted_global_id(0);
|
||||
const size_t lid = get_shifted_local_id(0);
|
||||
|
||||
values[lid] = gid;
|
||||
sub_group_barrier(mem_fence::local);
|
||||
|
||||
for (int i = iter_lo; i < iter_hi; i++)
|
||||
{
|
||||
const size_t other_lid = get_shifted_local_id(i);
|
||||
const size_t other_gid = get_shifted_global_id(i);
|
||||
|
||||
values[other_lid] += other_gid;
|
||||
sub_group_barrier(mem_fence::local);
|
||||
|
||||
values[lid] += gid;
|
||||
sub_group_barrier(mem_fence::local);
|
||||
}
|
||||
|
||||
output[gid] = values[lid];
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
if (!is_extension_available(device, "cl_khr_subgroups"))
|
||||
{
|
||||
log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
std::string kernel_name = "test";
|
||||
std::string source = generate_source(options);
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
source, kernel_name
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
return error;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
source, kernel_name, "-cl-std=CL2.0", false
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
// Normal run
|
||||
#else
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
source, kernel_name
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
#endif
|
||||
|
||||
size_t max_work_group_size;
|
||||
error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
|
||||
|
||||
if (options.barrier == barrier_type::local)
|
||||
{
|
||||
cl_ulong kernel_local_mem_size;
|
||||
error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
|
||||
|
||||
cl_ulong device_local_mem_size;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
|
||||
|
||||
max_work_group_size = (std::min)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long));
|
||||
}
|
||||
|
||||
std::random_device rd;
|
||||
std::mt19937 gen(rd());
|
||||
std::uniform_int_distribution<size_t> global_size_dis(1, options.max_count);
|
||||
std::uniform_int_distribution<size_t> local_size_dis(1, max_work_group_size);
|
||||
std::uniform_int_distribution<int> iter_dis(0, 20);
|
||||
|
||||
for (size_t test = 0; test < options.num_tests; test++)
|
||||
{
|
||||
const size_t global_size = global_size_dis(gen);
|
||||
const size_t local_size = local_size_dis(gen);
|
||||
const size_t count = global_size;
|
||||
|
||||
const int iter_lo = -iter_dis(gen);
|
||||
const int iter_hi = +iter_dis(gen);
|
||||
|
||||
cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
if (options.barrier == barrier_type::local)
|
||||
{
|
||||
error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
}
|
||||
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
|
||||
|
||||
std::vector<cl_long> output(count);
|
||||
error = clEnqueueReadBuffer(
|
||||
queue, output_buffer, CL_TRUE,
|
||||
0, sizeof(cl_long) * count,
|
||||
static_cast<void *>(output.data()),
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
|
||||
|
||||
error = clReleaseMemObject(output_buffer);
|
||||
RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
|
||||
|
||||
for (size_t gid = 0; gid < count; gid++)
|
||||
{
|
||||
const long value = output[gid];
|
||||
const long expected = gid + 2 * gid * (iter_hi - iter_lo);
|
||||
|
||||
if (value != expected)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"Element %lu has incorrect value. Expected: %ld, got: %ld",
|
||||
gid, expected, value
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
AUTO_TEST_CASE(test_sub_group_barrier_global)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
test_options options;
|
||||
options.barrier = barrier_type::global;
|
||||
options.num_tests = 1000;
|
||||
options.max_count = num_elements;
|
||||
return test(device, context, queue, options);
|
||||
}
|
||||
|
||||
AUTO_TEST_CASE(test_sub_group_barrier_local)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
test_options options;
|
||||
options.barrier = barrier_type::local;
|
||||
options.num_tests = 1000;
|
||||
options.max_count = num_elements;
|
||||
return test(device, context, queue, options);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP
|
||||
Reference in New Issue
Block a user