mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Remove OpenCL C++ tests (#1241)
* Remove OpenCL C++ tests Agreed in the 2021/05/11 teleconference. Signed-off-by: Kevin Petit <kevin.petit@arm.com> * fix CI
This commit is contained in:
@@ -62,58 +62,13 @@ set(CONFORMANCE_SUFFIX "" )
|
||||
#build driver as a dependency of the conformance tests, or other such CMake customization
|
||||
include(CMakeVendor.txt OPTIONAL)
|
||||
|
||||
#-----------------------------------------------------------
# Development options for OpenCL C++ tests
#-----------------------------------------------------------
# Each enabled option contributes one -D flag to CLPP_DEVELOPMENT_OPTIONS.

# Use OpenCL C kernels instead of OpenCL C++ kernels
option(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS "Use OpenCL C kernels in OpenCL C++ tests" OFF)
if(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS)
  list(APPEND CLPP_DEVELOPMENT_OPTIONS -DCLPP_DEVELOPMENT_USE_OPENCLC_KERNELS)
endif()

# Only check if OpenCL C++ kernels compile to SPIR-V
option(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION "Only check if OpenCL C++ kernels compile to SPIR-V" OFF)
if(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION)
  # The two development modes cannot be combined: stop configuring if both
  # were requested.
  if(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS)
    message(FATAL_ERROR "Can't use OpenCL C kernels and compile to SPIR-V.")
  endif()
  list(APPEND CLPP_DEVELOPMENT_OPTIONS -DCLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION)
endif()

# When at least one development option is active, define the umbrella macro
# CLPP_DEVELOPMENT_OPTIONS plus one macro per active option.
if(CLPP_DEVELOPMENT_OPTIONS)
  add_definitions(-DCLPP_DEVELOPMENT_OPTIONS)
  add_definitions(${CLPP_DEVELOPMENT_OPTIONS})
endif()
|
||||
|
||||
# The offline OpenCL C/C++ compiler provided by Khronos is the only supported
# offline compiler.
#
# KHRONOS_OFFLINE_COMPILER holds the path to that compiler.
# See https://github.com/KhronosGroup/SPIR/ (the spirv-1.1 branch or a newer
# SPIR-V-ready branch should be used).
# KHRONOS_OFFLINE_COMPILER_OPTIONS optionally holds additional compiler options.
# Both values are forwarded to the C and C++ sources as preprocessor macros of
# the same name.
if(NOT KHRONOS_OFFLINE_COMPILER)
  message(WARNING "KHRONOS_OFFLINE_COMPILER is not defined!")
  message(WARNING "Running CL C++ tests will not be possible.")
else()
  foreach(khronos_flags_var CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
    set(${khronos_flags_var} "${${khronos_flags_var}} -DKHRONOS_OFFLINE_COMPILER=${KHRONOS_OFFLINE_COMPILER}")
    # Additional OpenCL C/C++ compiler option.
    if(KHRONOS_OFFLINE_COMPILER_OPTIONS)
      set(${khronos_flags_var} "${${khronos_flags_var}} -DKHRONOS_OFFLINE_COMPILER_OPTIONS=${KHRONOS_OFFLINE_COMPILER_OPTIONS}")
    endif()
  endforeach()
endif()
|
||||
|
||||
# CL_LIBCLCXX_DIR - path to dir with OpenCL C++ STL (libclcxx)
|
||||
# CL_INCLUDE_DIR - path to dir with OpenCL headers
|
||||
# CL_LIB_DIR - path to dir with OpenCL library
|
||||
if(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR)
|
||||
if(CL_INCLUDE_DIR AND CL_LIB_DIR)
|
||||
link_directories(${CL_LIB_DIR})
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCL_LIBCLCXX_DIR=${CL_LIBCLCXX_DIR}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCL_LIBCLCXX_DIR=${CL_LIBCLCXX_DIR}")
|
||||
else(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR)
|
||||
else(CL_INCLUDE_DIR AND CL_LIB_DIR)
|
||||
message(STATUS "OpenCL hasn't been found!")
|
||||
message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR, -DCL_LIB_DIR and -DCL_LIBCLCXX_DIR")
|
||||
endif(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR)
|
||||
message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR")
|
||||
endif(CL_INCLUDE_DIR AND CL_LIB_DIR)
|
||||
|
||||
# CLConform_GL_LIBRARIES_DIR - path to OpenGL libraries
|
||||
if(GL_IS_SUPPORTED AND CLConform_GL_LIBRARIES_DIR)
|
||||
|
||||
@@ -55,17 +55,13 @@ cd build
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ ..
|
||||
make
|
||||
|
||||
# Get libclcxx
|
||||
cd ${TOP}
|
||||
git clone https://github.com/KhronosGroup/libclcxx.git
|
||||
|
||||
# Build CTS
|
||||
cd ${TOP}
|
||||
ls -l
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \
|
||||
-DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \
|
||||
-DCL_LIBCLCXX_DIR=${TOP}/libclcxx \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
|
||||
-DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \
|
||||
-DOPENCL_LIBRARIES="-lOpenCL -lpthread" \
|
||||
|
||||
@@ -312,57 +312,6 @@ get_compilation_mode_str(const CompilationMode compilationMode)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef KHRONOS_OFFLINE_COMPILER
// Builds the command line that invokes the Khronos offline compiler
// (KHRONOS_OFFLINE_COMPILER) to emit SPIR-V for a single source file.
//
// device_address_space_size - 32 selects the spir-unknown-unknown triple; any
//                             other value selects spir64-unknown-unknown.
// openclCXX      - true: add -cl-std=c++ and the CL_LIBCLCXX_DIR include path;
//                  false: force-include opencl.h instead.
// bOptions       - additional build options, appended verbatim.
// sourceFilename - input file handed to the compiler.
// outputFilename - file name passed after -o.
//
// Returns the complete command string.
static std::string
get_khronos_compiler_command(const cl_uint device_address_space_size,
                             const bool openclCXX, const std::string &bOptions,
                             const std::string &sourceFilename,
                             const std::string &outputFilename)
{
    // Emit SPIR-V
    std::string compilerOptions = " -cc1 -emit-spirv";

    // <triple>: for 32 bit SPIR-V use spir-unknown-unknown, for 64 bit SPIR-V
    // use spir64-unknown-unknown.
    compilerOptions += (device_address_space_size == 32)
        ? " -triple=spir-unknown-unknown"
        : " -triple=spir64-unknown-unknown";

    if (openclCXX)
    {
        // OpenCL C++ flag required by the SPIR-V-ready clang (compiler
        // provided by Khronos), followed by the libclcxx include directory.
        compilerOptions += " -cl-std=c++";
        compilerOptions += " -I ";
        compilerOptions += STRINGIFY_VALUE(CL_LIBCLCXX_DIR);
    }
    else
    {
        compilerOptions += " -include opencl.h";
    }

#ifdef KHRONOS_OFFLINE_COMPILER_OPTIONS
    // Separate the user-supplied options from the previous token; without the
    // space they would fuse with the include option above.
    compilerOptions += " ";
    compilerOptions += STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER_OPTIONS);
#endif

    // Add build options passed to this function
    compilerOptions += " " + bOptions;
    compilerOptions += " " + sourceFilename + " -o " + outputFilename;

    return STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER) + compilerOptions;
}
#endif // KHRONOS_OFFLINE_COMPILER
|
||||
|
||||
static cl_int get_cl_device_info_str(const cl_device_id device,
|
||||
const cl_uint device_address_space_size,
|
||||
const CompilationMode compilationMode,
|
||||
@@ -476,29 +425,9 @@ static int invoke_offline_compiler(const cl_device_id device,
|
||||
const CompilationMode compilationMode,
|
||||
const std::string &bOptions,
|
||||
const std::string &sourceFilename,
|
||||
const std::string &outputFilename,
|
||||
const bool openclCXX)
|
||||
const std::string &outputFilename)
|
||||
{
|
||||
std::string runString;
|
||||
if (openclCXX)
|
||||
{
|
||||
#ifndef KHRONOS_OFFLINE_COMPILER
|
||||
log_error("CL C++ compilation is not possible: "
|
||||
"KHRONOS_OFFLINE_COMPILER was not defined.\n");
|
||||
return CL_INVALID_OPERATION;
|
||||
#else
|
||||
if (compilationMode != kSpir_v)
|
||||
{
|
||||
log_error("Compilation mode must be SPIR-V for Khronos compiler");
|
||||
return -1;
|
||||
}
|
||||
runString = get_khronos_compiler_command(
|
||||
device_address_space_size, openclCXX, bOptions, sourceFilename,
|
||||
outputFilename);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string clDeviceInfoFilename;
|
||||
|
||||
// See cl_offline_compiler-interface.txt for a description of the
|
||||
@@ -506,8 +435,7 @@ static int invoke_offline_compiler(const cl_device_id device,
|
||||
// the internal command line interface for invoking the offline
|
||||
// compiler.
|
||||
|
||||
cl_int err =
|
||||
write_cl_device_info(device, device_address_space_size,
|
||||
cl_int err = write_cl_device_info(device, device_address_space_size,
|
||||
compilationMode, clDeviceInfoFilename);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
@@ -516,9 +444,8 @@ static int invoke_offline_compiler(const cl_device_id device,
|
||||
}
|
||||
|
||||
runString = get_offline_compilation_command(
|
||||
device_address_space_size, compilationMode, bOptions,
|
||||
sourceFilename, outputFilename, clDeviceInfoFilename);
|
||||
}
|
||||
device_address_space_size, compilationMode, bOptions, sourceFilename,
|
||||
outputFilename, clDeviceInfoFilename);
|
||||
|
||||
// execute script
|
||||
log_info("Executing command: %s\n", runString.c_str());
|
||||
@@ -577,9 +504,8 @@ static cl_int get_device_address_bits(const cl_device_id device,
|
||||
|
||||
static int get_offline_compiler_output(
|
||||
std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
|
||||
const bool openclCXX, const CompilationMode compilationMode,
|
||||
const std::string &bOptions, const std::string &kernelPath,
|
||||
const std::string &kernelNamePrefix)
|
||||
const CompilationMode compilationMode, const std::string &bOptions,
|
||||
const std::string &kernelPath, const std::string &kernelNamePrefix)
|
||||
{
|
||||
std::string sourceFilename =
|
||||
get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
|
||||
@@ -599,9 +525,9 @@ static int get_offline_compiler_output(
|
||||
}
|
||||
else
|
||||
{
|
||||
int error = invoke_offline_compiler(
|
||||
device, deviceAddrSpaceSize, compilationMode, bOptions,
|
||||
sourceFilename, outputFilename, openclCXX);
|
||||
int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
|
||||
compilationMode, bOptions,
|
||||
sourceFilename, outputFilename);
|
||||
if (error != CL_SUCCESS) return error;
|
||||
|
||||
// read output file
|
||||
@@ -620,8 +546,7 @@ static int get_offline_compiler_output(
|
||||
static int create_single_kernel_helper_create_program_offline(
|
||||
cl_context context, cl_device_id device, cl_program *outProgram,
|
||||
unsigned int numKernelLines, const char *const *kernelProgram,
|
||||
const char *buildOptions, const bool openclCXX,
|
||||
CompilationMode compilationMode)
|
||||
const char *buildOptions, CompilationMode compilationMode)
|
||||
{
|
||||
if (kCacheModeDumpCl == gCompilationCacheMode)
|
||||
{
|
||||
@@ -649,22 +574,10 @@ static int create_single_kernel_helper_create_program_offline(
|
||||
|
||||
std::ifstream ifs;
|
||||
error = get_offline_compiler_output(ifs, device, device_address_space_size,
|
||||
openclCXX, compilationMode, bOptions,
|
||||
compilationMode, bOptions,
|
||||
gCompilationCachePath, kernelName);
|
||||
if (error != CL_SUCCESS) return error;
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT
|
||||
// ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
if (openclCXX)
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
ifs.seekg(0, ifs.end);
|
||||
int length = ifs.tellg();
|
||||
ifs.seekg(0, ifs.beg);
|
||||
@@ -748,8 +661,7 @@ static int create_single_kernel_helper_create_program_offline(
|
||||
static int create_single_kernel_helper_create_program(
|
||||
cl_context context, cl_device_id device, cl_program *outProgram,
|
||||
unsigned int numKernelLines, const char **kernelProgram,
|
||||
const char *buildOptions, const bool openclCXX,
|
||||
CompilationMode compilationMode)
|
||||
const char *buildOptions, CompilationMode compilationMode)
|
||||
{
|
||||
std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
|
||||
|
||||
@@ -787,37 +699,39 @@ static int create_single_kernel_helper_create_program(
|
||||
{
|
||||
return create_single_kernel_helper_create_program_offline(
|
||||
context, device, outProgram, numKernelLines, kernelProgram,
|
||||
buildOptions, openclCXX, compilationMode);
|
||||
buildOptions, compilationMode);
|
||||
}
|
||||
}
|
||||
|
||||
int create_single_kernel_helper_create_program(
|
||||
cl_context context, cl_program *outProgram, unsigned int numKernelLines,
|
||||
const char **kernelProgram, const char *buildOptions, const bool openclCXX)
|
||||
int create_single_kernel_helper_create_program(cl_context context,
|
||||
cl_program *outProgram,
|
||||
unsigned int numKernelLines,
|
||||
const char **kernelProgram,
|
||||
const char *buildOptions)
|
||||
{
|
||||
return create_single_kernel_helper_create_program(
|
||||
context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
|
||||
openclCXX, gCompilationMode);
|
||||
gCompilationMode);
|
||||
}
|
||||
|
||||
int create_single_kernel_helper_create_program_for_device(
|
||||
cl_context context, cl_device_id device, cl_program *outProgram,
|
||||
unsigned int numKernelLines, const char **kernelProgram,
|
||||
const char *buildOptions, const bool openclCXX)
|
||||
const char *buildOptions)
|
||||
{
|
||||
return create_single_kernel_helper_create_program(
|
||||
context, device, outProgram, numKernelLines, kernelProgram,
|
||||
buildOptions, openclCXX, gCompilationMode);
|
||||
buildOptions, gCompilationMode);
|
||||
}
|
||||
|
||||
int create_single_kernel_helper_with_build_options(
|
||||
cl_context context, cl_program *outProgram, cl_kernel *outKernel,
|
||||
unsigned int numKernelLines, const char **kernelProgram,
|
||||
const char *kernelName, const char *buildOptions, const bool openclCXX)
|
||||
const char *kernelName, const char *buildOptions)
|
||||
{
|
||||
return create_single_kernel_helper(context, outProgram, outKernel,
|
||||
numKernelLines, kernelProgram,
|
||||
kernelName, buildOptions, openclCXX);
|
||||
kernelName, buildOptions);
|
||||
}
|
||||
|
||||
// Creates and builds OpenCL C/C++ program, and creates a kernel
|
||||
@@ -826,7 +740,7 @@ int create_single_kernel_helper(cl_context context, cl_program *outProgram,
|
||||
unsigned int numKernelLines,
|
||||
const char **kernelProgram,
|
||||
const char *kernelName,
|
||||
const char *buildOptions, const bool openclCXX)
|
||||
const char *buildOptions)
|
||||
{
|
||||
// For the logic that automatically adds -cl-std it is much cleaner if the
|
||||
// build options have RAII. This buffer will store the potentially updated
|
||||
@@ -865,51 +779,14 @@ int create_single_kernel_helper(cl_context context, cl_program *outProgram,
|
||||
build_options_internal += cl_std;
|
||||
buildOptions = build_options_internal.c_str();
|
||||
}
|
||||
int error;
|
||||
// Create OpenCL C++ program
|
||||
if (openclCXX)
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT
|
||||
// ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
// Save global variable
|
||||
bool tempgCompilationCacheMode = gCompilationCacheMode;
|
||||
// Force OpenCL C++ -> SPIR-V compilation on every run
|
||||
gCompilationCacheMode = kCacheModeOverwrite;
|
||||
#endif
|
||||
error = create_openclcpp_program(context, outProgram, numKernelLines,
|
||||
kernelProgram, buildOptions);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
log_error("Create program failed: %d, line: %d\n", error, __LINE__);
|
||||
return error;
|
||||
}
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT
|
||||
// ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
// Restore global variables
|
||||
gCompilationCacheMode = tempgCompilationCacheMode;
|
||||
log_info("WARNING: KERNEL %s WAS ONLY COMPILED TO SPIR-V\n",
|
||||
kernelName);
|
||||
return error;
|
||||
#endif
|
||||
}
|
||||
// Create OpenCL C program
|
||||
else
|
||||
{
|
||||
error = create_single_kernel_helper_create_program(
|
||||
int error = create_single_kernel_helper_create_program(
|
||||
context, outProgram, numKernelLines, kernelProgram, buildOptions);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
log_error("Create program failed: %d, line: %d\n", error, __LINE__);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove offline-compiler-only build options
|
||||
std::string newBuildOptions;
|
||||
if (buildOptions != NULL)
|
||||
@@ -930,18 +807,6 @@ int create_single_kernel_helper(cl_context context, cl_program *outProgram,
|
||||
kernelName, newBuildOptions.c_str());
|
||||
}
|
||||
|
||||
// Creates OpenCL C++ program
|
||||
int create_openclcpp_program(cl_context context, cl_program *outProgram,
|
||||
unsigned int numKernelLines,
|
||||
const char **kernelProgram,
|
||||
const char *buildOptions)
|
||||
{
|
||||
// Create program
|
||||
return create_single_kernel_helper_create_program(
|
||||
context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
|
||||
true, kSpir_v);
|
||||
}
|
||||
|
||||
// Builds OpenCL C/C++ program and creates
|
||||
int build_program_create_kernel_helper(
|
||||
cl_context context, cl_program *outProgram, cl_kernel *outKernel,
|
||||
|
||||
@@ -72,24 +72,21 @@ extern int
|
||||
create_single_kernel_helper(cl_context context, cl_program *outProgram,
|
||||
cl_kernel *outKernel, unsigned int numKernelLines,
|
||||
const char **kernelProgram, const char *kernelName,
|
||||
const char *buildOptions = NULL,
|
||||
const bool openclCXX = false);
|
||||
const char *buildOptions = NULL);
|
||||
|
||||
extern int create_single_kernel_helper_with_build_options(
|
||||
cl_context context, cl_program *outProgram, cl_kernel *outKernel,
|
||||
unsigned int numKernelLines, const char **kernelProgram,
|
||||
const char *kernelName, const char *buildOptions,
|
||||
const bool openclCXX = false);
|
||||
const char *kernelName, const char *buildOptions);
|
||||
|
||||
extern int create_single_kernel_helper_create_program(
|
||||
cl_context context, cl_program *outProgram, unsigned int numKernelLines,
|
||||
const char **kernelProgram, const char *buildOptions = NULL,
|
||||
const bool openclCXX = false);
|
||||
const char **kernelProgram, const char *buildOptions = NULL);
|
||||
|
||||
extern int create_single_kernel_helper_create_program_for_device(
|
||||
cl_context context, cl_device_id device, cl_program *outProgram,
|
||||
unsigned int numKernelLines, const char **kernelProgram,
|
||||
const char *buildOptions = NULL, const bool openclCXX = false);
|
||||
const char *buildOptions = NULL);
|
||||
|
||||
/* Creates OpenCL C++ program. This one must be used for creating OpenCL C++
|
||||
* program. */
|
||||
|
||||
@@ -50,9 +50,6 @@ add_subdirectory( subgroups )
|
||||
add_subdirectory( workgroups )
|
||||
add_subdirectory( pipes )
|
||||
add_subdirectory( device_timer )
|
||||
if(KHRONOS_OFFLINE_COMPILER)
|
||||
add_subdirectory( clcpp )
|
||||
endif()
|
||||
add_subdirectory( spirv_new )
|
||||
add_subdirectory( spir )
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
# Add every OpenCL C++ test module directory, in this order.
foreach(clcpp_test_dir
    address_spaces
    api
    atomics
    attributes
    common_funcs
    convert
    device_queue
    geometric_funcs
    images
    integer_funcs
    math_funcs
    pipes
    program_scope_ctors_dtors
    reinterpret
    relational_funcs
    spec_constants
    subgroups
    synchronization
    vload_vstore
    workgroups
    workitems
)
  add_subdirectory(${clcpp_test_dir})
endforeach()
|
||||
@@ -1,7 +0,0 @@
|
||||
# Name of this test module.
set(MODULE_NAME CPP_ADDRESS_SPACES)

# Sources of the module, stored in <MODULE_NAME>_SOURCES
# (here: CPP_ADDRESS_SPACES_SOURCES).
set(${MODULE_NAME}_SOURCES main.cpp)

# Shared build rules. NOTE(review): presumably consumes MODULE_NAME and
# ${MODULE_NAME}_SOURCES - confirm in CMakeCommon.txt.
include(../../CMakeCommon.txt)
|
||||
@@ -1,202 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
// Runs one address-spaces test object and folds its result into `error`.
// Expects `device`, `context`, `queue`, `n_elems`, `last_error` and `error`
// to be in scope at the point of use.
// The statements are enclosed in a block so the macro behaves as a single
// statement (e.g. as the body of an unbraced `if` or `for`); it can still be
// written with or without a trailing semicolon.
#define RUN_ADDRESS_SPACES_TEST_MACRO(TEST_CLASS) \
    { \
        last_error = run_address_spaces_test( \
            device, context, queue, n_elems, TEST_CLASS \
        ); \
        CHECK_ERROR(last_error) \
        error |= last_error; \
    }
|
||||
|
||||
// This is a base class for address spaces tests.
|
||||
template <class T>
|
||||
struct address_spaces_test : public detail::base_func_type<T>
|
||||
{
|
||||
// output buffer type
|
||||
typedef T type;
|
||||
|
||||
virtual ~address_spaces_test() {};
|
||||
// Returns test name
|
||||
virtual std::string str() = 0;
|
||||
// Returns OpenCL program source
|
||||
virtual std::string generate_program() = 0;
|
||||
// Returns kernel names IN ORDER
|
||||
virtual std::vector<std::string> get_kernel_names()
|
||||
{
|
||||
// Typical case, that is, only one kernel
|
||||
return { this->get_kernel_name() };
|
||||
}
|
||||
|
||||
// Return value that is expected to be in output_buffer[i]
|
||||
virtual T operator()(size_t i, size_t work_group_size) = 0;
|
||||
|
||||
// If local size has to be set in clEnqueueNDRangeKernel()
|
||||
// this should return true; otherwise - false;
|
||||
virtual bool set_local_size()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Calculates maximal work-group size (one dim)
|
||||
virtual size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
|
||||
cl_device_id device,
|
||||
size_t work_group_size, // default work-group size
|
||||
cl_int& error)
|
||||
{
|
||||
size_t wg_size = work_group_size;
|
||||
for(auto&k : kernels)
|
||||
{
|
||||
size_t max_wg_size;
|
||||
error = clGetKernelWorkGroupInfo(k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
|
||||
wg_size = (std::min)(max_wg_size, wg_size);
|
||||
}
|
||||
return wg_size;
|
||||
}
|
||||
|
||||
// This covers typical case: each kernel is executed once, every kernel
|
||||
// has only one argument which is output buffer
|
||||
virtual cl_int execute(const std::vector<cl_kernel>& kernels,
|
||||
cl_mem& output_buffer,
|
||||
cl_command_queue& queue,
|
||||
size_t work_size,
|
||||
size_t work_group_size)
|
||||
{
|
||||
cl_int err;
|
||||
for(auto& k : kernels)
|
||||
{
|
||||
err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
err = clEnqueueNDRangeKernel(
|
||||
queue, k, 1,
|
||||
NULL, &work_size, this->set_local_size() ? &work_group_size : NULL,
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
}
|
||||
return err;
|
||||
}
|
||||
};
|
||||
|
||||
template <class address_spaces_test>
|
||||
int run_address_spaces_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, address_spaces_test op)
|
||||
{
|
||||
cl_mem buffers[1];
|
||||
cl_program program;
|
||||
std::vector<cl_kernel> kernels;
|
||||
size_t wg_size;
|
||||
size_t work_size[1];
|
||||
cl_int err;
|
||||
|
||||
typedef typename address_spaces_test::type TYPE;
|
||||
|
||||
// Don't run test for unsupported types
|
||||
if(!(type_supported<TYPE>(device)))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::string code_str = op.generate_program();
|
||||
std::vector<std::string> kernel_names = op.get_kernel_names();
|
||||
if(kernel_names.empty())
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "No kernel to run");
|
||||
}
|
||||
kernels.resize(kernel_names.size());
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false);
|
||||
RETURN_ON_ERROR(err)
|
||||
for(size_t i = 1; i < kernels.size(); i++)
|
||||
{
|
||||
kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateKernel");
|
||||
}
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
|
||||
RETURN_ON_ERROR(err)
|
||||
for(size_t i = 1; i < kernels.size(); i++)
|
||||
{
|
||||
kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateKernel");
|
||||
}
|
||||
#endif
|
||||
|
||||
// Find the max possible wg size for among all the kernels
|
||||
wg_size = op.get_max_local_size(kernels, device, 1024, err);
|
||||
RETURN_ON_ERROR(err);
|
||||
|
||||
work_size[0] = count;
|
||||
if(op.set_local_size())
|
||||
{
|
||||
size_t wg_number = static_cast<size_t>(
|
||||
std::ceil(static_cast<double>(count) / wg_size)
|
||||
);
|
||||
work_size[0] = wg_number * wg_size;
|
||||
}
|
||||
|
||||
// output on host
|
||||
std::vector<TYPE> output = generate_output<TYPE>(work_size[0], 9999);
|
||||
|
||||
// output buffer
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(TYPE) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer")
|
||||
|
||||
// Execute test
|
||||
err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size);
|
||||
RETURN_ON_ERROR(err)
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
|
||||
|
||||
for(size_t i = 0; i < output.size(); i++)
|
||||
{
|
||||
TYPE v = op(i, wg_size);
|
||||
if(!(are_equal(v, output[i], detail::make_value<TYPE>(0), op)))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name<TYPE>().c_str(),
|
||||
format_value(v).c_str(), format_value(output[i]).c_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
log_info("test_%s(%s) passed\n", op.str().c_str(), type_name<TYPE>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
for(auto& k : kernels)
|
||||
clReleaseKernel(k);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP
|
||||
@@ -1,25 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_pointer_types.hpp"
|
||||
#include "test_storage_types.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,412 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// ----------------------------
|
||||
// ---------- PRIVATE
|
||||
// ----------------------------
|
||||
|
||||
template <class T>
|
||||
struct private_pointer_test : public address_spaces_test<T>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "private_pointer";
|
||||
}
|
||||
|
||||
T operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
typedef typename scalar_type<T>::type SCALAR;
|
||||
(void) work_group_size;
|
||||
return detail::make_value<T>(static_cast<SCALAR>(i));
|
||||
}
|
||||
|
||||
// Each work-item writes its global id to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = (" + type_name<T>() + ")(gid);\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" typedef " + type_name<T>() + " TYPE;\n"
|
||||
" TYPE v = TYPE(gid);\n"
|
||||
" private_ptr<TYPE> v_ptr1(dynamic_asptr_cast<private_ptr<TYPE>>(&v));\n"
|
||||
" private_ptr<TYPE> v_ptr2(v_ptr1);\n"
|
||||
" TYPE a[] = { TYPE(0), TYPE(1) };\n"
|
||||
" private_ptr<TYPE> a_ptr = dynamic_asptr_cast<private_ptr<TYPE>>(a);\n"
|
||||
" a_ptr++;\n"
|
||||
" TYPE * a_ptr2 = a_ptr.get();\n"
|
||||
" *a_ptr2 = *v_ptr2;\n"
|
||||
" output[gid] = a[1];\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
// Runs private_pointer_test for cl_uint and for several vector types.
AUTO_TEST_CASE(test_private_pointer)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // private pointer
    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_uint>());
    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_float2>());
    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_float4>());
    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_float8>());
    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_uint16>());

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
// ----------------------------
|
||||
// ---------- LOCAL
|
||||
// ----------------------------
|
||||
|
||||
template <class T>
|
||||
struct local_pointer_test : public address_spaces_test<T>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "local_pointer";
|
||||
}
|
||||
|
||||
T operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
typedef typename scalar_type<T>::type SCALAR;
|
||||
size_t r = i / work_group_size;
|
||||
return detail::make_value<T>(static_cast<SCALAR>(r));
|
||||
}
|
||||
|
||||
bool set_local_size()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
|
||||
cl_device_id device,
|
||||
size_t work_group_size, // default work-group size
|
||||
cl_int& error)
|
||||
{
|
||||
// Set size of the local memory, we need to to this to correctly calculate
|
||||
// max possible work-group size.
|
||||
// Additionally this already set 2nd argument of the test kernel, so we don't
|
||||
// have to modify execute() method.
|
||||
error = clSetKernelArg(kernels[0], 1, sizeof(cl_uint), NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg");
|
||||
|
||||
size_t wg_size;
|
||||
error = clGetKernelWorkGroupInfo(
|
||||
kernels[0], device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
|
||||
wg_size = wg_size <= work_group_size ? wg_size : work_group_size;
|
||||
return wg_size;
|
||||
}
|
||||
|
||||
// Every work-item writes id of its work-group to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output, "
|
||||
"local uint * local_mem_ptr)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = (" + type_name<T>() + ")(get_group_id(0));\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_synchronization>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output, "
|
||||
"local_ptr<uint[]> local_mem_ptr)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" size_t lid = get_local_id(0);\n"
|
||||
" typedef " + type_name<T>() + " TYPE;\n"
|
||||
// 1st work-item in work-group writes get_group_id() to var
|
||||
" local<uint> var;\n"
|
||||
" local_ptr<uint> var_ptr = var.ptr();\n"
|
||||
" if(lid == 0) { *var_ptr = get_group_id(0); }\n"
|
||||
" work_group_barrier(mem_fence::local);\n"
|
||||
// last work-item in work-group writes var to 1st element of local_mem
|
||||
" local_ptr<uint[]> local_mem_ptr2(local_mem_ptr);\n"
|
||||
" auto local_mem_ptr3 = local_mem_ptr2.release();\n"
|
||||
" if(lid == (get_local_size(0) - 1)) { *(local_mem_ptr3) = var; }\n"
|
||||
" work_group_barrier(mem_fence::local);\n"
|
||||
// each work-item in work-group writes local_mem_ptr[0] to output[work-item-global-id]
|
||||
" output[gid] = local_mem_ptr[0];\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
// Runs local_pointer_test for cl_uint and for several vector types.
AUTO_TEST_CASE(test_local_pointer)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // local pointer
    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_uint>());
    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_float2>());
    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_float4>());
    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_float8>());
    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_uint16>());

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
// ----------------------------
|
||||
// ---------- GLOBAL
|
||||
// ----------------------------
|
||||
|
||||
template <class T>
|
||||
struct global_pointer_test : public address_spaces_test<T>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "global_pointer";
|
||||
}
|
||||
|
||||
T operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
typedef typename scalar_type<T>::type SCALAR;
|
||||
(void) work_group_size;
|
||||
return detail::make_value<T>(static_cast<SCALAR>(i));
|
||||
}
|
||||
|
||||
// Each work-item writes its global id to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = (" + type_name<T>() + ")(gid);\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
"typedef " + type_name<T>() + " TYPE;\n"
|
||||
"void set_to_gid(global_ptr<TYPE> ptr)\n"
|
||||
"{\n"
|
||||
" *ptr = TYPE(get_global_id(0));"
|
||||
"}\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<TYPE[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" auto ptr = output.get();\n"
|
||||
" global_ptr<TYPE> ptr2(ptr);\n"
|
||||
" ptr2 += ptrdiff_t(gid);\n"
|
||||
" set_to_gid(ptr2);\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
// Runs global_pointer_test for cl_uint and for several vector types.
AUTO_TEST_CASE(test_global_pointer)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // global pointer
    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_uint>());
    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_float2>());
    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_float4>());
    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_float8>());
    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_uint16>());

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
// ----------------------------
|
||||
// ---------- CONSTANT
|
||||
// ----------------------------
|
||||
|
||||
template <class T>
|
||||
struct constant_pointer_test : public address_spaces_test<T>
|
||||
{
|
||||
// m_test_value is just a random value we use in this test.
|
||||
constant_pointer_test() : m_test_value(0xdeaddeadU)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "constant_pointer";
|
||||
}
|
||||
|
||||
T operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
typedef typename scalar_type<T>::type SCALAR;
|
||||
(void) work_group_size;
|
||||
return detail::make_value<T>(static_cast<SCALAR>(m_test_value));
|
||||
}
|
||||
|
||||
// Each work-item writes m_test_value to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output, "
|
||||
"constant uint * const_ptr)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = (" + type_name<T>() + ")(const_ptr[0]);\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
"typedef " + type_name<T>() + " TYPE;\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<TYPE[]> output, "
|
||||
"constant_ptr<uint[]> const_ptr)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" constant_ptr<uint[]> const_ptr2 = const_ptr;\n"
|
||||
" auto const_ptr3 = const_ptr2.get();\n"
|
||||
" output[gid] = *const_ptr3;\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
// execute() method needs to be modified, to create additional buffer
|
||||
// and set it in 2nd arg (constant_ptr<uint[]> const_ptr)
|
||||
cl_int execute(const std::vector<cl_kernel>& kernels,
|
||||
cl_mem& output_buffer,
|
||||
cl_command_queue& queue,
|
||||
size_t work_size,
|
||||
size_t work_group_size)
|
||||
{
|
||||
cl_int err;
|
||||
|
||||
// Get context from queue
|
||||
cl_context context;
|
||||
err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo");
|
||||
|
||||
// Create constant buffer
|
||||
auto const_buff = clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||
sizeof(cl_uint), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
// Write m_test_value to const_buff
|
||||
err = clEnqueueWriteBuffer(
|
||||
queue, const_buff, CL_TRUE, 0, sizeof(cl_uint),
|
||||
static_cast<void *>(&m_test_value), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
|
||||
|
||||
err = clSetKernelArg(kernels[0], 0, sizeof(output_buffer), &output_buffer);
|
||||
err |= clSetKernelArg(kernels[0], 1, sizeof(const_buff), &const_buff);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
err = clEnqueueNDRangeKernel(
|
||||
queue, kernels[0], 1, NULL, &work_size, this->set_local_size() ? &work_group_size : NULL, 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
|
||||
err = clFinish(queue);
|
||||
RETURN_ON_CL_ERROR(err, "clFinish");
|
||||
|
||||
err = clReleaseMemObject(const_buff);
|
||||
RETURN_ON_CL_ERROR(err, "clReleaseMemObject");
|
||||
return err;
|
||||
}
|
||||
|
||||
private:
|
||||
cl_uint m_test_value;
|
||||
};
|
||||
|
||||
// Runs constant_pointer_test for cl_uint and for several vector types.
AUTO_TEST_CASE(test_constant_pointer)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // constant pointer
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_uint>());
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_float2>());
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_float4>());
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_float8>());
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_uint16>());

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP
|
||||
@@ -1,418 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// ----------------------------
|
||||
// ---------- PRIVATE
|
||||
// ----------------------------
|
||||
|
||||
template <class T>
|
||||
struct private_storage_test : public address_spaces_test<T>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "private_storage";
|
||||
}
|
||||
|
||||
T operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
typedef typename scalar_type<T>::type SCALAR;
|
||||
(void) work_group_size;
|
||||
return detail::make_value<T>(static_cast<SCALAR>(i));
|
||||
}
|
||||
|
||||
// Each work-item writes its global id to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = (" + type_name<T>() + ")(gid);\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" typedef " + type_name<T>() + " TYPE;\n"
|
||||
" priv<TYPE> v = { TYPE(gid) };\n"
|
||||
" const TYPE *v_ptr1 = &v;\n"
|
||||
" private_ptr<TYPE> v_ptr2 = v.ptr();\n"
|
||||
" TYPE v2 = *v_ptr2;\n"
|
||||
" priv<array<TYPE, 1>> a;\n"
|
||||
" *(a.begin()) = v2;\n"
|
||||
" output[gid] = a[0];\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
// Runs private_storage_test for cl_uint and for several vector types.
AUTO_TEST_CASE(test_private_storage)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // private storage
    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_uint>());
    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_float2>());
    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_float4>());
    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_float8>());
    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_uint16>());

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
// ----------------------------
|
||||
// ---------- LOCAL
|
||||
// ----------------------------
|
||||
|
||||
template <class T>
|
||||
struct local_storage_test : public address_spaces_test<T>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "local_storage";
|
||||
}
|
||||
|
||||
T operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
typedef typename scalar_type<T>::type SCALAR;
|
||||
size_t r = i / work_group_size;
|
||||
return detail::make_value<T>(static_cast<SCALAR>(r));
|
||||
}
|
||||
|
||||
bool set_local_size()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// Every work-item writes id of its work-group to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = (" + type_name<T>() + ")(get_group_id(0));\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_synchronization>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
// Using program scope local variable
|
||||
"local<" + type_name<T>() + "> program_scope_var;"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" size_t lid = get_local_id(0);\n"
|
||||
" typedef " + type_name<T>() + " TYPE;\n"
|
||||
// 1st work-item in work-group writes get_group_id() to var
|
||||
" local<TYPE> var;\n"
|
||||
" if(lid == 0) { var = TYPE(get_group_id(0)); }\n"
|
||||
" work_group_barrier(mem_fence::local);\n"
|
||||
// last work-item in work-group writes var to 1st element of a
|
||||
" local_ptr<TYPE> var_ptr = var.ptr();\n"
|
||||
" TYPE var2 = *var_ptr;\n"
|
||||
" local<array<TYPE, 1>> a;\n"
|
||||
" if(lid == (get_local_size(0) - 1)) { *(a.begin()) = var2; }\n"
|
||||
" work_group_barrier(mem_fence::local);\n"
|
||||
// 1st work-item in work-group writes a[0] to program_scope_var
|
||||
" if(lid == 0) { program_scope_var = a[0]; }\n"
|
||||
" work_group_barrier(mem_fence::local);\n"
|
||||
" const TYPE *program_scope_var_ptr = &program_scope_var;\n"
|
||||
" output[gid] = *program_scope_var_ptr;\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
// Runs local_storage_test for cl_uint and for several vector types.
AUTO_TEST_CASE(test_local_storage)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // local storage
    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_uint>());
    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_float2>());
    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_float4>());
    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_float8>());
    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_int16>());

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
// ----------------------------
|
||||
// ---------- GLOBAL
|
||||
// ----------------------------
|
||||
|
||||
template <class T>
|
||||
struct global_storage_test : public address_spaces_test<T>
|
||||
{
|
||||
// m_test_value is just a random value we use in this test.
|
||||
// m_test_value should not be zero.
|
||||
global_storage_test() : m_test_value(0xdeaddeadU)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "global_storage";
|
||||
}
|
||||
|
||||
T operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
typedef typename scalar_type<T>::type SCALAR;
|
||||
return detail::make_value<T>(static_cast<SCALAR>(m_test_value));
|
||||
}
|
||||
|
||||
std::vector<std::string> get_kernel_names()
|
||||
{
|
||||
return
|
||||
{
|
||||
this->get_kernel_name() + "1",
|
||||
this->get_kernel_name() + "2"
|
||||
};
|
||||
}
|
||||
|
||||
// Every work-item writes m_test_value to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_names()[0] + "(global " + type_name<T>() + " *output, "
|
||||
"uint test_value)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = (" + type_name<T>() + ")(test_value);\n"
|
||||
"}\n"
|
||||
"__kernel void " + this->get_kernel_names()[1] + "(global " + type_name<T>() + " *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = output[gid];\n"
|
||||
"}\n";
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
"typedef " + type_name<T>() + " TYPE;\n"
|
||||
// Using program scope global variable
|
||||
"global<array<TYPE, 1>> program_scope_global_array;"
|
||||
"__kernel void " + this->get_kernel_names()[0] + "(global_ptr<" + type_name<T>() + "[]> output, "
|
||||
"uint test_value)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
// 1st work-item writes test_value to program_scope_global_array[0]
|
||||
" if(gid == 0) { program_scope_global_array[0] = test_value; }\n"
|
||||
"}\n"
|
||||
"__kernel void " + this->get_kernel_names()[1] + "(global_ptr<" + type_name<T>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" static global<uint> func_scope_global_var { 0 };\n"
|
||||
// if (func_scope_global_var == 1) is true then
|
||||
// each work-item saves program_scope_global_array[0] to output[work-item-global-id]
|
||||
" if(func_scope_global_var == uint(1))\n"
|
||||
" {\n"
|
||||
" output[gid] = program_scope_global_array[0];\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
// 1st work-item writes 1 to func_scope_global_var
|
||||
" if(gid == 0) { func_scope_global_var = uint(1); }\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
// In this test execution is quite complicated. We have two kernels.
|
||||
// 1st kernel tests program scope global variable, and 2nd kernel tests
|
||||
// function scope global variable (that's why it is run twice).
|
||||
cl_int execute(const std::vector<cl_kernel>& kernels,
|
||||
cl_mem& output_buffer,
|
||||
cl_command_queue& queue,
|
||||
size_t work_size,
|
||||
size_t wg_size)
|
||||
{
|
||||
cl_int err;
|
||||
err = clSetKernelArg(kernels[0], 0, sizeof(output_buffer), &output_buffer);
|
||||
err |= clSetKernelArg(kernels[0], 1, sizeof(cl_uint), &m_test_value);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
// Run first kernel, once.
|
||||
// This kernel saves m_test_value to program scope global variable called program_scope_global_var
|
||||
err = clEnqueueNDRangeKernel(
|
||||
queue, kernels[0], 1, NULL, &work_size, this->set_local_size() ? &wg_size : NULL, 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
err = clFinish(queue);
|
||||
RETURN_ON_CL_ERROR(err, "clFinish")
|
||||
|
||||
err = clSetKernelArg(kernels[1], 0, sizeof(output_buffer), &output_buffer);
|
||||
// Run 2nd kernel, twice.
|
||||
// 1st run: program_scope_global_var is saved to function scope global array called func_scope_global_array
|
||||
// 2nd run: each work-item saves func_scope_global_array[0] to ouput[work-item-global-id]
|
||||
for(size_t i = 0; i < 2; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(
|
||||
queue, kernels[1], 1, NULL, &work_size, this->set_local_size() ? &wg_size : NULL, 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
err = clFinish(queue);
|
||||
RETURN_ON_CL_ERROR(err, "clFinish")
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
private:
|
||||
cl_uint m_test_value;
|
||||
};
|
||||
|
||||
// Runs global_storage_test for cl_uint and for several vector types.
AUTO_TEST_CASE(test_global_storage)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // global storage
    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_uint>());
    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_float2>());
    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_float4>());
    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_float8>());
    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_int16>());

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
// ----------------------------
|
||||
// ---------- CONSTANT
|
||||
// ----------------------------
|
||||
|
||||
template <class T>
|
||||
struct constant_storage_test : public address_spaces_test<T>
|
||||
{
|
||||
// m_test_value is just a random value we use in this test.
|
||||
constant_storage_test() : m_test_value(0xdeaddeadU)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "constant_storage";
|
||||
}
|
||||
|
||||
T operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
typedef typename scalar_type<T>::type SCALAR;
|
||||
return detail::make_value<T>(static_cast<SCALAR>(m_test_value));
|
||||
}
|
||||
|
||||
// Every work-item writes m_test_value to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = (" + type_name<T>() + ")(" + std::to_string(m_test_value) + ");\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
// Program scope constant variable, program_scope_var == (m_test_value - 1)
|
||||
"constant<uint> program_scope_const{ (" + std::to_string(m_test_value) + " - 1) };"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" typedef " + type_name<T>() + " TYPE;\n"
|
||||
" static constant<uint> func_scope_const{ 1 };\n"
|
||||
" constant_ptr<uint> ps_const_ptr = program_scope_const.ptr();\n"
|
||||
// " constant_ptr<array<uint, 1>> fs_const_ptr = &func_scope_const;\n"
|
||||
" output[gid] = TYPE(*ps_const_ptr + func_scope_const);\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
private:
|
||||
cl_uint m_test_value;
|
||||
};
|
||||
|
||||
// Runs constant_storage_test for cl_uint and for several vector types.
AUTO_TEST_CASE(test_constant_storage)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // constant storage
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_uint>());
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_float2>());
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_float4>());
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_float8>());
    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_int16>());

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Name of the module defined by this directory.
set(MODULE_NAME CPP_API)

# Source list for the module; the variable name is derived from MODULE_NAME
# (CPP_API_SOURCES).
set(${MODULE_NAME}_SOURCES main.cpp)

# Shared build logic; presumably consumes MODULE_NAME and the source list
# set above (the included file is not visible here).
include(../../CMakeCommon.txt)
|
||||
@@ -1,27 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_spec_consts.hpp"
|
||||
#include "test_ctors_dtors.hpp"
|
||||
#include "test_ctors.hpp"
|
||||
#include "test_dtors.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,487 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
||||
#include "../common.hpp"
|
||||
|
||||
// TEST 1
|
||||
// Verify that constructors are executed before any kernel is executed.
|
||||
// Verify that when present, multiple constructors are executed. The order between
|
||||
// constructors is undefined, but they should all execute.
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
// Development-only OpenCL C stand-ins: both kernels simply write 0 to
// output[global id].
const char *kernel_test_ctors_executed =
    "__kernel void test_ctors_executed(global uint *output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " output[gid] = 0;\n"
    "}\n";

const char *kernel_test_ctors_executed_multiple_ctors =
    "__kernel void test_ctors_executed_multiple_ctors(global uint *output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " output[gid] = 0;\n"
    "}\n";
#else
// OpenCL C++ kernel with a single program-scope object that has a
// user-provided constructor. The kernel writes 0 to output[global id] when
// the member holds the constructor argument, 1 otherwise.
const char *kernel_test_ctors_executed =
    "#include <opencl_memory>\n"
    "#include <opencl_work_item>\n"
    "using namespace cl;\n"
    "struct ctor_test_class {\n"
    // user-provided (non-trivial) constructor
    " ctor_test_class(int y) { x = y;};\n"
    " int x;\n"
    "};\n"
    // program-scope variable
    "ctor_test_class global_var(int(0xbeefbeef));\n"
    "__kernel void test_ctors_executed(global_ptr<uint[]> output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " int result = 0;\n"
    " if(global_var.x != int(0xbeefbeef)) result = 1;\n"
    " output[gid] = result;\n"
    "}\n";

// Same check with three program-scope objects of a class template
// (int, uint and float instantiations); any mismatch yields 1.
const char *kernel_test_ctors_executed_multiple_ctors =
    "#include <opencl_memory>\n"
    "#include <opencl_work_item>\n"
    "#include <opencl_limits>\n"
    "using namespace cl;\n"
    "template<class T>\n"
    "struct ctor_test_class {\n"
    // user-provided (non-trivial) constructor
    " ctor_test_class(T y) { x = y;};\n"
    " T x;\n"
    "};\n"
    // program-scope variables
    "ctor_test_class<int> global_var0(int(0xbeefbeef));\n"
    "ctor_test_class<uint> global_var1(uint(0xbeefbeefU));\n"
    "ctor_test_class<float> global_var2(float(FLT_MAX));\n"
    "__kernel void test_ctors_executed_multiple_ctors(global_ptr<uint[]> output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " int result = 0;\n"
    " if(global_var0.x != int(0xbeefbeef)) result = 1;\n"
    " if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n"
    " if(global_var2.x != float(FLT_MAX)) result = 1;\n"
    " output[gid] = result;\n"
    "}\n";
#endif
|
||||
|
||||
// Builds `kernel_source`, runs kernel `kernel_name` over `count` work-items and
// checks that every work-item wrote 0 into the output buffer.
//
// The host buffer is pre-filled with 1 and uploaded before the launch, so any
// element the kernel leaves untouched (or sets to non-zero) makes the summed
// result non-zero and the test fail.
//
// Returns CL_SUCCESS when all outputs are 0, -1 when the check fails, or the
// status of the first failing step (the RETURN_ON_* macros are expected to
// return from this function on failure - they are defined outside this file).
int test_ctors_execution(cl_device_id device,
                         cl_context context,
                         cl_command_queue queue,
                         int count,
                         std::string kernel_name,
                         const char * kernel_source)
{
    int error = CL_SUCCESS;

    cl_program program;
    cl_kernel kernel;

// Development-only build variants are selected here; the last branch is the
// normal conformance run.
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    // Only check that the OpenCL C++ source compiles to SPIR-V; nothing is run.
    error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name);
    RETURN_ON_ERROR(error)
    return error;
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    // OpenCL C stand-in kernels: exercises the C++ host code only.
    error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name, "", false);
    RETURN_ON_ERROR(error)
#else
    // Normal run
    error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name);
    RETURN_ON_ERROR(error)
#endif

    // Host staging vector: `count` elements, all initialised to 1.
    std::vector<cl_uint> results(count, cl_uint(1));
    const size_t result_bytes = sizeof(cl_uint) * results.size();
    size_t global_size[1] = { results.size() };

    cl_mem results_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, result_bytes, NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    // Blocking upload of the all-ones pattern.
    error = clEnqueueWriteBuffer(
        queue, results_mem, CL_TRUE,
        0, result_bytes,
        static_cast<void *>(results.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")

    error = clSetKernelArg(kernel, 0, sizeof(results_mem), &results_mem);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    // One-dimensional launch, one work-item per output element.
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Blocking read-back of the per-work-item verdicts.
    error = clEnqueueReadBuffer(
        queue, results_mem, CL_TRUE,
        0, result_bytes,
        static_cast<void *>(results.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    // Every work-item must have reported 0.
    size_t sum = 0;
    for(std::vector<cl_uint>::const_iterator it = results.begin(); it != results.end(); ++it)
    {
        sum += *it;
    }
    if(sum != 0)
    {
        error = -1;
        CHECK_ERROR_MSG(error, "Test %s failed.", kernel_name.c_str());
    }

    clReleaseMemObject(results_mem);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    return error;
}
|
||||
|
||||
// TEST 1 entry point: runs the single-constructor and the multiple-constructor
// variants through test_ctors_execution(). Both variants are always run; the
// test returns -1 if either of them reported an error, CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_global_scope_ctors_executed)
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
{
    int error = CL_SUCCESS;
    int local_error = CL_SUCCESS;

    // Variant 1: one program-scope object.
    local_error = test_ctors_execution(device, context, queue, count,
                                       "test_ctors_executed",
                                       kernel_test_ctors_executed);
    CHECK_ERROR(local_error);
    error |= local_error;

    // Variant 2: several program-scope objects of different types.
    local_error = test_ctors_execution(device, context, queue, count,
                                       "test_ctors_executed_multiple_ctors",
                                       kernel_test_ctors_executed_multiple_ctors);
    CHECK_ERROR(local_error);
    error |= local_error;

    // Collapse any accumulated error bits into the generic failure code.
    return (error != CL_SUCCESS) ? -1 : error;
}
|
||||
|
||||
// TEST 2
|
||||
// Verify that constructors are only executed once when multiple kernels from a program are executed.
|
||||
|
||||
// How: The first kernel (test_ctors_executed_once_set) is run once. It changes values of program scope
|
||||
// variables, then the second kernel is run multiple times, each time verifying that global variables
|
||||
// have correct values (the second kernel should observe the values assigned by the first kernel, not
|
||||
// by the constructors).
|
||||
|
||||
// Program source for TEST 2 (two kernels in one program).
//
// The OpenCL C++ source is used unless BOTH DEVELOPMENT and
// USE_OPENCLC_KERNELS are defined; in that development-only configuration the
// OpenCL C stand-in in the #else branch is used instead.
#if !defined(DEVELOPMENT) || !defined(USE_OPENCLC_KERNELS)
// test_ctors_executed_once_set overwrites the constructor-assigned values (0)
// of the two program-scope objects; test_ctors_executed_once_read writes 0 when
// it still sees the overwritten values and 1 otherwise (i.e. if the objects
// were constructed again).
const char * program_test_ctors_executed_once =
    "#include <opencl_memory>\n"
    "#include <opencl_work_item>\n"
    "using namespace cl;\n"
    // class template with a non-trivial constructor
    "template<class T>\n"
    "struct ctor_test_class {\n"
    " ctor_test_class(T y) { x = y;};\n"
    " T x;\n"
    "};\n"
    // program-scope variables, both constructed with 0
    "ctor_test_class<int> global_var0(int(0));\n"
    "ctor_test_class<uint> global_var1(uint(0));\n"

    "__kernel void test_ctors_executed_once_set()\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " if(gid == 0) {\n"
    " global_var0.x = int(0xbeefbeef);\n"
    " global_var1.x = uint(0xbeefbeefU);\n"
    " }\n"
    "}\n\n"

    "__kernel void test_ctors_executed_once_read(global_ptr<uint[]> output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " int result = 0;\n"
    " if(global_var0.x != int(0xbeefbeef)) result = 1;\n"
    " if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n"
    " output[gid] = result;\n"
    "}\n"
;
#else
// Development-only OpenCL C stand-in: the "set" kernel is empty and the "read"
// kernel always reports success (0).
const char * program_test_ctors_executed_once =
    "__kernel void test_ctors_executed_once_set()\n"
    "{\n"
    "}\n"
    "__kernel void test_ctors_executed_once_read(global uint *output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " output[gid] = 0;\n"
    "}\n"
;
#endif
|
||||
|
||||
// TEST 2 entry point.
//
// Runs test_ctors_executed_once_set once, then runs
// test_ctors_executed_once_read up to four times. Before each read launch the
// output buffer is refilled with 1; after it, every element must be 0.
//
// Verification stops at the first failing read iteration. Previously the loop
// kept going, and the next iteration's API-call status overwrote the -1, so a
// failure in any iteration but the last could be reported as success.
//
// Returns CL_SUCCESS on success, -1 on a verification failure, or the status
// of the first failing step (the RETURN_ON_* macros are expected to return
// from this function on failure - they are defined outside this file).
AUTO_TEST_CASE(test_global_scope_ctors_executed_once)
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
{
    int error = CL_SUCCESS;

    cl_mem output_buffer;
    cl_program program;
    cl_kernel kernel_set_global_vars;
    cl_kernel kernel_read_global_vars;

    size_t dim = 1;
    size_t work_size[1];

// Development-only build variants are selected here; the last branch is the
// normal conformance run.
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    // Only check that the OpenCL C++ source compiles to SPIR-V; nothing is run.
    error = create_opencl_kernel(
        context, &program, &kernel_set_global_vars,
        program_test_ctors_executed_once, "test_ctors_executed_once_set"
    );
    RETURN_ON_ERROR(error)
    return error;
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    // OpenCL C stand-in kernels: exercises the C++ host code only.
    error = create_opencl_kernel(
        context, &program, &kernel_set_global_vars,
        program_test_ctors_executed_once, "test_ctors_executed_once_set", "", false
    );
    RETURN_ON_ERROR(error)
    // Get the second kernel
    kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error);
    RETURN_ON_CL_ERROR(error, "clCreateKernel");
#else
    // Normal run
    error = create_opencl_kernel(
        context, &program, &kernel_set_global_vars,
        program_test_ctors_executed_once, "test_ctors_executed_once_set"
    );
    RETURN_ON_ERROR(error)
    // Get the second kernel
    kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error);
    RETURN_ON_CL_ERROR(error, "clCreateKernel");
#endif

    // Execute kernel_set_global_vars
    work_size[0] = count;
    error = clEnqueueNDRangeKernel(queue, kernel_set_global_vars, dim, NULL, work_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Execute kernel_read_global_vars up to 4 times, each time checking that
    // the global variables still hold the values written by the first kernel.

    // host vector, size == count, output[0...count-1] == 1
    std::vector<cl_uint> output(count, cl_uint(1));
    output_buffer =
        clCreateBuffer(context, CL_MEM_READ_WRITE,
                       sizeof(cl_uint) * output.size(), NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    for(size_t i = 0; i < 4; i++)
    {
        // Reset the pattern so stale zeros from a previous iteration cannot
        // mask a kernel that did not write its result.
        std::fill(output.begin(), output.end(), cl_uint(1));
        error = clEnqueueWriteBuffer(
            queue, output_buffer, CL_TRUE,
            0, sizeof(cl_uint) * output.size(),
            static_cast<void *>(output.data()),
            0, NULL, NULL
        );
        RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")

        error = clSetKernelArg(kernel_read_global_vars, 0, sizeof(output_buffer), &output_buffer);
        RETURN_ON_CL_ERROR(error, "clSetKernelArg")

        work_size[0] = output.size();
        error = clEnqueueNDRangeKernel(
            queue, kernel_read_global_vars,
            dim, NULL, work_size, NULL,
            0, NULL, NULL
        );
        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

        error = clEnqueueReadBuffer(
            queue, output_buffer, CL_TRUE,
            0, sizeof(cl_uint) * output.size(),
            static_cast<void *>(output.data()),
            0, NULL, NULL
        );
        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

        size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
        if(sum != 0)
        {
            error = -1;
            CHECK_ERROR_MSG(error, "Test test_ctors_executed_once failed.");
            // Stop here so the failure is not overwritten by the status of the
            // API calls in a later iteration.
            break;
        }
    }

    clReleaseMemObject(output_buffer);
    clReleaseKernel(kernel_set_global_vars);
    clReleaseKernel(kernel_read_global_vars);
    clReleaseProgram(program);
    return error;
}
|
||||
|
||||
// TEST3
|
||||
// Verify that when constructor is executed, the ND-range used is (1,1,1).
|
||||
|
||||
// Program source for TEST 3.
//
// The OpenCL C++ source is used unless BOTH DEVELOPMENT and
// USE_OPENCLC_KERNELS are defined; in that development-only configuration the
// OpenCL C stand-in in the #else branch is used instead.
#if !defined(DEVELOPMENT) || !defined(USE_OPENCLC_KERNELS)
// Each program-scope object records get_global_size() for dimensions 0..2 in
// its constructor. The kernel writes 0 only if both objects recorded (1, 1, 1).
const char * program_test_ctors_ndrange =
    "#include <opencl_memory>\n"
    "#include <opencl_work_item>\n"
    "using namespace cl;\n"
    "struct ctor_test_class {\n"
    // non-trivial constructor: captures the global size seen at construction
    " ctor_test_class() {\n"
    " x = get_global_size(0);\n"
    " y = get_global_size(1);\n"
    " z = get_global_size(2);\n"
    " };\n"
    " ulong x;\n"
    " ulong y;\n"
    " ulong z;\n"
    // true when the ND-range recorded by the constructor was (1, 1, 1)
    " bool check() { return (x == 1) && (y == 1) && (z == 1);}"
    "};\n"
    // program-scope variables
    "ctor_test_class global_var0;\n"
    "ctor_test_class global_var1;\n"

    "__kernel void test_ctors_ndrange(global_ptr<uint[]> output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " int result = 0;\n"
    " if(!global_var0.check()) result = 1;\n"
    " if(!global_var1.check()) result = 1;\n"
    " output[gid] = result;\n"
    "}\n"
;
#else
// Development-only OpenCL C stand-in: every work-item reports success (0).
const char * program_test_ctors_ndrange =
    "__kernel void test_ctors_ndrange(global int *output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " output[gid] = 0;\n"
    "}\n"
;
#endif
|
||||
|
||||
// TEST 3 entry point: builds program_test_ctors_ndrange, launches
// test_ctors_ndrange over `count` work-items and checks that every work-item
// wrote 0. The host buffer is pre-filled with 1 so untouched elements count as
// failures.
//
// Returns CL_SUCCESS on success, -1 on a verification failure, or the status
// of the first failing step (the RETURN_ON_* macros are expected to return
// from this function on failure - they are defined outside this file).
AUTO_TEST_CASE(test_global_scope_ctors_ndrange)
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
{
    int error = CL_SUCCESS;

    cl_program program;
    cl_kernel kernel;

// Development-only build variants are selected here; the last branch is the
// normal conformance run.
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    // Only check that the OpenCL C++ source compiles to SPIR-V; nothing is run.
    error = create_opencl_kernel(
        context, &program, &kernel,
        program_test_ctors_ndrange, "test_ctors_ndrange"
    );
    RETURN_ON_ERROR(error)
    return error;
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    // OpenCL C stand-in kernel: exercises the C++ host code only.
    error = create_opencl_kernel(
        context, &program, &kernel,
        program_test_ctors_ndrange, "test_ctors_ndrange", "", false
    );
    RETURN_ON_ERROR(error)
#else
    // Normal run
    error = create_opencl_kernel(
        context, &program, &kernel,
        program_test_ctors_ndrange, "test_ctors_ndrange"
    );
    RETURN_ON_ERROR(error)
#endif

    // Host staging vector: `count` elements, all initialised to 1.
    std::vector<cl_uint> results(count, cl_uint(1));
    const size_t result_bytes = sizeof(cl_uint) * results.size();
    size_t global_size[1] = { results.size() };

    cl_mem results_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, result_bytes, NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    // Blocking upload of the all-ones pattern.
    error = clEnqueueWriteBuffer(
        queue, results_mem, CL_TRUE,
        0, result_bytes,
        static_cast<void *>(results.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")

    error = clSetKernelArg(kernel, 0, sizeof(results_mem), &results_mem);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    // One-dimensional launch, one work-item per output element.
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Blocking read-back of the per-work-item verdicts.
    error = clEnqueueReadBuffer(
        queue, results_mem, CL_TRUE,
        0, result_bytes,
        static_cast<void *>(results.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    // Every work-item must have reported 0.
    size_t sum = 0;
    for(std::vector<cl_uint>::const_iterator it = results.begin(); it != results.end(); ++it)
    {
        sum += *it;
    }
    if(sum != 0)
    {
        error = -1;
        CHECK_ERROR_MSG(error, "Test test_ctors_executed_ndrange failed.");
    }

    clReleaseMemObject(results_mem);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    return error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP
|
||||
@@ -1,185 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
#include "../common.hpp"
|
||||
|
||||
// Verify queries clGetProgramInfo correctly return the presence of constructors and/or destructors
|
||||
// in the program (using option CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT/CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT)
|
||||
// (both are present, either one is present, none is present).
|
||||
|
||||
// Assembles the source of a small program consisting of, in this order:
//   - a program-scope object with a non-trivial constructor (when `ctor`),
//   - a program-scope object with a non-trivial destructor (when `dtor`),
//   - an empty kernel named test_ctor_dtor (always).
std::string generate_ctor_dtor_program(const bool ctor, const bool dtor)
{
    // Object whose constructor stores its argument.
    static const char * const ctor_snippet =
        "struct ctor_test_class {\n"
        " ctor_test_class(int y) { x = y;};\n"
        " int x;\n"
        "};\n"
        "ctor_test_class ctor = ctor_test_class(1024);\n";

    // Object whose destructor has a side effect on its member.
    static const char * const dtor_snippet =
        "struct dtor_test_class {\n"
        " ~dtor_test_class() { x = -1024; };\n"
        " int x;\n"
        "};\n"
        "dtor_test_class dtor;\n";

    static const char * const kernel_snippet =
        "__kernel void test_ctor_dtor()\n {\n }\n";

    std::string source;
    if(ctor)
    {
        source.append(ctor_snippet);
    }
    if(dtor)
    {
        source.append(dtor_snippet);
    }
    source.append(kernel_snippet);
    return source;
}
|
||||
|
||||
// Builds a program that contains a program-scope constructor and/or destructor
// (as selected by `ctor` / `dtor`) and checks that clGetProgramInfo reports
// their presence correctly through CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT and
// CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT.
//
// Fixes relative to the previous version:
//   - the DTORS query now writes into `dtors_present`; it used to write into
//     `ctors_present`, so `dtors_present` was never updated and the destructor
//     check always compared against its initial value 0;
//   - verification failures are accumulated in `result`, so a failed
//     constructor check is no longer overwritten by the status of the second
//     clGetProgramInfo call;
//   - `ctors_present` / `cl_bool_size` are initialised, and the size_t values
//     printed with %lu are cast to unsigned long.
//
// Returns CL_SUCCESS when both queries report the expected values, -1 on a
// verification failure, or the status of the first failing step (the
// RETURN_ON_* macros are expected to return from this function on failure -
// they are defined outside this file).
int test_get_program_info_global_ctors_dtors_present(cl_device_id device,
                                                     cl_context context,
                                                     cl_command_queue queue,
                                                     const bool ctor,
                                                     const bool dtor)
{
    int error = CL_SUCCESS;   // status of the most recent API/helper call
    int result = CL_SUCCESS;  // accumulated verification verdict
    cl_program program;

    // program source and options
    std::string options = "";
    std::string source = generate_ctor_dtor_program(ctor, dtor);
    const char * source_ptr = source.c_str();

// Development-only build variants are selected here; the last branch is the
// normal conformance run.
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    // Only check that the OpenCL C++ source compiles to SPIR-V.
    error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str());
    RETURN_ON_ERROR(error)
    return error;
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    // Nothing to query when OpenCL C stand-in kernels are used.
    return CL_SUCCESS;
#else
    // Normal run: create the program.
    error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str());
    RETURN_ON_ERROR(error)
#endif

    // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT (cl_bool)
    // Indicates that the program object contains non-trivial constructor(s)
    // that will be executed by the runtime before any kernel from the program
    // is executed.
    cl_bool ctors_present = CL_FALSE;
    size_t cl_bool_size = 0;
    error = clGetProgramInfo(
        program,
        CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT,
        sizeof(cl_bool),
        static_cast<void*>(&ctors_present),
        &cl_bool_size
    );
    RETURN_ON_CL_ERROR(error, "clGetProgramInfo")
    if(cl_bool_size != sizeof(cl_bool))
    {
        result = -1;
        CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).",
                        static_cast<unsigned long>(cl_bool_size),
                        static_cast<unsigned long>(sizeof(cl_bool)));
    }
    if(ctor && ctors_present != CL_TRUE)
    {
        result = -1;
        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1.");
    }
    else if(!ctor && ctors_present == CL_TRUE)
    {
        result = -1;
        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0.");
    }

    // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT (cl_bool)
    // Indicates that the program object contains non-trivial destructor(s)
    // that will be executed by the runtime when the program is destroyed.
    cl_bool dtors_present = CL_FALSE;
    cl_bool_size = 0;
    error = clGetProgramInfo(
        program,
        CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT,
        sizeof(cl_bool),
        static_cast<void*>(&dtors_present),
        &cl_bool_size
    );
    RETURN_ON_CL_ERROR(error, "clGetProgramInfo")
    if(cl_bool_size != sizeof(cl_bool))
    {
        result = -1;
        CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).",
                        static_cast<unsigned long>(cl_bool_size),
                        static_cast<unsigned long>(sizeof(cl_bool)));
    }
    if(dtor && dtors_present != CL_TRUE)
    {
        result = -1;
        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1.");
    }
    else if(!dtor && dtors_present == CL_TRUE)
    {
        result = -1;
        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0.");
    }

    clReleaseProgram(program);
    return result;
}
|
||||
|
||||
// Entry point for the ctor/dtor presence queries: checks all four combinations
// of "program has a global constructor" / "program has a global destructor".
// Every combination is run; the test returns -1 if any of them reported an
// error, CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_global_scope_ctors_dtors_present)
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
{
    // { ctor present, dtor present } in execution order:
    // both, dtor only, ctor only, neither.
    static const bool combinations[4][2] = {
        { true,  true  },
        { false, true  },
        { true,  false },
        { false, false }
    };

    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;
    for(size_t i = 0; i < 4; i++)
    {
        last_error = test_get_program_info_global_ctors_dtors_present(
            device, context, queue, combinations[i][0], combinations[i][1]
        );
        CHECK_ERROR(last_error);
        error |= last_error;
    }

    // Collapse any accumulated error bits into the generic failure code.
    return (error != CL_SUCCESS) ? -1 : error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP
|
||||
@@ -1,559 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
||||
#include "../common.hpp"
|
||||
|
||||
// TEST 1
|
||||
// Verify that destructor is executed.
|
||||
|
||||
// How: destructor of struct dtor_test_class has a side effect: zeroing buffer. If values
|
||||
// in buffer are not zeros after releasing program, destructor was not executed.
|
||||
|
||||
// Program source for destructor TEST 1.
//
// The OpenCL C++ source is used unless BOTH DEVELOPMENT and
// USE_OPENCLC_KERNELS are defined; in that development-only configuration the
// OpenCL C stand-in in the #else branch is used instead.
#if !defined(DEVELOPMENT) || !defined(USE_OPENCLC_KERNELS)
// The kernel stores the output pointer and the global size in a program-scope
// object; that object's destructor then zeroes the whole buffer. The host
// fills the buffer with non-zero values, so zeros afterwards show that the
// destructor ran.
const char * program_test_dtor_is_executed =
    "#include <opencl_memory>\n"
    "#include <opencl_work_item>\n"
    "using namespace cl;\n"
    "struct dtor_test_class {\n"
    // non-trivial destructor: sets every element of the buffer to 0
    " ~dtor_test_class() {\n"
    " for(ulong i = 0; i < size; i++)\n"
    " buffer[i] = 0;\n"
    " };\n"
    " global_ptr<uint[]> buffer;\n"
    " ulong size;\n"
    "};\n"
    // program-scope variable
    "dtor_test_class global_var;\n"

    // the values in `output` must be non-zero for the check to be meaningful
    "__kernel void test_dtor_is_executed(global_ptr<uint[]> output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    // work-item 0 records the buffer and its size in the global object
    " if(gid == 0){\n"
    " global_var.buffer = output;\n"
    " global_var.size = get_global_size(0);\n"
    " }\n"
    "}\n"
;
#else
// Development-only OpenCL C stand-in: the kernel itself zeroes the buffer.
const char * program_test_dtor_is_executed =
    "__kernel void test_dtor_is_executed(global uint *output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " output[gid] = 0;\n"
    "}\n"
;
#endif
|
||||
|
||||
// Destructor TEST 1 entry point.
//
// Fills a buffer with 0xbeefbeef, runs test_dtor_is_executed, releases the
// kernel and the program (the point at which the program-scope destructor is
// expected to run), waits for the queue to finish and then checks that every
// element of the buffer is 0.
//
// The order release-kernel -> release-program -> clFinish -> read-back is part
// of what the test checks and must not be changed.
//
// Returns CL_SUCCESS on success, -1 on a verification failure, or the status
// of the first failing step (the RETURN_ON_* macros are expected to return
// from this function on failure - they are defined outside this file).
AUTO_TEST_CASE(test_global_scope_dtor_is_executed)
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
{
    int error = CL_SUCCESS;

    cl_program program;
    cl_kernel kernel;

// Development-only build variants are selected here; the last branch is the
// normal conformance run.
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    // Only check that the OpenCL C++ source compiles to SPIR-V; nothing is run.
    error = create_opencl_kernel(
        context, &program, &kernel,
        program_test_dtor_is_executed, "test_dtor_is_executed"
    );
    RETURN_ON_ERROR(error)
    return error;
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    // OpenCL C stand-in kernel: exercises the C++ host code only.
    error = create_opencl_kernel(
        context, &program, &kernel,
        program_test_dtor_is_executed, "test_dtor_is_executed", "", false
    );
    RETURN_ON_ERROR(error)
#else
    // Normal run
    error = create_opencl_kernel(
        context, &program, &kernel,
        program_test_dtor_is_executed, "test_dtor_is_executed"
    );
    RETURN_ON_ERROR(error)
#endif

    // Host staging vector: `count` elements, all 0xbeefbeef (3203383023).
    // The initial values must be non-zero for the final check to mean anything.
    std::vector<cl_uint> results(count, cl_uint(0xbeefbeef));
    const size_t result_bytes = sizeof(cl_uint) * results.size();
    size_t global_size[1] = { results.size() };

    cl_mem results_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, result_bytes, NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    // Blocking upload of the non-zero pattern.
    error = clEnqueueWriteBuffer(
        queue, results_mem, CL_TRUE,
        0, result_bytes,
        static_cast<void *>(results.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")

    error = clSetKernelArg(kernel, 0, sizeof(results_mem), &results_mem);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    // One-dimensional launch, one work-item per buffer element.
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Release kernel and program; the destructor should be called now.
    error = clReleaseKernel(kernel);
    RETURN_ON_CL_ERROR(error, "clReleaseKernel")
    error = clReleaseProgram(program);
    RETURN_ON_CL_ERROR(error, "clReleaseProgram")

    // Wait for everything queued so far.
    error = clFinish(queue);
    RETURN_ON_CL_ERROR(error, "clFinish")

    // Blocking read-back of the buffer.
    error = clEnqueueReadBuffer(
        queue, results_mem, CL_TRUE,
        0, result_bytes,
        static_cast<void *>(results.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    // Every element must have been zeroed.
    size_t sum = 0;
    for(std::vector<cl_uint>::const_iterator it = results.begin(); it != results.end(); ++it)
    {
        sum += *it;
    }
    if(sum != 0)
    {
        error = -1;
        CHECK_ERROR_MSG(error, "Test test_dtor_is_executed failed.");
    }

    clReleaseMemObject(results_mem);
    return error;
}
|
||||
|
||||
// TEST 2
|
||||
// Verify that multiple destructors, if present, are executed. Order between multiple
|
||||
// destructors is undefined.
|
||||
// Verify that each destructor is executed only once.
|
||||
|
||||
// How:
|
||||
// 0) dtor_test_class struct has a global pointer to a buffer, it's set by
|
||||
// test_dtors_executed_once kernel.
|
||||
// 1) Destructors have a side effect: each dtor writes to its part of the buffer. If all
|
||||
// dtors are executed, all values in that buffer should be changed.
|
||||
// 2) The first time destructors are executed, they set their parts of the buffer to zero.
|
||||
// Next time to 1, next time to 2 etc. Since dtors should be executed only once, all
|
||||
// values in that buffer should be equal to zero.
|
||||
|
||||
// Program source for destructor TEST 2.
//
// The OpenCL C++ source is used unless BOTH DEVELOPMENT and
// USE_OPENCLC_KERNELS are defined; in that development-only configuration the
// OpenCL C stand-in in the #else branch is used instead.
#if !defined(DEVELOPMENT) || !defined(USE_OPENCLC_KERNELS)
// Four program-scope objects each own a slice [start, end) of the output
// buffer. A destructor writes its current `counter` into its slice and then
// increments the counter, so a slice holds 0 only if its destructor ran
// exactly once.
const char * program_test_dtors_executed_once =
    "#include <opencl_memory>\n"
    "#include <opencl_work_item>\n"
    "using namespace cl;\n"
    "struct dtor_test_class {\n"
    // non-trivial destructor: fills buffer[start .. end - 1] with `counter`,
    // then bumps `counter` so a second execution would write a non-zero value
    " ~dtor_test_class() {\n"
    " for(ulong i = start; i < end; i++){\n"
    " buffer[i] = counter;\n"
    " };\n"
    " counter++;\n"
    " };\n"
    " global_ptr<uint[]> buffer;\n"
    " ulong start;\n"
    " ulong end;\n"
    " ulong counter;\n"
    "};\n"
    // program-scope variables
    "dtor_test_class global_var0;\n"
    "dtor_test_class global_var1;\n"
    "dtor_test_class global_var2;\n"
    "dtor_test_class global_var3;\n"

    // the values in `output` must be non-zero for the check to be meaningful
    "__kernel void test_dtors_executed_once(global_ptr<uint[]> output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    // work-item 0 assigns each object its quarter of the buffer
    " if(gid == 0){\n"
    " ulong end = get_global_size(0) / 4;"
    // first quarter
    " global_var0.buffer = output;\n"
    " global_var0.start = 0;\n"
    " global_var0.end = end;\n"
    " global_var0.counter = 0;\n"
    // second quarter
    " global_var1.buffer = output;\n"
    " global_var1.start = end;\n"
    " end += get_global_size(0) / 4;\n"
    " global_var1.end = end;\n"
    " global_var1.counter = 0;\n"
    // third quarter
    " global_var2.buffer = output;\n"
    " global_var2.start = end;\n"
    " end += get_global_size(0) / 4;\n"
    " global_var2.end = end;\n"
    " global_var2.counter = 0;\n"
    // remainder of the buffer
    " global_var3.buffer = output;\n"
    " global_var3.start = end;\n"
    " global_var3.end = get_global_size(0);\n"
    " global_var3.counter = 0;\n"
    " }\n"
    "}\n"
;
#else
// Development-only OpenCL C stand-in: the kernel itself zeroes the buffer.
const char * program_test_dtors_executed_once =
    "__kernel void test_dtors_executed_once(global uint *output)\n"
    "{\n"
    " ulong gid = get_global_id(0);\n"
    " output[gid] = 0;\n"
    "}\n"
;
#endif
|
||||
|
||||
AUTO_TEST_CASE(test_global_scope_dtors_executed_once)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
cl_mem output_buffer;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
size_t dim = 1;
|
||||
size_t work_size[1];
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
program_test_dtors_executed_once, "test_dtors_executed_once"
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
return error;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
program_test_dtors_executed_once, "test_dtors_executed_once", "", false
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
// Normal run
|
||||
#else
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
program_test_dtors_executed_once, "test_dtors_executed_once"
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
#endif
|
||||
|
||||
// host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023)
|
||||
// values in output __MUST BE__ greater than 0 for the test to work correctly
|
||||
cl_uint init_value = cl_uint(0xbeefbeef);
|
||||
std::vector<cl_uint> output(count, init_value);
|
||||
output_buffer =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_uint) * output.size(), NULL, &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
error = clEnqueueWriteBuffer(
|
||||
queue, output_buffer, CL_TRUE,
|
||||
0, sizeof(cl_uint) * output.size(),
|
||||
static_cast<void *>(output.data()),
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
|
||||
work_size[0] = output.size();
|
||||
error = clEnqueueNDRangeKernel(
|
||||
queue, kernel,
|
||||
dim, NULL, work_size, NULL,
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
|
||||
|
||||
|
||||
// Increments the program reference count. Twice
|
||||
error = clRetainProgram(program);
|
||||
RETURN_ON_CL_ERROR(error, "clRetainProgram")
|
||||
error = clRetainProgram(program);
|
||||
RETURN_ON_CL_ERROR(error, "clRetainProgram")
|
||||
|
||||
// Should just decrement the program reference count.
|
||||
error = clReleaseProgram(program);
|
||||
RETURN_ON_CL_ERROR(error, "clReleaseProgram")
|
||||
error = clFinish(queue);
|
||||
RETURN_ON_CL_ERROR(error, "clFinish")
|
||||
|
||||
// Should just decrement the program reference count.
|
||||
error = clReleaseProgram(program);
|
||||
RETURN_ON_CL_ERROR(error, "clReleaseProgram")
|
||||
error = clFinish(queue);
|
||||
RETURN_ON_CL_ERROR(error, "clFinish")
|
||||
|
||||
#ifndef USE_OPENCLC_KERNELS
|
||||
// At this point global scope variables should not be destroyed,
|
||||
// values in output buffer should not be modified.
|
||||
|
||||
// Read output buffer
|
||||
error = clEnqueueReadBuffer(
|
||||
queue, output_buffer, CL_TRUE,
|
||||
0, sizeof(cl_uint) * output.size(),
|
||||
static_cast<void *>(output.data()),
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
|
||||
for(auto& i : output)
|
||||
{
|
||||
if(i != init_value)
|
||||
{
|
||||
log_error("ERROR: Test test_global_scope_dtors_executed_once failed.");
|
||||
log_error("\tDestructors were executed prematurely.\n");
|
||||
RETURN_ON_ERROR(-1)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Release kernel and program, destructors should be called now
|
||||
error = clReleaseKernel(kernel);
|
||||
RETURN_ON_CL_ERROR(error, "clReleaseKernel")
|
||||
error = clReleaseProgram(program);
|
||||
RETURN_ON_CL_ERROR(error, "clReleaseProgram")
|
||||
|
||||
// Finish
|
||||
error = clFinish(queue);
|
||||
RETURN_ON_CL_ERROR(error, "clFinish")
|
||||
|
||||
// Read output buffer
|
||||
error = clEnqueueReadBuffer(
|
||||
queue, output_buffer, CL_TRUE,
|
||||
0, sizeof(cl_uint) * output.size(),
|
||||
static_cast<void *>(output.data()),
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
|
||||
|
||||
size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
|
||||
if(sum != 0)
|
||||
{
|
||||
log_error("ERROR: Test test_global_scope_dtors_executed_once failed.");
|
||||
// Maybe some dtors were not run?
|
||||
for(auto& i : output)
|
||||
{
|
||||
if(i == init_value)
|
||||
{
|
||||
log_error("\tSome dtors were not executed.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
log_error("\n");
|
||||
RETURN_ON_ERROR(-1)
|
||||
}
|
||||
|
||||
// Clean
|
||||
clReleaseMemObject(output_buffer);
|
||||
return error;
|
||||
}
|
||||
|
||||
// TEST3
|
||||
// Verify that ND-range during destructor execution is set to (1,1,1)
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
const char * program_test_dtor_ndrange =
|
||||
"__kernel void test_dtor_ndrange(global uint *output)\n"
|
||||
"{\n"
|
||||
" ulong gid = get_global_id(0);\n"
|
||||
" output[gid] = 0;\n"
|
||||
"}\n"
|
||||
;
|
||||
#else
|
||||
const char * program_test_dtor_ndrange =
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
// struct
|
||||
"struct dtor_test_class {\n"
|
||||
// non-trivial dtor
|
||||
// set all values in buffer to 0 only if ND-range is (1, 1, 1)
|
||||
" ~dtor_test_class() {\n"
|
||||
" if(check()){\n"
|
||||
" for(ulong i = 0; i < size; i++)\n"
|
||||
" buffer[i] = 0;\n"
|
||||
" }\n"
|
||||
" };\n"
|
||||
// return true if the ND-range is (1, 1, 1); otherwise - false
|
||||
" bool check() {\n"
|
||||
" return (get_global_size(0) == 1)"
|
||||
" && (get_global_size(1) == 1)"
|
||||
" && (get_global_size(2) == 1);\n"
|
||||
" }"
|
||||
" ulong size;\n"
|
||||
" global_ptr<uint[]> buffer;\n"
|
||||
"};\n"
|
||||
// global scope program variable
|
||||
"dtor_test_class global_var;\n"
|
||||
|
||||
// values in output __MUST BE__ greater than 0 for the test to work correctly
|
||||
"__kernel void test_dtor_ndrange(global_ptr<uint[]> output)\n"
|
||||
"{\n"
|
||||
" ulong gid = get_global_id(0);\n"
|
||||
// set buffer and size in global var
|
||||
" if(gid == 0){\n"
|
||||
" global_var.buffer = output;\n"
|
||||
" global_var.size = get_global_size(0);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
;
|
||||
#endif
|
||||
|
||||
AUTO_TEST_CASE(test_global_scope_dtor_ndrange)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
cl_mem output_buffer;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
size_t dim = 1;
|
||||
size_t work_size[1];
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
program_test_dtor_ndrange, "test_dtor_ndrange"
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
return error;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
program_test_dtor_ndrange, "test_dtor_ndrange", "", false
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
// Normal run
|
||||
#else
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
program_test_dtor_ndrange, "test_dtor_ndrange"
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
#endif
|
||||
|
||||
// host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023)
|
||||
// values in output __MUST BE__ greater than 0 for the test to work correctly
|
||||
std::vector<cl_uint> output(count, cl_uint(0xbeefbeef));
|
||||
output_buffer =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_uint) * output.size(), NULL, &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
error = clEnqueueWriteBuffer(
|
||||
queue, output_buffer, CL_TRUE,
|
||||
0, sizeof(cl_uint) * output.size(),
|
||||
static_cast<void *>(output.data()),
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
|
||||
work_size[0] = output.size();
|
||||
error = clEnqueueNDRangeKernel(
|
||||
queue, kernel,
|
||||
dim, NULL, work_size, NULL,
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
|
||||
|
||||
// Release kernel and program
|
||||
// Dtor should be called now
|
||||
error = clReleaseKernel(kernel);
|
||||
RETURN_ON_CL_ERROR(error, "clReleaseKernel")
|
||||
error = clReleaseProgram(program);
|
||||
RETURN_ON_CL_ERROR(error, "clReleaseProgram")
|
||||
|
||||
// Finish
|
||||
error = clFinish(queue);
|
||||
RETURN_ON_CL_ERROR(error, "clFinish")
|
||||
|
||||
// Read output buffer
|
||||
error = clEnqueueReadBuffer(
|
||||
queue, output_buffer, CL_TRUE,
|
||||
0, sizeof(cl_uint) * output.size(),
|
||||
static_cast<void *>(output.data()),
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
|
||||
|
||||
size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
|
||||
if(sum != 0)
|
||||
{
|
||||
error = -1;
|
||||
CHECK_ERROR_MSG(error, "Test test_dtor_ndrange failed.");
|
||||
}
|
||||
|
||||
clReleaseMemObject(output_buffer);
|
||||
return error;
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP
|
||||
@@ -1,480 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
#include "../common.hpp"
|
||||
|
||||
// TEST 1
|
||||
// Verify that if left unset the specialization constant defaults to the default value set in SPIR-V (zero).
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
const char * kernel_test_spec_consts_defaults =
|
||||
"__kernel void test_spec_consts_defaults(global int *output)\n"
|
||||
"{\n"
|
||||
" ulong gid = get_global_id(0);\n"
|
||||
" output[gid] = 0;\n"
|
||||
"}\n"
|
||||
;
|
||||
#else
|
||||
const char * kernel_test_spec_consts_defaults =
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_spec_constant>\n"
|
||||
"using namespace cl;\n"
|
||||
"spec_constant<char, 1> spec1(0);\n"
|
||||
"spec_constant<uchar, 2> spec2(0);\n"
|
||||
"spec_constant<short, 3> spec3(0);\n"
|
||||
"spec_constant<ushort,4> spec4(0);\n"
|
||||
"spec_constant<int, 5> spec5(0);\n"
|
||||
"spec_constant<uint, 6> spec6(0);\n"
|
||||
"spec_constant<long, 7> spec7(0);\n"
|
||||
"spec_constant<ulong, 8> spec8(0);\n"
|
||||
"spec_constant<float, 9> spec9(0.0f);\n"
|
||||
"#ifdef cl_khr_fp64\n"
|
||||
"spec_constant<double, 10> spec10(0.0);\n"
|
||||
"#endif\n"
|
||||
"#ifdef cl_khr_fp16\n"
|
||||
"spec_constant<half, 11> spec11(0.0h);\n"
|
||||
"#endif\n"
|
||||
"__kernel void test_spec_consts_defaults(global_ptr<int[]> output)\n"
|
||||
"{\n"
|
||||
" ulong gid = get_global_id(0);\n"
|
||||
" int result = 0;\n"
|
||||
" if(get(spec1) != char(0)) result = 1;\n"
|
||||
" if(get(spec2) != uchar(0)) result = 1;\n"
|
||||
" if(get(spec3) != short(0)) result = 1;\n"
|
||||
" if(get(spec4) != ushort(0)) result = 1;\n"
|
||||
" if(get(spec5) != int(0)) result = 1;\n"
|
||||
" if(get(spec6) != uint(0)) result = 1;\n"
|
||||
" if(get(spec7) != long(0)) result = 1;\n"
|
||||
" if(get(spec8) != ulong(0)) result = 1;\n"
|
||||
" if(get(spec9) != float(0)) result = 1;\n"
|
||||
"#ifdef cl_khr_fp64\n"
|
||||
" if(get(spec10) != double(0)) result = 1;\n"
|
||||
"#endif\n"
|
||||
"#ifdef cl_khr_fp16\n"
|
||||
" if(get(spec11) != half(0)) result = 1;\n"
|
||||
"#endif\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n"
|
||||
;
|
||||
#endif
|
||||
|
||||
// Host side of TEST 1: builds kernel_test_spec_consts_defaults WITHOUT setting
// any specialization constant, runs it on a single work-item and expects the
// kernel to write 0 to output[0] (every constant compared equal to zero).
//
// Parameters:
//   device  - queried for cl_khr_fp16 / cl_khr_fp64 to pick build options.
//   context - used to build the program and to create the output buffer.
//   queue   - receives the buffer write, the kernel launch and the read-back.
//   count   - not used by this test.
//
// Returns CL_SUCCESS on success, the failing OpenCL error code when an API
// call fails, or -1 when the kernel reported a non-zero result.
AUTO_TEST_CASE(test_spec_consts_defaults)
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
{
    int error = CL_SUCCESS;

    cl_mem output_buffer;
    cl_program program;
    cl_kernel kernel;

    size_t dim = 1;
    size_t work_size[1];

    // Enable the optional half/double constants only where the device
    // advertises the matching extension.
    std::string options = "";
    if(is_extension_available(device, "cl_khr_fp16"))
    {
        options += " -cl-fp16-enable";
    }
    if(is_extension_available(device, "cl_khr_fp64"))
    {
        options += " -cl-fp64-enable";
    }
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options);
    RETURN_ON_ERROR(error)
    return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", "", false);
    RETURN_ON_ERROR(error)
// Normal run
#else
    // Spec constants are NOT set before clBuildProgram (called in create_opencl_kernel), so
    // they all should default to the default value set in SPIR-V (zero).
    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options);
    RETURN_ON_ERROR(error)
#endif

    // host vector, size == 1, output[0] == 1
    // (seeded with a non-zero value so an untouched buffer reads as a failure)
    std::vector<cl_int> output(1, cl_int(1));
    output_buffer =
        clCreateBuffer(context, CL_MEM_READ_WRITE,
                       sizeof(cl_int) * output.size(), NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")

    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    work_size[0] = output.size();
    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    // if output[0] != 0, then some spec constant(s) did not default to zero.
    if(output[0] != 0)
    {
        RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_defaults failed, output[0]: %d.", output[0])
    }

    clReleaseMemObject(output_buffer);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    return error;
}
|
||||
|
||||
// TEST 2
|
||||
// Verify that setting an existing specialization constant affects only
|
||||
// the value of that constant and not of other specialization constants.
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Device source for TEST 2 (test_spec_consts_many_constants).
// The kernel entry point is named test_spec_consts_many_constants in BOTH
// variants, because that is the name the host test asks for when it creates
// the kernel.
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
// Development stand-in written in OpenCL C: just writes 0 for every work-item.
const char * kernel_test_spec_consts_many_constants =
    "__kernel void test_spec_consts_many_constants(global int *output)\n"
    "{\n"
    "    ulong gid = get_global_id(0);\n"
    "    output[gid] = 0;\n"
    "}\n"
;
#else
// OpenCL C++ variant: three int specialization constants (ids 1..3), all
// defaulting to 0. The kernel writes a bitmask of mismatches:
//   bit 0 -> spec1 != -1024, bit 1 -> spec2 != 0, bit 2 -> spec3 != 1024.
const char * kernel_test_spec_consts_many_constants =
    "#include <opencl_memory>\n"
    "#include <opencl_work_item>\n"
    "#include <opencl_spec_constant>\n"
    "using namespace cl;\n"
    "spec_constant<int, 1> spec1(0);\n"
    "spec_constant<int, 2> spec2(0);\n"
    "spec_constant<int, 3> spec3(0);\n"
    "__kernel void test_spec_consts_many_constants(global_ptr<int[]> output)\n"
    "{\n"
    "    ulong gid = get_global_id(0);\n"
    "    int result = 0;\n"
    "    if(get(spec1) != int(-1024)) result += 1;\n"
    "    if(get(spec2) != int(0)) result += 2;\n"
    "    if(get(spec3) != int(1024)) result += 4;\n"
    "    output[gid] = result;\n"
    "}\n"
;
#endif
|
||||
|
||||
AUTO_TEST_CASE(test_spec_consts_many_constants)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
cl_mem output_buffer;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
size_t dim = 1;
|
||||
size_t work_size[1];
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants"
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
return error;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants", "", false
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
// Normal run
|
||||
#else
|
||||
// Create program
|
||||
error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_many_constants);
|
||||
RETURN_ON_ERROR(error)
|
||||
|
||||
// Set specialization constants
|
||||
|
||||
// clSetProgramSpecializationConstant(
|
||||
// cl_program /* program */, cl_uint /* spec_id */, size_t /* spec_size */,const void* /* spec_value */
|
||||
// )
|
||||
cl_int spec1 = -1024;
|
||||
cl_int spec3 = 1024;
|
||||
// Set spec1
|
||||
error = clSetProgramSpecializationConstant(program, cl_uint(1), sizeof(cl_int), static_cast<void*>(&spec1));
|
||||
RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
|
||||
// Specialization constant spec2 should default to zero
|
||||
// Set spec3
|
||||
error = clSetProgramSpecializationConstant(program, cl_uint(3), sizeof(cl_int), static_cast<void*>(&spec3));
|
||||
RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
|
||||
|
||||
// Build program and create kernel
|
||||
error = build_program_create_kernel_helper(
|
||||
context, &program, &kernel, 1, &kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants"
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
#endif
|
||||
|
||||
// host vector, size == 1, output[0] == 1
|
||||
std::vector<cl_int> output(1, cl_int(1));
|
||||
output_buffer =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int) * output.size(), NULL, &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
|
||||
work_size[0] = output.size();
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
|
||||
|
||||
error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
|
||||
|
||||
// if output[0] != 0, then values of spec constants were incorrect
|
||||
if(output[0] != 0)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_many_constants failed, output[0]: %d.", output[0]);
|
||||
}
|
||||
|
||||
clReleaseMemObject(output_buffer);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
// TEST 3
|
||||
// Verify that the API correctly handles the size of a specialization constant by exercising
|
||||
// the API for specialization constants of different types (int, bool, float, etc.)
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
const char * kernel_test_spec_consts_different_types =
|
||||
"__kernel void test_spec_consts_different_types(global int *output)\n"
|
||||
"{\n"
|
||||
" ulong gid = get_global_id(0);\n"
|
||||
" output[gid] = 0;\n"
|
||||
"}\n"
|
||||
;
|
||||
#else
|
||||
const char * kernel_test_spec_consts_different_types =
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_spec_constant>\n"
|
||||
"#include <opencl_limits>\n"
|
||||
"using namespace cl;\n"
|
||||
"spec_constant<char, 1> spec1(0);\n"
|
||||
"spec_constant<uchar, 2> spec2(0);\n"
|
||||
"spec_constant<short, 3> spec3(0);\n"
|
||||
"spec_constant<ushort,4> spec4(0);\n"
|
||||
"spec_constant<int, 5> spec5(0);\n"
|
||||
"spec_constant<uint, 6> spec6(0);\n"
|
||||
"spec_constant<long, 7> spec7(0);\n"
|
||||
"spec_constant<ulong, 8> spec8(0);\n"
|
||||
"spec_constant<float, 9> spec9(0.0f);\n"
|
||||
"#ifdef cl_khr_fp64\n"
|
||||
"spec_constant<double, 10> spec10(0.0);\n"
|
||||
"#endif\n"
|
||||
"#ifdef cl_khr_fp16\n"
|
||||
"spec_constant<half, 11> spec11(0.0h);\n"
|
||||
"#endif\n"
|
||||
"__kernel void test_spec_consts_different_types(global_ptr<int[]> output)\n"
|
||||
"{\n"
|
||||
" ulong gid = get_global_id(0);\n"
|
||||
" int result = 0;\n"
|
||||
" if(get(spec1) != char(CHAR_MAX)) result += 1;\n"
|
||||
" if(get(spec2) != uchar(UCHAR_MAX)) result += 2;\n"
|
||||
" if(get(spec3) != short(SHRT_MAX)) result += 4;\n"
|
||||
" if(get(spec4) != ushort(USHRT_MAX)) result += 8;\n"
|
||||
" if(get(spec5) != int(INT_MAX)) result += 16;\n"
|
||||
" if(get(spec6) != uint(UINT_MAX)) result += 32;\n"
|
||||
" if(get(spec7) != long(LONG_MAX)) result += 64;\n"
|
||||
" if(get(spec8) != ulong(ULONG_MAX)) result += 128;\n"
|
||||
" if(get(spec9) != float(FLT_MAX)) result += 256;\n"
|
||||
"#ifdef cl_khr_fp64\n"
|
||||
" if(get(spec10) != double(DBL_MAX)) result += 512;\n"
|
||||
"#endif\n"
|
||||
"#ifdef cl_khr_fp16\n"
|
||||
" if(get(spec11) != half(HALF_MAX)) result += 1024;\n"
|
||||
"#endif\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n"
|
||||
;
|
||||
#endif
|
||||
|
||||
|
||||
// Host side of TEST 3: sets specialization constants of several scalar types
// to their maximum value (passing the matching sizeof for each one), builds
// kernel_test_spec_consts_different_types, runs it on a single work-item and
// expects the kernel to write 0 to output[0].
//
// Parameters:
//   device  - queried for cl_khr_fp16 / cl_khr_fp64 (build options and whether
//             spec10 / spec11 are set).
//   context - used to create the program and the output buffer.
//   queue   - receives the buffer write, the kernel launch and the read-back.
//   count   - not used by this test.
//
// Returns CL_SUCCESS on success, the failing OpenCL error code when an API
// call fails, or -1 when the kernel reported a non-zero result.
AUTO_TEST_CASE(test_spec_consts_different_types)
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
{
    int error = CL_SUCCESS;

    cl_mem output_buffer;
    cl_program program;
    cl_kernel kernel;

    size_t dim = 1;
    size_t work_size[1];

    std::string options = "";
    if(is_extension_available(device, "cl_khr_fp16"))
    {
        options += " -cl-fp16-enable";
    }
    if(is_extension_available(device, "cl_khr_fp64"))
    {
        options += " -cl-fp64-enable";
    }
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_different_types, "test_spec_consts_different_types", options);
    RETURN_ON_ERROR(error)
    return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_different_types, "test_spec_consts_different_types", "", false);
    RETURN_ON_ERROR(error)
// Normal run
#else
    // Create program
    error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_different_types, options.c_str());
    RETURN_ON_ERROR(error)

    // Set specialization constants
    // spec_id walks ids 1..9 for the always-present constants; ids 10 and 11
    // are passed explicitly because they are only set when the extension exists.
    cl_uint spec_id = 1;

    cl_char spec1 = CL_CHAR_MAX;
    cl_uchar spec2 = CL_UCHAR_MAX;
    cl_short spec3 = CL_SHRT_MAX;
    cl_ushort spec4 = CL_USHRT_MAX;
    cl_int spec5 = CL_INT_MAX;
    cl_uint spec6 = CL_UINT_MAX;
    cl_long spec7 = CL_LONG_MAX;
    cl_ulong spec8 = CL_ULONG_MAX;
    cl_float spec9 = CL_FLT_MAX;
    cl_double spec10 = CL_DBL_MAX;
    cl_half spec11 = CL_HALF_MAX;

    // Set spec1
    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_char), static_cast<void*>(&spec1));
    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    // Set spec2
    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uchar), static_cast<void*>(&spec2));
    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    // Set spec3
    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_short), static_cast<void*>(&spec3));
    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    // Set spec4
    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ushort), static_cast<void*>(&spec4));
    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    // Set spec5
    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_int), static_cast<void*>(&spec5));
    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    // Set spec6
    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uint), static_cast<void*>(&spec6));
    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    // Set spec7
    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_long), static_cast<void*>(&spec7));
    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    // Set spec8
    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ulong), static_cast<void*>(&spec8));
    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    // Set spec9
    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_float), static_cast<void*>(&spec9));
    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    // Set spec10
    if(is_extension_available(device, "cl_khr_fp64"))
    {
        error = clSetProgramSpecializationConstant(program, cl_uint(10), sizeof(cl_double), static_cast<void*>(&spec10));
        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    }
    // Set spec11
    if(is_extension_available(device, "cl_khr_fp16"))
    {
        error = clSetProgramSpecializationConstant(program, cl_uint(11), sizeof(cl_half), static_cast<void*>(&spec11));
        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    }

    // Build program and create kernel
    // The source and kernel name must be THIS test's; they were previously
    // copy-pasted from the many_constants test.
    error = build_program_create_kernel_helper(
        context, &program, &kernel, 1, &kernel_test_spec_consts_different_types, "test_spec_consts_different_types"
    );
    RETURN_ON_ERROR(error)
#endif

    // Copy output to output_buffer, run kernel, copy output_buffer back to output, check result

    // host vector, size == 1, output[0] == 1
    std::vector<cl_int> output(1, cl_int(1));
    output_buffer =
        clCreateBuffer(context, CL_MEM_READ_WRITE,
                       sizeof(cl_int) * output.size(), NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")

    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    work_size[0] = output.size();
    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    // if output[0] != 0, then some spec constants had incorrect values
    if(output[0] != 0)
    {
        RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_different_types failed, output[0]: %d.", output[0])
    }

    clReleaseMemObject(output_buffer);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    return error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Test module for the OpenCL C++ atomics tests.
set(MODULE_NAME CPP_ATOMICS)

# main.cpp is the only translation unit; it pulls the test header in via #include.
set(${MODULE_NAME}_SOURCES main.cpp)

# Build logic is taken from the CMakeCommon.txt two directories up.
include(../../CMakeCommon.txt)
|
||||
@@ -1,308 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
|
||||
const size_t atomic_bucket_size = 100;
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
template <class func_type, class type>
|
||||
std::string generate_kernel_atomic_fetch(func_type func)
|
||||
{
|
||||
std::string in1_value = "input[gid]";
|
||||
std::string out1_value = "output[gid / " + std::to_string(atomic_bucket_size) + "]";
|
||||
std::string function_call = "atomic_" + func.str() + "(&" + out1_value + ", " + in1_value + ")";
|
||||
return
|
||||
"" + func.defs() +
|
||||
"__kernel void test_" + func.str() + "(global " + type_name<type>() + " *input, global atomic_" + type_name<type>() + " *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" " + function_call + ";\n"
|
||||
"}\n";
|
||||
}
|
||||
#else
|
||||
template <class func_type, class type>
|
||||
std::string generate_kernel_atomic_fetch(func_type func)
|
||||
{
|
||||
std::string in1_value = "input[gid]";
|
||||
std::string out1_value = "output[gid / " + std::to_string(atomic_bucket_size) + "]";
|
||||
std::string function_call = func.str() + "(" + in1_value + ")";
|
||||
return
|
||||
"" + func.defs() +
|
||||
"" + func.headers() +
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_" + func.str() + "(global_ptr<" + type_name<type>() + "[]> input,"
|
||||
"global_ptr<atomic<" + type_name<type>() + ">[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" " + out1_value + "." + function_call + ";\n"
|
||||
"}\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
template<class TYPE, class atomic_fetch>
|
||||
bool verify_atomic_fetch(const std::vector<TYPE> &in, const std::vector<TYPE> &out, atomic_fetch op)
|
||||
{
|
||||
for (size_t i = 0; i < out.size(); i++)
|
||||
{
|
||||
TYPE expected = op.init_out();
|
||||
for (size_t k = 0; k < atomic_bucket_size; k++)
|
||||
{
|
||||
const size_t in_i = i * atomic_bucket_size + k;
|
||||
if (in_i >= in.size())
|
||||
break;
|
||||
expected = op(expected, in[in_i]);
|
||||
}
|
||||
if (expected != out[i])
|
||||
{
|
||||
print_error_msg(expected, out[i], i, op);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class atomic_fetch>
|
||||
int test_atomic_fetch_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, atomic_fetch op)
|
||||
{
|
||||
cl_mem buffers[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t work_size[1];
|
||||
int err;
|
||||
|
||||
typedef typename atomic_fetch::in_type TYPE;
|
||||
|
||||
// Don't run test for unsupported types
|
||||
if (!(type_supported<TYPE>(device)))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
if (sizeof(TYPE) == 8 &&
|
||||
(!is_extension_available(device, "cl_khr_int64_base_atomics") ||
|
||||
!is_extension_available(device, "cl_khr_int64_extended_atomics")))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::string code_str = generate_kernel_atomic_fetch<atomic_fetch, TYPE>(op);
|
||||
std::string kernel_name("test_"); kernel_name += op.str();
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
|
||||
RETURN_ON_ERROR(err)
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
|
||||
RETURN_ON_ERROR(err)
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
|
||||
RETURN_ON_ERROR(err)
|
||||
#endif
|
||||
|
||||
std::vector<TYPE> input = generate_input<TYPE>(count, op.min1(), op.max1(), std::vector<TYPE>());
|
||||
std::vector<TYPE> output = generate_output<TYPE>((count - 1) / atomic_bucket_size + 1);
|
||||
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(TYPE) * input.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer")
|
||||
|
||||
buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(TYPE) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer")
|
||||
|
||||
err = clEnqueueWriteBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * input.size(),
|
||||
static_cast<void *>(input.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer")
|
||||
|
||||
const TYPE pattern = op.init_out();
|
||||
err = clEnqueueFillBuffer(queue, buffers[1], &pattern, sizeof(pattern), 0, sizeof(TYPE) * output.size(), 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueFillBuffer")
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg")
|
||||
err = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg")
|
||||
|
||||
work_size[0] = count;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[1], CL_TRUE, 0, sizeof(TYPE) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer")
|
||||
|
||||
if (!verify_atomic_fetch(input, output, op))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "test_%s %s failed", op.str().c_str(), type_name<TYPE>().c_str());
|
||||
}
|
||||
log_info("test_%s %s passed\n", op.str().c_str(), type_name<TYPE>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseMemObject(buffers[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
template<class TYPE>
|
||||
struct atomic_fetch
|
||||
{
|
||||
typedef TYPE in_type;
|
||||
|
||||
std::string decl_str()
|
||||
{
|
||||
return type_name<TYPE>();
|
||||
}
|
||||
|
||||
std::string defs()
|
||||
{
|
||||
std::string defs;
|
||||
if (sizeof(TYPE) == 8)
|
||||
{
|
||||
defs += "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n";
|
||||
defs += "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n";
|
||||
}
|
||||
return defs;
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_atomic>\n";
|
||||
}
|
||||
|
||||
TYPE min1()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
TYPE max1()
|
||||
{
|
||||
return 1000;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#define DEF_ATOMIC_FETCH_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION, INIT_OUT) \
|
||||
template<class TYPE> \
|
||||
struct CLASS_NAME : public atomic_fetch<TYPE> \
|
||||
{ \
|
||||
std::string str() \
|
||||
{ \
|
||||
return #FUNC_NAME; \
|
||||
} \
|
||||
\
|
||||
TYPE init_out() \
|
||||
{ \
|
||||
return INIT_OUT; \
|
||||
} \
|
||||
\
|
||||
TYPE operator()(const TYPE& x, const TYPE& y) \
|
||||
{ \
|
||||
return HOST_FUNC_EXPRESSION; \
|
||||
} \
|
||||
};
|
||||
|
||||
DEF_ATOMIC_FETCH_FUNC(atomic_fetch_add, fetch_add, x + y, 0)
|
||||
DEF_ATOMIC_FETCH_FUNC(atomic_fetch_sub, fetch_sub, x - y, (std::numeric_limits<TYPE>::max)())
|
||||
|
||||
DEF_ATOMIC_FETCH_FUNC(atomic_fetch_and, fetch_and, x & y, (std::numeric_limits<TYPE>::max)())
|
||||
DEF_ATOMIC_FETCH_FUNC(atomic_fetch_or, fetch_or, x | y, 0)
|
||||
DEF_ATOMIC_FETCH_FUNC(atomic_fetch_xor, fetch_xor, x ^ y, 0)
|
||||
|
||||
DEF_ATOMIC_FETCH_FUNC(atomic_fetch_max, fetch_max, (std::max)(x, y), 0)
|
||||
DEF_ATOMIC_FETCH_FUNC(atomic_fetch_min, fetch_min, (std::min)(x, y), (std::numeric_limits<TYPE>::max)())
|
||||
|
||||
#undef DEF_ATOMIC_FETCH_FUNC
|
||||
|
||||
|
||||
// Runs every atomic fetch operation for cl_int, cl_uint, cl_long and cl_ulong.
// Error codes of the individual runs are OR-ed together; any non-zero result
// is reported as -1.
AUTO_TEST_CASE(test_atomic_fetch)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

// One run of test_atomic_fetch_func for a single functor instance.
#define TEST_ATOMIC_MACRO(TEST_CLASS) \
    last_error = test_atomic_fetch_func( \
        device, context, queue, n_elems, TEST_CLASS \
    ); \
    CHECK_ERROR(last_error) \
    error |= last_error;

// The four element types, in the order int, uint, long, ulong.
#define TEST_ATOMIC_ALL_TYPES(OPERATION) \
    TEST_ATOMIC_MACRO((OPERATION<cl_int>())) \
    TEST_ATOMIC_MACRO((OPERATION<cl_uint>())) \
    TEST_ATOMIC_MACRO((OPERATION<cl_long>())) \
    TEST_ATOMIC_MACRO((OPERATION<cl_ulong>()))

    TEST_ATOMIC_ALL_TYPES(atomic_fetch_add)
    TEST_ATOMIC_ALL_TYPES(atomic_fetch_sub)
    TEST_ATOMIC_ALL_TYPES(atomic_fetch_and)
    TEST_ATOMIC_ALL_TYPES(atomic_fetch_or)
    TEST_ATOMIC_ALL_TYPES(atomic_fetch_xor)
    TEST_ATOMIC_ALL_TYPES(atomic_fetch_max)
    TEST_ATOMIC_ALL_TYPES(atomic_fetch_min)

#undef TEST_ATOMIC_ALL_TYPES
#undef TEST_ATOMIC_MACRO

    return (error != CL_SUCCESS) ? -1 : error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP
|
||||
@@ -1,25 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "atomic_fetch.hpp"
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
# Test module for the OpenCL C++ attributes tests.
set(MODULE_NAME CPP_ATTRIBUTES)

# main.cpp is the only translation unit; it pulls the test headers in via #include.
set(${MODULE_NAME}_SOURCES main.cpp)

# Build logic is taken from the CMakeCommon.txt two directories up.
include(../../CMakeCommon.txt)
|
||||
@@ -1,27 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_ivdep.hpp"
|
||||
#include "test_max_size.hpp"
|
||||
#include "test_required_num_sub_groups.hpp"
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,418 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
|
||||
|
||||
namespace test_ivdep {
|
||||
|
||||
// Kind of loop statement emitted into the generated kernel.
enum class loop_kind
{
    for_loop,
    while_loop,
    do_loop
};

// Describes one generated kernel.
struct test_options
{
    loop_kind loop;        // loop statement to emit
    int ivdep_length;      // > 0: annotate the loops with cl::ivdep (C++ kernels only)
    int offset1;           // index offset used by the first loop
    int offset2;           // index offset used by the second loop
    int iter_count;        // loop bound (the index advances by 2)
    bool offset1_param;    // true: use kernel argument "offset1" instead of the literal
    bool offset2_param;    // true: use kernel argument "offset2" instead of the literal
    bool iter_count_param; // true: use kernel argument "iter_count" instead of the literal
    bool cond_in_header;   // false: condition becomes "if (!(cond)) break;" in the body
    bool init_in_header;   // false: initialisation is emitted before the loop
    bool incr_in_header;   // false: increment is emitted at the end of the body
};

// Writes one loop to `s`. `attribute` is emitted on its own line directly in
// front of the loop statement unless it is empty; `statement` is the single
// statement forming the loop body. Where init/cond/incr end up depends on the
// *_in_header flags in `options`.
static void emit_loop(std::stringstream &s, const test_options &options,
                      const std::string &attribute, const std::string &init,
                      const std::string &cond, const std::string &incr,
                      const std::string &statement)
{
    if (!options.init_in_header) s << init << ";" << std::endl;
    if (!attribute.empty()) s << attribute << std::endl;

    if (options.loop == loop_kind::for_loop)
        s << "for (" <<
            (options.init_in_header ? init : "") << ";" <<
            (options.cond_in_header ? cond : "") << ";" <<
            (options.incr_in_header ? incr : "") << ")";
    else if (options.loop == loop_kind::while_loop)
        s << "while (" << (options.cond_in_header ? cond : "true") << ")";
    else if (options.loop == loop_kind::do_loop)
        s << "do";

    s << "{" << std::endl;
    if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl;
    s << statement << std::endl;
    if (!options.incr_in_header) s << incr << ";" << std::endl;
    s << "}" << std::endl;

    // A do-loop carries its condition after the body.
    if (options.loop == loop_kind::do_loop)
        s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl;
}

// Builds the source of kernel "test": two consecutive loops over a, b and c.
//   loop #1: a[i + offset1] = b[i + offset1] * c[i + offset1]
//   loop #2: a[i + offset2] = a[i] + b[i]
// In the OpenCL C++ build the loops are annotated with [[cl::ivdep]] and
// [[cl::ivdep(N)]] respectively when options.ivdep_length > 0.
std::string generate_source(test_options options)
{
    const std::string offset1s = options.offset1_param ? "offset1" : std::to_string(options.offset1);
    const std::string offset2s = options.offset2_param ? "offset2" : std::to_string(options.offset2);

    const std::string init = "i = 0";
    const std::string cond = std::string("i < ") + (options.iter_count_param ? "iter_count" : std::to_string(options.iter_count));
    const std::string incr = "i += 2";

    std::string attribute1;
    std::string attribute2;

    std::stringstream s;
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    // OpenCL C kernel: plain global pointers, no loop attributes.
    s << R"(
kernel void test(global int *a, global int *b, global int *c, int offset1, int offset2, int iter_count)
{
int i;
)";
#else
    s << R"(
#include <opencl_memory>
#include <opencl_work_item>

using namespace cl;
)";
    s << R"(
kernel void test(global_ptr<int[]> a, global_ptr<int[]> b, global_ptr<int[]> c, int offset1, int offset2, int iter_count)
{
int i;
)";
    if (options.ivdep_length > 0)
    {
        attribute1 = "[[cl::ivdep]]";
        attribute2 = "[[cl::ivdep(" + std::to_string(options.ivdep_length) + ")]]";
    }
#endif

    // Loop #1
    emit_loop(s, options, attribute1, init, cond, incr,
              "a[i + " + offset1s + "] = b[i + " + offset1s + "] * c[i + " + offset1s + "];");

    // Loop #2
    emit_loop(s, options, attribute2, init, cond, incr,
              "a[i + " + offset2s + "] = a[i] + b[i];");

    s << "}" << std::endl;

    return s.str();
}
|
||||
|
||||
// Builds the kernel described by `options`, runs it with a single work-item
// over three 100-element int buffers and compares buffer `a` with the result
// of running the same two loops on the host.
//
// Returns CL_SUCCESS on success; otherwise the failing error code is returned
// through the RETURN_ON_* macros (-1 for a result mismatch).
int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
{
    // Owns the OpenCL objects created below so that they are released on
    // every exit path, including the early returns performed by the
    // RETURN_ON_* macros (previously those paths leaked all of them).
    struct cl_object_guard
    {
        cl_mem a_buffer;
        cl_mem b_buffer;
        cl_mem c_buffer;
        cl_program program;
        cl_kernel kernel;

        cl_object_guard()
            : a_buffer(NULL), b_buffer(NULL), c_buffer(NULL), program(NULL), kernel(NULL)
        {
        }

        ~cl_object_guard()
        {
            if (a_buffer != NULL) clReleaseMemObject(a_buffer);
            if (b_buffer != NULL) clReleaseMemObject(b_buffer);
            if (c_buffer != NULL) clReleaseMemObject(c_buffer);
            if (kernel != NULL) clReleaseKernel(kernel);
            if (program != NULL) clReleaseProgram(program);
        }
    } objects;

    int error = CL_SUCCESS;

    cl_program program;
    cl_kernel kernel;

    std::string kernel_name = "test";
    std::string source = generate_source(options);

// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
    return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name, "", false
    );
    RETURN_ON_ERROR(error)
// Normal run
#else
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
#endif

    // Hand the handles to the guard only after the helper reported success;
    // what the helper leaves in its out-parameters on failure is not visible here.
    objects.program = program;
    objects.kernel = kernel;

    const size_t count = 100;
    const size_t global_size = 1;

    // Host copies of the kernel buffers: a = 0, b = index, c = 1.
    std::vector<int> a(count);
    std::vector<int> b(count);
    std::vector<int> c(count);
    for (size_t i = 0; i < count; i++)
    {
        a[i] = 0;
        b[i] = static_cast<int>(i);
        c[i] = 1;
    }

    objects.a_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        sizeof(int) * count, static_cast<void *>(a.data()), &error
    );
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    objects.b_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        sizeof(int) * count, static_cast<void *>(b.data()), &error
    );
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    objects.c_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        sizeof(int) * count, static_cast<void *>(c.data()), &error
    );
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    error = clSetKernelArg(objects.kernel, 0, sizeof(cl_mem), &objects.a_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(objects.kernel, 1, sizeof(cl_mem), &objects.b_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(objects.kernel, 2, sizeof(cl_mem), &objects.c_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(objects.kernel, 3, sizeof(cl_int), &options.offset1);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(objects.kernel, 4, sizeof(cl_int), &options.offset2);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(objects.kernel, 5, sizeof(cl_int), &options.iter_count);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    error = clEnqueueNDRangeKernel(queue, objects.kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Blocking read: the device result is on the host once this call returns.
    std::vector<int> a_output(count);
    error = clEnqueueReadBuffer(
        queue, objects.a_buffer, CL_TRUE,
        0, sizeof(int) * count,
        static_cast<void *>(a_output.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    // Host reference: the same two loops the kernel executes.
    for (int i = 0; i < options.iter_count; i += 2)
    {
        a[i + options.offset1] = b[i + options.offset1] * c[i + options.offset1];
    }

    for (int i = 0; i < options.iter_count; i += 2)
    {
        a[i + options.offset2] = a[i] + b[i];
    }

    for (size_t i = 0; i < count; i++)
    {
        const int value = a_output[i];
        const int expected = a[i];
        if (value != expected)
        {
            // %lu expects unsigned long; size_t is a different type on some
            // targets (e.g. 64-bit Windows), so convert explicitly.
            RETURN_ON_ERROR_MSG(-1,
                "Test failed. Element %lu: %d should be: %d",
                static_cast<unsigned long>(i), value, expected
            );
        }
    }

    // Buffers, kernel and program are released by `objects` on scope exit.
    return error;
}
|
||||
|
||||
// Parameter sets shared by the ivdep test cases:
// (ivdep_length, offset1, offset2). A non-positive ivdep_length means the
// loops are generated without the ivdep attribute.
const std::vector<std::tuple<int, int, int>> params{
    std::tuple<int, int, int>(-1, 0, 0),
    std::tuple<int, int, int>(-1, 3, 4),
    std::tuple<int, int, int>(1, 1, 1),
    std::tuple<int, int, int>(3, 4, 2),
    std::tuple<int, int, int>(3, 4, 3),
    std::tuple<int, int, int>(8, 10, 7),
    std::tuple<int, int, int>(16, 16, 16)
};

// Loop bounds exercised for every parameter set.
const std::vector<int> iter_counts{ 1, 4, 12, 40 };
|
||||
|
||||
// Runs the ivdep test with for-loops for every parameter set, loop bound and
// combination of the six boolean options. Stops at the first failure.
AUTO_TEST_CASE(test_ivdep_for)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int error = CL_SUCCESS;

    for (const auto &param : params)
    {
        for (const int iter_count : iter_counts)
        {
            // Each bit of `mask` is one boolean option. offset1_param is the
            // most significant bit and incr_in_header the least significant,
            // so combinations are visited in the same order as six nested
            // { false, true } loops.
            for (unsigned mask = 0; mask < 64u; ++mask)
            {
                test_options options;
                options.loop = loop_kind::for_loop;
                options.ivdep_length = std::get<0>(param);
                options.offset1 = std::get<1>(param);
                options.offset2 = std::get<2>(param);
                options.iter_count = iter_count;
                options.offset1_param = (mask & 32u) != 0;
                options.offset2_param = (mask & 16u) != 0;
                options.iter_count_param = (mask & 8u) != 0;
                options.cond_in_header = (mask & 4u) != 0;
                options.init_in_header = (mask & 2u) != 0;
                options.incr_in_header = (mask & 1u) != 0;

                error = test(device, context, queue, options);
                RETURN_ON_ERROR(error)
            }
        }
    }

    return error;
}
|
||||
|
||||
// Runs the ivdep test with while-loops for every parameter set, loop bound
// and combination of the four boolean options that apply to a while
// statement (init and increment are never in the header). Stops at the first
// failure.
AUTO_TEST_CASE(test_ivdep_while)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int error = CL_SUCCESS;

    for (const auto &param : params)
    {
        for (const int iter_count : iter_counts)
        {
            // Bits of `mask`, most to least significant: offset1_param,
            // offset2_param, iter_count_param, cond_in_header - the same
            // visiting order as four nested { false, true } loops.
            for (unsigned mask = 0; mask < 16u; ++mask)
            {
                test_options options;
                options.loop = loop_kind::while_loop;
                options.ivdep_length = std::get<0>(param);
                options.offset1 = std::get<1>(param);
                options.offset2 = std::get<2>(param);
                options.iter_count = iter_count;
                options.offset1_param = (mask & 8u) != 0;
                options.offset2_param = (mask & 4u) != 0;
                options.iter_count_param = (mask & 2u) != 0;
                options.cond_in_header = (mask & 1u) != 0;
                options.init_in_header = false;
                options.incr_in_header = false;

                error = test(device, context, queue, options);
                RETURN_ON_ERROR(error)
            }
        }
    }

    return error;
}
|
||||
|
||||
// Runs the ivdep test with do-while loops for every parameter set, loop bound
// and combination of the four boolean options that apply to a do statement
// (init and increment are never in the header). Stops at the first failure.
AUTO_TEST_CASE(test_ivdep_do)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int error = CL_SUCCESS;

    for (const auto &param : params)
    {
        for (const int iter_count : iter_counts)
        {
            // Bits of `mask`, most to least significant: offset1_param,
            // offset2_param, iter_count_param, cond_in_header - the same
            // visiting order as four nested { false, true } loops.
            for (unsigned mask = 0; mask < 16u; ++mask)
            {
                test_options options;
                options.loop = loop_kind::do_loop;
                options.ivdep_length = std::get<0>(param);
                options.offset1 = std::get<1>(param);
                options.offset2 = std::get<2>(param);
                options.iter_count = iter_count;
                options.offset1_param = (mask & 8u) != 0;
                options.offset2_param = (mask & 4u) != 0;
                options.iter_count_param = (mask & 2u) != 0;
                options.cond_in_header = (mask & 1u) != 0;
                options.init_in_header = false;
                options.incr_in_header = false;

                error = test(device, context, queue, options);
                RETURN_ON_ERROR(error)
            }
        }
    }

    return error;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP
|
||||
@@ -1,266 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
|
||||
|
||||
namespace test_max_size {
|
||||
|
||||
enum class address_space
|
||||
{
|
||||
constant,
|
||||
local
|
||||
};
|
||||
|
||||
enum class param_kind
|
||||
{
|
||||
ptr_type, // constant_ptr<T>
|
||||
ptr, // constant<T>*
|
||||
ref // constant<T>&
|
||||
};
|
||||
|
||||
// Every parameter spelling; the test cases iterate over this table.
const param_kind param_kinds[] = { param_kind::ptr_type, param_kind::ptr, param_kind::ref };
|
||||
|
||||
struct test_options
|
||||
{
|
||||
address_space space;
|
||||
int max_size;
|
||||
bool spec_const;
|
||||
param_kind kind;
|
||||
bool array;
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::stringstream s;
|
||||
s << "kernel void test(";
|
||||
s << (options.space == address_space::constant ? "constant" : "local");
|
||||
s << " int2 *input) { }" << std::endl;
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#else
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::string type_str = "int2";
|
||||
if (options.array)
|
||||
type_str += "[]";
|
||||
|
||||
std::stringstream s;
|
||||
s << "#include <opencl_memory>" << std::endl;
|
||||
|
||||
if (options.spec_const)
|
||||
{
|
||||
s << "#include <opencl_spec_constant>" << std::endl;
|
||||
s << "cl::spec_constant<int, 1> max_size_spec{ 1234567890 };" << std::endl;
|
||||
}
|
||||
|
||||
s << "kernel void test(";
|
||||
s << "[[cl::max_size(" << (options.spec_const ? "max_size_spec" : std::to_string(options.max_size)) << ")]] ";
|
||||
s << (options.space == address_space::constant ? "cl::constant" : "cl::local");
|
||||
if (options.kind == param_kind::ptr_type)
|
||||
s << "_ptr<" << type_str << ">";
|
||||
else if (options.kind == param_kind::ptr)
|
||||
s << "<" << type_str << ">*";
|
||||
else if (options.kind == param_kind::ref)
|
||||
s << "<" << type_str << ">&";
|
||||
s << " input) { }" << std::endl;
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Builds the "test" kernel for `options`, then sets its single argument with
// several sizes around options.max_size:
//   - sizes <= max_size must be accepted and the kernel must run;
//   - sizes >  max_size must make clSetKernelArg return
//     CL_MAX_SIZE_RESTRICTION_EXCEEDED.
// Returns CL_SUCCESS when every size behaves as required.
//
// NOTE(review): the RETURN_ON_* macros are defined elsewhere; they appear to
// return from this function directly, so the kernel/program/buffer are not
// released on those early exits.
int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
{
    int error = CL_SUCCESS;

    cl_program program;
    cl_kernel kernel;

    std::string kernel_name = "test";
    std::string source = generate_source(options);

// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
    return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name, "", false
    );
    RETURN_ON_ERROR(error)
// Normal run
#else
    const char *source_c_str = source.c_str();
    error = create_openclcpp_program(context, &program, 1, &source_c_str, "");
    RETURN_ON_ERROR(error)

    if (options.spec_const)
    {
        // Spec id 1 matches max_size_spec in the generated source.
        error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_int), &options.max_size);
        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    }

    error = build_program_create_kernel_helper(
        context, &program, &kernel, 1, &source_c_str, kernel_name.c_str()
    );
    RETURN_ON_ERROR(error)
#endif

    const int max_size = options.max_size;
    const int sizes[] = {
        1,
        max_size / 2,
        max_size,
        max_size + 1,
        max_size * 2
    };

    for (int size : sizes)
    {
        cl_mem const_buffer;

        // The clSetKernelArg status is kept apart from `error`: for oversized
        // arguments CL_MAX_SIZE_RESTRICTION_EXCEEDED is the expected outcome
        // and must not leak into this function's return value.
        cl_int set_arg_status = CL_SUCCESS;

        if (options.space == address_space::constant)
        {
            const_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &error);
            RETURN_ON_CL_ERROR(error, "clCreateBuffer")

            set_arg_status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &const_buffer);
            // Checked below (depending on size and max_size values)
        }
        else if (options.space == address_space::local)
        {
            set_arg_status = clSetKernelArg(kernel, 0, size, NULL);
            // Checked below (depending on size and max_size values)
        }

        if (size <= max_size)
        {
            // Correct value, must not fail
            error = set_arg_status;
            RETURN_ON_CL_ERROR(error, "clSetKernelArg")

            const size_t global_size = 123;
            error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
            RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

            error = clFinish(queue);
            RETURN_ON_CL_ERROR(error, "clFinish")
        }
        else
        {
            // Incorrect value, must fail
            if (set_arg_status != CL_MAX_SIZE_RESTRICTION_EXCEEDED)
            {
                RETURN_ON_ERROR_MSG(-1,
                    "clSetKernelArg must fail with CL_MAX_SIZE_RESTRICTION_EXCEEDED,"
                    " but returned %s (%d)", get_cl_error_string(set_arg_status).c_str(), set_arg_status
                );
            }
        }

        if (options.space == address_space::constant)
        {
            error = clReleaseMemObject(const_buffer);
            RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
        }
    }

    clReleaseKernel(kernel);
    clReleaseProgram(program);
    return error;
}
|
||||
|
||||
// Runs the max_size test for constant-address-space arguments over every
// combination of {literal, spec constant} x parameter spelling x {T, T[]}.
AUTO_TEST_CASE(test_max_size_constant)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int error = CL_SUCCESS;

    cl_ulong max_size;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(max_size), &max_size, NULL);
    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")

    // test_options::max_size is an int, it is uploaded as a cl_int
    // specialization constant, and test() probes sizes up to max_size * 2.
    // Cap the attribute value at CL_INT_MAX / 2 so a large device limit cannot
    // wrap during the cl_ulong -> int narrowing or overflow in test().
    const cl_ulong largest_safe = static_cast<cl_ulong>(CL_INT_MAX) / 2;
    cl_ulong attribute_size = max_size / 2;
    if (attribute_size > largest_safe)
    {
        attribute_size = largest_safe;
    }

    for (bool spec_const : { false, true })
    for (auto kind : param_kinds)
    for (bool array : { false, true })
    {
        test_options options;
        options.space = address_space::constant;
        options.max_size = static_cast<int>(attribute_size);
        options.spec_const = spec_const;
        options.kind = kind;
        options.array = array;

        error = test(device, context, queue, options);
        RETURN_ON_ERROR(error)
    }

    return error;
}
|
||||
|
||||
// Runs the max_size test for local-address-space arguments over every
// combination of {literal, spec constant} x parameter spelling x {T, T[]}.
AUTO_TEST_CASE(test_max_size_local)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int error = CL_SUCCESS;

    cl_ulong max_size;
    error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_size), &max_size, NULL);
    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")

    // test_options::max_size is an int, it is uploaded as a cl_int
    // specialization constant, and test() probes sizes up to max_size * 2.
    // Cap the attribute value at CL_INT_MAX / 2 so a large device limit cannot
    // wrap during the cl_ulong -> int narrowing or overflow in test().
    const cl_ulong largest_safe = static_cast<cl_ulong>(CL_INT_MAX) / 2;
    cl_ulong attribute_size = max_size / 2;
    if (attribute_size > largest_safe)
    {
        attribute_size = largest_safe;
    }

    for (bool spec_const : { false, true })
    for (auto kind : param_kinds)
    for (bool array : { false, true })
    {
        test_options options;
        options.space = address_space::local;
        options.max_size = static_cast<int>(attribute_size);
        options.spec_const = spec_const;
        options.kind = kind;
        options.array = array;

        error = test(device, context, queue, options);
        RETURN_ON_ERROR(error)
    }

    return error;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP
|
||||
@@ -1,285 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
|
||||
|
||||
namespace test_required_num_sub_groups {
|
||||
|
||||
// Parameters of a single required_num_sub_groups test run.
struct test_options
{
    size_t num_sub_groups; // value placed in [[cl::required_num_sub_groups(...)]]
    bool spec_const;       // true: pass the value through a specialization constant
    size_t max_count;      // upper bound on work-items per launch; also sizes the output buffer
    size_t num_tests;      // number of randomized launches per dimensionality
};
|
||||
|
||||
struct output_type
|
||||
{
|
||||
cl_ulong num_sub_groups;
|
||||
cl_ulong enqueued_num_sub_groups;
|
||||
};
|
||||
|
||||
// Device-side declaration of output_type, shared by both kernel flavours.
const std::string source_common = R"CLC(
struct output_type
{
ulong num_sub_groups;
ulong enqueued_num_sub_groups;
};
)CLC";
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::stringstream s;
|
||||
s << source_common;
|
||||
s << R"(
|
||||
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
|
||||
|
||||
kernel void test(global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_linear_id();
|
||||
output[gid].num_sub_groups = get_num_sub_groups();
|
||||
output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
|
||||
}
|
||||
)";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#else
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
#include <opencl_memory>
|
||||
#include <opencl_work_item>
|
||||
using namespace cl;
|
||||
)";
|
||||
|
||||
if (options.spec_const)
|
||||
{
|
||||
s << "#include <opencl_spec_constant>" << std::endl;
|
||||
s << "cl::spec_constant<uint, 1> num_sub_groups_spec{ 1234567890 };" << std::endl;
|
||||
}
|
||||
|
||||
s << source_common << std::endl;
|
||||
s << "[[cl::required_num_sub_groups(" << (options.spec_const ? "num_sub_groups_spec" : std::to_string(options.num_sub_groups)) << ")]]";
|
||||
s << R"(
|
||||
kernel void test(global_ptr<output_type[]> output)
|
||||
{
|
||||
const ulong gid = get_global_linear_id();
|
||||
output[gid].num_sub_groups = get_num_sub_groups();
|
||||
output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
|
||||
}
|
||||
)";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Builds the kernel for `options` and verifies the required sub-group count:
//   1. CL_KERNEL_COMPILE_NUM_SUB_GROUPS reports options.num_sub_groups;
//   2. for options.num_tests random NDRanges in 1, 2 and 3 dimensions, the
//      local size suggested by CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT is
//      one-dimensional, yields the requested sub-group count, and every
//      work-item observes the expected get_*num_sub_groups() values.
// Returns CL_SUCCESS on success.
//
// NOTE(review): the RETURN_ON_* macros are defined elsewhere; they appear to
// return from this function directly, so the buffer/kernel/program are not
// released on those early exits.
int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
{
    int error = CL_SUCCESS;

#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    if (!is_extension_available(device, "cl_khr_subgroups"))
    {
        log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
        return CL_SUCCESS;
    }
#endif

    cl_program program;
    cl_kernel kernel;

    std::string kernel_name = "test";
    std::string source = generate_source(options);

// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
    return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name, "-cl-std=CL2.0", false
    );
    RETURN_ON_ERROR(error)
// Normal run
#else
    const char *source_c_str = source.c_str();
    error = create_openclcpp_program(context, &program, 1, &source_c_str, "");
    RETURN_ON_ERROR(error)

    if (options.spec_const)
    {
        // Spec id 1 matches num_sub_groups_spec in the generated source.
        cl_uint spec_num_sub_groups = static_cast<cl_uint>(options.num_sub_groups);
        error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_uint), &spec_num_sub_groups);
        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
    }

    error = build_program_create_kernel_helper(
        context, &program, &kernel, 1, &source_c_str, kernel_name.c_str()
    );
    RETURN_ON_ERROR(error)
#endif

    size_t compile_num_sub_groups;
    error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
                                    0, NULL,
                                    sizeof(size_t), &compile_num_sub_groups, NULL);
    RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
    if (compile_num_sub_groups != options.num_sub_groups)
    {
        // %zu is the portable conversion for size_t (%lu is wrong on LLP64 targets).
        RETURN_ON_ERROR_MSG(-1,
            "CL_KERNEL_COMPILE_NUM_SUB_GROUPS did not return correct value (expected %zu, got %zu)",
            options.num_sub_groups, compile_num_sub_groups
        )
    }

    cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * options.max_count, NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<size_t> count_dis(1, options.max_count);

    for (size_t test = 0; test < options.num_tests; test++)
    {
        for (size_t dim = 1; dim <= 3; dim++)
        {
            // Pick a random global size whose total work-item count does not
            // exceed the randomly drawn `count` (each axis <= count^(1/dim)).
            size_t global_size[3] = { 1, 1, 1 };
            size_t count = count_dis(gen);
            std::uniform_int_distribution<size_t> global_size_dis(1, static_cast<size_t>(pow(count, 1.0 / dim)));
            for (size_t d = 0; d < dim; d++)
            {
                global_size[d] = global_size_dis(gen);
            }
            count = global_size[0] * global_size[1] * global_size[2];

            size_t local_size[3] = { 1, 1, 1 };
            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
                                            sizeof(size_t), &options.num_sub_groups,
                                            sizeof(size_t) * dim, local_size, NULL);
            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
            // The suggested local size must be non-empty and one-dimensional.
            if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1)
            {
                RETURN_ON_ERROR_MSG(-1,
                    "CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT did not return correct value"
                )
            }

            size_t sub_group_count_for_ndrange;
            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
                                            sizeof(size_t) * dim, local_size,
                                            sizeof(size_t), &sub_group_count_for_ndrange, NULL);
            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
            if (sub_group_count_for_ndrange != options.num_sub_groups)
            {
                RETURN_ON_ERROR_MSG(-1,
                    "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected %zu, got %zu)",
                    options.num_sub_groups, sub_group_count_for_ndrange
                )
            }

            // Zero the part of the buffer this launch writes, so stale
            // results from a previous launch cannot satisfy the checks.
            const char pattern = 0;
            error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL);
            RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")

            error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, global_size, local_size, 0, NULL, NULL);
            RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

            std::vector<output_type> output(count);
            error = clEnqueueReadBuffer(
                queue, output_buffer, CL_TRUE,
                0, sizeof(output_type) * count,
                static_cast<void *>(output.data()),
                0, NULL, NULL
            );
            RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

            for (size_t gid = 0; gid < count; gid++)
            {
                const output_type &o = output[gid];

                if (o.enqueued_num_sub_groups != options.num_sub_groups)
                {
                    RETURN_ON_ERROR_MSG(-1, "get_enqueued_num_sub_groups does not equal to required_num_sub_groups")
                }
                if (o.num_sub_groups > options.num_sub_groups)
                {
                    RETURN_ON_ERROR_MSG(-1, "get_num_sub_groups did not return correct value")
                }
            }
        }
    }

    clReleaseMemObject(output_buffer);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    return error;
}
|
||||
|
||||
// Exercises every sub-group count from 1 to CL_DEVICE_MAX_NUM_SUB_GROUPS,
// first with a literal attribute value and then with a specialization constant.
AUTO_TEST_CASE(test_required_num_sub_groups)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int error = CL_SUCCESS;

    cl_uint max_num_sub_groups;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(max_num_sub_groups), &max_num_sub_groups, NULL);
    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")

    const bool spec_const_modes[] = { false, true };
    for (bool use_spec_const : spec_const_modes)
    {
        size_t requested = 1;
        while (requested <= max_num_sub_groups)
        {
            test_options options;
            options.spec_const = use_spec_const;
            options.num_sub_groups = requested;
            options.num_tests = 100;
            options.max_count = num_elements;

            error = test(device, context, queue, options);
            RETURN_ON_ERROR(error)

            ++requested;
        }
    }

    return error;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP
|
||||
@@ -1,51 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
// harness framework
|
||||
#include "harness/compat.h"
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/errorHelpers.h"
|
||||
#include "harness/kernelHelpers.h"
|
||||
|
||||
// autotest
|
||||
#include "autotest/autotest.hpp"
|
||||
|
||||
// utils_common
|
||||
#include "utils_common/is_vector_type.hpp"
|
||||
#include "utils_common/scalar_type.hpp"
|
||||
#include "utils_common/make_vector_type.hpp"
|
||||
#include "utils_common/type_name.hpp"
|
||||
#include "utils_common/type_supported.hpp"
|
||||
#include "utils_common/vector_size.hpp"
|
||||
#include "utils_common/kernel_helpers.hpp"
|
||||
#include "utils_common/errors.hpp"
|
||||
#include "utils_common/string.hpp"
|
||||
|
||||
// Rounds global_size up to the nearest multiple of local_size.
// Uses integer arithmetic only, so the result is exact for every size_t value
// (a double-based ceil loses precision above 2^53).
// A local_size of 0 has no multiples; global_size is returned unchanged.
// Declared inline because this definition lives in a header.
inline size_t get_uniform_global_size(size_t global_size, size_t local_size)
{
    if (local_size == 0)
    {
        return global_size;
    }
    const size_t remainder = global_size % local_size;
    return (remainder == 0) ? global_size : global_size + (local_size - remainder);
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Name of this conformance test module.
set(MODULE_NAME CPP_COMMON_FUNCS)

# Source list for the module, stored in <MODULE_NAME>_SOURCES.
set(${MODULE_NAME}_SOURCES main.cpp)

# Shared build logic; presumably consumes MODULE_NAME and <MODULE_NAME>_SOURCES.
include(../../CMakeCommon.txt)
|
||||
@@ -1,417 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
#include <type_traits>
|
||||
#include <algorithm>
|
||||
|
||||
// floatn clamp(floatn x, floatn min, floatn max) (only scalars)
|
||||
template<class IN1, class IN2, class IN3, class OUT1>
|
||||
struct common_func_clamp : public ternary_func<IN1, IN2, IN3, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "clamp";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_common>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value
|
||||
&& std::is_same<IN2, IN3>::value
|
||||
&& std::is_same<IN3, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
return (std::min)((std::max)(x, minval), maxval);
|
||||
}
|
||||
|
||||
IN2 min2()
|
||||
{
|
||||
return (std::numeric_limits<IN2>::min)();
|
||||
}
|
||||
|
||||
IN2 max2()
|
||||
{
|
||||
return (std::numeric_limits<IN2>::max)() / IN2(4000.0f);
|
||||
}
|
||||
|
||||
IN3 min3()
|
||||
{
|
||||
return IN3(1) + ((std::numeric_limits<IN3>::max)() / IN3(4000.0f));
|
||||
}
|
||||
|
||||
IN3 max3()
|
||||
{
|
||||
return (std::numeric_limits<IN3>::max)() / IN3(2000.0f);
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
};
|
||||
|
||||
// floatn degrees(floatn t)
|
||||
template<class IN1, class OUT1, class REFERENCE>
|
||||
struct common_func_degrees : public unary_func<IN1, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "degrees";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_common>\n";
|
||||
}
|
||||
|
||||
REFERENCE operator()(const IN1& x)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
return (REFERENCE(180.0) / CL_M_PI) * static_cast<REFERENCE>(x);
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return 2.5f;
|
||||
}
|
||||
};
|
||||
|
||||
// floatn max(floatn x, floatn y)
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct common_func_max : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "max";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_common>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
return (std::max)(x, y);
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
};
|
||||
|
||||
// floatn min(floatn x, floatn y)
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct common_func_min : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "min";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_common>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
return (std::min)(x, y);
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
};
|
||||
|
||||
// floatn mix(floatn x, floatn y, floatn a);
|
||||
template<class IN1, class IN2, class IN3, class OUT1>
|
||||
struct common_func_mix : public ternary_func<IN1, IN2, IN3, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "mix";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_common>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y, const IN3& a)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value
|
||||
&& std::is_same<IN2, IN3>::value
|
||||
&& std::is_same<IN3, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
return static_cast<double>(x) + ((static_cast<double>(y) - static_cast<double>(x)) * static_cast<double>(a));
|
||||
}
|
||||
|
||||
IN3 min3()
|
||||
{
|
||||
return IN3(0.0f + CL_FLT_EPSILON);
|
||||
}
|
||||
|
||||
IN3 max3()
|
||||
{
|
||||
return IN3(1.0f - CL_FLT_EPSILON);
|
||||
}
|
||||
|
||||
bool use_ulp()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// floatn radians(floatn t)
|
||||
template<class IN1, class OUT1, class REFERENCE>
|
||||
struct common_func_radians : public unary_func<IN1, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "radians";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_common>\n";
|
||||
}
|
||||
|
||||
REFERENCE operator()(const IN1& x)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
return (CL_M_PI / REFERENCE(180.0)) * static_cast<REFERENCE>(x);
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return 2.5f;
|
||||
}
|
||||
};
|
||||
|
||||
// floatn step(floatn edge, floatn x)
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct common_func_step : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "step";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_common>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& edge, const IN2& x)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
if(x < edge)
|
||||
return OUT1(0.0f);
|
||||
return OUT1(1.0f);
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
};
|
||||
|
||||
// floatn smoothstep(floatn edge0, floatn edge1, floatn x);
|
||||
template<class IN1, class IN2, class IN3, class OUT1>
|
||||
struct common_func_smoothstep : public ternary_func<IN1, IN2, IN3, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "smoothstep";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_common>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& edge0, const IN2& edge1, const IN3& x)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value
|
||||
&& std::is_same<IN2, IN3>::value
|
||||
&& std::is_same<IN3, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
if(x <= edge0)
|
||||
{
|
||||
return OUT1(0.0f);
|
||||
}
|
||||
if(x >= edge1)
|
||||
{
|
||||
return OUT1(1.0f);
|
||||
}
|
||||
OUT1 t = (x - edge0) / (edge1 - edge0);
|
||||
t = t * t * (3.0f - 2.0f * t);
|
||||
return t;
|
||||
}
|
||||
|
||||
// edge0 must be < edge1
|
||||
IN1 min1()
|
||||
{
|
||||
return (std::numeric_limits<IN1>::min)();
|
||||
}
|
||||
|
||||
IN1 max1()
|
||||
{
|
||||
return (std::numeric_limits<IN1>::max)() / IN1(8000.0f);
|
||||
}
|
||||
|
||||
IN2 min2()
|
||||
{
|
||||
return IN3(1) + ((std::numeric_limits<IN2>::max)() / IN2(4000.0f));
|
||||
}
|
||||
|
||||
IN2 max2()
|
||||
{
|
||||
return (std::numeric_limits<IN2>::max)() / IN2(2000.0f);
|
||||
}
|
||||
|
||||
bool use_ulp()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// floatn sign(floatn t)
|
||||
template<class IN1, class OUT1>
|
||||
struct common_func_sign : public unary_func<IN1, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "sign";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_common>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
if(x == IN1(-0.0f))
|
||||
{
|
||||
return IN1(-0.0f);
|
||||
}
|
||||
if(x == IN1(+0.0f))
|
||||
{
|
||||
return IN1(+0.0f);
|
||||
}
|
||||
if(x > IN1(0.0f))
|
||||
{
|
||||
return IN1(1.0f);
|
||||
}
|
||||
return IN1(-1.0f);
|
||||
}
|
||||
|
||||
bool use_ulp()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
std::vector<IN1> in_special_cases()
|
||||
{
|
||||
return { -0.0f, +0.0f };
|
||||
}
|
||||
};
|
||||
|
||||
// Runs every common-function reference functor above against the device.
// NOTE(review): `error` and `last_error` are presumably read and written by
// the TEST_*_FUNC_MACRO expansions (defined elsewhere) - keep both names.
AUTO_TEST_CASE(test_common_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // floatn clamp(floatn x, floatn min, floatn max)
    TEST_TERNARY_FUNC_MACRO((common_func_clamp<cl_float, cl_float, cl_float, cl_float>()))

    // floatn degrees(floatn t)
    TEST_UNARY_FUNC_MACRO((common_func_degrees<cl_float, cl_float, cl_double>()))

    // floatn max(floatn x, floatn y);
    TEST_BINARY_FUNC_MACRO((common_func_max<cl_float, cl_float, cl_float>()))

    // floatn min(floatn x, floatn y);
    TEST_BINARY_FUNC_MACRO((common_func_min<cl_float, cl_float, cl_float>()))

    // floatn mix(floatn x, floatn y, floatn a);
    TEST_TERNARY_FUNC_MACRO((common_func_mix<cl_float, cl_float, cl_float, cl_float>()))

    // floatn radians(floatn t)
    TEST_UNARY_FUNC_MACRO((common_func_radians<cl_float, cl_float, cl_double>()))

    // floatn step(floatn edge, floatn x)
    TEST_BINARY_FUNC_MACRO((common_func_step<cl_float, cl_float, cl_float>()))

    // floatn smoothstep(floatn edge0, floatn edge1, floatn x)
    TEST_TERNARY_FUNC_MACRO((common_func_smoothstep<cl_float, cl_float, cl_float, cl_float>()))

    // floatn sign(floatn t);
    TEST_UNARY_FUNC_MACRO((common_func_sign<cl_float, cl_float>()))

    // Any failure collapses to -1; success is reported as CL_SUCCESS.
    return (error != CL_SUCCESS) ? -1 : CL_SUCCESS;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP
|
||||
@@ -1,43 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <limits>
|
||||
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "common_funcs.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
// Check if cl_float (float) and cl_double (double) fulfill the requirements of
|
||||
// IEC 559 (IEEE 754) standard. This is required for the tests to run correctly.
|
||||
if(!std::numeric_limits<cl_float>::is_iec559)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
|
||||
"Tests won't run correctly."
|
||||
);
|
||||
}
|
||||
if(!std::numeric_limits<cl_double>::is_iec559)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
|
||||
"Tests won't run correctly."
|
||||
);
|
||||
}
|
||||
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
# Build description of the OpenCL C++ convert_cast conformance test module.
# MODULE_NAME and <MODULE_NAME>_SOURCES are defined before CMakeCommon.txt is
# included (presumably that file consumes them to create the target; verify).
set(MODULE_NAME "CPP_CONVERT")

set(${MODULE_NAME}_SOURCES
    "main.cpp"
)

# Same file as the relative form `../../CMakeCommon.txt`, spelled out from the
# current source directory.
include("${CMAKE_CURRENT_SOURCE_DIR}/../../CMakeCommon.txt")
|
||||
@@ -1,309 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
#include <functional>
|
||||
|
||||
|
||||
// Rounding mode requested from convert_cast. `def` means that no rounding
// mode is spelled out in the generated kernel call.
enum class rounding_mode { def, /*rte, not implemented here */ rtz, rtp, rtn };

// Saturation behaviour requested from convert_cast. `def` means that no
// saturation argument is spelled out in the generated kernel call.
enum class saturate
{
    def,
    off,
    on
};
|
||||
|
||||
std::string rounding_mode_name(rounding_mode rmode)
|
||||
{
|
||||
switch (rmode)
|
||||
{
|
||||
case rounding_mode::rtz: return "rtz";
|
||||
case rounding_mode::rtp: return "rtp";
|
||||
case rounding_mode::rtn: return "rtn";
|
||||
default: return "";
|
||||
}
|
||||
}
|
||||
|
||||
std::string saturate_name(saturate smode)
|
||||
{
|
||||
switch (smode)
|
||||
{
|
||||
case saturate::off: return "off";
|
||||
case saturate::on: return "on";
|
||||
default: return "";
|
||||
}
|
||||
}
|
||||
|
||||
// Limits x to the range [a, b]: values below a become a, values above b
// become b. The comparisons are written out exactly as std::max(a, x)
// followed by std::min(b, ...) would perform them.
template<class T>
T clamp(T x, T a, T b)
{
    // Equivalent of std::max(a, x): x is chosen only when a < x.
    const T raised = (a < x) ? x : a;
    // Equivalent of std::min(b, raised): raised is chosen only when raised < b.
    return (raised < b) ? raised : b;
}
|
||||
|
||||
template<class IN1, class OUT1>
|
||||
struct convert_cast : public unary_func<IN1, OUT1>
|
||||
{
|
||||
static_assert(vector_size<IN1>::value == vector_size<OUT1>::value, "The operand and result type must have the same number of elements");
|
||||
|
||||
typedef typename scalar_type<IN1>::type in_scalar_type;
|
||||
typedef typename scalar_type<OUT1>::type out_scalar_type;
|
||||
|
||||
in_scalar_type in_min;
|
||||
in_scalar_type in_max;
|
||||
rounding_mode rmode;
|
||||
saturate smode;
|
||||
|
||||
convert_cast(in_scalar_type min, in_scalar_type max, rounding_mode rmode, saturate smode)
|
||||
: in_min(min), in_max(max), rmode(rmode), smode(smode)
|
||||
{
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "convert_cast";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_convert>\n";
|
||||
}
|
||||
|
||||
IN1 min1()
|
||||
{
|
||||
return detail::def_limit<IN1>(in_min);
|
||||
}
|
||||
|
||||
IN1 max1()
|
||||
{
|
||||
return detail::def_limit<IN1>(in_max);
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x)
|
||||
{
|
||||
OUT1 y;
|
||||
for (size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
in_scalar_type v;
|
||||
if (smode == saturate::on)
|
||||
v = clamp(x.s[i],
|
||||
static_cast<in_scalar_type>((std::numeric_limits<out_scalar_type>::min)()),
|
||||
static_cast<in_scalar_type>((std::numeric_limits<out_scalar_type>::max)())
|
||||
);
|
||||
else
|
||||
v = x.s[i];
|
||||
|
||||
if (std::is_integral<out_scalar_type>::value)
|
||||
{
|
||||
switch (rmode)
|
||||
{
|
||||
case rounding_mode::rtp:
|
||||
y.s[i] = static_cast<out_scalar_type>(std::ceil(v));
|
||||
break;
|
||||
case rounding_mode::rtn:
|
||||
y.s[i] = static_cast<out_scalar_type>(std::floor(v));
|
||||
break;
|
||||
default:
|
||||
y.s[i] = static_cast<out_scalar_type>(v);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
y.s[i] = static_cast<out_scalar_type>(v);
|
||||
}
|
||||
}
|
||||
return y;
|
||||
}
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
template <class func_type, class in_type, class out_type>
|
||||
std::string generate_kernel_convert_cast(func_type func)
|
||||
{
|
||||
std::string in1_value = "input[gid]";
|
||||
std::string function_call = "convert_" + type_name<out_type>();
|
||||
if (func.smode == saturate::on)
|
||||
function_call += "_sat";
|
||||
if (func.rmode != rounding_mode::def)
|
||||
function_call += "_" + rounding_mode_name(func.rmode);
|
||||
function_call += "(" + in1_value + ")";
|
||||
return
|
||||
"__kernel void test_" + func.str() + "(global " + type_name<in_type>() + " *input, global " + type_name<out_type>() + " *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = " + function_call + ";\n"
|
||||
"}\n";
|
||||
}
|
||||
#else
|
||||
template <class func_type, class in_type, class out_type>
|
||||
std::string generate_kernel_convert_cast(func_type func)
|
||||
{
|
||||
std::string headers = func.headers();
|
||||
std::string in1_value = "input[gid]";
|
||||
std::string function_call = "convert_cast<" + type_name<out_type>();
|
||||
if (func.rmode != rounding_mode::def)
|
||||
function_call += ", rounding_mode::" + rounding_mode_name(func.rmode);
|
||||
if (func.smode != saturate::def)
|
||||
function_call += ", saturate::" + saturate_name(func.smode);
|
||||
function_call += ">(" + in1_value + ")";
|
||||
return
|
||||
"" + func.defs() +
|
||||
"" + headers +
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_" + func.str() + "(global_ptr<" + type_name<in_type>() + "[]> input,"
|
||||
"global_ptr<" + type_name<out_type>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = " + function_call + ";\n"
|
||||
"}\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
template <class convert_cast_op>
|
||||
int test_convert_cast_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, convert_cast_op op)
|
||||
{
|
||||
cl_mem buffers[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t work_size[1];
|
||||
int error;
|
||||
|
||||
typedef typename convert_cast_op::in_type INPUT;
|
||||
typedef typename convert_cast_op::out_type OUTPUT;
|
||||
|
||||
// Don't run test for unsupported types
|
||||
if (!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::string code_str = generate_kernel_convert_cast<convert_cast_op, INPUT, OUTPUT>(op);
|
||||
std::string kernel_name("test_"); kernel_name += op.str();
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
|
||||
RETURN_ON_ERROR(error)
|
||||
return error;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
|
||||
RETURN_ON_ERROR(error)
|
||||
#else
|
||||
error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
|
||||
RETURN_ON_ERROR(error)
|
||||
#endif
|
||||
|
||||
std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
|
||||
std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
|
||||
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
error = clEnqueueWriteBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
|
||||
static_cast<void *>(input.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
error = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
|
||||
work_size[0] = count;
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
|
||||
|
||||
error = clEnqueueReadBuffer(
|
||||
queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
|
||||
|
||||
if (!verify_unary(input, output, op))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
|
||||
}
|
||||
log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseMemObject(buffers[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Registers the convert_cast test: runs test_convert_cast_func for a fixed
// list of input/output vector types, input ranges, rounding modes and
// saturation modes. Returns CL_SUCCESS when every case succeeded, -1
// otherwise.
AUTO_TEST_CASE(test_convert_cast)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

// Runs one case, passes its status to CHECK_ERROR and accumulates it.
#define RUN_CONVERT_CAST_CASE(OP) \
    last_error = test_convert_cast_func(device, context, queue, n_elems, OP); \
    CHECK_ERROR(last_error) \
    error |= last_error;

    // No-op
    RUN_CONVERT_CAST_CASE((convert_cast<cl_float2, cl_float2>(-100.0f, +100.0f, rounding_mode::rtn, saturate::def)))
    RUN_CONVERT_CAST_CASE((convert_cast<cl_uchar2, cl_uchar2>(0, 255, rounding_mode::def, saturate::def)))

    // int to int
    // NOTE(review): the first case uses 40000 for both range ends, so only one
    // input value is requested - possibly meant to be (-40000, 40000); confirm.
    RUN_CONVERT_CAST_CASE((convert_cast<cl_int4, cl_short4>(40000, 40000, rounding_mode::def, saturate::on)))
    RUN_CONVERT_CAST_CASE((convert_cast<cl_uchar8, cl_char8>(0, 127, rounding_mode::def, saturate::off)))
    RUN_CONVERT_CAST_CASE((convert_cast<cl_char8, cl_int8>(-100, 100, rounding_mode::def, saturate::off)))

    // float to int
    RUN_CONVERT_CAST_CASE((convert_cast<cl_float2, cl_uchar2>(-100.0f, +400.0f, rounding_mode::def, saturate::on)))
    RUN_CONVERT_CAST_CASE((convert_cast<cl_double4, cl_char4>(-127.0, +127.0, rounding_mode::rtp, saturate::off)))
    RUN_CONVERT_CAST_CASE((convert_cast<cl_float8, cl_uint8>(-1000.0f, +10000.0f, rounding_mode::rtp, saturate::on)))
    RUN_CONVERT_CAST_CASE((convert_cast<cl_float16, cl_ushort16>(-10000.0f, +70000.0f, rounding_mode::rtn, saturate::on)))

    // int to float
    RUN_CONVERT_CAST_CASE((convert_cast<cl_short8, cl_float8>(0, 12345, rounding_mode::def, saturate::def)))
    RUN_CONVERT_CAST_CASE((convert_cast<cl_long2, cl_float2>(-1000000, +1000000, rounding_mode::rtz, saturate::def)))

#undef RUN_CONVERT_CAST_CASE

    // Any accumulated failure collapses to -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP
|
||||
@@ -1,25 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "convert_cast.hpp"
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
# Build description of the OpenCL C++ device-queue conformance test module.
# MODULE_NAME and <MODULE_NAME>_SOURCES are defined before CMakeCommon.txt is
# included (presumably that file consumes them to create the target; verify).
set(MODULE_NAME "CPP_DEVICE_QUEUE")

set(${MODULE_NAME}_SOURCES
    "main.cpp"
)

# Same file as the relative form `../../CMakeCommon.txt`, spelled out from the
# current source directory.
include("${CMAKE_CURRENT_SOURCE_DIR}/../../CMakeCommon.txt")
|
||||
@@ -1,25 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_enqueue.hpp"
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,699 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
|
||||
|
||||
namespace test_enqueue {
|
||||
|
||||
// Options of a single enqueue test run.
struct test_options
{
    int test; // index of the kernel variant emitted by generate_source (0 to 3)
};
|
||||
|
||||
struct output_type
|
||||
{
|
||||
cl_int enqueue_kernel1_success;
|
||||
cl_int enqueue_kernel2_success;
|
||||
cl_int enqueue_kernel3_success;
|
||||
cl_int enqueue_marker_success;
|
||||
cl_int event1_is_valid;
|
||||
cl_int event2_is_valid;
|
||||
cl_int event3_is_valid;
|
||||
cl_int user_event1_is_valid;
|
||||
cl_int user_event2_is_valid;
|
||||
cl_int values[10000];
|
||||
};
|
||||
|
||||
// Kernel-source fragment shared by every generated test program: the
// device-side declaration of `output_type`.
const std::string source_common = R"CLSRC(
struct output_type
{
    int enqueue_kernel1_success;
    int enqueue_kernel2_success;
    int enqueue_kernel3_success;
    int enqueue_marker_success;
    int event1_is_valid;
    int event2_is_valid;
    int event3_is_valid;
    int user_event1_is_valid;
    int user_event2_is_valid;
    int values[10000];
};
)CLSRC";
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::stringstream s;
|
||||
s << source_common;
|
||||
if (options.test == 0)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(queue_t queue, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
if (gid != 0)
|
||||
return;
|
||||
|
||||
output->enqueue_kernel2_success = 1;
|
||||
output->enqueue_kernel3_success = 1;
|
||||
output->enqueue_marker_success = 1;
|
||||
output->event2_is_valid = 1;
|
||||
output->event3_is_valid = 1;
|
||||
output->user_event1_is_valid = 1;
|
||||
output->user_event2_is_valid = 1;
|
||||
|
||||
queue_t default_queue = get_default_queue();
|
||||
|
||||
ndrange_t ndrange1 = ndrange_1D(get_global_size(0));
|
||||
clk_event_t event1;
|
||||
int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange1, 0, NULL, &event1,
|
||||
^{
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid] = 1;
|
||||
});
|
||||
output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
|
||||
output->event1_is_valid = is_valid_event(event1);
|
||||
|
||||
release_event(event1);
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.test == 1)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(queue_t queue, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
if (gid != 0)
|
||||
return;
|
||||
|
||||
output->enqueue_kernel3_success = 1;
|
||||
output->enqueue_marker_success = 1;
|
||||
output->event3_is_valid = 1;
|
||||
output->user_event1_is_valid = 1;
|
||||
output->user_event2_is_valid = 1;
|
||||
|
||||
queue_t default_queue = get_default_queue();
|
||||
|
||||
ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2);
|
||||
clk_event_t event1;
|
||||
int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1,
|
||||
^{
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid * 2] = 1;
|
||||
});
|
||||
output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
|
||||
output->event1_is_valid = is_valid_event(event1);
|
||||
|
||||
ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1);
|
||||
clk_event_t event2;
|
||||
int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 1, &event1, &event2,
|
||||
^{
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[(gid - 1) * 2 + 1] = 1;
|
||||
});
|
||||
output->enqueue_kernel2_success = status2 == CLK_SUCCESS;
|
||||
output->event2_is_valid = is_valid_event(event2);
|
||||
|
||||
release_event(event1);
|
||||
release_event(event2);
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.test == 2)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(queue_t queue, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
if (gid != 0)
|
||||
return;
|
||||
|
||||
output->enqueue_marker_success = 1;
|
||||
output->event3_is_valid = 1;
|
||||
output->enqueue_kernel3_success = 1;
|
||||
|
||||
queue_t default_queue = get_default_queue();
|
||||
|
||||
clk_event_t user_event1 = create_user_event();
|
||||
retain_event(user_event1);
|
||||
output->user_event1_is_valid = is_valid_event(user_event1);
|
||||
|
||||
ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2);
|
||||
clk_event_t event1;
|
||||
int status1 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange1, 1, &user_event1, &event1,
|
||||
^{
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid * 2] = 1;
|
||||
});
|
||||
output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
|
||||
output->event1_is_valid = is_valid_event(event1);
|
||||
release_event(user_event1);
|
||||
|
||||
clk_event_t user_event2 = create_user_event();
|
||||
output->user_event2_is_valid = is_valid_event(user_event2);
|
||||
|
||||
clk_event_t events[2];
|
||||
events[0] = user_event2;
|
||||
events[1] = user_event1;
|
||||
|
||||
ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, get_local_size(0));
|
||||
clk_event_t event2;
|
||||
int status2 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange2, 2, events, &event2,
|
||||
^(local void *p0, local void *p1, local void *p2) {
|
||||
const ulong gid = get_global_id(0);
|
||||
const ulong lid = get_local_id(0);
|
||||
local int2 *l0 = (local int2 *)p0;
|
||||
local int *l1 = (local int *)p1;
|
||||
local int *l2 = (local int *)p2;
|
||||
l1[get_local_size(0) - lid - 1] = gid > 0 ? 1 : 0;
|
||||
work_group_barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (lid < 5) l0[lid] = (int2)(3, 4);
|
||||
if (lid < 3) l2[lid] = 5;
|
||||
work_group_barrier(CLK_LOCAL_MEM_FENCE);
|
||||
output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0]));
|
||||
}, sizeof(int2) * 5, sizeof(int) * get_local_size(0), sizeof(int) * 3);
|
||||
output->enqueue_kernel2_success = status2 == CLK_SUCCESS;
|
||||
output->event2_is_valid = is_valid_event(event2);
|
||||
|
||||
set_user_event_status(user_event1, CL_COMPLETE);
|
||||
set_user_event_status(user_event2, CL_COMPLETE);
|
||||
|
||||
release_event(user_event1);
|
||||
release_event(user_event2);
|
||||
release_event(event1);
|
||||
release_event(event2);
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.test == 3)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(queue_t queue, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
if (gid != 0)
|
||||
return;
|
||||
|
||||
output->user_event2_is_valid = 1;
|
||||
|
||||
queue_t default_queue = get_default_queue();
|
||||
|
||||
ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2);
|
||||
clk_event_t event1;
|
||||
int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1,
|
||||
^{
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid * 2] = 20;
|
||||
});
|
||||
output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
|
||||
output->event1_is_valid = is_valid_event(event1);
|
||||
|
||||
ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1);
|
||||
clk_event_t event2;
|
||||
int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 0, NULL, &event2,
|
||||
^{
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[(gid - 1) * 2 + 1] = 20;
|
||||
});
|
||||
output->enqueue_kernel2_success = status2 == CLK_SUCCESS;
|
||||
output->event2_is_valid = is_valid_event(event2);
|
||||
|
||||
clk_event_t user_event1 = create_user_event();
|
||||
output->user_event1_is_valid = is_valid_event(user_event1);
|
||||
|
||||
clk_event_t events[3];
|
||||
events[0] = event2;
|
||||
events[1] = user_event1;
|
||||
events[2] = event1;
|
||||
|
||||
clk_event_t event3;
|
||||
int status3 = enqueue_marker(queue, 3, events, &event3);
|
||||
output->enqueue_marker_success = status3 == CLK_SUCCESS;
|
||||
output->event3_is_valid = is_valid_event(event3);
|
||||
|
||||
int status4 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange_1D(get_global_size(0)), 1, &event3, NULL,
|
||||
^{
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid] /= 20;
|
||||
});
|
||||
output->enqueue_kernel3_success = status4 == CLK_SUCCESS;
|
||||
|
||||
set_user_event_status(user_event1, CL_COMPLETE);
|
||||
|
||||
release_event(user_event1);
|
||||
release_event(event1);
|
||||
release_event(event2);
|
||||
release_event(event3);
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#else
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
#include <opencl_memory>
|
||||
#include <opencl_common>
|
||||
#include <opencl_work_item>
|
||||
#include <opencl_synchronization>
|
||||
#include <opencl_device_queue>
|
||||
using namespace cl;
|
||||
)";
|
||||
|
||||
s << source_common;
|
||||
if (options.test == 0)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(device_queue queue, global<output_type> *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
if (gid != 0)
|
||||
return;
|
||||
|
||||
output->enqueue_kernel2_success = 1;
|
||||
output->enqueue_kernel3_success = 1;
|
||||
output->enqueue_marker_success = 1;
|
||||
output->event2_is_valid = 1;
|
||||
output->event3_is_valid = 1;
|
||||
output->user_event1_is_valid = 1;
|
||||
output->user_event2_is_valid = 1;
|
||||
|
||||
device_queue default_queue = get_default_device_queue();
|
||||
|
||||
ndrange ndrange1(get_global_size(0));
|
||||
event event1;
|
||||
enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 0, nullptr, &event1, ndrange1,
|
||||
[](global<output_type> *output) {
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid] = 1;
|
||||
}, output);
|
||||
output->enqueue_kernel1_success = status1 == enqueue_status::success;
|
||||
output->event1_is_valid = event1.is_valid();
|
||||
|
||||
event1.release();
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.test == 1)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(device_queue queue, global<output_type> *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
if (gid != 0)
|
||||
return;
|
||||
|
||||
output->enqueue_kernel3_success = 1;
|
||||
output->enqueue_marker_success = 1;
|
||||
output->event3_is_valid = 1;
|
||||
output->user_event1_is_valid = 1;
|
||||
output->user_event2_is_valid = 1;
|
||||
|
||||
device_queue default_queue = get_default_device_queue();
|
||||
|
||||
ndrange ndrange1(get_global_size(0) / 2);
|
||||
event event1;
|
||||
enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1,
|
||||
[](global<output_type> *output) {
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid * 2] = 1;
|
||||
}, output);
|
||||
output->enqueue_kernel1_success = status1 == enqueue_status::success;
|
||||
output->event1_is_valid = event1.is_valid();
|
||||
|
||||
ndrange ndrange2(1, get_global_size(0) / 2, 1);
|
||||
event event2;
|
||||
enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &event1, &event2, ndrange2,
|
||||
[](global<output_type> *output) {
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[(gid - 1) * 2 + 1] = 1;
|
||||
}, output);
|
||||
output->enqueue_kernel2_success = status2 == enqueue_status::success;
|
||||
output->event2_is_valid = event2.is_valid();
|
||||
|
||||
event1.release();
|
||||
event2.release();
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.test == 2)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(device_queue queue, global<output_type> *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
if (gid != 0)
|
||||
return;
|
||||
|
||||
output->enqueue_marker_success = 1;
|
||||
output->event3_is_valid = 1;
|
||||
output->enqueue_kernel3_success = 1;
|
||||
|
||||
device_queue default_queue = get_default_device_queue();
|
||||
|
||||
event user_event1 = make_user_event();
|
||||
user_event1.retain();
|
||||
output->user_event1_is_valid = user_event1.is_valid();
|
||||
|
||||
ndrange ndrange1(get_global_size(0) / 2);
|
||||
event event1;
|
||||
enqueue_status status1 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &user_event1, &event1, ndrange1,
|
||||
[](global<output_type> *output){
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid * 2] = 1;
|
||||
}, output);
|
||||
output->enqueue_kernel1_success = status1 == enqueue_status::success;
|
||||
output->event1_is_valid = event1.is_valid();
|
||||
user_event1.release();
|
||||
|
||||
event user_event2 = make_user_event();
|
||||
output->user_event2_is_valid = user_event2.is_valid();
|
||||
|
||||
event events[2];
|
||||
events[0] = user_event2;
|
||||
events[1] = user_event1;
|
||||
|
||||
ndrange ndrange2(1, get_global_size(0) / 2, get_local_size(0));
|
||||
event event2;
|
||||
enqueue_status status2 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 2, events, &event2, ndrange2,
|
||||
[](global<output_type> *output, local_ptr<int2[]> l0, local_ptr<int[]> l1, local_ptr<int[]> l2) {
|
||||
const ulong gid = get_global_id(0);
|
||||
const ulong lid = get_local_id(0);
|
||||
l1[get_local_size(0) - lid - 1] = gid > 0 ? 1 : 0;
|
||||
work_group_barrier(mem_fence::local);
|
||||
if (lid < 5) l0[lid] = int2(3, 4);
|
||||
if (lid < 3) l2[lid] = 5;
|
||||
work_group_barrier(mem_fence::local);
|
||||
output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0]));
|
||||
}, output, local_ptr<int2[]>::size_type(5), local_ptr<int[]>::size_type(get_local_size(0)), local_ptr<int[]>::size_type(3));
|
||||
output->enqueue_kernel2_success = status2 == enqueue_status::success;
|
||||
output->event2_is_valid = event2.is_valid();
|
||||
|
||||
user_event1.set_status(event_status::complete);
|
||||
user_event2.set_status(event_status::complete);
|
||||
|
||||
user_event1.release();
|
||||
user_event2.release();
|
||||
event1.release();
|
||||
event2.release();
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.test == 3)
|
||||
{
|
||||
s << R"(
|
||||
kernel void test(device_queue queue, global<output_type> *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
if (gid != 0)
|
||||
return;
|
||||
|
||||
output->user_event2_is_valid = 1;
|
||||
|
||||
device_queue default_queue = get_default_device_queue();
|
||||
|
||||
ndrange ndrange1(get_global_size(0) / 2);
|
||||
event event1;
|
||||
enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1,
|
||||
[](global<output_type> *output) {
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid * 2] = 20;
|
||||
}, output);
|
||||
output->enqueue_kernel1_success = status1 == enqueue_status::success;
|
||||
output->event1_is_valid = event1.is_valid();
|
||||
|
||||
ndrange ndrange2(1, get_global_size(0) / 2, 1);
|
||||
event event2;
|
||||
enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 0, nullptr, &event2, ndrange2,
|
||||
[](global<output_type> *output) {
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[(gid - 1) * 2 + 1] = 20;
|
||||
}, output);
|
||||
output->enqueue_kernel2_success = status2 == enqueue_status::success;
|
||||
output->event2_is_valid = event2.is_valid();
|
||||
|
||||
event user_event1 = make_user_event();
|
||||
output->user_event1_is_valid = user_event1.is_valid();
|
||||
|
||||
event events[3];
|
||||
events[0] = event2;
|
||||
events[1] = user_event1;
|
||||
events[2] = event1;
|
||||
|
||||
event event3;
|
||||
enqueue_status status3 = queue.enqueue_marker(3, events, &event3);
|
||||
output->enqueue_marker_success = status3 == enqueue_status::success;
|
||||
output->event3_is_valid = event3.is_valid();
|
||||
|
||||
enqueue_status status4 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 1, &event3, nullptr, ndrange(get_global_size(0)),
|
||||
[](global<output_type> *output) {
|
||||
const ulong gid = get_global_id(0);
|
||||
output->values[gid] /= 20;
|
||||
}, output);
|
||||
output->enqueue_kernel3_success = status4 == enqueue_status::success;
|
||||
|
||||
user_event1.set_status(event_status::complete);
|
||||
|
||||
user_event1.release();
|
||||
event1.release();
|
||||
event2.release();
|
||||
event3.release();
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Builds the kernel named "test" from the source generated for `options`, runs it on
// `queue` with one (or, when the device allows more than one, two) on-device queues, then
// reads back the output structure and validates the flags and values the kernel stored.
//
// Returns CL_SUCCESS when every check passes. On failure the RETURN_ON_* macros return
// from this function (validation failures are reported with -1).
//
// Every OpenCL object created here is handed to a local scope guard, so it is released on
// all exit paths, including the early returns hidden inside the RETURN_ON_* macros.
int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
{
    // Owns the handles created below; NULL members are skipped on release.
    struct owned_handles
    {
        cl_mem buffer;
        cl_command_queue queue1;
        cl_command_queue queue2;
        cl_kernel kernel;
        cl_program program;

        owned_handles() : buffer(NULL), queue1(NULL), queue2(NULL), kernel(NULL), program(NULL) {}

        ~owned_handles()
        {
            // Same release order as the original straight-line cleanup.
            if (buffer != NULL)
                clReleaseMemObject(buffer);
            if (queue1 != NULL)
                clReleaseCommandQueue(queue1);
            if (queue2 != NULL)
                clReleaseCommandQueue(queue2);
            if (kernel != NULL)
                clReleaseKernel(kernel);
            if (program != NULL)
                clReleaseProgram(program);
        }
    };

    int error = CL_SUCCESS;

    cl_program program;
    cl_kernel kernel;

    std::string kernel_name = "test";
    std::string source = generate_source(options);

// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 2.2 CONFORMANCE TEST DEVELOPMENT --------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
    // NOTE(review): program/kernel are deliberately not released here; it is not visible
    // from this file whether create_opencl_kernel sets them in this mode.
    return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name, "-cl-std=CL2.0", false
    );
    RETURN_ON_ERROR(error)
// Normal run
#else
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
#endif

    // From here on the guard is responsible for the kernel and the program.
    owned_handles owned;
    owned.program = program;
    owned.kernel = kernel;

    cl_uint max_queues;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(cl_uint), &max_queues, NULL);
    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")

    // The value is not used afterwards; the query itself must still succeed.
    cl_uint max_events;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_EVENTS, sizeof(cl_uint), &max_events, NULL);
    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")

    cl_command_queue device_queue1 = NULL;
    cl_command_queue device_queue2 = NULL;

    // First on-device queue: also registered as the device default queue.
    cl_queue_properties queue_properties1[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT,
        0
    };
    device_queue1 = clCreateCommandQueueWithProperties(context, device, queue_properties1, &error);
    RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties")
    owned.queue1 = device_queue1;

    // Second, non-default on-device queue, only when the device supports more than one.
    if (max_queues > 1)
    {
        cl_queue_properties queue_properties2[] =
        {
            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE,
            0
        };
        device_queue2 = clCreateCommandQueueWithProperties(context, device, queue_properties2, &error);
        RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties")
        owned.queue2 = device_queue2;
    }

    cl_mem output_buffer;
    output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type), NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
    owned.buffer = output_buffer;

    // Argument 0 is the second device queue when it exists, otherwise the default one.
    error = clSetKernelArg(kernel, 0, sizeof(cl_command_queue), device_queue2 != NULL ? &device_queue2 : &device_queue1);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    // Zero the whole output structure before the kernel runs.
    const char pattern = 0;
    error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type), 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")

    size_t max_work_group_size;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL);
    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")

    // Global size is 10000 rounded down to a multiple of the local size.
    const size_t local_size = (std::min)((size_t)256, max_work_group_size);
    const size_t global_size = 10000 / local_size * local_size;
    const size_t count = global_size;
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Blocking read of the results.
    output_type output;
    error = clEnqueueReadBuffer(
        queue, output_buffer, CL_TRUE,
        0, sizeof(output_type),
        static_cast<void *>(&output),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    if (!output.enqueue_kernel1_success)
    {
        RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed")
    }
    if (!output.enqueue_kernel2_success)
    {
        RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed")
    }
    if (!output.enqueue_kernel3_success)
    {
        RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed")
    }
    if (!output.enqueue_marker_success)
    {
        RETURN_ON_ERROR_MSG(-1, "enqueue_marker did not succeed")
    }
    if (!output.event1_is_valid)
    {
        RETURN_ON_ERROR_MSG(-1, "event1 is not valid")
    }
    if (!output.event2_is_valid)
    {
        RETURN_ON_ERROR_MSG(-1, "event2 is not valid")
    }
    if (!output.event3_is_valid)
    {
        RETURN_ON_ERROR_MSG(-1, "event3 is not valid")
    }
    if (!output.user_event1_is_valid)
    {
        RETURN_ON_ERROR_MSG(-1, "user_event1 is not valid")
    }
    if (!output.user_event2_is_valid)
    {
        RETURN_ON_ERROR_MSG(-1, "user_event2 is not valid")
    }

    // Every one of the first `count` output values must be 1.
    for (size_t i = 0; i < count; i++)
    {
        const cl_int result = output.values[i];
        const cl_int expected = 1;

        if (result != expected)
        {
            RETURN_ON_ERROR_MSG(-1,
                "kernel did not return correct value. Expected: %s, got: %s",
                format_value(expected).c_str(), format_value(result).c_str()
            )
        }
    }

    // The guard releases the buffer, queues, kernel and program on the way out.
    return error;
}
|
||||
|
||||
// Runs test() with options.test set to 0 (the value selects a branch in generate_source()).
AUTO_TEST_CASE(test_enqueue_one_kernel)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // num_elements is not used by this test case.
    test_options variant;
    variant.test = 0;
    const int status = test(device, context, queue, variant);
    return status;
}
|
||||
|
||||
// Runs test() with options.test set to 1 (the value selects a branch in generate_source()).
AUTO_TEST_CASE(test_enqueue_two_kernels)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // num_elements is not used by this test case.
    test_options variant;
    variant.test = 1;
    const int status = test(device, context, queue, variant);
    return status;
}
|
||||
|
||||
// Runs test() with options.test set to 2 (the value selects a branch in generate_source()).
AUTO_TEST_CASE(test_enqueue_user_events_and_locals)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // num_elements is not used by this test case.
    test_options variant;
    variant.test = 2;
    const int status = test(device, context, queue, variant);
    return status;
}
|
||||
|
||||
// Runs test() with options.test set to 3, the generate_source() branch whose kernel
// calls enqueue_marker.
AUTO_TEST_CASE(test_enqueue_marker)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // num_elements is not used by this test case.
    test_options variant;
    variant.test = 3;
    const int status = test(device, context, queue, variant);
    return status;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP
|
||||
@@ -1,72 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP
|
||||
|
||||
// This file contains helper classes and functions for testing various unary, binary
|
||||
// and ternary OpenCL functions (for example cl::abs(x) or cl::abs_diff(x, y)),
|
||||
// as well as other helper functions/classes.
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// Runs test_unary_func() for TEST_CLASS, passes the result through CHECK_ERROR and ORs
// it into `error`. The expansion site must have `device`, `context`, `queue`, `n_elems`,
// `last_error` and `error` in scope.
// The statements are wrapped in braces so the macro expands to one compound statement;
// it can still be used without a trailing semicolon.
#define TEST_UNARY_FUNC_MACRO(TEST_CLASS) \
    { \
        last_error = test_unary_func( \
            device, context, queue, n_elems, TEST_CLASS \
        ); \
        CHECK_ERROR(last_error) \
        error |= last_error; \
    }
|
||||
|
||||
// Runs test_binary_func() for TEST_CLASS, passes the result through CHECK_ERROR and ORs
// it into `error`. The expansion site must have `device`, `context`, `queue`, `n_elems`,
// `last_error` and `error` in scope.
// The statements are wrapped in braces so the macro expands to one compound statement;
// it can still be used without a trailing semicolon.
#define TEST_BINARY_FUNC_MACRO(TEST_CLASS) \
    { \
        last_error = test_binary_func( \
            device, context, queue, n_elems, TEST_CLASS \
        ); \
        CHECK_ERROR(last_error) \
        error |= last_error; \
    }
|
||||
|
||||
// Runs test_ternary_func() for TEST_CLASS, passes the result through CHECK_ERROR and ORs
// it into `error`. The expansion site must have `device`, `context`, `queue`, `n_elems`,
// `last_error` and `error` in scope.
// The statements are wrapped in braces so the macro expands to one compound statement;
// it can still be used without a trailing semicolon.
#define TEST_TERNARY_FUNC_MACRO(TEST_CLASS) \
    { \
        last_error = test_ternary_func( \
            device, context, queue, n_elems, TEST_CLASS \
        ); \
        CHECK_ERROR(last_error) \
        error |= last_error; \
    }
|
||||
|
||||
#include "utils_test/compare.hpp"
|
||||
#include "utils_test/generate_inputs.hpp"
|
||||
|
||||
// HOWTO:
|
||||
//
|
||||
// unary_func, binary_func, ternary_func - base classes wrapping OpenCL functions that
|
||||
// you want to test.
|
||||
//
|
||||
// To create a wrapper class for given function, you need to create a class derived from correct
|
||||
// base class (unary_func, binary_func, ternary_func), and define:
|
||||
//
|
||||
// * std::string str() method which should return class name in OpenCL ("abs", "abs_diff"),
|
||||
// * operator(x), operator(x, y) or operator(x,y,z) depending on arity of the function you wish
|
||||
// to test, method should work exactly as the tested function works in OpenCL
|
||||
// * if needed, you can overload the min1, max1, min2, max2, min3, max3 methods, which return the min
|
||||
// and max values that can be generated for given input (function argument) [required for vec
|
||||
// arguments],
|
||||
// * if you want to use vector arguments (for example: cl_int2, cl_ulong16), you should look at
|
||||
// how int_func_clamp<> is implemented in integer_funcs/numeric_funcs.hpp.
|
||||
//
|
||||
// To see how to use the class you've just created, see AUTO_TEST_CASE(test_int_numeric_funcs)
|
||||
// in integer_funcs/numeric_funcs.hpp.
|
||||
#include "utils_test/unary.hpp"
|
||||
#include "utils_test/binary.hpp"
|
||||
#include "utils_test/ternary.hpp"
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Module name for this directory; it is also the prefix of the source-list variable below.
set(MODULE_NAME CPP_GEOMETRIC_FUNCS)

# Sources of the module.
set(${MODULE_NAME}_SOURCES main.cpp)

# Shared build logic (presumably consumes the two variables above -- confirm in CMakeCommon.txt).
include(../../CMakeCommon.txt)
|
||||
@@ -1,229 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
// float fast_distance(float4 p0, float4 p1);
|
||||
struct geometric_func_fast_distance : public binary_func<cl_float4, cl_float4, cl_float>
|
||||
{
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "fast_distance";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_geometric>\n";
|
||||
}
|
||||
|
||||
cl_float operator()(const cl_float4& p0, const cl_float4& p1)
|
||||
{
|
||||
cl_double r = 0.0f;
|
||||
cl_double t;
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
t = static_cast<cl_double>(p0.s[i]) - static_cast<cl_double>(p1.s[i]);
|
||||
r += t * t;
|
||||
}
|
||||
return std::sqrt(r);
|
||||
}
|
||||
|
||||
cl_float4 min1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-512.0f);
|
||||
}
|
||||
|
||||
cl_float4 max1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(512.0f);
|
||||
}
|
||||
|
||||
cl_float4 min2()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-512.0f);
|
||||
}
|
||||
|
||||
cl_float4 max2()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(512.0f);
|
||||
}
|
||||
|
||||
cl_double delta(const cl_float4& p0, const cl_float4& p1, const cl_float& expected)
|
||||
{
|
||||
(void) p0; (void) p1;
|
||||
return 0.01f * expected;
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return
|
||||
8192.0f + // error in sqrt
|
||||
(1.5f * 4.0f) + // cumulative error for multiplications
|
||||
(0.5f * 3.0f); // cumulative error for additions
|
||||
}
|
||||
};
|
||||
|
||||
// float fast_length(float4 p);
|
||||
struct geometric_func_fast_length : public unary_func<cl_float4,cl_float>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "fast_length";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_geometric>\n";
|
||||
}
|
||||
|
||||
cl_float operator()(const cl_float4& p)
|
||||
{
|
||||
cl_double r = 0.0f;
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
r += static_cast<cl_double>(p.s[i]) * static_cast<cl_double>(p.s[i]);
|
||||
}
|
||||
return std::sqrt(r);
|
||||
}
|
||||
|
||||
cl_float4 min1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-512.0f);
|
||||
}
|
||||
|
||||
cl_float4 max1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(512.0f);
|
||||
}
|
||||
|
||||
cl_double delta(const cl_float4& p, const cl_float& expected)
|
||||
{
|
||||
(void) p;
|
||||
return 0.01f * expected;
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return
|
||||
8192.0f + // error in sqrt
|
||||
0.5f * // effect on e of taking sqrt( x + e )
|
||||
((0.5f * 4.0f) + // cumulative error for multiplications
|
||||
(0.5f * 3.0f)); // cumulative error for additions
|
||||
}
|
||||
};
|
||||
|
||||
// float4 fast_normalize(float4 p);
|
||||
struct geometric_func_fast_normalize : public unary_func<cl_float4,cl_float4>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "fast_normalize";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_geometric>\n";
|
||||
}
|
||||
|
||||
cl_float4 operator()(const cl_float4& p)
|
||||
{
|
||||
cl_double t = 0.0f;
|
||||
cl_float4 r;
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
t += static_cast<cl_double>(p.s[i]) * static_cast<cl_double>(p.s[i]);
|
||||
}
|
||||
|
||||
if(t == 0.0f)
|
||||
{
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
r.s[i] = 0.0f;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
t = std::sqrt(t);
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
r.s[i] = static_cast<cl_double>(p.s[i]) / t;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
cl_float4 min1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-512.0f);
|
||||
}
|
||||
|
||||
cl_float4 max1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(512.0f);
|
||||
}
|
||||
|
||||
std::vector<cl_float4> in_special_cases()
|
||||
{
|
||||
return {
|
||||
{0.0f, 0.0f, 0.0f, 0.0f}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
cl_double4 delta(const cl_float4& p, const cl_float4& expected)
|
||||
{
|
||||
(void) p;
|
||||
auto e = detail::make_value<cl_double4>(0.01f);
|
||||
return detail::multiply<cl_double4>(e, expected);
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return
|
||||
8192.5f + // error in rsqrt + error in multiply
|
||||
(0.5f * 4.0f) + // cumulative error for multiplications
|
||||
(0.5f * 3.0f); // cumulative error for additions
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the fast_distance, fast_length and fast_normalize checks.
// Returns -1 if the accumulated error is not CL_SUCCESS, otherwise CL_SUCCESS.
AUTO_TEST_CASE(test_fast_geometric_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    // Both variables are read and written by the TEST_*_FUNC_MACRO expansions.
    int last_error = CL_SUCCESS;
    int error = CL_SUCCESS;

    // float fast_distance(float4 p0, float4 p1)
    TEST_BINARY_FUNC_MACRO((geometric_func_fast_distance()))

    // float fast_length(float4 p)
    TEST_UNARY_FUNC_MACRO((geometric_func_fast_length()))

    // float4 fast_normalize(float4 p)
    TEST_UNARY_FUNC_MACRO((geometric_func_fast_normalize()))

    return (error != CL_SUCCESS) ? -1 : error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP
|
||||
@@ -1,389 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
// float4 cross(float4 p0, float4 p1)
|
||||
struct geometric_func_cross : public binary_func<cl_float4, cl_float4, cl_float4>
|
||||
{
|
||||
geometric_func_cross(cl_device_id device)
|
||||
{
|
||||
// On an embedded device w/ round-to-zero, 3 ulps is the worst-case tolerance for cross product
|
||||
this->m_delta = 3.0f * CL_FLT_EPSILON;
|
||||
// RTZ devices accrue approximately double the amount of error per operation. Allow for that.
|
||||
if(get_default_rounding_mode(device) == CL_FP_ROUND_TO_ZERO)
|
||||
{
|
||||
this->m_delta *= 2.0f;
|
||||
}
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "cross";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_geometric>\n";
|
||||
}
|
||||
|
||||
cl_float4 operator()(const cl_float4& p0, const cl_float4& p1)
|
||||
{
|
||||
cl_float4 r;
|
||||
r.s[0] = (p0.s[1] * p1.s[2]) - (p0.s[2] * p1.s[1]);
|
||||
r.s[1] = (p0.s[2] * p1.s[0]) - (p0.s[0] * p1.s[2]);
|
||||
r.s[2] = (p0.s[0] * p1.s[1]) - (p0.s[1] * p1.s[0]);
|
||||
r.s[3] = 0.0f;
|
||||
return r;
|
||||
}
|
||||
|
||||
cl_float4 max1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 max2()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 min1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 min2()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-1000.0f);
|
||||
}
|
||||
|
||||
bool use_ulp()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
cl_double4 delta(const cl_float4& p0, const cl_float4& p1, const cl_float4& expected)
|
||||
{
|
||||
(void) p0; (void) p1;
|
||||
auto e = detail::make_value<cl_double4>(m_delta);
|
||||
return detail::multiply<cl_double4>(e, expected);
|
||||
}
|
||||
|
||||
private:
|
||||
cl_double m_delta;
|
||||
};
|
||||
|
||||
// float dot(float4 p0, float4 p1);
|
||||
struct geometric_func_dot : public binary_func<cl_float4, cl_float4, cl_float>
|
||||
{
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "dot";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_geometric>\n";
|
||||
}
|
||||
|
||||
cl_float operator()(const cl_float4& p0, const cl_float4& p1)
|
||||
{
|
||||
cl_float r;
|
||||
r = p0.s[0] * p1.s[0];
|
||||
r += p0.s[1] * p1.s[1];
|
||||
r += p0.s[2] * p1.s[2];
|
||||
r += p0.s[3] * p1.s[3];
|
||||
return r;
|
||||
}
|
||||
|
||||
cl_float4 max1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 max2()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 min1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 min2()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-1000.0f);
|
||||
}
|
||||
|
||||
bool use_ulp()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
cl_double delta(const cl_float4& p0, const cl_float4& p1, cl_float expected)
|
||||
{
|
||||
(void) p0; (void) p1;
|
||||
return expected * ((4.0f + (4.0f - 1.0f)) * CL_FLT_EPSILON);
|
||||
}
|
||||
};
|
||||
|
||||
// float distance(float4 p0, float4 p1);
|
||||
struct geometric_func_distance : public binary_func<cl_float4, cl_float4, cl_float>
|
||||
{
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "distance";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_geometric>\n";
|
||||
}
|
||||
|
||||
cl_float operator()(const cl_float4& p0, const cl_float4& p1)
|
||||
{
|
||||
cl_double r = 0.0f;
|
||||
cl_double t;
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
t = static_cast<cl_double>(p0.s[i]) - static_cast<cl_double>(p1.s[i]);
|
||||
r += t * t;
|
||||
}
|
||||
return std::sqrt(r);
|
||||
}
|
||||
|
||||
cl_float4 max1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 max2()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 min1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 min2()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-1000.0f);
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return
|
||||
3.0f + // error in sqrt
|
||||
(1.5f * 4.0f) + // cumulative error for multiplications
|
||||
(0.5f * 3.0f); // cumulative error for additions
|
||||
}
|
||||
};
|
||||
|
||||
// float length(float4 p);
|
||||
struct geometric_func_length : public unary_func<cl_float4,cl_float>
|
||||
{
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "length";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_geometric>\n";
|
||||
}
|
||||
|
||||
cl_float operator()(const cl_float4& p)
|
||||
{
|
||||
cl_double r = 0.0f;
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
r += static_cast<cl_double>(p.s[i]) * static_cast<cl_double>(p.s[i]);
|
||||
}
|
||||
return std::sqrt(r);
|
||||
}
|
||||
|
||||
cl_float4 max1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 min1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-1000.0f);
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return
|
||||
3.0f + // error in sqrt
|
||||
0.5f * // effect on e of taking sqrt( x + e )
|
||||
((0.5f * 4.0f) + // cumulative error for multiplications
|
||||
(0.5f * 3.0f)); // cumulative error for additions
|
||||
}
|
||||
};
|
||||
|
||||
// float4 normalize(float4 p);
|
||||
struct geometric_func_normalize : public unary_func<cl_float4,cl_float4>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "normalize";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_geometric>\n";
|
||||
}
|
||||
|
||||
cl_float4 operator()(const cl_float4& p)
|
||||
{
|
||||
cl_double t = 0.0f;
|
||||
cl_float4 r;
|
||||
|
||||
// normalize( v ) returns a vector full of NaNs if any element is a NaN.
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
if((std::isnan)(p.s[i]))
|
||||
{
|
||||
for(size_t j = 0; j < 4; j++)
|
||||
{
|
||||
r.s[j] = p.s[i];
|
||||
}
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
// normalize( v ) for which any element in v is infinite shall proceed as
|
||||
// if the elements in v were replaced as follows:
|
||||
// for( i = 0; i < sizeof(v) / sizeof(v[0] ); i++ )
|
||||
// v[i] = isinf(v[i]) ? copysign(1.0, v[i]) : 0.0 * v [i];
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
if((std::isinf)(p.s[i]))
|
||||
{
|
||||
for(size_t j = 0; j < 4; j++)
|
||||
{
|
||||
r.s[j] = (std::isinf)(p.s[j]) ? (std::copysign)(1.0, p.s[j]) : 0.0 * p.s[j];
|
||||
}
|
||||
r = (*this)(r);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
t += static_cast<cl_double>(p.s[i]) * static_cast<cl_double>(p.s[i]);
|
||||
}
|
||||
|
||||
// normalize( v ) returns v if all elements of v are zero.
|
||||
if(t == 0.0f)
|
||||
{
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
r.s[i] = 0.0f;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
t = std::sqrt(t);
|
||||
for(size_t i = 0; i < 4; i++)
|
||||
{
|
||||
r.s[i] = static_cast<cl_double>(p.s[i]) / t;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
cl_float4 max1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(1000.0f);
|
||||
}
|
||||
|
||||
cl_float4 min1()
|
||||
{
|
||||
return detail::def_limit<cl_float4>(-1000.0f);
|
||||
}
|
||||
|
||||
std::vector<cl_float4> in_special_cases()
|
||||
{
|
||||
return {
|
||||
{0.0f, 0.0f, 0.0f, 0.0f},
|
||||
{std::numeric_limits<float>::infinity(), 0.0f, 0.0f, 0.0f},
|
||||
{
|
||||
std::numeric_limits<float>::infinity(),
|
||||
std::numeric_limits<float>::infinity(),
|
||||
std::numeric_limits<float>::infinity(),
|
||||
std::numeric_limits<float>::infinity()
|
||||
},
|
||||
{
|
||||
std::numeric_limits<float>::infinity(),
|
||||
1.0f,
|
||||
0.0f,
|
||||
std::numeric_limits<float>::quiet_NaN()
|
||||
},
|
||||
{-1.0f, -1.0f, 0.0f,-300.0f}
|
||||
};
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
return
|
||||
2.5f + // error in rsqrt + error in multiply
|
||||
(0.5f * 4.0f) + // cumulative error for multiplications
|
||||
(0.5f * 3.0f); // cumulative error for additions
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the cross, dot, distance, length and normalize checks.
// Returns -1 if the accumulated error is not CL_SUCCESS, otherwise CL_SUCCESS.
AUTO_TEST_CASE(test_geometric_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    // Both variables are read and written by the TEST_*_FUNC_MACRO expansions.
    int last_error = CL_SUCCESS;
    int error = CL_SUCCESS;

    // float4 cross(float4 p0, float4 p1)
    TEST_BINARY_FUNC_MACRO((geometric_func_cross(device)))

    // float dot(float4 p0, float4 p1)
    TEST_BINARY_FUNC_MACRO((geometric_func_dot()))

    // float distance(float4 p0, float4 p1)
    TEST_BINARY_FUNC_MACRO((geometric_func_distance()))

    // float length(float4 p)
    TEST_UNARY_FUNC_MACRO((geometric_func_length()))

    // float4 normalize(float4 p)
    TEST_UNARY_FUNC_MACRO((geometric_func_normalize()))

    return (error != CL_SUCCESS) ? -1 : error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP
|
||||
@@ -1,44 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <limits>
|
||||
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "geometric_funcs.hpp"
|
||||
#include "fast_geometric_funcs.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
// Check if cl_float (float) and cl_double (double) fulfill the requirements of
|
||||
// IEC 559 (IEEE 754) standard. This is required for the tests to run correctly.
|
||||
if(!std::numeric_limits<cl_float>::is_iec559)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
|
||||
"Tests won't run correctly."
|
||||
);
|
||||
}
|
||||
if(!std::numeric_limits<cl_double>::is_iec559)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
|
||||
"Tests won't run correctly."
|
||||
);
|
||||
}
|
||||
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
# Module name for this directory; it is also the prefix of the source-list variable below.
set(MODULE_NAME CPP_IMAGES)

# Sources of the module.
set(${MODULE_NAME}_SOURCES main.cpp)

# Shared build logic (presumably consumes the two variables above -- confirm in CMakeCommon.txt).
include(../../CMakeCommon.txt)
|
||||
@@ -1,195 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
#include "../harness/imageHelpers.h"
|
||||
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
template<cl_channel_type channel_type>
|
||||
struct channel_info;
|
||||
|
||||
template<>
|
||||
struct channel_info<CL_SIGNED_INT8>
|
||||
{
|
||||
typedef cl_char channel_type;
|
||||
typedef cl_int4 element_type;
|
||||
static std::string function_suffix() { return "i"; }
|
||||
|
||||
channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); }
|
||||
channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct channel_info<CL_SIGNED_INT16>
|
||||
{
|
||||
typedef cl_short channel_type;
|
||||
typedef cl_int4 element_type;
|
||||
static std::string function_suffix() { return "i"; }
|
||||
|
||||
channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); }
|
||||
channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct channel_info<CL_SIGNED_INT32>
|
||||
{
|
||||
typedef cl_int channel_type;
|
||||
typedef cl_int4 element_type;
|
||||
static std::string function_suffix() { return "i"; }
|
||||
|
||||
channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); }
|
||||
channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct channel_info<CL_UNSIGNED_INT8>
|
||||
{
|
||||
typedef cl_uchar channel_type;
|
||||
typedef cl_uint4 element_type;
|
||||
static std::string function_suffix() { return "ui"; }
|
||||
|
||||
channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); }
|
||||
channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct channel_info<CL_UNSIGNED_INT16>
|
||||
{
|
||||
typedef cl_ushort channel_type;
|
||||
typedef cl_uint4 element_type;
|
||||
static std::string function_suffix() { return "ui"; }
|
||||
|
||||
channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); }
|
||||
channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct channel_info<CL_UNSIGNED_INT32>
|
||||
{
|
||||
typedef cl_uint channel_type;
|
||||
typedef cl_uint4 element_type;
|
||||
static std::string function_suffix() { return "ui"; }
|
||||
|
||||
channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); }
|
||||
channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct channel_info<CL_FLOAT>
|
||||
{
|
||||
typedef cl_float channel_type;
|
||||
typedef cl_float4 element_type;
|
||||
static std::string function_suffix() { return "f"; }
|
||||
|
||||
channel_type channel_min() { return -1e-3f; }
|
||||
channel_type channel_max() { return +1e+3f; }
|
||||
};
|
||||
|
||||
template<cl_mem_object_type image_type>
|
||||
struct image_info;
|
||||
|
||||
template<>
|
||||
struct image_info<CL_MEM_OBJECT_IMAGE1D>
|
||||
{
|
||||
static std::string image_type_name() { return "image1d"; }
|
||||
static std::string coord_accessor() { return "x"; }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct image_info<CL_MEM_OBJECT_IMAGE2D>
|
||||
{
|
||||
static std::string image_type_name() { return "image2d"; }
|
||||
static std::string coord_accessor() { return "xy"; }
|
||||
};
|
||||
|
||||
template<>
|
||||
struct image_info<CL_MEM_OBJECT_IMAGE3D>
|
||||
{
|
||||
static std::string image_type_name() { return "image3d"; }
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
static std::string coord_accessor() { return "xyzw"; }
|
||||
#else
|
||||
static std::string coord_accessor() { return "xyz"; }
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
|
||||
struct image_test_base :
|
||||
detail::channel_info<ChannelType>,
|
||||
detail::image_info<ImageType>
|
||||
{ };
|
||||
|
||||
// Create image_descriptor (used by harness/imageHelpers functions)
|
||||
image_descriptor create_image_descriptor(cl_image_desc &image_desc, cl_image_format *image_format)
|
||||
{
|
||||
image_descriptor image_info;
|
||||
image_info.width = image_desc.image_width;
|
||||
image_info.height = image_desc.image_height;
|
||||
image_info.depth = image_desc.image_depth;
|
||||
image_info.arraySize = image_desc.image_array_size;
|
||||
image_info.rowPitch = image_desc.image_row_pitch;
|
||||
image_info.slicePitch = image_desc.image_slice_pitch;
|
||||
image_info.format = image_format;
|
||||
image_info.buffer = image_desc.mem_object;
|
||||
image_info.type = image_desc.image_type;
|
||||
image_info.num_mip_levels = image_desc.num_mip_levels;
|
||||
return image_info;
|
||||
}
|
||||
|
||||
// Returns the channel orders every image test is run with.
//
// The list is fixed; the device argument is accepted but not consulted.
// Declared inline because the definition lives in a header (avoids multiple
// definitions when the header is included from several translation units).
inline const std::vector<cl_channel_order> get_channel_orders(cl_device_id device)
{
    (void)device; // intentionally unused

    // According to "Minimum List of Supported Image Formats" of OpenCL specification:
    return { CL_R, CL_RG, CL_RGBA };
}
|
||||
|
||||
// Tells whether the image tests can run on the given device.
//
// Returns false (after logging a SKIPPED message) only when
// checkForImageSupport() reports CL_IMAGE_FORMAT_NOT_SUPPORTED; every other
// result, including any other error code, is treated as "supported".
//
// Declared inline because the definition lives in a header (avoids multiple
// definitions when the header is included from several translation units).
inline bool is_test_supported(cl_device_id device)
{
    // Check for image support
    if (checkForImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
    {
        log_info("SKIPPED: Device does not support images. Skipping test.\n");
        return false;
    }
    return true;
}
|
||||
|
||||
// Checks if x is equal to y.
|
||||
template<class type>
|
||||
inline bool are_equal(const type& x,
|
||||
const type& y)
|
||||
{
|
||||
for(size_t i = 0; i < vector_size<type>::value; i++)
|
||||
{
|
||||
if(!(x.s[i] == y.s[i]))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP
|
||||
@@ -1,30 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_read.hpp"
|
||||
#include "test_sample.hpp"
|
||||
#include "test_write.hpp"
|
||||
|
||||
// FIXME: To use certain functions in test_common/harness/imageHelpers.h
// (for example, generate_random_image_data()), tests were required to declare
// a specific global variable (a hangover from code specific to Apple's
// implementation). No such variable is declared in this file.
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,307 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
|
||||
namespace test_images_read {
|
||||
|
||||
template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
|
||||
struct image_test : image_test_base<ImageType, ChannelType>
|
||||
{
|
||||
cl_channel_order channel_order;
|
||||
|
||||
// Stores the channel order that run() puts into the image format.
image_test(cl_channel_order channel_order)
    : channel_order(channel_order)
{
}
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string generate_source()
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
|
||||
|
||||
kernel void test(
|
||||
read_only )" << image_test::image_type_name() << R"(_t img,
|
||||
const global int4 *coords,
|
||||
global element_type *output
|
||||
) {
|
||||
const ulong gid = get_global_linear_id();
|
||||
|
||||
output[gid] = read_image)" << image_test::function_suffix() <<
|
||||
"(img, coords[gid]." << image_test::coord_accessor() << R"();
|
||||
}
|
||||
)";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#else
|
||||
std::string generate_source()
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
#include <opencl_memory>
|
||||
#include <opencl_common>
|
||||
#include <opencl_work_item>
|
||||
#include <opencl_image>
|
||||
using namespace cl;
|
||||
)";
|
||||
|
||||
s << R"(
|
||||
typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
|
||||
|
||||
kernel void test(
|
||||
const )" << image_test::image_type_name() << R"(<element_type, image_access::read> img,
|
||||
const global_ptr<int4[]> coords,
|
||||
global_ptr<element_type[]> output
|
||||
) {
|
||||
const ulong gid = get_global_linear_id();
|
||||
|
||||
output[gid] = img.read(coords[gid].)" << image_test::coord_accessor() << R"();
|
||||
}
|
||||
)";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Runs the read test for one (image type, channel type, channel order)
// combination.
//
// Steps: build the kernel from generate_source(); create an image filled with
// generate_input() data; let each of the num_elements work-items read the
// pixel at its generated coordinate into output_buffer; then compare every
// result with read_image_pixel() applied to the host copy of the image data.
//
// Returns CL_SUCCESS when all pixels match, otherwise whatever the
// RETURN_ON_* macros return.
// NOTE(review): the RETURN_ON_* macros presumably return immediately; if so,
// the clRelease* calls at the end are skipped on failure - confirm.
int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int error = CL_SUCCESS;

    cl_program program;
    cl_kernel kernel;

    std::string kernel_name = "test";
    std::string source = generate_source();

// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
    return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name, "-cl-std=CL2.0", false
    );
    RETURN_ON_ERROR(error)
// Normal run
#else
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
#endif

    using element_type = typename image_test::element_type;
    using coord_type = cl_int4;
    using scalar_element_type = typename scalar_type<element_type>::type;
    using channel_type = typename image_test::channel_type;

    cl_image_format image_format;
    image_format.image_channel_order = channel_order;
    image_format.image_channel_data_type = ChannelType;

    const size_t pixel_size = get_pixel_size(&image_format);
    const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order);

    // Fixed image dimensions per image type; unused dimensions are 1.
    cl_image_desc image_desc;
    image_desc.image_type = ImageType;
    if (ImageType == CL_MEM_OBJECT_IMAGE1D)
    {
        image_desc.image_width = 2048;
        image_desc.image_height = 1;
        image_desc.image_depth = 1;
    }
    else if (ImageType == CL_MEM_OBJECT_IMAGE2D)
    {
        image_desc.image_width = 256;
        image_desc.image_height = 256;
        image_desc.image_depth = 1;
    }
    else if (ImageType == CL_MEM_OBJECT_IMAGE3D)
    {
        image_desc.image_width = 64;
        image_desc.image_height = 64;
        image_desc.image_depth = 64;
    }
    image_desc.image_array_size = 0;
    // Tightly packed host data: no padding between rows or slices.
    image_desc.image_row_pitch = image_desc.image_width * pixel_size;
    image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height;
    image_desc.num_mip_levels = 0;
    image_desc.num_samples = 0;
    image_desc.mem_object = NULL;

    image_descriptor image_info = create_image_descriptor(image_desc, &image_format);

    // One channel_type value per channel of every pixel.
    std::vector<channel_type> image_values = generate_input(
        image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count,
        image_test::channel_min(), image_test::channel_max(),
        std::vector<channel_type>()
    );

    const size_t count = num_elements;

    // Coordinates are bounded by (0, 0, 0, 0) and (width - 1, height - 1, depth - 1, 0).
    std::vector<coord_type> coords = generate_input(
        count,
        detail::make_value<coord_type>(0),
        coord_type {
            static_cast<cl_int>(image_desc.image_width - 1),
            static_cast<cl_int>(image_desc.image_height - 1),
            static_cast<cl_int>(image_desc.image_depth - 1),
            0
        },
        std::vector<coord_type>()
    );

    cl_mem img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        &image_format, &image_desc, static_cast<void *>(image_values.data()), &error);
    RETURN_ON_CL_ERROR(error, "clCreateImage")

    cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        sizeof(coord_type) * count, static_cast<void *>(coords.data()), &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    // The kernel stores its results here, so the buffer must be writable from
    // the kernel: a kernel write to a CL_MEM_READ_ONLY buffer is undefined.
    cl_mem output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(element_type) * count, NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    // One work-item per coordinate.
    const size_t global_size = count;
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Blocking read (CL_TRUE): the results are complete when the call returns.
    std::vector<element_type> output(count);
    error = clEnqueueReadBuffer(
        queue, output_buffer, CL_TRUE,
        0, sizeof(element_type) * count,
        static_cast<void *>(output.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    for (size_t i = 0; i < count; i++)
    {
        const coord_type c = coords[i];
        const element_type result = output[i];

        // Host reference value for the same coordinate.
        element_type expected;
        read_image_pixel<scalar_element_type>(static_cast<void *>(image_values.data()), &image_info,
                                              c.s[0], c.s[1], c.s[2],
                                              expected.s);

        if (!are_equal(result, expected))
        {
            RETURN_ON_ERROR_MSG(-1,
                "Reading from coordinates %s failed. Expected: %s, got: %s",
                format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str()
            );
        }
    }

    clReleaseMemObject(img);
    clReleaseMemObject(coords_buffer);
    clReleaseMemObject(output_buffer);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    return error;
}
|
||||
};
|
||||
|
||||
template<cl_mem_object_type ImageType>
|
||||
int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
if (!is_test_supported(device))
|
||||
return CL_SUCCESS;
|
||||
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
for (auto channel_order : get_channel_orders(device))
|
||||
{
|
||||
error = image_test<ImageType, CL_SIGNED_INT8>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_SIGNED_INT16>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_SIGNED_INT32>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
|
||||
error = image_test<ImageType, CL_UNSIGNED_INT8>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_UNSIGNED_INT16>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_UNSIGNED_INT32>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
|
||||
error = image_test<ImageType, CL_FLOAT>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Test entry points for the image read test, one per image dimensionality.
// Each forwards its arguments to run_test_cases() for the matching image type.

AUTO_TEST_CASE(test_images_read_1d)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE1D;
    return run_test_cases<image_type>(device, context, queue, num_elements);
}

AUTO_TEST_CASE(test_images_read_2d)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    return run_test_cases<image_type>(device, context, queue, num_elements);
}

AUTO_TEST_CASE(test_images_read_3d)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE3D;
    return run_test_cases<image_type>(device, context, queue, num_elements);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP
|
||||
@@ -1,363 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
|
||||
namespace test_images_sample {
|
||||
|
||||
// Selects which sampler the generated kernel uses for img.sample()/read_image.
enum class sampler_source
{
    param,         // the sampler passed in as the kernel's sampler_param argument
    program_scope  // a sampler declared at program scope in the kernel source
};

// Every sampler source; run_test_cases() runs each test once per entry.
const sampler_source sampler_sources[] = {
    sampler_source::param,
    sampler_source::program_scope
};
|
||||
|
||||
template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
|
||||
struct image_test : image_test_base<ImageType, ChannelType>
|
||||
{
|
||||
cl_channel_order channel_order;
|
||||
sampler_source source;
|
||||
|
||||
// Stores the channel order used for the image format and the sampler source
// that generate_source() builds the kernel for.
image_test(cl_channel_order channel_order, sampler_source source)
    : channel_order(channel_order)
    , source(source)
{
}
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string generate_source()
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
|
||||
)";
|
||||
|
||||
std::string sampler;
|
||||
if (source == sampler_source::program_scope)
|
||||
{
|
||||
s << R"(
|
||||
constant sampler_t sampler_program_scope = CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE;
|
||||
)";
|
||||
sampler = "sampler_program_scope";
|
||||
}
|
||||
else if (source == sampler_source::param)
|
||||
{
|
||||
sampler = "sampler_param";
|
||||
}
|
||||
|
||||
s << R"(
|
||||
kernel void test(
|
||||
read_only )" << image_test::image_type_name() << R"(_t img,
|
||||
const global int4 *coords,
|
||||
global element_type *output,
|
||||
sampler_t sampler_param
|
||||
) {
|
||||
const ulong gid = get_global_linear_id();
|
||||
|
||||
output[gid] = read_image)" << image_test::function_suffix() <<
|
||||
"(img, " << sampler << ", coords[gid]." << image_test::coord_accessor() << R"();
|
||||
}
|
||||
)";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#else
|
||||
std::string generate_source()
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
#include <opencl_memory>
|
||||
#include <opencl_common>
|
||||
#include <opencl_work_item>
|
||||
#include <opencl_image>
|
||||
using namespace cl;
|
||||
)";
|
||||
|
||||
s << R"(
|
||||
typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
|
||||
)";
|
||||
|
||||
std::string sampler;
|
||||
if (source == sampler_source::program_scope)
|
||||
{
|
||||
s << R"(
|
||||
sampler sampler_program_scope = make_sampler<addressing_mode::none, normalized_coordinates::unnormalized, filtering_mode::nearest>();
|
||||
)";
|
||||
sampler = "sampler_program_scope";
|
||||
}
|
||||
else if (source == sampler_source::param)
|
||||
{
|
||||
sampler = "sampler_param";
|
||||
}
|
||||
|
||||
s << R"(
|
||||
kernel void test(
|
||||
const )" << image_test::image_type_name() << R"(<element_type, image_access::sample> img,
|
||||
const global_ptr<int4[]> coords,
|
||||
global_ptr<element_type[]> output,
|
||||
sampler sampler_param
|
||||
) {
|
||||
const ulong gid = get_global_linear_id();
|
||||
|
||||
output[gid] = img.sample()" << sampler << ", coords[gid]." << image_test::coord_accessor() << R"();
|
||||
}
|
||||
)";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Runs the sample test for one (image type, channel type, channel order,
// sampler source) combination.
//
// Steps: build the kernel from generate_source(); create an image filled with
// generate_input() data and a nearest-filter, unnormalized, CL_ADDRESS_NONE
// sampler; let each of the num_elements work-items sample the pixel at its
// generated coordinate into output_buffer; then compare every result with
// read_image_pixel() applied to the host copy of the image data.
//
// Returns CL_SUCCESS when all pixels match, otherwise whatever the
// RETURN_ON_* macros return.
// NOTE(review): the RETURN_ON_* macros presumably return immediately; if so,
// the clRelease* calls at the end are skipped on failure - confirm.
int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int error = CL_SUCCESS;

    cl_program program;
    cl_kernel kernel;

    std::string kernel_name = "test";
    std::string source = generate_source();

// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
    return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name, "-cl-std=CL2.0", false
    );
    RETURN_ON_ERROR(error)
// Normal run
#else
    error = create_opencl_kernel(
        context, &program, &kernel,
        source, kernel_name
    );
    RETURN_ON_ERROR(error)
#endif

    using element_type = typename image_test::element_type;
    using coord_type = cl_int4;
    using scalar_element_type = typename scalar_type<element_type>::type;
    using channel_type = typename image_test::channel_type;

    cl_image_format image_format;
    image_format.image_channel_order = channel_order;
    image_format.image_channel_data_type = ChannelType;

    const size_t pixel_size = get_pixel_size(&image_format);
    const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order);

    // Fixed image dimensions per image type; unused dimensions are 1.
    cl_image_desc image_desc;
    image_desc.image_type = ImageType;
    if (ImageType == CL_MEM_OBJECT_IMAGE1D)
    {
        image_desc.image_width = 2048;
        image_desc.image_height = 1;
        image_desc.image_depth = 1;
    }
    else if (ImageType == CL_MEM_OBJECT_IMAGE2D)
    {
        image_desc.image_width = 256;
        image_desc.image_height = 256;
        image_desc.image_depth = 1;
    }
    else if (ImageType == CL_MEM_OBJECT_IMAGE3D)
    {
        image_desc.image_width = 64;
        image_desc.image_height = 64;
        image_desc.image_depth = 64;
    }
    image_desc.image_array_size = 0;
    // Tightly packed host data: no padding between rows or slices.
    image_desc.image_row_pitch = image_desc.image_width * pixel_size;
    image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height;
    image_desc.num_mip_levels = 0;
    image_desc.num_samples = 0;
    image_desc.mem_object = NULL;

    image_descriptor image_info = create_image_descriptor(image_desc, &image_format);

    // One channel_type value per channel of every pixel.
    std::vector<channel_type> image_values = generate_input(
        image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count,
        image_test::channel_min(), image_test::channel_max(),
        std::vector<channel_type>()
    );

    const size_t count = num_elements;

    // Coordinates are bounded by (0, 0, 0, 0) and (width - 1, height - 1, depth - 1, 0).
    std::vector<coord_type> coords = generate_input(
        count,
        detail::make_value<coord_type>(0),
        coord_type {
            static_cast<cl_int>(image_desc.image_width - 1),
            static_cast<cl_int>(image_desc.image_height - 1),
            static_cast<cl_int>(image_desc.image_depth - 1),
            0
        },
        std::vector<coord_type>()
    );

    cl_mem img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        &image_format, &image_desc, static_cast<void *>(image_values.data()), &error);
    RETURN_ON_CL_ERROR(error, "clCreateImage")

    cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        sizeof(coord_type) * count, static_cast<void *>(coords.data()), &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    // The kernel stores its results here, so the buffer must be writable from
    // the kernel: a kernel write to a CL_MEM_READ_ONLY buffer is undefined.
    cl_mem output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(element_type) * count, NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    // Host-side sampler with the same settings as the program-scope sampler
    // declared by generate_source(). It is always passed as argument 3, even
    // when the kernel uses the program-scope sampler instead.
    const cl_sampler_properties sampler_properties[] = {
        CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE,
        CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
        0
    };
    cl_sampler sampler = clCreateSamplerWithProperties(context, sampler_properties, &error);
    RETURN_ON_CL_ERROR(error, "clCreateSamplerWithProperties")

    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(kernel, 3, sizeof(sampler), &sampler);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    // One work-item per coordinate.
    const size_t global_size = count;
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Blocking read (CL_TRUE): the results are complete when the call returns.
    std::vector<element_type> output(count);
    error = clEnqueueReadBuffer(
        queue, output_buffer, CL_TRUE,
        0, sizeof(element_type) * count,
        static_cast<void *>(output.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    for (size_t i = 0; i < count; i++)
    {
        const coord_type c = coords[i];
        const element_type result = output[i];

        // Host reference value for the same coordinate.
        element_type expected;
        read_image_pixel<scalar_element_type>(static_cast<void *>(image_values.data()), &image_info,
                                              c.s[0], c.s[1], c.s[2],
                                              expected.s);

        if (!are_equal(result, expected))
        {
            RETURN_ON_ERROR_MSG(-1,
                "Sampling from coordinates %s failed. Expected: %s, got: %s",
                format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str()
            );
        }
    }

    clReleaseMemObject(img);
    clReleaseMemObject(coords_buffer);
    clReleaseMemObject(output_buffer);
    clReleaseSampler(sampler);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    return error;
}
|
||||
};
|
||||
|
||||
template<cl_mem_object_type ImageType>
|
||||
int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
if (!is_test_supported(device))
|
||||
return CL_SUCCESS;
|
||||
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
for (auto channel_order : get_channel_orders(device))
|
||||
for (auto source : sampler_sources)
|
||||
{
|
||||
error = image_test<ImageType, CL_SIGNED_INT8>(channel_order, source)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_SIGNED_INT16>(channel_order, source)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_SIGNED_INT32>(channel_order, source)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
|
||||
error = image_test<ImageType, CL_UNSIGNED_INT8>(channel_order, source)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_UNSIGNED_INT16>(channel_order, source)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_UNSIGNED_INT32>(channel_order, source)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
|
||||
error = image_test<ImageType, CL_FLOAT>(channel_order, source)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Test entry points for the image sample test, one per image dimensionality.
// Each forwards its arguments to run_test_cases() for the matching image type.

AUTO_TEST_CASE(test_images_sample_1d)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE1D;
    return run_test_cases<image_type>(device, context, queue, num_elements);
}

AUTO_TEST_CASE(test_images_sample_2d)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    return run_test_cases<image_type>(device, context, queue, num_elements);
}

AUTO_TEST_CASE(test_images_sample_3d)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE3D;
    return run_test_cases<image_type>(device, context, queue, num_elements);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP
|
||||
@@ -1,327 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP
|
||||
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
|
||||
namespace test_images_write {
|
||||
|
||||
template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
|
||||
struct image_test : image_test_base<ImageType, ChannelType>
|
||||
{
|
||||
cl_channel_order channel_order;
|
||||
|
||||
// Stores the channel order this test instance is run with.
image_test(cl_channel_order channel_order)
    : channel_order(channel_order)
{
}
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string generate_source()
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
|
||||
|
||||
kernel void test(
|
||||
write_only )" << image_test::image_type_name() << R"(_t img,
|
||||
const global int4 *coords,
|
||||
const global element_type *input
|
||||
) {
|
||||
const ulong gid = get_global_linear_id();
|
||||
|
||||
write_image)" << image_test::function_suffix() <<
|
||||
"(img, coords[gid]." << image_test::coord_accessor() << R"(, input[gid]);
|
||||
}
|
||||
)";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#else
|
||||
std::string generate_source()
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
#include <opencl_memory>
|
||||
#include <opencl_common>
|
||||
#include <opencl_work_item>
|
||||
#include <opencl_image>
|
||||
using namespace cl;
|
||||
)";
|
||||
|
||||
s << R"(
|
||||
typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
|
||||
|
||||
kernel void test(
|
||||
)" << image_test::image_type_name() << R"(<element_type, image_access::write> img,
|
||||
const global_ptr<int4[]> coords,
|
||||
const global_ptr<element_type[]> input
|
||||
) {
|
||||
const ulong gid = get_global_linear_id();
|
||||
|
||||
img.write(coords[gid].)" << image_test::coord_accessor() << R"(, input[gid]);
|
||||
}
|
||||
)";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
std::string kernel_name = "test";
|
||||
std::string source = generate_source();
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
source, kernel_name
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
return error;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
source, kernel_name, "-cl-std=CL2.0", false
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
// Normal run
|
||||
#else
|
||||
error = create_opencl_kernel(
|
||||
context, &program, &kernel,
|
||||
source, kernel_name
|
||||
);
|
||||
RETURN_ON_ERROR(error)
|
||||
#endif
|
||||
|
||||
using element_type = typename image_test::element_type;
|
||||
using coord_type = cl_int4;
|
||||
using scalar_element_type = typename scalar_type<element_type>::type;
|
||||
using channel_type = typename image_test::channel_type;
|
||||
|
||||
cl_image_format image_format;
|
||||
image_format.image_channel_order = channel_order;
|
||||
image_format.image_channel_data_type = ChannelType;
|
||||
|
||||
const size_t pixel_size = get_pixel_size(&image_format);
|
||||
const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order);
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = ImageType;
|
||||
if (ImageType == CL_MEM_OBJECT_IMAGE1D)
|
||||
{
|
||||
image_desc.image_width = 2048;
|
||||
image_desc.image_height = 1;
|
||||
image_desc.image_depth = 1;
|
||||
}
|
||||
else if (ImageType == CL_MEM_OBJECT_IMAGE2D)
|
||||
{
|
||||
image_desc.image_width = 256;
|
||||
image_desc.image_height = 256;
|
||||
image_desc.image_depth = 1;
|
||||
}
|
||||
else if (ImageType == CL_MEM_OBJECT_IMAGE3D)
|
||||
{
|
||||
image_desc.image_width = 64;
|
||||
image_desc.image_height = 64;
|
||||
image_desc.image_depth = 64;
|
||||
}
|
||||
image_desc.image_array_size = 0;
|
||||
image_desc.image_row_pitch = image_desc.image_width * pixel_size;
|
||||
image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.mem_object = NULL;
|
||||
|
||||
image_descriptor image_info = create_image_descriptor(image_desc, &image_format);
|
||||
|
||||
std::vector<channel_type> random_image_values = generate_input(
|
||||
image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count,
|
||||
image_test::channel_min(), image_test::channel_max(),
|
||||
std::vector<channel_type>()
|
||||
);
|
||||
|
||||
const size_t count = num_elements;
|
||||
|
||||
std::vector<coord_type> coords = generate_input(
|
||||
count,
|
||||
detail::make_value<coord_type>(0),
|
||||
coord_type {
|
||||
static_cast<cl_int>(image_desc.image_width - 1),
|
||||
static_cast<cl_int>(image_desc.image_height - 1),
|
||||
static_cast<cl_int>(image_desc.image_depth - 1),
|
||||
0
|
||||
},
|
||||
std::vector<coord_type>()
|
||||
);
|
||||
|
||||
std::vector<element_type> input(count);
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
const coord_type c = coords[i];
|
||||
|
||||
// Use read_image_pixel from harness/imageHelpers to fill input values
|
||||
// (it will deal with correct channels, orders etc.)
|
||||
read_image_pixel<scalar_element_type>(static_cast<void *>(random_image_values.data()), &image_info,
|
||||
c.s[0], c.s[1], c.s[2],
|
||||
input[i].s);
|
||||
}
|
||||
|
||||
// image_row_pitch and image_slice_pitch must be 0, when clCreateImage is used with host_ptr = NULL
|
||||
image_desc.image_row_pitch = 0;
|
||||
image_desc.image_slice_pitch = 0;
|
||||
cl_mem img = clCreateImage(context, CL_MEM_WRITE_ONLY,
|
||||
&image_format, &image_desc, NULL, &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateImage")
|
||||
|
||||
cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
sizeof(coord_type) * count, static_cast<void *>(coords.data()), &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
cl_mem input_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
sizeof(element_type) * count, static_cast<void *>(input.data()), &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
error = clSetKernelArg(kernel, 2, sizeof(input_buffer), &input_buffer);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
|
||||
const size_t global_size = count;
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
|
||||
|
||||
std::vector<channel_type> image_values(image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count);
|
||||
|
||||
const size_t origin[3] = { 0 };
|
||||
const size_t region[3] = { image_desc.image_width, image_desc.image_height, image_desc.image_depth };
|
||||
error = clEnqueueReadImage(
|
||||
queue, img, CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
static_cast<void *>(image_values.data()),
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
|
||||
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
const coord_type c = coords[i];
|
||||
const element_type expected = input[i];
|
||||
|
||||
element_type result;
|
||||
read_image_pixel<scalar_element_type>(static_cast<void *>(image_values.data()), &image_info,
|
||||
c.s[0], c.s[1], c.s[2],
|
||||
result.s);
|
||||
|
||||
if (!are_equal(result, expected))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"Writing to coordinates %s failed. Expected: %s, got: %s",
|
||||
format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseMemObject(img);
|
||||
clReleaseMemObject(coords_buffer);
|
||||
clReleaseMemObject(input_buffer);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return error;
|
||||
}
|
||||
};
|
||||
|
||||
template<cl_mem_object_type ImageType>
|
||||
int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
if (!is_test_supported(device))
|
||||
return CL_SUCCESS;
|
||||
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
for (auto channel_order : get_channel_orders(device))
|
||||
{
|
||||
error = image_test<ImageType, CL_SIGNED_INT8>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_SIGNED_INT16>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_SIGNED_INT32>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
|
||||
error = image_test<ImageType, CL_UNSIGNED_INT8>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_UNSIGNED_INT16>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
error = image_test<ImageType, CL_UNSIGNED_INT32>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
|
||||
error = image_test<ImageType, CL_FLOAT>(channel_order)
|
||||
.run(device, context, queue, num_elements);
|
||||
RETURN_ON_ERROR(error)
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Registers the 1D image write test case; delegates to run_test_cases
// instantiated for CL_MEM_OBJECT_IMAGE1D.
AUTO_TEST_CASE(test_images_write_1d)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = run_test_cases<CL_MEM_OBJECT_IMAGE1D>(device, context, queue, num_elements);
    return status;
}
|
||||
|
||||
// Registers the 2D image write test case; delegates to run_test_cases
// instantiated for CL_MEM_OBJECT_IMAGE2D.
AUTO_TEST_CASE(test_images_write_2d)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = run_test_cases<CL_MEM_OBJECT_IMAGE2D>(device, context, queue, num_elements);
    return status;
}
|
||||
|
||||
// Registers the 3D image write test case; delegates to run_test_cases
// instantiated for CL_MEM_OBJECT_IMAGE3D.
AUTO_TEST_CASE(test_images_write_3d)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = run_test_cases<CL_MEM_OBJECT_IMAGE3D>(device, context, queue, num_elements);
    return status;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP
|
||||
@@ -1,142 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include <type_traits>
|
||||
|
||||
template<class IN1, class IN2, class IN3, class OUT1>
|
||||
struct int_func_mad24 : public ternary_func<IN1, IN2, IN3, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "mad24";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y, const IN3& z)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value
|
||||
&& std::is_same<IN2, IN3>::value
|
||||
&& std::is_same<IN3, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
static_assert(
|
||||
std::is_same<cl_uint, IN1>::value || std::is_same<cl_int, IN1>::value,
|
||||
"Function takes only signed/unsigned integers."
|
||||
);
|
||||
return (x * y) + z;
|
||||
}
|
||||
|
||||
IN1 min1()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
IN1 max1()
|
||||
{
|
||||
return (std::numeric_limits<IN1>::max)() & IN1(0x00FFFF);
|
||||
}
|
||||
|
||||
IN2 min2()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
IN2 max2()
|
||||
{
|
||||
return (std::numeric_limits<IN2>::max)() & IN2(0x00FFFF);
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_mul24 : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "mul24";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value
|
||||
&& std::is_same<IN2, OUT1>::value,
|
||||
"All types must be the same"
|
||||
);
|
||||
static_assert(
|
||||
std::is_same<cl_uint, IN1>::value || std::is_same<cl_int, IN1>::value,
|
||||
"Function takes only signed/unsigned integers."
|
||||
);
|
||||
return x * y;
|
||||
}
|
||||
|
||||
IN1 min1()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
IN1 max1()
|
||||
{
|
||||
return (std::numeric_limits<IN1>::max)() & IN1(0x00FFFF);
|
||||
}
|
||||
|
||||
IN2 min2()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
IN2 max2()
|
||||
{
|
||||
return (std::numeric_limits<IN2>::max)() & IN2(0x00FFFF);
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the mad24 and mul24 tests for cl_int and cl_uint.
// Returns CL_SUCCESS when `error` is still CL_SUCCESS after all test
// macros have run, and -1 otherwise.
AUTO_TEST_CASE(test_int_24bit_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    // Both variables are declared by name because the TEST_*_FUNC_MACRO
    // macros presumably refer to them - confirm in funcs_test_utils.hpp.
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // intn mad24(intn x, intn y, intn z);
    // uintn mad24(uintn x, uintn y, uintn z);
    TEST_TERNARY_FUNC_MACRO((int_func_mad24<cl_int, cl_int, cl_int, cl_int>()))
    TEST_TERNARY_FUNC_MACRO((int_func_mad24<cl_uint, cl_uint, cl_uint, cl_uint>()))

    // intn mul24(intn x, intn y);
    // uintn mul24(uintn x, uintn y);
    TEST_BINARY_FUNC_MACRO((int_func_mul24<cl_int, cl_int, cl_int>()))
    TEST_BINARY_FUNC_MACRO((int_func_mul24<cl_uint, cl_uint, cl_uint>()))

    // Any failure is reported to the harness as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Module description for the OpenCL C++ integer-function tests.
# MODULE_NAME and <MODULE_NAME>_SOURCES are presumably consumed by the
# included CMakeCommon.txt - confirm there.
set(MODULE_NAME CPP_INTEGER_FUNCS)
set(${MODULE_NAME}_SOURCES main.cpp)
include(../../CMakeCommon.txt)
|
||||
@@ -1,232 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include <type_traits>
|
||||
|
||||
template<class IN1, class OUT1>
|
||||
struct int_func_popcount : public unary_func<IN1, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "popcount";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(IN1 x)
|
||||
{
|
||||
OUT1 count = 0;
|
||||
for (count = 0; x != 0; count++)
|
||||
{
|
||||
x &= x - 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class OUT1>
|
||||
struct int_func_clz : public unary_func<IN1, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "clz";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(IN1 x)
|
||||
{
|
||||
OUT1 count = 0;
|
||||
if(std::is_unsigned<IN1>::value)
|
||||
{
|
||||
cl_ulong value = x;
|
||||
value <<= 8 * sizeof(value) - (8 * sizeof(x));
|
||||
for(count = 0; 0 == (value & (CL_LONG_MIN)); count++)
|
||||
{
|
||||
value <<= 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cl_long value = x;
|
||||
value <<= 8 * sizeof(value) - (8 * sizeof(x));
|
||||
for(count = 0; 0 == (value & (CL_LONG_MIN)); count++)
|
||||
{
|
||||
value <<= 1;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class OUT1>
|
||||
struct int_func_ctz : public unary_func<IN1, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "ctz";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(IN1 x)
|
||||
{
|
||||
if(x == 0)
|
||||
return sizeof(x);
|
||||
|
||||
OUT1 count = 0;
|
||||
IN1 value = x;
|
||||
for(count = 0; 0 == (value & 0x1); count++)
|
||||
{
|
||||
value >>= 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_rotate : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "rotate";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(IN1 value, IN2 shift)
|
||||
{
|
||||
static_assert(
|
||||
std::is_unsigned<IN1>::value,
|
||||
"Only unsigned integers are supported"
|
||||
);
|
||||
if ((shift &= sizeof(value)*8 - 1) == 0)
|
||||
return value;
|
||||
return (value << shift) | (value >> (sizeof(value)*8 - shift));
|
||||
}
|
||||
|
||||
IN2 min2()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
IN2 max2()
|
||||
{
|
||||
return sizeof(IN1) * 8;
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_upsample : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "upsample";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(IN1 hi, IN2 lo)
|
||||
{
|
||||
static_assert(
|
||||
sizeof(IN1) == sizeof(IN2),
|
||||
"sizeof(IN1) != sizeof(IN2)"
|
||||
);
|
||||
static_assert(
|
||||
sizeof(OUT1) == 2 * sizeof(IN1),
|
||||
"sizeof(OUT1) != 2 * sizeof(IN1)"
|
||||
);
|
||||
static_assert(
|
||||
std::is_unsigned<IN2>::value,
|
||||
"IN2 type must be unsigned"
|
||||
);
|
||||
return (static_cast<OUT1>(hi) << (8*sizeof(IN1))) | lo;
|
||||
}
|
||||
|
||||
IN2 min2()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
IN2 max2()
|
||||
{
|
||||
return sizeof(IN1) * 8;
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the popcount, clz, ctz, rotate and upsample tests for the type
// combinations listed below.
// Returns CL_SUCCESS when `error` is still CL_SUCCESS after all test
// macros have run, and -1 otherwise.
AUTO_TEST_CASE(test_int_bitwise_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    // Both variables are declared by name because the TEST_*_FUNC_MACRO
    // macros presumably refer to them - confirm in funcs_test_utils.hpp.
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // popcount
    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_int, cl_int>()))
    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_uint, cl_uint>()))
    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_long, cl_long>()))
    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_ulong, cl_ulong>()))

    // clz
    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_int, cl_int>()))
    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_uint, cl_uint>()))
    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_long, cl_long>()))
    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_ulong, cl_ulong>()))

    // ctz
    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_int, cl_int>()))
    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_uint, cl_uint>()))
    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_long, cl_long>()))
    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_ulong, cl_ulong>()))

    // rotate (unsigned types only)
    TEST_BINARY_FUNC_MACRO((int_func_rotate<cl_uint, cl_uint, cl_uint>()))
    TEST_BINARY_FUNC_MACRO((int_func_rotate<cl_ulong, cl_ulong, cl_ulong>()))

    // shortn upsample(charn hi, ucharn lo);
    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_char, cl_uchar, cl_short>()))
    // ushortn upsample(ucharn hi, ucharn lo);
    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_uchar, cl_uchar, cl_ushort>()))
    // intn upsample(shortn hi, ushortn lo);
    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_short, cl_ushort, cl_int>()))
    // uintn upsample(ushortn hi, ushortn lo);
    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_ushort, cl_ushort, cl_uint>()))
    // longn upsample(intn hi, uintn lo);
    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_int, cl_uint, cl_long>()))
    // ulongn upsample(uintn hi, uintn lo);
    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_uint, cl_uint, cl_ulong>()))

    // Any failure is reported to the harness as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP
|
||||
@@ -1,26 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP
|
||||
|
||||
#include <random>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP
|
||||
@@ -1,26 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "bitwise_funcs.hpp"
|
||||
#include "numeric_funcs.hpp"
|
||||
#include "24bit_funcs.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,703 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include <type_traits>
|
||||
|
||||
template<class IN1, class OUT1>
|
||||
struct int_func_abs : public unary_func<IN1, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "abs";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x)
|
||||
{
|
||||
static_assert(
|
||||
std::is_unsigned<OUT1>::value,
|
||||
"OUT1 type must be unsigned"
|
||||
);
|
||||
if(x < IN1(0))
|
||||
return static_cast<OUT1>(-x);
|
||||
return static_cast<OUT1>(x);
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_abs_diff : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "abs_diff";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value,
|
||||
"IN1 must be IN2"
|
||||
);
|
||||
static_assert(
|
||||
std::is_unsigned<OUT1>::value,
|
||||
"OUT1 type must be unsigned"
|
||||
);
|
||||
if(x < y)
|
||||
return static_cast<OUT1>(y-x);
|
||||
return static_cast<OUT1>(x-y);
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_add_sat : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "add_sat";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value,
|
||||
"IN1 must be IN2"
|
||||
);
|
||||
static_assert(
|
||||
std::is_same<OUT1, IN2>::value,
|
||||
"OUT1 must be IN2"
|
||||
);
|
||||
// sat unsigned integers
|
||||
if(std::is_unsigned<OUT1>::value)
|
||||
{
|
||||
OUT1 z = x + y;
|
||||
if(z < x || z < y)
|
||||
return (std::numeric_limits<OUT1>::max)();
|
||||
return z;
|
||||
}
|
||||
// sat signed integers
|
||||
OUT1 z = x + y;
|
||||
if(y > 0)
|
||||
{
|
||||
if(z < x)
|
||||
return (std::numeric_limits<OUT1>::max)();
|
||||
}
|
||||
else
|
||||
{
|
||||
if(z > x)
|
||||
return (std::numeric_limits<OUT1>::min)();
|
||||
}
|
||||
return z;
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_hadd : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "hadd";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value,
|
||||
"IN1 must be IN2"
|
||||
);
|
||||
static_assert(
|
||||
std::is_same<OUT1, IN2>::value,
|
||||
"OUT1 must be IN2"
|
||||
);
|
||||
return (x >> OUT1(1)) + (y >> OUT1(1)) + (x & y & OUT1(1));
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_rhadd : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "rhadd";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value,
|
||||
"IN1 must be IN2"
|
||||
);
|
||||
static_assert(
|
||||
std::is_same<OUT1, IN2>::value,
|
||||
"OUT1 must be IN2"
|
||||
);
|
||||
return (x >> OUT1(1)) + (y >> OUT1(1)) + ((x | y) & OUT1(1));
|
||||
}
|
||||
};
|
||||
|
||||
// clamp for scalars
|
||||
template<class IN1, class IN2, class IN3, class OUT1, class Enable = void>
|
||||
struct int_func_clamp : public ternary_func<IN1, IN2, IN3, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "clamp";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN2, IN3>::value,
|
||||
"IN3 must be IN2"
|
||||
);
|
||||
static_assert(
|
||||
std::is_same<OUT1, IN1>::value,
|
||||
"OUT1 must be IN1"
|
||||
);
|
||||
return (std::min)((std::max)(x, minval), maxval);
|
||||
}
|
||||
|
||||
IN2 min2()
|
||||
{
|
||||
return (std::numeric_limits<IN2>::min)();
|
||||
}
|
||||
|
||||
IN2 max2()
|
||||
{
|
||||
return (std::numeric_limits<IN2>::max)() / IN2(2);
|
||||
}
|
||||
|
||||
IN3 min3()
|
||||
{
|
||||
return IN3(1) + ((std::numeric_limits<IN3>::max)() / IN3(2));
|
||||
}
|
||||
|
||||
IN3 max3()
|
||||
{
|
||||
return (std::numeric_limits<IN3>::max)();
|
||||
}
|
||||
};
|
||||
|
||||
// gentype clamp(gentype x, scalar minval, scalar maxval);
|
||||
template<class IN1, class IN2, class IN3, class OUT1>
|
||||
struct int_func_clamp<IN1, IN2, IN3, OUT1, typename std::enable_if<is_vector_type<OUT1>::value>::type> : public ternary_func<IN1, IN2, IN3, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "clamp";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN2, IN3>::value,
|
||||
"IN3 must be IN2"
|
||||
);
|
||||
static_assert(
|
||||
!is_vector_type<IN2>::value && !is_vector_type<IN3>::value,
|
||||
"IN3 and IN2 must be scalar"
|
||||
);
|
||||
static_assert(
|
||||
std::is_same<OUT1, IN1>::value,
|
||||
"OUT1 must be IN1"
|
||||
);
|
||||
OUT1 result;
|
||||
for(size_t i = 0; i < vector_size<OUT1>::value; i++)
|
||||
{
|
||||
result.s[i] = (std::min)((std::max)(x.s[i], minval), maxval);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
IN1 min1()
|
||||
{
|
||||
typedef typename scalar_type<IN1>::type SCALAR1;
|
||||
IN1 min1;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
min1.s[i] = (std::numeric_limits<SCALAR1>::min)();
|
||||
}
|
||||
return min1;
|
||||
}
|
||||
|
||||
IN1 max1()
|
||||
{
|
||||
typedef typename scalar_type<IN1>::type SCALAR1;
|
||||
IN1 max1;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
max1.s[i] = (std::numeric_limits<SCALAR1>::max)();
|
||||
}
|
||||
return max1;
|
||||
}
|
||||
|
||||
IN2 min2()
|
||||
{
|
||||
return (std::numeric_limits<IN2>::min)();
|
||||
}
|
||||
|
||||
IN2 max2()
|
||||
{
|
||||
return (std::numeric_limits<IN2>::max)() / IN2(2);
|
||||
}
|
||||
|
||||
IN3 min3()
|
||||
{
|
||||
return IN3(1) + ((std::numeric_limits<IN3>::max)() / IN3(2));
|
||||
}
|
||||
|
||||
IN3 max3()
|
||||
{
|
||||
return (std::numeric_limits<IN3>::max)();
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_mul_hi : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "mul_hi";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value
|
||||
&& std::is_same<IN2, OUT1>::value,
|
||||
"Types must be the same"
|
||||
);
|
||||
static_assert(
|
||||
!std::is_same<IN1, cl_long>::value && !std::is_same<IN1, cl_ulong>::value,
|
||||
"Operation unimplemented for 64-bit scalars"
|
||||
);
|
||||
cl_long xl = static_cast<cl_long>(x);
|
||||
cl_long yl = static_cast<cl_long>(y);
|
||||
return static_cast<OUT1>((xl * yl) >> (8 * sizeof(OUT1)));
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class IN3, class OUT1>
|
||||
struct int_func_mad_hi : public ternary_func<IN1, IN2, IN3, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "mad_hi";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y, const IN3& z)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value
|
||||
&& std::is_same<IN2, IN3>::value
|
||||
&& std::is_same<IN3, OUT1>::value,
|
||||
"Types must be the same"
|
||||
);
|
||||
return int_func_mul_hi<IN1, IN2, OUT1>()(x, y) + z;
|
||||
}
|
||||
};
|
||||
|
||||
// This test is implemented only for unsigned integers
|
||||
template<class IN1, class IN2, class IN3, class OUT1>
|
||||
struct int_func_mad_sat : public ternary_func<IN1, IN2, IN3, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "mad_sat";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y, const IN3& z)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value
|
||||
&& std::is_same<IN2, IN3>::value
|
||||
&& std::is_same<IN3, OUT1>::value,
|
||||
"Types must be the same"
|
||||
);
|
||||
static_assert(
|
||||
std::is_unsigned<OUT1>::value,
|
||||
"Test operation is not implemented for signed integers"
|
||||
);
|
||||
// mad_sat unsigned integers
|
||||
OUT1 w1 = (x * y);
|
||||
if (x != 0 && w1 / x != y)
|
||||
return (std::numeric_limits<OUT1>::max)();
|
||||
OUT1 w2 = w1 + z;
|
||||
if(w2 < w1)
|
||||
return (std::numeric_limits<OUT1>::max)();
|
||||
return w2;
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_sub_sat : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "sub_sat";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
|
||||
"IN1, IN2 and OUT1 must be the same types"
|
||||
);
|
||||
// sat unsigned integers
|
||||
if(std::is_unsigned<OUT1>::value)
|
||||
{
|
||||
OUT1 z = x - y;
|
||||
if(x < y)
|
||||
return (std::numeric_limits<OUT1>::min)();
|
||||
return z;
|
||||
}
|
||||
// sat signed integers
|
||||
OUT1 z = x - y;
|
||||
if(y < 0)
|
||||
{
|
||||
if(z < x)
|
||||
return (std::numeric_limits<OUT1>::max)();
|
||||
}
|
||||
else
|
||||
{
|
||||
if(z > x)
|
||||
return (std::numeric_limits<OUT1>::min)();
|
||||
}
|
||||
return z;
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1, class Enable = void>
|
||||
struct int_func_max : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "max";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
|
||||
"IN1, IN2 and OUT1 must be the same types"
|
||||
);
|
||||
return (std::max)(x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_max<IN1, IN2, OUT1, typename std::enable_if<is_vector_type<OUT1>::value>::type> : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "max";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
IN1 min1()
|
||||
{
|
||||
typedef typename scalar_type<IN1>::type SCALAR1;
|
||||
IN1 min1;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
min1.s[i] = (std::numeric_limits<SCALAR1>::min)();
|
||||
}
|
||||
return min1;
|
||||
}
|
||||
|
||||
IN1 max1()
|
||||
{
|
||||
typedef typename scalar_type<IN1>::type SCALAR1;
|
||||
IN1 max1;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
max1.s[i] = (std::numeric_limits<SCALAR1>::max)();
|
||||
}
|
||||
return max1;
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, OUT1>::value,
|
||||
"IN1 and OUT1 must be the same types"
|
||||
);
|
||||
static_assert(
|
||||
!is_vector_type<IN2>::value,
|
||||
"IN2 must be scalar"
|
||||
);
|
||||
static_assert(
|
||||
std::is_same<typename scalar_type<OUT1>::type, IN2>::value,
|
||||
"IN2 must match with OUT1 and IN1"
|
||||
);
|
||||
IN1 result = x;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
result.s[i] = (std::max)(x.s[i], y);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1, class Enable = void>
|
||||
struct int_func_min : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "min";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
|
||||
"IN1, IN2 and OUT1 must be the same types"
|
||||
);
|
||||
return (std::min)(x, y);
|
||||
}
|
||||
};
|
||||
|
||||
template<class IN1, class IN2, class OUT1>
|
||||
struct int_func_min<IN1, IN2, OUT1, typename std::enable_if<is_vector_type<OUT1>::value>::type> : public binary_func<IN1, IN2, OUT1>
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "min";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_integer>\n";
|
||||
}
|
||||
|
||||
IN1 min1()
|
||||
{
|
||||
typedef typename scalar_type<IN1>::type SCALAR1;
|
||||
IN1 min1;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
min1.s[i] = (std::numeric_limits<SCALAR1>::min)();
|
||||
}
|
||||
return min1;
|
||||
}
|
||||
|
||||
IN1 max1()
|
||||
{
|
||||
typedef typename scalar_type<IN1>::type SCALAR1;
|
||||
IN1 max1;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
max1.s[i] = (std::numeric_limits<SCALAR1>::max)();
|
||||
}
|
||||
return max1;
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x, const IN2& y)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<IN1, OUT1>::value,
|
||||
"IN1 and OUT1 must be the same types"
|
||||
);
|
||||
static_assert(
|
||||
!is_vector_type<IN2>::value,
|
||||
"IN2 must be scalar"
|
||||
);
|
||||
static_assert(
|
||||
std::is_same<typename scalar_type<OUT1>::type, IN2>::value,
|
||||
"IN2 must match with OUT1 and IN1"
|
||||
);
|
||||
IN1 result = x;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
result.s[i] = (std::min)(x.s[i], y);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
// Runs every integer numeric reference functor through the unary, binary or
// ternary test macro that matches its arity. Returns CL_SUCCESS when `error`
// is still CL_SUCCESS at the end, and -1 otherwise.
// NOTE(review): `error` and `last_error` are presumably updated inside the
// TEST_*_FUNC_MACRO expansions, which are defined elsewhere; confirm there.
AUTO_TEST_CASE(test_int_numeric_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // ugentype abs(gentype x);
    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_int, cl_uint>()))
    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_uint, cl_uint>()))
    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_long, cl_ulong>()))
    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_ulong, cl_ulong>()))

    // ugentype abs_diff(gentype x, gentype y);
    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_int, cl_int, cl_uint>()))
    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_uint, cl_uint, cl_uint>()))
    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_long, cl_long, cl_ulong>()))
    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_ulong, cl_ulong, cl_ulong>()))

    // gentype add_sat(gentype x, gentype y);
    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_int, cl_int, cl_int>()))
    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_uint, cl_uint, cl_uint>()))
    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_long, cl_long, cl_long>()))
    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_ulong, cl_ulong, cl_ulong>()))

    // gentype hadd(gentype x, gentype y);
    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_int, cl_int, cl_int>()))
    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_uint, cl_uint, cl_uint>()))
    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_long, cl_long, cl_long>()))
    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_ulong, cl_ulong, cl_ulong>()))

    // gentype rhadd(gentype x, gentype y);
    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_int, cl_int, cl_int>()))
    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_uint, cl_uint, cl_uint>()))
    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_long, cl_long, cl_long>()))
    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_ulong, cl_ulong, cl_ulong>()))

    // gentype clamp(gentype x, gentype minval, gentype maxval);
    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_int, cl_int, cl_int, cl_int>()))
    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_uint, cl_uint, cl_uint, cl_uint>()))
    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_long, cl_long, cl_long, cl_long>()))
    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_ulong, cl_ulong, cl_ulong, cl_ulong>()))

    // gentype clamp(gentype x, scalar minval, scalar maxval);
    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_int2, cl_int, cl_int, cl_int2>()))
    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_uint4, cl_uint, cl_uint, cl_uint4>()))
    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_long8, cl_long, cl_long, cl_long8>()))
    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_ulong16, cl_ulong, cl_ulong, cl_ulong16>()))

    // gentype mad_hi(gentype a, gentype b, gentype c);
    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_short, cl_short, cl_short, cl_short>()))
    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_ushort, cl_ushort, cl_ushort, cl_ushort>()))
    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_int, cl_int, cl_int, cl_int>()))
    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_uint, cl_uint, cl_uint, cl_uint>()))

    // gentype mad_sat(gentype a, gentype b, gentype c);
    TEST_TERNARY_FUNC_MACRO((int_func_mad_sat<cl_ushort, cl_ushort, cl_ushort, cl_ushort>()))
    TEST_TERNARY_FUNC_MACRO((int_func_mad_sat<cl_uint, cl_uint, cl_uint, cl_uint>()))
    TEST_TERNARY_FUNC_MACRO((int_func_mad_sat<cl_ulong, cl_ulong, cl_ulong, cl_ulong>()))

    // gentype max(gentype x, gentype y);
    TEST_BINARY_FUNC_MACRO((int_func_max<cl_int, cl_int, cl_int>()))
    TEST_BINARY_FUNC_MACRO((int_func_max<cl_uint, cl_uint, cl_uint>()))
    TEST_BINARY_FUNC_MACRO((int_func_max<cl_long, cl_long, cl_long>()))
    TEST_BINARY_FUNC_MACRO((int_func_max<cl_ulong, cl_ulong, cl_ulong>()))

    // gentype max(gentype x, scalar y);
    TEST_BINARY_FUNC_MACRO((int_func_max<cl_int2, cl_int, cl_int2>()))
    TEST_BINARY_FUNC_MACRO((int_func_max<cl_uint4, cl_uint, cl_uint4>()))
    TEST_BINARY_FUNC_MACRO((int_func_max<cl_long8, cl_long, cl_long8>()))
    TEST_BINARY_FUNC_MACRO((int_func_max<cl_ulong16, cl_ulong, cl_ulong16>()))

    // gentype min(gentype x, gentype y);
    TEST_BINARY_FUNC_MACRO((int_func_min<cl_int, cl_int, cl_int>()))
    TEST_BINARY_FUNC_MACRO((int_func_min<cl_uint, cl_uint, cl_uint>()))
    TEST_BINARY_FUNC_MACRO((int_func_min<cl_long, cl_long, cl_long>()))
    TEST_BINARY_FUNC_MACRO((int_func_min<cl_ulong, cl_ulong, cl_ulong>()))

    // gentype min(gentype x, scalar y);
    TEST_BINARY_FUNC_MACRO((int_func_min<cl_int2, cl_int, cl_int2>()))
    TEST_BINARY_FUNC_MACRO((int_func_min<cl_uint4, cl_uint, cl_uint4>()))
    TEST_BINARY_FUNC_MACRO((int_func_min<cl_long8, cl_long, cl_long8>()))
    TEST_BINARY_FUNC_MACRO((int_func_min<cl_ulong16, cl_ulong, cl_ulong16>()))

    // gentype mul_hi(gentype x, gentype y);
    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_short, cl_short, cl_short>()))
    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_ushort, cl_ushort, cl_ushort>()))
    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_int, cl_int, cl_int>()))
    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_uint, cl_uint, cl_uint>()))

    // gentype sub_sat(gentype x, gentype y);
    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_int, cl_int, cl_int>()))
    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_uint, cl_uint, cl_uint>()))
    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_long, cl_long, cl_long>()))
    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_ulong, cl_ulong, cl_ulong>()))

    // Any non-success code is reported to the caller as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Module definition for the OpenCL C++ math-function tests.
# MODULE_NAME and <MODULE_NAME>_SOURCES are set before the shared script is
# included; presumably CMakeCommon.txt consumes both to create the target.
set(MODULE_NAME CPP_MATH_FUNCS)
set(${MODULE_NAME}_SOURCES main.cpp)
include(../../CMakeCommon.txt)
|
||||
@@ -1,347 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
#include "reference.hpp"
|
||||
|
||||
// Builds a functor struct name of the form <x>_func_<y> by token pasting,
// e.g. MATH_FUNCS_CLASS_NAME(comparison, fdim) yields comparison_func_fdim.
// The #ifndef guard keeps any definition that was made earlier.
#ifndef MATH_FUNCS_CLASS_NAME
|
||||
#define MATH_FUNCS_CLASS_NAME(x, y) x ## _func_ ## y
|
||||
#endif
|
||||
|
||||
// Defines struct <GROUP_NAME>_func_<NAME>, a unary_func<cl_float, cl_float>
// test functor:
//   OCL_FUNC          - token returned (stringified) by str()
//   HOST_FUNC         - host reference, called with the input widened to cl_double
//   USE_ULP           - value returned by use_ulp()
//   ULP, ULP_EMBEDDED - values returned by ulp() for non-embedded / embedded
//   DELTA             - factor applied to the expected value in delta()
//   MIN1, MAX1        - values returned by min1() / max1()
#define MATH_FUNCS_DEFINE_UNARY_FUNC1(GROUP_NAME, NAME, OCL_FUNC, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) \
struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public unary_func<cl_float, cl_float> \
{ \
    MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded) {} \
    \
    std::string str() { return #OCL_FUNC; } \
    \
    std::string headers() { return "#include <opencl_math>\n"; } \
    \
    /* Reference value type is cl_double */ \
    cl_double operator()(const cl_float& x) \
    { \
        return (HOST_FUNC)(static_cast<cl_double>(x)); \
    } \
    \
    cl_float min1() { return MIN1; } \
    \
    cl_float max1() { return MAX1; } \
    \
    /* Fixed inputs: signed zeros, +-1, +-2, +-infinity and a quiet NaN */ \
    std::vector<cl_float> in1_special_cases() \
    { \
        typedef std::numeric_limits<cl_float> float_limits; \
        std::vector<cl_float> cases = { \
            cl_float(0.0f), \
            cl_float(-0.0f), \
            cl_float(1.0f), \
            cl_float(-1.0f), \
            cl_float(2.0f), \
            cl_float(-2.0f), \
            float_limits::infinity(), \
            -float_limits::infinity(), \
            float_limits::quiet_NaN() \
        }; \
        return cases; \
    } \
    \
    bool use_ulp() { return USE_ULP; } \
    \
    /* Returns DELTA multiplied by the expected value; in1 is unused */ \
    template<class T> \
    typename make_vector_type<cl_double, vector_size<T>::value>::type \
    delta(const cl_float& in1, const T& expected) \
    { \
        using delta_vector_type = \
            typename make_vector_type<cl_double, vector_size<T>::value>::type; \
        static_cast<void>(in1); \
        auto scale = detail::make_value<delta_vector_type>(DELTA); \
        return detail::multiply<delta_vector_type>(scale, expected); \
    } \
    \
    float ulp() \
    { \
        return m_is_embedded ? (ULP_EMBEDDED) : (ULP); \
    } \
    \
private: \
    bool m_is_embedded; \
};
|
||||
|
||||
// Defines struct <GROUP_NAME>_func_<NAME>, a
// binary_func<cl_float, cl_float, cl_float> test functor:
//   OCL_NAME          - token returned (stringified) by str()
//   HOST_FUNC         - host reference, called with the two cl_float inputs
//   USE_ULP           - value returned by use_ulp()
//   ULP, ULP_EMBEDDED - values returned by ulp() for non-embedded / embedded
//   DELTA             - factor applied to the expected value in delta()
//   MIN1..MAX2        - values returned by min1() / max1() / min2() / max2()
#define MATH_FUNCS_DEFINE_BINARY_FUNC1(GROUP_NAME, NAME, OCL_NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) \
struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public binary_func<cl_float, cl_float, cl_float> \
{ \
    MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded) {} \
    \
    std::string str() { return #OCL_NAME; } \
    \
    std::string headers() { return "#include <opencl_math>\n"; } \
    \
    cl_float operator()(const cl_float& x, const cl_float& y) \
    { \
        return (HOST_FUNC)(x, y); \
    } \
    \
    cl_float min1() { return MIN1; } \
    \
    cl_float max1() { return MAX1; } \
    \
    cl_float min2() { return MIN2; } \
    \
    cl_float max2() { return MAX2; } \
    \
    /* Fixed inputs: signed zeros, +-1, +-2, +-infinity and a quiet NaN */ \
    std::vector<cl_float> in1_special_cases() \
    { \
        typedef std::numeric_limits<cl_float> float_limits; \
        std::vector<cl_float> cases = { \
            cl_float(0.0f), \
            cl_float(-0.0f), \
            cl_float(1.0f), \
            cl_float(-1.0f), \
            cl_float(2.0f), \
            cl_float(-2.0f), \
            float_limits::infinity(), \
            -float_limits::infinity(), \
            float_limits::quiet_NaN() \
        }; \
        return cases; \
    } \
    \
    /* Same fixed inputs for the second argument */ \
    std::vector<cl_float> in2_special_cases() \
    { \
        typedef std::numeric_limits<cl_float> float_limits; \
        std::vector<cl_float> cases = { \
            cl_float(0.0f), \
            cl_float(-0.0f), \
            cl_float(1.0f), \
            cl_float(-1.0f), \
            cl_float(2.0f), \
            cl_float(-2.0f), \
            float_limits::infinity(), \
            -float_limits::infinity(), \
            float_limits::quiet_NaN() \
        }; \
        return cases; \
    } \
    \
    /* Returns DELTA multiplied by the expected value; in1 and in2 are unused */ \
    template<class T> \
    typename make_vector_type<cl_double, vector_size<T>::value>::type \
    delta(const cl_float& in1, const cl_float& in2, const T& expected) \
    { \
        using delta_vector_type = \
            typename make_vector_type<cl_double, vector_size<T>::value>::type; \
        static_cast<void>(in1); \
        static_cast<void>(in2); \
        auto scale = detail::make_value<delta_vector_type>(DELTA); \
        return detail::multiply<delta_vector_type>(scale, expected); \
    } \
    \
    bool use_ulp() { return USE_ULP; } \
    \
    float ulp() \
    { \
        return m_is_embedded ? (ULP_EMBEDDED) : (ULP); \
    } \
    \
private: \
    bool m_is_embedded; \
};
|
||||
|
||||
// Defines struct <GROUP_NAME>_func_<NAME>, a
// ternary_func<cl_float, cl_float, cl_float, cl_float> test functor:
//   OCL_NAME          - token returned (stringified) by str()
//   HOST_FUNC         - host reference, called with all inputs widened to cl_double
//   USE_ULP           - value returned by use_ulp()
//   ULP, ULP_EMBEDDED - values returned by ulp() for non-embedded / embedded
//   DELTA             - factor applied to the expected value in delta()
//   MIN1..MAX3        - values returned by min1() ... max3()
#define MATH_FUNCS_DEFINE_TERNARY_FUNC1(GROUP_NAME, NAME, OCL_NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) \
struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public ternary_func<cl_float, cl_float, cl_float, cl_float> \
{ \
    MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded) {} \
    \
    std::string str() { return #OCL_NAME; } \
    \
    std::string headers() { return "#include <opencl_math>\n"; } \
    \
    cl_double operator()(const cl_float& x, const cl_float& y, const cl_float& z) \
    { \
        return (HOST_FUNC)( \
            static_cast<cl_double>(x), \
            static_cast<cl_double>(y), \
            static_cast<cl_double>(z)); \
    } \
    \
    cl_float min1() { return MIN1; } \
    \
    cl_float max1() { return MAX1; } \
    \
    cl_float min2() { return MIN2; } \
    \
    cl_float max2() { return MAX2; } \
    \
    cl_float min3() { return MIN3; } \
    \
    cl_float max3() { return MAX3; } \
    \
    /* Fixed inputs: signed zeros, +-1, +-2, +-infinity and a quiet NaN */ \
    std::vector<cl_float> in1_special_cases() \
    { \
        typedef std::numeric_limits<cl_float> float_limits; \
        std::vector<cl_float> cases = { \
            cl_float(0.0f), \
            cl_float(-0.0f), \
            cl_float(1.0f), \
            cl_float(-1.0f), \
            cl_float(2.0f), \
            cl_float(-2.0f), \
            float_limits::infinity(), \
            -float_limits::infinity(), \
            float_limits::quiet_NaN() \
        }; \
        return cases; \
    } \
    \
    /* Same fixed inputs for the second argument */ \
    std::vector<cl_float> in2_special_cases() \
    { \
        typedef std::numeric_limits<cl_float> float_limits; \
        std::vector<cl_float> cases = { \
            cl_float(0.0f), \
            cl_float(-0.0f), \
            cl_float(1.0f), \
            cl_float(-1.0f), \
            cl_float(2.0f), \
            cl_float(-2.0f), \
            float_limits::infinity(), \
            -float_limits::infinity(), \
            float_limits::quiet_NaN() \
        }; \
        return cases; \
    } \
    \
    /* Same fixed inputs for the third argument */ \
    std::vector<cl_float> in3_special_cases() \
    { \
        typedef std::numeric_limits<cl_float> float_limits; \
        std::vector<cl_float> cases = { \
            cl_float(0.0f), \
            cl_float(-0.0f), \
            cl_float(1.0f), \
            cl_float(-1.0f), \
            cl_float(2.0f), \
            cl_float(-2.0f), \
            float_limits::infinity(), \
            -float_limits::infinity(), \
            float_limits::quiet_NaN() \
        }; \
        return cases; \
    } \
    \
    /* Returns DELTA multiplied by the expected value; in1..in3 are unused */ \
    template<class T> \
    typename make_vector_type<cl_double, vector_size<T>::value>::type \
    delta(const cl_float& in1, const cl_float& in2, const cl_float& in3, const T& expected) \
    { \
        using delta_vector_type = \
            typename make_vector_type<cl_double, vector_size<T>::value>::type; \
        static_cast<void>(in1); \
        static_cast<void>(in2); \
        static_cast<void>(in3); \
        auto scale = detail::make_value<delta_vector_type>(DELTA); \
        return detail::multiply<delta_vector_type>(scale, expected); \
    } \
    \
    bool use_ulp() { return USE_ULP; } \
    \
    float ulp() \
    { \
        return m_is_embedded ? (ULP_EMBEDDED) : (ULP); \
    } \
    \
private: \
    bool m_is_embedded; \
};
|
||||
|
||||
// Shorthand forms for the case where the struct-name suffix and the name
// returned by str() are the same token: NAME is forwarded for both.
#define MATH_FUNCS_DEFINE_UNARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) \
    MATH_FUNCS_DEFINE_UNARY_FUNC1( \
        GROUP_NAME, NAME, NAME, HOST_FUNC, \
        USE_ULP, ULP, ULP_EMBEDDED, DELTA, \
        MIN1, MAX1)

#define MATH_FUNCS_DEFINE_BINARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) \
    MATH_FUNCS_DEFINE_BINARY_FUNC1( \
        GROUP_NAME, NAME, NAME, HOST_FUNC, \
        USE_ULP, ULP, ULP_EMBEDDED, DELTA, \
        MIN1, MAX1, MIN2, MAX2)

#define MATH_FUNCS_DEFINE_TERNARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) \
    MATH_FUNCS_DEFINE_TERNARY_FUNC1( \
        GROUP_NAME, NAME, NAME, HOST_FUNC, \
        USE_ULP, ULP, ULP_EMBEDDED, DELTA, \
        MIN1, MAX1, MIN2, MAX2, MIN3, MAX3)
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP
|
||||
@@ -1,59 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// Each line below defines struct comparison_func_<func_name>, a binary
// float functor, through MATH_FUNCS_DEFINE_BINARY_FUNC.
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fdim, std::fdim, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmax, std::fmax, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmin, std::fmin, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, maxmag, reference::maxmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, minmag, reference::minmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
|
||||
|
||||
// comparison functions
|
||||
// Runs the comparison functors (fdim, fmax, fmin, maxmag, minmag) through the
// binary test macro. Each functor is told whether the device reports
// EMBEDDED_PROFILE. Returns CL_SUCCESS when `error` is still CL_SUCCESS at the
// end, and -1 otherwise.
AUTO_TEST_CASE(test_comparison_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // Check for EMBEDDED_PROFILE
    char profile[128];
    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
    bool is_embedded_profile = (std::strcmp(profile, "EMBEDDED_PROFILE") == 0);

    TEST_BINARY_FUNC_MACRO((comparison_func_fdim(is_embedded_profile)))
    TEST_BINARY_FUNC_MACRO((comparison_func_fmax(is_embedded_profile)))
    TEST_BINARY_FUNC_MACRO((comparison_func_fmin(is_embedded_profile)))
    TEST_BINARY_FUNC_MACRO((comparison_func_maxmag(is_embedded_profile)))
    TEST_BINARY_FUNC_MACRO((comparison_func_minmag(is_embedded_profile)))

    // Any non-success code is reported to the caller as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP
|
||||
@@ -1,139 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// Each line below defines struct exponential_func_<func_name>, a unary float
// functor, through MATH_FUNCS_DEFINE_UNARY_FUNC.
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp, std::exp, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, expm1, std::expm1, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp2, std::exp2, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp10, reference::exp10, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
|
||||
// Test functor for ldexp(x, k) with a cl_float x and a cl_int exponent k.
// Written by hand rather than through the MATH_FUNCS_DEFINE_* macros because
// the second argument is an integer.
struct exponential_func_ldexp : public binary_func<cl_float, cl_int, cl_float>
{
    exponential_func_ldexp(bool is_embedded) : m_is_embedded(is_embedded) {}

    // Name of the function under test.
    std::string str() { return "ldexp"; }

    // Include line for the header that declares the function under test.
    std::string headers() { return "#include <opencl_math>\n"; }

    /* Reference value type is cl_double */
    cl_double operator()(const cl_float& x, const cl_int& y)
    {
        return (std::ldexp)(static_cast<cl_double>(x), y);
    }

    // Bounds for the first (floating-point) argument.
    cl_float min1() { return -1000.0f; }

    cl_float max1() { return 1000.0f; }

    // Bounds for the second (exponent) argument.
    cl_int min2() { return -8; }

    cl_int max2() { return 8; }

    // Fixed first-argument inputs: signed zeros, +-1, +-2, +-infinity and a
    // quiet NaN.
    std::vector<cl_float> in1_special_cases()
    {
        typedef std::numeric_limits<cl_float> float_limits;
        std::vector<cl_float> cases = {
            cl_float(0.0f),
            cl_float(-0.0f),
            cl_float(1.0f),
            cl_float(-1.0f),
            cl_float(2.0f),
            cl_float(-2.0f),
            float_limits::infinity(),
            -float_limits::infinity(),
            float_limits::quiet_NaN()
        };
        return cases;
    }

    bool use_ulp() { return true; }

    // The value is 0 for both the embedded and the non-embedded case.
    float ulp()
    {
        return m_is_embedded ? 0.0f : 0.0f;
    }

private:
    bool m_is_embedded;
};
|
||||
|
||||
// exponential functions
|
||||
// Runs the exponential functors (exp, expm1, exp2, exp10, ldexp) through the
// matching test macros. Each functor is told whether the device reports
// EMBEDDED_PROFILE. Returns CL_SUCCESS when `error` is still CL_SUCCESS at the
// end, and -1 otherwise.
AUTO_TEST_CASE(test_exponential_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // Check for EMBEDDED_PROFILE
    char profile[128];
    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
    bool is_embedded_profile = (std::strcmp(profile, "EMBEDDED_PROFILE") == 0);

    // auto exp(gentype x);
    // auto expm1(gentype x);
    // auto exp2(gentype x);
    // auto exp10(gentype x);
    TEST_UNARY_FUNC_MACRO((exponential_func_exp(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((exponential_func_expm1(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((exponential_func_exp2(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((exponential_func_exp10(is_embedded_profile)))

    // auto ldexp(gentype x, intn k);
    TEST_BINARY_FUNC_MACRO((exponential_func_ldexp(is_embedded_profile)))

    // Any non-success code is reported to the caller as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP
|
||||
@@ -1,733 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP
|
||||
|
||||
#include <cmath>
#include <cstring>
#include <type_traits>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// -------------- UNARY FUNCTIONS
|
||||
|
||||
// gentype ceil(gentype x);
|
||||
// gentype floor(gentype x);
|
||||
// gentype rint(gentype x);
|
||||
// gentype round(gentype x);
|
||||
// gentype trunc(gentype x);
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(fp, ceil, std::ceil, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(fp, floor, std::floor, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(fp, rint, std::rint, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(fp, round, std::round, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(fp, trunc, std::trunc, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
|
||||
// floatn nan(uintn nancode);
|
||||
// Host-side reference for nan(): produces a float whose bit pattern is the
// given nancode OR-ed with 0x7fc00000 (all exponent bits plus the top
// mantissa bit set), i.e. a quiet NaN carrying the nancode in its payload.
struct fp_func_nan : public unary_func<cl_uint, cl_float>
{
    // Name of the tested function.
    std::string str()
    {
        return "nan";
    }

    // Header text returned for this function's kernel source.
    std::string headers()
    {
        return "#include <opencl_math>\n";
    }

    // Reference implementation: returns the NaN built from nancode x.
    cl_float operator()(const cl_uint& x)
    {
        static_assert(
            sizeof(cl_float) == sizeof(cl_uint),
            "Test assumes that sizeof(cl_float) == sizeof(cl_uint)"
        );
        const cl_uint bits = x | 0x7fc00000U;
        // Copy the bit pattern instead of dereferencing a reinterpret_cast'ed
        // pointer: reading a cl_uint object through a cl_float lvalue breaks
        // the strict-aliasing rule, while memcpy is well defined.
        cl_float rf;
        std::memcpy(&rf, &bits, sizeof(rf));
        return rf;
    }

    // Lower bound of the nancode range.
    cl_uint min1()
    {
        return 0;
    }

    // Upper bound of the nancode range.
    cl_uint max1()
    {
        return 100;
    }

    // Nancodes listed explicitly as special cases.
    std::vector<cl_uint> in1_special_cases()
    {
        return {
            0, 1
        };
    }
};
|
||||
|
||||
// -------------- UNARY FUNCTIONS, 2ND ARG IS POINTER
|
||||
|
||||
// gentype fract(gentype x, gentype* iptr);
|
||||
//
|
||||
// Function fract() returns an additional value via pointer (2nd argument). In order to test
|
||||
// if it's correct, the output buffer type is cl_float2. In the first component we store what
|
||||
// fract() function returns, and in the 2nd component we store what is returned via its
|
||||
// 2nd argument (gentype* iptr).
|
||||
// Reference functor for fract(x, iptr). The result is a two-component value
// computed by reference::fract() in double precision.
struct fp_func_fract : public unary_func<cl_float, cl_float2>
{
    fp_func_fract(bool is_embedded) : m_is_embedded(is_embedded) {}

    // Name of the tested function.
    std::string str() { return "fract"; }

    // Header text returned for this function's kernel source.
    std::string headers() { return "#include <opencl_math>\n"; }

    // Host reference, evaluated on the input widened to cl_double.
    cl_double2 operator()(const cl_float& x)
    {
        const cl_double wide_x = static_cast<cl_double>(x);
        return reference::fract(wide_x);
    }

    // Input range bounds.
    cl_float min1() { return -1000.0f; }
    cl_float max1() { return 1000.0f; }

    // Signed zeros, +/-1, +/-2, both infinities and a quiet NaN.
    std::vector<cl_float> in1_special_cases()
    {
        const cl_float inf = std::numeric_limits<cl_float>::infinity();
        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
    }

    bool use_ulp() { return true; }

    // ULP value; currently 0 for both the embedded and the full profile.
    float ulp()
    {
        return m_is_embedded ? 0.0f : 0.0f;
    }

private:
    bool m_is_embedded;
};
|
||||
|
||||
// We need to specialize generate_kernel_unary<>() function template for fp_func_fract.
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
template <>
|
||||
std::string generate_kernel_unary<fp_func_fract, cl_float, cl_float2>(fp_func_fract func)
|
||||
{
|
||||
return
|
||||
"__kernel void test_fract(global float *input, global float2 *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 result;\n"
|
||||
" float itpr = 0;\n"
|
||||
" result.x = fract(input[gid], &itpr);\n"
|
||||
" result.y = itpr;\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
std::string generate_kernel_unary<fp_func_fract, cl_float, cl_float2>(fp_func_fract func)
|
||||
{
|
||||
return
|
||||
"" + func.defs() +
|
||||
"" + func.headers() +
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_fract(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 result;\n"
|
||||
" float itpr = 0;\n"
|
||||
" result.x = fract(input[gid], &itpr);\n"
|
||||
" result.y = itpr;\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
// gentype modf(gentype x, gentype* iptr);
|
||||
//
|
||||
// Function modf() returns an additional value via pointer (2nd argument). In order to test
|
||||
// if it's correct, the output buffer type is cl_float2. In the first component we store what
|
||||
// modf() function returns, and in the 2nd component we store what is returned via its
|
||||
// 2nd argument (gentype* iptr).
|
||||
// Reference functor for modf(x, iptr). Component 0 of the result holds
// std::modf's return value, component 1 the integral part it stores through
// its pointer argument; both are computed in double precision.
struct fp_func_modf : public unary_func<cl_float, cl_float2>
{
    fp_func_modf(bool is_embedded) : m_is_embedded(is_embedded) {}

    // Name of the tested function.
    std::string str() { return "modf"; }

    // Header text returned for this function's kernel source.
    std::string headers() { return "#include <opencl_math>\n"; }

    // Host reference, evaluated on the input widened to cl_double.
    cl_double2 operator()(const cl_float& x)
    {
        cl_double2 result;
        cl_double* const integral_part = &(result.s[1]);
        result.s[0] = (std::modf)(static_cast<cl_double>(x), integral_part);
        return result;
    }

    // Input range bounds.
    cl_float min1() { return -1000.0f; }
    cl_float max1() { return 1000.0f; }

    // Signed zeros, +/-1, +/-2, both infinities and a quiet NaN.
    std::vector<cl_float> in1_special_cases()
    {
        const cl_float inf = std::numeric_limits<cl_float>::infinity();
        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
    }

    bool use_ulp() { return true; }

    // ULP value; currently 0 for both the embedded and the full profile.
    float ulp()
    {
        return m_is_embedded ? 0.0f : 0.0f;
    }

private:
    bool m_is_embedded;
};
|
||||
|
||||
// We need to specialize generate_kernel_unary<>() function template for fp_func_modf.
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
template <>
|
||||
std::string generate_kernel_unary<fp_func_modf, cl_float, cl_float2>(fp_func_modf func)
|
||||
{
|
||||
return
|
||||
"__kernel void test_modf(global float *input, global float2 *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 result;\n"
|
||||
" float itpr = 0;\n"
|
||||
" result.x = modf(input[gid], &itpr);\n"
|
||||
" result.y = itpr;\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
std::string generate_kernel_unary<fp_func_modf, cl_float, cl_float2>(fp_func_modf func)
|
||||
{
|
||||
return
|
||||
"" + func.defs() +
|
||||
"" + func.headers() +
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_modf(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 result;\n"
|
||||
" float itpr = 0;\n"
|
||||
" result.x = modf(input[gid], &itpr);\n"
|
||||
" result.y = itpr;\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
// gentype frexp(gentype x, intn* exp);
|
||||
//
|
||||
// Function frexp() returns an additional value via pointer (2nd argument). In order to test
|
||||
// if it's correct, the output buffer type is cl_float2. In the first component we store what
|
||||
// frexp() function returns, and in the 2nd component we store what is returned via its
|
||||
// 2nd argument (intn* exp).
|
||||
// Reference functor for frexp(x, exp). Component 0 of the result holds the
// mantissa returned by std::frexp, component 1 the exponent converted to
// floating point; both are computed in double precision.
struct fp_func_frexp : public unary_func<cl_float, cl_float2>
{
    fp_func_frexp(bool is_embedded) : m_is_embedded(is_embedded)
    {

    }

    // Name of the tested function.
    std::string str()
    {
        return "frexp";
    }

    // Header text returned for this function's kernel source.
    std::string headers()
    {
        return "#include <opencl_math>\n";
    }

    // Host reference, evaluated on the input widened to cl_double.
    cl_double2 operator()(const cl_float& x)
    {
        cl_double2 r;
        // std::frexp takes int*; start from 0 so the value is never indeterminate.
        int e = 0;
        r.s[0] = (std::frexp)(static_cast<cl_double>(x), &e);
        // For +/-infinity and NaN the C/C++ library leaves the stored exponent
        // unspecified, whereas OpenCL requires frexp to store 0 in that case.
        // Both inputs are in in1_special_cases(), so pin the reference to 0.
        if(!(std::isfinite)(x))
        {
            e = 0;
        }
        // The destination lane is cl_double, so convert directly to cl_double.
        r.s[1] = static_cast<cl_double>(e);
        return r;
    }

    // Lower bound of the input range.
    cl_float min1()
    {
        return -1000.0f;
    }

    // Upper bound of the input range.
    cl_float max1()
    {
        return 1000.0f;
    }

    // Signed zeros, +/-1, +/-2, both infinities and a quiet NaN.
    std::vector<cl_float> in1_special_cases()
    {
        return {
            cl_float(0.0f),
            cl_float(-0.0f),
            cl_float(1.0f),
            cl_float(-1.0f),
            cl_float(2.0f),
            cl_float(-2.0f),
            std::numeric_limits<cl_float>::infinity(),
            -std::numeric_limits<cl_float>::infinity(),
            std::numeric_limits<cl_float>::quiet_NaN()
        };
    }

    bool use_ulp()
    {
        return true;
    }

    // ULP value; currently 0 for both the embedded and the full profile.
    float ulp()
    {
        if(m_is_embedded)
        {
            return 0.0f;
        }
        return 0.0f;
    }
private:
    bool m_is_embedded;
};
|
||||
|
||||
// We need to specialize generate_kernel_unary<>() function template for fp_func_frexp.
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
template <>
|
||||
std::string generate_kernel_unary<fp_func_frexp, cl_float, cl_float2>(fp_func_frexp func)
|
||||
{
|
||||
return
|
||||
"__kernel void test_frexp(global float *input, global float2 *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 result;\n"
|
||||
" int itpr = 0;\n"
|
||||
" result.x = frexp(input[gid], &itpr);\n"
|
||||
" result.y = itpr;\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
std::string generate_kernel_unary<fp_func_frexp, cl_float, cl_float2>(fp_func_frexp func)
|
||||
{
|
||||
return
|
||||
"" + func.defs() +
|
||||
"" + func.headers() +
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_frexp(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 result;\n"
|
||||
" int itpr = 0;\n"
|
||||
" result.x = frexp(input[gid], &itpr);\n"
|
||||
" result.y = itpr;\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
// -------------- BINARY FUNCTIONS
|
||||
|
||||
// gentype copysign(gentype x, gentype y);
|
||||
// gentype fmod(gentype x, gentype y);
|
||||
// gentype remainder(gentype x, gentype y);
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(fp, copysign, std::copysign, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(fp, fmod, std::fmod, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
|
||||
// Argument order is (..., use_ulp, ulp, ulp_for_embedded, max_delta, ...): remainder uses
// 0 ULP for both profiles and the same 0.001f max_delta as copysign and fmod.
MATH_FUNCS_DEFINE_BINARY_FUNC(fp, remainder, std::remainder, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
|
||||
|
||||
// In case of function float nextafter(float, float) reference function must
|
||||
// operate on floats and return float.
|
||||
// Reference functor for nextafter(x, y). Unlike the other fp functors, the
// reference works on cl_float and returns cl_float.
struct fp_func_nextafter : public binary_func<cl_float, cl_float, cl_float>
{
    fp_func_nextafter(bool is_embedded) : m_is_embedded(is_embedded) {}

    // Name of the tested function.
    std::string str() { return "nextafter"; }

    // Header text returned for this function's kernel source.
    std::string headers() { return "#include <opencl_math>\n"; }

    /* The reference value type MUST BE cl_float here. */
    cl_float operator()(const cl_float& x, const cl_float& y)
    {
        const cl_float next = (std::nextafter)(x, y);
        return next;
    }

    // First-argument range bounds.
    cl_float min1() { return -1000.0f; }
    cl_float max1() { return 500.0f; }

    // Second-argument range bounds.
    cl_float min2() { return 501.0f; }
    cl_float max2() { return 1000.0f; }

    // Signed zeros, +/-1, +/-2, both infinities and a quiet NaN.
    std::vector<cl_float> in1_special_cases()
    {
        const cl_float inf = std::numeric_limits<cl_float>::infinity();
        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
    }

    // Same list as for the first argument.
    std::vector<cl_float> in2_special_cases()
    {
        const cl_float inf = std::numeric_limits<cl_float>::infinity();
        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
    }

    bool use_ulp() { return true; }

    // ULP value; currently 0 for both the embedded and the full profile.
    float ulp()
    {
        return m_is_embedded ? 0.0f : 0.0f;
    }

private:
    bool m_is_embedded;
};
|
||||
|
||||
// gentype remquo(gentype x, gentype y, intn* quo);
|
||||
// Reference functor for remquo(x, y, quo). The result is a two-component
// value computed by reference::remquo() in double precision.
struct fp_func_remquo : public binary_func<cl_float, cl_float, cl_float2>
{
    fp_func_remquo(bool is_embedded) : m_is_embedded(is_embedded) {}

    // Name of the tested function.
    std::string str() { return "remquo"; }

    // Header text returned for this function's kernel source.
    std::string headers() { return "#include <opencl_math>\n"; }

    // Host reference, evaluated on both inputs widened to cl_double.
    cl_double2 operator()(const cl_float& x, const cl_float& y)
    {
        const cl_double wide_x = static_cast<cl_double>(x);
        const cl_double wide_y = static_cast<cl_double>(y);
        return reference::remquo(wide_x, wide_y);
    }

    // First-argument range bounds.
    cl_float min1() { return -1000.0f; }
    cl_float max1() { return 1000.0f; }

    // Second-argument range bounds.
    cl_float min2() { return -1000.0f; }
    cl_float max2() { return 1000.0f; }

    // Signed zeros, +/-1, both infinities and a quiet NaN.
    std::vector<cl_float> in1_special_cases()
    {
        const cl_float inf = std::numeric_limits<cl_float>::infinity();
        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
        return { 0.0f, -0.0f, 1.0f, -1.0f, inf, -inf, qnan };
    }

    // Same list as for the first argument.
    std::vector<cl_float> in2_special_cases()
    {
        const cl_float inf = std::numeric_limits<cl_float>::infinity();
        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
        return { 0.0f, -0.0f, 1.0f, -1.0f, inf, -inf, qnan };
    }

    bool use_ulp() { return true; }

    // ULP value; currently 0 for both the embedded and the full profile.
    float ulp()
    {
        return m_is_embedded ? 0.0f : 0.0f;
    }

private:
    bool m_is_embedded;
};
|
||||
|
||||
|
||||
// We need to specialize generate_kernel_binary<>() function template for fp_func_remquo.
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
template <>
|
||||
std::string generate_kernel_binary<fp_func_remquo, cl_float, cl_float, cl_float2>(fp_func_remquo func)
|
||||
{
|
||||
return
|
||||
"__kernel void test_remquo(global float *input1, global float *input2, global float2 *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 result;\n"
|
||||
" int quo = 0;\n"
|
||||
" int sign = 0;\n"
|
||||
" result.x = remquo(input1[gid], input2[gid], &quo);\n"
|
||||
// Specification say:
|
||||
// "remquo also calculates the lower seven bits of the integral quotient x/y,
|
||||
// and gives that value the same sign as x/y. It stores this signed value in
|
||||
// the object pointed to by quo."
|
||||
// Implemenation may save into quo more than seven bits. We need to take
|
||||
// care of that here.
|
||||
" sign = (quo < 0) ? -1 : 1;\n"
|
||||
" quo = (quo < 0) ? -quo : quo;\n"
|
||||
" quo &= 0x0000007f;\n"
|
||||
" result.y = (sign < 0) ? -quo : quo;\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
std::string generate_kernel_binary<fp_func_remquo, cl_float, cl_float, cl_float2>(fp_func_remquo func)
|
||||
{
|
||||
return
|
||||
"" + func.defs() +
|
||||
"" + func.headers() +
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_remquo(global_ptr<float[]> input1, global_ptr<float[]> input2, global_ptr<float2[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 result;\n"
|
||||
" int quo = 0;\n"
|
||||
" int sign = 0;\n"
|
||||
" result.x = remquo(input1[gid], input2[gid], &quo);\n"
|
||||
// Specification say:
|
||||
// "remquo also calculates the lower seven bits of the integral quotient x/y,
|
||||
// and gives that value the same sign as x/y. It stores this signed value in
|
||||
// the object pointed to by quo."
|
||||
// Implemenation may save into quo more than seven bits. We need to take
|
||||
// care of that here.
|
||||
" sign = (quo < 0) ? -1 : 1;\n"
|
||||
" quo = (quo < 0) ? -quo : quo;\n"
|
||||
" quo &= 0x0000007f;\n"
|
||||
" result.y = (sign < 0) ? -quo : quo;\n"
|
||||
" output[gid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
// -------------- TERNARY FUNCTIONS
|
||||
|
||||
// gentype fma(gentype a, gentype b, gentype c);
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3
|
||||
MATH_FUNCS_DEFINE_TERNARY_FUNC(fp, fma, std::fma, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
|
||||
|
||||
// floating point functions
|
||||
// Test case running every fp-group functor on the given device. Returns
// CL_SUCCESS when `error` is still CL_SUCCESS at the end, -1 otherwise.
// NOTE(review): the TEST_*_FUNC_MACRO macros presumably update `error` and
// `last_error`; confirm against their definitions.
AUTO_TEST_CASE(test_fp_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // Query the device profile; the functors take an embedded-profile flag.
    char profile[128];
    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
    const bool is_embedded_profile = (std::strcmp(profile, "EMBEDDED_PROFILE") == 0);

    // Rounding functions: ceil, floor, rint, round, trunc.
    TEST_UNARY_FUNC_MACRO((fp_func_ceil(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((fp_func_floor(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((fp_func_rint(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((fp_func_round(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((fp_func_trunc(is_embedded_profile)))

    // floatn nan(uintn nancode);
    TEST_UNARY_FUNC_MACRO((fp_func_nan()))

    // Functions returning a second value through a pointer: fract, modf, frexp.
    TEST_UNARY_FUNC_MACRO((fp_func_fract(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((fp_func_modf(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((fp_func_frexp(is_embedded_profile)))

    // Binary functions: remainder, copysign, fmod.
    TEST_BINARY_FUNC_MACRO((fp_func_remainder(is_embedded_profile)))
    TEST_BINARY_FUNC_MACRO((fp_func_copysign(is_embedded_profile)))
    TEST_BINARY_FUNC_MACRO((fp_func_fmod(is_embedded_profile)))

    // gentype nextafter(gentype x, gentype y);
    TEST_BINARY_FUNC_MACRO((fp_func_nextafter(is_embedded_profile)))

    // gentype remquo(gentype x, gentype y, intn* quo);
    TEST_BINARY_FUNC_MACRO((fp_func_remquo(is_embedded_profile)))

    // gentype fma(gentype a, gentype b, gentype c);
    TEST_TERNARY_FUNC_MACRO((fp_func_fma(is_embedded_profile)))

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP
|
||||
@@ -1,106 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
// group_name, func_name, device_func, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
|
||||
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_log10, std::log10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
|
||||
// group_name, func_name, device_func, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
|
||||
#else
|
||||
// group_name, func_name, device_func, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_math::cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_math::sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_math::tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
|
||||
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_math::exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_math::exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_math::exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_math::log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_math::log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_math::log10, std::log10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_math::rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_math::sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_math::recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
|
||||
|
||||
// group_name, func_name, device_func, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_math::divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_math::powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
|
||||
#endif
|
||||
|
||||
// half_math functions
|
||||
// Test case running the half_math functors on the given device. Returns
// CL_SUCCESS when `error` is still CL_SUCCESS at the end, -1 otherwise.
// NOTE(review): functors for rsqrt, sqrt, recip and powr are defined in this
// header but are not exercised here; confirm whether that is intentional.
AUTO_TEST_CASE(test_half_math_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // Query the device profile; the functors take an embedded-profile flag.
    char profile[128];
    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
    const bool is_embedded_profile = (std::strcmp(profile, "EMBEDDED_PROFILE") == 0);

    // Trigonometric functions.
    TEST_UNARY_FUNC_MACRO((half_math_func_cos(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((half_math_func_sin(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((half_math_func_tan(is_embedded_profile)))

    // Exponential functions.
    TEST_UNARY_FUNC_MACRO((half_math_func_exp(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((half_math_func_exp2(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((half_math_func_exp10(is_embedded_profile)))

    // Logarithmic functions.
    TEST_UNARY_FUNC_MACRO((half_math_func_log(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((half_math_func_log2(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((half_math_func_log10(is_embedded_profile)))

    // Division.
    TEST_BINARY_FUNC_MACRO((half_math_func_divide(is_embedded_profile)))

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP
|
||||
@@ -1,261 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// This function reads values of FP_ILOGB0 and FP_ILOGBNAN macros defined on the device.
|
||||
// OpenCL C++ Spec:
|
||||
// The value of FP_ILOGB0 shall be either {INT_MIN} or {INT_MAX}. The value of FP_ILOGBNAN
|
||||
// shall be either {INT_MAX} or {INT_MIN}.
|
||||
int get_ilogb_nan_zero(cl_device_id device, cl_context context, cl_command_queue queue, cl_int& ilogb_nan, cl_int& ilogb_zero)
|
||||
{
|
||||
cl_mem buffers[1];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t work_size[1];
|
||||
int err;
|
||||
|
||||
std::string code_str =
|
||||
"__kernel void get_ilogb_nan_zero(__global int *out)\n"
|
||||
"{\n"
|
||||
" out[0] = FP_ILOGB0;\n"
|
||||
" out[1] = FP_ILOGBNAN;\n"
|
||||
"}\n";
|
||||
std::string kernel_name("get_ilogb_nan_zero");
|
||||
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
|
||||
RETURN_ON_ERROR(err)
|
||||
|
||||
std::vector<cl_int> output = generate_output<cl_int>(2);
|
||||
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer")
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
work_size[0] = 1;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(cl_int) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
|
||||
|
||||
// Save
|
||||
ilogb_zero = output[0];
|
||||
ilogb_nan = output[1];
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// Reference functor for ilogb(x). The values returned for NaN and zero inputs
// are supplied by the caller through the constructor; every other input is
// forwarded to std::ilogb.
struct logarithmic_func_ilogb : public unary_func<cl_float, cl_int>
{
    logarithmic_func_ilogb(cl_int ilogb_nan, cl_int ilogb_zero)
        : m_ilogb_nan(ilogb_nan), m_ilogb_zero(ilogb_zero) {}

    // Name of the tested function.
    std::string str() { return "ilogb"; }

    // Header text returned for this function's kernel source.
    std::string headers() { return "#include <opencl_math>\n"; }

    // Host reference: NaN and zero map to the constructor-supplied values.
    cl_int operator()(const cl_float& x)
    {
        static_assert(
            sizeof(cl_int) == sizeof(int),
            "Tests assumes that sizeof(cl_int) == sizeof(int)"
        );
        if((std::isnan)(x))
        {
            return m_ilogb_nan;
        }
        // -0.0 compares equal to 0.0, so one comparison covers both zeros.
        if(x == 0.0)
        {
            return m_ilogb_zero;
        }
        return (std::ilogb)(x);
    }

    // Input range bounds.
    cl_float min1() { return -100.0f; }
    cl_float max1() { return 1000.0f; }

    // Signed zeros, +/-1, +/-2, both infinities and a quiet NaN.
    std::vector<cl_float> in1_special_cases()
    {
        const cl_float inf = std::numeric_limits<cl_float>::infinity();
        const cl_float qnan = std::numeric_limits<cl_float>::quiet_NaN();
        return { 0.0f, -0.0f, 1.0f, -1.0f, 2.0f, -2.0f, inf, -inf, qnan };
    }

private:
    cl_int m_ilogb_nan;
    cl_int m_ilogb_zero;
};
|
||||
|
||||
// gentype log(gentype x);
|
||||
// gentype logb(gentype x);
|
||||
// gentype log2(gentype x);
|
||||
// gentype log10(gentype x);
|
||||
// gentype log1p(gentype x);
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log, std::log, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, logb, std::logb, true, 0.0f, 0.0f, 0.001f, -10.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log2, std::log2, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log10, std::log10, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log1p, std::log1p, true, 2.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
|
||||
|
||||
// gentype lgamma(gentype x);
|
||||
// OpenCL C++ Spec.:
|
||||
// The ULP values for the built-in math functions lgamma and lgamma_r are currently undefined.
|
||||
// Because of that we don't check ULP and set acceptable delta to 0.2f (20%).
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f)
|
||||
|
||||
// gentype lgamma_r(gentype x, intn* signp);
|
||||
// OpenCL C++ Spec.:
|
||||
// The ULP values for the built-in math functions lgamma and lgamma_r are currently undefined.
|
||||
// Because of that we don't check ULP and set acceptable delta to 0.2f (20%).
|
||||
//
|
||||
// Note:
|
||||
// We DO NOT test whether the sign of the gamma function returned by lgamma_r is correct.
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma_r, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f)
|
||||
|
||||
// We need to specialize generate_kernel_unary<>() function template for logarithmic_func_lgamma_r
|
||||
// because it takes two arguments, but only one of it is input, the 2nd one is used to return
|
||||
// the sign of the gamma function.
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
template <>
|
||||
std::string generate_kernel_unary<logarithmic_func_lgamma_r, cl_float, cl_float>(logarithmic_func_lgamma_r func)
|
||||
{
|
||||
return
|
||||
"__kernel void test_lgamma_r(global float *input, global float *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" int sign;\n"
|
||||
" output[gid] = lgamma_r(input[gid], &sign);\n"
|
||||
"}\n";
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
std::string generate_kernel_unary<logarithmic_func_lgamma_r, cl_float, cl_float>(logarithmic_func_lgamma_r func)
|
||||
{
|
||||
return
|
||||
"" + func.defs() +
|
||||
"" + func.headers() +
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_lgamma_r(global_ptr<float[]> input, global_ptr<float[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" int sign;\n"
|
||||
" output[gid] = lgamma_r(input[gid], &sign);\n"
|
||||
"}\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
// logarithmic functions
|
||||
AUTO_TEST_CASE(test_logarithmic_funcs)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
int last_error = CL_SUCCESS;
|
||||
|
||||
// Check for EMBEDDED_PROFILE
|
||||
bool is_embedded_profile = false;
|
||||
char profile[128];
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
|
||||
if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
|
||||
is_embedded_profile = true;
|
||||
|
||||
// Write values of FP_ILOGB0 and FP_ILOGBNAN, which are macros defined on the device, to
|
||||
// ilogb_zero and ilogb_nan.
|
||||
cl_int ilogb_nan = 0;
|
||||
cl_int ilogb_zero = 0;
|
||||
error = detail::get_ilogb_nan_zero(device, context, queue, ilogb_nan, ilogb_zero);
|
||||
RETURN_ON_ERROR_MSG(error, "detail::get_ilogb_nan_zero function failed");
|
||||
|
||||
// intn ilogb(gentype x);
|
||||
TEST_UNARY_FUNC_MACRO((logarithmic_func_ilogb(ilogb_nan, ilogb_zero)))
|
||||
|
||||
// gentype log(gentype x);
|
||||
// gentype logb(gentype x);
|
||||
// gentype log2(gentype x);
|
||||
// gentype log10(gentype x);
|
||||
// gentype log1p(gentype x);
|
||||
TEST_UNARY_FUNC_MACRO((logarithmic_func_log(is_embedded_profile)))
|
||||
TEST_UNARY_FUNC_MACRO((logarithmic_func_logb(is_embedded_profile)))
|
||||
TEST_UNARY_FUNC_MACRO((logarithmic_func_log2(is_embedded_profile)))
|
||||
TEST_UNARY_FUNC_MACRO((logarithmic_func_log10(is_embedded_profile)))
|
||||
TEST_UNARY_FUNC_MACRO((logarithmic_func_log1p(is_embedded_profile)))
|
||||
|
||||
// gentype lgamma(gentype x);
|
||||
TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma(is_embedded_profile)))
|
||||
|
||||
// gentype lgamma(gentype x);
|
||||
//
|
||||
// Note:
|
||||
// We DO NOT test if sign of the gamma function return by lgamma_r is correct
|
||||
TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma_r(is_embedded_profile)))
|
||||
|
||||
if(error != CL_SUCCESS)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP
|
||||
@@ -1,50 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <limits>
|
||||
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "comparison_funcs.hpp"
|
||||
#include "exponential_funcs.hpp"
|
||||
#include "floating_point_funcs.hpp"
|
||||
#include "half_math_funcs.hpp"
|
||||
#include "logarithmic_funcs.hpp"
|
||||
#include "other_funcs.hpp"
|
||||
#include "power_funcs.hpp"
|
||||
#include "trigonometric_funcs.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
// Check if cl_float (float) and cl_double (double) fulfill the requirements of
|
||||
// IEC 559 (IEEE 754) standard. This is required for the tests to run correctly.
|
||||
if(!std::numeric_limits<cl_float>::is_iec559)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
|
||||
"Tests won't run correctly."
|
||||
);
|
||||
}
|
||||
if(!std::numeric_limits<cl_double>::is_iec559)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
|
||||
"Tests won't run correctly."
|
||||
);
|
||||
}
|
||||
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,75 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(other, erfc, std::erfc, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(other, erf, std::erf, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(other, fabs, std::fabs, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(other, tgamma, std::tgamma, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f)
|
||||
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(other, hypot, std::hypot, true, 4.0f, 4.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
|
||||
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3
|
||||
MATH_FUNCS_DEFINE_TERNARY_FUNC(other, mad, reference::mad, false, 0.0f, 0.0f, 0.1f, -10.0f, 10.0f, -10.0f, 10.0f, -10.0f, 10.0f)
|
||||
|
||||
// other functions
|
||||
AUTO_TEST_CASE(test_other_funcs)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
int last_error = CL_SUCCESS;
|
||||
|
||||
// Check for EMBEDDED_PROFILE
|
||||
bool is_embedded_profile = false;
|
||||
char profile[128];
|
||||
last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
|
||||
RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
|
||||
if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
|
||||
is_embedded_profile = true;
|
||||
|
||||
// gentype erf(gentype x);
|
||||
// gentype erfc(gentype x);
|
||||
TEST_UNARY_FUNC_MACRO((other_func_erfc(is_embedded_profile)))
|
||||
TEST_UNARY_FUNC_MACRO((other_func_erf(is_embedded_profile)))
|
||||
|
||||
// gentype fabs(gentype x);
|
||||
TEST_UNARY_FUNC_MACRO((other_func_fabs(is_embedded_profile)))
|
||||
|
||||
// gentype tgamma(gentype x);
|
||||
TEST_UNARY_FUNC_MACRO((other_func_tgamma(is_embedded_profile)))
|
||||
|
||||
// gentype hypot(gentype x, gentype y);
|
||||
TEST_BINARY_FUNC_MACRO((other_func_hypot(is_embedded_profile)))
|
||||
|
||||
// gentype mad(gentype a, gentype b, gentype c);
|
||||
TEST_TERNARY_FUNC_MACRO((other_func_mad(is_embedded_profile)))
|
||||
|
||||
if(error != CL_SUCCESS)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP
|
||||
@@ -1,153 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP
|
||||
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// Defines struct power_func_<NAME>: the host-side reference functor for a
// binary power function whose second argument is an integer
// (cl_float x, cl_int y -> cl_float).
//
//   NAME          - function name; also returned by str()
//   HOST_FUNC     - host function used to compute the reference value
//   USE_ULP       - value returned by use_ulp()
//   ULP           - value returned by ulp() when not constructed as embedded
//   ULP_EMBEDDED  - value returned by ulp() when constructed as embedded
//   MIN1, MAX1    - values returned by min1() / max1()
//   MIN2, MAX2    - values returned by min2() / max2()
#define DEFINE_BINARY_POWER_FUNC_INT(NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, MIN1, MAX1, MIN2, MAX2) \
struct power_func_ ## NAME : public binary_func<cl_float, cl_int, cl_float> \
{ \
    power_func_ ## NAME(bool is_embedded) : m_is_embedded(is_embedded) {} \
    \
    std::string str() { return #NAME; } \
    \
    std::string headers() { return "#include <opencl_math>\n"; } \
    \
    /* Reference value type is cl_double */ \
    cl_double operator()(const cl_float& x, const cl_int& y) \
    { \
        return (HOST_FUNC)(static_cast<cl_double>(x), y); \
    } \
    \
    cl_float min1() { return MIN1; } \
    cl_float max1() { return MAX1; } \
    \
    cl_int min2() { return MIN2; } \
    cl_int max2() { return MAX2; } \
    \
    std::vector<cl_float> in1_special_cases() \
    { \
        return { cl_float(-1.0f), cl_float(0.0f), cl_float(-0.0f), }; \
    } \
    \
    std::vector<cl_int> in2_special_cases() \
    { \
        return { 2, 3, -1, 1, -2, 2 }; \
    } \
    \
    bool use_ulp() { return USE_ULP; } \
    \
    float ulp() \
    { \
        return m_is_embedded ? (ULP_EMBEDDED) : (ULP); \
    } \
private: \
    bool m_is_embedded; \
};
|
||||
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(power, cbrt, std::cbrt, true, 2.0f, 4.0f, 0.001f, -1000.0f, -9.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(power, rsqrt, reference::rsqrt, true, 2.0f, 4.0f, 0.001f, 1.0f, 100.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(power, sqrt, std::sqrt, true, 3.0f, 4.0f, 0.001f, 1.0f, 100.0f)
|
||||
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(power, pow, std::pow, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f)
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(power, powr, reference::powr, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f)
|
||||
|
||||
// func_name, reference_func, use_ulp, ulp, ulp_for_embedded, min1, max1, min2, max2
|
||||
DEFINE_BINARY_POWER_FUNC_INT(pown, std::pow, true, 16.0f, 16.0f, 1.0f, 100.0f, 1, 10)
|
||||
DEFINE_BINARY_POWER_FUNC_INT(rootn, reference::rootn, true, 16.0f, 16.0f, -100.0f, 100.0f, -10, 10)
|
||||
|
||||
// power functions
|
||||
AUTO_TEST_CASE(test_power_funcs)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
int last_error = CL_SUCCESS;
|
||||
|
||||
// Check for EMBEDDED_PROFILE
|
||||
bool is_embedded_profile = false;
|
||||
char profile[128];
|
||||
last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
|
||||
RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
|
||||
if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
|
||||
is_embedded_profile = true;
|
||||
|
||||
// gentype cbrt(gentype x);
|
||||
// gentype rsqrt(gentype x);
|
||||
// gentype sqrt(gentype x);
|
||||
TEST_UNARY_FUNC_MACRO((power_func_cbrt(is_embedded_profile)))
|
||||
TEST_UNARY_FUNC_MACRO((power_func_sqrt(is_embedded_profile)))
|
||||
TEST_UNARY_FUNC_MACRO((power_func_rsqrt(is_embedded_profile)))
|
||||
|
||||
// gentype pow(gentype x, gentype y);
|
||||
// gentype powr(gentype x, gentype y);
|
||||
TEST_BINARY_FUNC_MACRO((power_func_pow(is_embedded_profile)))
|
||||
TEST_BINARY_FUNC_MACRO((power_func_powr(is_embedded_profile)))
|
||||
|
||||
// gentype pown(gentype x, intn y);
|
||||
// gentype rootn(gentype x, intn y);
|
||||
TEST_BINARY_FUNC_MACRO((power_func_pown(is_embedded_profile)))
|
||||
TEST_BINARY_FUNC_MACRO((power_func_rootn(is_embedded_profile)))
|
||||
|
||||
if(error != CL_SUCCESS)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP
|
||||
@@ -1,315 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include "../common.hpp"
|
||||
|
||||
namespace reference
|
||||
{
|
||||
// Reference functions for OpenCL comparison functions that
|
||||
// are not already defined in STL.
|
||||
// Returns the argument with the larger magnitude; when the magnitudes do not
// order strictly (equal, or a NaN is involved) the result is fmax(x, y).
cl_float maxmag(const cl_float& x, const cl_float& y)
{
    const auto magnitude_x = (std::abs)(x);
    const auto magnitude_y = (std::abs)(y);

    if(magnitude_x > magnitude_y)
        return x;
    if(magnitude_y > magnitude_x)
        return y;
    return (std::fmax)(x, y);
}
|
||||
|
||||
// Returns the argument with the smaller magnitude; when the magnitudes do not
// order strictly (equal, or a NaN is involved) the result is fmin(x, y).
cl_float minmag(const cl_float& x, const cl_float& y)
{
    const auto magnitude_x = (std::abs)(x);
    const auto magnitude_y = (std::abs)(y);

    if(magnitude_x < magnitude_y)
        return x;
    if(magnitude_y < magnitude_x)
        return y;
    return (std::fmin)(x, y);
}
|
||||
|
||||
// Reference functions for OpenCL exp functions that
|
||||
// are not already defined in STL.
|
||||
// Reference for exp10: computed as exp2(x * log2(10)).
cl_double exp10(const cl_double& x)
{
    // log2(10) is evaluated in long double before the product is narrowed
    // back to cl_double.
    const long double log2_of_10 = (std::log2)(static_cast<long double>(10.0));
    const cl_double exponent = static_cast<cl_double>(x * log2_of_10);
    return (std::exp2)(exponent);
}
|
||||
|
||||
// Reference functions for OpenCL floating point functions that
|
||||
// are not already defined in STL.
|
||||
// Reference for fract. The result holds the fractional part in s[0] and the
// integral part in s[1]; both are NaN for a NaN input.
// Copied from math_brute_force/reference_math.c
cl_double2 fract(cl_double x)
{
    cl_double2 result;

    if((std::isnan)(x))
    {
        const cl_double nan = std::numeric_limits<cl_double>::quiet_NaN();
        result.s[0] = nan;
        result.s[1] = nan;
        return result;
    }

    cl_double integral = 0.0;
    cl_double fractional = (std::modf)(x, &integral);

    // modf yields a negative fraction for negative inputs; shift it into the
    // non-negative range and lower the integral part accordingly.
    if(fractional < 0.0)
    {
        fractional = 1.0f + fractional;
        integral -= 1.0f;
        // If the shifted fraction came out as 1, replace it with the HEX_FLT
        // constant below.
        if(fractional == 1.0f)
            fractional = HEX_FLT(+, 1, fffffe, -, 1);
    }

    result.s[0] = fractional;
    result.s[1] = integral;
    return result;
}
|
||||
|
||||
// Reference for remquo. s[0] holds the same value std::remainder(x, y)
// returns; s[1] holds the quotient: the low seven bits of the magnitude of the
// integral quotient, carrying the sign of the quotient.
//
// The quotient is reported as 0 whenever it cannot be computed, i.e. when
// (x - remainder) / y is NaN or infinite. This covers the cases the previous
// implementation patched afterwards (y == 0 with non-NaN x, both arguments
// NaN) and also the remaining NaN/infinite-x cases, where the former
// conversion of NaN or infinity to cl_uint was undefined behaviour.
cl_double2 remquo(cl_double x, cl_double y)
{
    cl_double2 r;

    // remquo returns the same value that is returned by the
    // remainder function
    r.s[0] = (std::remainder)(x, y);
    r.s[1] = 0;

    // calculate quo
    const cl_double x_y = (x - r.s[0]) / y;
    if((std::isfinite)(x_y))
    {
        // Mathematically x_y is an integer, but the subtraction and division
        // may leave it a hair below one; round instead of truncating so that
        // e.g. 2.9999999999999996 is counted as 3.
        // fmod(..., 128) keeps the low seven bits without converting a
        // possibly huge value to an integer type.
        const cl_double magnitude =
            (std::fmod)((std::round)((std::abs)(x_y)), 128.0);
        r.s[1] = (x_y < 0.0) ? -magnitude : magnitude;
    }
    return r;
}
|
||||
|
||||
// Reference functions for OpenCL half_math:: functions that
|
||||
// are not already defined in STL.
|
||||
cl_double divide(cl_double x, cl_double y)
|
||||
{
|
||||
return x / y;
|
||||
}
|
||||
|
||||
cl_double recip(cl_double x)
|
||||
{
|
||||
return 1.0 / x;
|
||||
}
|
||||
|
||||
// Reference functions for OpenCL other functions that
|
||||
// are not already defined in STL.
|
||||
cl_double mad(cl_double x, cl_double y, cl_double z)
|
||||
{
|
||||
return (x * y) + z;
|
||||
}
|
||||
|
||||
// Reference functions for OpenCL power functions that
|
||||
// are not already defined in STL.
|
||||
cl_double rsqrt(const cl_double& x)
|
||||
{
|
||||
return cl_double(1.0) / ((std::sqrt)(x));
|
||||
}
|
||||
|
||||
// Reference for powr(x, y). The special cases are resolved one by one with
// early returns; every remaining input is forwarded to std::pow.
cl_double powr(const cl_double& x, const cl_double& y)
{
    const cl_double nan = std::numeric_limits<cl_double>::quiet_NaN();
    const cl_double inf = std::numeric_limits<cl_double>::infinity();

    // powr(x, y) returns NaN for x < 0.
    if(x < 0.0)
        return nan;

    // powr(x, NaN) returns the NaN for x >= 0; powr(NaN, y) returns the NaN.
    if((std::isnan)(x) || (std::isnan)(y))
        return nan;

    // powr(+1, +-inf) returns NaN; powr(+1, y) is 1 for finite y.
    if(x == 1.0)
        return (std::isinf)(y) ? nan : 1.0;

    // powr(+inf, +-0) and powr(+-0, +-0) return NaN;
    // powr(x, +-0) is 1 for finite x > 0.
    if(y == 0.0)
        return (x == 0.0 || x == inf) ? nan : 1.0;

    // powr(+-0, y) is +inf for y < 0 (including -inf) and +0 for y > 0.
    if(x == 0.0)
        return (y < 0.0) ? inf : 0.0;

    // x == +inf (negative x was rejected above)
    if((std::isinf)(x))
        return (y < 0) ? 0.0 : inf;

    // y == +-inf: here x is finite, positive and different from 1.
    if((std::isinf)(y))
    {
        const bool base_below_one = (x < 1.0);
        if(y < 0.0)
            return base_below_one ? inf : 0.0;
        return base_below_one ? 0.0 : inf;
    }

    return (std::pow)(x, y);
}
|
||||
|
||||
// Reference for rootn(x, n): the n-th root of x.
//
// Special cases handled before the general formula:
//   rootn(x, 0)                    -> NaN
//   rootn(x, n), x < 0, n even     -> NaN
//   rootn(+-0, n), n > 0, n even   -> +0
//   rootn(+-0, n), n > 0, n odd    -> +-0
//   rootn(+-0, n), n < 0, n even   -> +inf
//   rootn(+-0, n), n < 0, n odd    -> +-inf
cl_double rootn(const cl_double& x, const cl_int n)
{
    const cl_double nan = std::numeric_limits<cl_double>::quiet_NaN();
    const cl_double inf = std::numeric_limits<cl_double>::infinity();

    // n and -n always have the same parity, so test n itself: negating the
    // most negative cl_int would overflow.
    const bool n_is_even = (0 == (n & 1));

    //rootn (x, 0) returns a NaN.
    if(n == 0)
        return nan;

    //rootn ( x, n ) returns a NaN for x < 0 and n is even.
    if(x < 0 && n_is_even)
        return nan;

    if(x == 0.0)
    {
        if(n > 0)
        {
            // even n: +0, odd n: zero with the sign of x
            return n_is_even ? cl_double(0.0) : x;
        }
        // even n: +inf, odd n: infinity with the sign of x
        return n_is_even ? inf : (std::copysign)(inf, x);
    }

    // General case: |x|^(1/n) computed as exp2(log2(|x|) / n), with the sign
    // of x restored afterwards.
    cl_double r = (std::abs)(x);
    r = (std::exp2)((std::log2)(r) / static_cast<cl_double>(n));
    return (std::copysign)(r, x);
}
|
||||
|
||||
// Reference functions for OpenCL trigonometric functions that
|
||||
// are not already defined in STL.
|
||||
// Reference for acospi: acos(x) / pi.
cl_double acospi(cl_double x)
{
    const cl_double angle = (std::acos)(x);
    return angle / CL_M_PI;
}

// Reference for asinpi: asin(x) / pi.
cl_double asinpi(cl_double x)
{
    const cl_double angle = (std::asin)(x);
    return angle / CL_M_PI;
}

// Reference for atanpi: atan(x) / pi.
cl_double atanpi(cl_double x)
{
    const cl_double angle = (std::atan)(x);
    return angle / CL_M_PI;
}

// Reference for cospi: cos(pi * x).
cl_double cospi(cl_double x)
{
    const cl_double angle = x * CL_M_PI;
    return (std::cos)(angle);
}

// Reference for sinpi: sin(pi * x).
cl_double sinpi(cl_double x)
{
    const cl_double angle = x * CL_M_PI;
    return (std::sin)(angle);
}

// Reference for tanpi: tan(pi * x).
cl_double tanpi(cl_double x)
{
    const cl_double angle = x * CL_M_PI;
    return (std::tan)(angle);
}
|
||||
|
||||
// Reference for atan2. The arguments are forwarded to std::atan2 in the same
// order, i.e. the first parameter is std::atan2's first argument.
cl_double atan2(cl_double x, cl_double y)
{
#if defined(WIN32) || defined(_WIN32)
    // Fix edge cases for Windows: when both arguments are infinite the result
    // is +-pi/4 (second argument positive) or +-3*pi/4 (second argument
    // negative), carrying the sign of the first argument.
    if((std::isinf)(x) && (std::isinf)(y))
    {
        const cl_double magnitude = (y > 0) ? CL_M_PI_4 : 3.f * CL_M_PI_4;
        if(x > 0)
            return magnitude;
        return -magnitude;
    }
#endif // defined(WIN32) || defined(_WIN32)
    return (std::atan2)(x, y);
}

// Reference for atan2pi: reference::atan2(x, y) / pi.
cl_double atan2pi(cl_double x, cl_double y)
{
    const cl_double angle = ::reference::atan2(x, y);
    return angle / CL_M_PI;
}
|
||||
|
||||
// Reference for sincos: s[0] holds sin(x), s[1] holds cos(x).
cl_double2 sincos(cl_double x)
{
    cl_double2 sine_and_cosine;
    sine_and_cosine.s[0] = (std::sin)(x);
    sine_and_cosine.s[1] = (std::cos)(x);
    return sine_and_cosine;
}
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP
|
||||
@@ -1,222 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acos, std::acos, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acosh, std::acosh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acospi, reference::acospi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asin, std::asin, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinh, std::asinh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinpi, reference::asinpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atan, std::atan, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanh, std::atanh, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanpi, reference::atanpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
|
||||
|
||||
// For (sin/cos/tan)pi functions min input value is -0.24 and max input value is 0.24,
|
||||
// so (CL_M_PI * x) is never greater than CL_M_PI_F.
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cos, std::cos, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cosh, std::cosh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cospi, reference::cospi, true, 4.0f, 4.0f, 0.001f, -0.24, -0.24f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sin, std::sin, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinh, std::sinh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinpi, reference::sinpi, true, 4.0f, 4.0f, 0.001f, -0.24, -0.24f)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tan, std::tan, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanh, std::tanh, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
|
||||
MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanpi, reference::tanpi, true, 6.0f, 6.0f, 0.001f, -0.24, -0.24f)
|
||||
|
||||
// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2, reference::atan2, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f)
|
||||
MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2pi, reference::atan2pi, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f)
|
||||
|
||||
// gentype sincos(gentype x, gentype * cosval);
|
||||
//
|
||||
// Fact that second argument is a pointer is inconvenient.
|
||||
//
|
||||
// We don't want to modify all helper functions defined in funcs_test_utils.hpp
|
||||
// that run test kernels generated based on this class and check if results are
|
||||
// correct, so instead of having two output cl_float buffers, one for sines and
|
||||
// one for cosines values, we use one cl_float2 output buffer (first component is
|
||||
// sine, second is cosine).
|
||||
//
|
||||
// Below we also define specialization of generate_kernel_unary function template
|
||||
// for trigonometric_func_sincos.
|
||||
struct trigonometric_func_sincos : public unary_func<cl_float, cl_float2>
|
||||
{
|
||||
trigonometric_func_sincos(bool is_embedded) : m_is_embedded(is_embedded)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "sincos";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_math>\n";
|
||||
}
|
||||
|
||||
/* Reference value type is cl_double */
|
||||
cl_double2 operator()(const cl_float& x)
|
||||
{
|
||||
return (reference::sincos)(static_cast<cl_double>(x));
|
||||
}
|
||||
|
||||
cl_float min1()
|
||||
{
|
||||
return -CL_M_PI_F;
|
||||
}
|
||||
|
||||
cl_float max1()
|
||||
{
|
||||
return CL_M_PI_F;
|
||||
}
|
||||
|
||||
bool use_ulp()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
float ulp()
|
||||
{
|
||||
if(m_is_embedded)
|
||||
{
|
||||
return 4.0f;
|
||||
}
|
||||
return 4.0f;
|
||||
}
|
||||
private:
|
||||
bool m_is_embedded;
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
template <>
|
||||
std::string generate_kernel_unary<trigonometric_func_sincos, cl_float, cl_float2>(trigonometric_func_sincos func)
|
||||
{
|
||||
return
|
||||
"__kernel void test_sincos(global float *input, global float2 *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 sine_cosine_of_x;\n"
|
||||
" float cosine_of_x = 0;\n"
|
||||
" sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n"
|
||||
" sine_cosine_of_x.y = cosine_of_x;\n"
|
||||
" output[gid] = sine_cosine_of_x;\n"
|
||||
"}\n";
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
std::string generate_kernel_unary<trigonometric_func_sincos, cl_float, cl_float2>(trigonometric_func_sincos func)
|
||||
{
|
||||
return
|
||||
"" + func.defs() +
|
||||
"" + func.headers() +
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_sincos(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" float2 sine_cosine_of_x;\n"
|
||||
" float cosine_of_x = 0;\n"
|
||||
" sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n"
|
||||
" sine_cosine_of_x.y = cosine_of_x;\n"
|
||||
" output[gid] = sine_cosine_of_x;\n"
|
||||
"}\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
// trigonometric functions
|
||||
// Runs every trigonometric functor against the device.
// `error` and `last_error` are kept as named locals because they are passed to
// the RETURN_ON_CL_ERROR macro below.
// NOTE(review): the TEST_*_FUNC_MACRO helpers presumably also read/write these
// locals plus device/context/queue/n_elems - confirm before renaming any of them.
// Returns CL_SUCCESS when no failure was recorded in `error`, otherwise -1.
AUTO_TEST_CASE(test_trigonometric_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // Detect EMBEDDED_PROFILE devices; the flag is forwarded to every functor.
    char profile[128];
    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
    const bool is_embedded_profile = (std::strcmp(profile, "EMBEDDED_PROFILE") == 0);

    // Inverse functions:
    //   gentype acos(gentype x);   gentype acosh(gentype x);   gentype acospi(gentype x);
    //   gentype asin(gentype x);   gentype asinh(gentype x);   gentype asinpi(gentype x);
    //   gentype atan(gentype x);   gentype atanh(gentype x);   gentype atanpi(gentype x);
    TEST_UNARY_FUNC_MACRO((trigonometric_func_acos(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_acosh(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_acospi(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_asin(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_asinh(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_asinpi(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_atan(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_atanh(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_atanpi(is_embedded_profile)))

    // Forward functions:
    //   gentype cos(gentype x);    gentype cosh(gentype x);    gentype cospi(gentype x);
    //   gentype sin(gentype x);    gentype sinh(gentype x);    gentype sinpi(gentype x);
    //   gentype tan(gentype x);    gentype tanh(gentype x);    gentype tanpi(gentype x);
    TEST_UNARY_FUNC_MACRO((trigonometric_func_cos(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_cosh(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_cospi(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_sin(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_sinh(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_sinpi(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_tan(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_tanh(is_embedded_profile)))
    TEST_UNARY_FUNC_MACRO((trigonometric_func_tanpi(is_embedded_profile)))

    // Two-argument functions:
    //   gentype atan2(gentype y, gentype x);
    //   gentype atan2pi(gentype y, gentype x);
    TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2(is_embedded_profile)))
    TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2pi(is_embedded_profile)))

    // gentype sincos(gentype x, gentype * cosval);
    TEST_UNARY_FUNC_MACRO((trigonometric_func_sincos(is_embedded_profile)))

    // Collapse any recorded failure into the generic -1 result.
    return (error != CL_SUCCESS) ? -1 : error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Build description for the OpenCL C++ pipes conformance module.
set(MODULE_NAME CPP_PIPES)

# Sources of this module, stored in CPP_PIPES_SOURCES.
# NOTE(review): presumably consumed by CMakeCommon.txt below - confirm there.
set(${MODULE_NAME}_SOURCES main.cpp)

include(../../CMakeCommon.txt)
|
||||
@@ -1,25 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_pipes.hpp"
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,632 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
|
||||
|
||||
namespace test_pipes {
|
||||
|
||||
// Where the OpenCL C++ kernels obtain their pipe object from.
enum class pipe_source
{
    param,  // the pipe passed as kernel argument is used directly
    storage // a program-scope pipe_storage object is used instead
};

// Pipe access pattern exercised by the generated producer/consumer kernels.
enum class pipe_operation
{
    work_item,              // plain read/write without a reservation
    work_item_reservation,  // per-work-item reservation of two packets
    work_group_reservation, // one reservation per work-group
    sub_group_reservation   // one reservation per sub-group
};

// Parameters of a single pipe test run.
struct test_options
{
    pipe_operation operation; // access pattern to generate kernels for
    pipe_source source;       // kernel argument vs. program-scope storage
    int max_packets;          // capacity the pipe is created with
    int num_packets;          // packets written/read; also the global work size
};
|
||||
|
||||
struct output_type
|
||||
{
|
||||
cl_uint write_reservation_is_valid;
|
||||
cl_uint write_success;
|
||||
|
||||
cl_uint num_packets;
|
||||
cl_uint max_packets;
|
||||
cl_uint read_reservation_is_valid;
|
||||
cl_uint read_success;
|
||||
|
||||
cl_uint value;
|
||||
};
|
||||
|
||||
const std::string source_common = R"(
|
||||
struct output_type
|
||||
{
|
||||
uint write_reservation_is_valid;
|
||||
uint write_success;
|
||||
|
||||
uint num_packets;
|
||||
uint max_packets;
|
||||
uint read_reservation_is_valid;
|
||||
uint read_success;
|
||||
|
||||
uint value;
|
||||
};
|
||||
)";
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::stringstream s;
|
||||
s << source_common;
|
||||
if (options.operation == pipe_operation::work_item)
|
||||
{
|
||||
s << R"(
|
||||
kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
output[gid].write_reservation_is_valid = 1;
|
||||
|
||||
uint value = gid;
|
||||
output[gid].write_success = write_pipe(out_pipe, &value) == 0;
|
||||
}
|
||||
|
||||
kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
output[gid].num_packets = get_pipe_num_packets(in_pipe);
|
||||
output[gid].max_packets = get_pipe_max_packets(in_pipe);
|
||||
|
||||
output[gid].read_reservation_is_valid = 1;
|
||||
|
||||
uint value;
|
||||
output[gid].read_success = read_pipe(in_pipe, &value) == 0;
|
||||
output[gid].value = value;
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.operation == pipe_operation::work_item_reservation)
|
||||
{
|
||||
s << R"(
|
||||
kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
if (gid % 2 == 1) return;
|
||||
|
||||
reserve_id_t reservation = reserve_write_pipe(out_pipe, 2);
|
||||
output[gid + 0].write_reservation_is_valid = is_valid_reserve_id(reservation);
|
||||
output[gid + 1].write_reservation_is_valid = is_valid_reserve_id(reservation);
|
||||
|
||||
uint value0 = gid + 0;
|
||||
uint value1 = gid + 1;
|
||||
output[gid + 0].write_success = write_pipe(out_pipe, reservation, 0, &value0) == 0;
|
||||
output[gid + 1].write_success = write_pipe(out_pipe, reservation, 1, &value1) == 0;
|
||||
commit_write_pipe(out_pipe, reservation);
|
||||
}
|
||||
|
||||
kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
if (gid % 2 == 1) return;
|
||||
|
||||
output[gid + 0].num_packets = get_pipe_num_packets(in_pipe);
|
||||
output[gid + 0].max_packets = get_pipe_max_packets(in_pipe);
|
||||
output[gid + 1].num_packets = get_pipe_num_packets(in_pipe);
|
||||
output[gid + 1].max_packets = get_pipe_max_packets(in_pipe);
|
||||
|
||||
reserve_id_t reservation = reserve_read_pipe(in_pipe, 2);
|
||||
output[gid + 0].read_reservation_is_valid = is_valid_reserve_id(reservation);
|
||||
output[gid + 1].read_reservation_is_valid = is_valid_reserve_id(reservation);
|
||||
|
||||
uint value0;
|
||||
uint value1;
|
||||
output[gid + 0].read_success = read_pipe(in_pipe, reservation, 1, &value0) == 0;
|
||||
output[gid + 1].read_success = read_pipe(in_pipe, reservation, 0, &value1) == 0;
|
||||
commit_read_pipe(in_pipe, reservation);
|
||||
output[gid + 0].value = value0;
|
||||
output[gid + 1].value = value1;
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.operation == pipe_operation::work_group_reservation)
|
||||
{
|
||||
s << R"(
|
||||
kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
reserve_id_t reservation = work_group_reserve_write_pipe(out_pipe, get_local_size(0));
|
||||
output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation);
|
||||
|
||||
uint value = gid;
|
||||
output[gid].write_success = write_pipe(out_pipe, reservation, get_local_id(0), &value) == 0;
|
||||
work_group_commit_write_pipe(out_pipe, reservation);
|
||||
}
|
||||
|
||||
kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
output[gid].num_packets = get_pipe_num_packets(in_pipe);
|
||||
output[gid].max_packets = get_pipe_max_packets(in_pipe);
|
||||
|
||||
reserve_id_t reservation = work_group_reserve_read_pipe(in_pipe, get_local_size(0));
|
||||
output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation);
|
||||
|
||||
uint value;
|
||||
output[gid].read_success = read_pipe(in_pipe, reservation, get_local_size(0) - 1 - get_local_id(0), &value) == 0;
|
||||
work_group_commit_read_pipe(in_pipe, reservation);
|
||||
output[gid].value = value;
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.operation == pipe_operation::sub_group_reservation)
|
||||
{
|
||||
s << R"(
|
||||
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
|
||||
|
||||
kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
reserve_id_t reservation = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size());
|
||||
output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation);
|
||||
|
||||
uint value = gid;
|
||||
output[gid].write_success = write_pipe(out_pipe, reservation, get_sub_group_local_id(), &value) == 0;
|
||||
sub_group_commit_write_pipe(out_pipe, reservation);
|
||||
}
|
||||
|
||||
kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
|
||||
{
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
output[gid].num_packets = get_pipe_num_packets(in_pipe);
|
||||
output[gid].max_packets = get_pipe_max_packets(in_pipe);
|
||||
|
||||
reserve_id_t reservation = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size());
|
||||
output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation);
|
||||
|
||||
uint value;
|
||||
output[gid].read_success = read_pipe(in_pipe, reservation, get_sub_group_size() - 1 - get_sub_group_local_id(), &value) == 0;
|
||||
sub_group_commit_read_pipe(in_pipe, reservation);
|
||||
output[gid].value = value;
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#else
|
||||
std::string generate_source(test_options options)
|
||||
{
|
||||
std::stringstream s;
|
||||
s << R"(
|
||||
#include <opencl_memory>
|
||||
#include <opencl_common>
|
||||
#include <opencl_work_item>
|
||||
#include <opencl_synchronization>
|
||||
#include <opencl_pipe>
|
||||
using namespace cl;
|
||||
)";
|
||||
|
||||
s << source_common;
|
||||
|
||||
std::string init_out_pipe;
|
||||
std::string init_in_pipe;
|
||||
if (options.source == pipe_source::param)
|
||||
{
|
||||
init_out_pipe = "auto out_pipe = pipe_param;";
|
||||
init_in_pipe = "auto in_pipe = pipe_param;";
|
||||
}
|
||||
else if (options.source == pipe_source::storage)
|
||||
{
|
||||
s << "pipe_storage<uint, " << std::to_string(options.max_packets) << "> storage;";
|
||||
init_out_pipe = "auto out_pipe = storage.get<pipe_access::write>();";
|
||||
init_in_pipe = "auto in_pipe = make_pipe(storage);";
|
||||
}
|
||||
|
||||
if (options.operation == pipe_operation::work_item)
|
||||
{
|
||||
s << R"(
|
||||
kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
|
||||
{
|
||||
)" << init_out_pipe << R"(
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
output[gid].write_reservation_is_valid = 1;
|
||||
|
||||
uint value = gid;
|
||||
output[gid].write_success = out_pipe.write(value);
|
||||
}
|
||||
|
||||
kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
|
||||
{
|
||||
)" << init_in_pipe << R"(
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
output[gid].num_packets = in_pipe.num_packets();
|
||||
output[gid].max_packets = in_pipe.max_packets();
|
||||
|
||||
output[gid].read_reservation_is_valid = 1;
|
||||
|
||||
uint value;
|
||||
output[gid].read_success = in_pipe.read(value);
|
||||
output[gid].value = value;
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.operation == pipe_operation::work_item_reservation)
|
||||
{
|
||||
s << R"(
|
||||
kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
|
||||
{
|
||||
)" << init_out_pipe << R"(
|
||||
const ulong gid = get_global_id(0);
|
||||
if (gid % 2 == 1) return;
|
||||
|
||||
auto reservation = out_pipe.reserve(2);
|
||||
output[gid + 0].write_reservation_is_valid = reservation.is_valid();
|
||||
output[gid + 1].write_reservation_is_valid = reservation.is_valid();
|
||||
|
||||
uint value0 = gid + 0;
|
||||
uint value1 = gid + 1;
|
||||
output[gid + 0].write_success = reservation.write(0, value0);
|
||||
output[gid + 1].write_success = reservation.write(1, value1);
|
||||
reservation.commit();
|
||||
}
|
||||
|
||||
kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
|
||||
{
|
||||
)" << init_in_pipe << R"(
|
||||
const ulong gid = get_global_id(0);
|
||||
if (gid % 2 == 1) return;
|
||||
|
||||
output[gid + 0].num_packets = in_pipe.num_packets();
|
||||
output[gid + 0].max_packets = in_pipe.max_packets();
|
||||
output[gid + 1].num_packets = in_pipe.num_packets();
|
||||
output[gid + 1].max_packets = in_pipe.max_packets();
|
||||
|
||||
auto reservation = in_pipe.reserve(2);
|
||||
output[gid + 0].read_reservation_is_valid = reservation.is_valid();
|
||||
output[gid + 1].read_reservation_is_valid = reservation.is_valid();
|
||||
|
||||
uint value0;
|
||||
uint value1;
|
||||
output[gid + 0].read_success = reservation.read(1, value0);
|
||||
output[gid + 1].read_success = reservation.read(0, value1);
|
||||
reservation.commit();
|
||||
output[gid + 0].value = value0;
|
||||
output[gid + 1].value = value1;
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.operation == pipe_operation::work_group_reservation)
|
||||
{
|
||||
s << R"(
|
||||
kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
|
||||
{
|
||||
)" << init_out_pipe << R"(
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
auto reservation = out_pipe.work_group_reserve(get_local_size(0));
|
||||
output[gid].write_reservation_is_valid = reservation.is_valid();
|
||||
|
||||
uint value = gid;
|
||||
output[gid].write_success = reservation.write(get_local_id(0), value);
|
||||
reservation.commit();
|
||||
}
|
||||
|
||||
kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
|
||||
{
|
||||
)" << init_in_pipe << R"(
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
output[gid].num_packets = in_pipe.num_packets();
|
||||
output[gid].max_packets = in_pipe.max_packets();
|
||||
|
||||
auto reservation = in_pipe.work_group_reserve(get_local_size(0));
|
||||
output[gid].read_reservation_is_valid = reservation.is_valid();
|
||||
|
||||
uint value;
|
||||
output[gid].read_success = reservation.read(get_local_size(0) - 1 - get_local_id(0), value);
|
||||
reservation.commit();
|
||||
output[gid].value = value;
|
||||
}
|
||||
)";
|
||||
}
|
||||
else if (options.operation == pipe_operation::sub_group_reservation)
|
||||
{
|
||||
s << R"(
|
||||
kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
|
||||
{
|
||||
)" << init_out_pipe << R"(
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
auto reservation = out_pipe.sub_group_reserve(get_sub_group_size());
|
||||
output[gid].write_reservation_is_valid = reservation.is_valid();
|
||||
|
||||
uint value = gid;
|
||||
output[gid].write_success = reservation.write(get_sub_group_local_id(), value);
|
||||
reservation.commit();
|
||||
}
|
||||
|
||||
kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
|
||||
{
|
||||
)" << init_in_pipe << R"(
|
||||
const ulong gid = get_global_id(0);
|
||||
|
||||
output[gid].num_packets = in_pipe.num_packets();
|
||||
output[gid].max_packets = in_pipe.max_packets();
|
||||
|
||||
auto reservation = in_pipe.sub_group_reserve(get_sub_group_size());
|
||||
output[gid].read_reservation_is_valid = reservation.is_valid();
|
||||
|
||||
uint value;
|
||||
output[gid].read_success = reservation.read(get_sub_group_size() - 1 - get_sub_group_local_id(), value);
|
||||
reservation.commit();
|
||||
output[gid].value = value;
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
return s.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Runs one producer/consumer pipe test for the given configuration.
//
// Flow: build the program for `options`, create a pipe with
// options.max_packets packets of one cl_uint each plus a zero-filled buffer of
// output_type records, run the producer and then the consumer kernel over
// options.num_packets work-items, read the records back and validate them.
//
// Returns CL_SUCCESS on success (or when the configuration is skipped), the
// failing OpenCL status when an API call fails, and -1 for invalid options or
// when validation of the kernel results fails.
//
// Every OpenCL object adopted by `objs` below is released on all return paths,
// including the early returns performed by the RETURN_ON_* macros.
int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
{
    int error = CL_SUCCESS;

    // The reservation kernels handle packets in pairs, and the pipe must be
    // able to hold everything the producer writes.
    if (options.num_packets % 2 != 0 || options.max_packets < options.num_packets)
    {
        RETURN_ON_ERROR_MSG(-1, "Invalid test options")
    }

#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    if (options.operation == pipe_operation::sub_group_reservation && !is_extension_available(device, "cl_khr_subgroups"))
    {
        log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
        return CL_SUCCESS;
    }
#endif

    // Scope guard: releases whatever has been adopted so far when the function
    // returns. The release order matches the original success path.
    struct owned_objects
    {
        cl_program program = NULL;
        cl_kernel producer_kernel = NULL;
        cl_kernel consumer_kernel = NULL;
        cl_mem pipe = NULL;
        cl_mem output_buffer = NULL;

        ~owned_objects()
        {
            if (pipe != NULL) clReleaseMemObject(pipe);
            if (output_buffer != NULL) clReleaseMemObject(output_buffer);
            if (producer_kernel != NULL) clReleaseKernel(producer_kernel);
            if (consumer_kernel != NULL) clReleaseKernel(consumer_kernel);
            if (program != NULL) clReleaseProgram(program);
        }
    } objs;

    // Handles are only adopted by `objs` after create_opencl_kernel succeeds,
    // because this file does not show what the helper leaves behind on failure.
    cl_program program = NULL;
    cl_kernel producer_kernel = NULL;

    std::string producer_kernel_name = "producer";
    std::string consumer_kernel_name = "consumer";
    std::string source = generate_source(options);

// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    error = create_opencl_kernel(
        context, &program, &producer_kernel,
        source, producer_kernel_name
    );
    RETURN_ON_ERROR(error)
    return error;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    error = create_opencl_kernel(
        context, &program, &producer_kernel,
        source, producer_kernel_name, "-cl-std=CL2.0", false
    );
    RETURN_ON_ERROR(error)
    objs.program = program;
    objs.producer_kernel = producer_kernel;
    objs.consumer_kernel = clCreateKernel(objs.program, consumer_kernel_name.c_str(), &error);
    RETURN_ON_CL_ERROR(error, "clCreateKernel")
// Normal run
#else
    error = create_opencl_kernel(
        context, &program, &producer_kernel,
        source, producer_kernel_name
    );
    RETURN_ON_ERROR(error)
    objs.program = program;
    objs.producer_kernel = producer_kernel;
    objs.consumer_kernel = clCreateKernel(objs.program, consumer_kernel_name.c_str(), &error);
    RETURN_ON_CL_ERROR(error, "clCreateKernel")
#endif

    size_t max_work_group_size;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL);
    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")

    const size_t count = options.num_packets;
    const size_t local_size = (std::min)((size_t)256, max_work_group_size);
    const size_t global_size = count;

    const cl_uint packet_size = sizeof(cl_uint);

    objs.pipe = clCreatePipe(context, 0, packet_size, options.max_packets, NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreatePipe")

    objs.output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error);
    RETURN_ON_CL_ERROR(error, "clCreateBuffer")

    // Zero the records so that a field a kernel never wrote reads as "failed".
    const char pattern = 0;
    error = clEnqueueFillBuffer(queue, objs.output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")

    // Producer: the runtime picks the work-group size.
    error = clSetKernelArg(objs.producer_kernel, 0, sizeof(cl_mem), &objs.pipe);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(objs.producer_kernel, 1, sizeof(objs.output_buffer), &objs.output_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    error = clEnqueueNDRangeKernel(queue, objs.producer_kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Consumer: explicit work-group size of min(256, device maximum).
    error = clSetKernelArg(objs.consumer_kernel, 0, sizeof(cl_mem), &objs.pipe);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
    error = clSetKernelArg(objs.consumer_kernel, 1, sizeof(objs.output_buffer), &objs.output_buffer);
    RETURN_ON_CL_ERROR(error, "clSetKernelArg")

    error = clEnqueueNDRangeKernel(queue, objs.consumer_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")

    // Blocking read: once it returns, both kernels have written their records.
    std::vector<output_type> output(count);
    error = clEnqueueReadBuffer(
        queue, objs.output_buffer, CL_TRUE,
        0, sizeof(output_type) * count,
        static_cast<void *>(output.data()),
        0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")

    // Both values were validated as non-negative relative to each other above;
    // the casts make the unsigned comparisons below explicit.
    const cl_uint expected_num_packets = static_cast<cl_uint>(options.num_packets);
    const cl_uint expected_max_packets = static_cast<cl_uint>(options.max_packets);

    std::vector<bool> existing_values(count, false);
    for (size_t gid = 0; gid < count; gid++)
    {
        const output_type &o = output[gid];

        if (!o.write_reservation_is_valid)
        {
            RETURN_ON_ERROR_MSG(-1, "write reservation is not valid")
        }
        if (!o.write_success)
        {
            RETURN_ON_ERROR_MSG(-1, "write did not succeed")
        }

        if (o.num_packets == 0 || o.num_packets > expected_num_packets)
        {
            RETURN_ON_ERROR_MSG(-1, "num_packets did not return correct value")
        }
        if (o.max_packets != expected_max_packets)
        {
            RETURN_ON_ERROR_MSG(-1, "max_packets did not return correct value")
        }
        if (!o.read_reservation_is_valid)
        {
            RETURN_ON_ERROR_MSG(-1, "read reservation is not valid")
        }
        if (!o.read_success)
        {
            RETURN_ON_ERROR_MSG(-1, "read did not succeed")
        }

        // Every value must be presented once in any order
        if (o.value >= count || existing_values[o.value])
        {
            RETURN_ON_ERROR_MSG(-1, "kernel did not return correct value")
        }
        existing_values[o.value] = true;
    }

    // `objs` releases the pipe, buffer, kernels and program on scope exit.
    return error;
}
|
||||
|
||||
// Every pipe access pattern, in the order the test cases iterate over them.
const pipe_operation pipe_operations[] = {
    pipe_operation::work_item,
    pipe_operation::work_item_reservation,
    pipe_operation::work_group_reservation,
    pipe_operation::sub_group_reservation
};

// Candidate (max_packets, num_packets) pairs: element 0 is the pipe capacity,
// element 1 the number of packets pushed through the pipe.
const std::tuple<int, int> max_and_num_packets[] = {
    std::tuple<int, int>(2, 2),
    std::tuple<int, int>(10, 8),
    std::tuple<int, int>(256, 254),
    std::tuple<int, int>(1 << 16, 1 << 16),
    std::tuple<int, int>((1 << 16) + 5, 1 << 16),
    std::tuple<int, int>(12345, 12344),
    std::tuple<int, int>(1 << 18, 1 << 18)
};
|
||||
|
||||
AUTO_TEST_CASE(test_pipes_pipe)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
std::vector<std::tuple<int, int>> ps;
|
||||
for (auto p : max_and_num_packets)
|
||||
{
|
||||
if (std::get<0>(p) < num_elements)
|
||||
ps.push_back(p);
|
||||
}
|
||||
ps.push_back(std::tuple<int, int>(num_elements, num_elements));
|
||||
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
for (auto operation : pipe_operations)
|
||||
for (auto p : ps)
|
||||
{
|
||||
test_options options;
|
||||
options.source = pipe_source::param;
|
||||
options.max_packets = std::get<0>(p);
|
||||
options.num_packets = std::get<1>(p);
|
||||
options.operation = operation;
|
||||
|
||||
error = test(device, context, queue, options);
|
||||
RETURN_ON_ERROR(error)
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
AUTO_TEST_CASE(test_pipes_pipe_storage)
|
||||
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
std::vector<std::tuple<int, int>> ps;
|
||||
for (auto p : max_and_num_packets)
|
||||
{
|
||||
if (std::get<0>(p) < num_elements)
|
||||
ps.push_back(p);
|
||||
}
|
||||
ps.push_back(std::tuple<int, int>(num_elements, num_elements));
|
||||
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
for (auto operation : pipe_operations)
|
||||
for (auto p : ps)
|
||||
{
|
||||
test_options options;
|
||||
options.source = pipe_source::storage;
|
||||
options.max_packets = std::get<0>(p);
|
||||
options.num_packets = std::get<1>(p);
|
||||
options.operation = operation;
|
||||
|
||||
error = test(device, context, queue, options);
|
||||
RETURN_ON_ERROR(error)
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Build description for the OpenCL C++ program-scope ctors/dtors conformance module.
set(MODULE_NAME CPP_PROGRAM_SCOPE_CTORS_DTORS)

# Sources of this module, stored in CPP_PROGRAM_SCOPE_CTORS_DTORS_SOURCES.
# NOTE(review): presumably consumed by CMakeCommon.txt below - confirm there.
set(${MODULE_NAME}_SOURCES main.cpp)

include(../../CMakeCommon.txt)
|
||||
@@ -1,283 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
// Runs one program-scope ctor/dtor test object and folds its result into the
// caller's accumulated status.
// The expansion site must provide: device, context, queue, count, and the
// integer locals `last_error` (result of this run) and `error` (OR-accumulated
// result of all runs).
#define RUN_PS_CTORS_DTORS_TEST_MACRO(TEST_CLASS)                                  \
    last_error = run_ps_ctor_dtor_test(device, context, queue, count, TEST_CLASS); \
    CHECK_ERROR(last_error)                                                        \
    error |= last_error;
|
||||
|
||||
// Base class for all tests for kernels with program scope object with
|
||||
// non-trivial ctors and/or dtors
|
||||
struct ps_ctors_dtors_test_base : public detail::base_func_type<cl_uint>
{
    // ctor is true, if and only if OpenCL program of this test contains program
    // scope variable with non-trivial ctor.
    // dtor is true, if and only if OpenCL program of this test contains program
    // scope variable with non-trivial dtor.
    ps_ctors_dtors_test_base(const bool ctor,
                             const bool dtor)
        : m_ctor(ctor), m_dtor(dtor)
    {
    }

    virtual ~ps_ctors_dtors_test_base() { }

    // Returns test name
    virtual std::string str() = 0;

    // Returns OpenCL program source
    virtual std::string generate_program() = 0;

    // Returns kernel names IN ORDER
    virtual std::vector<std::string> get_kernel_names()
    {
        // Typical case, that is, only one kernel
        return { this->get_kernel_name() };
    }

    // Returns value that is expected to be in output_buffer[i]
    virtual cl_uint operator()(size_t i) = 0;

    // Executes kernels
    // Typical case: execute every kernel once, every kernel has only
    // one argument, that is, output buffer.
    // Returns CL_SUCCESS when every enqueue succeeded (also for an empty
    // kernel list), otherwise the status of the first failing call.
    virtual cl_int execute(const std::vector<cl_kernel>& kernels,
                           cl_mem& output_buffer,
                           cl_command_queue& queue,
                           size_t work_size)
    {
        // Initialised so that an empty kernel list yields a defined result.
        cl_int err = CL_SUCCESS;
        for(auto& k : kernels)
        {
            err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer);
            RETURN_ON_CL_ERROR(err, "clSetKernelArg");

            err = clEnqueueNDRangeKernel(
                queue, k, 1,
                NULL, &work_size, NULL,
                0, NULL, NULL
            );
            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
        }
        return err;
    }

    // This method checks if queries for CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT
    // and CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT using clGetProgramInfo()
    // return correct values.
    //
    // Returns the clGetProgramInfo status if a query itself fails, -1 if a
    // returned size or value does not match the expectation given to the
    // constructor, and CL_SUCCESS otherwise. In OpenCL C development builds
    // the queries are skipped and CL_SUCCESS is returned.
    virtual cl_int ctors_dtors_present_queries(cl_program program)
    {
        cl_int error = CL_SUCCESS;
    #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
        return error;
    #else
        // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT cl_bool
        // This indicates that the program object contains non-trivial constructor(s) that will be
        // executed by runtime before any kernel from the program is executed.

        // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT cl_bool
        // This indicates that the program object contains non-trivial destructor(s) that will be
        // executed by runtime when program is destroyed.

        // API status is kept separate from `error` so that a mismatch detected
        // after the first query is not overwritten by the second query.
        cl_int status = CL_SUCCESS;
        size_t cl_bool_size = 0;

        // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT
        cl_bool ctors_present = CL_FALSE;
        status = clGetProgramInfo(
            program,
            CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT,
            sizeof(cl_bool),
            static_cast<void*>(&ctors_present),
            &cl_bool_size
        );
        RETURN_ON_CL_ERROR(status, "clGetProgramInfo")
        if(cl_bool_size != sizeof(cl_bool))
        {
            error = -1;
            CHECK_ERROR_MSG(
                error,
                "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n",
                static_cast<unsigned long>(cl_bool_size),
                static_cast<unsigned long>(sizeof(cl_bool))
            );
        }

        // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT
        // The result must land in dtors_present; storing it in ctors_present
        // would both clobber the ctor answer and leave the dtor answer unset.
        cl_bool dtors_present = CL_FALSE;
        cl_bool_size = 0;
        status = clGetProgramInfo(
            program,
            CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT,
            sizeof(cl_bool),
            static_cast<void*>(&dtors_present),
            &cl_bool_size
        );
        RETURN_ON_CL_ERROR(status, "clGetProgramInfo")
        if(cl_bool_size != sizeof(cl_bool))
        {
            error = -1;
            CHECK_ERROR_MSG(
                error,
                "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n",
                static_cast<unsigned long>(cl_bool_size),
                static_cast<unsigned long>(sizeof(cl_bool))
            );
        }

        // check constructors
        if(m_ctor && ctors_present != CL_TRUE)
        {
            error = -1;
            CHECK_ERROR_MSG(
                error,
                "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1.\n"
            );
        }
        else if(!m_ctor && ctors_present == CL_TRUE)
        {
            error = -1;
            CHECK_ERROR_MSG(
                error,
                "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0.\n"
            );
        }

        // check destructors
        if(m_dtor && dtors_present != CL_TRUE)
        {
            error = -1;
            CHECK_ERROR_MSG(
                error,
                "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1.\n"
            );
        }
        else if(!m_dtor && dtors_present == CL_TRUE)
        {
            error = -1;
            CHECK_ERROR_MSG(
                error,
                "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0.\n"
            );
        }
        return error;
    #endif
    }

private:
    bool m_ctor; // program is expected to report non-trivial program-scope ctors
    bool m_dtor; // program is expected to report non-trivial program-scope dtors
};
|
||||
|
||||
template <class ps_ctor_dtor_test>
|
||||
int run_ps_ctor_dtor_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, ps_ctor_dtor_test op)
|
||||
{
|
||||
cl_mem buffers[1];
|
||||
cl_program program;
|
||||
std::vector<cl_kernel> kernels;
|
||||
size_t work_size[1];
|
||||
cl_int err;
|
||||
|
||||
std::string code_str = op.generate_program();
|
||||
std::vector<std::string> kernel_names = op.get_kernel_names();
|
||||
if(kernel_names.empty())
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "No kernel to run");
|
||||
}
|
||||
kernels.resize(kernel_names.size());
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false);
|
||||
RETURN_ON_ERROR(err)
|
||||
for(size_t i = 1; i < kernels.size(); i++)
|
||||
{
|
||||
kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateKernel");
|
||||
}
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
|
||||
RETURN_ON_ERROR(err)
|
||||
for(size_t i = 1; i < kernels.size(); i++)
|
||||
{
|
||||
kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateKernel");
|
||||
}
|
||||
#endif
|
||||
|
||||
work_size[0] = count;
|
||||
// host output vector
|
||||
std::vector<cl_uint> output = generate_output<cl_uint>(work_size[0], 9999);
|
||||
|
||||
// device output buffer
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_uint) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer")
|
||||
|
||||
// Execute test
|
||||
err = op.execute(kernels, buffers[0], queue, work_size[0]);
|
||||
RETURN_ON_ERROR(err)
|
||||
|
||||
// Check if queries returns correct values
|
||||
err = op.ctors_dtors_present_queries(program);
|
||||
RETURN_ON_ERROR(err);
|
||||
|
||||
// Release kernels and program
|
||||
// Destructors should be called now
|
||||
for(auto& k : kernels)
|
||||
{
|
||||
err = clReleaseKernel(k);
|
||||
RETURN_ON_CL_ERROR(err, "clReleaseKernel");
|
||||
}
|
||||
err = clReleaseProgram(program);
|
||||
RETURN_ON_CL_ERROR(err, "clReleaseProgram");
|
||||
|
||||
// Finish
|
||||
err = clFinish(queue);
|
||||
RETURN_ON_CL_ERROR(err, "clFinish");
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
|
||||
|
||||
// Check output values
|
||||
for(size_t i = 0; i < output.size(); i++)
|
||||
{
|
||||
cl_uint v = op(i);
|
||||
if(!(are_equal(v, output[i], detail::make_value<cl_uint>(0), op)))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name<cl_uint>().c_str(),
|
||||
format_value(v).c_str(), format_value(output[i]).c_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
log_info("test_%s(%s) passed\n", op.str().c_str(), type_name<cl_uint>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
return err;
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP
|
||||
@@ -1,24 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_ctors_dtors.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,324 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// Test for program scope variable with non-trivial ctor
|
||||
struct ps_ctor_test : public ps_ctors_dtors_test_base
|
||||
{
|
||||
ps_ctor_test(const cl_uint test_value)
|
||||
: ps_ctors_dtors_test_base(true, false),
|
||||
m_test_value(test_value)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "ps_ctor_test";
|
||||
}
|
||||
|
||||
std::vector<std::string> get_kernel_names()
|
||||
{
|
||||
return {
|
||||
this->str() + "_set",
|
||||
this->str() + "_read"
|
||||
};
|
||||
}
|
||||
|
||||
// Returns value that is expected to be in output_buffer[i]
|
||||
cl_uint operator()(size_t i)
|
||||
{
|
||||
if(i % 2 == 0)
|
||||
return m_test_value;
|
||||
return cl_uint(0xbeefbeef);
|
||||
}
|
||||
|
||||
// In 1st kernel 0th work-tem sets member m_x of program scope variable global_var to
|
||||
// m_test_value and m_y to uint(0xbeefbeef),
|
||||
// In 2nd kernel:
|
||||
// 1) if global id is even, then work-item reads global_var.m_x and writes it to output[its-global-id];
|
||||
// 2) otherwise, work-item reads global_var.m_y and writes it to output[its-global-id].
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_names()[0] + "(global uint *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = 0xbeefbeef;\n"
|
||||
"}\n"
|
||||
"__kernel void " + this->get_kernel_names()[1] + "(global uint *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" if(gid % 2 == 0)\n"
|
||||
" output[gid] = " + std::to_string(m_test_value) + ";\n"
|
||||
"}\n";
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
// struct template
|
||||
"template<class T>\n"
|
||||
"struct ctor_test_class_base {\n"
|
||||
// non-trivial ctor
|
||||
" ctor_test_class_base(T x) { m_x = x;};\n"
|
||||
" T m_x;\n"
|
||||
"};\n"
|
||||
// struct template
|
||||
"template<class T>\n"
|
||||
"struct ctor_test_class : public ctor_test_class_base<T> {\n"
|
||||
// non-trivial ctor
|
||||
" ctor_test_class(T x, T y) : ctor_test_class_base<T>(x), m_y(y) { };\n"
|
||||
" T m_y;\n"
|
||||
"};\n"
|
||||
// global scope program variables
|
||||
"ctor_test_class<uint> global_var(uint(0), uint(0));\n"
|
||||
|
||||
"__kernel void " + this->get_kernel_names()[0] + "(global_ptr<uint[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" if(gid == 0) {\n"
|
||||
" global_var.m_x = " + std::to_string(m_test_value) + ";\n"
|
||||
" global_var.m_y = 0xbeefbeef;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
|
||||
"__kernel void " + this->get_kernel_names()[1] + "(global_ptr<uint[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" if(gid % 2 == 0)\n"
|
||||
" output[gid] = global_var.m_x;\n"
|
||||
" else\n"
|
||||
" output[gid] = global_var.m_y;\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
cl_uint m_test_value;
|
||||
};
|
||||
|
||||
// Test for program scope variable with non-trivial dtor
|
||||
struct ps_dtor_test : public ps_ctors_dtors_test_base
|
||||
{
|
||||
ps_dtor_test(const cl_uint test_value)
|
||||
: ps_ctors_dtors_test_base(false, true),
|
||||
m_test_value(test_value)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "ps_dtor_test";
|
||||
}
|
||||
|
||||
// Returns value that is expected to be in output_buffer[i]
|
||||
cl_uint operator()(size_t i)
|
||||
{
|
||||
if(i % 2 == 0)
|
||||
return m_test_value;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// In 1st kernel 0th work-item saves pointer to output buffer and its size in program scope
|
||||
// variable global_var, it also sets counter to 1;
|
||||
// After global_var is destroyed all even elements of output buffer should equal m_test_value,
|
||||
// and all odd should equal 1.
|
||||
// If odd elements of output buffer are >1 it means dtor was executed more than once.
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global uint *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" if(gid % 2 == 0)\n"
|
||||
" output[gid] = " + std::to_string(m_test_value) + ";\n"
|
||||
" else\n"
|
||||
" output[gid] = 1;\n"
|
||||
"}\n";
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
// struct template
|
||||
"template<class T>\n"
|
||||
"struct dtor_test_class_base {\n"
|
||||
// non-trivial dtor
|
||||
// set all odd elements in buffer to counter
|
||||
" ~dtor_test_class_base() {\n"
|
||||
" for(size_t i = 1; i < this->size; i+=2)\n"
|
||||
" {\n"
|
||||
" this->buffer[i] = counter;\n"
|
||||
" }\n"
|
||||
" counter++;\n"
|
||||
" };\n"
|
||||
" global_ptr<uint[]> buffer;\n"
|
||||
" size_t size;\n"
|
||||
" T counter;\n"
|
||||
"};\n"
|
||||
// struct
|
||||
"struct dtor_test_class : public dtor_test_class_base<uint> {\n"
|
||||
// non-trivial dtor
|
||||
// set all values in buffer to m_test_value
|
||||
" ~dtor_test_class() {\n"
|
||||
" for(size_t i = 0; i < this->size; i+=2)\n"
|
||||
" this->buffer[i] = " + std::to_string(m_test_value) + ";\n"
|
||||
" };\n"
|
||||
"};\n"
|
||||
// global scope program variable
|
||||
"dtor_test_class global_var;\n"
|
||||
|
||||
// When global_var is being destroyed, first dtor ~dtor_test_class is called,
|
||||
// and then ~dtor_test_class_base is called.
|
||||
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
// set buffer and size in global var
|
||||
" if(gid == 0){\n"
|
||||
" global_var.buffer = output;\n"
|
||||
" global_var.size = get_global_size(0);\n"
|
||||
" global_var.counter = 1;\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
cl_uint m_test_value;
|
||||
};
|
||||
|
||||
// Test for program scope variable with both non-trivial ctor
|
||||
// and non-trivial dtor
|
||||
struct ps_ctor_dtor_test : public ps_ctors_dtors_test_base
|
||||
{
|
||||
ps_ctor_dtor_test(const cl_uint test_value)
|
||||
: ps_ctors_dtors_test_base(false, true),
|
||||
m_test_value(test_value)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "ps_ctor_dtor_test";
|
||||
}
|
||||
|
||||
// Returns value that is expected to be in output_buffer[i]
|
||||
cl_uint operator()(size_t i)
|
||||
{
|
||||
return m_test_value;
|
||||
}
|
||||
|
||||
// In 1st kernel 0th work-item saves pointer to output buffer and its size in program scope
|
||||
// variable global_var.
|
||||
// After global_var is destroyed all even elements of output buffer should equal m_test_value,
|
||||
// and all odd should equal 1.
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global uint *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = " + std::to_string(m_test_value) + ";\n"
|
||||
"}\n";
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_array>\n"
|
||||
"using namespace cl;\n"
|
||||
// struct template
|
||||
"template<class T>\n"
|
||||
"struct ctor_test_class {\n"
|
||||
// non-trivial ctor
|
||||
" ctor_test_class(T value) : m_value(value) { };\n"
|
||||
" T m_value;\n"
|
||||
"};\n\n"
|
||||
// struct
|
||||
"struct ctor_dtor_test_class {\n"
|
||||
// non-trivial ctor
|
||||
" ctor_dtor_test_class(uint value) : ctor_test(value) { } \n"
|
||||
// non-trivial dtor
|
||||
// set all values in buffer to m_test_value
|
||||
" ~ctor_dtor_test_class() {\n"
|
||||
" for(size_t i = 0; i < this->size; i++)\n"
|
||||
" {\n"
|
||||
" this->buffer[i] = ctor_test.m_value;\n"
|
||||
" }\n"
|
||||
" };\n"
|
||||
" ctor_test_class<uint> ctor_test;\n"
|
||||
" global_ptr<uint[]> buffer;\n"
|
||||
" size_t size;\n"
|
||||
"};\n"
|
||||
// global scope program variable
|
||||
"ctor_dtor_test_class global_var(" + std::to_string(m_test_value) + ");\n"
|
||||
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
// set buffer and size in global var
|
||||
" if(gid == 0){\n"
|
||||
" global_var.buffer = output;\n"
|
||||
" global_var.size = get_global_size(0);\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
cl_uint m_test_value;
|
||||
};
|
||||
|
||||
// This contains tests for program scope (global) constructors and destructors, more
|
||||
// detailed tests are also in clcpp/api.
|
||||
AUTO_TEST_CASE(test_program_scope_ctors_dtors)
(cl_device_id device, cl_context context, cl_command_queue queue, int count)
{
    // `error` and `last_error` keep these exact names because
    // RUN_PS_CTORS_DTORS_TEST_MACRO is defined elsewhere and presumably
    // refers to them - confirm.
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // One run per kind of program scope variable: ctor only, dtor only, both.
    RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_test(0xdeadbeefU))
    RUN_PS_CTORS_DTORS_TEST_MACRO(ps_dtor_test(0xbeefdeadU))
    RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_dtor_test(0xdeaddeadU))

    // Any failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Module description consumed by the shared CMakeCommon.txt include below:
# the module name and the list of its source files.
set(MODULE_NAME CPP_REINTERPRET)
set(${MODULE_NAME}_SOURCES main.cpp)

include(../../CMakeCommon.txt)
|
||||
@@ -1,223 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
|
||||
template<class IN1, class OUT1>
|
||||
struct as_type : public unary_func<IN1, OUT1>
|
||||
{
|
||||
static_assert(sizeof(IN1) == sizeof(OUT1), "It is an error to use the as_type<T> operator to reinterpret data to a type of a different number of bytes");
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "as_type";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_reinterpret>\n";
|
||||
}
|
||||
|
||||
OUT1 operator()(const IN1& x)
|
||||
{
|
||||
return *reinterpret_cast<const OUT1*>(&x);
|
||||
}
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
template <class func_type, class in_type, class out_type>
|
||||
std::string generate_kernel_as_type(func_type func)
|
||||
{
|
||||
std::string in1_value = "input[gid]";
|
||||
std::string function_call = "as_" + type_name<out_type>() + "(" + in1_value + ");";
|
||||
return
|
||||
"__kernel void test_" + func.str() + "(global " + type_name<in_type>() + " *input, global " + type_name<out_type>() + " *output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = " + function_call + ";\n"
|
||||
"}\n";
|
||||
}
|
||||
#else
|
||||
template <class func_type, class in_type, class out_type>
|
||||
std::string generate_kernel_as_type(func_type func)
|
||||
{
|
||||
std::string headers = func.headers();
|
||||
std::string in1_value = "input[gid]";
|
||||
std::string function_call = "as_type<" + type_name<out_type>() + ">(" + in1_value + ")";
|
||||
return
|
||||
"" + func.defs() +
|
||||
"" + headers +
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_" + func.str() + "(global_ptr<" + type_name<in_type>() + "[]> input,"
|
||||
"global_ptr<" + type_name<out_type>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = " + function_call + ";\n"
|
||||
"}\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
template<class INPUT, class OUTPUT, class as_type_op>
|
||||
bool verify_as_type(const std::vector<INPUT> &in, const std::vector<OUTPUT> &out, as_type_op op)
|
||||
{
|
||||
// When the operand and result type contain a different number of elements, the result is implementation-defined,
|
||||
// i.e. any result is correct
|
||||
if (vector_size<INPUT>::value == vector_size<OUTPUT>::value)
|
||||
{
|
||||
for (size_t i = 0; i < in.size(); i++)
|
||||
{
|
||||
auto expected = op(in[i]);
|
||||
if (std::memcmp(&expected, &out[i], sizeof(expected)) != 0)
|
||||
{
|
||||
print_error_msg(expected, out[i], i, op);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class as_type_op>
|
||||
int test_as_type_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, as_type_op op)
|
||||
{
|
||||
cl_mem buffers[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t work_size[1];
|
||||
int error;
|
||||
|
||||
typedef typename as_type_op::in_type INPUT;
|
||||
typedef typename as_type_op::out_type OUTPUT;
|
||||
|
||||
// Don't run test for unsupported types
|
||||
if (!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::string code_str = generate_kernel_as_type<as_type_op, INPUT, OUTPUT>(op);
|
||||
std::string kernel_name("test_"); kernel_name += op.str();
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
|
||||
RETURN_ON_ERROR(error)
|
||||
return error;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
|
||||
RETURN_ON_ERROR(error)
|
||||
#else
|
||||
error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
|
||||
RETURN_ON_ERROR(error)
|
||||
#endif
|
||||
|
||||
std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
|
||||
std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
|
||||
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error);
|
||||
RETURN_ON_CL_ERROR(error, "clCreateBuffer")
|
||||
|
||||
error = clEnqueueWriteBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
|
||||
static_cast<void *>(input.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
error = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
|
||||
work_size[0] = count;
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
|
||||
|
||||
error = clEnqueueReadBuffer(
|
||||
queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
|
||||
|
||||
if (!verify_as_type(input, output, op))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
|
||||
}
|
||||
log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseMemObject(buffers[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return error;
|
||||
}
|
||||
|
||||
AUTO_TEST_CASE(test_as_type)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

// Runs test_as_type_func for one (source type, destination type) pair and
// ORs its result into `error`.
#define TEST_AS_TYPE_MACRO(SRC_TYPE, DST_TYPE) \
    last_error = test_as_type_func( \
        device, context, queue, n_elems, as_type<SRC_TYPE, DST_TYPE>() \
    ); \
    CHECK_ERROR(last_error) \
    error |= last_error;

    TEST_AS_TYPE_MACRO(cl_int, cl_int)
    TEST_AS_TYPE_MACRO(cl_uint, cl_int)
    TEST_AS_TYPE_MACRO(cl_int, cl_ushort2)
    TEST_AS_TYPE_MACRO(cl_uchar, cl_uchar)
    TEST_AS_TYPE_MACRO(cl_char4, cl_ushort2)
    TEST_AS_TYPE_MACRO(cl_uchar16, cl_char16)
    TEST_AS_TYPE_MACRO(cl_short8, cl_uchar16)
    TEST_AS_TYPE_MACRO(cl_float4, cl_uint4)
    TEST_AS_TYPE_MACRO(cl_float16, cl_int16)
    TEST_AS_TYPE_MACRO(cl_long2, cl_float4)
    TEST_AS_TYPE_MACRO(cl_ulong, cl_long)
    TEST_AS_TYPE_MACRO(cl_ulong16, cl_double16)
    TEST_AS_TYPE_MACRO(cl_uchar16, cl_double2)
    TEST_AS_TYPE_MACRO(cl_ulong4, cl_short16)

#undef TEST_AS_TYPE_MACRO

    // Any failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP
|
||||
@@ -1,25 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "as_type.hpp"
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
# Module description consumed by the shared CMakeCommon.txt include below:
# the module name and the list of its source files.
set(MODULE_NAME CPP_RELATIONAL_FUNCS)
set(${MODULE_NAME}_SOURCES main.cpp)

include(../../CMakeCommon.txt)
|
||||
@@ -1,112 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
#include <type_traits>
|
||||
#include <cmath>
|
||||
|
||||
template<class IN1, class IN2, class IN3, class OUT1, class F>
|
||||
OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if<is_vector_type<OUT1>::value>::type* = 0)
|
||||
{
|
||||
OUT1 result;
|
||||
for(size_t i = 0; i < vector_size<OUT1>::value; i++)
|
||||
{
|
||||
result.s[i] = func(in1.s[i], in2.s[i], in3.s[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<class IN1, class IN2, class IN3, class OUT1, class F>
|
||||
OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if<!is_vector_type<OUT1>::value>::type* = 0)
|
||||
{
|
||||
OUT1 result = func(in1, in2, in3);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
template<class IN1, class IN2, class OUT1, class F>
|
||||
OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if<is_vector_type<OUT1>::value>::type* = 0)
|
||||
{
|
||||
OUT1 result;
|
||||
for(size_t i = 0; i < vector_size<OUT1>::value; i++)
|
||||
{
|
||||
result.s[i] = func(in1.s[i], in2.s[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<class IN1, class IN2, class OUT1, class F>
|
||||
OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if<!is_vector_type<OUT1>::value>::type* = 0)
|
||||
{
|
||||
OUT1 result = func(in1, in2);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<class IN1, class OUT1, class F>
|
||||
OUT1 perform_function(const IN1& in1, F func, typename std::enable_if<is_vector_type<OUT1>::value>::type* = 0)
|
||||
{
|
||||
OUT1 result;
|
||||
for(size_t i = 0; i < vector_size<OUT1>::value; i++)
|
||||
{
|
||||
result.s[i] = func(in1.s[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<class IN1, class OUT1, class F>
|
||||
OUT1 perform_function(const IN1& in1, F func, typename std::enable_if<!is_vector_type<OUT1>::value>::type* = 0)
|
||||
{
|
||||
OUT1 result = func(in1);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<class IN1>
|
||||
cl_int perform_all_function(const IN1& in1, typename std::enable_if<is_vector_type<IN1>::value>::type* = 0)
|
||||
{
|
||||
cl_int result = 1;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
result = (in1.s[i] != 0) ? result : cl_int(0);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Scalar cl_int input: returns 1 when `in1` is non-zero, otherwise 0.
// Declared inline because this non-template function is defined in a header;
// without it, including the header from more than one translation unit
// produces multiple definitions.
inline cl_int perform_all_function(const cl_int& in1, typename std::enable_if<!is_vector_type<cl_int>::value>::type* = 0)
{
    return (in1 != 0) ? cl_int(1) : cl_int(0);
}
|
||||
|
||||
template<class IN1>
|
||||
cl_int perform_any_function(const IN1& in1, typename std::enable_if<is_vector_type<IN1>::value>::type* = 0)
|
||||
{
|
||||
cl_int result = 0;
|
||||
for(size_t i = 0; i < vector_size<IN1>::value; i++)
|
||||
{
|
||||
result = (in1.s[i] != 0) ? cl_int(1) : result;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Host reference for any() on a scalar cl_int input: returns 1 when the value
// is non-zero, otherwise 0.
//
// Declared inline because this is a non-template function defined in a
// header; without it, including the header from more than one translation
// unit produces multiple definitions of the same function.
inline cl_int perform_any_function(const cl_int& in1, typename std::enable_if<!is_vector_type<cl_int>::value>::type* = 0)
{
    return (in1 != 0) ? cl_int(1) : cl_int(0);
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP
|
||||
@@ -1,150 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// This macro creates a class wrapper for the comparison function we want to test.
|
||||
// Parameters:
//   CLASS_NAME           - name of the generated class template (parameterised on vector size N)
//   FUNC_NAME            - function name; stringified and returned by str()
//   HOST_FUNC_EXPRESSION - boolean host expression over the scalars `a` and `b`
//                          that yields the reference result for one component
// The generated class maps cl_floatN x cl_floatN -> cl_intN, producing 1 or 0
// per component on the host.
#define DEF_COMPARISION_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \
template <cl_int N /* Vector size */> \
struct CLASS_NAME : public binary_func< \
                        typename make_vector_type<cl_float, N>::type, /* first input: cl_floatN */ \
                        typename make_vector_type<cl_float, N>::type, /* second input: cl_floatN */ \
                        typename make_vector_type<cl_int, N>::type    /* result: cl_intN */ \
                    > \
{ \
    using input_type = typename make_vector_type<cl_float, N>::type; \
    using result_type = typename make_vector_type<cl_int, N>::type; \
    \
    /* Name of the tested function. */ \
    std::string str() \
    { \
        return std::string(#FUNC_NAME); \
    } \
    \
    /* Header line for the generated kernel source. */ \
    std::string headers() \
    { \
        return std::string("#include <opencl_relational>\n"); \
    } \
    \
    /* Host reference: evaluates the expression on the inputs via perform_function. */ \
    result_type operator()(const input_type& x, const input_type& y) \
    { \
        typedef typename scalar_type<input_type>::type SCALAR; \
        auto compare = [](const SCALAR& a, const SCALAR& b) \
        { \
            return (HOST_FUNC_EXPRESSION) ? cl_int(1) : cl_int(0); \
        }; \
        return perform_function<input_type, input_type, result_type>(x, y, compare); \
    } \
    \
    bool is_out_bool() \
    { \
        return true; \
    } \
    \
    /* Limits reported for the first input. */ \
    input_type min1() \
    { \
        return detail::def_limit<input_type>(-10000.0f); \
    } \
    \
    input_type max1() \
    { \
        return detail::def_limit<input_type>(10000.0f); \
    } \
    \
    /* Limits reported for the second input. */ \
    input_type min2() \
    { \
        return detail::def_limit<input_type>(-10000.0f); \
    } \
    \
    input_type max2() \
    { \
        return detail::def_limit<input_type>(10000.0f); \
    } \
    \
    /* Special values for the first input: +inf, -inf, quiet NaN, +0, -0. */ \
    std::vector<input_type> in1_special_cases() \
    { \
        typedef typename scalar_type<input_type>::type SCALAR; \
        std::vector<input_type> cases = { \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
            detail::make_value<input_type>(0.0f), \
            detail::make_value<input_type>(-0.0f) \
        }; \
        return cases; \
    } \
    \
    /* Special values for the second input: +inf, -inf, quiet NaN, +0, -0. */ \
    std::vector<input_type> in2_special_cases() \
    { \
        typedef typename scalar_type<input_type>::type SCALAR; \
        std::vector<input_type> cases = { \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
            detail::make_value<input_type>(0.0f), \
            detail::make_value<input_type>(-0.0f) \
        }; \
        return cases; \
    } \
};
|
||||
|
||||
// One wrapper class per relational comparison function. The third argument is
// the host expression evaluated per component on the scalars `a` and `b`.
// The std:: function names are parenthesised, which prevents expansion of any
// same-named function-like macro.
DEF_COMPARISION_FUNC(comparison_func_isequal,        isequal,        (a == b))
DEF_COMPARISION_FUNC(comparison_func_isnotequal,     isnotequal,     !(a == b))
DEF_COMPARISION_FUNC(comparison_func_isgreater,      isgreater,      (std::isgreater)(a, b))
DEF_COMPARISION_FUNC(comparison_func_isgreaterequal, isgreaterequal, ((std::isgreater)(a, b) || a == b))
DEF_COMPARISION_FUNC(comparison_func_isless,         isless,         (std::isless)(a, b))
DEF_COMPARISION_FUNC(comparison_func_islessequal,    islessequal,    ((std::isless)(a, b) || a == b))
DEF_COMPARISION_FUNC(comparison_func_islessgreater,  islessgreater,  ((a < b) || (a > b)))

// The generator macro is only needed for the definitions above.
#undef DEF_COMPARISION_FUNC
|
||||
|
||||
// Runs every comparison-function wrapper for vector sizes 1, 2, 4, 8 and 16.
// NOTE(review): TEST_BINARY_FUNC_MACRO is defined elsewhere; it presumably
// updates `error` / `last_error` using the parameters below - confirm.
AUTO_TEST_CASE(test_relational_comparison_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

// Helper macro, so we don't have to repeat the same code.
#define TEST_BINARY_REL_FUNC_MACRO(CLASS_NAME) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<1>()) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<2>()) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<4>()) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<8>()) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<16>())

    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isequal)
    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isnotequal)
    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreater)
    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreaterequal)
    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isless)
    TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessequal)
    TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessgreater)

#undef TEST_BINARY_REL_FUNC_MACRO

    // Any accumulated failure is reported as -1.
    return (error != CL_SUCCESS) ? -1 : error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP
|
||||
@@ -1,26 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "comparison_funcs.hpp"
|
||||
#include "select_funcs.hpp"
|
||||
#include "test_funcs.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,158 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
template <class IN1, cl_int N /* Vector size */>
|
||||
struct select_func_select : public ternary_func<
|
||||
typename make_vector_type<IN1, N>::type, /* create IN1N type */
|
||||
typename make_vector_type<IN1, N>::type, /* create IN1N type */
|
||||
typename make_vector_type<cl_int, N>::type, /* create cl_intN type */
|
||||
typename make_vector_type<IN1, N>::type /* create IN1N type */
|
||||
>
|
||||
{
|
||||
typedef typename make_vector_type<IN1, N>::type input1_type;
|
||||
typedef typename make_vector_type<IN1, N>::type input2_type;
|
||||
typedef typename make_vector_type<cl_int, N>::type input3_type;
|
||||
typedef typename make_vector_type<IN1, N>::type result_type;
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "select";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_relational>\n";
|
||||
}
|
||||
|
||||
result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z)
|
||||
{
|
||||
typedef typename scalar_type<input1_type>::type SCALAR1;
|
||||
typedef typename scalar_type<input2_type>::type SCALAR2;
|
||||
typedef typename scalar_type<input3_type>::type SCALAR3;
|
||||
|
||||
return perform_function<input1_type, input2_type, input3_type, result_type>(
|
||||
x, y, z,
|
||||
[](const SCALAR1& a, const SCALAR2& b, const SCALAR3& c)
|
||||
{
|
||||
return (c != 0) ? b : a;
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
bool is_in3_bool()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<input3_type> in3_special_cases()
|
||||
{
|
||||
return {
|
||||
detail::make_value<input3_type>(0),
|
||||
detail::make_value<input3_type>(1),
|
||||
detail::make_value<input3_type>(12),
|
||||
detail::make_value<input3_type>(-12)
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
template <class IN1, cl_int N /* Vector size */>
|
||||
struct select_func_bitselect : public ternary_func<
|
||||
typename make_vector_type<IN1, N>::type, /* create IN1N type */
|
||||
typename make_vector_type<IN1, N>::type, /* create IN1N type */
|
||||
typename make_vector_type<IN1, N>::type, /* create cl_intN type */
|
||||
typename make_vector_type<IN1, N>::type /* create IN1N type */
|
||||
>
|
||||
{
|
||||
typedef typename make_vector_type<IN1, N>::type input1_type;
|
||||
typedef typename make_vector_type<IN1, N>::type input2_type;
|
||||
typedef typename make_vector_type<IN1, N>::type input3_type;
|
||||
typedef typename make_vector_type<IN1, N>::type result_type;
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "bitselect";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_relational>\n";
|
||||
}
|
||||
|
||||
result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z)
|
||||
{
|
||||
static_assert(
|
||||
std::is_integral<IN1>::value,
|
||||
"bitselect test is implemented only for integers."
|
||||
);
|
||||
static_assert(
|
||||
std::is_unsigned<IN1>::value,
|
||||
"IN1 type should be unsigned, bitwise operations on signed int may cause problems."
|
||||
);
|
||||
typedef typename scalar_type<input1_type>::type SCALAR1;
|
||||
typedef typename scalar_type<input2_type>::type SCALAR2;
|
||||
typedef typename scalar_type<input3_type>::type SCALAR3;
|
||||
|
||||
return perform_function<input1_type, input2_type, input3_type, result_type>(
|
||||
x, y, z,
|
||||
[](const SCALAR1& a, const SCALAR2& b, const SCALAR3& c)
|
||||
{
|
||||
return (~c & a) | (c & b);
|
||||
}
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the select() and bitselect() wrappers for a mix of element types and
// vector sizes.
// NOTE(review): TEST_TERNARY_FUNC_MACRO is defined elsewhere; it presumably
// updates `error` / `last_error` using the parameters below - confirm.
AUTO_TEST_CASE(test_relational_select_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // Tests for select(gentype a, gentype b, booln c) are not run in
    // USE_OPENCLC_KERNELS mode, because in OpenCL C this function needs a
    // different host reference than its OpenCL C++ equivalent.
    // (In OpenCL C the result of select(), when gentype is a vector type, is
    // based on the most significant bits of the components of c.)
#ifdef USE_OPENCLC_KERNELS
    log_info("WARNING:\n\tTests for select(gentype a, gentype b, booln c) are not run in USE_OPENCLC_KERNELS mode\n");
#else
    // gentype select(gentype a, gentype b, booln c)
    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_uint, 1>()))
    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_float, 2>()))
    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_short, 4>()))
    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_uint, 8>()))
    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_uint, 16>()))
#endif

    // gentype bitselect(gentype a, gentype b, gentype c)
    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_uint, 1>()))
    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_ushort, 2>()))
    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_uchar, 4>()))
    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_ushort, 8>()))
    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_uint, 16>()))

    // Any accumulated failure is reported as -1.
    return (error != CL_SUCCESS) ? -1 : error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP
|
||||
@@ -1,336 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// This macro creates a class wrapper for the unary test function we want to test.
|
||||
// Parameters:
//   CLASS_NAME           - name of the generated class template (parameterised on vector size N)
//   FUNC_NAME            - function name; stringified and returned by str()
//   HOST_FUNC_EXPRESSION - boolean host expression over the scalar `a` that
//                          yields the reference result for one component
// The generated class maps cl_floatN -> cl_intN, producing 1 or 0 per
// component on the host.
#define DEF_UNARY_TEST_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \
template <cl_int N /* Vector size */> \
struct CLASS_NAME : public unary_func< \
                        typename make_vector_type<cl_float, N>::type, /* input: cl_floatN */ \
                        typename make_vector_type<cl_int, N>::type    /* result: cl_intN */ \
                    > \
{ \
    using input_type = typename make_vector_type<cl_float, N>::type; \
    using result_type = typename make_vector_type<cl_int, N>::type; \
    \
    /* Name of the tested function. */ \
    std::string str() \
    { \
        return std::string(#FUNC_NAME); \
    } \
    \
    /* Header line for the generated kernel source. */ \
    std::string headers() \
    { \
        return std::string("#include <opencl_relational>\n"); \
    } \
    \
    /* Host reference: evaluates the expression on the input via perform_function. */ \
    result_type operator()(const input_type& x) \
    { \
        typedef typename scalar_type<input_type>::type SCALAR; \
        auto classify = [](const SCALAR& a) \
        { \
            return (HOST_FUNC_EXPRESSION) ? cl_int(1) : cl_int(0); \
        }; \
        return perform_function<input_type, result_type>(x, classify); \
    } \
    \
    bool is_out_bool() \
    { \
        return true; \
    } \
    \
    /* Limits reported for the input. */ \
    input_type min1() \
    { \
        return detail::def_limit<input_type>(-10000.0f); \
    } \
    \
    input_type max1() \
    { \
        return detail::def_limit<input_type>(10000.0f); \
    } \
    \
    /* Special values: +inf, -inf, quiet NaN, signaling NaN, smallest denormal, +0, -0. */ \
    std::vector<input_type> in1_special_cases() \
    { \
        typedef typename scalar_type<input_type>::type SCALAR; \
        std::vector<input_type> cases = { \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::signaling_NaN()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::denorm_min()), \
            detail::make_value<input_type>(0.0f), \
            detail::make_value<input_type>(-0.0f) \
        }; \
        return cases; \
    } \
};
|
||||
|
||||
// This macro creates a class wrapper for the binary test function we want to test.
|
||||
// Parameters:
//   CLASS_NAME           - name of the generated class template (parameterised on vector size N)
//   FUNC_NAME            - function name; stringified and returned by str()
//   HOST_FUNC_EXPRESSION - boolean host expression over the scalars `a` and `b`
//                          that yields the reference result for one component
// The generated class maps cl_floatN x cl_floatN -> cl_intN, producing 1 or 0
// per component on the host.
#define DEF_BINARY_TEST_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \
template <cl_int N /* Vector size */> \
struct CLASS_NAME : public binary_func< \
                        typename make_vector_type<cl_float, N>::type, /* first input: cl_floatN */ \
                        typename make_vector_type<cl_float, N>::type, /* second input: cl_floatN */ \
                        typename make_vector_type<cl_int, N>::type    /* result: cl_intN */ \
                    > \
{ \
    using input_type = typename make_vector_type<cl_float, N>::type; \
    using result_type = typename make_vector_type<cl_int, N>::type; \
    \
    /* Name of the tested function. */ \
    std::string str() \
    { \
        return std::string(#FUNC_NAME); \
    } \
    \
    /* Header line for the generated kernel source. */ \
    std::string headers() \
    { \
        return std::string("#include <opencl_relational>\n"); \
    } \
    \
    /* Host reference: evaluates the expression on the inputs via perform_function. */ \
    result_type operator()(const input_type& x, const input_type& y) \
    { \
        typedef typename scalar_type<input_type>::type SCALAR; \
        auto classify = [](const SCALAR& a, const SCALAR& b) \
        { \
            return (HOST_FUNC_EXPRESSION) ? cl_int(1) : cl_int(0); \
        }; \
        return perform_function<input_type, input_type, result_type>(x, y, classify); \
    } \
    \
    bool is_out_bool() \
    { \
        return true; \
    } \
    \
    /* Limits reported for the first input. */ \
    input_type min1() \
    { \
        return detail::def_limit<input_type>(-10000.0f); \
    } \
    \
    input_type max1() \
    { \
        return detail::def_limit<input_type>(10000.0f); \
    } \
    \
    /* Limits reported for the second input. */ \
    input_type min2() \
    { \
        return detail::def_limit<input_type>(-10000.0f); \
    } \
    \
    input_type max2() \
    { \
        return detail::def_limit<input_type>(10000.0f); \
    } \
    \
    /* First input: +inf, -inf, quiet NaN, signaling NaN, smallest denormal, +0, -0. */ \
    std::vector<input_type> in1_special_cases() \
    { \
        typedef typename scalar_type<input_type>::type SCALAR; \
        std::vector<input_type> cases = { \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::signaling_NaN()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::denorm_min()), \
            detail::make_value<input_type>(0.0f), \
            detail::make_value<input_type>(-0.0f) \
        }; \
        return cases; \
    } \
    \
    /* Second input: +inf, -inf, quiet NaN, signaling NaN, smallest denormal, +0, -0. */ \
    std::vector<input_type> in2_special_cases() \
    { \
        typedef typename scalar_type<input_type>::type SCALAR; \
        std::vector<input_type> cases = { \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::signaling_NaN()), \
            detail::make_value<input_type>(std::numeric_limits<SCALAR>::denorm_min()), \
            detail::make_value<input_type>(0.0f), \
            detail::make_value<input_type>(-0.0f) \
        }; \
        return cases; \
    } \
};
|
||||
|
||||
// One wrapper class per unary classification function. The third argument is
// the host expression evaluated per component on the scalar `a`. The std::
// function names are parenthesised, which prevents expansion of any
// same-named function-like macro.
DEF_UNARY_TEST_FUNC(test_func_isfinite, isfinite, (std::isfinite)(a))
DEF_UNARY_TEST_FUNC(test_func_isinf,    isinf,    (std::isinf)(a))
DEF_UNARY_TEST_FUNC(test_func_isnan,    isnan,    (std::isnan)(a))
DEF_UNARY_TEST_FUNC(test_func_isnormal, isnormal, (std::isnormal)(a))
DEF_UNARY_TEST_FUNC(test_func_signbit,  signbit,  (std::signbit)(a))

// Binary ordering tests, evaluated per component on the scalars `a` and `b`.
DEF_BINARY_TEST_FUNC(test_func_isordered,   isordered,   !(std::isunordered)(a, b))
DEF_BINARY_TEST_FUNC(test_func_isunordered, isunordered, (std::isunordered)(a, b))

// The generator macros are only needed for the definitions above.
#undef DEF_UNARY_TEST_FUNC
#undef DEF_BINARY_TEST_FUNC
|
||||
|
||||
template <cl_int N /* Vector size */>
|
||||
struct test_func_all : public unary_func<
|
||||
typename make_vector_type<cl_int, N>::type, /* create cl_intN type */
|
||||
cl_int /* create cl_intN type */
|
||||
>
|
||||
{
|
||||
typedef typename make_vector_type<cl_int, N>::type input_type;
|
||||
typedef cl_int result_type;
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "all";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_relational>\n";
|
||||
}
|
||||
|
||||
result_type operator()(const input_type& x)
|
||||
{
|
||||
return perform_all_function(x);
|
||||
}
|
||||
|
||||
bool is_out_bool()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_in1_bool()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<input_type> in1_special_cases()
|
||||
{
|
||||
return {
|
||||
detail::make_value<input_type>(0),
|
||||
detail::make_value<input_type>(1),
|
||||
detail::make_value<input_type>(12),
|
||||
detail::make_value<input_type>(-12)
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
template <cl_int N /* Vector size */>
|
||||
struct test_func_any : public unary_func<
|
||||
typename make_vector_type<cl_int, N>::type, /* create cl_intN type */
|
||||
cl_int /* create cl_intN type */
|
||||
>
|
||||
{
|
||||
typedef typename make_vector_type<cl_int, N>::type input_type;
|
||||
typedef cl_int result_type;
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "any";
|
||||
}
|
||||
|
||||
std::string headers()
|
||||
{
|
||||
return "#include <opencl_relational>\n";
|
||||
}
|
||||
|
||||
result_type operator()(const input_type& x)
|
||||
{
|
||||
return perform_any_function(x);
|
||||
}
|
||||
|
||||
bool is_out_bool()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_in1_bool()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<input_type> in1_special_cases()
|
||||
{
|
||||
return {
|
||||
detail::make_value<input_type>(0),
|
||||
detail::make_value<input_type>(1),
|
||||
detail::make_value<input_type>(12),
|
||||
detail::make_value<input_type>(-12)
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Runs every unary and binary test-function wrapper for vector sizes
// 1, 2, 4, 8 and 16.
// NOTE(review): TEST_UNARY_FUNC_MACRO / TEST_BINARY_FUNC_MACRO are defined
// elsewhere; they presumably update `error` / `last_error` using the
// parameters below - confirm.
AUTO_TEST_CASE(test_relational_test_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

// Helper macro, so we don't have to repeat the same code.
#define TEST_UNARY_REL_FUNC_MACRO(CLASS_NAME) \
    TEST_UNARY_FUNC_MACRO(CLASS_NAME<1>()) \
    TEST_UNARY_FUNC_MACRO(CLASS_NAME<2>()) \
    TEST_UNARY_FUNC_MACRO(CLASS_NAME<4>()) \
    TEST_UNARY_FUNC_MACRO(CLASS_NAME<8>()) \
    TEST_UNARY_FUNC_MACRO(CLASS_NAME<16>())

    TEST_UNARY_REL_FUNC_MACRO(test_func_isfinite)
    TEST_UNARY_REL_FUNC_MACRO(test_func_isinf)
    TEST_UNARY_REL_FUNC_MACRO(test_func_isnan)
    TEST_UNARY_REL_FUNC_MACRO(test_func_isnormal)
    TEST_UNARY_REL_FUNC_MACRO(test_func_signbit)

    // Tests for all(booln x) and any(booln x) are not run in
    // USE_OPENCLC_KERNELS mode, because in OpenCL C those functions need
    // different host references than their OpenCL C++ equivalents.
    // (In OpenCL C they return true/false based on the most significant bits
    // of any/all components of x.)
#ifdef USE_OPENCLC_KERNELS
    log_info("WARNING:\n\tTests for bool all(booln x) are not run in USE_OPENCLC_KERNELS mode\n");
    log_info("WARNING:\n\tTests for bool any(booln x) are not run in USE_OPENCLC_KERNELS mode\n");
#else
    TEST_UNARY_REL_FUNC_MACRO(test_func_all)
    TEST_UNARY_REL_FUNC_MACRO(test_func_any)
#endif

#undef TEST_UNARY_REL_FUNC_MACRO

// Same helper for the two-input wrappers.
#define TEST_BINARY_REL_FUNC_MACRO(CLASS_NAME) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<1>()) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<2>()) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<4>()) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<8>()) \
    TEST_BINARY_FUNC_MACRO(CLASS_NAME<16>())

    TEST_BINARY_REL_FUNC_MACRO(test_func_isordered)
    TEST_BINARY_REL_FUNC_MACRO(test_func_isunordered)

#undef TEST_BINARY_REL_FUNC_MACRO

    // Any accumulated failure is reported as -1.
    return (error != CL_SUCCESS) ? -1 : error;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Module name; also used below as the prefix of the sources variable.
set(MODULE_NAME CPP_SPEC_CONSTANTS)

# Source files of this module, stored in ${MODULE_NAME}_SOURCES.
set(${MODULE_NAME}_SOURCES main.cpp)

# Shared build logic. NOTE(review): presumably consumes MODULE_NAME and the
# sources variable set above - confirm against CMakeCommon.txt.
include(../../CMakeCommon.txt)
|
||||
@@ -1,256 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "../common.hpp"
|
||||
#include "../funcs_test_utils.hpp"
|
||||
|
||||
// Runs one spec-constants test object through run_spec_constants_test and
// ORs its status into `error`. Expects `device`, `context`, `queue`,
// `n_elems`, `last_error` and `error` to be in scope where it is used.
#define RUN_SPEC_CONSTANTS_TEST_MACRO(TEST_CLASS) \
    last_error = run_spec_constants_test(device, context, queue, n_elems, TEST_CLASS); \
    CHECK_ERROR(last_error) \
    error |= last_error;
|
||||
|
||||
// Base class for all tests of cl::spec_constant
|
||||
template <class T>
|
||||
struct spec_constants_test : public detail::base_func_type<T>
|
||||
{
|
||||
// Output buffer type
|
||||
typedef T type;
|
||||
|
||||
virtual ~spec_constants_test() {};
|
||||
// Returns test name
|
||||
virtual std::string str() = 0;
|
||||
// Returns OpenCL program source
|
||||
virtual std::string generate_program() = 0;
|
||||
|
||||
// Return names of test's kernels, in order.
|
||||
// Typical case: one kernel.
|
||||
virtual std::vector<std::string> get_kernel_names()
|
||||
{
|
||||
// Typical case, that is, only one kernel
|
||||
return { this->get_kernel_name() };
|
||||
}
|
||||
|
||||
// If local size has to be set in clEnqueueNDRangeKernel()
|
||||
// this should return true; otherwise - false;
|
||||
virtual bool set_local_size()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Calculates maximal work-group size (one dim)
|
||||
virtual size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
|
||||
cl_device_id device,
|
||||
size_t work_group_size, // default work-group size
|
||||
cl_int& error)
|
||||
{
|
||||
size_t wg_size = work_group_size;
|
||||
for(auto& k : kernels)
|
||||
{
|
||||
size_t max_wg_size;
|
||||
error = clGetKernelWorkGroupInfo(
|
||||
k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
|
||||
wg_size = (std::min)(wg_size, max_wg_size);
|
||||
}
|
||||
return wg_size;
|
||||
}
|
||||
|
||||
// Sets spec constants
|
||||
// Typical case: no spec constants to set
|
||||
virtual cl_int set_spec_constants(const cl_program& program)
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// This covers typical case:
|
||||
// 1. each kernel is executed once,
|
||||
// 2. the only argument in every kernel is output_buffer
|
||||
virtual cl_int execute(const std::vector<cl_kernel>& kernels,
|
||||
cl_mem& output_buffer,
|
||||
cl_command_queue& queue,
|
||||
size_t work_size,
|
||||
size_t work_group_size)
|
||||
{
|
||||
cl_int err;
|
||||
for(auto& k : kernels)
|
||||
{
|
||||
err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
err = clEnqueueNDRangeKernel(
|
||||
queue, k, 1,
|
||||
NULL, &work_size, this->set_local_size() ? &work_group_size : NULL,
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
// This is a function which performs additional queries and checks
|
||||
// if the results are correct. This method is run after checking that
|
||||
// test results (output values) are correct.
|
||||
virtual cl_int check_queries(const std::vector<cl_kernel>& kernels,
|
||||
cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue)
|
||||
{
|
||||
(void) kernels;
|
||||
(void) device;
|
||||
(void) context;
|
||||
(void) queue;
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
template <class spec_constants_test>
|
||||
int run_spec_constants_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, spec_constants_test op)
|
||||
{
|
||||
cl_mem buffers[1];
|
||||
cl_program program;
|
||||
std::vector<cl_kernel> kernels;
|
||||
size_t wg_size;
|
||||
size_t work_size[1];
|
||||
cl_int err;
|
||||
|
||||
typedef typename spec_constants_test::type TYPE;
|
||||
|
||||
// Don't run test for unsupported types
|
||||
if(!(type_supported<TYPE>(device)))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::string code_str = op.generate_program();
|
||||
std::vector<std::string> kernel_names = op.get_kernel_names();
|
||||
if(kernel_names.empty())
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "No kernel to run");
|
||||
}
|
||||
kernels.resize(kernel_names.size());
|
||||
|
||||
std::string options = "";
|
||||
if(is_extension_available(device, "cl_khr_fp16"))
|
||||
{
|
||||
options += " -cl-fp16-enable";
|
||||
}
|
||||
if(is_extension_available(device, "cl_khr_fp64"))
|
||||
{
|
||||
options += " -cl-fp64-enable";
|
||||
}
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], options);
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false);
|
||||
RETURN_ON_ERROR(err)
|
||||
for(size_t i = 1; i < kernels.size(); i++)
|
||||
{
|
||||
kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateKernel");
|
||||
}
|
||||
#else
|
||||
const char * code_c_str = code_str.c_str();
|
||||
err = create_openclcpp_program(context, &program, 1, &(code_c_str), options.c_str());
|
||||
RETURN_ON_ERROR_MSG(err, "Creating OpenCL C++ program failed")
|
||||
|
||||
// Set spec constants
|
||||
err = op.set_spec_constants(program);
|
||||
RETURN_ON_ERROR_MSG(err, "Setting Spec Constants failed")
|
||||
|
||||
// Build program and create 1st kernel
|
||||
err = build_program_create_kernel_helper(
|
||||
context, &program, &(kernels[0]), 1, &(code_c_str), kernel_names[0].c_str()
|
||||
);
|
||||
RETURN_ON_ERROR_MSG(err, "Unable to build program or to create kernel")
|
||||
// Create other kernels
|
||||
for(size_t i = 1; i < kernels.size(); i++)
|
||||
{
|
||||
kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateKernel");
|
||||
}
|
||||
#endif
|
||||
|
||||
// Find the max possible wg size for among all the kernels
|
||||
wg_size = op.get_max_local_size(kernels, device, 1024, err);
|
||||
RETURN_ON_ERROR(err);
|
||||
|
||||
work_size[0] = count;
|
||||
if(op.set_local_size())
|
||||
{
|
||||
size_t wg_number = static_cast<size_t>(
|
||||
std::ceil(static_cast<double>(count) / wg_size)
|
||||
);
|
||||
work_size[0] = wg_number * wg_size;
|
||||
}
|
||||
|
||||
// host output vector
|
||||
std::vector<TYPE> output = generate_output<TYPE>(work_size[0], 9999);
|
||||
|
||||
// device output buffer
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(TYPE) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
// Execute test
|
||||
err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size);
|
||||
RETURN_ON_ERROR(err)
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
|
||||
|
||||
// Check output values
|
||||
for(size_t i = 0; i < output.size(); i++)
|
||||
{
|
||||
TYPE v = op(i, wg_size);
|
||||
if(!(are_equal(v, output[i], detail::make_value<TYPE>(0), op)))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name<cl_uint>().c_str(),
|
||||
format_value(v).c_str(), format_value(output[i]).c_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if queries returns correct values
|
||||
err = op.check_queries(kernels, device, context, queue);
|
||||
RETURN_ON_ERROR(err);
|
||||
|
||||
log_info("test_%s(%s) passed\n", op.str().c_str(), type_name<TYPE>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
for(auto& k : kernels)
|
||||
clReleaseKernel(k);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP
|
||||
@@ -1,26 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_spec_consts_attributes.hpp"
|
||||
#include "test_spec_consts_if.hpp"
|
||||
#include "test_spec_consts_init_vars.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,281 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// In this test we check if specialization constant can be successfully used
|
||||
// in kernel attribute cl::required_work_group_size(X, Y, Z).
|
||||
struct spec_const_required_work_group_size_test : public spec_constants_test<cl_uint>
|
||||
{
|
||||
// See generate_program() to know what set_spec_constant is for.
|
||||
spec_const_required_work_group_size_test(const bool set_spec_constant,
|
||||
const cl_uint work_group_size_0)
|
||||
: m_set_spec_constant(set_spec_constant),
|
||||
m_work_group_size_0(work_group_size_0)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
if(m_set_spec_constant)
|
||||
return "spec_const_in_required_work_group_size_" + std::to_string(m_work_group_size_0);
|
||||
else
|
||||
return "spec_const_in_required_work_group_size_not_set";
|
||||
}
|
||||
|
||||
bool set_local_size()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
|
||||
cl_device_id device,
|
||||
size_t work_group_size, // default work-group size
|
||||
cl_int& error)
|
||||
{
|
||||
if(m_set_spec_constant)
|
||||
{
|
||||
return m_work_group_size_0;
|
||||
}
|
||||
return size_t(1);
|
||||
}
|
||||
|
||||
cl_uint operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
(void) work_group_size;
|
||||
if(m_set_spec_constant)
|
||||
{
|
||||
return m_work_group_size_0;
|
||||
}
|
||||
return cl_uint(1);
|
||||
}
|
||||
|
||||
// Check if query for CL_KERNEL_COMPILE_WORK_GROUP_SIZE using clGetKernelWorkGroupInfo
|
||||
// return correct values. It should return the work-group size specified by the
|
||||
// cl::required_work_group_size(X, Y, Z) qualifier.
|
||||
cl_int check_queries(const std::vector<cl_kernel>& kernels,
|
||||
cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue)
|
||||
{
|
||||
(void) device;
|
||||
(void) context;
|
||||
size_t compile_wg_size[] = { 1, 1, 1 };
|
||||
cl_int error = clGetKernelWorkGroupInfo(
|
||||
kernels[0], device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
|
||||
sizeof(compile_wg_size), compile_wg_size, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
|
||||
if(m_set_spec_constant)
|
||||
{
|
||||
if(compile_wg_size[0] != m_work_group_size_0
|
||||
|| compile_wg_size[1] != 1
|
||||
|| compile_wg_size[2] != 1)
|
||||
{
|
||||
error = -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(compile_wg_size[0] != 1
|
||||
|| compile_wg_size[1] != 1
|
||||
|| compile_wg_size[2] != 1)
|
||||
{
|
||||
error = -1;
|
||||
}
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
// Sets spec constant
|
||||
cl_int set_spec_constants(const cl_program& program)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
if(m_set_spec_constant)
|
||||
{
|
||||
error = clSetProgramSpecializationConstant(
|
||||
program, cl_uint(1), sizeof(cl_uint), static_cast<void*>(&m_work_group_size_0)
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
// Each work-item writes get_local_size(0) to output[work-item-global-id]
|
||||
std::string generate_program(bool with_attribute)
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string att = " ";
|
||||
if(with_attribute)
|
||||
{
|
||||
std::string work_group_size_0 = "1";
|
||||
if(m_set_spec_constant)
|
||||
{
|
||||
work_group_size_0 = std::to_string(m_work_group_size_0);
|
||||
}
|
||||
att = "\n__attribute__((reqd_work_group_size(" + work_group_size_0 + ",1,1)))\n";
|
||||
}
|
||||
return
|
||||
"__kernel" + att + "void " + this->get_kernel_name() + "(global uint *output)\n"
|
||||
"{\n"
|
||||
" uint gid = get_global_id(0);\n"
|
||||
" output[gid] = get_local_size(0);\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
std::string att = "";
|
||||
if(with_attribute)
|
||||
{
|
||||
att = "[[cl::required_work_group_size(spec1, 1, 1)]]\n";
|
||||
}
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_spec_constant>\n"
|
||||
"using namespace cl;\n"
|
||||
"spec_constant<uint, 1> spec1{1};\n"
|
||||
+ att +
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
|
||||
"{\n"
|
||||
" uint gid = get_global_id(0);\n"
|
||||
" output[gid] = get_local_size(0);\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
// Each work-item writes get_local_size(0) to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
return generate_program(true);
|
||||
}
|
||||
|
||||
private:
|
||||
bool m_set_spec_constant;
|
||||
cl_uint m_work_group_size_0;
|
||||
};
|
||||
|
||||
// This function return max work-group size that can be used
|
||||
// for kernels defined in source
|
||||
size_t get_max_wg_size(const std::string& source,
|
||||
const std::vector<std::string>& kernel_names,
|
||||
size_t work_group_size, // max wg size we want to have
|
||||
cl_device_id device,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
cl_int& err)
|
||||
{
|
||||
cl_program program;
|
||||
std::vector<cl_kernel> kernels;
|
||||
if(kernel_names.empty())
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "No kernel to run");
|
||||
}
|
||||
kernels.resize(kernel_names.size());
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0], "-cl-std=CL2.0", false);
|
||||
RETURN_ON_ERROR(err)
|
||||
for(size_t i = 1; i < kernels.size(); i++)
|
||||
{
|
||||
kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateKernel");
|
||||
}
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0]);
|
||||
RETURN_ON_ERROR(err)
|
||||
for(size_t i = 1; i < kernels.size(); i++)
|
||||
{
|
||||
kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateKernel");
|
||||
}
|
||||
#endif
|
||||
size_t wg_size = work_group_size;
|
||||
for(auto& k : kernels)
|
||||
{
|
||||
size_t max_wg_size;
|
||||
err = clGetKernelWorkGroupInfo(
|
||||
k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
|
||||
wg_size = (std::min)(wg_size, max_wg_size);
|
||||
}
|
||||
return wg_size;
|
||||
}
|
||||
|
||||
// Runs spec_const_required_work_group_size_test for every power-of-two
// work-group size up to the maximum supported one, then once more without
// setting the specialization constant.
// NOTE(review): `error` and `last_error` are kept under these exact names
// because RUN_SPEC_CONSTANTS_TEST_MACRO (defined elsewhere) presumably refers
// to them -- confirm against the macro definition.
AUTO_TEST_CASE(test_spec_constants_in_kernel_attributes)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // -----------------------------------------------------------------------------------
    // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
    // -----------------------------------------------------------------------------------
    // If ONLY_SPIRV_COMPILATION is defined we can't check the max work-group size for the
    // kernel because OpenCL kernel object is never created in that mode.
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    const size_t max_wg_size = 16;
#else
    // Determine the max work-group size usable in
    // [[cl::required_work_group_size(X, 1, 1)]] by building the kernel without
    // that attribute and querying which work-group size it supports.
    spec_const_required_work_group_size_test probe(true, 1);
    const size_t max_wg_size = get_max_wg_size(
        probe.generate_program(false), probe.get_kernel_names(),
        1024, // max wg size we want to test
        device, context, queue,
        error
    );
    RETURN_ON_ERROR_MSG(error, "Can't get max work-group size");
#endif

    // Specialization constant spec1 is set (kernel attribute is
    // [[cl::required_work_group_size(spec1, 1, 1)]]): try 1, 2, 4, ...
    size_t candidate = 1;
    while(candidate <= max_wg_size)
    {
        RUN_SPEC_CONSTANTS_TEST_MACRO(
            spec_const_required_work_group_size_test(true, candidate)
        );
        candidate *= 2;
    }

    // Specialization constant is NOT set. The second argument is deliberately
    // invalid; it is never applied, so the kernel attribute should be
    // [[cl::required_work_group_size(1, 1, 1)]].
    RUN_SPEC_CONSTANTS_TEST_MACRO(
        spec_const_required_work_group_size_test(false, 9999999)
    );

    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP
|
||||
@@ -1,161 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// This class tests using specialization constant in if statement
|
||||
template <class T /* spec constant type*/>
|
||||
struct spec_const_in_if_test : public spec_constants_test<cl_uint>
|
||||
{
|
||||
// See generate_program() to know what set_spec_constant is for.
|
||||
spec_const_in_if_test(const bool set_spec_constant)
|
||||
: m_set_spec_constant(set_spec_constant)
|
||||
{
|
||||
static_assert(
|
||||
is_vector_type<T>::value == false,
|
||||
"Specialization constant can be only scalar int or float type"
|
||||
);
|
||||
switch (sizeof(T))
|
||||
{
|
||||
case 1:
|
||||
m_test_value = T(127);
|
||||
break;
|
||||
case 2:
|
||||
m_test_value = T(0xdeadU);
|
||||
break;
|
||||
// 4 and 8
|
||||
default:
|
||||
m_test_value = T(0xdeaddeadU);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "spec_const_in_if_" + type_name<T>();
|
||||
}
|
||||
|
||||
cl_uint operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
(void) work_group_size;
|
||||
if(m_set_spec_constant)
|
||||
{
|
||||
return m_test_value;
|
||||
}
|
||||
return static_cast<cl_uint>(i);
|
||||
}
|
||||
|
||||
// Sets spec constant
|
||||
cl_int set_spec_constants(const cl_program& program)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
if(m_set_spec_constant)
|
||||
{
|
||||
T spec1 = static_cast<T>(m_test_value);
|
||||
error = clSetProgramSpecializationConstant(
|
||||
program, cl_uint(1), sizeof(T), static_cast<void*>(&spec1)
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
// IF set_spec_constant == true:
|
||||
// each work-item writes T(m_test_value) to output[work-item-global-id]
|
||||
// Otherwise:
|
||||
// each work-item writes T(get_global_id(0)) to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string result = "gid";
|
||||
if(m_set_spec_constant)
|
||||
result = std::to_string(m_test_value);
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global uint *output)\n"
|
||||
"{\n"
|
||||
" uint gid = get_global_id(0);\n"
|
||||
" output[gid] = " + result + ";\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_spec_constant>\n"
|
||||
"using namespace cl;\n"
|
||||
"typedef " + type_name<T>() + " TYPE;\n"
|
||||
"spec_constant<TYPE, 1> spec1{TYPE(0)};\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
|
||||
"{\n"
|
||||
" uint gid = get_global_id(0);\n"
|
||||
" if(get(spec1) == TYPE(" + std::to_string(m_test_value) +"))\n"
|
||||
" {\n"
|
||||
" output[gid] = " + std::to_string(m_test_value) +";\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" output[gid] = gid;\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
bool m_set_spec_constant;
|
||||
cl_uint m_test_value;
|
||||
};
|
||||
|
||||
// Runs spec_const_in_if_test for every scalar type, once with the
// specialization constant set and once without. The cl_half and cl_double
// variants run only when the device reports cl_khr_fp16 / cl_khr_fp64.
// NOTE(review): `error` and `last_error` are kept under these exact names
// because RUN_SPEC_CONSTANTS_TEST_MACRO (defined elsewhere) presumably refers
// to them -- confirm against the macro definition.
AUTO_TEST_CASE(test_spec_constants_in_if_statement)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    const bool set_spec_const_options[] = { true, false };
    for(const bool set_constant : set_spec_const_options)
    {
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_char>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_uchar>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_int>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_uint>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_long>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_ulong>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_float>(set_constant));
        if(is_extension_available(device, "cl_khr_fp16"))
        {
            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_half>(set_constant));
        }
        if(is_extension_available(device, "cl_khr_fp64"))
        {
            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_double>(set_constant));
        }
    }

    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP
|
||||
@@ -1,174 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
// This class tests initializing variables with a specialization constant value.
|
||||
template <class T /* spec constant type*/>
|
||||
struct spec_const_init_var : public spec_constants_test<cl_uint>
|
||||
{
|
||||
// See generate_program() to know what set_spec_constant is for.
|
||||
spec_const_init_var(const bool set_spec_constant)
|
||||
: m_set_spec_constant(set_spec_constant)
|
||||
{
|
||||
static_assert(
|
||||
is_vector_type<T>::value == false,
|
||||
"Specialization constant can be only scalar int or float type"
|
||||
);
|
||||
switch (sizeof(T))
|
||||
{
|
||||
case 1:
|
||||
m_test_value = T(127);
|
||||
break;
|
||||
case 2:
|
||||
m_test_value = T(0xdeadU);
|
||||
break;
|
||||
// 4 and 8
|
||||
default:
|
||||
m_test_value = T(0xdeaddeadU);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::string str()
|
||||
{
|
||||
return "spec_const_init_var_" + type_name<T>();
|
||||
}
|
||||
|
||||
cl_uint operator()(size_t i, size_t work_group_size)
|
||||
{
|
||||
(void) work_group_size;
|
||||
if(m_set_spec_constant)
|
||||
{
|
||||
return m_test_value;
|
||||
}
|
||||
return static_cast<cl_uint>(i);
|
||||
}
|
||||
|
||||
// Sets spec constant
|
||||
cl_int set_spec_constants(const cl_program& program)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
if(m_set_spec_constant)
|
||||
{
|
||||
T spec = static_cast<T>(m_test_value);
|
||||
// spec1
|
||||
error = clSetProgramSpecializationConstant(
|
||||
program, cl_uint(1), sizeof(T), static_cast<void*>(&spec)
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
|
||||
|
||||
// spec2
|
||||
error = clSetProgramSpecializationConstant(
|
||||
program, cl_uint(2), sizeof(T), static_cast<void*>(&spec)
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
// IF set_spec_constant == true:
|
||||
// each work-item writes T(m_test_value) to output[work-item-global-id]
|
||||
// Otherwise:
|
||||
// each work-item writes T(get_global_id(0)) to output[work-item-global-id]
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
std::string result = "gid";
|
||||
if(m_set_spec_constant)
|
||||
result = std::to_string(m_test_value);
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global uint *output)\n"
|
||||
"{\n"
|
||||
" uint gid = get_global_id(0);\n"
|
||||
" output[gid] = " + result + ";\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_spec_constant>\n"
|
||||
"using namespace cl;\n"
|
||||
"typedef " + type_name<T>() + " TYPE;\n"
|
||||
"spec_constant<TYPE, 1> spec1{TYPE(0)};\n"
|
||||
"spec_constant<TYPE, 2> spec2{TYPE(0)};\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
|
||||
"{\n"
|
||||
" uint gid = get_global_id(0);\n"
|
||||
" TYPE var1(spec1.get());\n"
|
||||
" TYPE var2(spec2);\n"
|
||||
" TYPE var3; var3 = spec2;\n"
|
||||
" if((var1 == TYPE(" + std::to_string(m_test_value) +")) "
|
||||
"&& (var2 == TYPE(" + std::to_string(m_test_value) +"))\n"
|
||||
"&& (var3 == TYPE(" + std::to_string(m_test_value) +")))\n"
|
||||
" {\n"
|
||||
" output[gid] = " + std::to_string(m_test_value) +";\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" output[gid] = gid;\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
bool m_set_spec_constant;
|
||||
cl_uint m_test_value;
|
||||
};
|
||||
|
||||
// Runs spec_const_init_var for every scalar type, once with the
// specialization constants set and once without. The cl_half and cl_double
// variants run only when the device reports cl_khr_fp16 / cl_khr_fp64.
// NOTE(review): `error` and `last_error` are kept under these exact names
// because RUN_SPEC_CONSTANTS_TEST_MACRO (defined elsewhere) presumably refers
// to them -- confirm against the macro definition.
AUTO_TEST_CASE(test_spec_constants_initializing_variables)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    const bool set_spec_const_options[] = { true, false };
    for(const bool set_constant : set_spec_const_options)
    {
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_char>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_uchar>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_int>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_uint>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_long>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_ulong>(set_constant));
        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_float>(set_constant));
        if(is_extension_available(device, "cl_khr_fp16"))
        {
            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_half>(set_constant));
        }
        if(is_extension_available(device, "cl_khr_fp64"))
        {
            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_double>(set_constant));
        }
    }

    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fe4f34d616ed7ef70e870c22078f60655f68b0c5191c8d8b9d045dd0e7390bc2
|
||||
size 5529152
|
||||
@@ -1,7 +0,0 @@
|
||||
# Build description of the OpenCL C++ sub-groups test module.
# NOTE(review): MODULE_NAME and <MODULE_NAME>_SOURCES are presumably consumed
# by the shared CMakeCommon.txt included at the end -- confirm there.
set(MODULE_NAME CPP_SUBGROUPS)

# main.cpp is the only source file listed for this module.
set(${MODULE_NAME}_SOURCES main.cpp)

include(../../CMakeCommon.txt)
|
||||
@@ -1,97 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
|
||||
// Operation selector used as a template argument by the input/output
// generators of the sub-group tests.
enum class work_group_op : int
{
    add,
    min,
    max
};
|
||||
|
||||
std::string to_string(work_group_op op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return "add";
|
||||
case work_group_op::min:
|
||||
return "min";
|
||||
case work_group_op::max:
|
||||
return "max";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::vector<CL_INT_TYPE> generate_input(size_t count, size_t wg_size)
|
||||
{
|
||||
std::vector<CL_INT_TYPE> input(count, CL_INT_TYPE(1));
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return input;
|
||||
case work_group_op::min:
|
||||
{
|
||||
size_t j = wg_size;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<CL_INT_TYPE>(j);
|
||||
j--;
|
||||
if(j == 0)
|
||||
{
|
||||
j = wg_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case work_group_op::max:
|
||||
{
|
||||
size_t j = 0;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<CL_INT_TYPE>(j);
|
||||
j++;
|
||||
if(j == wg_size)
|
||||
{
|
||||
j = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::vector<CL_INT_TYPE> generate_output(size_t count, size_t wg_size)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return std::vector<CL_INT_TYPE>(count, CL_INT_TYPE(0));
|
||||
case work_group_op::min:
|
||||
return std::vector<CL_INT_TYPE>(count, (std::numeric_limits<CL_INT_TYPE>::max)());
|
||||
case work_group_op::max:
|
||||
return std::vector<CL_INT_TYPE>(count, (std::numeric_limits<CL_INT_TYPE>::min)());
|
||||
}
|
||||
return std::vector<CL_INT_TYPE>(count, CL_INT_TYPE(0));
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP
|
||||
@@ -1,29 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_sg_all.hpp"
|
||||
#include "test_sg_any.hpp"
|
||||
#include "test_sg_broadcast.hpp"
|
||||
#include "test_sg_reduce.hpp"
|
||||
#include "test_sg_scan_inclusive.hpp"
|
||||
#include "test_sg_scan_exclusive.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,221 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
// Returns the OpenCL C++ source of the test_sg_all kernel. Each work-item
// evaluates sub_group_all(input[tid] < input[tid+1]) and stores 1 in
// output[tid] when the result is true, 0 otherwise.
std::string generate_sg_all_kernel_code()
{
    const std::string prologue =
        "#include <opencl_memory>\n"
        "#include <opencl_work_item>\n"
        "#include <opencl_work_group>\n"
        "using namespace cl;\n";

    const std::string kernel =
        "__kernel void test_sg_all(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
        "{\n"
        " ulong tid = get_global_id(0);\n"
        " bool result = sub_group_all(input[tid] < input[tid+1]);\n"
        " if(!result) {\n output[tid] = 0;\n return;\n }\n"
        " output[tid] = 1;\n"
        "}\n";

    return prologue + kernel;
}
|
||||
|
||||
int verify_sg_all(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < count; i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size)
|
||||
{
|
||||
// sub-group all
|
||||
bool all = true;
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if(!(in[i+j+k] < in[i+j+k+1]))
|
||||
{
|
||||
all = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert bool to uint
|
||||
cl_uint all_uint = all ? 1 : 0;
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (all_uint != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_all %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(all_uint),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_input_sg_all(size_t count, size_t wg_size)
|
||||
{
|
||||
std::vector<cl_uint> input(count, cl_uint(0));
|
||||
size_t j = wg_size;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<cl_uint>(i);
|
||||
// In one place in ~half of work-groups (input[tid] < input[tid+1]) will
|
||||
// generate false, it means that for sub_group_all(input[tid] < input[tid+1])
|
||||
// should return false for all sub-groups in that work-groups
|
||||
if((j == wg_size/2) && (i > count/2))
|
||||
{
|
||||
input[i] = input[i - 1];
|
||||
}
|
||||
j--;
|
||||
if(j == 0)
|
||||
{
|
||||
j = wg_size;
|
||||
}
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_output_sg_all(size_t count, size_t wg_size)
|
||||
{
|
||||
(void) wg_size;
|
||||
return std::vector<cl_uint>(count, cl_uint(1));
|
||||
}
|
||||
|
||||
// Runs the sub_group_all test on `device`.
//
// Builds the "test_sg_all" kernel from generate_sg_all_kernel_code(), queries
// the kernel's work-group size and the maximum sub-group size for that
// work-group size, launches the kernel over `count` work-items rounded up to a
// whole number of work-groups, and checks the device output with
// verify_sg_all().
//
// Returns the status of the last OpenCL call when verification succeeds.
// Failures are reported through the RETURN_ON_* macros, which are defined
// outside this header.
// NOTE(review): the early RETURN_ON_* exits presumably still skip the release
// calls at the end of the function - confirm against the macro definitions.
int sub_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
{
    cl_mem buffers[2];
    cl_program program;
    cl_kernel kernel;
    size_t wg_size;
    size_t sg_max_size;
    size_t work_size[1];
    int err;

    std::string code_str = generate_sg_all_kernel_code();
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all");
    RETURN_ON_ERROR(err)
    return err;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
    return CL_SUCCESS;
#else
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all");
    RETURN_ON_ERROR(err)
#endif

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")

    // Maximum sub-group size for a 1-D NDRange whose local size is wg_size.
    size_t param_value_size = 0;
    err = clGetKernelSubGroupInfo(
        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
        sizeof(size_t), static_cast<void*>(&wg_size),
        sizeof(size_t), static_cast<void*>(&sg_max_size),
        &param_value_size
    );
    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")

    // Verify size of returned param
    if(param_value_size != sizeof(size_t))
    {
        RETURN_ON_ERROR_MSG(-1,
            "Returned size of max sub group size not valid! (Expected %zu, got %zu)\n",
            sizeof(size_t),
            param_value_size
        )
    }

    // Calculate global work size: round count up to whole work-groups using
    // integer arithmetic only.
    const size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
    const size_t flat_work_size = wg_number * wg_size;
    work_size[0] = flat_work_size;

    // The kernel reads input[tid + 1], hence one extra input element.
    std::vector<cl_uint> input = generate_input_sg_all(flat_work_size + 1, wg_size);
    std::vector<cl_uint> output = generate_output_sg_all(flat_work_size, wg_size);

    buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                sizeof(cl_uint) * input.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                sizeof(cl_uint) * output.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    err = clEnqueueWriteBuffer(
        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
        static_cast<void *>(input.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");

    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");

    err = clEnqueueReadBuffer(
        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
        static_cast<void *>(output.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");

    const int verify_result = verify_sg_all(input, output, flat_work_size, wg_size, sg_max_size);

    // Release the OpenCL objects before reporting the verdict so a failed
    // verification does not leak them.
    clReleaseMemObject(buffers[0]);
    clReleaseMemObject(buffers[1]);
    clReleaseKernel(kernel);
    clReleaseProgram(program);

    if (verify_result != CL_SUCCESS)
    {
        RETURN_ON_ERROR_MSG(-1, "sub_group_all failed");
    }
    log_info("sub_group_all passed\n");
    return err;
}
|
||||
|
||||
// Registered test case: runs sub_group_all() with n_elems work-items and
// returns its status.
AUTO_TEST_CASE(test_sub_group_all)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int status = sub_group_all(device, context, queue, n_elems);
    CHECK_ERROR(status)
    return status;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP
|
||||
@@ -1,221 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
// Returns the OpenCL C++ source of the "test_sg_any" kernel.
//
// Each work-item evaluates (input[tid] == input[tid+1]), feeds it to
// sub_group_any() and stores 1 or 0 in output[tid] depending on the result.
std::string generate_sg_any_kernel_code()
{
    // One entry per source line; the newline is appended while joining.
    static const char *const kernel_lines[] = {
        "#include <opencl_memory>",
        "#include <opencl_work_item>",
        "#include <opencl_work_group>",
        "using namespace cl;",
        "__kernel void test_sg_any(global_ptr<uint[]> input, global_ptr<uint[]> output)",
        "{",
        " ulong tid = get_global_id(0);",
        " bool result = sub_group_any(input[tid] == input[tid+1]);",
        " if(!result) {",
        " output[tid] = 0;",
        " return;",
        " }",
        " output[tid] = 1;",
        "}"
    };

    std::string source;
    for (const char *line : kernel_lines)
    {
        source += line;
        source += '\n';
    }
    return source;
}
|
||||
|
||||
int verify_sg_any(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < count; i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size)
|
||||
{
|
||||
// sub-group any
|
||||
bool any = false;
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if(in[i+j+k] == in[i+j+k+1])
|
||||
{
|
||||
any = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert bool to uint
|
||||
cl_uint any_uint = any ? 1 : 0;
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (any_uint != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(any_uint),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_input_sg_any(size_t count, size_t wg_size)
|
||||
{
|
||||
std::vector<cl_uint> input(count, cl_uint(0));
|
||||
size_t j = wg_size;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<cl_uint>(i);
|
||||
// In one place in ~half of work-groups (input[tid] == input[tid+1]) will
|
||||
// generate true, it means that for sub_group_all(input[tid] == input[tid+1])
|
||||
// should return false for one sub-group in that work-groups
|
||||
if((j == wg_size/2) && (i > count/2))
|
||||
{
|
||||
input[i] = input[i - 1];
|
||||
}
|
||||
j--;
|
||||
if(j == 0)
|
||||
{
|
||||
j = wg_size;
|
||||
}
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_output_sg_any(size_t count, size_t wg_size)
|
||||
{
|
||||
(void) wg_size;
|
||||
return std::vector<cl_uint>(count, cl_uint(1));
|
||||
}
|
||||
|
||||
// Runs the sub_group_any test on `device`.
//
// Builds the "test_sg_any" kernel from generate_sg_any_kernel_code(), queries
// the kernel's work-group size and the maximum sub-group size for that
// work-group size, launches the kernel over `count` work-items rounded up to a
// whole number of work-groups, and checks the device output with
// verify_sg_any().
//
// Returns the status of the last OpenCL call when verification succeeds.
// Failures are reported through the RETURN_ON_* macros, which are defined
// outside this header.
// NOTE(review): the early RETURN_ON_* exits presumably still skip the release
// calls at the end of the function - confirm against the macro definitions.
int sub_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
{
    cl_mem buffers[2];
    cl_program program;
    cl_kernel kernel;
    size_t wg_size;
    size_t sg_max_size;
    size_t work_size[1];
    int err;

    std::string code_str = generate_sg_any_kernel_code();
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any");
    RETURN_ON_ERROR(err)
    return err;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
    return CL_SUCCESS;
#else
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any");
    RETURN_ON_ERROR(err)
#endif

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")

    // Maximum sub-group size for a 1-D NDRange whose local size is wg_size.
    size_t param_value_size = 0;
    err = clGetKernelSubGroupInfo(
        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
        sizeof(size_t), static_cast<void*>(&wg_size),
        sizeof(size_t), static_cast<void*>(&sg_max_size),
        &param_value_size
    );
    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")

    // Verify size of returned param
    if(param_value_size != sizeof(size_t))
    {
        RETURN_ON_ERROR_MSG(-1,
            "Returned size of max sub group size not valid! (Expected %zu, got %zu)\n",
            sizeof(size_t),
            param_value_size
        )
    }

    // Calculate global work size: round count up to whole work-groups using
    // integer arithmetic only.
    const size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
    const size_t flat_work_size = wg_number * wg_size;
    work_size[0] = flat_work_size;

    // The kernel reads input[tid + 1], hence one extra input element.
    std::vector<cl_uint> input = generate_input_sg_any(flat_work_size + 1, wg_size);
    std::vector<cl_uint> output = generate_output_sg_any(flat_work_size, wg_size);

    buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                sizeof(cl_uint) * input.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                sizeof(cl_uint) * output.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    err = clEnqueueWriteBuffer(
        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
        static_cast<void *>(input.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");

    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");

    err = clEnqueueReadBuffer(
        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
        static_cast<void *>(output.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");

    const int verify_result = verify_sg_any(input, output, flat_work_size, wg_size, sg_max_size);

    // Release the OpenCL objects before reporting the verdict so a failed
    // verification does not leak them.
    clReleaseMemObject(buffers[0]);
    clReleaseMemObject(buffers[1]);
    clReleaseKernel(kernel);
    clReleaseProgram(program);

    if (verify_result != CL_SUCCESS)
    {
        RETURN_ON_ERROR_MSG(-1, "sub_group_any failed");
    }
    log_info("sub_group_any passed\n");
    return err;
}
|
||||
|
||||
// Registered test case: runs sub_group_any() with n_elems work-items and
// returns its status.
AUTO_TEST_CASE(test_sub_group_any)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int status = sub_group_any(device, context, queue, n_elems);
    CHECK_ERROR(status)
    return status;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP
|
||||
@@ -1,206 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
// Returns the OpenCL C++ source of the "test_sg_broadcast" kernel.
//
// Each work-item stores sub_group_broadcast(input[tid], 0) in output[tid].
std::string generate_sg_broadcast_kernel_code()
{
    // One entry per source line; the newline is appended while joining.
    static const char *const kernel_lines[] = {
        "#include <opencl_memory>",
        "#include <opencl_work_item>",
        "#include <opencl_work_group>",
        "using namespace cl;",
        "__kernel void test_sg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)",
        "{",
        " ulong tid = get_global_id(0);",
        " uint result = sub_group_broadcast(input[tid], 0);",
        " output[tid] = result;",
        "}"
    };

    std::string source;
    for (const char *line : kernel_lines)
    {
        source += line;
        source += '\n';
    }
    return source;
}
|
||||
|
||||
int
|
||||
verify_sg_broadcast(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < count; i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size)
|
||||
{
|
||||
// sub-group broadcast
|
||||
cl_uint broadcast_result = in[i+j];
|
||||
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (broadcast_result != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(broadcast_result),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_input_sg_broadcast(size_t count, size_t wg_size)
|
||||
{
|
||||
std::vector<cl_uint> input(count, cl_uint(0));
|
||||
size_t j = wg_size;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<cl_uint>(j);
|
||||
j--;
|
||||
if(j == 0)
|
||||
{
|
||||
j = wg_size;
|
||||
}
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_output_sg_broadcast(size_t count, size_t wg_size)
|
||||
{
|
||||
(void) wg_size;
|
||||
return std::vector<cl_uint>(count, cl_uint(1));
|
||||
}
|
||||
|
||||
// Runs the sub_group_broadcast test on `device`.
//
// Builds the "test_sg_broadcast" kernel from
// generate_sg_broadcast_kernel_code(), queries the kernel's work-group size
// and the maximum sub-group size for that work-group size, launches the kernel
// over `count` work-items rounded up to a whole number of work-groups, and
// checks the device output with verify_sg_broadcast().
//
// Returns the status of the last OpenCL call when verification succeeds.
// Failures are reported through the RETURN_ON_* macros, which are defined
// outside this header.
// NOTE(review): the early RETURN_ON_* exits presumably still skip the release
// calls at the end of the function - confirm against the macro definitions.
int sub_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
{
    cl_mem buffers[2];
    cl_program program;
    cl_kernel kernel;
    size_t wg_size;
    size_t sg_max_size;
    size_t work_size[1];
    int err;

    // Get kernel source code
    std::string code_str = generate_sg_broadcast_kernel_code();

// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast");
    RETURN_ON_ERROR(err)
    return err;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
    return CL_SUCCESS;
#else
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast");
    RETURN_ON_ERROR(err)
#endif

    // Get max flat workgroup size
    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")

    // Maximum sub-group size for a 1-D NDRange whose local size is wg_size.
    size_t param_value_size = 0;
    err = clGetKernelSubGroupInfo(
        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
        sizeof(size_t), static_cast<void*>(&wg_size),
        sizeof(size_t), static_cast<void*>(&sg_max_size),
        &param_value_size
    );
    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")

    // Verify size of returned param
    if(param_value_size != sizeof(size_t))
    {
        RETURN_ON_ERROR_MSG(-1,
            "Returned size of max sub group size not valid! (Expected %zu, got %zu)\n",
            sizeof(size_t),
            param_value_size
        )
    }

    // Calculate global work size: round count up to whole work-groups using
    // integer arithmetic only.
    const size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
    const size_t flat_work_size = wg_number * wg_size;
    work_size[0] = flat_work_size;

    std::vector<cl_uint> input = generate_input_sg_broadcast(flat_work_size, wg_size);
    std::vector<cl_uint> output = generate_output_sg_broadcast(flat_work_size, wg_size);

    buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                sizeof(cl_uint) * input.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                sizeof(cl_uint) * output.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    err = clEnqueueWriteBuffer(
        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
        static_cast<void *>(input.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");

    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");

    err = clEnqueueReadBuffer(
        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
        static_cast<void *>(output.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");

    int result = verify_sg_broadcast(input, output, work_size[0], wg_size, sg_max_size);

    // Release the OpenCL objects before reporting the verdict so a failed
    // verification does not leak them.
    clReleaseMemObject(buffers[0]);
    clReleaseMemObject(buffers[1]);
    clReleaseKernel(kernel);
    clReleaseProgram(program);

    RETURN_ON_ERROR_MSG(result, "sub_group_broadcast failed")
    log_info("sub_group_broadcast passed\n");
    return err;
}
|
||||
|
||||
// Registered test case: runs sub_group_broadcast() with n_elems work-items
// and returns its status.
AUTO_TEST_CASE(test_sub_group_broadcast)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int status = sub_group_broadcast(device, context, queue, n_elems);
    CHECK_ERROR(status)
    return status;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
|
||||
@@ -1,348 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::string generate_sg_reduce_kernel_code()
|
||||
{
|
||||
return "#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_work_group>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_sg_reduce(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
|
||||
"global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" ulong tid = get_global_id(0);\n"
|
||||
" " + type_name<CL_INT_TYPE>() + " result = sub_group_reduce<work_group_op::" + to_string(op) + ">(input[tid]);\n"
|
||||
" output[tid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_reduce_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE sum = 0;
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
sum += in[i + j + k];
|
||||
}
|
||||
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (sum != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(sum),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_reduce_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
|
||||
}
|
||||
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (min != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(min),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_reduce_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
|
||||
}
|
||||
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (max != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(max),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int verify_sg_reduce(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return verify_sg_reduce_add(in, out, wg_size, sg_size);
|
||||
case work_group_op::min:
|
||||
return verify_sg_reduce_min(in, out, wg_size, sg_size);
|
||||
case work_group_op::max:
|
||||
return verify_sg_reduce_max(in, out, wg_size, sg_size);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int sub_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
|
||||
{
|
||||
// don't run test for unsupported types
|
||||
if(!type_supported<CL_INT_TYPE>(device))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_mem buffers[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t wg_size;
|
||||
size_t sg_max_size;
|
||||
size_t work_size[1];
|
||||
int err;
|
||||
|
||||
std::string code_str = generate_sg_reduce_kernel_code<CL_INT_TYPE, op>();
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce");
|
||||
RETURN_ON_ERROR(err)
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
|
||||
return CL_SUCCESS;
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce");
|
||||
RETURN_ON_ERROR(err)
|
||||
#endif
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
|
||||
|
||||
size_t param_value_size = 0;
|
||||
err = clGetKernelSubGroupInfo(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
|
||||
sizeof(size_t), static_cast<void*>(&wg_size),
|
||||
sizeof(size_t), static_cast<void*>(&sg_max_size),
|
||||
¶m_value_size
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
|
||||
|
||||
// Verify size of returned param
|
||||
if(param_value_size != sizeof(size_t))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
|
||||
sizeof(size_t),
|
||||
param_value_size
|
||||
)
|
||||
}
|
||||
|
||||
// Calculate global work size
|
||||
size_t flat_work_size;
|
||||
size_t wg_number = static_cast<size_t>(
|
||||
std::ceil(static_cast<double>(count) / wg_size)
|
||||
);
|
||||
flat_work_size = wg_number * wg_size;
|
||||
work_size[0] = flat_work_size;
|
||||
|
||||
std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
buffers[1] =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
err = clEnqueueWriteBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
|
||||
static_cast<void *>(input.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
|
||||
|
||||
if (verify_sg_reduce<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "sub_group_reduce_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
}
|
||||
log_info("sub_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseMemObject(buffers[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
// Registered test case: runs the add reduction for cl_int, cl_uint, cl_long
// and cl_ulong. Returns CL_SUCCESS only if every run returned CL_SUCCESS,
// otherwise -1.
AUTO_TEST_CASE(test_sub_group_reduce_add)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;
    int status = CL_SUCCESS;

    status = sub_group_reduce<cl_int, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_reduce<cl_uint, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_reduce<cl_long, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_reduce<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Runs sub_group_reduce with work_group_op::min for cl_int, cl_uint, cl_long
// and cl_ulong. Every per-type status is handed to CHECK_ERROR and OR-ed into
// a combined status; the case returns -1 when the combined status is not
// CL_SUCCESS, and CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_sub_group_reduce_min)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    int status = sub_group_reduce<cl_int, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_reduce<cl_uint, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_reduce<cl_long, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_reduce<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Runs sub_group_reduce with work_group_op::max for cl_int, cl_uint, cl_long
// and cl_ulong. Every per-type status is handed to CHECK_ERROR and OR-ed into
// a combined status; the case returns -1 when the combined status is not
// CL_SUCCESS, and CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_sub_group_reduce_max)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    int status = sub_group_reduce<cl_int, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_reduce<cl_uint, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_reduce<cl_long, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_reduce<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP
|
||||
@@ -1,328 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::string generate_sg_scan_exclusive_kernel_code()
|
||||
{
|
||||
return "#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_work_group>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_sg_scan_exclusive(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
|
||||
"global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" ulong tid = get_global_id(0);\n"
|
||||
" " + type_name<CL_INT_TYPE>() + " result = sub_group_scan_exclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n"
|
||||
" output[tid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_exclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE sum = 0;
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (sum != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(sum),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
sum += in[i + j + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_exclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (min != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(min),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_exclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (max != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(max),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int verify_sg_scan_exclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return verify_sg_scan_exclusive_add(in, out, wg_size, sg_size);
|
||||
case work_group_op::min:
|
||||
return verify_sg_scan_exclusive_min(in, out, wg_size, sg_size);
|
||||
case work_group_op::max:
|
||||
return verify_sg_scan_exclusive_max(in, out, wg_size, sg_size);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int sub_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
|
||||
{
|
||||
// don't run test for unsupported types
|
||||
if(!type_supported<CL_INT_TYPE>(device))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_mem buffers[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t wg_size;
|
||||
size_t sg_max_size;
|
||||
size_t work_size[1];
|
||||
int err;
|
||||
|
||||
std::string code_str = generate_sg_scan_exclusive_kernel_code<CL_INT_TYPE, op>();
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive");
|
||||
RETURN_ON_ERROR(err)
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
|
||||
return CL_SUCCESS;
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive");
|
||||
RETURN_ON_ERROR(err)
|
||||
#endif
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
|
||||
|
||||
size_t param_value_size = 0;
|
||||
err = clGetKernelSubGroupInfo(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
|
||||
sizeof(size_t), static_cast<void*>(&wg_size),
|
||||
sizeof(size_t), static_cast<void*>(&sg_max_size),
|
||||
¶m_value_size
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
|
||||
|
||||
// Verify size of returned param
|
||||
if(param_value_size != sizeof(size_t))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
|
||||
sizeof(size_t),
|
||||
param_value_size
|
||||
)
|
||||
}
|
||||
|
||||
// Calculate global work size
|
||||
size_t flat_work_size;
|
||||
size_t wg_number = static_cast<size_t>(
|
||||
std::ceil(static_cast<double>(count) / wg_size)
|
||||
);
|
||||
flat_work_size = wg_number * wg_size;
|
||||
work_size[0] = flat_work_size;
|
||||
|
||||
std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
buffers[1] =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
err = clEnqueueWriteBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
|
||||
static_cast<void *>(input.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
|
||||
|
||||
if (verify_sg_scan_exclusive<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "sub_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
}
|
||||
log_info("sub_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseMemObject(buffers[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
// Runs sub_group_scan_exclusive with work_group_op::add for cl_int, cl_uint,
// cl_long and cl_ulong. Every per-type status is handed to CHECK_ERROR and
// OR-ed into a combined status; the case returns -1 when the combined status
// is not CL_SUCCESS, and CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_sub_group_scan_exclusive_add)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    int status = sub_group_scan_exclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_exclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_exclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_exclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Runs sub_group_scan_exclusive with work_group_op::min for cl_int, cl_uint,
// cl_long and cl_ulong. Every per-type status is handed to CHECK_ERROR and
// OR-ed into `error`; the case returns -1 when `error` is not CL_SUCCESS, and
// CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_sub_group_scan_exclusive_min)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int local_error = CL_SUCCESS;

    // Each status must be accumulated into `error`; without that the final
    // check below could never observe a failure (the sibling add/max cases
    // accumulate in the same way).
    local_error = sub_group_scan_exclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    local_error = sub_group_scan_exclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    local_error = sub_group_scan_exclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    if(error != CL_SUCCESS)
        return -1;
    return CL_SUCCESS;
}
|
||||
|
||||
// Runs sub_group_scan_exclusive with work_group_op::max for cl_int, cl_uint,
// cl_long and cl_ulong. Every per-type status is handed to CHECK_ERROR and
// OR-ed into a combined status; the case returns -1 when the combined status
// is not CL_SUCCESS, and CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_sub_group_scan_exclusive_max)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    int status = sub_group_scan_exclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_exclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_exclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_exclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP
|
||||
@@ -1,335 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::string generate_sg_scan_inclusive_kernel_code()
|
||||
{
|
||||
return "#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_work_group>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_sg_scan_inclusive(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
|
||||
"global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" ulong tid = get_global_id(0);\n"
|
||||
" " + type_name<CL_INT_TYPE>() + " result = sub_group_scan_inclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n"
|
||||
" output[tid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_inclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE sum = 0;
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
sum += in[i + j + k];
|
||||
if (sum != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(sum),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_inclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
|
||||
if (min != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(min),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_inclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
|
||||
if (max != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(max),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int verify_sg_scan_inclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return verify_sg_scan_inclusive_add(in, out, wg_size, sg_size);
|
||||
case work_group_op::min:
|
||||
return verify_sg_scan_inclusive_min(in, out, wg_size, sg_size);
|
||||
case work_group_op::max:
|
||||
return verify_sg_scan_inclusive_max(in, out, wg_size, sg_size);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int sub_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
|
||||
{
|
||||
// don't run test for unsupported types
|
||||
if(!type_supported<CL_INT_TYPE>(device))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_mem buffers[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t wg_size;
|
||||
size_t sg_max_size;
|
||||
size_t work_size[1];
|
||||
int err;
|
||||
|
||||
std::string code_str = generate_sg_scan_inclusive_kernel_code<CL_INT_TYPE, op>();
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
|
||||
RETURN_ON_ERROR(err)
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
|
||||
return CL_SUCCESS;
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
|
||||
RETURN_ON_ERROR(err)
|
||||
#endif
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
|
||||
|
||||
size_t param_value_size = 0;
|
||||
err = clGetKernelSubGroupInfo(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
|
||||
sizeof(size_t), static_cast<void*>(&wg_size),
|
||||
sizeof(size_t), static_cast<void*>(&sg_max_size),
|
||||
¶m_value_size
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
|
||||
|
||||
// Verify size of returned param
|
||||
if(param_value_size != sizeof(size_t))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
|
||||
sizeof(size_t),
|
||||
param_value_size
|
||||
)
|
||||
}
|
||||
|
||||
// Calculate global work size
|
||||
size_t flat_work_size;
|
||||
size_t wg_number = static_cast<size_t>(
|
||||
std::ceil(static_cast<double>(count) / wg_size)
|
||||
);
|
||||
flat_work_size = wg_number * wg_size;
|
||||
work_size[0] = flat_work_size;
|
||||
|
||||
std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
buffers[1] =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
err = clEnqueueWriteBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
|
||||
static_cast<void *>(input.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
|
||||
|
||||
if (verify_sg_scan_inclusive<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "sub_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
}
|
||||
log_info("sub_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseMemObject(buffers[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
// Runs sub_group_scan_inclusive with work_group_op::add for cl_int, cl_uint,
// cl_long and cl_ulong. Every per-type status is handed to CHECK_ERROR and
// OR-ed into a combined status; the case returns -1 when the combined status
// is not CL_SUCCESS, and CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_sub_group_scan_inclusive_add)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    int status = sub_group_scan_inclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_inclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_inclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_inclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Runs sub_group_scan_inclusive with work_group_op::min for cl_int, cl_uint,
// cl_long and cl_ulong. Every per-type status is handed to CHECK_ERROR and
// OR-ed into a combined status; the case returns -1 when the combined status
// is not CL_SUCCESS, and CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_sub_group_scan_inclusive_min)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    int status = sub_group_scan_inclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_inclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_inclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_inclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Runs sub_group_scan_inclusive with work_group_op::max for cl_int, cl_uint,
// cl_long and cl_ulong. Every per-type status is handed to CHECK_ERROR and
// OR-ed into a combined status; the case returns -1 when the combined status
// is not CL_SUCCESS, and CL_SUCCESS otherwise.
AUTO_TEST_CASE(test_sub_group_scan_inclusive_max)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS;

    int status = sub_group_scan_inclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_inclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_inclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    status = sub_group_scan_inclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
|
||||
@@ -1,7 +0,0 @@
|
||||
# Build description of the CPP_SYNCHRONIZATION test module.
# Sets MODULE_NAME and the matching <MODULE_NAME>_SOURCES list, then includes
# the shared ../../CMakeCommon.txt (presumably the consumer of both variables;
# confirm in that file).
set(MODULE_NAME CPP_SYNCHRONIZATION)
set(${MODULE_NAME}_SOURCES main.cpp)
include(../../CMakeCommon.txt)
|
||||
@@ -1,27 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_work_group_barrier.hpp"
|
||||
#include "test_sub_group_barrier.hpp"
|
||||
#include "named_barrier/test_spec_example.hpp"
|
||||
#include "named_barrier/test_named_barrier.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
auto& tests = autotest::test_suite::global_test_suite().test_defs;
|
||||
return runTestHarness(argc, argv, tests.size(), tests.data(), false, 0);
|
||||
}
|
||||
@@ -1,172 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP
|
||||
|
||||
#include <vector>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../../common.hpp"
|
||||
#include "../../funcs_test_utils.hpp"
|
||||
|
||||
// Runs one named-barrier test: calls run_work_group_named_barrier_barrier_test
// with TEST_CLASS, passes the result to CHECK_ERROR and ORs it into `error`.
// The expansion is a plain statement sequence and uses `device`, `context`,
// `queue`, `num_elements`, `last_error` and `error` from the expansion site.
#define RUN_WG_NAMED_BARRIER_TEST_MACRO(TEST_CLASS)           \
    last_error = run_work_group_named_barrier_barrier_test(   \
        device, context, queue, num_elements, TEST_CLASS      \
    );                                                        \
    CHECK_ERROR(last_error)                                   \
    error |= last_error;
|
||||
|
||||
namespace named_barrier {
|
||||
|
||||
// Interface shared by the work-group named-barrier tests. A concrete test
// supplies its name, its program source and the expected output values; the
// default execute / get_max_local_size / enforce_uniform implementations below
// may be overridden.
struct work_group_named_barrier_test_base : public detail::base_func_type<cl_uint>
{
    // Test name.
    virtual std::string str() = 0;

    // OpenCL program source; the program is assumed to contain exactly one
    // kernel.
    virtual std::string generate_program() = 0;

    // Value expected in output_buffer[i]. The last parameter is spelled
    // `mas_sub_group_size` in this interface (presumably the maximum
    // sub-group size).
    virtual cl_uint operator()(size_t i, size_t work_group_size, size_t mas_sub_group_size) = 0;

    // Default kernel execution for the typical case: the output buffer is the
    // kernel's only argument and the kernel is enqueued once as a 1-D range of
    // work_size items with local size work_group_size.
    virtual cl_int execute(const cl_kernel kernel,
                           const cl_mem output_buffer,
                           const cl_command_queue& queue,
                           const size_t work_size,
                           const size_t work_group_size)
    {
        cl_int status = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
        RETURN_ON_CL_ERROR(status, "clSetKernelArg")

        status = clEnqueueNDRangeKernel(
            queue, kernel, 1,
            NULL, &work_size, &work_group_size,
            0, NULL, NULL
        );
        RETURN_ON_CL_ERROR(status, "clEnqueueNDRangeKernel")
        return status;
    }

    // One-dimensional work-group size to use: the smaller of the requested
    // default (work_group_size) and CL_KERNEL_WORK_GROUP_SIZE for this kernel
    // and device. The status of the query is written to `error`.
    virtual size_t get_max_local_size(const cl_kernel kernel,
                                      const cl_device_id device,
                                      const size_t work_group_size, // default work-group size
                                      cl_int& error)
    {
        size_t kernel_limit;
        error = clGetKernelWorkGroupInfo(
            kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_limit), &kernel_limit, NULL
        );
        RETURN_ON_ERROR(error)
        return (kernel_limit < work_group_size) ? kernel_limit : work_group_size;
    }

    // Whether work-groups should be uniform; the default says no.
    virtual bool enforce_uniform()
    {
        return false;
    }
};
|
||||
|
||||
template <class work_group_named_barrier_test>
|
||||
int run_work_group_named_barrier_barrier_test(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
size_t count, work_group_named_barrier_test test)
|
||||
{
|
||||
cl_mem buffers[1];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t work_group_size;
|
||||
size_t work_size[1];
|
||||
cl_int err;
|
||||
|
||||
std::string code_str = test.generate_program();
|
||||
std::string kernel_name = test.get_kernel_name();
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
|
||||
RETURN_ON_ERROR(err)
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
|
||||
RETURN_ON_ERROR(err)
|
||||
#endif
|
||||
|
||||
// Find the max possible wg size for among all the kernels
|
||||
work_group_size = test.get_max_local_size(kernel, device, 256, err);
|
||||
RETURN_ON_ERROR(err);
|
||||
if(work_group_size == 0)
|
||||
{
|
||||
log_info("SKIPPED: Can't produce local size with enough sub-groups. Skipping tests.\n");
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
work_size[0] = count;
|
||||
// uniform work-group
|
||||
if(test.enforce_uniform())
|
||||
{
|
||||
size_t wg_number = static_cast<size_t>(
|
||||
std::ceil(static_cast<double>(work_size[0]) / work_group_size)
|
||||
);
|
||||
work_size[0] = wg_number * work_group_size;
|
||||
}
|
||||
|
||||
// host output vector
|
||||
std::vector<cl_uint> output = generate_output<cl_uint>(work_size[0], 9999);
|
||||
|
||||
// device output buffer
|
||||
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_uint) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer")
|
||||
|
||||
// Execute test kernels
|
||||
err = test.execute(kernel, buffers[0], queue, work_size[0], work_group_size);
|
||||
RETURN_ON_ERROR(err)
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer")
|
||||
|
||||
// Check output values
|
||||
for(size_t i = 0; i < output.size(); i++)
|
||||
{
|
||||
cl_uint v = test(i, work_group_size, i);
|
||||
if(!(are_equal(v, output[i], ::detail::make_value<cl_uint>(0), test)))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"test_%s(%s) failed. Expected: %s, got: %s", test.str().c_str(), type_name<cl_uint>().c_str(),
|
||||
format_value(v).c_str(), format_value(output[i]).c_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
log_info("test_%s(%s) passed\n", test.str().c_str(), type_name<cl_uint>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
} // namespace named_barrier
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP
|
||||
@@ -1,491 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
namespace named_barrier {
|
||||
|
||||
struct local_fence_named_barrier_test : public work_group_named_barrier_test_base
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "local_fence";
|
||||
}
|
||||
|
||||
// Return value that is expected to be in output_buffer[i]
|
||||
cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
|
||||
{
|
||||
return static_cast<cl_uint>(i);
|
||||
}
|
||||
|
||||
// At the end every work-item writes its global id to ouput[work-item-global-id].
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global uint *output, "
|
||||
"local uint * lmem)\n"
|
||||
"{\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" output[gid] = gid;\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#define cl_khr_subgroup_named_barrier\n"
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_synchronization>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
|
||||
"local_ptr<uint[]> lmem)\n"
|
||||
"{\n\n"
|
||||
" local<work_group_named_barrier> a(1);\n"
|
||||
" local<work_group_named_barrier> b(2);\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" size_t lid = get_local_id(0);\n"
|
||||
" size_t value;\n"
|
||||
" if(get_num_sub_groups() == 1)\n"
|
||||
" {\n"
|
||||
" size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n"
|
||||
" size_t other_gid = (gid - lid) + other_lid;\n"
|
||||
" lmem[other_lid] = other_gid;\n"
|
||||
" a.wait(mem_fence::local);\n"
|
||||
" value = lmem[lid];" // lmem[lid] shoule be equal to gid
|
||||
" }\n"
|
||||
" else if(get_num_sub_groups() == 2)\n"
|
||||
" {\n"
|
||||
" size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n"
|
||||
" size_t other_gid = (gid - lid) + other_lid;\n"
|
||||
" lmem[other_lid] = other_gid;\n"
|
||||
" b.wait(mem_fence::local);\n"
|
||||
" value = lmem[lid];" // lmem[lid] shoule be equal to gid
|
||||
" }\n"
|
||||
" else if(get_num_sub_groups() > 2)\n"
|
||||
" {\n"
|
||||
" if(get_sub_group_id() < 2)\n"
|
||||
" {\n"
|
||||
" const size_t two_first_subgroups = 2 * get_max_sub_group_size();"
|
||||
// local and global id of some work-item outside of work-item subgroup,
|
||||
// but within subgroups 0 and 1.
|
||||
" size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n"
|
||||
" size_t other_gid = (gid - lid) + other_lid;\n"
|
||||
" lmem[other_lid] = other_gid;\n"
|
||||
" b.wait(mem_fence::local);\n" // subgroup 0 and 1 are sync (local)
|
||||
" value = lmem[lid];" // lmem[lid] shoule be equal to gid
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" value = gid;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" output[gid] = value;\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t get_max_local_size(const cl_kernel kernel,
|
||||
const cl_device_id device,
|
||||
const size_t work_group_size, // default work-group size
|
||||
cl_int& error)
|
||||
{
|
||||
// Set size of the local memory, we need to to this to correctly calculate
|
||||
// max possible work-group size.
|
||||
size_t wg_size;
|
||||
for(wg_size = work_group_size; wg_size > 1; wg_size /= 2)
|
||||
{
|
||||
error = clSetKernelArg(kernel, 1, wg_size * sizeof(cl_uint), NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
|
||||
size_t max_wg_size;
|
||||
error = clGetKernelWorkGroupInfo(
|
||||
kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
|
||||
if(max_wg_size >= wg_size) break;
|
||||
}
|
||||
return wg_size;
|
||||
}
|
||||
|
||||
cl_int execute(const cl_kernel kernel,
|
||||
const cl_mem output_buffer,
|
||||
const cl_command_queue queue,
|
||||
const size_t work_size,
|
||||
const size_t work_group_size)
|
||||
{
|
||||
cl_int err;
|
||||
// Get context from queue
|
||||
cl_context context;
|
||||
err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo")
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
|
||||
err |= clSetKernelArg(kernel, 1, work_group_size * sizeof(cl_uint), NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg")
|
||||
|
||||
err = clEnqueueNDRangeKernel(
|
||||
queue, kernel, 1,
|
||||
NULL, &work_size, &work_group_size,
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
|
||||
|
||||
err = clFinish(queue);
|
||||
return err;
|
||||
}
|
||||
};
|
||||
|
||||
struct global_fence_named_barrier_test : public work_group_named_barrier_test_base
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "global_fence";
|
||||
}
|
||||
|
||||
// Return value that is expected to be in output_buffer[i]
|
||||
cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
|
||||
{
|
||||
return static_cast<cl_uint>(i % work_group_size);
|
||||
}
|
||||
|
||||
// At the end every work-item writes its local id to ouput[work-item-global-id].
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global uint * output, "
|
||||
"global uint * temp)\n"
|
||||
"{\n"
|
||||
"size_t gid = get_global_id(0);\n"
|
||||
"output[gid] = get_local_id(0);\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#define cl_khr_subgroup_named_barrier\n"
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_synchronization>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
|
||||
"global_ptr<uint[]> temp)\n"
|
||||
"{\n\n"
|
||||
" local<work_group_named_barrier> a(1);\n"
|
||||
" local<work_group_named_barrier> b(2);\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" size_t lid = get_local_id(0);\n"
|
||||
" size_t value;\n"
|
||||
" if(get_num_sub_groups() == 1)\n"
|
||||
" {\n"
|
||||
" size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n"
|
||||
" size_t other_gid = (gid - lid) + other_lid;\n"
|
||||
" temp[other_gid] = other_lid + 1;\n"
|
||||
" a.wait(mem_fence::global);\n"
|
||||
" size_t other_lid_same_subgroup = (lid + 2) % get_sub_group_size();\n"
|
||||
" size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n"
|
||||
" temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n"
|
||||
" a.wait(mem_fence::global, memory_scope_sub_group);\n"
|
||||
" value = temp[gid];" // temp[gid] shoule be equal to lid
|
||||
" }\n"
|
||||
" else if(get_num_sub_groups() == 2)\n"
|
||||
" {\n"
|
||||
" size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n"
|
||||
" size_t other_gid = (gid - lid) + other_lid;\n"
|
||||
" temp[other_gid] = other_lid + 1;\n"
|
||||
" b.wait(mem_fence::global);\n" // both subgroups wait, both are sync
|
||||
" size_t other_lid_same_subgroup = "
|
||||
"((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n"
|
||||
" size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n"
|
||||
" temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n"
|
||||
" b.wait(mem_fence::global, memory_scope_sub_group);\n" // both subgroups wait, sync only within subgroup
|
||||
" value = temp[gid];" // temp[gid] shoule be equal to lid
|
||||
" }\n"
|
||||
" else if(get_num_sub_groups() > 2)\n"
|
||||
" {\n"
|
||||
" if(get_sub_group_id() < 2)\n"
|
||||
" {\n"
|
||||
" const size_t two_first_subgroups = 2 * get_max_sub_group_size();"
|
||||
// local and global id of some work-item outside of work-item subgroup,
|
||||
// but within subgroups 0 and 1.
|
||||
" size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n"
|
||||
" size_t other_gid = (gid - lid) + other_lid;\n"
|
||||
" temp[other_gid] = other_lid + 1;\n"
|
||||
" b.wait(mem_fence::global);\n" // both subgroups wait, both are sync
|
||||
// local and global id of some other work-item within work-item subgroup
|
||||
" size_t other_lid_same_subgroup = "
|
||||
"((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n"
|
||||
" size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n"
|
||||
" temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n"
|
||||
" b.wait(mem_fence::global, memory_scope_sub_group);\n" // both subgroups wait, sync only within subgroup
|
||||
" value = temp[gid];" // temp[gid] shoule be equal to lid
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" value = lid;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" output[gid] = value;\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t get_max_local_size(const cl_kernel kernel,
|
||||
const cl_device_id device,
|
||||
const size_t work_group_size, // default work-group size
|
||||
cl_int& error)
|
||||
{
|
||||
size_t max_wg_size;
|
||||
error = clGetKernelWorkGroupInfo(
|
||||
kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
|
||||
return (std::min)(max_wg_size, work_group_size);
|
||||
}
|
||||
|
||||
cl_int execute(const cl_kernel kernel,
|
||||
const cl_mem output_buffer,
|
||||
const cl_command_queue queue,
|
||||
const size_t work_size,
|
||||
const size_t work_group_size)
|
||||
{
|
||||
cl_int err;
|
||||
// Get context from queue
|
||||
cl_context context;
|
||||
err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo")
|
||||
|
||||
// create temp buffer
|
||||
auto temp_buffer =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_uint) * work_size, NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer")
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg")
|
||||
|
||||
err = clEnqueueNDRangeKernel(
|
||||
queue, kernel, 1,
|
||||
NULL, &work_size, &work_group_size,
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
|
||||
|
||||
err = clFinish(queue);
|
||||
err |= clReleaseMemObject(temp_buffer);
|
||||
|
||||
return err;
|
||||
}
|
||||
};
|
||||
|
||||
struct global_local_fence_named_barrier_test : public work_group_named_barrier_test_base
|
||||
{
|
||||
std::string str()
|
||||
{
|
||||
return "global_local_fence";
|
||||
}
|
||||
|
||||
// Return value that is expected to be in output_buffer[i]
|
||||
cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
|
||||
{
|
||||
return static_cast<cl_uint>(i % work_group_size);
|
||||
}
|
||||
|
||||
// At the end every work-item writes its local id to ouput[work-item-global-id].
|
||||
std::string generate_program()
|
||||
{
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
return
|
||||
"__kernel void " + this->get_kernel_name() + "(global uint * output, "
|
||||
"global uint * temp,"
|
||||
"local uint * lmem)\n"
|
||||
"{\n"
|
||||
"size_t gid = get_global_id(0);\n"
|
||||
"output[gid] = get_local_id(0);\n"
|
||||
"}\n";
|
||||
|
||||
#else
|
||||
return
|
||||
"#define cl_khr_subgroup_named_barrier\n"
|
||||
"#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_synchronization>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
|
||||
"global_ptr<uint[]> temp,"
|
||||
"local_ptr<uint[]> lmem)\n"
|
||||
"{\n\n"
|
||||
" local<work_group_named_barrier> a(1);\n"
|
||||
" local<work_group_named_barrier> b(2);\n"
|
||||
" size_t gid = get_global_id(0);\n"
|
||||
" size_t lid = get_local_id(0);\n"
|
||||
" size_t value = 0;\n"
|
||||
" if(get_num_sub_groups() == 1)\n"
|
||||
" {\n"
|
||||
" size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n"
|
||||
" size_t other_gid = (gid - lid) + other_lid;\n"
|
||||
" lmem[other_lid] = other_gid;\n"
|
||||
" temp[other_gid] = other_lid;\n"
|
||||
" a.wait(mem_fence::local | mem_fence::global);\n"
|
||||
" if(lmem[lid] == gid) value = temp[gid];\n"
|
||||
" }\n"
|
||||
" else if(get_num_sub_groups() == 2)\n"
|
||||
" {\n"
|
||||
" size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n"
|
||||
" size_t other_gid = (gid - lid) + other_lid;\n"
|
||||
" lmem[other_lid] = other_gid;\n"
|
||||
" temp[other_gid] = other_lid;\n"
|
||||
" b.wait(mem_fence::local | mem_fence::global);\n"
|
||||
" if(lmem[lid] == gid) value = temp[gid];\n"
|
||||
" }\n"
|
||||
" else if(get_num_sub_groups() > 2)\n"
|
||||
" {\n"
|
||||
" if(get_sub_group_id() < 2)\n"
|
||||
" {\n"
|
||||
" const size_t two_first_subgroups = 2 * get_max_sub_group_size();"
|
||||
// local and global id of some work-item outside of work-item subgroup,
|
||||
// but within subgroups 0 and 1.
|
||||
" size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n"
|
||||
" size_t other_gid = (gid - lid) + other_lid;\n"
|
||||
" lmem[other_lid] = other_gid;\n"
|
||||
" temp[other_gid] = other_lid;\n"
|
||||
" b.wait(mem_fence::local | mem_fence::global);\n"
|
||||
" if(lmem[lid] == gid) value = temp[gid];\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" value = lid;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" output[gid] = value;\n"
|
||||
"}\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t get_max_local_size(const cl_kernel kernel,
|
||||
const cl_device_id device,
|
||||
const size_t work_group_size, // default work-group size
|
||||
cl_int& error)
|
||||
{
|
||||
// Set size of the local memory, we need to to this to correctly calculate
|
||||
// max possible work-group size.
|
||||
size_t wg_size;
|
||||
for(wg_size = work_group_size; wg_size > 1; wg_size /= 2)
|
||||
{
|
||||
error = clSetKernelArg(kernel, 2, wg_size * sizeof(cl_uint), NULL);
|
||||
RETURN_ON_CL_ERROR(error, "clSetKernelArg")
|
||||
|
||||
size_t max_wg_size;
|
||||
error = clGetKernelWorkGroupInfo(
|
||||
kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
|
||||
if(max_wg_size >= wg_size) break;
|
||||
}
|
||||
return wg_size;
|
||||
}
|
||||
|
||||
cl_int execute(const cl_kernel kernel,
|
||||
const cl_mem output_buffer,
|
||||
const cl_command_queue queue,
|
||||
const size_t work_size,
|
||||
const size_t work_group_size)
|
||||
{
|
||||
cl_int err;
|
||||
// Get context from queue
|
||||
cl_context context;
|
||||
err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo")
|
||||
|
||||
// create temp buffer
|
||||
auto temp_buffer =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_uint) * work_size, NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer")
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer);
|
||||
err |= clSetKernelArg(kernel, 2, work_group_size * sizeof(cl_uint), NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg")
|
||||
|
||||
err = clEnqueueNDRangeKernel(
|
||||
queue, kernel, 1,
|
||||
NULL, &work_size, &work_group_size,
|
||||
0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
|
||||
|
||||
err = clFinish(queue);
|
||||
err |= clReleaseMemObject(temp_buffer);
|
||||
|
||||
return err;
|
||||
}
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
// -------------------------- RUN TESTS -----------------------------------------
|
||||
// ------------------------------------------------------------------------------
|
||||
// Runs the three work-group named-barrier tests (local, global and combined
// fence). Outside the development-only build modes the tests are skipped when
// the device lacks cl_khr_subgroup_named_barrier, and fail when the device
// reports fewer than 8 named barriers.
AUTO_TEST_CASE(test_work_group_named_barrier)
(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

#if !(defined(DEVELOPMENT) && (defined(USE_OPENCLC_KERNELS) || defined(ONLY_SPIRV_COMPILATION)))
    const bool has_named_barriers = is_extension_available(device, "cl_khr_subgroup_named_barrier");
    if(!has_named_barriers)
    {
        log_info("SKIPPED: Extension `cl_khr_subgroup_named_barrier` is not supported. Skipping tests.\n");
        return CL_SUCCESS;
    }

    // An implementation shall support at least 8 named barriers per work-group. The exact
    // maximum number can be queried using clGetDeviceInfo with CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR
    // from the OpenCL 2.2 Extension Specification.
    cl_uint max_named_barriers;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR,
                            sizeof(cl_uint), &max_named_barriers, NULL);
    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")

    if(max_named_barriers < 8)
    {
        RETURN_ON_ERROR_MSG(-1, "Maximum number of named barriers must be at least 8.");
    }
#endif

    // Each macro expansion runs one test and ORs its status into `error`.
    RUN_WG_NAMED_BARRIER_TEST_MACRO(local_fence_named_barrier_test())
    RUN_WG_NAMED_BARRIER_TEST_MACRO(global_fence_named_barrier_test())
    RUN_WG_NAMED_BARRIER_TEST_MACRO(global_local_fence_named_barrier_test())

    // Any accumulated failure is reported as -1.
    return (error == CL_SUCCESS) ? error : -1;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user