mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Merge branch 'main' into cl_khr_unified_svm
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
set(MODULE_NAME API)
|
||||
|
||||
find_package(Python3 COMPONENTS Interpreter QUIET)
|
||||
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
main.cpp
|
||||
negative_platform.cpp
|
||||
@@ -40,6 +42,20 @@ set(${MODULE_NAME}_SOURCES
|
||||
test_pipe_properties_queries.cpp
|
||||
test_wg_suggested_local_work_size.cpp
|
||||
test_device_command_queue.cpp
|
||||
test_spirv_queries.cpp
|
||||
${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def
|
||||
)
|
||||
|
||||
# Build rule for the generated spirv_capability_deps.def file: runs
# generate_spirv_capability_deps.py on the SPIR-V core grammar JSON and writes
# the result into the current binary directory. The DEPENDS clause makes the
# rule re-run when either the generator script or the grammar file changes.
add_custom_command(
    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def
    COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_spirv_capability_deps.py
        --grammar "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json"
        --output "${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def"
    DEPENDS
        generate_spirv_capability_deps.py
        "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json"
    COMMENT "Generating spirv_capability_deps.def..."
    USES_TERMINAL
    VERBATIM)
|
||||
|
||||
include(../CMakeCommon.txt)
|
||||
|
||||
target_include_directories(${${MODULE_NAME}_OUT} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
102
test_conformance/api/generate_spirv_capability_deps.py
Normal file
102
test_conformance/api/generate_spirv_capability_deps.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
#####################################################################
|
||||
# Copyright (c) 2025 The Khronos Group Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#####################################################################
|
||||
|
||||
"""
|
||||
Generates a file describing the SPIR-V extension dependencies or SPIR-V version
|
||||
dependencies for a SPIR-V capability. This can be used to ensure that if support
|
||||
for a SPIR-V capability is reported, the necessary SPIR-V extensions or SPIR-V
|
||||
version is also supported.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
|
||||
# Banner emitted verbatim as the first entry of the generated file. Every
# non-empty line is a C++ `//` comment so the output can be #included as-is.
header_text = """\
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// This file is generated from the SPIR-V JSON grammar file.
// Please do not edit it directly!
"""
|
||||
|
||||
def _collect_capability_dependencies(grammar_json):
    """Map each Capability enumerant name to its extension/version needs.

    Only operand kinds whose ``kind`` is ``'Capability'`` are inspected. For
    each enumerant the result holds ``'extensions'`` (the enumerant's
    ``extensions`` list, or ``[]`` when absent) and ``'version'`` (the
    enumerant's ``version`` prefixed with ``SPIR-V_``, or ``''`` when the
    field is absent or equal to the string ``'None'``). If an enumerant name
    occurs more than once, the last occurrence wins.
    """
    dependencies = {}
    for operand_kind in grammar_json['operand_kinds']:
        if operand_kind['kind'] != 'Capability':
            continue
        for cap in operand_kind['enumerants']:
            version = cap.get('version', 'None')
            dependencies[cap['enumerant']] = {
                'extensions': cap.get('extensions', []),
                'version': 'SPIR-V_' + version if version != 'None' else '',
            }
    return dependencies


def _render_dependency_lines(dependencies):
    """Return the output lines: banner, macro invocations, clang-format guards.

    Capabilities are emitted in sorted name order. A capability contributes
    one SPIRV_CAPABILITY_VERSION_DEPENDENCY line when it has a version and one
    SPIRV_CAPABILITY_EXTENSION_DEPENDENCY line per extension (in grammar
    order); capabilities with neither contribute nothing.
    """
    lines = [header_text, "// clang-format off"]
    # Iterating the dict keys (rather than a parallel list) guarantees each
    # capability name is emitted once even if the grammar repeats it.
    for cap in sorted(dependencies):
        deps = dependencies[cap]
        if deps['version'] != "":
            lines.append('SPIRV_CAPABILITY_VERSION_DEPENDENCY( {}, "{}" )'.format(cap, deps['version']))
        for ext in deps['extensions']:
            lines.append('SPIRV_CAPABILITY_EXTENSION_DEPENDENCY( {}, "{}" )'.format(cap, ext))
    lines.append("// clang-format on")
    return lines


def main(argv=None):
    """Generate the capability dependency file from a SPIR-V grammar JSON.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, so existing callers
            that invoke ``main()`` are unaffected.

    The result is written to ``--output`` when given, otherwise to stdout.
    Both paths produce the same newline-terminated text.
    """
    parser = argparse.ArgumentParser(description='Generate SPIR-V extension and version dependencies for SPIR-V capabilities')

    parser.add_argument('--grammar', metavar='<path>',
                        type=str, required=True,
                        help='input JSON grammar file')
    parser.add_argument('--output', metavar='<path>',
                        type=str, required=False,
                        help='output file path (default: stdout)')
    args = parser.parse_args(argv)

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(args.grammar, encoding='utf-8') as json_file:
        grammar_json = json.load(json_file)

    dependencies = _collect_capability_dependencies(grammar_json)
    text = '\n'.join(_render_dependency_lines(dependencies)) + '\n'

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as output_file:
            output_file.write(text)
    else:
        print(text, end='')


if __name__ == '__main__':
    main()
|
||||
@@ -1088,10 +1088,12 @@ REGISTER_TEST(min_max_image_buffer_size)
|
||||
pixelBytes = maxAllocSize / maxDimensionPixels;
|
||||
if (pixelBytes == 0)
|
||||
{
|
||||
log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than "
|
||||
"CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image "
|
||||
"of maximum size!\n");
|
||||
return -1;
|
||||
log_info(
|
||||
"Note, the value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is %zu pixels, "
|
||||
"therefore the size of the allocated image may be larger than the "
|
||||
"scaled CL_DEVICE_MAX_MEM_ALLOC_SIZE of %" PRIu64 " bytes.\n",
|
||||
maxDimensionPixels, maxAllocSize);
|
||||
pixelBytes = 1;
|
||||
}
|
||||
|
||||
error = -1;
|
||||
|
||||
@@ -87,6 +87,35 @@ const char *sample_two_kernel_program[] = {
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
// Kernel with a __global buffer as argument 0; used to check arg_size
// validation for memory-object arguments.
const char *sample_mem_obj_size_test_kernel = R"(
__kernel void mem_obj_size_test(__global int *src, __global int *dst)
{
    size_t tid = get_global_id(0);
    dst[tid] = src[tid];
}
)";

// Kernel with a __local pointer as argument 0; used to check arg_size
// validation for local-memory arguments.
const char *sample_local_size_test_kernel = R"(
__kernel void local_size_test(__local int *src, __global int *dst)
{
    size_t tid = get_global_id(0);
    dst[tid] = src[tid];
}
)";

// Kernel whose image argument is __write_only. The "read_only" in the name
// refers to the image object the negative test passes to it, not to the
// argument qualifier.
const char *sample_read_only_image_test_kernel = R"(
__kernel void read_only_image_test(__write_only image2d_t img, __global uint4 *src)
{
    write_imageui(img, (int2)(get_global_id(0), get_global_id(1)), src[0]);
}
)";

// Kernel whose image argument is __read_only. The "write_only" in the name
// refers to the image object the negative test passes to it, not to the
// argument qualifier.
const char *sample_write_only_image_test_kernel = R"(
__kernel void write_only_image_test(__read_only image2d_t src, __global uint4 *dst)
{
    dst[0]=read_imageui(src, (int2)(get_global_id(0), get_global_id(1)));
}
)";
|
||||
|
||||
REGISTER_TEST(get_kernel_info)
|
||||
{
|
||||
@@ -669,7 +698,7 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
|
||||
test_error(error,
|
||||
"Unable to get sample_image_test kernel for built program");
|
||||
|
||||
std::vector<cl_uchar> mem_data(size_dim * size_dim);
|
||||
std::vector<cl_uchar> mem_data(size_dim * size_dim * 4);
|
||||
buffer = clCreateBuffer(context, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
|
||||
sizeof(cl_int) * size_dim, mem_data.data(), &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
@@ -704,3 +733,169 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
// Negative test: clSetKernelArg must return CL_INVALID_ARG_SIZE when the
// argument is a memory object and arg_size is not sizeof(cl_mem). Both a
// too-large and a too-small size are tried.
REGISTER_TEST(negative_invalid_arg_mem_obj)
{
    // Build the program and fetch the kernel whose argument 0 is a buffer.
    clProgramWrapper program;
    cl_int error =
        create_single_kernel_helper(context, &program, nullptr, 1,
                                    &sample_mem_obj_size_test_kernel, nullptr);
    test_error(error, "Unable to build test program");

    clKernelWrapper mem_obj_arg_kernel =
        clCreateKernel(program, "mem_obj_size_test", &error);
    test_error(error,
               "Unable to get mem_obj_size_test kernel for built program");

    // A valid buffer object, so only arg_size is wrong in the calls below.
    std::vector<cl_uchar> mem_data(256, 0);
    clMemWrapper buffer = clCreateBuffer(
        context, CL_MEM_USE_HOST_PTR, mem_data.size(), mem_data.data(), &error);
    test_error(error, "clCreateBuffer failed");

    const size_t oversized_arg = sizeof(cl_mem) * 2;
    const size_t undersized_arg = sizeof(cl_mem) / 2;

    // arg_size larger than a cl_mem handle.
    error = clSetKernelArg(mem_obj_arg_kernel, 0, oversized_arg, &buffer);
    test_failure_error_ret(
        error, CL_INVALID_ARG_SIZE,
        "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
        "argument is a memory object and arg_size > sizeof(cl_mem)",
        TEST_FAIL);

    // arg_size smaller than a cl_mem handle.
    error = clSetKernelArg(mem_obj_arg_kernel, 0, undersized_arg, &buffer);
    test_failure_error_ret(
        error, CL_INVALID_ARG_SIZE,
        "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
        "argument is a memory object and arg_size < sizeof(cl_mem)",
        TEST_FAIL);

    return TEST_PASS;
}
|
||||
|
||||
// Negative test: clSetKernelArg must return CL_INVALID_KERNEL when the
// kernel handle does not refer to a valid kernel object.
REGISTER_TEST(negative_invalid_kernel)
{
    // Never assigned a kernel, so the handle passed below is not a valid
    // kernel object.
    clKernelWrapper kernel;

    // The argument value itself is a valid buffer.
    cl_int error = CL_SUCCESS;
    clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
                                      sizeof(cl_float), nullptr, &error);
    test_error(error, "clCreateBuffer failed");

    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem);
    test_failure_error_ret(
        error, CL_INVALID_KERNEL,
        "clSetKernelArg is supposed to fail with CL_INVALID_KERNEL when kernel "
        "is not a valid kernel object",
        TEST_FAIL);

    return TEST_PASS;
}
|
||||
|
||||
// Negative test: clSetKernelArg must return CL_INVALID_ARG_INDEX when
// arg_index is not a valid argument index for the kernel.
REGISTER_TEST(negative_invalid_arg_index)
{
    // Build the shared sample program and fetch its "sample_test" kernel.
    clProgramWrapper program;
    cl_int error = create_single_kernel_helper(
        context, &program, nullptr, 1, sample_single_test_kernel, nullptr);
    test_error(error, "Unable to build test program");

    clKernelWrapper kernel = clCreateKernel(program, "sample_test", &error);
    test_error(error, "Unable to get sample_test kernel for built program");

    // Index 2 is expected to be out of range for sample_test
    // (NOTE(review): the kernel source is defined outside this view).
    const cl_uint out_of_range_index = 2;
    error =
        clSetKernelArg(kernel, out_of_range_index, sizeof(cl_mem), nullptr);
    test_failure_error_ret(
        error, CL_INVALID_ARG_INDEX,
        "clSetKernelArg is supposed to fail with CL_INVALID_ARG_INDEX when "
        "arg_index is not a valid argument index",
        TEST_FAIL);

    return TEST_PASS;
}
|
||||
|
||||
// Negative test: clSetKernelArg must return CL_INVALID_ARG_SIZE when a size
// of 0 is given for an argument declared with the __local qualifier.
REGISTER_TEST(negative_invalid_arg_size_local)
{
    // Build the program and fetch the kernel whose argument 0 is __local.
    clProgramWrapper program;
    cl_int error = create_single_kernel_helper(
        context, &program, nullptr, 1, &sample_local_size_test_kernel, nullptr);
    test_error(error, "Unable to build test program");

    clKernelWrapper local_arg_kernel =
        clCreateKernel(program, "local_size_test", &error);
    test_error(error, "Unable to get local_size_test kernel for built program");

    // Zero-sized local allocation with a null value pointer.
    const size_t zero_size = 0;
    error = clSetKernelArg(local_arg_kernel, 0, zero_size, nullptr);
    test_failure_error_ret(
        error, CL_INVALID_ARG_SIZE,
        "clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when 0 is "
        "passed to a local qualifier kernel argument",
        TEST_FAIL);

    return TEST_PASS;
}
|
||||
|
||||
// Negative test: clSetKernelArg must return CL_INVALID_ARG_VALUE when an
// image's access flags conflict with the kernel argument's access qualifier:
//  - an image created with CL_MEM_READ_ONLY passed to a __write_only argument
//  - an image created with CL_MEM_WRITE_ONLY passed to a __read_only argument
REGISTER_TEST(negative_set_read_write_image_arg)
{
    cl_int error = CL_SUCCESS;
    clProgramWrapper program;
    clKernelWrapper write_image_kernel, read_image_kernel;
    clMemWrapper write_only_image, read_only_image;
    const char *test_kernels[2] = { sample_read_only_image_test_kernel,
                                    sample_write_only_image_test_kernel };
    constexpr cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 };
    const int size_dim = 128;

    PASSIVE_REQUIRE_IMAGE_SUPPORT(device);

    // Setup the test: one program containing both kernels.
    error = create_single_kernel_helper(context, &program, nullptr, 2,
                                        test_kernels, nullptr);
    test_error(error, "Unable to build test program");

    // read_only_image_test declares a __write_only image argument; it is the
    // kernel that receives the read-only image.
    read_image_kernel = clCreateKernel(program, "read_only_image_test", &error);
    test_error(error,
               "Unable to get read_only_image_test kernel for built program");

    // write_only_image_test declares a __read_only image argument; it is the
    // kernel that receives the write-only image.
    write_image_kernel =
        clCreateKernel(program, "write_only_image_test", &error);
    test_error(error,
               "Unable to get write_only_image_test kernel for built program");

    read_only_image = create_image_2d(context, CL_MEM_READ_ONLY, &format,
                                      size_dim, size_dim, 0, nullptr, &error);
    test_error(error, "create_image_2d failed");

    write_only_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &format,
                                       size_dim, size_dim, 0, nullptr, &error);
    test_error(error, "create_image_2d failed");

    // Run the test. arg_size is sizeof(cl_mem), the size of the handle being
    // passed, rather than the size of the wrapper object: this keeps the
    // size valid regardless of the wrapper's layout, so the only error the
    // call can report is the access-qualifier mismatch under test.
    error = clSetKernelArg(read_image_kernel, 0, sizeof(cl_mem),
                           &read_only_image);
    test_failure_error_ret(error, CL_INVALID_ARG_VALUE,
                           "clSetKernelArg is supposed to fail "
                           "with CL_INVALID_ARG_VALUE when an image is "
                           "created with CL_MEM_READ_ONLY is "
                           "passed to a write_only kernel argument",
                           TEST_FAIL);

    error = clSetKernelArg(write_image_kernel, 0, sizeof(cl_mem),
                           &write_only_image);
    test_failure_error_ret(error, CL_INVALID_ARG_VALUE,
                           "clSetKernelArg is supposed to fail "
                           "with CL_INVALID_ARG_VALUE when an image is "
                           "created with CL_MEM_WRITE_ONLY is "
                           "passed to a read_only kernel argument",
                           TEST_FAIL);

    return TEST_PASS;
}
|
||||
|
||||
767
test_conformance/api/test_spirv_queries.cpp
Normal file
767
test_conformance/api/test_spirv_queries.cpp
Normal file
@@ -0,0 +1,767 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#define SPV_ENABLE_UTILITY_CODE
|
||||
#include <spirv/unified1/spirv.hpp>
|
||||
|
||||
// Returns true when `version` occurs as a substring of the device's IL
// version string (as returned by get_device_il_version_string).
static bool is_spirv_version_supported(cl_device_id deviceID,
                                       const std::string& version)
{
    const std::string supportedVersions =
        get_device_il_version_string(deviceID);
    const bool found = supportedVersions.find(version) != std::string::npos;
    return found;
}
|
||||
|
||||
// Fills the three output vectors with what the device reports for
// CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR,
// CL_DEVICE_SPIRV_EXTENSIONS_KHR and CL_DEVICE_SPIRV_CAPABILITIES_KHR.
//
// Each property is queried twice: first for its size in bytes, then for its
// contents after the destination vector has been resized to
// size / sizeof(element). Returns CL_SUCCESS when all six calls succeed;
// test_error presumably returns early on a failing call (macro defined
// outside this view).
static int doQueries(cl_device_id device,
                     std::vector<const char*>& extendedInstructionSets,
                     std::vector<const char*>& extensions,
                     std::vector<cl_uint>& capabilities)
{
    cl_int error = CL_SUCCESS;

    // Extended instruction sets (array of C strings).
    size_t instructionSetsBytes = 0;
    error =
        clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR,
                        0, nullptr, &instructionSetsBytes);
    test_error(error,
               "clGetDeviceInfo failed for "
               "CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR size\n");

    extendedInstructionSets.resize(instructionSetsBytes / sizeof(const char*));
    error =
        clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR,
                        instructionSetsBytes, extendedInstructionSets.data(),
                        nullptr);
    test_error(error,
               "clGetDeviceInfo failed for "
               "CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR\n");

    // Extensions (array of C strings).
    size_t extensionsBytes = 0;
    error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, 0, nullptr,
                            &extensionsBytes);
    test_error(
        error,
        "clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR size\n");

    extensions.resize(extensionsBytes / sizeof(const char*));
    error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR,
                            extensionsBytes, extensions.data(), nullptr);
    test_error(error,
               "clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR\n");

    // Capabilities (array of cl_uint values).
    size_t capabilitiesBytes = 0;
    error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, 0,
                            nullptr, &capabilitiesBytes);
    test_error(
        error,
        "clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR size\n");

    capabilities.resize(capabilitiesBytes / sizeof(cl_uint));
    error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR,
                            capabilitiesBytes, capabilities.data(), nullptr);
    test_error(error,
               "clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR\n");

    return CL_SUCCESS;
}
|
||||
|
||||
static int findRequirements(cl_device_id device,
|
||||
std::vector<const char*>& extendedInstructionSets,
|
||||
std::vector<const char*>& extensions,
|
||||
std::vector<cl_uint>& capabilities)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
auto version = get_device_cl_version(device);
|
||||
auto ilVersions = get_device_il_version_string(device);
|
||||
|
||||
// If no SPIR-V versions are supported, there are no requirements.
|
||||
if (ilVersions.find("SPIR-V") == std::string::npos)
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_bool deviceImageSupport = CL_FALSE;
|
||||
cl_bool deviceReadWriteImageSupport = CL_FALSE;
|
||||
cl_bool deviceSubGroupsSupport = CL_FALSE;
|
||||
cl_bool deviceGenericAddressSpaceSupport = CL_FALSE;
|
||||
cl_bool deviceWorkGroupCollectiveFunctionsSupport = CL_FALSE;
|
||||
cl_bool devicePipeSupport = CL_FALSE;
|
||||
cl_bool deviceDeviceEnqueueSupport = CL_FALSE;
|
||||
cl_device_integer_dot_product_capabilities_khr
|
||||
deviceIntegerDotProductCapabilities = 0;
|
||||
cl_device_fp_atomic_capabilities_ext deviceFp32AtomicCapabilities = 0;
|
||||
cl_device_fp_atomic_capabilities_ext deviceFp16AtomicCapabilities = 0;
|
||||
cl_device_fp_atomic_capabilities_ext deviceFp64AtomicCapabilities = 0;
|
||||
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
|
||||
sizeof(deviceImageSupport), &deviceImageSupport,
|
||||
nullptr);
|
||||
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE_SUPPORT\n");
|
||||
|
||||
if (version >= Version(2, 0))
|
||||
{
|
||||
cl_uint deviceMaxReadWriteImageArgs = 0;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
|
||||
sizeof(deviceMaxReadWriteImageArgs),
|
||||
&deviceMaxReadWriteImageArgs, nullptr);
|
||||
test_error(
|
||||
error,
|
||||
"clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
|
||||
|
||||
deviceReadWriteImageSupport =
|
||||
deviceMaxReadWriteImageArgs != 0 ? CL_TRUE : CL_FALSE;
|
||||
}
|
||||
|
||||
if (version >= Version(2, 1))
|
||||
{
|
||||
cl_uint deviceMaxNumSubGroups = 0;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS,
|
||||
sizeof(deviceMaxNumSubGroups),
|
||||
&deviceMaxNumSubGroups, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo failed for CL_DEVICE_MAX_NUM_SUB_GROUPS\n");
|
||||
|
||||
deviceSubGroupsSupport =
|
||||
deviceMaxNumSubGroups != 0 ? CL_TRUE : CL_FALSE;
|
||||
}
|
||||
else if (is_extension_available(device, "cl_khr_subgroups"))
|
||||
{
|
||||
deviceSubGroupsSupport = CL_TRUE;
|
||||
}
|
||||
|
||||
if (version >= Version(3, 0))
|
||||
{
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
|
||||
sizeof(deviceGenericAddressSpaceSupport),
|
||||
&deviceGenericAddressSpaceSupport, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo failed for "
|
||||
"CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT\n");
|
||||
|
||||
error = clGetDeviceInfo(
|
||||
device, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT,
|
||||
sizeof(deviceWorkGroupCollectiveFunctionsSupport),
|
||||
&deviceWorkGroupCollectiveFunctionsSupport, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo failed for "
|
||||
"CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT\n");
|
||||
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PIPE_SUPPORT,
|
||||
sizeof(devicePipeSupport), &devicePipeSupport,
|
||||
nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo failed for CL_DEVICE_PIPE_SUPPORT\n");
|
||||
|
||||
cl_device_device_enqueue_capabilities deviceDeviceEnqueueCapabilities =
|
||||
0;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
|
||||
sizeof(deviceDeviceEnqueueCapabilities),
|
||||
&deviceDeviceEnqueueCapabilities, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo failed for "
|
||||
"CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES\n");
|
||||
|
||||
deviceDeviceEnqueueSupport =
|
||||
deviceDeviceEnqueueCapabilities != 0 ? CL_TRUE : CL_FALSE;
|
||||
}
|
||||
else if (version >= Version(2, 0))
|
||||
{
|
||||
deviceGenericAddressSpaceSupport = CL_TRUE;
|
||||
deviceWorkGroupCollectiveFunctionsSupport = CL_TRUE;
|
||||
devicePipeSupport = CL_TRUE;
|
||||
deviceDeviceEnqueueSupport = CL_TRUE;
|
||||
}
|
||||
|
||||
if (is_extension_available(device, "cl_khr_integer_dot_product"))
|
||||
{
|
||||
error = clGetDeviceInfo(device,
|
||||
CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR,
|
||||
sizeof(deviceIntegerDotProductCapabilities),
|
||||
&deviceIntegerDotProductCapabilities, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo failed for "
|
||||
"CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR\n");
|
||||
}
|
||||
|
||||
if (is_extension_available(device, "cl_ext_float_atomics"))
|
||||
{
|
||||
error =
|
||||
clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
sizeof(deviceFp32AtomicCapabilities),
|
||||
&deviceFp32AtomicCapabilities, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo failed for "
|
||||
"CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT\n");
|
||||
|
||||
error =
|
||||
clGetDeviceInfo(device, CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
sizeof(deviceFp16AtomicCapabilities),
|
||||
&deviceFp16AtomicCapabilities, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo failed for "
|
||||
"CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT\n");
|
||||
|
||||
error =
|
||||
clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
sizeof(deviceFp64AtomicCapabilities),
|
||||
&deviceFp64AtomicCapabilities, nullptr);
|
||||
test_error(error,
|
||||
"clGetDeviceInfo failed for "
|
||||
"CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT\n");
|
||||
}
|
||||
|
||||
// Required.
|
||||
extendedInstructionSets.push_back("OpenCL.std");
|
||||
|
||||
capabilities.push_back(spv::CapabilityAddresses);
|
||||
capabilities.push_back(spv::CapabilityFloat16Buffer);
|
||||
capabilities.push_back(spv::CapabilityInt16);
|
||||
capabilities.push_back(spv::CapabilityInt8);
|
||||
capabilities.push_back(spv::CapabilityKernel);
|
||||
capabilities.push_back(spv::CapabilityLinkage);
|
||||
capabilities.push_back(spv::CapabilityVector16);
|
||||
|
||||
// Required for FULL_PROFILE devices, or devices supporting
|
||||
// cles_khr_int64.
|
||||
if (gHasLong)
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityInt64);
|
||||
}
|
||||
|
||||
// Required for devices supporting images.
|
||||
if (deviceImageSupport == CL_TRUE)
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityImage1D);
|
||||
capabilities.push_back(spv::CapabilityImageBasic);
|
||||
capabilities.push_back(spv::CapabilityImageBuffer);
|
||||
capabilities.push_back(spv::CapabilityLiteralSampler);
|
||||
capabilities.push_back(spv::CapabilitySampled1D);
|
||||
capabilities.push_back(spv::CapabilitySampledBuffer);
|
||||
}
|
||||
|
||||
// Required for devices supporting SPIR-V 1.6.
|
||||
if (ilVersions.find("SPIR-V_1.6") != std::string::npos)
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityUniformDecoration);
|
||||
}
|
||||
|
||||
// Required for devices supporting read-write images.
|
||||
if (deviceReadWriteImageSupport == CL_TRUE)
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityImageReadWrite);
|
||||
}
|
||||
|
||||
// Required for devices supporting the generic address space.
|
||||
if (deviceGenericAddressSpaceSupport == CL_TRUE)
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityGenericPointer);
|
||||
}
|
||||
|
||||
// Required for devices supporting sub-groups or work-group collective
|
||||
// functions.
|
||||
if (deviceSubGroupsSupport == CL_TRUE
|
||||
|| deviceWorkGroupCollectiveFunctionsSupport == CL_TRUE)
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityGroups);
|
||||
}
|
||||
|
||||
// Required for devices supporting pipes.
|
||||
if (devicePipeSupport == CL_TRUE)
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityPipes);
|
||||
}
|
||||
|
||||
// Required for devices supporting device-side enqueue.
|
||||
if (deviceDeviceEnqueueSupport == CL_TRUE)
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityDeviceEnqueue);
|
||||
}
|
||||
|
||||
// Required for devices supporting SPIR-V 1.1 and OpenCL 2.2.
|
||||
if (ilVersions.find("SPIR-V_1.1") != std::string::npos
|
||||
&& version == Version(2, 2))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityPipeStorage);
|
||||
}
|
||||
|
||||
// Required for devices supporting SPIR-V 1.1 and either OpenCL 2.2 or
|
||||
// OpenCL 3.0 devices supporting sub-groups.
|
||||
if (ilVersions.find("SPIR-V_1.1") != std::string::npos
|
||||
&& (version == Version(2, 2)
|
||||
|| (version >= Version(3, 0) && deviceSubGroupsSupport == CL_TRUE)))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilitySubgroupDispatch);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_expect_assume.
|
||||
if (is_extension_available(device, "cl_khr_expect_assume"))
|
||||
{
|
||||
extensions.push_back("SPV_KHR_expect_assume");
|
||||
capabilities.push_back(spv::CapabilityExpectAssumeKHR);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_extended_bit_ops.
|
||||
if (is_extension_available(device, "cl_khr_extended_bit_ops"))
|
||||
{
|
||||
extensions.push_back("SPV_KHR_bit_instructions");
|
||||
capabilities.push_back(spv::CapabilityBitInstructions);
|
||||
}
|
||||
|
||||
// Required for devices supporting half-precision floating-point
|
||||
// (cl_khr_fp16).
|
||||
if (is_extension_available(device, "cl_khr_fp16"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityFloat16);
|
||||
}
|
||||
|
||||
// Required for devices supporting double-precision floating-point
|
||||
// (cl_khr_fp64).
|
||||
if (is_extension_available(device, "cl_khr_fp64"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityFloat64);
|
||||
}
|
||||
|
||||
// Required for devices supporting 64-bit atomics
|
||||
// (cl_khr_int64_base_atomics or cl_khr_int64_extended_atomics).
|
||||
if (is_extension_available(device, "cl_khr_int64_base_atomics")
|
||||
|| is_extension_available(device, "cl_khr_int64_extended_atomics"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityInt64Atomics);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_integer_dot_product.
|
||||
if (is_extension_available(device, "cl_khr_integer_dot_product"))
|
||||
{
|
||||
extensions.push_back("SPV_KHR_integer_dot_product");
|
||||
capabilities.push_back(spv::CapabilityDotProduct);
|
||||
capabilities.push_back(spv::CapabilityDotProductInput4x8BitPacked);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_integer_dot_product and
|
||||
// CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR.
|
||||
if (is_extension_available(device, "cl_khr_integer_dot_product")
|
||||
&& (deviceIntegerDotProductCapabilities
|
||||
& CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityDotProductInput4x8Bit);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_kernel_clock.
|
||||
if (is_extension_available(device, "cl_khr_kernel_clock"))
|
||||
{
|
||||
extensions.push_back("SPV_KHR_shader_clock");
|
||||
capabilities.push_back(spv::CapabilityShaderClockKHR);
|
||||
}
|
||||
|
||||
// Required for devices supporting both cl_khr_mipmap_image and
|
||||
// cl_khr_mipmap_image_writes.
|
||||
if (is_extension_available(device, "cl_khr_mipmap_image")
|
||||
&& is_extension_available(device, "cl_khr_mipmap_image_writes"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityImageMipmap);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_spirv_extended_debug_info.
|
||||
if (is_extension_available(device, "cl_khr_spirv_extended_debug_info"))
|
||||
{
|
||||
extendedInstructionSets.push_back("OpenCL.DebugInfo.100");
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_spirv_linkonce_odr.
|
||||
if (is_extension_available(device, "cl_khr_spirv_linkonce_odr"))
|
||||
{
|
||||
extensions.push_back("SPV_KHR_linkonce_odr");
|
||||
}
|
||||
|
||||
// Required for devices supporting
|
||||
// cl_khr_spirv_no_integer_wrap_decoration.
|
||||
if (is_extension_available(device,
|
||||
"cl_khr_spirv_no_integer_wrap_decoration"))
|
||||
{
|
||||
extensions.push_back("SPV_KHR_no_integer_wrap_decoration");
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_subgroup_ballot.
|
||||
if (is_extension_available(device, "cl_khr_subgroup_ballot"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityGroupNonUniformBallot);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_subgroup_clustered_reduce.
|
||||
if (is_extension_available(device, "cl_khr_subgroup_clustered_reduce"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityGroupNonUniformClustered);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_subgroup_named_barrier.
|
||||
if (is_extension_available(device, "cl_khr_subgroup_named_barrier"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityNamedBarrier);
|
||||
}
|
||||
|
||||
// Required for devices supporting
|
||||
// cl_khr_subgroup_non_uniform_arithmetic.
|
||||
if (is_extension_available(device,
|
||||
"cl_khr_subgroup_non_uniform_arithmetic"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityGroupNonUniformArithmetic);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_subgroup_non_uniform_vote.
|
||||
if (is_extension_available(device, "cl_khr_subgroup_non_uniform_vote"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityGroupNonUniform);
|
||||
capabilities.push_back(spv::CapabilityGroupNonUniformVote);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_subgroup_rotate.
|
||||
if (is_extension_available(device, "cl_khr_subgroup_rotate"))
|
||||
{
|
||||
extensions.push_back("SPV_KHR_subgroup_rotate");
|
||||
capabilities.push_back(spv::CapabilityGroupNonUniformRotateKHR);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_subgroup_shuffle.
|
||||
if (is_extension_available(device, "cl_khr_subgroup_shuffle"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityGroupNonUniformShuffle);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_subgroup_shuffle_relative.
|
||||
if (is_extension_available(device, "cl_khr_subgroup_shuffle_relative"))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityGroupNonUniformShuffleRelative);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_khr_work_group_uniform_arithmetic.
|
||||
if (is_extension_available(device, "cl_khr_work_group_uniform_arithmetic"))
|
||||
{
|
||||
extensions.push_back("SPV_KHR_uniform_group_instructions");
|
||||
capabilities.push_back(spv::CapabilityGroupUniformArithmeticKHR);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_ext_float_atomics and fp32 atomic
|
||||
// adds.
|
||||
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||
&& (deviceFp32AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityAtomicFloat32AddEXT);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_ext_float_atomics and fp32 atomic
|
||||
// min and max.
|
||||
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||
&& (deviceFp32AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityAtomicFloat32MinMaxEXT);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_ext_float_atomics and fp16 atomic
|
||||
// adds.
|
||||
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||
&& (deviceFp16AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
|
||||
{
|
||||
extensions.push_back("SPV_EXT_shader_atomic_float16_add");
|
||||
capabilities.push_back(spv::CapabilityAtomicFloat16AddEXT);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_ext_float_atomics and fp16 atomic
|
||||
// min and max.
|
||||
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||
&& (deviceFp16AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityAtomicFloat16MinMaxEXT);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_ext_float_atomics and fp64 atomic
|
||||
// adds.
|
||||
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||
&& (deviceFp64AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityAtomicFloat64AddEXT);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_ext_float_atomics and fp64 atomic
|
||||
// min and max.
|
||||
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||
&& (deviceFp64AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
|
||||
{
|
||||
capabilities.push_back(spv::CapabilityAtomicFloat64MinMaxEXT);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_ext_float_atomics and fp16, fp32,
|
||||
// or fp64 atomic min or max.
|
||||
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||
&& ((deviceFp32AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))
|
||||
|| (deviceFp16AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))
|
||||
|| (deviceFp64AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))))
|
||||
{
|
||||
extensions.push_back("SPV_EXT_shader_atomic_float_min_max");
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_ext_float_atomics and fp32 or fp64
|
||||
// atomic adds.
|
||||
if (is_extension_available(device, "cl_ext_float_atomics")
|
||||
&& ((deviceFp32AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))
|
||||
|| (deviceFp64AtomicCapabilities
|
||||
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
|
||||
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))))
|
||||
{
|
||||
extensions.push_back("SPV_EXT_shader_atomic_float_add");
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_intel_bfloat16_conversions.
|
||||
if (is_extension_available(device, "cl_intel_bfloat16_conversions"))
|
||||
{
|
||||
extensions.push_back("SPV_INTEL_bfloat16_conversion");
|
||||
capabilities.push_back(spv::CapabilityBFloat16ConversionINTEL);
|
||||
}
|
||||
|
||||
// Required for devices supporting
|
||||
// cl_intel_spirv_device_side_avc_motion_estimation.
|
||||
if (is_extension_available(
|
||||
device, "cl_intel_spirv_device_side_avc_motion_estimation"))
|
||||
{
|
||||
extensions.push_back("SPV_INTEL_device_side_avc_motion_estimation");
|
||||
capabilities.push_back(
|
||||
spv::CapabilitySubgroupAvcMotionEstimationChromaINTEL);
|
||||
capabilities.push_back(spv::CapabilitySubgroupAvcMotionEstimationINTEL);
|
||||
capabilities.push_back(
|
||||
spv::CapabilitySubgroupAvcMotionEstimationIntraINTEL);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_intel_spirv_media_block_io.
|
||||
if (is_extension_available(device, "cl_intel_spirv_media_block_io"))
|
||||
{
|
||||
extensions.push_back("SPV_INTEL_media_block_io");
|
||||
capabilities.push_back(spv::CapabilitySubgroupImageMediaBlockIOINTEL);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_intel_spirv_subgroups.
|
||||
if (is_extension_available(device, "cl_intel_spirv_subgroups"))
|
||||
{
|
||||
extensions.push_back("SPV_INTEL_subgroups");
|
||||
capabilities.push_back(spv::CapabilitySubgroupBufferBlockIOINTEL);
|
||||
capabilities.push_back(spv::CapabilitySubgroupImageBlockIOINTEL);
|
||||
capabilities.push_back(spv::CapabilitySubgroupShuffleINTEL);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_intel_split_work_group_barrier.
|
||||
if (is_extension_available(device, "cl_intel_split_work_group_barrier"))
|
||||
{
|
||||
extensions.push_back("SPV_INTEL_split_barrier");
|
||||
capabilities.push_back(spv::CapabilitySplitBarrierINTEL);
|
||||
}
|
||||
|
||||
// Required for devices supporting cl_intel_subgroup_buffer_prefetch.
|
||||
if (is_extension_available(device, "cl_intel_subgroup_buffer_prefetch"))
|
||||
{
|
||||
extensions.push_back("SPV_INTEL_subgroup_buffer_prefetch");
|
||||
capabilities.push_back(spv::CapabilitySubgroupBufferPrefetchINTEL);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Checks that everything findRequirements() lists as required for this device
// (extended instruction sets, extensions, and capabilities) is also present in
// the results returned by doQueries(). Capabilities that were returned by the
// queries but are not in the required list are only logged.
REGISTER_TEST(spirv_query_requirements)
{
    if (!is_extension_available(device, "cl_khr_spirv_queries"))
    {
        log_info("cl_khr_spirv_queries is not supported; skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    // Values reported by the device queries.
    std::vector<const char*> reportedInstructionSets;
    std::vector<const char*> reportedExtensions;
    std::vector<cl_uint> reportedCapabilities;

    cl_int status = doQueries(device, reportedInstructionSets,
                              reportedExtensions, reportedCapabilities);
    test_error_fail(status, "Unable to perform SPIR-V queries");

    // Values the device is expected to report.
    std::vector<const char*> neededInstructionSets;
    std::vector<const char*> neededExtensions;
    std::vector<cl_uint> neededCapabilities;

    status = findRequirements(device, neededInstructionSets, neededExtensions,
                              neededCapabilities);
    test_error_fail(status, "Unable to find SPIR-V requirements");

    // Names are C strings, so they are matched by content with strcmp rather
    // than by pointer value.
    for (const char* wanted : neededInstructionSets)
    {
        const bool reported = std::any_of(
            reportedInstructionSets.begin(), reportedInstructionSets.end(),
            [wanted](const char* candidate) {
                return strcmp(wanted, candidate) == 0;
            });
        if (!reported)
        {
            test_fail("Missing required extended instruction set: %s\n",
                      wanted);
        }
    }

    for (const char* wanted : neededExtensions)
    {
        const bool reported = std::any_of(
            reportedExtensions.begin(), reportedExtensions.end(),
            [wanted](const char* candidate) {
                return strcmp(wanted, candidate) == 0;
            });
        if (!reported)
        {
            test_fail("Missing required extension: %s\n", wanted);
        }
    }

    for (const cl_uint wanted : neededCapabilities)
    {
        const bool reported =
            std::find(reportedCapabilities.begin(), reportedCapabilities.end(),
                      wanted)
            != reportedCapabilities.end();
        if (!reported)
        {
            test_fail(
                "Missing required capability: %s\n",
                spv::CapabilityToString(static_cast<spv::Capability>(wanted)));
        }
    }

    // Find any extraneous capabilities (informational):
    for (const cl_uint seen : reportedCapabilities)
    {
        const bool required =
            std::find(neededCapabilities.begin(), neededCapabilities.end(),
                      seen)
            != neededCapabilities.end();
        if (!required)
        {
            log_info(
                "Found non-required capability: %s\n",
                spv::CapabilityToString(static_cast<spv::Capability>(seen)));
        }
    }

    return TEST_PASS;
}
|
||||
|
||||
// Checks that every SPIR-V capability returned by doQueries() has at least one
// of its dependencies satisfied: either the SPIR-V version recorded for it is
// supported by the device, or one of the SPIR-V extensions recorded for it is
// among the queried extensions. The dependency table is populated from the
// generated file spirv_capability_deps.def.
REGISTER_TEST(spirv_query_dependencies)
{
    if (!is_extension_available(device, "cl_khr_spirv_queries"))
    {
        log_info("cl_khr_spirv_queries is not supported; skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    cl_int error;

    std::vector<const char*> queriedExtendedInstructionSets;
    std::vector<const char*> queriedExtensions;
    std::vector<cl_uint> queriedCapabilities;

    error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions,
                      queriedCapabilities);
    test_error_fail(error, "Unable to perform SPIR-V queries");

    // Dependencies recorded for a single capability. An empty version string
    // means no version dependency was recorded.
    struct CapabilityDependencies
    {
        std::vector<std::string> extensions;
        std::string version;
    };

    std::map<spv::Capability, CapabilityDependencies> dependencies;

    // The .def file is a list of invocations of these two macros; including
    // it here fills in the dependency table.
#define SPIRV_CAPABILITY_VERSION_DEPENDENCY(_cap, _ver)                        \
    dependencies[spv::Capability##_cap].version = _ver;
#define SPIRV_CAPABILITY_EXTENSION_DEPENDENCY(_cap, _ext)                      \
    dependencies[spv::Capability##_cap].extensions.push_back(_ext);
#include "spirv_capability_deps.def"
    // Keep the helper macros from leaking into the rest of the translation
    // unit.
#undef SPIRV_CAPABILITY_EXTENSION_DEPENDENCY
#undef SPIRV_CAPABILITY_VERSION_DEPENDENCY

    // For each queried SPIR-V capability, ensure that either its SPIR-V
    // version dependency or one of its SPIR-V extension dependencies is
    // satisfied.

    for (auto check : queriedCapabilities)
    {
        // Log and skip any unknown capabilities
        auto it = dependencies.find(static_cast<spv::Capability>(check));
        if (it == dependencies.end())
        {
            log_info(
                "No known dependencies for queried capability %s!\n",
                spv::CapabilityToString(static_cast<spv::Capability>(check)));
            continue;
        }

        // Check if a SPIR-V version dependency is satisfied
        const auto& version_dep = it->second.version;
        if (!version_dep.empty()
            && is_spirv_version_supported(device, version_dep))
        {
            continue;
        }

        // Check if a SPIR-V extension dependency is satisfied. The queried
        // names are C strings and the dependency is a std::string, so the
        // comparison done by std::find is by content.
        bool found = false;
        for (const auto& extension_dep : it->second.extensions)
        {
            if (std::find(queriedExtensions.begin(), queriedExtensions.end(),
                          extension_dep)
                != queriedExtensions.end())
            {
                found = true;
                break;
            }
        }
        if (found)
        {
            continue;
        }

        // If we get here then the capability has an unsatisfied dependency.
        log_error("Couldn't find a dependency for queried capability %s!\n",
                  spv::CapabilityToString(static_cast<spv::Capability>(check)));
        if (!version_dep.empty())
        {
            log_error("Checked for SPIR-V version %s.\n", version_dep.c_str());
        }
        for (const auto& extension_dep : it->second.extensions)
        {
            // Fixed: the format string previously ended in ".n" (missing
            // backslash), so all extension names ran together on one line.
            log_error("Checked for SPIR-V extension %s.\n",
                      extension_dep.c_str());
        }
        return TEST_FAIL;
    }

    return TEST_PASS;
}
|
||||
@@ -108,7 +108,11 @@ REGISTER_TEST_VERSION(sub_group_dispatch, Version(2, 1))
|
||||
nullptr);
|
||||
test_error(error, "clGetDeviceInfo failed");
|
||||
|
||||
max_local = max_work_item_sizes[0];
|
||||
error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||
sizeof(max_local), &max_local, nullptr);
|
||||
test_error(error, "clGetKernelWorkGroupInfo failed");
|
||||
|
||||
max_local = std::min(max_local, max_work_item_sizes[0]);
|
||||
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
|
||||
(void *)&platform, NULL);
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
set(MODULE_NAME BASIC)
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
|
||||
add_cxx_flag_if_supported(-Wno-narrowing)
|
||||
endif()
|
||||
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
main.cpp
|
||||
test_fpmath.cpp
|
||||
|
||||
@@ -161,7 +161,7 @@ REGISTER_TEST(arraycopy)
|
||||
err |= clSetKernelArg(kernel, 1, sizeof results, &results);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
size_t threads[3] = { num_elements, 0, 0 };
|
||||
size_t threads[3] = { static_cast<size_t>(num_elements), 0, 0 };
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
@@ -35,9 +35,9 @@ static int test_arrayimagecopy_single_format(
|
||||
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
|
||||
imgptr{ nullptr, free };
|
||||
clMemWrapper buffer, image;
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
|
||||
size_t img_width = 512;
|
||||
size_t img_height = 512;
|
||||
size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
|
||||
size_t elem_size;
|
||||
size_t buffer_size;
|
||||
cl_int err;
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <cmath>
|
||||
using std::isnan;
|
||||
#include "harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
@@ -26,6 +25,7 @@ using std::isnan;
|
||||
|
||||
#include <CL/cl_half.h>
|
||||
#include "harness/conversions.h"
|
||||
#include "harness/mathHelpers.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
|
||||
extern cl_half_rounding_mode halfRoundingMode;
|
||||
@@ -102,16 +102,6 @@ const char * kernel_explicit_s2v_set[NUM_VEC_TYPES][NUM_VEC_TYPES][5] = {
|
||||
|
||||
// clang-format on
|
||||
|
||||
bool IsHalfNaN(cl_half v)
|
||||
{
|
||||
// Extract FP16 exponent and mantissa
|
||||
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||
|
||||
// NaN test
|
||||
return (h_exp == 0x1F && h_mant != 0);
|
||||
}
|
||||
|
||||
static int test_explicit_s2v_function(cl_context context,
|
||||
cl_command_queue queue, cl_kernel kernel,
|
||||
ExplicitType srcType, unsigned int count,
|
||||
@@ -183,20 +173,21 @@ static int test_explicit_s2v_function(cl_context context,
|
||||
{
|
||||
bool isSrcNaN =
|
||||
(((srcType == kHalf)
|
||||
&& IsHalfNaN(*reinterpret_cast<cl_half *>(inPtr)))
|
||||
&& isnan_fp(*reinterpret_cast<cl_half *>(inPtr)))
|
||||
|| ((srcType == kFloat)
|
||||
&& isnan(*reinterpret_cast<cl_float *>(inPtr)))
|
||||
&& isnan_fp(*reinterpret_cast<cl_float *>(inPtr)))
|
||||
|| ((srcType == kDouble)
|
||||
&& isnan(*reinterpret_cast<cl_double *>(inPtr))));
|
||||
bool isDestNaN = (((destType == kHalf)
|
||||
&& IsHalfNaN(*reinterpret_cast<cl_half *>(
|
||||
outPtr + destTypeSize * s)))
|
||||
|| ((destType == kFloat)
|
||||
&& isnan(*reinterpret_cast<cl_float *>(
|
||||
outPtr + destTypeSize * s)))
|
||||
|| ((destType == kDouble)
|
||||
&& isnan(*reinterpret_cast<cl_double *>(
|
||||
outPtr + destTypeSize * s))));
|
||||
&& isnan_fp(*reinterpret_cast<cl_double *>(inPtr))));
|
||||
bool isDestNaN =
|
||||
(((destType == kHalf)
|
||||
&& isnan_fp(*reinterpret_cast<cl_half *>(
|
||||
outPtr + destTypeSize * s)))
|
||||
|| ((destType == kFloat)
|
||||
&& isnan_fp(*reinterpret_cast<cl_float *>(
|
||||
outPtr + destTypeSize * s)))
|
||||
|| ((destType == kDouble)
|
||||
&& isnan_fp(*reinterpret_cast<cl_double *>(
|
||||
outPtr + destTypeSize * s))));
|
||||
|
||||
if (isSrcNaN && isDestNaN)
|
||||
{
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "harness/compat.h"
|
||||
#include "harness/mathHelpers.h"
|
||||
#include "harness/rounding_mode.h"
|
||||
#include "harness/stringHelpers.h"
|
||||
|
||||
@@ -57,16 +58,6 @@ template <typename T> double toDouble(T val)
|
||||
return val;
|
||||
}
|
||||
|
||||
bool isHalfNan(cl_half v)
|
||||
{
|
||||
// Extract FP16 exponent and mantissa
|
||||
uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = v & 0x3FF;
|
||||
|
||||
// NaN test
|
||||
return (h_exp == 0x1F && h_mant != 0);
|
||||
}
|
||||
|
||||
cl_half half_plus(cl_half a, cl_half b)
|
||||
{
|
||||
return HFF(std::plus<float>()(HTF(a), HTF(b)));
|
||||
@@ -101,14 +92,7 @@ int verify_fp(std::vector<T> (&input)[2], std::vector<T> &output,
|
||||
T r = test.ref(inA[i], inB[i]);
|
||||
bool both_nan = false;
|
||||
|
||||
if (std::is_same<T, cl_half>::value)
|
||||
{
|
||||
both_nan = isHalfNan(r) && isHalfNan(output[i]);
|
||||
}
|
||||
else if (std::is_floating_point<T>::value)
|
||||
{
|
||||
both_nan = std::isnan(r) && std::isnan(output[i]);
|
||||
}
|
||||
both_nan = isnan_fp(r) && isnan_fp(output[i]);
|
||||
|
||||
// If not both nan, check if the result is the same
|
||||
if (!both_nan && (r != output[i]))
|
||||
|
||||
@@ -100,8 +100,8 @@ REGISTER_TEST(hostptr)
|
||||
cl_image_format img_format;
|
||||
cl_uchar *rgba8_inptr, *rgba8_outptr;
|
||||
void *lock_buffer;
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
size_t img_width = 512;
|
||||
size_t img_height = 512;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
RoundingMode oldRoundMode;
|
||||
|
||||
@@ -56,8 +56,9 @@ __kernel void test_if(__global int *src, __global int *dst)
|
||||
int verify_if(std::vector<cl_int> input, std::vector<cl_int> output)
|
||||
{
|
||||
const cl_int results[] = {
|
||||
0x12345678, 0x23456781, 0x34567812, 0x45678123,
|
||||
0x56781234, 0x67812345, 0x78123456, 0x81234567,
|
||||
(cl_int)0x12345678, (cl_int)0x23456781, (cl_int)0x34567812,
|
||||
(cl_int)0x45678123, (cl_int)0x56781234, (cl_int)0x67812345,
|
||||
(cl_int)0x78123456, (cl_int)0x81234567,
|
||||
};
|
||||
|
||||
auto predicate = [&results](cl_int a, cl_int b) {
|
||||
|
||||
@@ -144,8 +144,8 @@ verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int
|
||||
|
||||
REGISTER_TEST(image_multipass_integer_coord)
|
||||
{
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
size_t img_width = 512;
|
||||
size_t img_height = 512;
|
||||
cl_image_format img_format;
|
||||
|
||||
int num_input_streams = 8;
|
||||
@@ -397,8 +397,8 @@ REGISTER_TEST(image_multipass_integer_coord)
|
||||
|
||||
REGISTER_TEST(image_multipass_float_coord)
|
||||
{
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
size_t img_width = 512;
|
||||
size_t img_height = 512;
|
||||
cl_image_format img_format;
|
||||
|
||||
int num_input_streams = 8;
|
||||
|
||||
@@ -35,9 +35,9 @@ static int test_imagearraycopy_single_format(
|
||||
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
|
||||
imgptr{ nullptr, free };
|
||||
clMemWrapper buffer, image;
|
||||
const int img_width = 512;
|
||||
const int img_height = 512;
|
||||
const int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
|
||||
const size_t img_width = 512;
|
||||
const size_t img_height = 512;
|
||||
const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
|
||||
size_t elem_size;
|
||||
size_t buffer_size;
|
||||
cl_int err;
|
||||
|
||||
@@ -111,8 +111,8 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
|
||||
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
||||
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
size_t img_width = 512;
|
||||
size_t img_height = 512;
|
||||
int i, err;
|
||||
MTdataHolder d(gRandomSeed);
|
||||
|
||||
@@ -153,7 +153,7 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
int x, y, delta_w = img_width / 8, delta_h = img_height / 16;
|
||||
size_t x, y, delta_w = img_width / 8, delta_h = img_height / 16;
|
||||
|
||||
switch (i)
|
||||
{
|
||||
@@ -197,10 +197,11 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
|
||||
copy_origin, copy_region, 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("Copy %d (origin [%d, %d], size [%d, %d], image "
|
||||
"size [%d x %d]) Failed\n",
|
||||
copy_number, x, y, delta_w, delta_h, img_width,
|
||||
img_height);
|
||||
log_error(
|
||||
"Copy %d (origin [%zu, %zu], size [%zu, %zu], image "
|
||||
"size [%zu x %zu]) Failed\n",
|
||||
copy_number, x, y, delta_w, delta_h, img_width,
|
||||
img_height);
|
||||
}
|
||||
test_error(err, "clEnqueueCopyImage failed");
|
||||
}
|
||||
|
||||
@@ -115,9 +115,9 @@ static int test_imagecopy3d_impl(cl_device_id device, cl_context context,
|
||||
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
||||
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 128;
|
||||
int img_height = 128;
|
||||
int img_depth = 64;
|
||||
size_t img_width = 128;
|
||||
size_t img_height = 128;
|
||||
size_t img_depth = 64;
|
||||
int i;
|
||||
cl_int err;
|
||||
unsigned num_elements = img_width * img_height * img_depth * 4;
|
||||
|
||||
@@ -82,8 +82,8 @@ REGISTER_TEST(imagenpot)
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3], local_threads[3];
|
||||
size_t local_workgroup_size;
|
||||
int img_width;
|
||||
int img_height;
|
||||
size_t img_width;
|
||||
size_t img_height;
|
||||
int err;
|
||||
cl_uint m;
|
||||
size_t max_local_workgroup_size[3];
|
||||
|
||||
@@ -123,15 +123,15 @@ REGISTER_TEST(imagerandomcopy)
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
size_t img_width = 512;
|
||||
size_t img_height = 512;
|
||||
int i, j;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
log_info("Testing with image %d x %d.\n", img_width, img_height);
|
||||
log_info("Testing with image %zu x %zu.\n", img_width, img_height);
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
||||
@@ -191,8 +191,8 @@ REGISTER_TEST(imagerandomcopy)
|
||||
}
|
||||
|
||||
size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
|
||||
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
|
||||
// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
|
||||
err = clEnqueueWriteImage(queue, streams[i * 2], CL_TRUE, origin,
|
||||
region, 0, 0, p, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteImage failed");
|
||||
|
||||
for (j=0; j<NUM_COPIES; j++)
|
||||
|
||||
@@ -195,8 +195,8 @@ REGISTER_TEST(imagereadwrite)
|
||||
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
||||
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
||||
clMemWrapper streams[3];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
size_t img_width = 512;
|
||||
size_t img_height = 512;
|
||||
int num_tries = 200;
|
||||
int i, j, err;
|
||||
MTdataHolder d(gRandomSeed);
|
||||
@@ -242,10 +242,10 @@ REGISTER_TEST(imagereadwrite)
|
||||
|
||||
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
|
||||
{
|
||||
int x = (int)get_random_float(0, img_width, d);
|
||||
int y = (int)get_random_float(0, img_height, d);
|
||||
int w = (int)get_random_float(1, (img_width - x), d);
|
||||
int h = (int)get_random_float(1, (img_height - y), d);
|
||||
size_t x = (size_t)get_random_float(0, img_width, d);
|
||||
size_t y = (size_t)get_random_float(0, img_height, d);
|
||||
size_t w = (size_t)get_random_float(1, (img_width - x), d);
|
||||
size_t h = (size_t)get_random_float(1, (img_height - y), d);
|
||||
size_t input_pitch;
|
||||
int set_input_pitch = (int)(genrand_int32(d) & 0x01);
|
||||
int packed_update = (int)(genrand_int32(d) & 0x01);
|
||||
@@ -386,7 +386,8 @@ REGISTER_TEST(imagereadwrite)
|
||||
img_width, img_height);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
|
||||
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
|
||||
y, w, h, (int)input_pitch, (int)i);
|
||||
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
@@ -396,7 +397,8 @@ REGISTER_TEST(imagereadwrite)
|
||||
img_width, img_height);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
|
||||
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
|
||||
y, w, h, (int)input_pitch, (int)i);
|
||||
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
@@ -406,7 +408,8 @@ REGISTER_TEST(imagereadwrite)
|
||||
img_width, img_height);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
|
||||
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
|
||||
y, w, h, (int)input_pitch, (int)i);
|
||||
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -205,10 +205,10 @@ REGISTER_TEST(imagereadwrite3d)
|
||||
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
|
||||
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
|
||||
clMemWrapper streams[3];
|
||||
int img_width = 64;
|
||||
int img_height = 64;
|
||||
int img_depth = 32;
|
||||
int img_slice = img_width * img_height;
|
||||
size_t img_width = 64;
|
||||
size_t img_height = 64;
|
||||
size_t img_depth = 32;
|
||||
size_t img_slice = img_width * img_height;
|
||||
int num_tries = 30;
|
||||
int i, j, err;
|
||||
MTdataHolder mtData(gRandomSeed);
|
||||
@@ -257,12 +257,12 @@ REGISTER_TEST(imagereadwrite3d)
|
||||
|
||||
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
|
||||
{
|
||||
int x = (int)get_random_float(0, (float)img_width - 1, mtData);
|
||||
int y = (int)get_random_float(0, (float)img_height - 1, mtData);
|
||||
int z = (int)get_random_float(0, (float)img_depth - 1, mtData);
|
||||
int w = (int)get_random_float(1, (float)(img_width - x), mtData);
|
||||
int h = (int)get_random_float(1, (float)(img_height - y), mtData);
|
||||
int d = (int)get_random_float(1, (float)(img_depth - z), mtData);
|
||||
size_t x = (size_t)get_random_float(0, (float)img_width - 1, mtData);
|
||||
size_t y = (size_t)get_random_float(0, (float)img_height - 1, mtData);
|
||||
size_t z = (size_t)get_random_float(0, (float)img_depth - 1, mtData);
|
||||
size_t w = (size_t)get_random_float(1, (float)(img_width - x), mtData);
|
||||
size_t h = (size_t)get_random_float(1, (float)(img_height - y), mtData);
|
||||
size_t d = (size_t)get_random_float(1, (float)(img_depth - z), mtData);
|
||||
size_t input_pitch, input_slice_pitch;
|
||||
int set_input_pitch = (int)(genrand_int32(mtData) & 0x01);
|
||||
int packed_update = (int)(genrand_int32(mtData) & 0x01);
|
||||
@@ -401,7 +401,10 @@ REGISTER_TEST(imagereadwrite3d)
|
||||
img_width, img_height, img_depth);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
|
||||
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
|
||||
"slice_pitch=%d, try=%d\n",
|
||||
x, y, z, w, h, d, (int)input_pitch,
|
||||
(int)input_slice_pitch, (int)i);
|
||||
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
@@ -411,7 +414,10 @@ REGISTER_TEST(imagereadwrite3d)
|
||||
img_width, img_height, img_depth);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
|
||||
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
|
||||
"slice_pitch=%d, try=%d\n",
|
||||
x, y, z, w, h, d, (int)input_pitch,
|
||||
(int)input_slice_pitch, (int)i);
|
||||
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
@@ -421,7 +427,10 @@ REGISTER_TEST(imagereadwrite3d)
|
||||
img_width, img_height, img_depth);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
|
||||
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
|
||||
"slice_pitch=%d, try=%d\n",
|
||||
x, y, z, w, h, d, (int)input_pitch,
|
||||
(int)input_slice_pitch, (int)i);
|
||||
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -66,7 +66,7 @@ REGISTER_TEST(kernel_call_kernel_function)
|
||||
clKernelWrapper kernel1, kernel2, kernel_to_call;
|
||||
clMemWrapper streams[2];
|
||||
|
||||
size_t threads[] = {num_elements,1,1};
|
||||
size_t threads[] = { static_cast<size_t>(num_elements), 1, 1 };
|
||||
cl_int *input, *output, *expected;
|
||||
cl_int times = 4;
|
||||
int pass = 0;
|
||||
|
||||
@@ -117,8 +117,8 @@ REGISTER_TEST(mri_multiple)
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
size_t img_width = 512;
|
||||
size_t img_height = 512;
|
||||
int i, err;
|
||||
MTdata d;
|
||||
|
||||
|
||||
@@ -100,8 +100,8 @@ REGISTER_TEST(mri_one)
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
size_t img_width = 512;
|
||||
size_t img_height = 512;
|
||||
int i, err;
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
size_t region[3] = {img_width, img_height, 1};
|
||||
|
||||
@@ -1264,7 +1264,7 @@ static int l_write_read_for_type(cl_device_id device, cl_context context,
|
||||
}
|
||||
|
||||
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
|
||||
queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
|
||||
queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0,
|
||||
0, 0);
|
||||
memset(read_data, -1, read_data_size);
|
||||
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
|
||||
@@ -1503,7 +1503,7 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context,
|
||||
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
|
||||
|
||||
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
|
||||
queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
|
||||
queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0,
|
||||
0, 0);
|
||||
memset(read_data, -1, read_data_size);
|
||||
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
|
||||
|
||||
@@ -476,7 +476,7 @@ struct TestWorkItemFnsOutOfRange
|
||||
maxWorkItemSizes[2] };
|
||||
// check if maximum work group size for current dimension is not
|
||||
// exceeded
|
||||
cl_uint work_group_size = max_workgroup_size + 1;
|
||||
size_t work_group_size = max_workgroup_size + 1;
|
||||
while (max_workgroup_size < work_group_size && work_group_size != 1)
|
||||
{
|
||||
work_group_size = 1;
|
||||
@@ -492,9 +492,9 @@ struct TestWorkItemFnsOutOfRange
|
||||
|
||||
// compute max number of work groups based on buffer size and max
|
||||
// group size
|
||||
cl_uint max_work_groups = testData.size() / work_group_size;
|
||||
size_t max_work_groups = testData.size() / work_group_size;
|
||||
// take into account number of dimentions
|
||||
cl_uint work_groups_per_dim =
|
||||
size_t work_groups_per_dim =
|
||||
std::max(1, (int)pow(max_work_groups, 1.f / dim));
|
||||
|
||||
for (size_t j = 0; j < dim; j++)
|
||||
|
||||
@@ -24,8 +24,9 @@
|
||||
|
||||
#include "CL/cl_half.h"
|
||||
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
|
||||
#define MAX_HOST_THREADS GetThreadCount()
|
||||
@@ -74,9 +75,11 @@ extern int
|
||||
gMaxDeviceThreads; // maximum number of threads executed on OCL device
|
||||
extern cl_device_atomic_capabilities gAtomicMemCap,
|
||||
gAtomicFenceCap; // atomic memory and fence capabilities for this device
|
||||
|
||||
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||
extern bool gFloatAtomicsSupported;
|
||||
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
|
||||
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
|
||||
|
||||
extern const char *
|
||||
get_memory_order_type_name(TExplicitMemoryOrderType orderType);
|
||||
@@ -174,6 +177,13 @@ public:
|
||||
{
|
||||
return false;
|
||||
}
|
||||
virtual bool
|
||||
IsTestNotAsExpected(const HostDataType &expected,
|
||||
const std::vector<HostAtomicType> &testValues,
|
||||
cl_uint whichDestValue)
|
||||
{
|
||||
return expected != testValues[whichDestValue];
|
||||
}
|
||||
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||
MTdata d)
|
||||
{
|
||||
@@ -883,14 +893,15 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
|
||||
header += std::string("__global volatile ") + aTypeName + " destMemory["
|
||||
+ ss.str() + "] = {\n";
|
||||
ss.str("");
|
||||
|
||||
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
|
||||
!= TYPE_ATOMIC_HALF)
|
||||
ss << _startValue;
|
||||
else
|
||||
== TYPE_ATOMIC_FLOAT)
|
||||
ss << std::setprecision(10) << _startValue;
|
||||
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
|
||||
== TYPE_ATOMIC_HALF)
|
||||
ss << static_cast<HostDataType>(
|
||||
cl_half_to_float(static_cast<cl_half>(_startValue)));
|
||||
|
||||
else
|
||||
ss << _startValue;
|
||||
for (cl_uint i = 0; i < maxNumDestItems; i++)
|
||||
{
|
||||
if (aTypeName == "atomic_flag")
|
||||
@@ -1449,7 +1460,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
|
||||
startRefValues.size() ? &startRefValues[0] : 0, i))
|
||||
break; // no expected value function provided
|
||||
|
||||
if (expected != destItems[i])
|
||||
if (IsTestNotAsExpected(expected, destItems, i))
|
||||
{
|
||||
std::stringstream logLine;
|
||||
logLine << "ERROR: Result " << i
|
||||
|
||||
@@ -17,6 +17,9 @@
|
||||
#define HOST_ATOMICS_H_
|
||||
|
||||
#include "harness/testHarness.h"
|
||||
#include <mutex>
|
||||
|
||||
#include "CL/cl_half.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#include "Windows.h"
|
||||
@@ -87,6 +90,8 @@ enum TExplicitMemoryOrderType
|
||||
|
||||
#define HOST_FLAG cl_int
|
||||
|
||||
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||
|
||||
// host atomic functions
|
||||
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
|
||||
|
||||
@@ -94,28 +99,51 @@ template <typename AtomicType, typename CorrespondingType>
|
||||
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
|
||||
TExplicitMemoryOrderType order)
|
||||
{
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
||||
{
|
||||
static std::mutex mx;
|
||||
std::lock_guard<std::mutex> lock(mx);
|
||||
CorrespondingType old_value = *a;
|
||||
*a += c;
|
||||
return old_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||
return InterlockedExchangeAdd(a, c);
|
||||
return InterlockedExchangeAdd(a, c);
|
||||
#elif defined(__GNUC__)
|
||||
return __sync_fetch_and_add(a, c);
|
||||
return __sync_fetch_and_add(a, c);
|
||||
#else
|
||||
log_info("Host function not implemented: atomic_fetch_add\n");
|
||||
return 0;
|
||||
log_info("Host function not implemented: atomic_fetch_add\n");
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AtomicType, typename CorrespondingType>
|
||||
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
|
||||
TExplicitMemoryOrderType order)
|
||||
{
|
||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||
return InterlockedExchangeSubtract(a, c);
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
|
||||
{
|
||||
static std::mutex mx;
|
||||
std::lock_guard<std::mutex> lock(mx);
|
||||
CorrespondingType old_value = *a;
|
||||
*a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
|
||||
gHalfRoundingMode);
|
||||
return old_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
||||
return InterlockedExchangeSubtract(a, c);
|
||||
#elif defined(__GNUC__)
|
||||
return __sync_fetch_and_sub(a, c);
|
||||
return __sync_fetch_and_sub(a, c);
|
||||
#else
|
||||
log_info("Host function not implemented: atomic_fetch_sub\n");
|
||||
return 0;
|
||||
log_info("Host function not implemented: atomic_fetch_sub\n");
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AtomicType, typename CorrespondingType>
|
||||
@@ -144,19 +172,34 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
|
||||
TExplicitMemoryOrderType order_success,
|
||||
TExplicitMemoryOrderType order_failure)
|
||||
{
|
||||
CorrespondingType tmp;
|
||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||
tmp = InterlockedCompareExchange(a, desired, *expected);
|
||||
CorrespondingType tmp;
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
||||
{
|
||||
static std::mutex mtx;
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
tmp = *reinterpret_cast<volatile float *>(a);
|
||||
if (tmp == *expected)
|
||||
{
|
||||
*reinterpret_cast<volatile float *>(a) = desired;
|
||||
return true;
|
||||
}
|
||||
*expected = tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
|
||||
|
||||
tmp = InterlockedCompareExchange(a, desired, *expected);
|
||||
#elif defined(__GNUC__)
|
||||
tmp = __sync_val_compare_and_swap(a, *expected, desired);
|
||||
tmp = __sync_val_compare_and_swap(a, *expected, desired);
|
||||
#else
|
||||
log_info("Host function not implemented: atomic_compare_exchange\n");
|
||||
tmp = 0;
|
||||
log_info("Host function not implemented: atomic_compare_exchange\n");
|
||||
tmp = 0;
|
||||
#endif
|
||||
if(tmp == *expected)
|
||||
return true;
|
||||
*expected = tmp;
|
||||
return false;
|
||||
if (tmp == *expected) return true;
|
||||
*expected = tmp;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename AtomicType, typename CorrespondingType>
|
||||
|
||||
@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
|
||||
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
|
||||
bool gFloatAtomicsSupported = false;
|
||||
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
|
||||
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
|
||||
|
||||
test_status InitCL(cl_device_id device) {
|
||||
auto version = get_device_cl_version(device);
|
||||
@@ -132,6 +133,12 @@ test_status InitCL(cl_device_id device) {
|
||||
if (is_extension_available(device, "cl_ext_float_atomics"))
|
||||
{
|
||||
gFloatAtomicsSupported = true;
|
||||
|
||||
cl_int error = clGetDeviceInfo(
|
||||
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
|
||||
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
|
||||
|
||||
if (is_extension_available(device, "cl_khr_fp16"))
|
||||
{
|
||||
cl_int error = clGetDeviceInfo(
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -8,7 +8,6 @@ set(VULKAN_WRAPPER_SOURCES
|
||||
# needed by Vulkan wrapper to compile
|
||||
set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
|
||||
add_cxx_flag_if_supported(-Wmisleading-indentation)
|
||||
add_cxx_flag_if_supported(-Wno-narrowing)
|
||||
add_cxx_flag_if_supported(-Wno-format)
|
||||
add_cxx_flag_if_supported(-Wno-error)
|
||||
add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive
|
||||
|
||||
@@ -863,7 +863,7 @@ clExternalMemoryImage::clExternalMemoryImage(
|
||||
size_t clImageFormatSize;
|
||||
cl_image_desc image_desc;
|
||||
memset(&image_desc, 0x0, sizeof(cl_image_desc));
|
||||
cl_image_format img_format = { 0 };
|
||||
img_format = { 0 };
|
||||
const VkImageCreateInfo VulkanImageCreateInfo =
|
||||
image2D.getVkImageCreateInfo();
|
||||
|
||||
@@ -1233,7 +1233,7 @@ int clExternalExportableSemaphore::signal(cl_command_queue cmd_queue)
|
||||
import.fd = fd;
|
||||
import.pNext = nullptr;
|
||||
import.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
|
||||
import.flags = 0;
|
||||
import.flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT;
|
||||
|
||||
VkResult res =
|
||||
vkImportSemaphoreFdKHR(m_deviceSemaphore.getDevice(), &import);
|
||||
|
||||
@@ -106,6 +106,7 @@ protected:
|
||||
cl_mem m_externalMemory;
|
||||
int fd;
|
||||
void *handle;
|
||||
cl_image_format img_format;
|
||||
clExternalMemoryImage();
|
||||
|
||||
public:
|
||||
@@ -117,6 +118,7 @@ public:
|
||||
cl_device_id deviceId);
|
||||
virtual ~clExternalMemoryImage();
|
||||
cl_mem getExternalMemoryImage();
|
||||
cl_image_format getImageFormat() { return img_format; };
|
||||
};
|
||||
|
||||
class clExternalSemaphore {
|
||||
|
||||
@@ -243,6 +243,8 @@ getSupportedVulkanExternalMemoryHandleTypeList(
|
||||
VkPhysicalDeviceExternalBufferInfo buffer_info = {};
|
||||
buffer_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO;
|
||||
buffer_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
||||
buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT
|
||||
| VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
||||
|
||||
VkExternalBufferProperties buffer_properties = {};
|
||||
buffer_properties.sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES;
|
||||
@@ -307,7 +309,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList(const VulkanDevice &vkDevice)
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, nullptr,
|
||||
handle_type.vk_type
|
||||
};
|
||||
VkExternalSemaphoreProperties query_result = {};
|
||||
VkExternalSemaphoreProperties query_result = {
|
||||
VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES
|
||||
};
|
||||
vkGetPhysicalDeviceExternalSemaphorePropertiesKHR(
|
||||
vkDevice.getPhysicalDevice(), &handle_query, &query_result);
|
||||
if (query_result.externalSemaphoreFeatures
|
||||
|
||||
@@ -17,10 +17,31 @@ set(${MODULE_NAME}_SOURCES
|
||||
|
||||
include(../CMakeCommon.txt)
|
||||
|
||||
# Include the relative paths to SPV assembly files
|
||||
configure_file(spirv_asm_list.txt ${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt)
|
||||
include(${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt)
|
||||
|
||||
# Determine the corresponding binary outputs to the SPV assembly input files
|
||||
set(COMPILER_ASM_REL_PATH spirv_asm)
|
||||
set(COMPILER_ASM_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${COMPILER_ASM_REL_PATH}")
|
||||
set(COMPILER_SPV_PATH "${CMAKE_CURRENT_BINARY_DIR}/spirv_bin")
|
||||
|
||||
# Copy the required test include directories into the build directory.
|
||||
if(NOT DEFINED COMPILER_TEST_RESOURCES)
|
||||
set(COMPILER_TEST_RESOURCES $<TARGET_FILE_DIR:${${MODULE_NAME}_OUT}>)
|
||||
endif()
|
||||
|
||||
set(COMPILER_SPV_EXTRA "")
|
||||
if(SPIRV_TOOLS_DIR AND IS_ABSOLUTE "${SPIRV_TOOLS_DIR}" AND
|
||||
IS_DIRECTORY "${SPIRV_TOOLS_DIR}")
|
||||
message("Using SPIR-V tools from '${SPIRV_TOOLS_DIR}'")
|
||||
set(COMPILER_SPV_EXTRA "--assembler=${SPIRV_TOOLS_DIR}/spirv-as" "--validator=${SPIRV_TOOLS_DIR}/spirv-val")
|
||||
endif()
|
||||
set(COMPILER_ASSEMBLY_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../spirv_new/spirv_asm/assemble_spirv.py)
|
||||
|
||||
include(CMakePrintHelpers)
|
||||
cmake_print_variables(COMPILER_ASSEMBLY_SCRIPT)
|
||||
|
||||
add_custom_command(
|
||||
COMMENT "Copying compiler test resources..."
|
||||
TARGET ${${MODULE_NAME}_OUT}
|
||||
@@ -30,7 +51,10 @@ add_custom_command(
|
||||
${COMPILER_TEST_RESOURCES}/includeTestDirectory
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||
${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
|
||||
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory)
|
||||
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory
|
||||
COMMAND ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose
|
||||
DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM}
|
||||
VERBATIM)
|
||||
|
||||
include(GNUInstallDirs)
|
||||
|
||||
|
||||
4
test_conformance/compiler/spirv_asm_list.txt
Normal file
4
test_conformance/compiler/spirv_asm_list.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
set(COMPILER_SPIRV_NEW_ASM
|
||||
compiler_spirv_asm/write_kernel.spvasm32
|
||||
compiler_spirv_asm/write_kernel.spvasm64
|
||||
)
|
||||
@@ -14,6 +14,9 @@
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <time.h>
|
||||
#elif defined(__linux__) || defined(__APPLE__)
|
||||
@@ -3020,15 +3023,6 @@ REGISTER_TEST(execute_after_embedded_header_link)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(__APPLE__) || defined(__linux)
|
||||
#define _mkdir(x) mkdir(x, S_IRWXU)
|
||||
#define _chdir chdir
|
||||
#define _rmdir rmdir
|
||||
#define _unlink unlink
|
||||
#else
|
||||
#include <direct.h>
|
||||
#endif
|
||||
|
||||
REGISTER_TEST(execute_after_included_header_link)
|
||||
{
|
||||
int error;
|
||||
@@ -3047,100 +3041,60 @@ REGISTER_TEST(execute_after_included_header_link)
|
||||
}
|
||||
|
||||
/* setup */
|
||||
#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
|
||||
/* Some tests systems doesn't allow one to write in the test directory */
|
||||
if (_chdir("/tmp") != 0)
|
||||
std::error_code ec;
|
||||
auto temp_dir_path = std::filesystem::temp_directory_path(ec);
|
||||
if (ec)
|
||||
{
|
||||
log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
log_error("ERROR: Unable to get the temporary directory path\n");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
if (_mkdir("foo") != 0)
|
||||
temp_dir_path = temp_dir_path / "foo" / "bar";
|
||||
std::filesystem::create_directories(temp_dir_path, ec);
|
||||
if (ec)
|
||||
{
|
||||
log_error("ERROR: Unable to create directory foo! (in %s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
log_error("ERROR: Unable to create directory: %s, error: %d (%s)\n",
|
||||
temp_dir_path.u8string().c_str(), ec.value(),
|
||||
ec.message().c_str());
|
||||
return -1;
|
||||
}
|
||||
if (_mkdir("foo/bar") != 0)
|
||||
{
|
||||
log_error("ERROR: Unable to create directory foo/bar! (in %s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
if (_chdir("foo/bar") != 0)
|
||||
{
|
||||
log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
FILE *simple_header_file = fopen(simple_header_name, "w");
|
||||
|
||||
const auto simple_header_path = temp_dir_path / simple_header_name;
|
||||
FILE *simple_header_file =
|
||||
fopen(simple_header_path.u8string().c_str(), "w");
|
||||
if (simple_header_file == NULL)
|
||||
{
|
||||
log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n",
|
||||
simple_header_name, __FILE__, __LINE__);
|
||||
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
if (fprintf(simple_header_file, "%s", simple_header) < 0)
|
||||
{
|
||||
log_error(
|
||||
"ERROR: Unable to write to simple header file %s! (in %s:%d)\n",
|
||||
simple_header_name, __FILE__, __LINE__);
|
||||
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
if (fclose(simple_header_file) != 0)
|
||||
{
|
||||
log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n",
|
||||
simple_header_name, __FILE__, __LINE__);
|
||||
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
if (_chdir("../..") != 0)
|
||||
{
|
||||
log_error("ERROR: Unable to change to original working directory! (in "
|
||||
"%s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
|
||||
error = clCompileProgram(program, 1, &device, "-I/tmp/foo/bar", 0, NULL,
|
||||
|
||||
const std::string include_path =
|
||||
std::string("-I") + temp_dir_path.generic_u8string();
|
||||
error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL,
|
||||
NULL, NULL, NULL);
|
||||
#else
|
||||
error = clCompileProgram(program, 1, &device, "-Ifoo/bar", 0, NULL, NULL,
|
||||
NULL, NULL);
|
||||
#endif
|
||||
test_error(error,
|
||||
"Unable to compile a simple program with included header");
|
||||
|
||||
/* cleanup */
|
||||
if (_chdir("foo/bar") != 0)
|
||||
std::filesystem::remove_all(temp_dir_path, ec);
|
||||
if (ec)
|
||||
{
|
||||
log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
if (_unlink(simple_header_name) != 0)
|
||||
{
|
||||
log_error("ERROR: Unable to remove simple header file %s! (in %s:%d)\n",
|
||||
simple_header_name, __FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
if (_chdir("../..") != 0)
|
||||
{
|
||||
log_error("ERROR: Unable to change to original working directory! (in "
|
||||
"%s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
if (_rmdir("foo/bar") != 0)
|
||||
{
|
||||
log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
if (_rmdir("foo") != 0)
|
||||
{
|
||||
log_error("ERROR: Unable to remove directory foo! (in %s:%d)\n",
|
||||
__FILE__, __LINE__);
|
||||
log_error("ERROR: Unable to delete directory: %s, error: %d (%s)",
|
||||
temp_dir_path.u8string().c_str(), ec.value(),
|
||||
ec.message().c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
@@ -95,7 +95,9 @@ const char *known_extensions[] = {
|
||||
"cl_khr_command_buffer",
|
||||
"cl_khr_command_buffer_mutable_dispatch",
|
||||
"cl_khr_command_buffer_multi_device",
|
||||
"cl_khr_external_memory_android_hardware_buffer"
|
||||
"cl_khr_external_memory_android_hardware_buffer",
|
||||
"cl_khr_unified_svm",
|
||||
"cl_khr_spirv_queries"
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
|
||||
@@ -656,6 +656,32 @@ static int test_feature_macro_integer_dot_product_input_4x8bit(
|
||||
compiler_status, supported);
|
||||
}
|
||||
|
||||
static int test_feature_macro_ext_image_unorm_int_2_101010(
|
||||
cl_device_id deviceID, cl_context context, std::string test_macro_name,
|
||||
cl_bool& supported)
|
||||
{
|
||||
cl_int error = TEST_FAIL;
|
||||
cl_bool api_status = CL_TRUE;
|
||||
cl_bool compiler_status;
|
||||
log_info("\n%s ...\n", test_macro_name.c_str());
|
||||
|
||||
if (!is_extension_available(deviceID, "cl_ext_image_unorm_int_2_101010"))
|
||||
{
|
||||
supported = false;
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
error = check_compiler_feature_info(deviceID, context, test_macro_name,
|
||||
compiler_status);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
return error;
|
||||
}
|
||||
|
||||
return feature_macro_verify_results(test_macro_name, api_status,
|
||||
compiler_status, supported);
|
||||
}
|
||||
|
||||
static int test_feature_macro_int64(cl_device_id deviceID, cl_context context,
|
||||
std::string test_macro_name,
|
||||
cl_bool& supported)
|
||||
@@ -833,6 +859,7 @@ REGISTER_TEST_VERSION(features_macro, Version(3, 0))
|
||||
NEW_FEATURE_MACRO_TEST(int64);
|
||||
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit);
|
||||
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed);
|
||||
NEW_FEATURE_MACRO_TEST(ext_image_unorm_int_2_101010);
|
||||
|
||||
error |= test_consistency_c_features_list(device, supported_features_vec);
|
||||
|
||||
|
||||
@@ -25,6 +25,43 @@
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
|
||||
#if defined(_WIN32)
|
||||
const std::string slash = "\\";
|
||||
#else
|
||||
const std::string slash = "/";
|
||||
#endif
|
||||
std::string compilerSpvBinaries = "test_conformance" + slash + "compiler"
|
||||
+ slash + "spirv_bin" + slash + "write_kernel.spv";
|
||||
|
||||
const std::string spvExt = ".spv";
|
||||
|
||||
std::vector<unsigned char> readBinary(const char *file_name)
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
ifstream file(file_name, ios::in | ios::binary | ios::ate);
|
||||
|
||||
std::vector<char> tmpBuffer(0);
|
||||
|
||||
if (file.is_open())
|
||||
{
|
||||
size_t size = file.tellg();
|
||||
tmpBuffer.resize(size);
|
||||
file.seekg(0, ios::beg);
|
||||
file.read(&tmpBuffer[0], size);
|
||||
file.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error("File %s not found\n", file_name);
|
||||
}
|
||||
|
||||
std::vector<unsigned char> result(tmpBuffer.begin(), tmpBuffer.end());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -299,18 +336,12 @@ public:
|
||||
throw unload_test_failure("Failure getting device address bits");
|
||||
}
|
||||
|
||||
switch (address_bits)
|
||||
{
|
||||
case 32:
|
||||
m_spirv_binary = write_kernel_32_spv.data();
|
||||
m_spirv_size = write_kernel_32_spv.size();
|
||||
break;
|
||||
case 64:
|
||||
m_spirv_binary = write_kernel_64_spv.data();
|
||||
m_spirv_size = write_kernel_64_spv.size();
|
||||
break;
|
||||
default: throw unload_test_failure("Invalid address bits");
|
||||
}
|
||||
std::vector<unsigned char> kernel_buffer;
|
||||
|
||||
std::string file_name =
|
||||
compilerSpvBinaries + std::to_string(address_bits);
|
||||
m_spirv_binary = readBinary(file_name.c_str());
|
||||
m_spirv_size = m_spirv_binary.size();
|
||||
}
|
||||
|
||||
void create() final
|
||||
@@ -320,7 +351,7 @@ public:
|
||||
assert(nullptr == m_program);
|
||||
|
||||
cl_int err = CL_INVALID_PLATFORM;
|
||||
m_program = m_CreateProgramWithIL(m_context, m_spirv_binary,
|
||||
m_program = m_CreateProgramWithIL(m_context, &m_spirv_binary[0],
|
||||
m_spirv_size, &err);
|
||||
if (CL_SUCCESS != err)
|
||||
throw unload_test_failure("clCreateProgramWithIL()", err);
|
||||
@@ -347,7 +378,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
void *m_spirv_binary;
|
||||
std::vector<unsigned char> m_spirv_binary;
|
||||
size_t m_spirv_size;
|
||||
bool m_enabled;
|
||||
|
||||
|
||||
@@ -4,47 +4,3 @@ static const char write_kernel_source[] = R"(
|
||||
kernel void write_kernel(global unsigned int *p) {
|
||||
*p = 42;
|
||||
})";
|
||||
|
||||
/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm64 */
|
||||
static std::array<unsigned char, 216> write_kernel_64_spv{
|
||||
{ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x0e, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
|
||||
0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
|
||||
};
|
||||
|
||||
/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm32 */
|
||||
static std::array<unsigned char, 216> write_kernel_32_spv{
|
||||
{ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x0e, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
|
||||
0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
|
||||
};
|
||||
|
||||
@@ -284,6 +284,11 @@ int main( int argc, const char **argv )
|
||||
|
||||
static int ParseArgs( int argc, const char **argv )
|
||||
{
|
||||
if (gListTests)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
gArgList = (const char **)calloc( argc, sizeof( char*) );
|
||||
|
||||
if( NULL == gArgList )
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
set(MODULE_NAME CONVERSIONS)
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
|
||||
add_cxx_flag_if_supported(-Wno-narrowing)
|
||||
endif()
|
||||
|
||||
set (${MODULE_NAME}_SOURCES
|
||||
Sleep.cpp test_conversions.cpp basic_test_conversions.cpp
|
||||
)
|
||||
|
||||
@@ -13,9 +13,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "harness/mathHelpers.h"
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/compat.h"
|
||||
#include "harness/ThreadPool.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <sys/sysctl.h>
|
||||
@@ -52,17 +54,17 @@
|
||||
|
||||
#include "basic_test_conversions.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
#include <mmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#else // !_WIN32
|
||||
#else
|
||||
#if defined(__SSE__)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#endif // _WIN32
|
||||
#endif
|
||||
|
||||
cl_context gContext = NULL;
|
||||
cl_command_queue gQueue = NULL;
|
||||
@@ -76,7 +78,6 @@ cl_mem gInBuffer;
|
||||
cl_mem gOutBuffers[kCallStyleCount];
|
||||
size_t gComputeDevices = 0;
|
||||
uint32_t gDeviceFrequency = 0;
|
||||
int gWimpyMode = 0;
|
||||
int gWimpyReductionFactor = 128;
|
||||
int gSkipTesting = 0;
|
||||
int gForceFTZ = 0;
|
||||
@@ -955,24 +956,6 @@ void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
|
||||
// destroyed automatically soon after we exit.
|
||||
}
|
||||
|
||||
template <typename T> static bool isnan_fp(const T &v)
|
||||
{
|
||||
if (std::is_same<T, cl_half>::value)
|
||||
{
|
||||
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = ((cl_half)v) & 0x3FF;
|
||||
return (h_exp == 0x1F && h_mant != 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if !defined(_WIN32)
|
||||
return std::isnan(v);
|
||||
#else
|
||||
return _isnan(v);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InType>
|
||||
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType, void *input)
|
||||
{
|
||||
|
||||
@@ -80,7 +80,6 @@ extern int gHasDouble;
|
||||
extern int gTestDouble;
|
||||
extern int gHasHalfs;
|
||||
extern int gTestHalfs;
|
||||
extern int gWimpyMode;
|
||||
extern int gWimpyReductionFactor;
|
||||
extern int gSkipTesting;
|
||||
extern int gMinVectorSize;
|
||||
|
||||
@@ -343,7 +343,7 @@ float DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int(float f)
|
||||
volatile float x = f;
|
||||
float magicVal = magic[f < 0];
|
||||
|
||||
#if defined(__SSE__)
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
||||
// incorrectly
|
||||
__m128 v = _mm_set_ss(x);
|
||||
@@ -376,7 +376,7 @@ DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int_and_clamp(double f)
|
||||
{
|
||||
volatile double x = f;
|
||||
double magicVal = magic[f < 0];
|
||||
#if defined(__SSE2__) || defined(_MSC_VER)
|
||||
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
|
||||
// incorrectly
|
||||
__m128d v = _mm_set_sd(x);
|
||||
@@ -479,7 +479,7 @@ void DataInfoSpec<InType, OutType, InFP, OutFP>::conv(OutType *out, InType *in)
|
||||
{
|
||||
if (std::is_same<cl_double, OutType>::value)
|
||||
{
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
double result;
|
||||
|
||||
if (std::is_same<cl_ulong, InType>::value)
|
||||
|
||||
@@ -112,6 +112,35 @@ int main(int argc, const char **argv)
|
||||
int error;
|
||||
|
||||
argc = parseCustomParam(argc, argv);
|
||||
if (gListTests)
|
||||
{
|
||||
for (unsigned dst = 0; dst < kTypeCount; dst++)
|
||||
{
|
||||
for (unsigned src = 0; src < kTypeCount; src++)
|
||||
{
|
||||
for (unsigned sat = 0; sat < 2; sat++)
|
||||
{
|
||||
// skip illegal saturated conversions to float type
|
||||
if (gSaturationNames[sat] == std::string("_sat")
|
||||
&& (gTypeNames[dst] == std::string("float")
|
||||
|| gTypeNames[dst] == std::string("half")
|
||||
|| gTypeNames[dst] == std::string("double")))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
for (unsigned rnd = 0; rnd < kRoundingModeCount; rnd++)
|
||||
{
|
||||
vlog("\t%s\n",
|
||||
(std::string(gTypeNames[dst])
|
||||
+ gSaturationNames[sat] + gRoundingModeNames[rnd]
|
||||
+ "_" + gTypeNames[src])
|
||||
.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if (argc == -1)
|
||||
{
|
||||
return 1;
|
||||
@@ -218,7 +247,6 @@ static int ParseArgs(int argc, const char **argv)
|
||||
case 'h': gTestHalfs ^= 1; break;
|
||||
case 'l': gSkipTesting ^= 1; break;
|
||||
case 'm': gMultithread ^= 1; break;
|
||||
case 'w': gWimpyMode ^= 1; break;
|
||||
case '[':
|
||||
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
|
||||
break;
|
||||
@@ -287,14 +315,6 @@ static int ParseArgs(int argc, const char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
// Check for the wimpy mode environment variable
|
||||
if (getenv("CL_WIMPY_MODE"))
|
||||
{
|
||||
vlog("\n");
|
||||
vlog("*** Detected CL_WIMPY_MODE env ***\n");
|
||||
gWimpyMode = 1;
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
PrintArch();
|
||||
@@ -335,9 +355,6 @@ static void PrintUsage(void)
|
||||
vlog("\t\t-l\tToggle link check mode. When on, testing is skipped, and we "
|
||||
"just check to see that the kernels build. (Off by default.)\n");
|
||||
vlog("\t\t-m\tToggle Multithreading. (On by default.)\n");
|
||||
vlog("\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very "
|
||||
"small subset of the tests for each fn. NOT A VALID TEST! (Off by "
|
||||
"default.)\n");
|
||||
vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
|
||||
"1-12, default factor(%u)\n",
|
||||
gWimpyReductionFactor);
|
||||
|
||||
@@ -1,22 +1,4 @@
|
||||
if(WIN32)
|
||||
|
||||
set(D3D10_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include)
|
||||
|
||||
if(${ARCH} STREQUAL "i686")
|
||||
set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86)
|
||||
endif(${ARCH} STREQUAL "i686")
|
||||
|
||||
if(${ARCH} STREQUAL "x86_64")
|
||||
set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64)
|
||||
endif(${ARCH} STREQUAL "x86_64")
|
||||
|
||||
list(APPEND CLConform_INCLUDE_DIR ${D3D10_INCLUDE_DIR})
|
||||
include_directories (${CLConform_SOURCE_DIR}/test_common/harness
|
||||
${CLConform_INCLUDE_DIR} )
|
||||
link_directories(${CL_LIB_DIR}, ${D3D10_LIB_DIR})
|
||||
|
||||
list(APPEND CLConform_LIBRARIES d3d10 dxgi)
|
||||
|
||||
set(MODULE_NAME D3D10)
|
||||
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
@@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES
|
||||
harness.cpp
|
||||
)
|
||||
|
||||
set_source_files_properties(
|
||||
${MODULE_NAME}_SOURCES
|
||||
PROPERTIES LANGUAGE CXX)
|
||||
list(APPEND CLConform_LIBRARIES d3d10 dxgi)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
include(../CMakeCommon.txt)
|
||||
endif(WIN32)
|
||||
else()
|
||||
message(STATUS "D3D10 tests are only supported on Windows.")
|
||||
endif()
|
||||
|
||||
@@ -1,22 +1,4 @@
|
||||
if(WIN32)
|
||||
|
||||
set(D3D11_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include)
|
||||
|
||||
if(${ARCH} STREQUAL "i686")
|
||||
set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86)
|
||||
endif(${ARCH} STREQUAL "i686")
|
||||
|
||||
if(${ARCH} STREQUAL "x86_64")
|
||||
set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64)
|
||||
endif(${ARCH} STREQUAL "x86_64")
|
||||
|
||||
list(APPEND CLConform_INCLUDE_DIR ${D3D11_INCLUDE_DIR})
|
||||
include_directories (${CLConform_SOURCE_DIR}/test_common/harness
|
||||
${CLConform_INCLUDE_DIR} )
|
||||
link_directories(${CL_LIB_DIR}, ${D3D11_LIB_DIR})
|
||||
|
||||
list(APPEND CLConform_LIBRARIES d3d11 dxgi)
|
||||
|
||||
set(MODULE_NAME D3D11)
|
||||
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
@@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES
|
||||
harness.cpp
|
||||
)
|
||||
|
||||
set_source_files_properties(
|
||||
${MODULE_NAME}_SOURCES
|
||||
PROPERTIES LANGUAGE CXX)
|
||||
list(APPEND CLConform_LIBRARIES d3d11 dxgi)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
include(../CMakeCommon.txt)
|
||||
endif(WIN32)
|
||||
else()
|
||||
message(STATUS "D3D11 tests are only supported on Windows.")
|
||||
endif()
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <string.h>
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -25,7 +26,6 @@
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
|
||||
// clang-format off
|
||||
static const char* enqueue_simple_block[] = { R"(
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <string.h>
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -25,7 +26,6 @@
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
#define BITS_DEPTH 28
|
||||
|
||||
static const char* enqueue_flags_wait_kernel_simple[] =
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <string.h>
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -24,10 +25,7 @@
|
||||
#include <time.h>
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
static const char enqueue_block_multi_queue[] =
|
||||
NL "#define BLOCK_COMPLETED 0"
|
||||
NL "#define BLOCK_SUBMITTED 1"
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <string.h>
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
@@ -26,7 +27,6 @@
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
static const char *helper_ndrange_1d_glo[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <string.h>
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -25,7 +26,6 @@
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
static int nestingLevel = 3;
|
||||
|
||||
static const char* enqueue_1D_wg_size_single[] =
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <string.h>
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -25,7 +26,6 @@
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
static const char* multi_queue_simple_block1[] =
|
||||
{
|
||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <string.h>
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
@@ -24,8 +25,6 @@
|
||||
#include "utils.h"
|
||||
#include <time.h>
|
||||
|
||||
extern int gWimpyMode;
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
|
||||
static const char* enqueue_block_first_kernel[] =
|
||||
|
||||
@@ -25,7 +25,6 @@
|
||||
#include "utils.h"
|
||||
|
||||
std::string gKernelName;
|
||||
int gWimpyMode = 0;
|
||||
|
||||
test_status InitCL(cl_device_id device) {
|
||||
auto version = get_device_cl_version(device);
|
||||
@@ -71,11 +70,6 @@ int main(int argc, const char *argv[])
|
||||
gKernelName = std::string(argv[i + 1]);
|
||||
argsRemoveNum += 2;
|
||||
}
|
||||
if (strcmp(argv[i], "-w") == 0 ){
|
||||
gWimpyMode = 1;
|
||||
argsRemoveNum += 1;
|
||||
}
|
||||
|
||||
|
||||
if (argsRemoveNum > 0) {
|
||||
for (int j = i; j < (argc - argsRemoveNum); ++j)
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <string.h>
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -27,7 +28,6 @@
|
||||
#ifdef CL_VERSION_2_0
|
||||
|
||||
static int gNestingLevel = 4;
|
||||
extern int gWimpyMode;
|
||||
|
||||
static const char* enqueue_nested_blocks_single[] =
|
||||
{
|
||||
|
||||
@@ -5,6 +5,9 @@
|
||||
add_subdirectory( cl_ext_cxx_for_opencl )
|
||||
add_subdirectory( cl_khr_command_buffer )
|
||||
add_subdirectory( cl_khr_dx9_media_sharing )
|
||||
if(ANDROID_PLATFORM GREATER 28)
|
||||
add_subdirectory( cl_khr_external_memory_ahb )
|
||||
endif ()
|
||||
add_subdirectory( cl_khr_external_memory_dma_buf )
|
||||
add_subdirectory( cl_khr_semaphore )
|
||||
add_subdirectory( cl_khr_kernel_clock )
|
||||
|
||||
@@ -435,3 +435,40 @@ bool InterleavedEnqueueTest::Skip()
|
||||
{
|
||||
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
|
||||
}
|
||||
|
||||
// Records a single kernel dispatch, enqueues the command-buffer, drops the
// test's reference to it straight away and then checks that the enqueued work
// still produced the expected output.
cl_int EnqueueAndReleaseTest::Run()
{
    cl_int status = CL_SUCCESS;

    // Record one 1D dispatch of `kernel` and close the command-buffer.
    status = clCommandNDRangeKernelKHR(command_buffer, nullptr, nullptr, kernel,
                                       1, nullptr, &num_elements, nullptr, 0,
                                       nullptr, nullptr, nullptr);
    test_error(status, "clCommandNDRangeKernelKHR failed");

    status = clFinalizeCommandBufferKHR(command_buffer);
    test_error(status, "clFinalizeCommandBufferKHR failed");

    // Seed the input buffer with a known value ahead of the command-buffer
    // enqueue; the same value is what the read-back below is compared with.
    cl_int fill_value = 42;
    status = clEnqueueFillBuffer(queue, in_mem, &fill_value, sizeof(cl_int), 0,
                                 data_size(), 0, nullptr, nullptr);
    test_error(status, "clEnqueueFillBuffer failed");

    status = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr,
                                       nullptr);
    test_error(status, "clEnqueueCommandBufferKHR failed");

    // Release the cl_command_buffer_khr handle held by the wrapper now, before
    // anything has waited on the enqueue. reset() also nulls the handle, so
    // the wrapper's destructor does not release it a second time.
    command_buffer.reset();

    // The blocking read is the first point at which the test waits on the
    // queue.
    std::vector<cl_int> readback(num_elements);
    status = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
                                 readback.data(), 0, nullptr, nullptr);
    test_error(status, "clEnqueueReadBuffer failed");

    for (size_t idx = 0; idx < num_elements; ++idx)
    {
        CHECK_VERIFICATION_ERROR(fill_value, readback[idx], idx);
    }

    return CL_SUCCESS;
}
|
||||
|
||||
@@ -128,6 +128,15 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest
|
||||
bool Skip() override;
|
||||
};
|
||||
|
||||
// Test releasing a command-buffer after it has been submitted for execution,
|
||||
// but before the user has waited on completion of the enqueue. The output
// buffer is read back afterwards to confirm the enqueued work still ran.
|
||||
struct EnqueueAndReleaseTest : public BasicCommandBufferTest
|
||||
{
|
||||
// Reuse the base fixture's constructors unchanged.
using BasicCommandBufferTest::BasicCommandBufferTest;
|
||||
|
||||
// Test body; declared here and defined out of line.
cl_int Run() override;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
int MakeAndRunTest(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
|
||||
@@ -44,3 +44,9 @@ REGISTER_TEST(explicit_flush)
|
||||
return MakeAndRunTest<ExplicitFlushTest>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
// Harness entry point: builds an EnqueueAndReleaseTest for the given device,
// context and queue via MakeAndRunTest and returns its result.
REGISTER_TEST(enqueue_and_release)
|
||||
{
|
||||
return MakeAndRunTest<EnqueueAndReleaseTest>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
@@ -14,6 +14,8 @@ set(${MODULE_NAME}_SOURCES
|
||||
mutable_command_multiple_dispatches.cpp
|
||||
mutable_command_iterative_arg_update.cpp
|
||||
mutable_command_work_groups.cpp
|
||||
mutable_command_work_dim.cpp
|
||||
mutable_command_update_state.cpp
|
||||
../basic_command_buffer.cpp
|
||||
)
|
||||
|
||||
|
||||
@@ -135,7 +135,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
if (i >= update_global_size && global_work_size != resultData[i])
|
||||
{
|
||||
log_error("Data failed to verify: update_global_size != "
|
||||
log_error("Data failed to verify: global_work_size != "
|
||||
"resultData[%zu]=%d\n",
|
||||
i, resultData[i]);
|
||||
return TEST_FAIL;
|
||||
@@ -154,7 +154,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest
|
||||
|
||||
size_t info_global_size = 0;
|
||||
const size_t update_global_size = 3;
|
||||
const size_t sizeToAllocate = global_work_size;
|
||||
const size_t sizeToAllocate = global_work_size * sizeof(cl_int);
|
||||
const size_t num_elements = sizeToAllocate / sizeof(cl_int);
|
||||
cl_mutable_command_khr command = nullptr;
|
||||
};
|
||||
|
||||
@@ -116,26 +116,6 @@ struct PropertiesArray : public InfoMutableCommandBufferTest
|
||||
: InfoMutableCommandBufferTest(device, context, queue)
|
||||
{}
|
||||
|
||||
virtual bool Skip() override
|
||||
{
|
||||
Version device_version = get_device_cl_version(device);
|
||||
if ((device_version >= Version(3, 0))
|
||||
|| is_extension_available(device, "cl_khr_extended_versioning"))
|
||||
{
|
||||
|
||||
cl_version extension_version = get_extension_version(
|
||||
device, "cl_khr_command_buffer_mutable_dispatch");
|
||||
|
||||
if (extension_version != CL_MAKE_VERSION(0, 9, 3))
|
||||
{
|
||||
log_info("cl_khr_command_buffer_mutable_dispatch version 0.9.3 "
|
||||
"is required to run the test, skipping.\n ");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return InfoMutableCommandBufferTest::Skip();
|
||||
}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_command_properties_khr props[] = {
|
||||
|
||||
@@ -297,6 +297,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
{
|
||||
cl_int offset;
|
||||
std::vector<cl_int> output_buffer;
|
||||
std::vector<cl_int> updated_output_buffer;
|
||||
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
|
||||
clEventWrapper wait_events[3];
|
||||
};
|
||||
@@ -337,6 +338,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
* buffer_size_multiplier,
|
||||
nullptr, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
// Retain new output memory object until the end of the test.
|
||||
retained_output_buffers.push_back(new_out_mem);
|
||||
|
||||
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
|
||||
&new_out_mem };
|
||||
@@ -373,7 +376,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
|
||||
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE,
|
||||
pd.offset * sizeof(cl_int), data_size(),
|
||||
pd.output_buffer.data(), 1,
|
||||
pd.updated_output_buffer.data(), 1,
|
||||
&pd.wait_events[2], nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
@@ -388,8 +391,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
cl_int offset = static_cast<cl_int>(num_elements);
|
||||
|
||||
std::vector<SimulPassData> simul_passes = {
|
||||
{ 0, std::vector<cl_int>(num_elements) },
|
||||
{ offset, std::vector<cl_int>(num_elements) }
|
||||
{ 0, std::vector<cl_int>(num_elements),
|
||||
std::vector<cl_int>(num_elements) },
|
||||
{ offset, std::vector<cl_int>(num_elements),
|
||||
std::vector<cl_int>(num_elements) }
|
||||
};
|
||||
|
||||
for (auto&& pass : simul_passes)
|
||||
@@ -407,13 +412,26 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
// verify the result buffers
|
||||
for (auto&& pass : simul_passes)
|
||||
auto& first_pass_output = simul_passes[0].output_buffer;
|
||||
auto& first_pass_updated_output = simul_passes[0].updated_output_buffer;
|
||||
auto& second_pass_output = simul_passes[1].output_buffer;
|
||||
auto& second_pass_updated_output =
|
||||
simul_passes[1].updated_output_buffer;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
auto& res_data = pass.output_buffer;
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i);
|
||||
}
|
||||
// First pass:
|
||||
// Before updating, out_mem is copied from in_mem (pattern_pri)
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i);
|
||||
// After updating, new_out_mem is copied from in_mem (pattern_pri)
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i],
|
||||
i);
|
||||
// Second pass:
|
||||
// Before updating, out_mem is filled with overwritten_pattern
|
||||
CHECK_VERIFICATION_ERROR(overwritten_pattern, second_pass_output[i],
|
||||
i);
|
||||
// After updating, new_out_mem is copied from in_mem (pattern_pri)
|
||||
CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i],
|
||||
i);
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
@@ -429,6 +447,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
|
||||
clKernelWrapper kernel_fill;
|
||||
clProgramWrapper program_fill;
|
||||
|
||||
std::vector<clMemWrapper> retained_output_buffers;
|
||||
|
||||
const size_t test_global_work_size = 3 * sizeof(cl_int);
|
||||
const cl_int pattern_pri = 42;
|
||||
|
||||
|
||||
@@ -0,0 +1,280 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "testHarness.h"
|
||||
#include "mutable_command_basic.h"
|
||||
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Tests related to ensuring the state of the updated command-buffer is expected
|
||||
// and the effects of operations on it don't have side effects on other objects.
|
||||
//
|
||||
// - Tests the updates applied to a command-buffer persist over all subsequent
|
||||
// enqueues.
|
||||
// - Tests interaction of `clSetKernelArg` with mutable-dispatch extension.
|
||||
|
||||
struct MutableDispatchUpdateStateTest : public BasicMutableCommandBufferTest
|
||||
{
|
||||
MutableDispatchUpdateStateTest(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: BasicMutableCommandBufferTest(device, context, queue),
|
||||
buffer(nullptr), command(nullptr)
|
||||
{}
|
||||
|
||||
bool Skip() override
|
||||
{
|
||||
if (BasicMutableCommandBufferTest::Skip()) return true;
|
||||
|
||||
cl_mutable_dispatch_fields_khr mutable_capabilities;
|
||||
bool mutable_support =
|
||||
!clGetDeviceInfo(
|
||||
device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
|
||||
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
|
||||
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
|
||||
return !mutable_support;
|
||||
}
|
||||
|
||||
cl_int SetUpKernelArgs() override
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
num_elements * sizeof(cl_int), nullptr, &error);
|
||||
test_error(error, "clCreateBuffer error");
|
||||
|
||||
// Zero initialize buffer
|
||||
const cl_int zero_pattern = 0;
|
||||
error = clEnqueueFillBuffer(
|
||||
queue, buffer, &zero_pattern, sizeof(cl_int), 0,
|
||||
num_elements * sizeof(cl_int), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueFillBuffer failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
|
||||
test_error(error, "Unable to set kernel argument 0");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_int SetUpKernel() override
|
||||
{
|
||||
const char *add_kernel =
|
||||
R"(
|
||||
__kernel void add_kernel(__global int *data, int value)
|
||||
{
|
||||
size_t tid = get_global_id(0);
|
||||
data[tid] += value;
|
||||
})";
|
||||
|
||||
cl_int error = create_single_kernel_helper(
|
||||
context, &program, &kernel, 1, &add_kernel, "add_kernel");
|
||||
test_error(error, "Creating kernel failed");
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
bool verify_result(cl_int ref)
|
||||
{
|
||||
std::vector<cl_int> data(num_elements);
|
||||
cl_int error =
|
||||
clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(),
|
||||
data.data(), 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
if (data[i] != ref)
|
||||
{
|
||||
log_error("Modified verification failed at index %zu: Got %d, "
|
||||
"wanted %d\n",
|
||||
i, data[i], ref);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
clMemWrapper buffer;
|
||||
cl_mutable_command_khr command;
|
||||
};
|
||||
|
||||
struct MutableDispatchUpdatesPersistTest : public MutableDispatchUpdateStateTest
|
||||
{
|
||||
MutableDispatchUpdatesPersistTest(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: MutableDispatchUpdateStateTest(device, context, queue)
|
||||
{}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
const cl_int original_val = 42;
|
||||
cl_int error =
|
||||
clSetKernelArg(kernel, 1, sizeof(original_val), &original_val);
|
||||
test_error(error, "Unable to set kernel argument 1");
|
||||
|
||||
cl_command_properties_khr props[] = {
|
||||
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
|
||||
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
|
||||
};
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, &command);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
// Modify the command buffer before executing
|
||||
const cl_int new_command_val = 5;
|
||||
cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val),
|
||||
&new_command_val };
|
||||
cl_mutable_dispatch_config_khr dispatch_config{
|
||||
command,
|
||||
1 /* num_args */,
|
||||
0 /* num_svm_arg */,
|
||||
0 /* num_exec_infos */,
|
||||
0 /* work_dim - 0 means no change to dimensions */,
|
||||
&arg /* arg_list */,
|
||||
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||
nullptr /* exec_info_list */,
|
||||
nullptr /* global_work_offset */,
|
||||
nullptr /* global_work_size */,
|
||||
nullptr /* local_work_size */
|
||||
};
|
||||
|
||||
cl_uint num_configs = 1;
|
||||
cl_command_buffer_update_type_khr config_types[1] = {
|
||||
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||
};
|
||||
const void *configs[1] = { &dispatch_config };
|
||||
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
|
||||
config_types, configs);
|
||||
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||
|
||||
const unsigned iterations = 5;
|
||||
for (unsigned i = 0; i < iterations; i++)
|
||||
{
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
}
|
||||
|
||||
// Check the results execution sequence is the clEnqueueNDRangeKernel
|
||||
// value + the updated command-buffer value, not using the original
|
||||
// command value in the operation.
|
||||
constexpr cl_int ref = iterations * new_command_val;
|
||||
return verify_result(ref) ? TEST_PASS : TEST_FAIL;
|
||||
}
|
||||
};
|
||||
|
||||
struct MutableDispatchSetKernelArgTest : public MutableDispatchUpdateStateTest
|
||||
{
|
||||
MutableDispatchSetKernelArgTest(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: MutableDispatchUpdateStateTest(device, context, queue)
|
||||
{}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
const cl_int original_val = 42;
|
||||
cl_int error =
|
||||
clSetKernelArg(kernel, 1, sizeof(original_val), &original_val);
|
||||
test_error(error, "Unable to set kernel argument 1");
|
||||
|
||||
cl_command_properties_khr props[] = {
|
||||
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
|
||||
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
|
||||
};
|
||||
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr, &command);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
// Set new kernel argument for later clEnqueueNDRangeKernel
|
||||
const cl_int new_eager_val = 10;
|
||||
error =
|
||||
clSetKernelArg(kernel, 1, sizeof(new_eager_val), &new_eager_val);
|
||||
test_error(error, "Unable to set kernel argument 1");
|
||||
|
||||
// Modify the command buffer before executing
|
||||
const cl_int new_command_val = 5;
|
||||
cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val),
|
||||
&new_command_val };
|
||||
cl_mutable_dispatch_config_khr dispatch_config{
|
||||
command,
|
||||
1 /* num_args */,
|
||||
0 /* num_svm_arg */,
|
||||
0 /* num_exec_infos */,
|
||||
0 /* work_dim - 0 means no change to dimensions */,
|
||||
&arg /* arg_list */,
|
||||
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||
nullptr /* exec_info_list */,
|
||||
nullptr /* global_work_offset */,
|
||||
nullptr /* global_work_size */,
|
||||
nullptr /* local_work_size */
|
||||
};
|
||||
|
||||
cl_uint num_configs = 1;
|
||||
cl_command_buffer_update_type_khr config_types[1] = {
|
||||
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||
};
|
||||
const void *configs[1] = { &dispatch_config };
|
||||
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
|
||||
config_types, configs);
|
||||
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||
|
||||
// Eager kernel enqueue, followed by command-buffer enqueue
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &num_elements,
|
||||
nullptr, 0, nullptr, nullptr);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
// Check the results execution sequence is the clEnqueueNDRangeKernel
|
||||
// value + the updated command-buffer value, not using the original
|
||||
// command value in the operation.
|
||||
constexpr cl_int ref = new_eager_val + new_command_val;
|
||||
return verify_result(ref) ? TEST_PASS : TEST_FAIL;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Harness entry point: builds a MutableDispatchUpdatesPersistTest for the
// given device, context and queue via MakeAndRunTest and returns its result.
REGISTER_TEST(mutable_dispatch_updates_persist)
|
||||
{
|
||||
return MakeAndRunTest<MutableDispatchUpdatesPersistTest>(
|
||||
device, context, queue, num_elements);
|
||||
}
|
||||
|
||||
// Harness entry point: builds a MutableDispatchSetKernelArgTest for the given
// device, context and queue via MakeAndRunTest and returns its result.
REGISTER_TEST(mutable_dispatch_set_kernel_arg)
|
||||
{
|
||||
return MakeAndRunTest<MutableDispatchSetKernelArgTest>(device, context,
|
||||
queue, num_elements);
|
||||
}
|
||||
@@ -0,0 +1,225 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include <extensionHelpers.h>
|
||||
#include "mutable_command_basic.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
// Mutable-dispatch test checking that an update which sets `work_dim` to the
|
||||
// original 3D value behaves as expected.
|
||||
|
||||
struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest
|
||||
{
|
||||
using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
|
||||
|
||||
MutableDispatchWorkDim(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue)
|
||||
: InfoMutableCommandBufferTest(device, context, queue)
|
||||
{}
|
||||
|
||||
// Resizes the host-side result vector to update_total_elements entries, then
// hands the rest of the set-up to the base fixture and returns its status.
cl_int SetUp(int elements) override
{
    result_data.resize(update_total_elements);

    const cl_int base_status = InfoMutableCommandBufferTest::SetUp(elements);
    return base_status;
}
|
||||
|
||||
bool Skip() override
|
||||
{
|
||||
cl_mutable_dispatch_fields_khr mutable_capabilities;
|
||||
|
||||
bool mutable_support =
|
||||
!clGetDeviceInfo(
|
||||
device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
|
||||
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
|
||||
&& (mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR);
|
||||
|
||||
return !mutable_support || InfoMutableCommandBufferTest::Skip();
|
||||
}
|
||||
|
||||
// Reads `buffer` back with a blocking read and checks that the first
// `total_elements` entries all equal `expected_value`.
//
// Returns true only if the read succeeded and every checked entry matches.
// A failed read or the first mismatch is logged and yields false.
bool Verify(cl_mem buffer, cl_uint expected_value, size_t total_elements)
{
    // Clear the host copy first so values left over from a previous call
    // cannot be mistaken for fresh results.
    std::memset(result_data.data(), 0, alloc_size);
    cl_int error =
        clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, alloc_size,
                            result_data.data(), 0, nullptr, nullptr);
    // Do not use test_error() here: it returns the cl_int error code from the
    // enclosing function, and in a bool-returning function a non-zero code
    // converts to `true`, i.e. a failed read would count as a passed check.
    if (error != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed (error %d)\n", error);
        return false;
    }

    for (size_t i = 0; i < total_elements; i++)
    {
        if (result_data[i] != expected_value)
        {
            log_error("Data failed to verify at index %zu. "
                      "Expected %u, result was %u\n",
                      i, expected_value, result_data[i]);
            return false;
        }
    }
    return true;
}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
const char *global_size_kernel =
|
||||
R"(
|
||||
__kernel void three_dim(__global uint *dst0,
|
||||
__global uint *dst1,
|
||||
__global uint *dst2)
|
||||
{
|
||||
size_t gid = get_global_linear_id();
|
||||
dst0[gid] = get_global_size(0);
|
||||
dst1[gid] = get_global_size(1);
|
||||
dst2[gid] = get_global_size(2);
|
||||
})";
|
||||
|
||||
cl_int error = create_single_kernel_helper(
|
||||
context, &program, &kernel, 1, &global_size_kernel, "three_dim");
|
||||
test_error(error, "Creating kernel failed");
|
||||
|
||||
// Create a buffer for each of the three dimensions to write the
|
||||
// global size into.
|
||||
clMemWrapper stream1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
alloc_size, nullptr, &error);
|
||||
test_error(error, "Creating test array failed");
|
||||
|
||||
clMemWrapper stream2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
alloc_size, nullptr, &error);
|
||||
test_error(error, "Creating test array failed");
|
||||
|
||||
clMemWrapper stream3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
||||
alloc_size, nullptr, &error);
|
||||
test_error(error, "Creating test array failed");
|
||||
|
||||
// Set the arguments
|
||||
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream1);
|
||||
test_error(error, "Unable to set indexed kernel arguments");
|
||||
|
||||
error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream2);
|
||||
test_error(error, "Unable to set indexed kernel arguments");
|
||||
|
||||
error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &stream3);
|
||||
test_error(error, "Unable to set indexed kernel arguments");
|
||||
|
||||
// Command-buffer contains a single kernel
|
||||
error = clCommandNDRangeKernelKHR(
|
||||
command_buffer, nullptr, nullptr, kernel, work_dim, nullptr,
|
||||
global_size_3D.data(), nullptr, 0, nullptr, nullptr, &command);
|
||||
test_error(error, "clCommandNDRangeKernelKHR failed");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||
test_error(error, "clFinalizeCommandBufferKHR failed");
|
||||
|
||||
// Enqueue command-buffer and wait on completion
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed.");
|
||||
|
||||
// Verify results before any update
|
||||
if (!Verify(stream1, global_size_3D[0], original_total_elements))
|
||||
{
|
||||
return TEST_FAIL;
|
||||
}
|
||||
if (!Verify(stream2, global_size_3D[1], original_total_elements))
|
||||
{
|
||||
return TEST_FAIL;
|
||||
}
|
||||
if (!Verify(stream3, global_size_3D[2], original_total_elements))
|
||||
{
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
// Update command with a mutable config where we use a different 3D
|
||||
// global size, but hardcode `work_dim` to 3 (the original value).
|
||||
cl_mutable_dispatch_config_khr dispatch_config{
|
||||
command,
|
||||
0 /* num_args */,
|
||||
0 /* num_svm_arg */,
|
||||
0 /* num_exec_infos */,
|
||||
work_dim /* work_dim */,
|
||||
nullptr /* arg_list */,
|
||||
nullptr /* arg_svm_list - nullptr means no change*/,
|
||||
nullptr /* exec_info_list */,
|
||||
nullptr /* global_work_offset */,
|
||||
update_global_size_3D.data() /* global_work_size */,
|
||||
nullptr /* local_work_size */
|
||||
};
|
||||
|
||||
cl_uint num_configs = 1;
|
||||
cl_command_buffer_update_type_khr config_types[1] = {
|
||||
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
|
||||
};
|
||||
const void *configs[1] = { &dispatch_config };
|
||||
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
|
||||
config_types, configs);
|
||||
test_error(error, "clUpdateMutableCommandsKHR failed");
|
||||
|
||||
// Enqueue updated command-buffer
|
||||
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
|
||||
nullptr, nullptr);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
// Verify update is reflected in buffer output.
|
||||
if (!Verify(stream1, update_global_size_3D[0], update_total_elements))
|
||||
{
|
||||
return TEST_FAIL;
|
||||
}
|
||||
if (!Verify(stream2, update_global_size_3D[1], update_total_elements))
|
||||
{
|
||||
return TEST_FAIL;
|
||||
}
|
||||
if (!Verify(stream3, update_global_size_3D[2], update_total_elements))
|
||||
{
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
static const cl_uint work_dim = 3;
|
||||
// 3D global size of kernel command when created
|
||||
static const size_t original_elements = 2;
|
||||
static constexpr std::array<size_t, work_dim> global_size_3D = {
|
||||
original_elements, original_elements, original_elements
|
||||
};
|
||||
// 3D global size to update kernel command to.
|
||||
static const size_t update_elements = 4;
|
||||
static constexpr std::array<size_t, work_dim> update_global_size_3D = {
|
||||
update_elements, update_elements, update_elements
|
||||
};
|
||||
// Total number of work items in original and updated grids
|
||||
static const size_t original_total_elements =
|
||||
original_elements * original_elements * original_elements;
|
||||
static const size_t update_total_elements =
|
||||
update_elements * update_elements * update_elements;
|
||||
// Size in bytes of each of the 3 cl_mem buffers (using the larger size)
|
||||
static const size_t alloc_size = update_total_elements * sizeof(cl_uint);
|
||||
|
||||
cl_mutable_command_khr command = nullptr;
|
||||
std::vector<cl_uint> result_data;
|
||||
};
|
||||
|
||||
// get_global_linear_id() used in kernel is an OpenCL 2.0 API
|
||||
REGISTER_TEST_VERSION(mutable_dispatch_work_dim, Version(2, 0))
{
    // Forward the harness-provided arguments to MakeAndRunTest for the
    // MutableDispatchWorkDim fixture and hand its result back unchanged.
    const auto status = MakeAndRunTest<MutableDispatchWorkDim>(
        device, context, queue, num_elements);
    return status;
}
|
||||
@@ -250,9 +250,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
|
||||
&trigger_event, &execute_event);
|
||||
test_error(error, "clEnqueueCommandBufferKHR failed");
|
||||
|
||||
// verify pending state
|
||||
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
|
||||
|
||||
// execute command buffer
|
||||
cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
|
||||
|
||||
|
||||
@@ -124,8 +124,6 @@ struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState
|
||||
|
||||
error = EnqueueCommandBuffer();
|
||||
test_error(error, "EnqueueCommandBuffer failed");
|
||||
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
|
||||
test_error(error, "State is not Pending");
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -89,8 +89,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
|
||||
|
||||
error = EnqueueCommandBuffer();
|
||||
test_error(error, "EnqueueCommandBuffer failed");
|
||||
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
|
||||
test_error(error, "State is not Pending");
|
||||
|
||||
error = clFinalizeCommandBufferKHR(command_buffer);
|
||||
test_failure_error_ret(error, CL_INVALID_OPERATION,
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
# Build script for the cl_khr_external_memory_android_hardware_buffer tests.
# MODULE_NAME is consumed by the shared CMakeCommon.txt included at the end.
set(MODULE_NAME CL_KHR_EXTERNAL_MEMORY_AHB)
|
||||
|
||||
# Translation units that make up this test executable.
set(${MODULE_NAME}_SOURCES
|
||||
main.cpp
|
||||
test_ahb.cpp
|
||||
test_ahb_negative.cpp
|
||||
debug_ahb.cpp
|
||||
)
|
||||
|
||||
# NOTE(review): nativewindow is presumably the Android NDK library that
# provides the AHardwareBuffer_* entry points - confirm against the NDK docs.
link_libraries(OpenCL nativewindow)
|
||||
|
||||
# Shared rules; presumably creates the target from MODULE_NAME and
# ${MODULE_NAME}_SOURCES - confirm in CMakeCommon.txt.
include(../../CMakeCommon.txt)
|
||||
@@ -0,0 +1,193 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "debug_ahb.h"
|
||||
|
||||
constexpr AHardwareBuffer_UsageFlags flag_list[] = {
|
||||
AHARDWAREBUFFER_USAGE_CPU_READ_RARELY,
|
||||
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN,
|
||||
AHARDWAREBUFFER_USAGE_CPU_WRITE_NEVER,
|
||||
AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY,
|
||||
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN,
|
||||
AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK,
|
||||
AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE,
|
||||
AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER,
|
||||
AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY,
|
||||
AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT,
|
||||
AHARDWAREBUFFER_USAGE_VIDEO_ENCODE,
|
||||
AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA,
|
||||
AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER,
|
||||
AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP,
|
||||
AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE,
|
||||
AHARDWAREBUFFER_USAGE_FRONT_BUFFER,
|
||||
};
|
||||
|
||||
std::string
|
||||
ahardwareBufferDecodeUsageFlagsToString(const AHardwareBuffer_UsageFlags flags)
|
||||
{
|
||||
if (flags == 0)
|
||||
{
|
||||
return "UNKNOWN FLAG";
|
||||
}
|
||||
|
||||
std::vector<std::string> active_flags;
|
||||
for (const auto flag : flag_list)
|
||||
{
|
||||
if (flag & flags)
|
||||
{
|
||||
active_flags.push_back(ahardwareBufferUsageFlagToString(flag));
|
||||
}
|
||||
}
|
||||
|
||||
if (active_flags.empty())
|
||||
{
|
||||
return "UNKNOWN FLAG";
|
||||
}
|
||||
|
||||
return std::accumulate(active_flags.begin() + 1, active_flags.end(),
|
||||
active_flags.front(),
|
||||
[](std::string acc, const std::string& flag) {
|
||||
return std::move(acc) + "|" + flag;
|
||||
});
|
||||
}
|
||||
|
||||
std::string
|
||||
ahardwareBufferUsageFlagToString(const AHardwareBuffer_UsageFlags flag)
|
||||
{
|
||||
std::string result;
|
||||
switch (flag)
|
||||
{
|
||||
case AHARDWAREBUFFER_USAGE_CPU_READ_NEVER:
|
||||
result = "AHARDWAREBUFFER_USAGE_CPU_READ_NEVER";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_CPU_READ_RARELY:
|
||||
result = "AHARDWAREBUFFER_USAGE_CPU_READ_RARELY";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN:
|
||||
result = "AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_CPU_READ_MASK:
|
||||
result = "AHARDWAREBUFFER_USAGE_CPU_READ_MASK";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY:
|
||||
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN:
|
||||
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK:
|
||||
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE:
|
||||
result = "AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER:
|
||||
result = "AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY:
|
||||
result = "AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT:
|
||||
result = "AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_VIDEO_ENCODE:
|
||||
result = "AHARDWAREBUFFER_USAGE_VIDEO_ENCODE";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA:
|
||||
result = "AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER:
|
||||
result = "AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP:
|
||||
result = "AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP";
|
||||
break;
|
||||
case AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE:
|
||||
result = "AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE";
|
||||
break;
|
||||
default: result = "Unknown flag";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format)
|
||||
{
|
||||
std::string result;
|
||||
switch (format)
|
||||
{
|
||||
case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_BLOB:
|
||||
result = "AHARDWAREBUFFER_FORMAT_BLOB";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_D16_UNORM:
|
||||
result = "AHARDWAREBUFFER_FORMAT_D16_UNORM";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_D24_UNORM:
|
||||
result = "AHARDWAREBUFFER_FORMAT_D24_UNORM";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT:
|
||||
result = "AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_D32_FLOAT:
|
||||
result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT:
|
||||
result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_S8_UINT:
|
||||
result = "AHARDWAREBUFFER_FORMAT_S8_UINT";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
|
||||
result = "AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_YCbCr_P010:
|
||||
result = "AHARDWAREBUFFER_FORMAT_YCbCr_P010";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_YCbCr_P210:
|
||||
result = "AHARDWAREBUFFER_FORMAT_YCbCr_P210";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_R8_UNORM:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R8_UNORM";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_R16_UINT:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R16_UINT";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_R16G16_UINT:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R16G16_UINT";
|
||||
break;
|
||||
case AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM:
|
||||
result = "AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM";
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include <android/hardware_buffer.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
// Logs the format, usage flags and dimensions of an AHardwareBuffer
// description that AHardwareBuffer_isSupported rejects, then skips to the next
// loop iteration.
//
//   ahardwareBuffer_Desc  an AHardwareBuffer_Desc lvalue (its address is taken)
//   format                an object exposing an `aHardwareBufferFormat` member
//
// The expansion ends in `continue`, so the macro may only be used directly
// inside a loop body. For the same reason it is intentionally NOT wrapped in
// do { } while (0): that wrapper would capture the `continue`.
// Macro parameters are parenthesized so that expression arguments expand
// safely.
#define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format)           \
    if (!AHardwareBuffer_isSupported(&(ahardwareBuffer_Desc)))                 \
    {                                                                          \
        const std::string usage_string =                                       \
            ahardwareBufferDecodeUsageFlagsToString(                           \
                static_cast<AHardwareBuffer_UsageFlags>(                       \
                    (ahardwareBuffer_Desc).usage));                            \
        log_info("Unsupported format %s:\n Usage flags %s\n Size (%u, "        \
                 "%u, layers = %u)\n",                                         \
                 ahardwareBufferFormatToString((format).aHardwareBufferFormat) \
                     .c_str(),                                                 \
                 usage_string.c_str(), (ahardwareBuffer_Desc).width,           \
                 (ahardwareBuffer_Desc).height,                                \
                 (ahardwareBuffer_Desc).layers);                               \
        continue;                                                              \
    }
|
||||
|
||||
std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format);
|
||||
std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag);
|
||||
std::string
|
||||
ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags);
|
||||
@@ -0,0 +1,23 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "harness/testHarness.h"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
|
||||
test_registry::getInstance().definitions(), false, 0);
|
||||
}
|
||||
1911
test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
Normal file
1911
test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,246 @@
|
||||
//
|
||||
// Copyright (c) 2025 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "harness/compat.h"
|
||||
#include "harness/kernelHelpers.h"
|
||||
#include "harness/imageHelpers.h"
|
||||
#include "harness/errorHelpers.h"
|
||||
#include <android/hardware_buffer.h>
|
||||
#include "debug_ahb.h"
|
||||
|
||||
// Negative test: importing an AHardwareBuffer whose format is not
// AHARDWAREBUFFER_FORMAT_BLOB as an OpenCL buffer must fail with
// CL_INVALID_OPERATION.
//
// Skips itself when either required extension is missing or when the
// AHardwareBuffer description is not supported by the platform.
REGISTER_TEST(test_buffer_format_negative)
{
    cl_int err = CL_SUCCESS;

    if (!is_extension_available(device, "cl_khr_external_memory"))
    {
        log_info("cl_khr_external_memory is not supported on this platform. "
                 "Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }
    if (!is_extension_available(
            device, "cl_khr_external_memory_android_hardware_buffer"))
    {
        log_info("cl_khr_external_memory_android_hardware_buffer is not "
                 "supported on this platform. "
                 "Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    // Deliberately a non-BLOB format: this is what the implementation is
    // expected to reject. (`usage` was previously assigned twice.)
    AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
    aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
    aHardwareBufferDesc.width = 64;
    aHardwareBufferDesc.height = 1;
    aHardwareBufferDesc.layers = 1;

    if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
    {
        const std::string usage_string =
            ahardwareBufferDecodeUsageFlagsToString(
                static_cast<AHardwareBuffer_UsageFlags>(
                    aHardwareBufferDesc.usage));
        log_info(
            "Unsupported format %s, usage flags %s\n",
            ahardwareBufferFormatToString(
                static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
                .c_str(),
            usage_string.c_str());
        return TEST_SKIPPED_ITSELF;
    }

    AHardwareBuffer *aHardwareBuffer = nullptr;
    const int ahb_result =
        AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
    if (ahb_result != 0)
    {
        log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
        return TEST_FAIL;
    }
    log_info("Testing %s\n",
             ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
                                               aHardwareBufferDesc.format))
                 .c_str());

    cl_mem_properties props[] = {
        CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
        reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
    };

    cl_mem buffer = clCreateBufferWithProperties(
        context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);

    // Release everything BEFORE checking the result: the harness assertion
    // macros return from the test on failure (NOTE(review): per
    // harness/errorHelpers.h - confirm), which previously leaked the
    // AHardwareBuffer and any unexpectedly created cl_mem.
    cl_int release_err = CL_SUCCESS;
    if (buffer != nullptr)
    {
        release_err = clReleaseMemObject(buffer);
    }
    AHardwareBuffer_release(aHardwareBuffer);
    aHardwareBuffer = nullptr;

    test_assert_error(err == CL_INVALID_OPERATION,
                      "To create a buffer the aHardwareFormat must be "
                      "AHARDWAREBUFFER_FORMAT_BLOB");
    test_error(release_err, "Failed to release buffer");

    return TEST_PASS;
}
|
||||
|
||||
// Negative test: importing a BLOB AHardwareBuffer as an OpenCL buffer while
// passing an explicit size (here half of the AHardwareBuffer size) must fail
// with CL_INVALID_BUFFER_SIZE.
//
// Skips itself when either required extension is missing or when the
// AHardwareBuffer description is not supported by the platform.
REGISTER_TEST(test_buffer_size_negative)
{
    cl_int err = CL_SUCCESS;
    constexpr size_t buffer_size = 64;

    if (!is_extension_available(device, "cl_khr_external_memory"))
    {
        log_info("cl_khr_external_memory is not supported on this platform. "
                 "Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }
    if (!is_extension_available(
            device, "cl_khr_external_memory_android_hardware_buffer"))
    {
        log_info("cl_khr_external_memory_android_hardware_buffer is not "
                 "supported on this platform. "
                 "Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    // (`usage` was previously assigned twice with the same value.)
    AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
    aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
    aHardwareBufferDesc.width = buffer_size;
    aHardwareBufferDesc.height = 1;
    aHardwareBufferDesc.layers = 1;

    if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
    {
        const std::string usage_string =
            ahardwareBufferDecodeUsageFlagsToString(
                static_cast<AHardwareBuffer_UsageFlags>(
                    aHardwareBufferDesc.usage));
        log_info(
            "Unsupported format %s, usage flags %s\n",
            ahardwareBufferFormatToString(
                static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
                .c_str(),
            usage_string.c_str());
        return TEST_SKIPPED_ITSELF;
    }

    AHardwareBuffer *aHardwareBuffer = nullptr;
    const int ahb_result =
        AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
    if (ahb_result != 0)
    {
        log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
        return TEST_FAIL;
    }
    log_info("Testing %s\n",
             ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
                                               aHardwareBufferDesc.format))
                 .c_str());

    cl_mem_properties props[] = {
        CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
        reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
    };

    cl_mem buffer = clCreateBufferWithProperties(
        context, props, CL_MEM_READ_WRITE, buffer_size / 2, nullptr, &err);

    // Release everything BEFORE checking the result: the harness assertion
    // macros return from the test on failure (NOTE(review): per
    // harness/errorHelpers.h - confirm), which previously leaked the
    // AHardwareBuffer and any unexpectedly created cl_mem.
    cl_int release_err = CL_SUCCESS;
    if (buffer != nullptr)
    {
        release_err = clReleaseMemObject(buffer);
    }
    AHardwareBuffer_release(aHardwareBuffer);
    aHardwareBuffer = nullptr;

    test_assert_error(err == CL_INVALID_BUFFER_SIZE,
                      "Wrong error value returned");
    test_error(release_err, "Failed to release buffer");

    return TEST_PASS;
}
|
||||
|
||||
// Negative test: when an image is imported from an AHardwareBuffer, supplying
// an explicit image format must fail with CL_INVALID_IMAGE_FORMAT_DESCRIPTOR
// and supplying an explicit image descriptor must fail with
// CL_INVALID_IMAGE_DESCRIPTOR.
//
// Skips itself when either required extension is missing or when the
// AHardwareBuffer description is not supported by the platform.
REGISTER_TEST(test_images_negative)
{
    cl_int err = CL_SUCCESS;

    if (!is_extension_available(device, "cl_khr_external_memory"))
    {
        log_info("cl_khr_external_memory is not supported on this platform. "
                 "Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }
    if (!is_extension_available(
            device, "cl_khr_external_memory_android_hardware_buffer"))
    {
        log_info("cl_khr_external_memory_android_hardware_buffer is not "
                 "supported on this platform. "
                 "Skipping test.\n");
        return TEST_SKIPPED_ITSELF;
    }

    AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
    aHardwareBufferDesc.usage = static_cast<AHardwareBuffer_UsageFlags>(
        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
        | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
        | AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE
        | AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER);
    aHardwareBufferDesc.width = 64;
    aHardwareBufferDesc.height = 64;
    aHardwareBufferDesc.layers = 1;

    // Same support check as the buffer negative tests: an unsupported
    // description is a reason to skip, not a test failure.
    if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
    {
        const std::string usage_string =
            ahardwareBufferDecodeUsageFlagsToString(
                static_cast<AHardwareBuffer_UsageFlags>(
                    aHardwareBufferDesc.usage));
        log_info(
            "Unsupported format %s, usage flags %s\n",
            ahardwareBufferFormatToString(
                static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
                .c_str(),
            usage_string.c_str());
        return TEST_SKIPPED_ITSELF;
    }

    AHardwareBuffer *aHardwareBuffer = nullptr;
    int ahb_result =
        AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
    if (ahb_result != 0)
    {
        log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
        return TEST_FAIL;
    }

    const cl_mem_properties props[] = {
        CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
        reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
    };

    // Case 1: explicit image format, no image descriptor.
    constexpr cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 };
    cl_mem image =
        clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
                                    &image_format, nullptr, nullptr, &err);
    const cl_int format_case_err = err;
    cl_int format_case_release_err = CL_SUCCESS;
    if (image != nullptr)
    {
        format_case_release_err = clReleaseMemObject(image);
    }

    // Case 2: explicit image descriptor, no image format.
    constexpr cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, 64, 64 };
    image = clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
                                        nullptr, &image_desc, nullptr, &err);
    const cl_int desc_case_err = err;
    cl_int desc_case_release_err = CL_SUCCESS;
    if (image != nullptr)
    {
        desc_case_release_err = clReleaseMemObject(image);
    }

    // All resources are released before any assertion runs: the harness
    // assertion macros return from the test on failure (NOTE(review): per
    // harness/errorHelpers.h - confirm), which previously leaked the
    // AHardwareBuffer and any unexpectedly created image.
    AHardwareBuffer_release(aHardwareBuffer);
    aHardwareBuffer = nullptr;

    test_assert_error(format_case_err == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
                      "Wrong error value returned");
    test_error(format_case_release_err, "Failed to release image");
    test_assert_error(desc_case_err == CL_INVALID_IMAGE_DESCRIPTOR,
                      "Wrong error value returned");
    test_error(desc_case_release_err, "Failed to release image");

    return TEST_PASS;
}
|
||||
@@ -388,109 +388,6 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
// Confirm that signal a semaphore with no event dependencies will not result
|
||||
// in an implicit dependency on everything previously submitted
|
||||
REGISTER_TEST_VERSION(external_semaphores_simple_2, Version(1, 2))
|
||||
{
|
||||
REQUIRE_EXTENSION("cl_khr_external_semaphore");
|
||||
|
||||
if (init_vulkan_device(1, &device))
|
||||
{
|
||||
log_info("Cannot initialise Vulkan. "
|
||||
"Skipping test.\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
VulkanDevice vkDevice;
|
||||
|
||||
// Obtain pointers to semaphore's API
|
||||
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
|
||||
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
|
||||
|
||||
std::vector<VulkanExternalSemaphoreHandleType>
|
||||
vkExternalSemaphoreHandleTypeList =
|
||||
getSupportedInteropExternalSemaphoreHandleTypes(device, vkDevice);
|
||||
|
||||
if (vkExternalSemaphoreHandleTypeList.empty())
|
||||
{
|
||||
test_fail("No external semaphore handle types found\n");
|
||||
}
|
||||
|
||||
for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
|
||||
vkExternalSemaphoreHandleTypeList)
|
||||
{
|
||||
log_info_semaphore_type(vkExternalSemaphoreHandleType);
|
||||
VulkanSemaphore vkVk2CLSemaphore(vkDevice,
|
||||
vkExternalSemaphoreHandleType);
|
||||
|
||||
auto sema_ext = clExternalImportableSemaphore(
|
||||
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, device);
|
||||
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
// Create ooo queue
|
||||
clCommandQueueWrapper queue = clCreateCommandQueue(
|
||||
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
|
||||
test_error(err, "Could not create command queue");
|
||||
|
||||
// Create user event
|
||||
clEventWrapper user_event = clCreateUserEvent(context, &err);
|
||||
test_error(err, "Could not create user event");
|
||||
|
||||
// Create Kernel
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1,
|
||||
&source, "empty");
|
||||
test_error(err, "Could not create kernel");
|
||||
|
||||
// Enqueue task_1 (dependency on user_event)
|
||||
clEventWrapper task_1_event;
|
||||
err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
|
||||
test_error(err, "Could not enqueue task 1");
|
||||
|
||||
// Signal semaphore
|
||||
clEventWrapper signal_event;
|
||||
err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
|
||||
nullptr, 0, nullptr, &signal_event);
|
||||
test_error(err, "Could not signal semaphore");
|
||||
|
||||
// Wait semaphore
|
||||
clEventWrapper wait_event;
|
||||
err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
|
||||
nullptr, 0, nullptr, &wait_event);
|
||||
test_error(err, "Could not wait semaphore");
|
||||
|
||||
// Flush and delay
|
||||
err = clFlush(queue);
|
||||
test_error(err, "Could not flush queue");
|
||||
|
||||
cl_event event_list[] = { signal_event, wait_event };
|
||||
err = clWaitForEvents(2, event_list);
|
||||
test_error(err, "Could not wait on events");
|
||||
|
||||
// Ensure all events are completed except for task_1
|
||||
test_assert_event_inprogress(task_1_event);
|
||||
test_assert_event_complete(signal_event);
|
||||
test_assert_event_complete(wait_event);
|
||||
|
||||
// Complete user_event
|
||||
err = clSetUserEventStatus(user_event, CL_COMPLETE);
|
||||
test_error(err, "Could not set user event to CL_COMPLETE");
|
||||
|
||||
// Finish
|
||||
err = clFinish(queue);
|
||||
test_error(err, "Could not finish queue");
|
||||
|
||||
// Ensure all events are completed
|
||||
test_assert_event_complete(task_1_event);
|
||||
test_assert_event_complete(signal_event);
|
||||
test_assert_event_complete(wait_event);
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
// Confirm that a semaphore can be reused multiple times
|
||||
REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
|
||||
{
|
||||
|
||||
@@ -76,87 +76,6 @@ struct SimpleSemaphore1 : public SemaphoreTestBase
|
||||
}
|
||||
};
|
||||
|
||||
struct SimpleSemaphore2 : public SemaphoreTestBase
|
||||
{
|
||||
SimpleSemaphore2(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, cl_int nelems)
|
||||
: SemaphoreTestBase(device, context, queue, nelems)
|
||||
{}
|
||||
|
||||
cl_int Run() override
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
// Create ooo queue
|
||||
clCommandQueueWrapper queue = clCreateCommandQueue(
|
||||
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
|
||||
test_error(err, "Could not create command queue");
|
||||
|
||||
// Create semaphore
|
||||
cl_semaphore_properties_khr sema_props[] = {
|
||||
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
|
||||
static_cast<cl_semaphore_properties_khr>(
|
||||
CL_SEMAPHORE_TYPE_BINARY_KHR),
|
||||
0
|
||||
};
|
||||
semaphore =
|
||||
clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
|
||||
test_error(err, "Could not create semaphore");
|
||||
|
||||
// Create user event
|
||||
clEventWrapper user_event = clCreateUserEvent(context, &err);
|
||||
test_error(err, "Could not create user event");
|
||||
|
||||
// Create Kernel
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1,
|
||||
&source, "empty");
|
||||
test_error(err, "Could not create kernel");
|
||||
|
||||
// Enqueue task_1 (dependency on user_event)
|
||||
clEventWrapper task_1_event;
|
||||
err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
|
||||
test_error(err, "Could not enqueue task 1");
|
||||
|
||||
// Signal semaphore
|
||||
clEventWrapper signal_event;
|
||||
err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
|
||||
nullptr, &signal_event);
|
||||
test_error(err, "Could not signal semaphore");
|
||||
|
||||
// Wait semaphore
|
||||
clEventWrapper wait_event;
|
||||
err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
|
||||
nullptr, &wait_event);
|
||||
test_error(err, "Could not wait semaphore");
|
||||
|
||||
// Flush and delay
|
||||
err = clFlush(queue);
|
||||
test_error(err, "Could not flush queue");
|
||||
std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
|
||||
|
||||
// Ensure all events are completed except for task_1
|
||||
test_assert_event_inprogress(task_1_event);
|
||||
test_assert_event_complete(signal_event);
|
||||
test_assert_event_complete(wait_event);
|
||||
|
||||
// Complete user_event
|
||||
err = clSetUserEventStatus(user_event, CL_COMPLETE);
|
||||
test_error(err, "Could not set user event to CL_COMPLETE");
|
||||
|
||||
// Finish
|
||||
err = clFinish(queue);
|
||||
test_error(err, "Could not finish queue");
|
||||
|
||||
// Ensure all events are completed
|
||||
test_assert_event_complete(task_1_event);
|
||||
test_assert_event_complete(signal_event);
|
||||
test_assert_event_complete(wait_event);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
struct SemaphoreReuse : public SemaphoreTestBase
|
||||
{
|
||||
SemaphoreReuse(cl_device_id device, cl_context context,
|
||||
@@ -387,14 +306,6 @@ REGISTER_TEST_VERSION(semaphores_simple_1, Version(1, 2))
|
||||
num_elements);
|
||||
}
|
||||
|
||||
// Confirm that signal a semaphore with no event dependencies will not result
|
||||
// in an implicit dependency on everything previously submitted
|
||||
REGISTER_TEST_VERSION(semaphores_simple_2, Version(1, 2))
|
||||
{
|
||||
return MakeAndRunTest<SimpleSemaphore2>(device, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
// Confirm that a semaphore can be reused multiple times
|
||||
REGISTER_TEST_VERSION(semaphores_reuse, Version(1, 2))
|
||||
{
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "harness/compat.h"
|
||||
#include "harness/kernelHelpers.h"
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
||||
@@ -58,7 +58,6 @@ uint32_t gDeviceFrequency = 0;
|
||||
uint32_t gComputeDevices = 0;
|
||||
size_t gMaxThreadGroupSize = 0;
|
||||
size_t gWorkGroupSize = 0;
|
||||
bool gWimpyMode = false;
|
||||
int gWimpyReductionFactor = 512;
|
||||
int gTestDouble = 0;
|
||||
bool gHostReset = false;
|
||||
|
||||
@@ -74,7 +74,6 @@ extern bool gHostReset;
|
||||
// gWimpyMode indicates if we run the test in wimpy mode where we limit the
|
||||
// size of 32 bit ranges to a much smaller set. This is meant to be used
|
||||
// as a smoke test
|
||||
extern bool gWimpyMode;
|
||||
extern int gWimpyReductionFactor;
|
||||
|
||||
uint64_t ReadTime( void );
|
||||
|
||||
@@ -83,13 +83,6 @@ int main (int argc, const char **argv )
|
||||
if( (error = ParseArgs( argc, argv )) )
|
||||
goto exit;
|
||||
|
||||
if (gIsEmbedded) {
|
||||
vlog( "\tProfile: Embedded\n" );
|
||||
}else
|
||||
{
|
||||
vlog( "\tProfile: Full\n" );
|
||||
}
|
||||
|
||||
fflush( stdout );
|
||||
error = runTestHarnessWithCheck(
|
||||
argCount, argList, test_registry::getInstance().num_tests(),
|
||||
@@ -114,6 +107,10 @@ exit:
|
||||
|
||||
static int ParseArgs( int argc, const char **argv )
|
||||
{
|
||||
if (gListTests)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
int i;
|
||||
argList = (const char **)calloc(argc, sizeof(char *));
|
||||
if( NULL == argList )
|
||||
@@ -181,9 +178,6 @@ static int ParseArgs( int argc, const char **argv )
|
||||
|
||||
case 'r': gHostReset = true; break;
|
||||
|
||||
case 'w': // Wimpy mode
|
||||
gWimpyMode = true;
|
||||
break;
|
||||
case '[':
|
||||
parseWimpyReductionFactor( arg, gWimpyReductionFactor);
|
||||
break;
|
||||
@@ -202,12 +196,6 @@ static int ParseArgs( int argc, const char **argv )
|
||||
}
|
||||
}
|
||||
|
||||
if (getenv("CL_WIMPY_MODE")) {
|
||||
vlog( "\n" );
|
||||
vlog( "*** Detected CL_WIMPY_MODE env ***\n" );
|
||||
gWimpyMode = 1;
|
||||
}
|
||||
|
||||
PrintArch();
|
||||
if( gWimpyMode )
|
||||
{
|
||||
@@ -217,6 +205,16 @@ static int ParseArgs( int argc, const char **argv )
|
||||
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
|
||||
vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
|
||||
}
|
||||
|
||||
if (gIsEmbedded)
|
||||
{
|
||||
vlog("\tProfile: Embedded\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog("\tProfile: Full\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -227,7 +225,6 @@ static void PrintUsage( void )
|
||||
"supported)\n");
|
||||
vlog("\t\t-t\tToggle reporting performance data.\n");
|
||||
vlog("\t\t-r\tReset buffers on host instead of on device.\n");
|
||||
vlog("\t\t-w\tRun in wimpy mode\n");
|
||||
vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
|
||||
"1-12, default factor(%u)\n",
|
||||
gWimpyReductionFactor);
|
||||
|
||||
@@ -248,7 +248,7 @@ clMemWrapper create_image(cl_context context, cl_command_queue queue,
|
||||
cl_mem_flags buffer_flags = CL_MEM_READ_WRITE;
|
||||
if (enable_pitch)
|
||||
{
|
||||
if (version.major() == 1)
|
||||
if (version.get_major() == 1)
|
||||
{
|
||||
host_ptr = malloc(imageInfo->rowPitch);
|
||||
}
|
||||
|
||||
@@ -878,18 +878,16 @@ int test_read_image(cl_context context, cl_command_queue queue,
|
||||
numTries, numClamped,
|
||||
true, lod);
|
||||
log_error("Step by step:\n");
|
||||
FloatPixel temp =
|
||||
sample_image_pixel_float_offset(
|
||||
imagePtr, imageInfo,
|
||||
xOffsetValues[j],
|
||||
yOffsetValues[j],
|
||||
zOffsetValues[j],
|
||||
norm_offset_x,
|
||||
norm_offset_y,
|
||||
norm_offset_z,
|
||||
imageSampler, tempOut,
|
||||
1 /*verbose*/,
|
||||
&hasDenormals, lod);
|
||||
sample_image_pixel_float_offset(
|
||||
imagePtr, imageInfo,
|
||||
xOffsetValues[j],
|
||||
yOffsetValues[j],
|
||||
zOffsetValues[j],
|
||||
norm_offset_x,
|
||||
norm_offset_y,
|
||||
norm_offset_z, imageSampler,
|
||||
tempOut, 1 /*verbose*/,
|
||||
&hasDenormals, lod);
|
||||
log_error(
|
||||
"\tulps: %2.2f (max "
|
||||
"allowed: %2.2f)\n\n",
|
||||
@@ -931,9 +929,6 @@ int test_read_image(cl_context context, cl_command_queue queue,
|
||||
// Validate float results
|
||||
float *resultPtr = (float *)(char *)resultValues;
|
||||
float expected[4], error = 0.0f;
|
||||
float maxErr = get_max_relative_error(
|
||||
imageInfo->format, imageSampler, image_type_3D,
|
||||
CL_FILTER_LINEAR == imageSampler->filter_mode);
|
||||
|
||||
for (size_t z = 0, j = 0; z < depth_lod; z++)
|
||||
{
|
||||
@@ -1242,26 +1237,25 @@ int test_read_image(cl_context context, cl_command_queue queue,
|
||||
j, numTries, numClamped,
|
||||
true, lod);
|
||||
log_error("Step by step:\n");
|
||||
FloatPixel temp =
|
||||
sample_image_pixel_float_offset(
|
||||
imagePtr, imageInfo,
|
||||
xOffsetValues[j],
|
||||
(num_dimensions > 1)
|
||||
? yOffsetValues[j]
|
||||
: 0.0f,
|
||||
image_type_3D
|
||||
? zOffsetValues[j]
|
||||
: 0.0f,
|
||||
norm_offset_x,
|
||||
(num_dimensions > 1)
|
||||
? norm_offset_y
|
||||
: 0.0f,
|
||||
image_type_3D
|
||||
? norm_offset_z
|
||||
: 0.0f,
|
||||
imageSampler, tempOut,
|
||||
1 /*verbose*/,
|
||||
&hasDenormals, lod);
|
||||
sample_image_pixel_float_offset(
|
||||
imagePtr, imageInfo,
|
||||
xOffsetValues[j],
|
||||
(num_dimensions > 1)
|
||||
? yOffsetValues[j]
|
||||
: 0.0f,
|
||||
image_type_3D
|
||||
? zOffsetValues[j]
|
||||
: 0.0f,
|
||||
norm_offset_x,
|
||||
(num_dimensions > 1)
|
||||
? norm_offset_y
|
||||
: 0.0f,
|
||||
image_type_3D
|
||||
? norm_offset_z
|
||||
: 0.0f,
|
||||
imageSampler, tempOut,
|
||||
1 /*verbose*/,
|
||||
&hasDenormals, lod);
|
||||
log_error(
|
||||
"\tulps: %2.2f, %2.2f, "
|
||||
"%2.2f, %2.2f (max "
|
||||
@@ -1632,26 +1626,25 @@ int test_read_image(cl_context context, cl_command_queue queue,
|
||||
j, numTries, numClamped,
|
||||
true, lod);
|
||||
log_error("Step by step:\n");
|
||||
FloatPixel temp =
|
||||
sample_image_pixel_float_offset(
|
||||
imagePtr, imageInfo,
|
||||
xOffsetValues[j],
|
||||
(num_dimensions > 1)
|
||||
? yOffsetValues[j]
|
||||
: 0.0f,
|
||||
image_type_3D
|
||||
? zOffsetValues[j]
|
||||
: 0.0f,
|
||||
norm_offset_x,
|
||||
(num_dimensions > 1)
|
||||
? norm_offset_y
|
||||
: 0.0f,
|
||||
image_type_3D
|
||||
? norm_offset_z
|
||||
: 0.0f,
|
||||
imageSampler, tempOut,
|
||||
1 /*verbose*/,
|
||||
&hasDenormals, lod);
|
||||
sample_image_pixel_float_offset(
|
||||
imagePtr, imageInfo,
|
||||
xOffsetValues[j],
|
||||
(num_dimensions > 1)
|
||||
? yOffsetValues[j]
|
||||
: 0.0f,
|
||||
image_type_3D
|
||||
? zOffsetValues[j]
|
||||
: 0.0f,
|
||||
norm_offset_x,
|
||||
(num_dimensions > 1)
|
||||
? norm_offset_y
|
||||
: 0.0f,
|
||||
image_type_3D
|
||||
? norm_offset_z
|
||||
: 0.0f,
|
||||
imageSampler, tempOut,
|
||||
1 /*verbose*/,
|
||||
&hasDenormals, lod);
|
||||
log_error(
|
||||
"\tulps: %2.2f, %2.2f, "
|
||||
"%2.2f, %2.2f (max "
|
||||
|
||||
@@ -945,7 +945,7 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double
|
||||
// Validate float results
|
||||
float *resultPtr = (float *)(char *)resultValues;
|
||||
float expected[4], error=0.0f;
|
||||
float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
|
||||
|
||||
for( size_t y = 0, j = 0; y < height_lod; y++ )
|
||||
{
|
||||
for( size_t x = 0; x < width_lod; x++, j++ )
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
set(MODULE_NAME INTEGER_OPS)
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
|
||||
add_cxx_flag_if_supported(-Wno-narrowing)
|
||||
endif()
|
||||
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
main.cpp
|
||||
test_int_basic_ops.cpp
|
||||
|
||||
@@ -26,14 +26,32 @@
|
||||
|
||||
void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d )
|
||||
{
|
||||
static const cl_long sUniqueValues[] = { 0x3333333333333333LL, 0x5555555555555555LL, 0x9999999999999999LL, 0xaaaaaaaaaaaaaaaaLL, 0xccccccccccccccccLL,
|
||||
0x3030303030303030LL, 0x5050505050505050LL, 0x9090909090909090LL, 0xa0a0a0a0a0a0a0a0LL, 0xc0c0c0c0c0c0c0c0LL, 0xf0f0f0f0f0f0f0f0LL,
|
||||
0x0303030303030303LL, 0x0505050505050505LL, 0x0909090909090909LL, 0x0a0a0a0a0a0a0a0aLL, 0x0c0c0c0c0c0c0c0cLL, 0x0f0f0f0f0f0f0f0fLL,
|
||||
0x3300330033003300LL, 0x5500550055005500LL, 0x9900990099009900LL, 0xaa00aa00aa00aa00LL, 0xcc00cc00cc00cc00LL, 0xff00ff00ff00ff00LL,
|
||||
0x0033003300330033LL, 0x0055005500550055LL, 0x0099009900990099LL, 0x00aa00aa00aa00aaLL, 0x00cc00cc00cc00ccLL, 0x00ff00ff00ff00ffLL,
|
||||
0x3333333300000000LL, 0x5555555500000000LL, 0x9999999900000000LL, 0xaaaaaaaa00000000LL, 0xcccccccc00000000LL, 0xffffffff00000000LL,
|
||||
0x0000000033333333LL, 0x0000000055555555LL, 0x0000000099999999LL, 0x00000000aaaaaaaaLL, 0x00000000ccccccccLL, 0x00000000ffffffffLL,
|
||||
0x3333000000003333LL, 0x5555000000005555LL, 0x9999000000009999LL, 0xaaaa00000000aaaaLL, 0xcccc00000000ccccLL, 0xffff00000000ffffLL};
|
||||
static const cl_long sUniqueValues[] = {
|
||||
(cl_long)0x3333333333333333LL, (cl_long)0x5555555555555555LL,
|
||||
(cl_long)0x9999999999999999LL, (cl_long)0xaaaaaaaaaaaaaaaaLL,
|
||||
(cl_long)0xccccccccccccccccLL, (cl_long)0x3030303030303030LL,
|
||||
(cl_long)0x5050505050505050LL, (cl_long)0x9090909090909090LL,
|
||||
(cl_long)0xa0a0a0a0a0a0a0a0LL, (cl_long)0xc0c0c0c0c0c0c0c0LL,
|
||||
(cl_long)0xf0f0f0f0f0f0f0f0LL, (cl_long)0x0303030303030303LL,
|
||||
(cl_long)0x0505050505050505LL, (cl_long)0x0909090909090909LL,
|
||||
(cl_long)0x0a0a0a0a0a0a0a0aLL, (cl_long)0x0c0c0c0c0c0c0c0cLL,
|
||||
(cl_long)0x0f0f0f0f0f0f0f0fLL, (cl_long)0x3300330033003300LL,
|
||||
(cl_long)0x5500550055005500LL, (cl_long)0x9900990099009900LL,
|
||||
(cl_long)0xaa00aa00aa00aa00LL, (cl_long)0xcc00cc00cc00cc00LL,
|
||||
(cl_long)0xff00ff00ff00ff00LL, (cl_long)0x0033003300330033LL,
|
||||
(cl_long)0x0055005500550055LL, (cl_long)0x0099009900990099LL,
|
||||
(cl_long)0x00aa00aa00aa00aaLL, (cl_long)0x00cc00cc00cc00ccLL,
|
||||
(cl_long)0x00ff00ff00ff00ffLL, (cl_long)0x3333333300000000LL,
|
||||
(cl_long)0x5555555500000000LL, (cl_long)0x9999999900000000LL,
|
||||
(cl_long)0xaaaaaaaa00000000LL, (cl_long)0xcccccccc00000000LL,
|
||||
(cl_long)0xffffffff00000000LL, (cl_long)0x0000000033333333LL,
|
||||
(cl_long)0x0000000055555555LL, (cl_long)0x0000000099999999LL,
|
||||
(cl_long)0x00000000aaaaaaaaLL, (cl_long)0x00000000ccccccccLL,
|
||||
(cl_long)0x00000000ffffffffLL, (cl_long)0x3333000000003333LL,
|
||||
(cl_long)0x5555000000005555LL, (cl_long)0x9999000000009999LL,
|
||||
(cl_long)0xaaaa00000000aaaaLL, (cl_long)0xcccc00000000ccccLL,
|
||||
(cl_long)0xffff00000000ffffLL
|
||||
};
|
||||
static cl_long sSpecialValues[ 128 + 128 + 128 + ( sizeof( sUniqueValues ) / sizeof( sUniqueValues[ 0 ] ) ) ] = { 0 };
|
||||
|
||||
if( sSpecialValues[ 0 ] == 0 )
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "harness/conversions.h"
|
||||
#include "harness/ThreadPool.h"
|
||||
#include "harness/parseParameters.h"
|
||||
|
||||
#define NUM_TESTS 23
|
||||
|
||||
@@ -823,10 +824,10 @@ int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queu
|
||||
int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) {
|
||||
int errors = 0;
|
||||
|
||||
if (getenv("CL_WIMPY_MODE") && num == LONG_MATH_SHIFT_SIZE) {
|
||||
log_info("Detected CL_WIMPY_MODE env\n");
|
||||
log_info("Skipping long test\n");
|
||||
return 0;
|
||||
if (gWimpyMode && num == LONG_MATH_SHIFT_SIZE)
|
||||
{
|
||||
log_info("Running in wimpy mode, skipping long test\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int i;
|
||||
|
||||
@@ -1370,7 +1370,9 @@ verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB
|
||||
void
|
||||
init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d)
|
||||
{
|
||||
static const cl_ushort specialCaseList[] = { 0, -1, 1, CL_SHRT_MAX, CL_SHRT_MAX + 1, CL_USHRT_MAX };
|
||||
static const cl_ushort specialCaseList[] = {
|
||||
0, (cl_ushort)-1, 1, CL_SHRT_MAX, CL_SHRT_MAX + 1, CL_USHRT_MAX
|
||||
};
|
||||
int j;
|
||||
|
||||
// Set the inputs to a random number
|
||||
@@ -1812,7 +1814,9 @@ verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, c
|
||||
void
|
||||
init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d)
|
||||
{
|
||||
static const cl_uchar specialCaseList[] = { 0, -1, 1, CL_CHAR_MAX, CL_CHAR_MAX + 1, CL_UCHAR_MAX };
|
||||
static const cl_uchar specialCaseList[] = {
|
||||
0, (cl_uchar)-1, 1, CL_CHAR_MAX, CL_CHAR_MAX + 1, CL_UCHAR_MAX
|
||||
};
|
||||
int j;
|
||||
|
||||
// FIXME: we really should just check every char against every char here
|
||||
|
||||
@@ -248,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
}
|
||||
|
||||
// Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||
cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||
size_t idx = 0;
|
||||
int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
|
||||
@@ -257,7 +257,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
// Test edge cases
|
||||
if (job_id <= (cl_uint)lastSpecialJobIndex)
|
||||
{
|
||||
cl_double *fp = (cl_double *)p;
|
||||
cl_int *ip2 = (cl_int *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
@@ -266,7 +265,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
|
||||
|
||||
for (; idx < buffer_elements; idx++)
|
||||
{
|
||||
fp[idx] = specialValues[x];
|
||||
p[idx] = specialValues[x];
|
||||
ip2[idx] = specialValuesInt[y];
|
||||
if (++x >= specialValuesCount)
|
||||
{
|
||||
|
||||
@@ -754,10 +754,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
|
||||
}
|
||||
|
||||
bool correctlyRounded = strcmp(f->name, "divide_cr") == 0;
|
||||
|
||||
// Init the kernels
|
||||
BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
|
||||
test_info.programs, f->nameInCode,
|
||||
relaxedMode };
|
||||
test_info.programs, f->nameInCode,
|
||||
relaxedMode, correctlyRounded };
|
||||
if ((error = ThreadPool_Do(BuildKernelFn,
|
||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||
&build_info)))
|
||||
|
||||
@@ -260,7 +260,7 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
if (t[j] == q[j] && t2[j] == q2[j]) continue;
|
||||
|
||||
// Check for paired NaNs
|
||||
if (IsHalfNaN(t[j]) && IsHalfNaN(q[j]) && t2[j] == q2[j])
|
||||
if (isnan_fp(t[j]) && isnan_fp(q[j]) && t2[j] == q2[j])
|
||||
continue;
|
||||
|
||||
cl_half test = ((cl_half *)q)[j];
|
||||
@@ -282,7 +282,7 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode)
|
||||
// then the standard either neglects to say what is returned
|
||||
// in iptr or leaves it undefined or implementation defined.
|
||||
int iptrUndefined = IsHalfInfinity(p[j]) || (HTF(p2[j]) == 0.0f)
|
||||
|| IsHalfNaN(p2[j]) || IsHalfNaN(p[j]);
|
||||
|| isnan_fp(p2[j]) || isnan_fp(p[j]);
|
||||
if (iptrUndefined) iErr = 0;
|
||||
|
||||
int fail = !(fabsf(err) <= half_ulps && iErr == 0);
|
||||
|
||||
@@ -102,7 +102,7 @@ void EmitEnableExtension(std::ostringstream &kernel,
|
||||
if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
|
||||
}
|
||||
|
||||
std::string GetBuildOptions(bool relaxed_mode)
|
||||
std::string GetBuildOptions(const BuildKernelInfo &info)
|
||||
{
|
||||
std::ostringstream options;
|
||||
|
||||
@@ -111,16 +111,16 @@ std::string GetBuildOptions(bool relaxed_mode)
|
||||
options << " -cl-denorms-are-zero";
|
||||
}
|
||||
|
||||
if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
|
||||
{
|
||||
options << " -cl-fp32-correctly-rounded-divide-sqrt";
|
||||
}
|
||||
|
||||
if (relaxed_mode)
|
||||
if (info.relaxedMode)
|
||||
{
|
||||
options << " -cl-fast-relaxed-math";
|
||||
}
|
||||
|
||||
if (info.correctlyRounded)
|
||||
{
|
||||
options << " -cl-fp32-correctly-rounded-divide-sqrt";
|
||||
}
|
||||
|
||||
return options.str();
|
||||
}
|
||||
|
||||
@@ -581,7 +581,7 @@ cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
|
||||
|
||||
// Create the program.
|
||||
clProgramWrapper &program = info.programs[vector_size_index];
|
||||
auto options = GetBuildOptions(info.relaxedMode);
|
||||
auto options = GetBuildOptions(info);
|
||||
int error =
|
||||
create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
|
||||
sources.data(), nullptr, options.c_str());
|
||||
|
||||
@@ -84,6 +84,9 @@ struct BuildKernelInfo
|
||||
|
||||
// Whether to build with -cl-fast-relaxed-math.
|
||||
bool relaxedMode;
|
||||
|
||||
// Whether to build with -cl-fp32-correctly-rounded-divide-sqrt.
|
||||
bool correctlyRounded;
|
||||
};
|
||||
|
||||
// Data common to all math tests.
|
||||
|
||||
@@ -375,8 +375,8 @@ const Func functionList[] = {
|
||||
{ NULL },
|
||||
3.0f,
|
||||
0.0f,
|
||||
0.0f,
|
||||
1.0f,
|
||||
1.5f,
|
||||
1.5f,
|
||||
4.0f,
|
||||
INFINITY,
|
||||
INFINITY,
|
||||
|
||||
@@ -66,7 +66,6 @@ int gSkipCorrectnessTesting = 0;
|
||||
static int gStopOnError = 0;
|
||||
static bool gSkipRestOfTests;
|
||||
int gForceFTZ = 0;
|
||||
int gWimpyMode = 0;
|
||||
int gHostFill = 0;
|
||||
static int gHasDouble = 0;
|
||||
static int gTestFloat = 1;
|
||||
@@ -82,7 +81,6 @@ static int gTestFastRelaxed = 1;
|
||||
OpenCL 2.0 spec then it has to be changed through a command line argument.
|
||||
*/
|
||||
int gFastRelaxedDerived = 1;
|
||||
static int gToggleCorrectlyRoundedDivideSqrt = 0;
|
||||
int gHasHalf = 0;
|
||||
cl_device_fp_config gHalfCapabilities = 0;
|
||||
int gDeviceILogb0 = 1;
|
||||
@@ -385,21 +383,24 @@ int main(int argc, const char *argv[])
|
||||
error = ParseArgs(argc, argv);
|
||||
if (error) return error;
|
||||
|
||||
// This takes a while, so prevent the machine from going to sleep.
|
||||
PreventSleep();
|
||||
atexit(ResumeSleep);
|
||||
if (!gListTests)
|
||||
{
|
||||
// This takes a while, so prevent the machine from going to sleep.
|
||||
PreventSleep();
|
||||
atexit(ResumeSleep);
|
||||
|
||||
if (gSkipCorrectnessTesting)
|
||||
vlog("*** Skipping correctness testing! ***\n\n");
|
||||
else if (gStopOnError)
|
||||
vlog("Stopping at first error.\n");
|
||||
if (gSkipCorrectnessTesting)
|
||||
vlog("*** Skipping correctness testing! ***\n\n");
|
||||
else if (gStopOnError)
|
||||
vlog("Stopping at first error.\n");
|
||||
|
||||
vlog(" \t ");
|
||||
if (gWimpyMode) vlog(" ");
|
||||
if (!gSkipCorrectnessTesting) vlog("\t max_ulps");
|
||||
vlog(" \t ");
|
||||
if (gWimpyMode) vlog(" ");
|
||||
if (!gSkipCorrectnessTesting) vlog("\t max_ulps");
|
||||
|
||||
vlog("\n-------------------------------------------------------------------"
|
||||
"----------------------------------------\n");
|
||||
vlog("\n---------------------------------------------------------------"
|
||||
"--------------------------------------------\n");
|
||||
}
|
||||
|
||||
gMTdata = MTdataHolder(gRandomSeed);
|
||||
|
||||
@@ -426,6 +427,10 @@ int main(int argc, const char *argv[])
|
||||
|
||||
static int ParseArgs(int argc, const char **argv)
|
||||
{
|
||||
if (gListTests)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
// We only pass test names to runTestHarnessWithCheck, hence global command
|
||||
// line options defined by the harness cannot be used by the user.
|
||||
// To respect the implementation details of runTestHarnessWithCheck,
|
||||
@@ -469,8 +474,6 @@ static int ParseArgs(int argc, const char **argv)
|
||||
optionFound = 1;
|
||||
switch (*arg)
|
||||
{
|
||||
case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
|
||||
|
||||
case 'd': gHasDouble ^= 1; break;
|
||||
|
||||
case 'e': gFastRelaxedDerived ^= 1; break;
|
||||
@@ -498,10 +501,6 @@ static int ParseArgs(int argc, const char **argv)
|
||||
|
||||
case 'v': gVerboseBruteForce ^= 1; break;
|
||||
|
||||
case 'w': // wimpy mode
|
||||
gWimpyMode ^= 1;
|
||||
break;
|
||||
|
||||
case '[':
|
||||
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
|
||||
break;
|
||||
@@ -581,14 +580,6 @@ static int ParseArgs(int argc, const char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
// Check for the wimpy mode environment variable
|
||||
if (getenv("CL_WIMPY_MODE"))
|
||||
{
|
||||
vlog("\n");
|
||||
vlog("*** Detected CL_WIMPY_MODE env ***\n");
|
||||
gWimpyMode = 1;
|
||||
}
|
||||
|
||||
PrintArch();
|
||||
|
||||
if (gWimpyMode)
|
||||
@@ -629,8 +620,6 @@ static void PrintUsage(void)
|
||||
{
|
||||
vlog("%s [-cglsz]: <optional: math function names>\n", appName);
|
||||
vlog("\toptions:\n");
|
||||
vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
|
||||
"off)\n");
|
||||
vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
|
||||
"on)\n");
|
||||
vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
|
||||
@@ -645,7 +634,6 @@ static void PrintUsage(void)
|
||||
"accuracy checks.)\n");
|
||||
vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
|
||||
vlog("\t\t-s\tStop on error\n");
|
||||
vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
|
||||
vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
|
||||
"1-10, default factor(%u)\n",
|
||||
gWimpyReductionFactor);
|
||||
@@ -942,13 +930,6 @@ test_status InitCL(cl_device_id device)
|
||||
vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
|
||||
no_yes[0
|
||||
!= (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
|
||||
if (gToggleCorrectlyRoundedDivideSqrt)
|
||||
{
|
||||
gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
|
||||
}
|
||||
vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
|
||||
no_yes[0
|
||||
!= (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
|
||||
vlog("\tTesting with FTZ mode ON for floats? %s\n",
|
||||
no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
|
||||
vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
|
||||
@@ -1330,8 +1311,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference)
|
||||
|
||||
// reference is a normal power of two or a zero
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp =
|
||||
DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
|
||||
int ulp_exp = DBL_MANT_DIG - std::max(ilogbl(reference), DBL_MIN_EXP);
|
||||
|
||||
// allow correctly rounded results to pass through unmolested. (We might add
|
||||
// error to it below.) There is something of a performance optimization here
|
||||
|
||||
@@ -25,12 +25,10 @@
|
||||
|
||||
#include "utility.h"
|
||||
|
||||
#if defined(__SSE__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#if defined(__SSE2__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@@ -721,9 +719,9 @@ double reference_tanpi(double x)
|
||||
double z = reference_fabs(x);
|
||||
|
||||
// if big and even -- caution: only works if x only has single precision
|
||||
if (z >= HEX_DBL(+, 1, 0, +, 24))
|
||||
if (!(z < HEX_DBL(+, 1, 0, +, 24)))
|
||||
{
|
||||
if (z == INFINITY) return x - x; // nan
|
||||
if (!isfinite(z)) return x - x; // nan
|
||||
|
||||
return reference_copysign(
|
||||
0.0, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n.
|
||||
@@ -855,8 +853,7 @@ double reference_add(double x, double y)
|
||||
volatile float a = (float)x;
|
||||
volatile float b = (float)y;
|
||||
|
||||
#if defined(__SSE__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// defeat x87
|
||||
__m128 va = _mm_set_ss((float)a);
|
||||
__m128 vb = _mm_set_ss((float)b);
|
||||
@@ -953,8 +950,7 @@ double reference_subtract(double x, double y)
|
||||
{
|
||||
volatile float a = (float)x;
|
||||
volatile float b = (float)y;
|
||||
#if defined(__SSE__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// defeat x87
|
||||
__m128 va = _mm_set_ss((float)a);
|
||||
__m128 vb = _mm_set_ss((float)b);
|
||||
@@ -970,8 +966,7 @@ double reference_multiply(double x, double y)
|
||||
{
|
||||
volatile float a = (float)x;
|
||||
volatile float b = (float)y;
|
||||
#if defined(__SSE__) \
|
||||
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
|
||||
#if defined(__SSE__) || _M_IX86_FP == 1
|
||||
// defeat x87
|
||||
__m128 va = _mm_set_ss((float)a);
|
||||
__m128 vb = _mm_set_ss((float)b);
|
||||
@@ -1223,6 +1218,8 @@ double reference_relaxed_exp2(double x) { return reference_exp2(x); }
|
||||
double reference_exp2(double x)
|
||||
{ // Note: only suitable for verifying single precision. Doesn't have range of a
|
||||
// full double exp2 implementation.
|
||||
if (isnan(x)) return x;
|
||||
|
||||
if (x == 0.0) return 1.0;
|
||||
|
||||
// separate x into fractional and integer parts
|
||||
@@ -2781,7 +2778,7 @@ static inline void shift_right_sticky_128(cl_ulong *hi, cl_ulong *lo, int shift)
|
||||
sticky |= (0 != l);
|
||||
l = 0;
|
||||
}
|
||||
else
|
||||
else if (shift > 0)
|
||||
{
|
||||
sticky |= (0 != (l << (64 - shift)));
|
||||
l >>= shift;
|
||||
@@ -3088,9 +3085,9 @@ long double reference_tanpil(long double x)
|
||||
long double z = reference_fabsl(x);
|
||||
|
||||
// if big and even -- caution: only works if x only has single precision
|
||||
if (z >= HEX_LDBL(+, 1, 0, +, 53))
|
||||
if (!(z < HEX_LDBL(+, 1, 0, +, 53)))
|
||||
{
|
||||
if (z == INFINITY) return x - x; // nan
|
||||
if (!isfinite(z)) return x - x; // nan
|
||||
|
||||
return reference_copysignl(
|
||||
0.0L, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n.
|
||||
@@ -5027,8 +5024,9 @@ static double reference_scalbn(double x, int n)
|
||||
u.d -= 1.0;
|
||||
e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022;
|
||||
}
|
||||
if (n >= 2098) return reference_copysign(INFINITY, x);
|
||||
e += n;
|
||||
if (e >= 2047 || n >= 2098) return reference_copysign(INFINITY, x);
|
||||
if (e >= 2047) return reference_copysign(INFINITY, x);
|
||||
if (e < -51 || n < -2097) return reference_copysign(0.0, x);
|
||||
if (e <= 0)
|
||||
{
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user