Merge branch 'main' into cl_khr_unified_svm

This commit is contained in:
Ben Ashbaugh
2025-09-09 17:39:11 -07:00
152 changed files with 6767 additions and 1914 deletions

View File

@@ -1,5 +1,7 @@
set(MODULE_NAME API)
find_package(Python3 COMPONENTS Interpreter QUIET)
set(${MODULE_NAME}_SOURCES
main.cpp
negative_platform.cpp
@@ -40,6 +42,20 @@ set(${MODULE_NAME}_SOURCES
test_pipe_properties_queries.cpp
test_wg_suggested_local_work_size.cpp
test_device_command_queue.cpp
test_spirv_queries.cpp
${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def
)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def
COMMENT "Generating spirv_capability_deps.def..."
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_spirv_capability_deps.py
--grammar "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json"
--output "${CMAKE_CURRENT_BINARY_DIR}/spirv_capability_deps.def"
DEPENDS generate_spirv_capability_deps.py "${SPIRV_INCLUDE_DIR}/include/spirv/unified1/spirv.core.grammar.json"
USES_TERMINAL
VERBATIM)
include(../CMakeCommon.txt)
target_include_directories(${${MODULE_NAME}_OUT} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

View File

@@ -0,0 +1,102 @@
#!/usr/bin/env python3
#####################################################################
# Copyright (c) 2025 The Khronos Group Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#####################################################################
"""
Generates a file describing the SPIR-V extension dependencies or SPIR-V version
dependencies for a SPIR-V capability. This can be used to ensure that if support
for a SPIR-V capability is reported, the necessary SPIR-V extensions or SPIR-V
version is also supported.
"""
import argparse
import json
header_text = """\
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file is generated from the SPIR-V JSON grammar file.
// Please do not edit it directly!
"""
def main():
parser = argparse.ArgumentParser(description='Generate SPIR-V extension and version dependencies for SPIR-V capabilities')
parser.add_argument('--grammar', metavar='<path>',
type=str, required=True,
help='input JSON grammar file')
parser.add_argument('--output', metavar='<path>',
type=str, required=False,
help='output file path (default: stdout)')
args = parser.parse_args()
dependencies = {}
capabilities = []
with open(args.grammar) as json_file:
grammar_json = json.loads(json_file.read())
for operand_kind in grammar_json['operand_kinds']:
if operand_kind['kind'] == 'Capability':
for cap in operand_kind['enumerants']:
capname = cap['enumerant']
capabilities.append(capname)
dependencies[capname] = {}
dependencies[capname]['extensions'] = cap['extensions'] if 'extensions' in cap else []
dependencies[capname]['version'] = ("SPIR-V_" + cap['version']) if 'version' in cap and cap['version'] != 'None' else ""
capabilities.sort()
output = []
output.append(header_text)
output.append("// clang-format off")
if False:
for cap in capabilities:
deps = dependencies[cap]
extensions_str = ', '.join(f'"{ext}"' for ext in deps['extensions'])
output.append('SPIRV_CAPABILITY_DEPENDENCIES( {}, {{{}}}, "{}" )'.format(
cap, extensions_str, deps['version']))
else:
for cap in capabilities:
deps = dependencies[cap]
if deps['version'] != "":
output.append('SPIRV_CAPABILITY_VERSION_DEPENDENCY( {}, "{}" )'.format(cap, deps['version']))
for ext in deps['extensions']:
output.append('SPIRV_CAPABILITY_EXTENSION_DEPENDENCY( {}, "{}" )'.format(cap, ext))
output.append("// clang-format on")
if args.output:
with open(args.output, 'w') as output_file:
output_file.write('\n'.join(output))
else:
print('\n'.join(output))
if __name__ == '__main__':
main()

View File

@@ -1088,10 +1088,12 @@ REGISTER_TEST(min_max_image_buffer_size)
pixelBytes = maxAllocSize / maxDimensionPixels;
if (pixelBytes == 0)
{
log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than "
"CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image "
"of maximum size!\n");
return -1;
log_info(
"Note, the value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is %zu pixels, "
"therefore the size of the allocated image may be larger than the "
"scaled CL_DEVICE_MAX_MEM_ALLOC_SIZE of %" PRIu64 " bytes.\n",
maxDimensionPixels, maxAllocSize);
pixelBytes = 1;
}
error = -1;

View File

@@ -87,6 +87,35 @@ const char *sample_two_kernel_program[] = {
"\n"
"}\n" };
const char *sample_mem_obj_size_test_kernel = R"(
__kernel void mem_obj_size_test(__global int *src, __global int *dst)
{
size_t tid = get_global_id(0);
dst[tid] = src[tid];
}
)";
const char *sample_local_size_test_kernel = R"(
__kernel void local_size_test(__local int *src, __global int *dst)
{
size_t tid = get_global_id(0);
dst[tid] = src[tid];
}
)";
const char *sample_read_only_image_test_kernel = R"(
__kernel void read_only_image_test(__write_only image2d_t img, __global uint4 *src)
{
write_imageui(img, (int2)(get_global_id(0), get_global_id(1)), src[0]);
}
)";
const char *sample_write_only_image_test_kernel = R"(
__kernel void write_only_image_test(__read_only image2d_t src, __global uint4 *dst)
{
dst[0]=read_imageui(src, (int2)(get_global_id(0), get_global_id(1)));
}
)";
REGISTER_TEST(get_kernel_info)
{
@@ -669,7 +698,7 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
test_error(error,
"Unable to get sample_image_test kernel for built program");
std::vector<cl_uchar> mem_data(size_dim * size_dim);
std::vector<cl_uchar> mem_data(size_dim * size_dim * 4);
buffer = clCreateBuffer(context, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
sizeof(cl_int) * size_dim, mem_data.data(), &error);
test_error(error, "clCreateBuffer failed");
@@ -704,3 +733,169 @@ REGISTER_TEST(negative_set_immutable_memory_to_writeable_kernel_arg)
return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_mem_obj)
{
cl_int error = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper mem_obj_arg_kernel;
// Setup the test
error =
create_single_kernel_helper(context, &program, nullptr, 1,
&sample_mem_obj_size_test_kernel, nullptr);
test_error(error, "Unable to build test program");
mem_obj_arg_kernel = clCreateKernel(program, "mem_obj_size_test", &error);
test_error(error,
"Unable to get mem_obj_size_test kernel for built program");
std::vector<cl_uchar> mem_data(256, 0);
clMemWrapper buffer = clCreateBuffer(
context, CL_MEM_USE_HOST_PTR, mem_data.size(), mem_data.data(), &error);
test_error(error, "clCreateBuffer failed");
// Run the test - CL_INVALID_ARG_SIZE
error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) * 2, &buffer);
test_failure_error_ret(
error, CL_INVALID_ARG_SIZE,
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
"argument is a memory object and arg_size > sizeof(cl_mem)",
TEST_FAIL);
error = clSetKernelArg(mem_obj_arg_kernel, 0, sizeof(cl_mem) / 2, &buffer);
test_failure_error_ret(
error, CL_INVALID_ARG_SIZE,
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when "
"argument is a memory object and arg_size < sizeof(cl_mem)",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST(negative_invalid_kernel)
{
cl_int error = CL_SUCCESS;
clKernelWrapper kernel;
clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(cl_float), NULL, &error);
test_error(error, "clCreateBuffer failed");
// Run the test - CL_INVALID_KERNEL
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem);
test_failure_error_ret(
error, CL_INVALID_KERNEL,
"clSetKernelArg is supposed to fail with CL_INVALID_KERNEL when kernel "
"is not a valid kernel object",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_index)
{
cl_int error = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper kernel;
// Setup the test
error = create_single_kernel_helper(context, &program, nullptr, 1,
sample_single_test_kernel, nullptr);
test_error(error, "Unable to build test program");
kernel = clCreateKernel(program, "sample_test", &error);
test_error(error, "Unable to get sample_test kernel for built program");
// Run the test - 2 index is out or range - expected CL_INVALID_ARG_INDEX
error = clSetKernelArg(kernel, 2, sizeof(cl_mem), nullptr);
test_failure_error_ret(
error, CL_INVALID_ARG_INDEX,
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_INDEX when "
"arg_index is not a valid argument index",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST(negative_invalid_arg_size_local)
{
cl_int error = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper local_arg_kernel;
// Setup the test
error = create_single_kernel_helper(
context, &program, nullptr, 1, &sample_local_size_test_kernel, nullptr);
test_error(error, "Unable to build test program");
local_arg_kernel = clCreateKernel(program, "local_size_test", &error);
test_error(error, "Unable to get local_size_test kernel for built program");
// Run the test
error = clSetKernelArg(local_arg_kernel, 0, 0, nullptr);
test_failure_error_ret(
error, CL_INVALID_ARG_SIZE,
"clSetKernelArg is supposed to fail with CL_INVALID_ARG_SIZE when 0 is "
"passed to a local qualifier kernel argument",
TEST_FAIL);
return TEST_PASS;
}
REGISTER_TEST(negative_set_read_write_image_arg)
{
cl_int error = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper write_image_kernel, read_image_kernel;
clMemWrapper write_only_image, read_only_image;
const char *test_kernels[2] = { sample_read_only_image_test_kernel,
sample_write_only_image_test_kernel };
constexpr cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 };
const int size_dim = 128;
PASSIVE_REQUIRE_IMAGE_SUPPORT(device);
// Setup the test
error = create_single_kernel_helper(context, &program, nullptr, 2,
test_kernels, nullptr);
test_error(error, "Unable to build test program");
read_image_kernel = clCreateKernel(program, "read_only_image_test", &error);
test_error(error,
"Unable to get read_only_image_test kernel for built program");
write_image_kernel =
clCreateKernel(program, "write_only_image_test", &error);
test_error(error,
"Unable to get write_only_image_test kernel for built program");
read_only_image = create_image_2d(context, CL_MEM_READ_ONLY, &format,
size_dim, size_dim, 0, nullptr, &error);
test_error(error, "create_image_2d failed");
write_only_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &format,
size_dim, size_dim, 0, nullptr, &error);
test_error(error, "create_image_2d failed");
// Run the test
error = clSetKernelArg(read_image_kernel, 0, sizeof(read_only_image),
&read_only_image);
test_failure_error_ret(error, CL_INVALID_ARG_VALUE,
"clSetKernelArg is supposed to fail "
"with CL_INVALID_ARG_VALUE when an image is "
"created with CL_MEM_READ_ONLY is "
"passed to a write_only kernel argument",
TEST_FAIL);
error = clSetKernelArg(write_image_kernel, 0, sizeof(write_only_image),
&write_only_image);
test_failure_error_ret(error, CL_INVALID_ARG_VALUE,
"clSetKernelArg is supposed to fail "
"with CL_INVALID_ARG_VALUE when an image is "
"created with CL_MEM_WRITE_ONLY is "
"passed to a read_only kernel argument",
TEST_FAIL);
return TEST_PASS;
}

View File

@@ -0,0 +1,767 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include <algorithm>
#include <map>
#include <vector>
#define SPV_ENABLE_UTILITY_CODE
#include <spirv/unified1/spirv.hpp>
static bool is_spirv_version_supported(cl_device_id deviceID,
const std::string& version)
{
std::string ilVersions = get_device_il_version_string(deviceID);
return ilVersions.find(version) != std::string::npos;
}
static int doQueries(cl_device_id device,
std::vector<const char*>& extendedInstructionSets,
std::vector<const char*>& extensions,
std::vector<cl_uint>& capabilities)
{
cl_int error = CL_SUCCESS;
size_t size = 0;
error =
clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR,
0, nullptr, &size);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR size\n");
extendedInstructionSets.resize(size / sizeof(const char*));
error =
clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR,
size, extendedInstructionSets.data(), nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR\n");
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, 0, nullptr,
&size);
test_error(
error,
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR size\n");
extensions.resize(size / sizeof(const char*));
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_EXTENSIONS_KHR, size,
extensions.data(), nullptr);
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_EXTENSIONS_KHR\n");
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, 0,
nullptr, &size);
test_error(
error,
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR size\n");
capabilities.resize(size / sizeof(cl_uint));
error = clGetDeviceInfo(device, CL_DEVICE_SPIRV_CAPABILITIES_KHR, size,
capabilities.data(), nullptr);
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_SPIRV_CAPABILITIES_KHR\n");
return CL_SUCCESS;
}
static int findRequirements(cl_device_id device,
std::vector<const char*>& extendedInstructionSets,
std::vector<const char*>& extensions,
std::vector<cl_uint>& capabilities)
{
cl_int error = CL_SUCCESS;
auto version = get_device_cl_version(device);
auto ilVersions = get_device_il_version_string(device);
// If no SPIR-V versions are supported, there are no requirements.
if (ilVersions.find("SPIR-V") == std::string::npos)
{
return CL_SUCCESS;
}
cl_bool deviceImageSupport = CL_FALSE;
cl_bool deviceReadWriteImageSupport = CL_FALSE;
cl_bool deviceSubGroupsSupport = CL_FALSE;
cl_bool deviceGenericAddressSpaceSupport = CL_FALSE;
cl_bool deviceWorkGroupCollectiveFunctionsSupport = CL_FALSE;
cl_bool devicePipeSupport = CL_FALSE;
cl_bool deviceDeviceEnqueueSupport = CL_FALSE;
cl_device_integer_dot_product_capabilities_khr
deviceIntegerDotProductCapabilities = 0;
cl_device_fp_atomic_capabilities_ext deviceFp32AtomicCapabilities = 0;
cl_device_fp_atomic_capabilities_ext deviceFp16AtomicCapabilities = 0;
cl_device_fp_atomic_capabilities_ext deviceFp64AtomicCapabilities = 0;
error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
sizeof(deviceImageSupport), &deviceImageSupport,
nullptr);
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE_SUPPORT\n");
if (version >= Version(2, 0))
{
cl_uint deviceMaxReadWriteImageArgs = 0;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
sizeof(deviceMaxReadWriteImageArgs),
&deviceMaxReadWriteImageArgs, nullptr);
test_error(
error,
"clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
deviceReadWriteImageSupport =
deviceMaxReadWriteImageArgs != 0 ? CL_TRUE : CL_FALSE;
}
if (version >= Version(2, 1))
{
cl_uint deviceMaxNumSubGroups = 0;
error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS,
sizeof(deviceMaxNumSubGroups),
&deviceMaxNumSubGroups, nullptr);
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_MAX_NUM_SUB_GROUPS\n");
deviceSubGroupsSupport =
deviceMaxNumSubGroups != 0 ? CL_TRUE : CL_FALSE;
}
else if (is_extension_available(device, "cl_khr_subgroups"))
{
deviceSubGroupsSupport = CL_TRUE;
}
if (version >= Version(3, 0))
{
error = clGetDeviceInfo(device, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
sizeof(deviceGenericAddressSpaceSupport),
&deviceGenericAddressSpaceSupport, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT\n");
error = clGetDeviceInfo(
device, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT,
sizeof(deviceWorkGroupCollectiveFunctionsSupport),
&deviceWorkGroupCollectiveFunctionsSupport, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT\n");
error = clGetDeviceInfo(device, CL_DEVICE_PIPE_SUPPORT,
sizeof(devicePipeSupport), &devicePipeSupport,
nullptr);
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_PIPE_SUPPORT\n");
cl_device_device_enqueue_capabilities deviceDeviceEnqueueCapabilities =
0;
error = clGetDeviceInfo(device, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
sizeof(deviceDeviceEnqueueCapabilities),
&deviceDeviceEnqueueCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES\n");
deviceDeviceEnqueueSupport =
deviceDeviceEnqueueCapabilities != 0 ? CL_TRUE : CL_FALSE;
}
else if (version >= Version(2, 0))
{
deviceGenericAddressSpaceSupport = CL_TRUE;
deviceWorkGroupCollectiveFunctionsSupport = CL_TRUE;
devicePipeSupport = CL_TRUE;
deviceDeviceEnqueueSupport = CL_TRUE;
}
if (is_extension_available(device, "cl_khr_integer_dot_product"))
{
error = clGetDeviceInfo(device,
CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR,
sizeof(deviceIntegerDotProductCapabilities),
&deviceIntegerDotProductCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR\n");
}
if (is_extension_available(device, "cl_ext_float_atomics"))
{
error =
clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(deviceFp32AtomicCapabilities),
&deviceFp32AtomicCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT\n");
error =
clGetDeviceInfo(device, CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(deviceFp16AtomicCapabilities),
&deviceFp16AtomicCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT\n");
error =
clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(deviceFp64AtomicCapabilities),
&deviceFp64AtomicCapabilities, nullptr);
test_error(error,
"clGetDeviceInfo failed for "
"CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT\n");
}
// Required.
extendedInstructionSets.push_back("OpenCL.std");
capabilities.push_back(spv::CapabilityAddresses);
capabilities.push_back(spv::CapabilityFloat16Buffer);
capabilities.push_back(spv::CapabilityInt16);
capabilities.push_back(spv::CapabilityInt8);
capabilities.push_back(spv::CapabilityKernel);
capabilities.push_back(spv::CapabilityLinkage);
capabilities.push_back(spv::CapabilityVector16);
// Required for FULL_PROFILE devices, or devices supporting
// cles_khr_int64.
if (gHasLong)
{
capabilities.push_back(spv::CapabilityInt64);
}
// Required for devices supporting images.
if (deviceImageSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityImage1D);
capabilities.push_back(spv::CapabilityImageBasic);
capabilities.push_back(spv::CapabilityImageBuffer);
capabilities.push_back(spv::CapabilityLiteralSampler);
capabilities.push_back(spv::CapabilitySampled1D);
capabilities.push_back(spv::CapabilitySampledBuffer);
}
// Required for devices supporting SPIR-V 1.6.
if (ilVersions.find("SPIR-V_1.6") != std::string::npos)
{
capabilities.push_back(spv::CapabilityUniformDecoration);
}
// Required for devices supporting read-write images.
if (deviceReadWriteImageSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityImageReadWrite);
}
// Required for devices supporting the generic address space.
if (deviceGenericAddressSpaceSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityGenericPointer);
}
// Required for devices supporting sub-groups or work-group collective
// functions.
if (deviceSubGroupsSupport == CL_TRUE
|| deviceWorkGroupCollectiveFunctionsSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityGroups);
}
// Required for devices supporting pipes.
if (devicePipeSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityPipes);
}
// Required for devices supporting device-side enqueue.
if (deviceDeviceEnqueueSupport == CL_TRUE)
{
capabilities.push_back(spv::CapabilityDeviceEnqueue);
}
// Required for devices supporting SPIR-V 1.1 and OpenCL 2.2.
if (ilVersions.find("SPIR-V_1.1") != std::string::npos
&& version == Version(2, 2))
{
capabilities.push_back(spv::CapabilityPipeStorage);
}
// Required for devices supporting SPIR-V 1.1 and either OpenCL 2.2 or
// OpenCL 3.0 devices supporting sub-groups.
if (ilVersions.find("SPIR-V_1.1") != std::string::npos
&& (version == Version(2, 2)
|| (version >= Version(3, 0) && deviceSubGroupsSupport == CL_TRUE)))
{
capabilities.push_back(spv::CapabilitySubgroupDispatch);
}
// Required for devices supporting cl_khr_expect_assume.
if (is_extension_available(device, "cl_khr_expect_assume"))
{
extensions.push_back("SPV_KHR_expect_assume");
capabilities.push_back(spv::CapabilityExpectAssumeKHR);
}
// Required for devices supporting cl_khr_extended_bit_ops.
if (is_extension_available(device, "cl_khr_extended_bit_ops"))
{
extensions.push_back("SPV_KHR_bit_instructions");
capabilities.push_back(spv::CapabilityBitInstructions);
}
// Required for devices supporting half-precision floating-point
// (cl_khr_fp16).
if (is_extension_available(device, "cl_khr_fp16"))
{
capabilities.push_back(spv::CapabilityFloat16);
}
// Required for devices supporting double-precision floating-point
// (cl_khr_fp64).
if (is_extension_available(device, "cl_khr_fp64"))
{
capabilities.push_back(spv::CapabilityFloat64);
}
// Required for devices supporting 64-bit atomics
// (cl_khr_int64_base_atomics or cl_khr_int64_extended_atomics).
if (is_extension_available(device, "cl_khr_int64_base_atomics")
|| is_extension_available(device, "cl_khr_int64_extended_atomics"))
{
capabilities.push_back(spv::CapabilityInt64Atomics);
}
// Required for devices supporting cl_khr_integer_dot_product.
if (is_extension_available(device, "cl_khr_integer_dot_product"))
{
extensions.push_back("SPV_KHR_integer_dot_product");
capabilities.push_back(spv::CapabilityDotProduct);
capabilities.push_back(spv::CapabilityDotProductInput4x8BitPacked);
}
// Required for devices supporting cl_khr_integer_dot_product and
// CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR.
if (is_extension_available(device, "cl_khr_integer_dot_product")
&& (deviceIntegerDotProductCapabilities
& CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR))
{
capabilities.push_back(spv::CapabilityDotProductInput4x8Bit);
}
// Required for devices supporting cl_khr_kernel_clock.
if (is_extension_available(device, "cl_khr_kernel_clock"))
{
extensions.push_back("SPV_KHR_shader_clock");
capabilities.push_back(spv::CapabilityShaderClockKHR);
}
// Required for devices supporting both cl_khr_mipmap_image and
// cl_khr_mipmap_image_writes.
if (is_extension_available(device, "cl_khr_mipmap_image")
&& is_extension_available(device, "cl_khr_mipmap_image_writes"))
{
capabilities.push_back(spv::CapabilityImageMipmap);
}
// Required for devices supporting cl_khr_spirv_extended_debug_info.
if (is_extension_available(device, "cl_khr_spirv_extended_debug_info"))
{
extendedInstructionSets.push_back("OpenCL.DebugInfo.100");
}
// Required for devices supporting cl_khr_spirv_linkonce_odr.
if (is_extension_available(device, "cl_khr_spirv_linkonce_odr"))
{
extensions.push_back("SPV_KHR_linkonce_odr");
}
// Required for devices supporting
// cl_khr_spirv_no_integer_wrap_decoration.
if (is_extension_available(device,
"cl_khr_spirv_no_integer_wrap_decoration"))
{
extensions.push_back("SPV_KHR_no_integer_wrap_decoration");
}
// Required for devices supporting cl_khr_subgroup_ballot.
if (is_extension_available(device, "cl_khr_subgroup_ballot"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformBallot);
}
// Required for devices supporting cl_khr_subgroup_clustered_reduce.
if (is_extension_available(device, "cl_khr_subgroup_clustered_reduce"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformClustered);
}
// Required for devices supporting cl_khr_subgroup_named_barrier.
if (is_extension_available(device, "cl_khr_subgroup_named_barrier"))
{
capabilities.push_back(spv::CapabilityNamedBarrier);
}
// Required for devices supporting
// cl_khr_subgroup_non_uniform_arithmetic.
if (is_extension_available(device,
"cl_khr_subgroup_non_uniform_arithmetic"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformArithmetic);
}
// Required for devices supporting cl_khr_subgroup_non_uniform_vote.
if (is_extension_available(device, "cl_khr_subgroup_non_uniform_vote"))
{
capabilities.push_back(spv::CapabilityGroupNonUniform);
capabilities.push_back(spv::CapabilityGroupNonUniformVote);
}
// Required for devices supporting cl_khr_subgroup_rotate.
if (is_extension_available(device, "cl_khr_subgroup_rotate"))
{
extensions.push_back("SPV_KHR_subgroup_rotate");
capabilities.push_back(spv::CapabilityGroupNonUniformRotateKHR);
}
// Required for devices supporting cl_khr_subgroup_shuffle.
if (is_extension_available(device, "cl_khr_subgroup_shuffle"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformShuffle);
}
// Required for devices supporting cl_khr_subgroup_shuffle_relative.
if (is_extension_available(device, "cl_khr_subgroup_shuffle_relative"))
{
capabilities.push_back(spv::CapabilityGroupNonUniformShuffleRelative);
}
// Required for devices supporting cl_khr_work_group_uniform_arithmetic.
if (is_extension_available(device, "cl_khr_work_group_uniform_arithmetic"))
{
extensions.push_back("SPV_KHR_uniform_group_instructions");
capabilities.push_back(spv::CapabilityGroupUniformArithmeticKHR);
}
// Required for devices supporting cl_ext_float_atomics and fp32 atomic
// adds.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp32AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat32AddEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp32 atomic
// min and max.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp32AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat32MinMaxEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp16 atomic
// adds.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp16AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
{
extensions.push_back("SPV_EXT_shader_atomic_float16_add");
capabilities.push_back(spv::CapabilityAtomicFloat16AddEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp16 atomic
// min and max.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp16AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat16MinMaxEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp64 atomic
// adds.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp64AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat64AddEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp64 atomic
// min and max.
if (is_extension_available(device, "cl_ext_float_atomics")
&& (deviceFp64AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)))
{
capabilities.push_back(spv::CapabilityAtomicFloat64MinMaxEXT);
}
// Required for devices supporting cl_ext_float_atomics and fp16, fp32,
// or fp64 atomic min or max.
if (is_extension_available(device, "cl_ext_float_atomics")
&& ((deviceFp32AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))
|| (deviceFp16AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))
|| (deviceFp64AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT))))
{
extensions.push_back("SPV_EXT_shader_atomic_float_min_max");
}
// Required for devices supporting cl_ext_float_atomics and fp32 or fp64
// atomic adds.
if (is_extension_available(device, "cl_ext_float_atomics")
&& ((deviceFp32AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))
|| (deviceFp64AtomicCapabilities
& (CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
| CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT))))
{
extensions.push_back("SPV_EXT_shader_atomic_float_add");
}
// Required for devices supporting cl_intel_bfloat16_conversions.
if (is_extension_available(device, "cl_intel_bfloat16_conversions"))
{
extensions.push_back("SPV_INTEL_bfloat16_conversion");
capabilities.push_back(spv::CapabilityBFloat16ConversionINTEL);
}
// Required for devices supporting
// cl_intel_spirv_device_side_avc_motion_estimation.
if (is_extension_available(
device, "cl_intel_spirv_device_side_avc_motion_estimation"))
{
extensions.push_back("SPV_INTEL_device_side_avc_motion_estimation");
capabilities.push_back(
spv::CapabilitySubgroupAvcMotionEstimationChromaINTEL);
capabilities.push_back(spv::CapabilitySubgroupAvcMotionEstimationINTEL);
capabilities.push_back(
spv::CapabilitySubgroupAvcMotionEstimationIntraINTEL);
}
// Required for devices supporting cl_intel_spirv_media_block_io.
if (is_extension_available(device, "cl_intel_spirv_media_block_io"))
{
extensions.push_back("SPV_INTEL_media_block_io");
capabilities.push_back(spv::CapabilitySubgroupImageMediaBlockIOINTEL);
}
// Required for devices supporting cl_intel_spirv_subgroups.
if (is_extension_available(device, "cl_intel_spirv_subgroups"))
{
extensions.push_back("SPV_INTEL_subgroups");
capabilities.push_back(spv::CapabilitySubgroupBufferBlockIOINTEL);
capabilities.push_back(spv::CapabilitySubgroupImageBlockIOINTEL);
capabilities.push_back(spv::CapabilitySubgroupShuffleINTEL);
}
// Required for devices supporting cl_intel_split_work_group_barrier.
if (is_extension_available(device, "cl_intel_split_work_group_barrier"))
{
extensions.push_back("SPV_INTEL_split_barrier");
capabilities.push_back(spv::CapabilitySplitBarrierINTEL);
}
// Required for devices supporting cl_intel_subgroup_buffer_prefetch.
if (is_extension_available(device, "cl_intel_subgroup_buffer_prefetch"))
{
extensions.push_back("SPV_INTEL_subgroup_buffer_prefetch");
capabilities.push_back(spv::CapabilitySubgroupBufferPrefetchINTEL);
}
return CL_SUCCESS;
}
REGISTER_TEST(spirv_query_requirements)
{
if (!is_extension_available(device, "cl_khr_spirv_queries"))
{
log_info("cl_khr_spirv_queries is not supported; skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
cl_int error;
std::vector<const char*> queriedExtendedInstructionSets;
std::vector<const char*> queriedExtensions;
std::vector<cl_uint> queriedCapabilities;
error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions,
queriedCapabilities);
test_error_fail(error, "Unable to perform SPIR-V queries");
std::vector<const char*> requiredExtendedInstructionSets;
std::vector<const char*> requiredExtensions;
std::vector<cl_uint> requiredCapabilities;
error = findRequirements(device, requiredExtendedInstructionSets,
requiredExtensions, requiredCapabilities);
test_error_fail(error, "Unable to find SPIR-V requirements");
for (auto check : requiredExtendedInstructionSets)
{
auto cmp = [=](const char* queried) {
return strcmp(check, queried) == 0;
};
auto it = std::find_if(queriedExtendedInstructionSets.begin(),
queriedExtendedInstructionSets.end(), cmp);
if (it == queriedExtendedInstructionSets.end())
{
test_fail("Missing required extended instruction set: %s\n", check);
}
}
for (auto check : requiredExtensions)
{
auto cmp = [=](const char* queried) {
return strcmp(check, queried) == 0;
};
auto it = std::find_if(queriedExtensions.begin(),
queriedExtensions.end(), cmp);
if (it == queriedExtensions.end())
{
test_fail("Missing required extension: %s\n", check);
}
}
for (auto check : requiredCapabilities)
{
if (std::find(queriedCapabilities.begin(), queriedCapabilities.end(),
check)
== queriedCapabilities.end())
{
test_fail(
"Missing required capability: %s\n",
spv::CapabilityToString(static_cast<spv::Capability>(check)));
}
}
// Find any extraneous capabilities (informational):
for (auto check : queriedCapabilities)
{
if (std::find(requiredCapabilities.begin(), requiredCapabilities.end(),
check)
== requiredCapabilities.end())
{
log_info(
"Found non-required capability: %s\n",
spv::CapabilityToString(static_cast<spv::Capability>(check)));
}
}
return TEST_PASS;
}
REGISTER_TEST(spirv_query_dependencies)
{
if (!is_extension_available(device, "cl_khr_spirv_queries"))
{
log_info("cl_khr_spirv_queries is not supported; skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
cl_int error;
std::vector<const char*> queriedExtendedInstructionSets;
std::vector<const char*> queriedExtensions;
std::vector<cl_uint> queriedCapabilities;
error = doQueries(device, queriedExtendedInstructionSets, queriedExtensions,
queriedCapabilities);
test_error_fail(error, "Unable to perform SPIR-V queries");
struct CapabilityDependencies
{
std::vector<std::string> extensions;
std::string version;
};
std::map<spv::Capability, CapabilityDependencies> dependencies;
#define SPIRV_CAPABILITY_VERSION_DEPENDENCY(_cap, _ver) \
dependencies[spv::Capability##_cap].version = _ver;
#define SPIRV_CAPABILITY_EXTENSION_DEPENDENCY(_cap, _ext) \
dependencies[spv::Capability##_cap].extensions.push_back(_ext);
#include "spirv_capability_deps.def"
// For each queried SPIR-V capability, ensure that either that any SPIR-V
// version dependencies or SPIR-V extension dependencies are satisfied.
for (auto check : queriedCapabilities)
{
// Log and skip any unknown capabilities
auto it = dependencies.find(static_cast<spv::Capability>(check));
if (it == dependencies.end())
{
log_info(
"No known dependencies for queried capability %s!\n",
spv::CapabilityToString(static_cast<spv::Capability>(check)));
continue;
}
// Check if a SPIR-V version dependency is satisfied
const auto& version_dep = it->second.version;
if (!version_dep.empty()
&& is_spirv_version_supported(device, version_dep))
{
continue;
}
// Check if a SPIR-V extension dependency is satisfied
bool found = false;
for (const auto& extension_dep : it->second.extensions)
{
if (std::find(queriedExtensions.begin(), queriedExtensions.end(),
extension_dep)
!= queriedExtensions.end())
{
found = true;
break;
}
}
if (found)
{
continue;
}
// If we get here then the capability has an unsatisfied dependency.
log_error("Couldn't find a dependency for queried capability %s!\n",
spv::CapabilityToString(static_cast<spv::Capability>(check)));
if (!version_dep.empty())
{
log_error("Checked for SPIR-V version %s.\n", version_dep.c_str());
}
for (const auto& extension_dep : it->second.extensions)
{
log_error("Checked for SPIR-V extension %s.n",
extension_dep.c_str());
}
return TEST_FAIL;
}
return TEST_PASS;
}

View File

@@ -108,7 +108,11 @@ REGISTER_TEST_VERSION(sub_group_dispatch, Version(2, 1))
nullptr);
test_error(error, "clGetDeviceInfo failed");
max_local = max_work_item_sizes[0];
error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
sizeof(max_local), &max_local, nullptr);
test_error(error, "clGetKernelWorkGroupInfo failed");
max_local = std::min(max_local, max_work_item_sizes[0]);
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
(void *)&platform, NULL);

View File

@@ -1,9 +1,5 @@
set(MODULE_NAME BASIC)
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
add_cxx_flag_if_supported(-Wno-narrowing)
endif()
set(${MODULE_NAME}_SOURCES
main.cpp
test_fpmath.cpp

View File

@@ -161,7 +161,7 @@ REGISTER_TEST(arraycopy)
err |= clSetKernelArg(kernel, 1, sizeof results, &results);
test_error(err, "clSetKernelArg failed");
size_t threads[3] = { num_elements, 0, 0 };
size_t threads[3] = { static_cast<size_t>(num_elements), 0, 0 };
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error(err, "clEnqueueNDRangeKernel failed");

View File

@@ -35,9 +35,9 @@ static int test_arrayimagecopy_single_format(
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
imgptr{ nullptr, free };
clMemWrapper buffer, image;
int img_width = 512;
int img_height = 512;
int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
size_t img_width = 512;
size_t img_height = 512;
size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
size_t elem_size;
size_t buffer_size;
cl_int err;

View File

@@ -14,7 +14,6 @@
// limitations under the License.
//
#include <cmath>
using std::isnan;
#include "harness/compat.h"
#include <stdio.h>
@@ -26,6 +25,7 @@ using std::isnan;
#include <CL/cl_half.h>
#include "harness/conversions.h"
#include "harness/mathHelpers.h"
#include "harness/typeWrappers.h"
extern cl_half_rounding_mode halfRoundingMode;
@@ -102,16 +102,6 @@ const char * kernel_explicit_s2v_set[NUM_VEC_TYPES][NUM_VEC_TYPES][5] = {
// clang-format on
bool IsHalfNaN(cl_half v)
{
// Extract FP16 exponent and mantissa
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = ((cl_half)v) & 0x3FF;
// NaN test
return (h_exp == 0x1F && h_mant != 0);
}
static int test_explicit_s2v_function(cl_context context,
cl_command_queue queue, cl_kernel kernel,
ExplicitType srcType, unsigned int count,
@@ -183,20 +173,21 @@ static int test_explicit_s2v_function(cl_context context,
{
bool isSrcNaN =
(((srcType == kHalf)
&& IsHalfNaN(*reinterpret_cast<cl_half *>(inPtr)))
&& isnan_fp(*reinterpret_cast<cl_half *>(inPtr)))
|| ((srcType == kFloat)
&& isnan(*reinterpret_cast<cl_float *>(inPtr)))
&& isnan_fp(*reinterpret_cast<cl_float *>(inPtr)))
|| ((srcType == kDouble)
&& isnan(*reinterpret_cast<cl_double *>(inPtr))));
bool isDestNaN = (((destType == kHalf)
&& IsHalfNaN(*reinterpret_cast<cl_half *>(
outPtr + destTypeSize * s)))
|| ((destType == kFloat)
&& isnan(*reinterpret_cast<cl_float *>(
outPtr + destTypeSize * s)))
|| ((destType == kDouble)
&& isnan(*reinterpret_cast<cl_double *>(
outPtr + destTypeSize * s))));
&& isnan_fp(*reinterpret_cast<cl_double *>(inPtr))));
bool isDestNaN =
(((destType == kHalf)
&& isnan_fp(*reinterpret_cast<cl_half *>(
outPtr + destTypeSize * s)))
|| ((destType == kFloat)
&& isnan_fp(*reinterpret_cast<cl_float *>(
outPtr + destTypeSize * s)))
|| ((destType == kDouble)
&& isnan_fp(*reinterpret_cast<cl_double *>(
outPtr + destTypeSize * s))));
if (isSrcNaN && isDestNaN)
{

View File

@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "harness/compat.h"
#include "harness/mathHelpers.h"
#include "harness/rounding_mode.h"
#include "harness/stringHelpers.h"
@@ -57,16 +58,6 @@ template <typename T> double toDouble(T val)
return val;
}
bool isHalfNan(cl_half v)
{
// Extract FP16 exponent and mantissa
uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = v & 0x3FF;
// NaN test
return (h_exp == 0x1F && h_mant != 0);
}
cl_half half_plus(cl_half a, cl_half b)
{
return HFF(std::plus<float>()(HTF(a), HTF(b)));
@@ -101,14 +92,7 @@ int verify_fp(std::vector<T> (&input)[2], std::vector<T> &output,
T r = test.ref(inA[i], inB[i]);
bool both_nan = false;
if (std::is_same<T, cl_half>::value)
{
both_nan = isHalfNan(r) && isHalfNan(output[i]);
}
else if (std::is_floating_point<T>::value)
{
both_nan = std::isnan(r) && std::isnan(output[i]);
}
both_nan = isnan_fp(r) && isnan_fp(output[i]);
// If not both nan, check if the result is the same
if (!both_nan && (r != output[i]))

View File

@@ -100,8 +100,8 @@ REGISTER_TEST(hostptr)
cl_image_format img_format;
cl_uchar *rgba8_inptr, *rgba8_outptr;
void *lock_buffer;
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
cl_int err;
MTdata d;
RoundingMode oldRoundMode;

View File

@@ -56,8 +56,9 @@ __kernel void test_if(__global int *src, __global int *dst)
int verify_if(std::vector<cl_int> input, std::vector<cl_int> output)
{
const cl_int results[] = {
0x12345678, 0x23456781, 0x34567812, 0x45678123,
0x56781234, 0x67812345, 0x78123456, 0x81234567,
(cl_int)0x12345678, (cl_int)0x23456781, (cl_int)0x34567812,
(cl_int)0x45678123, (cl_int)0x56781234, (cl_int)0x67812345,
(cl_int)0x78123456, (cl_int)0x81234567,
};
auto predicate = [&results](cl_int a, cl_int b) {

View File

@@ -144,8 +144,8 @@ verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int
REGISTER_TEST(image_multipass_integer_coord)
{
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
cl_image_format img_format;
int num_input_streams = 8;
@@ -397,8 +397,8 @@ REGISTER_TEST(image_multipass_integer_coord)
REGISTER_TEST(image_multipass_float_coord)
{
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
cl_image_format img_format;
int num_input_streams = 8;

View File

@@ -35,9 +35,9 @@ static int test_imagearraycopy_single_format(
std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
imgptr{ nullptr, free };
clMemWrapper buffer, image;
const int img_width = 512;
const int img_height = 512;
const int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
const size_t img_width = 512;
const size_t img_height = 512;
const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
size_t elem_size;
size_t buffer_size;
cl_int err;

View File

@@ -111,8 +111,8 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int i, err;
MTdataHolder d(gRandomSeed);
@@ -153,7 +153,7 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
for (i = 0; i < 3; i++)
{
void *p, *outp;
int x, y, delta_w = img_width / 8, delta_h = img_height / 16;
size_t x, y, delta_w = img_width / 8, delta_h = img_height / 16;
switch (i)
{
@@ -197,10 +197,11 @@ static int test_imagecopy_impl(cl_device_id device, cl_context context,
copy_origin, copy_region, 0, NULL, NULL);
if (err)
{
log_error("Copy %d (origin [%d, %d], size [%d, %d], image "
"size [%d x %d]) Failed\n",
copy_number, x, y, delta_w, delta_h, img_width,
img_height);
log_error(
"Copy %d (origin [%zu, %zu], size [%zu, %zu], image "
"size [%zu x %zu]) Failed\n",
copy_number, x, y, delta_w, delta_h, img_width,
img_height);
}
test_error(err, "clEnqueueCopyImage failed");
}

View File

@@ -115,9 +115,9 @@ static int test_imagecopy3d_impl(cl_device_id device, cl_context context,
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 128;
int img_height = 128;
int img_depth = 64;
size_t img_width = 128;
size_t img_height = 128;
size_t img_depth = 64;
int i;
cl_int err;
unsigned num_elements = img_width * img_height * img_depth * 4;

View File

@@ -82,8 +82,8 @@ REGISTER_TEST(imagenpot)
cl_kernel kernel;
size_t global_threads[3], local_threads[3];
size_t local_workgroup_size;
int img_width;
int img_height;
size_t img_width;
size_t img_height;
int err;
cl_uint m;
size_t max_local_workgroup_size[3];

View File

@@ -123,15 +123,15 @@ REGISTER_TEST(imagerandomcopy)
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int i, j;
cl_int err;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
log_info("Testing with image %d x %d.\n", img_width, img_height);
log_info("Testing with image %zu x %zu.\n", img_width, img_height);
d = init_genrand( gRandomSeed );
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
@@ -191,8 +191,8 @@ REGISTER_TEST(imagerandomcopy)
}
size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
err = clEnqueueWriteImage(queue, streams[i * 2], CL_TRUE, origin,
region, 0, 0, p, 0, NULL, NULL);
test_error(err, "clEnqueueWriteImage failed");
for (j=0; j<NUM_COPIES; j++)

View File

@@ -195,8 +195,8 @@ REGISTER_TEST(imagereadwrite)
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
clMemWrapper streams[3];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int num_tries = 200;
int i, j, err;
MTdataHolder d(gRandomSeed);
@@ -242,10 +242,10 @@ REGISTER_TEST(imagereadwrite)
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
{
int x = (int)get_random_float(0, img_width, d);
int y = (int)get_random_float(0, img_height, d);
int w = (int)get_random_float(1, (img_width - x), d);
int h = (int)get_random_float(1, (img_height - y), d);
size_t x = (size_t)get_random_float(0, img_width, d);
size_t y = (size_t)get_random_float(0, img_height, d);
size_t w = (size_t)get_random_float(1, (img_width - x), d);
size_t h = (size_t)get_random_float(1, (img_height - y), d);
size_t input_pitch;
int set_input_pitch = (int)(genrand_int32(d) & 0x01);
int packed_update = (int)(genrand_int32(d) & 0x01);
@@ -386,7 +386,8 @@ REGISTER_TEST(imagereadwrite)
img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
}
break;
@@ -396,7 +397,8 @@ REGISTER_TEST(imagereadwrite)
img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
}
break;
@@ -406,7 +408,8 @@ REGISTER_TEST(imagereadwrite)
img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("x=%zu y=%zu w=%zu h=%zu, pitch=%d, try=%d\n", x,
y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
}
break;

View File

@@ -205,10 +205,10 @@ REGISTER_TEST(imagereadwrite3d)
std::unique_ptr<unsigned short[]> rgba16_inptr, rgba16_outptr;
std::unique_ptr<float[]> rgbafp_inptr, rgbafp_outptr;
clMemWrapper streams[3];
int img_width = 64;
int img_height = 64;
int img_depth = 32;
int img_slice = img_width * img_height;
size_t img_width = 64;
size_t img_height = 64;
size_t img_depth = 32;
size_t img_slice = img_width * img_height;
int num_tries = 30;
int i, j, err;
MTdataHolder mtData(gRandomSeed);
@@ -257,12 +257,12 @@ REGISTER_TEST(imagereadwrite3d)
for (i = 0, j = 0; i < num_tries * image_formats_count; i++, j++)
{
int x = (int)get_random_float(0, (float)img_width - 1, mtData);
int y = (int)get_random_float(0, (float)img_height - 1, mtData);
int z = (int)get_random_float(0, (float)img_depth - 1, mtData);
int w = (int)get_random_float(1, (float)(img_width - x), mtData);
int h = (int)get_random_float(1, (float)(img_height - y), mtData);
int d = (int)get_random_float(1, (float)(img_depth - z), mtData);
size_t x = (size_t)get_random_float(0, (float)img_width - 1, mtData);
size_t y = (size_t)get_random_float(0, (float)img_height - 1, mtData);
size_t z = (size_t)get_random_float(0, (float)img_depth - 1, mtData);
size_t w = (size_t)get_random_float(1, (float)(img_width - x), mtData);
size_t h = (size_t)get_random_float(1, (float)(img_height - y), mtData);
size_t d = (size_t)get_random_float(1, (float)(img_depth - z), mtData);
size_t input_pitch, input_slice_pitch;
int set_input_pitch = (int)(genrand_int32(mtData) & 0x01);
int packed_update = (int)(genrand_int32(mtData) & 0x01);
@@ -401,7 +401,10 @@ REGISTER_TEST(imagereadwrite3d)
img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
"slice_pitch=%d, try=%d\n",
x, y, z, w, h, d, (int)input_pitch,
(int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
}
break;
@@ -411,7 +414,10 @@ REGISTER_TEST(imagereadwrite3d)
img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
"slice_pitch=%d, try=%d\n",
x, y, z, w, h, d, (int)input_pitch,
(int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
}
break;
@@ -421,7 +427,10 @@ REGISTER_TEST(imagereadwrite3d)
img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("x=%zu y=%zu z=%zu w=%zu h=%zu d=%zu pitch=%d, "
"slice_pitch=%d, try=%d\n",
x, y, z, w, h, d, (int)input_pitch,
(int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
}
break;

View File

@@ -66,7 +66,7 @@ REGISTER_TEST(kernel_call_kernel_function)
clKernelWrapper kernel1, kernel2, kernel_to_call;
clMemWrapper streams[2];
size_t threads[] = {num_elements,1,1};
size_t threads[] = { static_cast<size_t>(num_elements), 1, 1 };
cl_int *input, *output, *expected;
cl_int times = 4;
int pass = 0;

View File

@@ -117,8 +117,8 @@ REGISTER_TEST(mri_multiple)
cl_program program;
cl_kernel kernel;
size_t threads[2];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int i, err;
MTdata d;

View File

@@ -100,8 +100,8 @@ REGISTER_TEST(mri_one)
cl_program program;
cl_kernel kernel;
size_t threads[2];
int img_width = 512;
int img_height = 512;
size_t img_width = 512;
size_t img_height = 512;
int i, err;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {img_width, img_height, 1};

View File

@@ -1264,7 +1264,7 @@ static int l_write_read_for_type(cl_device_id device, cl_context context,
}
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0,
0, 0);
memset(read_data, -1, read_data_size);
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
@@ -1503,7 +1503,7 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context,
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
queue, read_mem, CL_TRUE, CL_MAP_WRITE, 0, read_data_size, 0, 0,
0, 0);
memset(read_data, -1, read_data_size);
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);

View File

@@ -476,7 +476,7 @@ struct TestWorkItemFnsOutOfRange
maxWorkItemSizes[2] };
// check if maximum work group size for current dimention is not
// exceeded
cl_uint work_group_size = max_workgroup_size + 1;
size_t work_group_size = max_workgroup_size + 1;
while (max_workgroup_size < work_group_size && work_group_size != 1)
{
work_group_size = 1;
@@ -492,9 +492,9 @@ struct TestWorkItemFnsOutOfRange
// compute max number of work groups based on buffer size and max
// group size
cl_uint max_work_groups = testData.size() / work_group_size;
size_t max_work_groups = testData.size() / work_group_size;
// take into account number of dimentions
cl_uint work_groups_per_dim =
size_t work_groups_per_dim =
std::max(1, (int)pow(max_work_groups, 1.f / dim));
for (size_t j = 0; j < dim; j++)

View File

@@ -24,8 +24,9 @@
#include "CL/cl_half.h"
#include <vector>
#include <iomanip>
#include <sstream>
#include <vector>
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
#define MAX_HOST_THREADS GetThreadCount()
@@ -74,9 +75,11 @@ extern int
gMaxDeviceThreads; // maximum number of threads executed on OCL device
extern cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device
extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
extern const char *
get_memory_order_type_name(TExplicitMemoryOrderType orderType);
@@ -174,6 +177,13 @@ public:
{
return false;
}
virtual bool
IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue)
{
return expected != testValues[whichDestValue];
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d)
{
@@ -883,14 +893,15 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
header += std::string("__global volatile ") + aTypeName + " destMemory["
+ ss.str() + "] = {\n";
ss.str("");
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
!= TYPE_ATOMIC_HALF)
ss << _startValue;
else
== TYPE_ATOMIC_FLOAT)
ss << std::setprecision(10) << _startValue;
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_HALF)
ss << static_cast<HostDataType>(
cl_half_to_float(static_cast<cl_half>(_startValue)));
else
ss << _startValue;
for (cl_uint i = 0; i < maxNumDestItems; i++)
{
if (aTypeName == "atomic_flag")
@@ -1449,7 +1460,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
startRefValues.size() ? &startRefValues[0] : 0, i))
break; // no expected value function provided
if (expected != destItems[i])
if (IsTestNotAsExpected(expected, destItems, i))
{
std::stringstream logLine;
logLine << "ERROR: Result " << i

View File

@@ -17,6 +17,9 @@
#define HOST_ATOMICS_H_
#include "harness/testHarness.h"
#include <mutex>
#include "CL/cl_half.h"
#ifdef WIN32
#include "Windows.h"
@@ -87,6 +90,8 @@ enum TExplicitMemoryOrderType
#define HOST_FLAG cl_int
extern cl_half_rounding_mode gHalfRoundingMode;
// host atomic functions
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
@@ -94,28 +99,51 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a += c;
return old_value;
}
else
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
return InterlockedExchangeAdd(a, c);
return InterlockedExchangeAdd(a, c);
#elif defined(__GNUC__)
return __sync_fetch_and_add(a, c);
return __sync_fetch_and_add(a, c);
#else
log_info("Host function not implemented: atomic_fetch_add\n");
return 0;
log_info("Host function not implemented: atomic_fetch_add\n");
return 0;
#endif
}
}
template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
return InterlockedExchangeSubtract(a, c);
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
gHalfRoundingMode);
return old_value;
}
else
{
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
return InterlockedExchangeSubtract(a, c);
#elif defined(__GNUC__)
return __sync_fetch_and_sub(a, c);
return __sync_fetch_and_sub(a, c);
#else
log_info("Host function not implemented: atomic_fetch_sub\n");
return 0;
log_info("Host function not implemented: atomic_fetch_sub\n");
return 0;
#endif
}
}
template <typename AtomicType, typename CorrespondingType>
@@ -144,19 +172,34 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
TExplicitMemoryOrderType order_success,
TExplicitMemoryOrderType order_failure)
{
CorrespondingType tmp;
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected);
CorrespondingType tmp;
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile float *>(a);
if (tmp == *expected)
{
*reinterpret_cast<volatile float *>(a) = desired;
return true;
}
*expected = tmp;
}
else
{
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected);
#elif defined(__GNUC__)
tmp = __sync_val_compare_and_swap(a, *expected, desired);
tmp = __sync_val_compare_and_swap(a, *expected, desired);
#else
log_info("Host function not implemented: atomic_compare_exchange\n");
tmp = 0;
log_info("Host function not implemented: atomic_compare_exchange\n");
tmp = 0;
#endif
if(tmp == *expected)
return true;
*expected = tmp;
return false;
if (tmp == *expected) return true;
*expected = tmp;
}
return false;
}
template <typename AtomicType, typename CorrespondingType>

View File

@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device);
@@ -132,6 +133,12 @@ test_status InitCL(cl_device_id device) {
if (is_extension_available(device, "cl_ext_float_atomics"))
{
gFloatAtomicsSupported = true;
cl_int error = clGetDeviceInfo(
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
if (is_extension_available(device, "cl_khr_fp16"))
{
cl_int error = clGetDeviceInfo(

File diff suppressed because it is too large Load Diff

View File

@@ -8,7 +8,6 @@ set(VULKAN_WRAPPER_SOURCES
# needed by Vulkan wrapper to compile
set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
add_cxx_flag_if_supported(-Wmisleading-indentation)
add_cxx_flag_if_supported(-Wno-narrowing)
add_cxx_flag_if_supported(-Wno-format)
add_cxx_flag_if_supported(-Wno-error)
add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive

View File

@@ -863,7 +863,7 @@ clExternalMemoryImage::clExternalMemoryImage(
size_t clImageFormatSize;
cl_image_desc image_desc;
memset(&image_desc, 0x0, sizeof(cl_image_desc));
cl_image_format img_format = { 0 };
img_format = { 0 };
const VkImageCreateInfo VulkanImageCreateInfo =
image2D.getVkImageCreateInfo();
@@ -1233,7 +1233,7 @@ int clExternalExportableSemaphore::signal(cl_command_queue cmd_queue)
import.fd = fd;
import.pNext = nullptr;
import.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
import.flags = 0;
import.flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT;
VkResult res =
vkImportSemaphoreFdKHR(m_deviceSemaphore.getDevice(), &import);

View File

@@ -106,6 +106,7 @@ protected:
cl_mem m_externalMemory;
int fd;
void *handle;
cl_image_format img_format;
clExternalMemoryImage();
public:
@@ -117,6 +118,7 @@ public:
cl_device_id deviceId);
virtual ~clExternalMemoryImage();
cl_mem getExternalMemoryImage();
cl_image_format getImageFormat() { return img_format; };
};
class clExternalSemaphore {

View File

@@ -243,6 +243,8 @@ getSupportedVulkanExternalMemoryHandleTypeList(
VkPhysicalDeviceExternalBufferInfo buffer_info = {};
buffer_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO;
buffer_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT
| VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VkExternalBufferProperties buffer_properties = {};
buffer_properties.sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES;
@@ -307,7 +309,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList(const VulkanDevice &vkDevice)
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, nullptr,
handle_type.vk_type
};
VkExternalSemaphoreProperties query_result = {};
VkExternalSemaphoreProperties query_result = {
VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES
};
vkGetPhysicalDeviceExternalSemaphorePropertiesKHR(
vkDevice.getPhysicalDevice(), &handle_query, &query_result);
if (query_result.externalSemaphoreFeatures

View File

@@ -17,10 +17,31 @@ set(${MODULE_NAME}_SOURCES
include(../CMakeCommon.txt)
# Include the relative paths to SPV assembly files
configure_file(spirv_asm_list.txt ${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt)
include(${CMAKE_CURRENT_BINARY_DIR}/spirv_asm_list.txt)
# Determine the corresponding binary outputs to the SPV assembly input files
set(COMPILER_ASM_REL_PATH spirv_asm)
set(COMPILER_ASM_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${COMPILER_ASM_REL_PATH}")
set(COMPILER_SPV_PATH "${CMAKE_CURRENT_BINARY_DIR}/spirv_bin")
# Copy the required test include directories into the build directory.
if(NOT DEFINED COMPILER_TEST_RESOURCES)
set(COMPILER_TEST_RESOURCES $<TARGET_FILE_DIR:${${MODULE_NAME}_OUT}>)
endif()
set(COMPILER_SPV_EXTRA "")
if(SPIRV_TOOLS_DIR AND IS_ABSOLUTE "${SPIRV_TOOLS_DIR}" AND
IS_DIRECTORY "${SPIRV_TOOLS_DIR}")
message("Using SPIR-V tools from '${SPIRV_TOOLS_DIR}'")
set(COMPILER_SPV_EXTRA "--assembler=${SPIRV_TOOLS_DIR}/spirv-as" "--validator=${SPIRV_TOOLS_DIR}/spirv-val")
endif()
set(COMPILER_ASSEMBLY_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../spirv_new/spirv_asm/assemble_spirv.py)
include(CMakePrintHelpers)
cmake_print_variables(COMPILER_ASSEMBLY_SCRIPT)
add_custom_command(
COMMENT "Copying compiler test resources..."
TARGET ${${MODULE_NAME}_OUT}
@@ -30,7 +51,10 @@ add_custom_command(
${COMPILER_TEST_RESOURCES}/includeTestDirectory
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory)
${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory
COMMAND ${COMPILER_ASSEMBLY_SCRIPT} --source-dir "${COMPILER_ASM_PATH}" --output-dir "${COMPILER_SPV_PATH}" ${COMPILER_SPV_EXTRA} --verbose
DEPENDS ${COMPILER_ASSEMBLY_SCRIPT} ${COMPILER_ASM}
VERBATIM)
include(GNUInstallDirs)

View File

@@ -0,0 +1,4 @@
set(COMPILER_SPIRV_NEW_ASM
compiler_spirv_asm/write_kernel.spvasm32
compiler_spirv_asm/write_kernel.spvasm64
)

View File

@@ -14,6 +14,9 @@
// limitations under the License.
//
#include "testBase.h"
#include <filesystem>
#if defined(_WIN32)
#include <time.h>
#elif defined(__linux__) || defined(__APPLE__)
@@ -3020,15 +3023,6 @@ REGISTER_TEST(execute_after_embedded_header_link)
return 0;
}
#if defined(__APPLE__) || defined(__linux)
#define _mkdir(x) mkdir(x, S_IRWXU)
#define _chdir chdir
#define _rmdir rmdir
#define _unlink unlink
#else
#include <direct.h>
#endif
REGISTER_TEST(execute_after_included_header_link)
{
int error;
@@ -3047,100 +3041,60 @@ REGISTER_TEST(execute_after_included_header_link)
}
/* setup */
#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
/* Some tests systems doesn't allow one to write in the test directory */
if (_chdir("/tmp") != 0)
std::error_code ec;
auto temp_dir_path = std::filesystem::temp_directory_path(ec);
if (ec)
{
log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
log_error("ERROR: Unable to get the temporary directory path\n");
return -1;
}
#endif
if (_mkdir("foo") != 0)
temp_dir_path = temp_dir_path / "foo" / "bar";
std::filesystem::create_directories(temp_dir_path, ec);
if (ec)
{
log_error("ERROR: Unable to create directory foo! (in %s:%d)\n",
__FILE__, __LINE__);
log_error("ERROR: Unable to create directory: %s, error: %d (%s)\n",
temp_dir_path.u8string().c_str(), ec.value(),
ec.message().c_str());
return -1;
}
if (_mkdir("foo/bar") != 0)
{
log_error("ERROR: Unable to create directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
return -1;
}
if (_chdir("foo/bar") != 0)
{
log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
return -1;
}
FILE *simple_header_file = fopen(simple_header_name, "w");
const auto simple_header_path = temp_dir_path / simple_header_name;
FILE *simple_header_file =
fopen(simple_header_path.u8string().c_str(), "w");
if (simple_header_file == NULL)
{
log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n",
simple_header_name, __FILE__, __LINE__);
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
return -1;
}
if (fprintf(simple_header_file, "%s", simple_header) < 0)
{
log_error(
"ERROR: Unable to write to simple header file %s! (in %s:%d)\n",
simple_header_name, __FILE__, __LINE__);
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
return -1;
}
if (fclose(simple_header_file) != 0)
{
log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n",
simple_header_name, __FILE__, __LINE__);
simple_header_path.u8string().c_str(), __FILE__, __LINE__);
return -1;
}
if (_chdir("../..") != 0)
{
log_error("ERROR: Unable to change to original working directory! (in "
"%s:%d)\n",
__FILE__, __LINE__);
return -1;
}
#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
error = clCompileProgram(program, 1, &device, "-I/tmp/foo/bar", 0, NULL,
const std::string include_path =
std::string("-I") + temp_dir_path.generic_u8string();
error = clCompileProgram(program, 1, &device, include_path.c_str(), 0, NULL,
NULL, NULL, NULL);
#else
error = clCompileProgram(program, 1, &device, "-Ifoo/bar", 0, NULL, NULL,
NULL, NULL);
#endif
test_error(error,
"Unable to compile a simple program with included header");
/* cleanup */
if (_chdir("foo/bar") != 0)
std::filesystem::remove_all(temp_dir_path, ec);
if (ec)
{
log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
return -1;
}
if (_unlink(simple_header_name) != 0)
{
log_error("ERROR: Unable to remove simple header file %s! (in %s:%d)\n",
simple_header_name, __FILE__, __LINE__);
return -1;
}
if (_chdir("../..") != 0)
{
log_error("ERROR: Unable to change to original working directory! (in "
"%s:%d)\n",
__FILE__, __LINE__);
return -1;
}
if (_rmdir("foo/bar") != 0)
{
log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
__FILE__, __LINE__);
return -1;
}
if (_rmdir("foo") != 0)
{
log_error("ERROR: Unable to remove directory foo! (in %s:%d)\n",
__FILE__, __LINE__);
log_error("ERROR: Unable to delete directory: %s, error: %d (%s)",
temp_dir_path.u8string().c_str(), ec.value(),
ec.message().c_str());
return -1;
}

View File

@@ -95,7 +95,9 @@ const char *known_extensions[] = {
"cl_khr_command_buffer",
"cl_khr_command_buffer_mutable_dispatch",
"cl_khr_command_buffer_multi_device",
"cl_khr_external_memory_android_hardware_buffer"
"cl_khr_external_memory_android_hardware_buffer",
"cl_khr_unified_svm",
"cl_khr_spirv_queries"
};
// clang-format on

View File

@@ -656,6 +656,32 @@ static int test_feature_macro_integer_dot_product_input_4x8bit(
compiler_status, supported);
}
static int test_feature_macro_ext_image_unorm_int_2_101010(
cl_device_id deviceID, cl_context context, std::string test_macro_name,
cl_bool& supported)
{
cl_int error = TEST_FAIL;
cl_bool api_status = CL_TRUE;
cl_bool compiler_status;
log_info("\n%s ...\n", test_macro_name.c_str());
if (!is_extension_available(deviceID, "cl_ext_image_unorm_int_2_101010"))
{
supported = false;
return TEST_PASS;
}
error = check_compiler_feature_info(deviceID, context, test_macro_name,
compiler_status);
if (error != CL_SUCCESS)
{
return error;
}
return feature_macro_verify_results(test_macro_name, api_status,
compiler_status, supported);
}
static int test_feature_macro_int64(cl_device_id deviceID, cl_context context,
std::string test_macro_name,
cl_bool& supported)
@@ -833,6 +859,7 @@ REGISTER_TEST_VERSION(features_macro, Version(3, 0))
NEW_FEATURE_MACRO_TEST(int64);
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit);
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed);
NEW_FEATURE_MACRO_TEST(ext_image_unorm_int_2_101010);
error |= test_consistency_c_features_list(device, supported_features_vec);

View File

@@ -25,6 +25,43 @@
#include <string>
#include <thread>
#include <vector>
#include <fstream>
#if defined(_WIN32)
const std::string slash = "\\";
#else
const std::string slash = "/";
#endif
std::string compilerSpvBinaries = "test_conformance" + slash + "compiler"
+ slash + "spirv_bin" + slash + "write_kernel.spv";
const std::string spvExt = ".spv";
std::vector<unsigned char> readBinary(const char *file_name)
{
using namespace std;
ifstream file(file_name, ios::in | ios::binary | ios::ate);
std::vector<char> tmpBuffer(0);
if (file.is_open())
{
size_t size = file.tellg();
tmpBuffer.resize(size);
file.seekg(0, ios::beg);
file.read(&tmpBuffer[0], size);
file.close();
}
else
{
log_error("File %s not found\n", file_name);
}
std::vector<unsigned char> result(tmpBuffer.begin(), tmpBuffer.end());
return result;
}
namespace {
@@ -299,18 +336,12 @@ public:
throw unload_test_failure("Failure getting device address bits");
}
switch (address_bits)
{
case 32:
m_spirv_binary = write_kernel_32_spv.data();
m_spirv_size = write_kernel_32_spv.size();
break;
case 64:
m_spirv_binary = write_kernel_64_spv.data();
m_spirv_size = write_kernel_64_spv.size();
break;
default: throw unload_test_failure("Invalid address bits");
}
std::vector<unsigned char> kernel_buffer;
std::string file_name =
compilerSpvBinaries + std::to_string(address_bits);
m_spirv_binary = readBinary(file_name.c_str());
m_spirv_size = m_spirv_binary.size();
}
void create() final
@@ -320,7 +351,7 @@ public:
assert(nullptr == m_program);
cl_int err = CL_INVALID_PLATFORM;
m_program = m_CreateProgramWithIL(m_context, m_spirv_binary,
m_program = m_CreateProgramWithIL(m_context, &m_spirv_binary[0],
m_spirv_size, &err);
if (CL_SUCCESS != err)
throw unload_test_failure("clCreateProgramWithIL()", err);
@@ -347,7 +378,7 @@ public:
}
private:
void *m_spirv_binary;
std::vector<unsigned char> m_spirv_binary;
size_t m_spirv_size;
bool m_enabled;

View File

@@ -4,47 +4,3 @@ static const char write_kernel_source[] = R"(
kernel void write_kernel(global unsigned int *p) {
*p = 42;
})";
/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm64 */
static std::array<unsigned char, 216> write_kernel_64_spv{
{ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
};
/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm32 */
static std::array<unsigned char, 216> write_kernel_32_spv{
{ 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
};

View File

@@ -284,6 +284,11 @@ int main( int argc, const char **argv )
static int ParseArgs( int argc, const char **argv )
{
if (gListTests)
{
return 0;
}
gArgList = (const char **)calloc( argc, sizeof( char*) );
if( NULL == gArgList )

View File

@@ -1,9 +1,5 @@
set(MODULE_NAME CONVERSIONS)
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
add_cxx_flag_if_supported(-Wno-narrowing)
endif()
set (${MODULE_NAME}_SOURCES
Sleep.cpp test_conversions.cpp basic_test_conversions.cpp
)

View File

@@ -13,9 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/mathHelpers.h"
#include "harness/testHarness.h"
#include "harness/compat.h"
#include "harness/ThreadPool.h"
#include "harness/parseParameters.h"
#if defined(__APPLE__)
#include <sys/sysctl.h>
@@ -52,17 +54,17 @@
#include "basic_test_conversions.h"
#if defined(_WIN32)
#if defined(_M_IX86) || defined(_M_X64)
#include <mmintrin.h>
#include <emmintrin.h>
#else // !_WIN32
#else
#if defined(__SSE__)
#include <xmmintrin.h>
#endif
#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#endif // _WIN32
#endif
cl_context gContext = NULL;
cl_command_queue gQueue = NULL;
@@ -76,7 +78,6 @@ cl_mem gInBuffer;
cl_mem gOutBuffers[kCallStyleCount];
size_t gComputeDevices = 0;
uint32_t gDeviceFrequency = 0;
int gWimpyMode = 0;
int gWimpyReductionFactor = 128;
int gSkipTesting = 0;
int gForceFTZ = 0;
@@ -955,24 +956,6 @@ void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
// destroyed automatically soon after we exit.
}
template <typename T> static bool isnan_fp(const T &v)
{
if (std::is_same<T, cl_half>::value)
{
uint16_t h_exp = (((cl_half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
uint16_t h_mant = ((cl_half)v) & 0x3FF;
return (h_exp == 0x1F && h_mant != 0);
}
else
{
#if !defined(_WIN32)
return std::isnan(v);
#else
return _isnan(v);
#endif
}
}
template <typename InType>
void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType, void *input)
{

View File

@@ -80,7 +80,6 @@ extern int gHasDouble;
extern int gTestDouble;
extern int gHasHalfs;
extern int gTestHalfs;
extern int gWimpyMode;
extern int gWimpyReductionFactor;
extern int gSkipTesting;
extern int gMinVectorSize;

View File

@@ -343,7 +343,7 @@ float DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int(float f)
volatile float x = f;
float magicVal = magic[f < 0];
#if defined(__SSE__)
#if defined(__SSE__) || _M_IX86_FP == 1
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
// incorrectly
__m128 v = _mm_set_ss(x);
@@ -376,7 +376,7 @@ DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int_and_clamp(double f)
{
volatile double x = f;
double magicVal = magic[f < 0];
#if defined(__SSE2__) || defined(_MSC_VER)
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
// Defeat x87 based arithmetic, which cant do FTZ, and will round this
// incorrectly
__m128d v = _mm_set_sd(x);
@@ -479,7 +479,7 @@ void DataInfoSpec<InType, OutType, InFP, OutFP>::conv(OutType *out, InType *in)
{
if (std::is_same<cl_double, OutType>::value)
{
#if defined(_MSC_VER)
#if defined(_M_IX86) || defined(_M_X64)
double result;
if (std::is_same<cl_ulong, InType>::value)

View File

@@ -112,6 +112,35 @@ int main(int argc, const char **argv)
int error;
argc = parseCustomParam(argc, argv);
if (gListTests)
{
for (unsigned dst = 0; dst < kTypeCount; dst++)
{
for (unsigned src = 0; src < kTypeCount; src++)
{
for (unsigned sat = 0; sat < 2; sat++)
{
// skip illegal saturated conversions to float type
if (gSaturationNames[sat] == std::string("_sat")
&& (gTypeNames[dst] == std::string("float")
|| gTypeNames[dst] == std::string("half")
|| gTypeNames[dst] == std::string("double")))
{
continue;
}
for (unsigned rnd = 0; rnd < kRoundingModeCount; rnd++)
{
vlog("\t%s\n",
(std::string(gTypeNames[dst])
+ gSaturationNames[sat] + gRoundingModeNames[rnd]
+ "_" + gTypeNames[src])
.c_str());
}
}
}
}
return 0;
}
if (argc == -1)
{
return 1;
@@ -218,7 +247,6 @@ static int ParseArgs(int argc, const char **argv)
case 'h': gTestHalfs ^= 1; break;
case 'l': gSkipTesting ^= 1; break;
case 'm': gMultithread ^= 1; break;
case 'w': gWimpyMode ^= 1; break;
case '[':
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
break;
@@ -287,14 +315,6 @@ static int ParseArgs(int argc, const char **argv)
}
}
// Check for the wimpy mode environment variable
if (getenv("CL_WIMPY_MODE"))
{
vlog("\n");
vlog("*** Detected CL_WIMPY_MODE env ***\n");
gWimpyMode = 1;
}
vlog("\n");
PrintArch();
@@ -335,9 +355,6 @@ static void PrintUsage(void)
vlog("\t\t-l\tToggle link check mode. When on, testing is skipped, and we "
"just check to see that the kernels build. (Off by default.)\n");
vlog("\t\t-m\tToggle Multithreading. (On by default.)\n");
vlog("\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very "
"small subset of the tests for each fn. NOT A VALID TEST! (Off by "
"default.)\n");
vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
"1-12, default factor(%u)\n",
gWimpyReductionFactor);

View File

@@ -1,22 +1,4 @@
if(WIN32)
set(D3D10_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include)
if(${ARCH} STREQUAL "i686")
set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86)
endif(${ARCH} STREQUAL "i686")
if(${ARCH} STREQUAL "x86_64")
set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64)
endif(${ARCH} STREQUAL "x86_64")
list(APPEND CLConform_INCLUDE_DIR ${D3D10_INCLUDE_DIR})
include_directories (${CLConform_SOURCE_DIR}/test_common/harness
${CLConform_INCLUDE_DIR} )
link_directories(${CL_LIB_DIR}, ${D3D10_LIB_DIR})
list(APPEND CLConform_LIBRARIES d3d10 dxgi)
set(MODULE_NAME D3D10)
set(${MODULE_NAME}_SOURCES
@@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES
harness.cpp
)
set_source_files_properties(
${MODULE_NAME}_SOURCES
PROPERTIES LANGUAGE CXX)
list(APPEND CLConform_LIBRARIES d3d10 dxgi)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include(../CMakeCommon.txt)
endif(WIN32)
else()
message(STATUS "D3D10 tests are only supported on Windows.")
endif()

View File

@@ -1,22 +1,4 @@
if(WIN32)
set(D3D11_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include)
if(${ARCH} STREQUAL "i686")
set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86)
endif(${ARCH} STREQUAL "i686")
if(${ARCH} STREQUAL "x86_64")
set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64)
endif(${ARCH} STREQUAL "x86_64")
list(APPEND CLConform_INCLUDE_DIR ${D3D11_INCLUDE_DIR})
include_directories (${CLConform_SOURCE_DIR}/test_common/harness
${CLConform_INCLUDE_DIR} )
link_directories(${CL_LIB_DIR}, ${D3D11_LIB_DIR})
list(APPEND CLConform_LIBRARIES d3d11 dxgi)
set(MODULE_NAME D3D11)
set(${MODULE_NAME}_SOURCES
@@ -28,10 +10,9 @@ set(${MODULE_NAME}_SOURCES
harness.cpp
)
set_source_files_properties(
${MODULE_NAME}_SOURCES
PROPERTIES LANGUAGE CXX)
list(APPEND CLConform_LIBRARIES d3d11 dxgi)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include(../CMakeCommon.txt)
endif(WIN32)
else()
message(STATUS "D3D11 tests are only supported on Windows.")
endif()

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -25,7 +26,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
// clang-format off
static const char* enqueue_simple_block[] = { R"(

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -25,7 +26,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
#define BITS_DEPTH 28
static const char* enqueue_flags_wait_kernel_simple[] =

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -24,10 +25,7 @@
#include <time.h>
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
static const char enqueue_block_multi_queue[] =
NL "#define BLOCK_COMPLETED 0"
NL "#define BLOCK_SUBMITTED 1"

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <algorithm>
#include <vector>
@@ -26,7 +27,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
static const char *helper_ndrange_1d_glo[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -25,7 +26,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
static int nestingLevel = 3;
static const char* enqueue_1D_wg_size_single[] =

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -25,7 +26,6 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
static const char* multi_queue_simple_block1[] =
{
NL, "void block_fn(size_t tid, int mul, __global int* res)"

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <algorithm>
#include <vector>
@@ -24,8 +25,6 @@
#include "utils.h"
#include <time.h>
extern int gWimpyMode;
#ifdef CL_VERSION_2_0
static const char* enqueue_block_first_kernel[] =

View File

@@ -25,7 +25,6 @@
#include "utils.h"
std::string gKernelName;
int gWimpyMode = 0;
test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device);
@@ -71,11 +70,6 @@ int main(int argc, const char *argv[])
gKernelName = std::string(argv[i + 1]);
argsRemoveNum += 2;
}
if (strcmp(argv[i], "-w") == 0 ){
gWimpyMode = 1;
argsRemoveNum += 1;
}
if (argsRemoveNum > 0) {
for (int j = i; j < (argc - argsRemoveNum); ++j)

View File

@@ -17,6 +17,7 @@
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/parseParameters.h"
#include <vector>
@@ -27,7 +28,6 @@
#ifdef CL_VERSION_2_0
static int gNestingLevel = 4;
extern int gWimpyMode;
static const char* enqueue_nested_blocks_single[] =
{

View File

@@ -5,6 +5,9 @@
add_subdirectory( cl_ext_cxx_for_opencl )
add_subdirectory( cl_khr_command_buffer )
add_subdirectory( cl_khr_dx9_media_sharing )
if(ANDROID_PLATFORM GREATER 28)
add_subdirectory( cl_khr_external_memory_ahb )
endif ()
add_subdirectory( cl_khr_external_memory_dma_buf )
add_subdirectory( cl_khr_semaphore )
add_subdirectory( cl_khr_kernel_clock )

View File

@@ -435,3 +435,40 @@ bool InterleavedEnqueueTest::Skip()
{
return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
}
cl_int EnqueueAndReleaseTest::Run()
{
cl_int error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
cl_int pattern = 42;
error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
data_size(), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, nullptr,
nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
// Calls release on cl_command_buffer_khr handle inside wrapper class, and
// sets the handle to nullptr, so that release doesn't get called again at
// end of test when wrapper object is destroyed.
command_buffer.reset();
std::vector<cl_int> output_data(num_elements);
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
output_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
}
return CL_SUCCESS;
}

View File

@@ -128,6 +128,15 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest
bool Skip() override;
};
// Test releasing a command-buffer after it has been submitted for execution,
// but before the user has waited on completion of the enqueue.
struct EnqueueAndReleaseTest : public BasicCommandBufferTest
{
using BasicCommandBufferTest::BasicCommandBufferTest;
cl_int Run() override;
};
template <class T>
int MakeAndRunTest(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)

View File

@@ -44,3 +44,9 @@ REGISTER_TEST(explicit_flush)
return MakeAndRunTest<ExplicitFlushTest>(device, context, queue,
num_elements);
}
REGISTER_TEST(enqueue_and_release)
{
return MakeAndRunTest<EnqueueAndReleaseTest>(device, context, queue,
num_elements);
}

View File

@@ -14,6 +14,8 @@ set(${MODULE_NAME}_SOURCES
mutable_command_multiple_dispatches.cpp
mutable_command_iterative_arg_update.cpp
mutable_command_work_groups.cpp
mutable_command_work_dim.cpp
mutable_command_update_state.cpp
../basic_command_buffer.cpp
)

View File

@@ -135,7 +135,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest
for (size_t i = 0; i < num_elements; i++)
if (i >= update_global_size && global_work_size != resultData[i])
{
log_error("Data failed to verify: update_global_size != "
log_error("Data failed to verify: global_work_size != "
"resultData[%zu]=%d\n",
i, resultData[i]);
return TEST_FAIL;
@@ -154,7 +154,7 @@ struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest
size_t info_global_size = 0;
const size_t update_global_size = 3;
const size_t sizeToAllocate = global_work_size;
const size_t sizeToAllocate = global_work_size * sizeof(cl_int);
const size_t num_elements = sizeToAllocate / sizeof(cl_int);
cl_mutable_command_khr command = nullptr;
};

View File

@@ -116,26 +116,6 @@ struct PropertiesArray : public InfoMutableCommandBufferTest
: InfoMutableCommandBufferTest(device, context, queue)
{}
virtual bool Skip() override
{
Version device_version = get_device_cl_version(device);
if ((device_version >= Version(3, 0))
|| is_extension_available(device, "cl_khr_extended_versioning"))
{
cl_version extension_version = get_extension_version(
device, "cl_khr_command_buffer_mutable_dispatch");
if (extension_version != CL_MAKE_VERSION(0, 9, 3))
{
log_info("cl_khr_command_buffer_mutable_dispatch version 0.9.3 "
"is required to run the test, skipping.\n ");
return true;
}
}
return InfoMutableCommandBufferTest::Skip();
}
cl_int Run() override
{
cl_command_properties_khr props[] = {

View File

@@ -297,6 +297,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
{
cl_int offset;
std::vector<cl_int> output_buffer;
std::vector<cl_int> updated_output_buffer;
// 0:user event, 1:offset-buffer fill event, 2:kernel done event
clEventWrapper wait_events[3];
};
@@ -337,6 +338,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
* buffer_size_multiplier,
nullptr, &error);
test_error(error, "clCreateBuffer failed");
// Retain new output memory object until the end of the test.
retained_output_buffers.push_back(new_out_mem);
cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
&new_out_mem };
@@ -373,7 +376,7 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
error = clEnqueueReadBuffer(work_queue, new_out_mem, CL_FALSE,
pd.offset * sizeof(cl_int), data_size(),
pd.output_buffer.data(), 1,
pd.updated_output_buffer.data(), 1,
&pd.wait_events[2], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
@@ -388,8 +391,10 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
cl_int offset = static_cast<cl_int>(num_elements);
std::vector<SimulPassData> simul_passes = {
{ 0, std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements) }
{ 0, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) },
{ offset, std::vector<cl_int>(num_elements),
std::vector<cl_int>(num_elements) }
};
for (auto&& pass : simul_passes)
@@ -407,13 +412,26 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
test_error(error, "clFinish failed");
// verify the result buffers
for (auto&& pass : simul_passes)
auto& first_pass_output = simul_passes[0].output_buffer;
auto& first_pass_updated_output = simul_passes[0].updated_output_buffer;
auto& second_pass_output = simul_passes[1].output_buffer;
auto& second_pass_updated_output =
simul_passes[1].updated_output_buffer;
for (size_t i = 0; i < num_elements; i++)
{
auto& res_data = pass.output_buffer;
for (size_t i = 0; i < num_elements; i++)
{
CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i);
}
// First pass:
// Before updating, out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_output[i], i);
// After updating, new_out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, first_pass_updated_output[i],
i);
// Second pass:
// Before updating, out_mem is filled with overwritten_pattern
CHECK_VERIFICATION_ERROR(overwritten_pattern, second_pass_output[i],
i);
// After updating, new_out_mem is copied from in_mem (pattern_pri)
CHECK_VERIFICATION_ERROR(pattern_pri, second_pass_updated_output[i],
i);
}
return CL_SUCCESS;
@@ -429,6 +447,8 @@ struct SimultaneousMutableDispatchTest : public BasicMutableCommandBufferTest
clKernelWrapper kernel_fill;
clProgramWrapper program_fill;
std::vector<clMemWrapper> retained_output_buffers;
const size_t test_global_work_size = 3 * sizeof(cl_int);
const cl_int pattern_pri = 42;

View File

@@ -0,0 +1,280 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testHarness.h"
#include "mutable_command_basic.h"
#include <CL/cl_ext.h>
#include <vector>
namespace {
////////////////////////////////////////////////////////////////////////////////
// Tests related to ensuring the state of the updated command-buffer is expected
// and the effects of operations on it don't have side effects on other objects.
//
// - Tests the updates applied to a command-buffer persist over all subsequent
// enqueues.
// - Tests interaction of `clSetKernelArg` with mutable-dispatch extension.
struct MutableDispatchUpdateStateTest : public BasicMutableCommandBufferTest
{
MutableDispatchUpdateStateTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: BasicMutableCommandBufferTest(device, context, queue),
buffer(nullptr), command(nullptr)
{}
bool Skip() override
{
if (BasicMutableCommandBufferTest::Skip()) return true;
cl_mutable_dispatch_fields_khr mutable_capabilities;
bool mutable_support =
!clGetDeviceInfo(
device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
return !mutable_support;
}
cl_int SetUpKernelArgs() override
{
cl_int error = CL_SUCCESS;
buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
num_elements * sizeof(cl_int), nullptr, &error);
test_error(error, "clCreateBuffer error");
// Zero initialize buffer
const cl_int zero_pattern = 0;
error = clEnqueueFillBuffer(
queue, buffer, &zero_pattern, sizeof(cl_int), 0,
num_elements * sizeof(cl_int), 0, nullptr, nullptr);
test_error(error, "clEnqueueFillBuffer failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
test_error(error, "Unable to set kernel argument 0");
return CL_SUCCESS;
}
cl_int SetUpKernel() override
{
const char *add_kernel =
R"(
__kernel void add_kernel(__global int *data, int value)
{
size_t tid = get_global_id(0);
data[tid] += value;
})";
cl_int error = create_single_kernel_helper(
context, &program, &kernel, 1, &add_kernel, "add_kernel");
test_error(error, "Creating kernel failed");
return CL_SUCCESS;
}
bool verify_result(cl_int ref)
{
std::vector<cl_int> data(num_elements);
cl_int error =
clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(),
data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
if (data[i] != ref)
{
log_error("Modified verification failed at index %zu: Got %d, "
"wanted %d\n",
i, data[i], ref);
return false;
}
}
return true;
}
clMemWrapper buffer;
cl_mutable_command_khr command;
};
struct MutableDispatchUpdatesPersistTest : public MutableDispatchUpdateStateTest
{
MutableDispatchUpdatesPersistTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: MutableDispatchUpdateStateTest(device, context, queue)
{}
cl_int Run() override
{
const cl_int original_val = 42;
cl_int error =
clSetKernelArg(kernel, 1, sizeof(original_val), &original_val);
test_error(error, "Unable to set kernel argument 1");
cl_command_properties_khr props[] = {
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
};
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
// Modify the command buffer before executing
const cl_int new_command_val = 5;
cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val),
&new_command_val };
cl_mutable_dispatch_config_khr dispatch_config{
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
&arg /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void *configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
const unsigned iterations = 5;
for (unsigned i = 0; i < iterations; i++)
{
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
}
// Check the results execution sequence is the clEnqueueNDRangeKernel
// value + the updated command-buffer value, not using the original
// command value in the operation.
constexpr cl_int ref = iterations * new_command_val;
return verify_result(ref) ? TEST_PASS : TEST_FAIL;
}
};
struct MutableDispatchSetKernelArgTest : public MutableDispatchUpdateStateTest
{
MutableDispatchSetKernelArgTest(cl_device_id device, cl_context context,
cl_command_queue queue)
: MutableDispatchUpdateStateTest(device, context, queue)
{}
cl_int Run() override
{
const cl_int original_val = 42;
cl_int error =
clSetKernelArg(kernel, 1, sizeof(original_val), &original_val);
test_error(error, "Unable to set kernel argument 1");
cl_command_properties_khr props[] = {
CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
};
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
// Set new kernel argument for later clEnqueueNDRangeKernel
const cl_int new_eager_val = 10;
error =
clSetKernelArg(kernel, 1, sizeof(new_eager_val), &new_eager_val);
test_error(error, "Unable to set kernel argument 1");
// Modify the command buffer before executing
const cl_int new_command_val = 5;
cl_mutable_dispatch_arg_khr arg{ 1, sizeof(new_command_val),
&new_command_val };
cl_mutable_dispatch_config_khr dispatch_config{
command,
1 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
0 /* work_dim - 0 means no change to dimensions */,
&arg /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
nullptr /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void *configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
// Eager kernel enqueue, followed by command-buffer enqueue
error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &num_elements,
nullptr, 0, nullptr, nullptr);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
// Check the results execution sequence is the clEnqueueNDRangeKernel
// value + the updated command-buffer value, not using the original
// command value in the operation.
constexpr cl_int ref = new_eager_val + new_command_val;
return verify_result(ref) ? TEST_PASS : TEST_FAIL;
}
};
}
REGISTER_TEST(mutable_dispatch_updates_persist)
{
return MakeAndRunTest<MutableDispatchUpdatesPersistTest>(
device, context, queue, num_elements);
}
REGISTER_TEST(mutable_dispatch_set_kernel_arg)
{
return MakeAndRunTest<MutableDispatchSetKernelArgTest>(device, context,
queue, num_elements);
}

View File

@@ -0,0 +1,225 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <extensionHelpers.h>
#include "mutable_command_basic.h"
#include <array>
#include <cstring>
#include <CL/cl_ext.h>
// mutable dispatch tests setting `work_dim` to the original 3D value
// behaves as expected.
struct MutableDispatchWorkDim : public InfoMutableCommandBufferTest
{
using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
MutableDispatchWorkDim(cl_device_id device, cl_context context,
cl_command_queue queue)
: InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int SetUp(int elements) override
{
result_data.resize(update_total_elements);
return InfoMutableCommandBufferTest::SetUp(elements);
}
bool Skip() override
{
cl_mutable_dispatch_fields_khr mutable_capabilities;
bool mutable_support =
!clGetDeviceInfo(
device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
&& (mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR);
return !mutable_support || InfoMutableCommandBufferTest::Skip();
}
bool Verify(cl_mem buffer, cl_uint expected_value, size_t total_elements)
{
std::memset(result_data.data(), 0, alloc_size);
cl_int error =
clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, alloc_size,
result_data.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < total_elements; i++)
{
if (result_data[i] != expected_value)
{
log_error("Data failed to verify at index %zu. "
"Expected %u, result was %u\n",
i, expected_value, result_data[i]);
return false;
}
}
return true;
}
cl_int Run() override
{
const char *global_size_kernel =
R"(
__kernel void three_dim(__global uint *dst0,
__global uint *dst1,
__global uint *dst2)
{
size_t gid = get_global_linear_id();
dst0[gid] = get_global_size(0);
dst1[gid] = get_global_size(1);
dst2[gid] = get_global_size(2);
})";
cl_int error = create_single_kernel_helper(
context, &program, &kernel, 1, &global_size_kernel, "three_dim");
test_error(error, "Creating kernel failed");
// Create a buffer for each of the three dimensions to write the
// global size into.
clMemWrapper stream1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
alloc_size, nullptr, &error);
test_error(error, "Creating test array failed");
clMemWrapper stream2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
alloc_size, nullptr, &error);
test_error(error, "Creating test array failed");
clMemWrapper stream3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
alloc_size, nullptr, &error);
test_error(error, "Creating test array failed");
// Set the arguments
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream1);
test_error(error, "Unable to set indexed kernel arguments");
error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream2);
test_error(error, "Unable to set indexed kernel arguments");
error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &stream3);
test_error(error, "Unable to set indexed kernel arguments");
// Command-buffer contains a single kernel
error = clCommandNDRangeKernelKHR(
command_buffer, nullptr, nullptr, kernel, work_dim, nullptr,
global_size_3D.data(), nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
error = clFinalizeCommandBufferKHR(command_buffer);
test_error(error, "clFinalizeCommandBufferKHR failed");
// Enqueue command-buffer and wait on completion
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
error = clFinish(queue);
test_error(error, "clFinish failed.");
// Verify results before any update
if (!Verify(stream1, global_size_3D[0], original_total_elements))
{
return TEST_FAIL;
}
if (!Verify(stream2, global_size_3D[1], original_total_elements))
{
return TEST_FAIL;
}
if (!Verify(stream3, global_size_3D[2], original_total_elements))
{
return TEST_FAIL;
}
// Update command with a mutable config where we use a different 3D
// global size, but hardcode `work_dim` to 3 (the original value).
cl_mutable_dispatch_config_khr dispatch_config{
command,
0 /* num_args */,
0 /* num_svm_arg */,
0 /* num_exec_infos */,
work_dim /* work_dim */,
nullptr /* arg_list */,
nullptr /* arg_svm_list - nullptr means no change*/,
nullptr /* exec_info_list */,
nullptr /* global_work_offset */,
update_global_size_3D.data() /* global_work_size */,
nullptr /* local_work_size */
};
cl_uint num_configs = 1;
cl_command_buffer_update_type_khr config_types[1] = {
CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
};
const void *configs[1] = { &dispatch_config };
error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
config_types, configs);
test_error(error, "clUpdateMutableCommandsKHR failed");
// Enqueue updated command-buffer
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
// Verify update is reflected in buffer output.
if (!Verify(stream1, update_global_size_3D[0], update_total_elements))
{
return TEST_FAIL;
}
if (!Verify(stream2, update_global_size_3D[1], update_total_elements))
{
return TEST_FAIL;
}
if (!Verify(stream3, update_global_size_3D[2], update_total_elements))
{
return TEST_FAIL;
}
return CL_SUCCESS;
}
static const cl_uint work_dim = 3;
// 3D global size of kernel command when created
static const size_t original_elements = 2;
static constexpr std::array<size_t, work_dim> global_size_3D = {
original_elements, original_elements, original_elements
};
// 3D global size to update kernel command to.
static const size_t update_elements = 4;
static constexpr std::array<size_t, work_dim> update_global_size_3D = {
update_elements, update_elements, update_elements
};
// Total number of work items in original and updated grids
static const size_t original_total_elements =
original_elements * original_elements * original_elements;
static const size_t update_total_elements =
update_elements * update_elements * update_elements;
// Size in bytes of each of the 3 cl_mem buffers (using the larger size)
static const size_t alloc_size = update_total_elements * sizeof(cl_uint);
cl_mutable_command_khr command = nullptr;
std::vector<cl_uint> result_data;
};
// get_global_linear() used in kernel is an OpenCL 2.0 API
REGISTER_TEST_VERSION(mutable_dispatch_work_dim, Version(2, 0))
{
return MakeAndRunTest<MutableDispatchWorkDim>(device, context, queue,
num_elements);
}

View File

@@ -250,9 +250,6 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
&trigger_event, &execute_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
// verify pending state
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
// execute command buffer
cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);

View File

@@ -124,8 +124,6 @@ struct EnqueueCommandBufferWithoutSimultaneousUseNotInPendingState
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
test_error(error, "State is not Pending");
return CL_SUCCESS;
}

View File

@@ -89,8 +89,6 @@ struct FinalizeCommandBufferNotRecordingState : public BasicCommandBufferTest
error = EnqueueCommandBuffer();
test_error(error, "EnqueueCommandBuffer failed");
error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
test_error(error, "State is not Pending");
error = clFinalizeCommandBufferKHR(command_buffer);
test_failure_error_ret(error, CL_INVALID_OPERATION,

View File

@@ -0,0 +1,12 @@
set(MODULE_NAME CL_KHR_EXTERNAL_MEMORY_AHB)
set(${MODULE_NAME}_SOURCES
main.cpp
test_ahb.cpp
test_ahb_negative.cpp
debug_ahb.cpp
)
link_libraries(OpenCL nativewindow)
include(../../CMakeCommon.txt)

View File

@@ -0,0 +1,193 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "debug_ahb.h"
constexpr AHardwareBuffer_UsageFlags flag_list[] = {
AHARDWAREBUFFER_USAGE_CPU_READ_RARELY,
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN,
AHARDWAREBUFFER_USAGE_CPU_WRITE_NEVER,
AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY,
AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN,
AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK,
AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE,
AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER,
AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY,
AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT,
AHARDWAREBUFFER_USAGE_VIDEO_ENCODE,
AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA,
AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER,
AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP,
AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE,
AHARDWAREBUFFER_USAGE_FRONT_BUFFER,
};
std::string
ahardwareBufferDecodeUsageFlagsToString(const AHardwareBuffer_UsageFlags flags)
{
if (flags == 0)
{
return "UNKNOWN FLAG";
}
std::vector<std::string> active_flags;
for (const auto flag : flag_list)
{
if (flag & flags)
{
active_flags.push_back(ahardwareBufferUsageFlagToString(flag));
}
}
if (active_flags.empty())
{
return "UNKNOWN FLAG";
}
return std::accumulate(active_flags.begin() + 1, active_flags.end(),
active_flags.front(),
[](std::string acc, const std::string& flag) {
return std::move(acc) + "|" + flag;
});
}
std::string
ahardwareBufferUsageFlagToString(const AHardwareBuffer_UsageFlags flag)
{
std::string result;
switch (flag)
{
case AHARDWAREBUFFER_USAGE_CPU_READ_NEVER:
result = "AHARDWAREBUFFER_USAGE_CPU_READ_NEVER";
break;
case AHARDWAREBUFFER_USAGE_CPU_READ_RARELY:
result = "AHARDWAREBUFFER_USAGE_CPU_READ_RARELY";
break;
case AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN:
result = "AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN";
break;
case AHARDWAREBUFFER_USAGE_CPU_READ_MASK:
result = "AHARDWAREBUFFER_USAGE_CPU_READ_MASK";
break;
case AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY:
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY";
break;
case AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN:
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN";
break;
case AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK:
result = "AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK";
break;
case AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE:
result = "AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE";
break;
case AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER:
result = "AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER";
break;
case AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY:
result = "AHARDWAREBUFFER_USAGE_COMPOSER_OVERLAY";
break;
case AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT:
result = "AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT";
break;
case AHARDWAREBUFFER_USAGE_VIDEO_ENCODE:
result = "AHARDWAREBUFFER_USAGE_VIDEO_ENCODE";
break;
case AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA:
result = "AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA";
break;
case AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER:
result = "AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER";
break;
case AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP:
result = "AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP";
break;
case AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE:
result = "AHARDWAREBUFFER_USAGE_GPU_MIPMAP_COMPLETE";
break;
default: result = "Unknown flag";
}
return result;
}
std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format)
{
std::string result;
switch (format)
{
case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
result = "AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT";
break;
case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_BLOB:
result = "AHARDWAREBUFFER_FORMAT_BLOB";
break;
case AHARDWAREBUFFER_FORMAT_D16_UNORM:
result = "AHARDWAREBUFFER_FORMAT_D16_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_D24_UNORM:
result = "AHARDWAREBUFFER_FORMAT_D24_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT:
result = "AHARDWAREBUFFER_FORMAT_D24_UNORM_S8_UINT";
break;
case AHARDWAREBUFFER_FORMAT_D32_FLOAT:
result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT";
break;
case AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT:
result = "AHARDWAREBUFFER_FORMAT_D32_FLOAT_S8_UINT";
break;
case AHARDWAREBUFFER_FORMAT_S8_UINT:
result = "AHARDWAREBUFFER_FORMAT_S8_UINT";
break;
case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
result = "AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420";
break;
case AHARDWAREBUFFER_FORMAT_YCbCr_P010:
result = "AHARDWAREBUFFER_FORMAT_YCbCr_P010";
break;
case AHARDWAREBUFFER_FORMAT_YCbCr_P210:
result = "AHARDWAREBUFFER_FORMAT_YCbCr_P210";
break;
case AHARDWAREBUFFER_FORMAT_R8_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R8_UNORM";
break;
case AHARDWAREBUFFER_FORMAT_R16_UINT:
result = "AHARDWAREBUFFER_FORMAT_R16_UINT";
break;
case AHARDWAREBUFFER_FORMAT_R16G16_UINT:
result = "AHARDWAREBUFFER_FORMAT_R16G16_UINT";
break;
case AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM:
result = "AHARDWAREBUFFER_FORMAT_R10G10B10A10_UNORM";
break;
}
return result;
}

View File

@@ -0,0 +1,42 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#pragma once
#include <android/hardware_buffer.h>
#include <string>
#include <vector>
#include <numeric>
#define CHECK_AHARDWARE_BUFFER_SUPPORT(ahardwareBuffer_Desc, format) \
if (!AHardwareBuffer_isSupported(&ahardwareBuffer_Desc)) \
{ \
const std::string usage_string = \
ahardwareBufferDecodeUsageFlagsToString( \
static_cast<AHardwareBuffer_UsageFlags>( \
ahardwareBuffer_Desc.usage)); \
log_info("Unsupported format %s:\n Usage flags %s\n Size (%u, " \
"%u, layers = %u)\n", \
ahardwareBufferFormatToString(format.aHardwareBufferFormat) \
.c_str(), \
usage_string.c_str(), ahardwareBuffer_Desc.width, \
ahardwareBuffer_Desc.height, ahardwareBuffer_Desc.layers); \
continue; \
}
std::string ahardwareBufferFormatToString(AHardwareBuffer_Format format);
std::string ahardwareBufferUsageFlagToString(AHardwareBuffer_UsageFlags flag);
std::string
ahardwareBufferDecodeUsageFlagsToString(AHardwareBuffer_UsageFlags flags);

View File

@@ -0,0 +1,23 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
int main(int argc, const char *argv[])
{
return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
test_registry::getInstance().definitions(), false, 0);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,246 @@
//
// Copyright (c) 2025 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include "harness/kernelHelpers.h"
#include "harness/imageHelpers.h"
#include "harness/errorHelpers.h"
#include <android/hardware_buffer.h>
#include "debug_ahb.h"
REGISTER_TEST(test_buffer_format_negative)
{
cl_int err = CL_SUCCESS;
if (!is_extension_available(device, "cl_khr_external_memory"))
{
log_info("cl_khr_external_memory is not supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
if (!is_extension_available(
device, "cl_khr_external_memory_android_hardware_buffer"))
{
log_info("cl_khr_external_memory_android_hardware_buffer is not "
"supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
aHardwareBufferDesc.width = 64;
aHardwareBufferDesc.height = 1;
aHardwareBufferDesc.layers = 1;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
{
const std::string usage_string =
ahardwareBufferDecodeUsageFlagsToString(
static_cast<AHardwareBuffer_UsageFlags>(
aHardwareBufferDesc.usage));
log_info(
"Unsupported format %s, usage flags %s\n",
ahardwareBufferFormatToString(
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
.c_str(),
usage_string.c_str());
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer *aHardwareBuffer = nullptr;
const int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
return TEST_FAIL;
}
log_info("Testing %s\n",
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
aHardwareBufferDesc.format))
.c_str());
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
};
cl_mem buffer = clCreateBufferWithProperties(
context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
test_assert_error(err == CL_INVALID_OPERATION,
"To create a buffer the aHardwareFormat must be "
"AHARDWAREBUFFER_FORMAT_BLOB");
if (buffer != nullptr)
{
test_error(clReleaseMemObject(buffer), "Failed to release buffer");
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
return TEST_PASS;
}
REGISTER_TEST(test_buffer_size_negative)
{
cl_int err = CL_SUCCESS;
constexpr size_t buffer_size = 64;
if (!is_extension_available(device, "cl_khr_external_memory"))
{
log_info("cl_khr_external_memory is not supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
if (!is_extension_available(
device, "cl_khr_external_memory_android_hardware_buffer"))
{
log_info("cl_khr_external_memory_android_hardware_buffer is not "
"supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
aHardwareBufferDesc.width = buffer_size;
aHardwareBufferDesc.height = 1;
aHardwareBufferDesc.layers = 1;
aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
{
const std::string usage_string =
ahardwareBufferDecodeUsageFlagsToString(
static_cast<AHardwareBuffer_UsageFlags>(
aHardwareBufferDesc.usage));
log_info(
"Unsupported format %s, usage flags %s\n",
ahardwareBufferFormatToString(
static_cast<AHardwareBuffer_Format>(aHardwareBufferDesc.format))
.c_str(),
usage_string.c_str());
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer *aHardwareBuffer = nullptr;
const int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
return TEST_FAIL;
}
log_info("Testing %s\n",
ahardwareBufferFormatToString(static_cast<AHardwareBuffer_Format>(
aHardwareBufferDesc.format))
.c_str());
cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
};
cl_mem buffer = clCreateBufferWithProperties(
context, props, CL_MEM_READ_WRITE, buffer_size / 2, nullptr, &err);
test_assert_error(err == CL_INVALID_BUFFER_SIZE,
"Wrong error value returned");
if (buffer != nullptr)
{
test_error(clReleaseMemObject(buffer), "Failed to release buffer");
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
return TEST_PASS;
}
REGISTER_TEST(test_images_negative)
{
cl_int err = CL_SUCCESS;
if (!is_extension_available(device, "cl_khr_external_memory"))
{
log_info("cl_khr_external_memory is not supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
if (!is_extension_available(
device, "cl_khr_external_memory_android_hardware_buffer"))
{
log_info("cl_khr_external_memory_android_hardware_buffer is not "
"supported on this platform. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
aHardwareBufferDesc.usage = static_cast<AHardwareBuffer_UsageFlags>(
AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
| AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
| AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE
| AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER);
aHardwareBufferDesc.width = 64;
aHardwareBufferDesc.height = 64;
aHardwareBufferDesc.layers = 1;
AHardwareBuffer *aHardwareBuffer = nullptr;
int ahb_result =
AHardwareBuffer_allocate(&aHardwareBufferDesc, &aHardwareBuffer);
if (ahb_result != 0)
{
log_error("AHardwareBuffer_allocate failed with code %d\n", ahb_result);
return TEST_FAIL;
}
const cl_mem_properties props[] = {
CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
reinterpret_cast<cl_mem_properties>(aHardwareBuffer), 0
};
constexpr cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 };
cl_mem image =
clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
&image_format, nullptr, nullptr, &err);
test_assert_error(err == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
"Wrong error value returned");
if (image != nullptr)
{
test_error(clReleaseMemObject(image), "Failed to release image");
}
constexpr cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, 64, 64 };
image = clCreateImageWithProperties(context, props, CL_MEM_READ_WRITE,
nullptr, &image_desc, nullptr, &err);
test_assert_error(err == CL_INVALID_IMAGE_DESCRIPTOR,
"Wrong error value returned");
if (image != nullptr)
{
test_error(clReleaseMemObject(image), "Failed to release image");
}
AHardwareBuffer_release(aHardwareBuffer);
aHardwareBuffer = nullptr;
return TEST_PASS;
}

View File

@@ -388,109 +388,6 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
return TEST_PASS;
}
// Confirm that signal a semaphore with no event dependencies will not result
// in an implicit dependency on everything previously submitted
REGISTER_TEST_VERSION(external_semaphores_simple_2, Version(1, 2))
{
REQUIRE_EXTENSION("cl_khr_external_semaphore");
if (init_vulkan_device(1, &device))
{
log_info("Cannot initialise Vulkan. "
"Skipping test.\n");
return TEST_SKIPPED_ITSELF;
}
VulkanDevice vkDevice;
// Obtain pointers to semaphore's API
GET_PFN(device, clEnqueueSignalSemaphoresKHR);
GET_PFN(device, clEnqueueWaitSemaphoresKHR);
std::vector<VulkanExternalSemaphoreHandleType>
vkExternalSemaphoreHandleTypeList =
getSupportedInteropExternalSemaphoreHandleTypes(device, vkDevice);
if (vkExternalSemaphoreHandleTypeList.empty())
{
test_fail("No external semaphore handle types found\n");
}
for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
vkExternalSemaphoreHandleTypeList)
{
log_info_semaphore_type(vkExternalSemaphoreHandleType);
VulkanSemaphore vkVk2CLSemaphore(vkDevice,
vkExternalSemaphoreHandleType);
auto sema_ext = clExternalImportableSemaphore(
vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, device);
cl_int err = CL_SUCCESS;
// Create ooo queue
clCommandQueueWrapper queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
test_error(err, "Could not create command queue");
// Create user event
clEventWrapper user_event = clCreateUserEvent(context, &err);
test_error(err, "Could not create user event");
// Create Kernel
clProgramWrapper program;
clKernelWrapper kernel;
err = create_single_kernel_helper(context, &program, &kernel, 1,
&source, "empty");
test_error(err, "Could not create kernel");
// Enqueue task_1 (dependency on user_event)
clEventWrapper task_1_event;
err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
test_error(err, "Could not enqueue task 1");
// Signal semaphore
clEventWrapper signal_event;
err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
nullptr, 0, nullptr, &signal_event);
test_error(err, "Could not signal semaphore");
// Wait semaphore
clEventWrapper wait_event;
err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
nullptr, 0, nullptr, &wait_event);
test_error(err, "Could not wait semaphore");
// Flush and delay
err = clFlush(queue);
test_error(err, "Could not flush queue");
cl_event event_list[] = { signal_event, wait_event };
err = clWaitForEvents(2, event_list);
test_error(err, "Could not wait on events");
// Ensure all events are completed except for task_1
test_assert_event_inprogress(task_1_event);
test_assert_event_complete(signal_event);
test_assert_event_complete(wait_event);
// Complete user_event
err = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(err, "Could not set user event to CL_COMPLETE");
// Finish
err = clFinish(queue);
test_error(err, "Could not finish queue");
// Ensure all events are completed
test_assert_event_complete(task_1_event);
test_assert_event_complete(signal_event);
test_assert_event_complete(wait_event);
}
return TEST_PASS;
}
// Confirm that a semaphore can be reused multiple times
REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
{

View File

@@ -76,87 +76,6 @@ struct SimpleSemaphore1 : public SemaphoreTestBase
}
};
struct SimpleSemaphore2 : public SemaphoreTestBase
{
SimpleSemaphore2(cl_device_id device, cl_context context,
cl_command_queue queue, cl_int nelems)
: SemaphoreTestBase(device, context, queue, nelems)
{}
cl_int Run() override
{
cl_int err = CL_SUCCESS;
// Create ooo queue
clCommandQueueWrapper queue = clCreateCommandQueue(
context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
test_error(err, "Could not create command queue");
// Create semaphore
cl_semaphore_properties_khr sema_props[] = {
static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_TYPE_BINARY_KHR),
0
};
semaphore =
clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
test_error(err, "Could not create semaphore");
// Create user event
clEventWrapper user_event = clCreateUserEvent(context, &err);
test_error(err, "Could not create user event");
// Create Kernel
clProgramWrapper program;
clKernelWrapper kernel;
err = create_single_kernel_helper(context, &program, &kernel, 1,
&source, "empty");
test_error(err, "Could not create kernel");
// Enqueue task_1 (dependency on user_event)
clEventWrapper task_1_event;
err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
test_error(err, "Could not enqueue task 1");
// Signal semaphore
clEventWrapper signal_event;
err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
nullptr, &signal_event);
test_error(err, "Could not signal semaphore");
// Wait semaphore
clEventWrapper wait_event;
err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
nullptr, &wait_event);
test_error(err, "Could not wait semaphore");
// Flush and delay
err = clFlush(queue);
test_error(err, "Could not flush queue");
std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
// Ensure all events are completed except for task_1
test_assert_event_inprogress(task_1_event);
test_assert_event_complete(signal_event);
test_assert_event_complete(wait_event);
// Complete user_event
err = clSetUserEventStatus(user_event, CL_COMPLETE);
test_error(err, "Could not set user event to CL_COMPLETE");
// Finish
err = clFinish(queue);
test_error(err, "Could not finish queue");
// Ensure all events are completed
test_assert_event_complete(task_1_event);
test_assert_event_complete(signal_event);
test_assert_event_complete(wait_event);
return CL_SUCCESS;
}
};
struct SemaphoreReuse : public SemaphoreTestBase
{
SemaphoreReuse(cl_device_id device, cl_context context,
@@ -387,14 +306,6 @@ REGISTER_TEST_VERSION(semaphores_simple_1, Version(1, 2))
num_elements);
}
// Confirm that signal a semaphore with no event dependencies will not result
// in an implicit dependency on everything previously submitted
REGISTER_TEST_VERSION(semaphores_simple_2, Version(1, 2))
{
return MakeAndRunTest<SimpleSemaphore2>(device, context, queue,
num_elements);
}
// Confirm that a semaphore can be reused multiple times
REGISTER_TEST_VERSION(semaphores_reuse, Version(1, 2))
{

View File

@@ -16,6 +16,7 @@
#include "harness/compat.h"
#include "harness/kernelHelpers.h"
#include "harness/testHarness.h"
#include "harness/parseParameters.h"
#include <string.h>

View File

@@ -58,7 +58,6 @@ uint32_t gDeviceFrequency = 0;
uint32_t gComputeDevices = 0;
size_t gMaxThreadGroupSize = 0;
size_t gWorkGroupSize = 0;
bool gWimpyMode = false;
int gWimpyReductionFactor = 512;
int gTestDouble = 0;
bool gHostReset = false;

View File

@@ -74,7 +74,6 @@ extern bool gHostReset;
// gWimpyMode indicates if we run the test in wimpy mode where we limit the
// size of 32 bit ranges to a much smaller set. This is meant to be used
// as a smoke test
extern bool gWimpyMode;
extern int gWimpyReductionFactor;
uint64_t ReadTime( void );

View File

@@ -83,13 +83,6 @@ int main (int argc, const char **argv )
if( (error = ParseArgs( argc, argv )) )
goto exit;
if (gIsEmbedded) {
vlog( "\tProfile: Embedded\n" );
}else
{
vlog( "\tProfile: Full\n" );
}
fflush( stdout );
error = runTestHarnessWithCheck(
argCount, argList, test_registry::getInstance().num_tests(),
@@ -114,6 +107,10 @@ exit:
static int ParseArgs( int argc, const char **argv )
{
if (gListTests)
{
return 0;
}
int i;
argList = (const char **)calloc(argc, sizeof(char *));
if( NULL == argList )
@@ -181,9 +178,6 @@ static int ParseArgs( int argc, const char **argv )
case 'r': gHostReset = true; break;
case 'w': // Wimpy mode
gWimpyMode = true;
break;
case '[':
parseWimpyReductionFactor( arg, gWimpyReductionFactor);
break;
@@ -202,12 +196,6 @@ static int ParseArgs( int argc, const char **argv )
}
}
if (getenv("CL_WIMPY_MODE")) {
vlog( "\n" );
vlog( "*** Detected CL_WIMPY_MODE env ***\n" );
gWimpyMode = 1;
}
PrintArch();
if( gWimpyMode )
{
@@ -217,6 +205,16 @@ static int ParseArgs( int argc, const char **argv )
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
}
if (gIsEmbedded)
{
vlog("\tProfile: Embedded\n");
}
else
{
vlog("\tProfile: Full\n");
}
return 0;
}
@@ -227,7 +225,6 @@ static void PrintUsage( void )
"supported)\n");
vlog("\t\t-t\tToggle reporting performance data.\n");
vlog("\t\t-r\tReset buffers on host instead of on device.\n");
vlog("\t\t-w\tRun in wimpy mode\n");
vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
"1-12, default factor(%u)\n",
gWimpyReductionFactor);

View File

@@ -248,7 +248,7 @@ clMemWrapper create_image(cl_context context, cl_command_queue queue,
cl_mem_flags buffer_flags = CL_MEM_READ_WRITE;
if (enable_pitch)
{
if (version.major() == 1)
if (version.get_major() == 1)
{
host_ptr = malloc(imageInfo->rowPitch);
}

View File

@@ -878,18 +878,16 @@ int test_read_image(cl_context context, cl_command_queue queue,
numTries, numClamped,
true, lod);
log_error("Step by step:\n");
FloatPixel temp =
sample_image_pixel_float_offset(
imagePtr, imageInfo,
xOffsetValues[j],
yOffsetValues[j],
zOffsetValues[j],
norm_offset_x,
norm_offset_y,
norm_offset_z,
imageSampler, tempOut,
1 /*verbose*/,
&hasDenormals, lod);
sample_image_pixel_float_offset(
imagePtr, imageInfo,
xOffsetValues[j],
yOffsetValues[j],
zOffsetValues[j],
norm_offset_x,
norm_offset_y,
norm_offset_z, imageSampler,
tempOut, 1 /*verbose*/,
&hasDenormals, lod);
log_error(
"\tulps: %2.2f (max "
"allowed: %2.2f)\n\n",
@@ -931,9 +929,6 @@ int test_read_image(cl_context context, cl_command_queue queue,
// Validate float results
float *resultPtr = (float *)(char *)resultValues;
float expected[4], error = 0.0f;
float maxErr = get_max_relative_error(
imageInfo->format, imageSampler, image_type_3D,
CL_FILTER_LINEAR == imageSampler->filter_mode);
for (size_t z = 0, j = 0; z < depth_lod; z++)
{
@@ -1242,26 +1237,25 @@ int test_read_image(cl_context context, cl_command_queue queue,
j, numTries, numClamped,
true, lod);
log_error("Step by step:\n");
FloatPixel temp =
sample_image_pixel_float_offset(
imagePtr, imageInfo,
xOffsetValues[j],
(num_dimensions > 1)
? yOffsetValues[j]
: 0.0f,
image_type_3D
? zOffsetValues[j]
: 0.0f,
norm_offset_x,
(num_dimensions > 1)
? norm_offset_y
: 0.0f,
image_type_3D
? norm_offset_z
: 0.0f,
imageSampler, tempOut,
1 /*verbose*/,
&hasDenormals, lod);
sample_image_pixel_float_offset(
imagePtr, imageInfo,
xOffsetValues[j],
(num_dimensions > 1)
? yOffsetValues[j]
: 0.0f,
image_type_3D
? zOffsetValues[j]
: 0.0f,
norm_offset_x,
(num_dimensions > 1)
? norm_offset_y
: 0.0f,
image_type_3D
? norm_offset_z
: 0.0f,
imageSampler, tempOut,
1 /*verbose*/,
&hasDenormals, lod);
log_error(
"\tulps: %2.2f, %2.2f, "
"%2.2f, %2.2f (max "
@@ -1632,26 +1626,25 @@ int test_read_image(cl_context context, cl_command_queue queue,
j, numTries, numClamped,
true, lod);
log_error("Step by step:\n");
FloatPixel temp =
sample_image_pixel_float_offset(
imagePtr, imageInfo,
xOffsetValues[j],
(num_dimensions > 1)
? yOffsetValues[j]
: 0.0f,
image_type_3D
? zOffsetValues[j]
: 0.0f,
norm_offset_x,
(num_dimensions > 1)
? norm_offset_y
: 0.0f,
image_type_3D
? norm_offset_z
: 0.0f,
imageSampler, tempOut,
1 /*verbose*/,
&hasDenormals, lod);
sample_image_pixel_float_offset(
imagePtr, imageInfo,
xOffsetValues[j],
(num_dimensions > 1)
? yOffsetValues[j]
: 0.0f,
image_type_3D
? zOffsetValues[j]
: 0.0f,
norm_offset_x,
(num_dimensions > 1)
? norm_offset_y
: 0.0f,
image_type_3D
? norm_offset_z
: 0.0f,
imageSampler, tempOut,
1 /*verbose*/,
&hasDenormals, lod);
log_error(
"\tulps: %2.2f, %2.2f, "
"%2.2f, %2.2f (max "

View File

@@ -945,7 +945,7 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double
// Validate float results
float *resultPtr = (float *)(char *)resultValues;
float expected[4], error=0.0f;
float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
for( size_t y = 0, j = 0; y < height_lod; y++ )
{
for( size_t x = 0; x < width_lod; x++, j++ )

View File

@@ -1,9 +1,5 @@
set(MODULE_NAME INTEGER_OPS)
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
add_cxx_flag_if_supported(-Wno-narrowing)
endif()
set(${MODULE_NAME}_SOURCES
main.cpp
test_int_basic_ops.cpp

View File

@@ -26,14 +26,32 @@
void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d )
{
static const cl_long sUniqueValues[] = { 0x3333333333333333LL, 0x5555555555555555LL, 0x9999999999999999LL, 0xaaaaaaaaaaaaaaaaLL, 0xccccccccccccccccLL,
0x3030303030303030LL, 0x5050505050505050LL, 0x9090909090909090LL, 0xa0a0a0a0a0a0a0a0LL, 0xc0c0c0c0c0c0c0c0LL, 0xf0f0f0f0f0f0f0f0LL,
0x0303030303030303LL, 0x0505050505050505LL, 0x0909090909090909LL, 0x0a0a0a0a0a0a0a0aLL, 0x0c0c0c0c0c0c0c0cLL, 0x0f0f0f0f0f0f0f0fLL,
0x3300330033003300LL, 0x5500550055005500LL, 0x9900990099009900LL, 0xaa00aa00aa00aa00LL, 0xcc00cc00cc00cc00LL, 0xff00ff00ff00ff00LL,
0x0033003300330033LL, 0x0055005500550055LL, 0x0099009900990099LL, 0x00aa00aa00aa00aaLL, 0x00cc00cc00cc00ccLL, 0x00ff00ff00ff00ffLL,
0x3333333300000000LL, 0x5555555500000000LL, 0x9999999900000000LL, 0xaaaaaaaa00000000LL, 0xcccccccc00000000LL, 0xffffffff00000000LL,
0x0000000033333333LL, 0x0000000055555555LL, 0x0000000099999999LL, 0x00000000aaaaaaaaLL, 0x00000000ccccccccLL, 0x00000000ffffffffLL,
0x3333000000003333LL, 0x5555000000005555LL, 0x9999000000009999LL, 0xaaaa00000000aaaaLL, 0xcccc00000000ccccLL, 0xffff00000000ffffLL};
static const cl_long sUniqueValues[] = {
(cl_long)0x3333333333333333LL, (cl_long)0x5555555555555555LL,
(cl_long)0x9999999999999999LL, (cl_long)0xaaaaaaaaaaaaaaaaLL,
(cl_long)0xccccccccccccccccLL, (cl_long)0x3030303030303030LL,
(cl_long)0x5050505050505050LL, (cl_long)0x9090909090909090LL,
(cl_long)0xa0a0a0a0a0a0a0a0LL, (cl_long)0xc0c0c0c0c0c0c0c0LL,
(cl_long)0xf0f0f0f0f0f0f0f0LL, (cl_long)0x0303030303030303LL,
(cl_long)0x0505050505050505LL, (cl_long)0x0909090909090909LL,
(cl_long)0x0a0a0a0a0a0a0a0aLL, (cl_long)0x0c0c0c0c0c0c0c0cLL,
(cl_long)0x0f0f0f0f0f0f0f0fLL, (cl_long)0x3300330033003300LL,
(cl_long)0x5500550055005500LL, (cl_long)0x9900990099009900LL,
(cl_long)0xaa00aa00aa00aa00LL, (cl_long)0xcc00cc00cc00cc00LL,
(cl_long)0xff00ff00ff00ff00LL, (cl_long)0x0033003300330033LL,
(cl_long)0x0055005500550055LL, (cl_long)0x0099009900990099LL,
(cl_long)0x00aa00aa00aa00aaLL, (cl_long)0x00cc00cc00cc00ccLL,
(cl_long)0x00ff00ff00ff00ffLL, (cl_long)0x3333333300000000LL,
(cl_long)0x5555555500000000LL, (cl_long)0x9999999900000000LL,
(cl_long)0xaaaaaaaa00000000LL, (cl_long)0xcccccccc00000000LL,
(cl_long)0xffffffff00000000LL, (cl_long)0x0000000033333333LL,
(cl_long)0x0000000055555555LL, (cl_long)0x0000000099999999LL,
(cl_long)0x00000000aaaaaaaaLL, (cl_long)0x00000000ccccccccLL,
(cl_long)0x00000000ffffffffLL, (cl_long)0x3333000000003333LL,
(cl_long)0x5555000000005555LL, (cl_long)0x9999000000009999LL,
(cl_long)0xaaaa00000000aaaaLL, (cl_long)0xcccc00000000ccccLL,
(cl_long)0xffff00000000ffffLL
};
static cl_long sSpecialValues[ 128 + 128 + 128 + ( sizeof( sUniqueValues ) / sizeof( sUniqueValues[ 0 ] ) ) ] = { 0 };
if( sSpecialValues[ 0 ] == 0 )

View File

@@ -22,6 +22,7 @@
#include "harness/conversions.h"
#include "harness/ThreadPool.h"
#include "harness/parseParameters.h"
#define NUM_TESTS 23
@@ -823,10 +824,10 @@ int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queu
int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) {
int errors = 0;
if (getenv("CL_WIMPY_MODE") && num == LONG_MATH_SHIFT_SIZE) {
log_info("Detected CL_WIMPY_MODE env\n");
log_info("Skipping long test\n");
return 0;
if (gWimpyMode && num == LONG_MATH_SHIFT_SIZE)
{
log_info("Running in wimpy mode, skipping long test\n");
return 0;
}
int i;

View File

@@ -1370,7 +1370,9 @@ verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB
void
init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d)
{
static const cl_ushort specialCaseList[] = { 0, -1, 1, CL_SHRT_MAX, CL_SHRT_MAX + 1, CL_USHRT_MAX };
static const cl_ushort specialCaseList[] = {
0, (cl_ushort)-1, 1, CL_SHRT_MAX, CL_SHRT_MAX + 1, CL_USHRT_MAX
};
int j;
// Set the inputs to a random number
@@ -1812,7 +1814,9 @@ verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, c
void
init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d)
{
static const cl_uchar specialCaseList[] = { 0, -1, 1, CL_CHAR_MAX, CL_CHAR_MAX + 1, CL_UCHAR_MAX };
static const cl_uchar specialCaseList[] = {
0, (cl_uchar)-1, 1, CL_CHAR_MAX, CL_CHAR_MAX + 1, CL_UCHAR_MAX
};
int j;
// FIXME: we really should just check every char against every char here

View File

@@ -248,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}
// Init input array
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
size_t idx = 0;
int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
@@ -257,7 +257,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
// Test edge cases
if (job_id <= (cl_uint)lastSpecialJobIndex)
{
cl_double *fp = (cl_double *)p;
cl_int *ip2 = (cl_int *)p2;
uint32_t x, y;
@@ -266,7 +265,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
for (; idx < buffer_elements; idx++)
{
fp[idx] = specialValues[x];
p[idx] = specialValues[x];
ip2[idx] = specialValuesInt[y];
if (++x >= specialValuesCount)
{

View File

@@ -754,10 +754,12 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
}
bool correctlyRounded = strcmp(f->name, "divide_cr") == 0;
// Init the kernels
BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
test_info.programs, f->nameInCode,
relaxedMode };
test_info.programs, f->nameInCode,
relaxedMode, correctlyRounded };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))

View File

@@ -260,7 +260,7 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode)
if (t[j] == q[j] && t2[j] == q2[j]) continue;
// Check for paired NaNs
if (IsHalfNaN(t[j]) && IsHalfNaN(q[j]) && t2[j] == q2[j])
if (isnan_fp(t[j]) && isnan_fp(q[j]) && t2[j] == q2[j])
continue;
cl_half test = ((cl_half *)q)[j];
@@ -282,7 +282,7 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode)
// then the standard either neglects to say what is returned
// in iptr or leaves it undefined or implementation defined.
int iptrUndefined = IsHalfInfinity(p[j]) || (HTF(p2[j]) == 0.0f)
|| IsHalfNaN(p2[j]) || IsHalfNaN(p[j]);
|| isnan_fp(p2[j]) || isnan_fp(p[j]);
if (iptrUndefined) iErr = 0;
int fail = !(fabsf(err) <= half_ulps && iErr == 0);

View File

@@ -102,7 +102,7 @@ void EmitEnableExtension(std::ostringstream &kernel,
if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
}
std::string GetBuildOptions(bool relaxed_mode)
std::string GetBuildOptions(const BuildKernelInfo &info)
{
std::ostringstream options;
@@ -111,16 +111,16 @@ std::string GetBuildOptions(bool relaxed_mode)
options << " -cl-denorms-are-zero";
}
if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
{
options << " -cl-fp32-correctly-rounded-divide-sqrt";
}
if (relaxed_mode)
if (info.relaxedMode)
{
options << " -cl-fast-relaxed-math";
}
if (info.correctlyRounded)
{
options << " -cl-fp32-correctly-rounded-divide-sqrt";
}
return options.str();
}
@@ -581,7 +581,7 @@ cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
// Create the program.
clProgramWrapper &program = info.programs[vector_size_index];
auto options = GetBuildOptions(info.relaxedMode);
auto options = GetBuildOptions(info);
int error =
create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
sources.data(), nullptr, options.c_str());

View File

@@ -84,6 +84,9 @@ struct BuildKernelInfo
// Whether to build with -cl-fast-relaxed-math.
bool relaxedMode;
// Whether to build with -cl-fp32-correctly-rounded-divide-sqrt.
bool correctlyRounded;
};
// Data common to all math tests.

View File

@@ -375,8 +375,8 @@ const Func functionList[] = {
{ NULL },
3.0f,
0.0f,
0.0f,
1.0f,
1.5f,
1.5f,
4.0f,
INFINITY,
INFINITY,

View File

@@ -66,7 +66,6 @@ int gSkipCorrectnessTesting = 0;
static int gStopOnError = 0;
static bool gSkipRestOfTests;
int gForceFTZ = 0;
int gWimpyMode = 0;
int gHostFill = 0;
static int gHasDouble = 0;
static int gTestFloat = 1;
@@ -82,7 +81,6 @@ static int gTestFastRelaxed = 1;
OpenCL 2.0 spec then it has to be changed through a command line argument.
*/
int gFastRelaxedDerived = 1;
static int gToggleCorrectlyRoundedDivideSqrt = 0;
int gHasHalf = 0;
cl_device_fp_config gHalfCapabilities = 0;
int gDeviceILogb0 = 1;
@@ -385,21 +383,24 @@ int main(int argc, const char *argv[])
error = ParseArgs(argc, argv);
if (error) return error;
// This takes a while, so prevent the machine from going to sleep.
PreventSleep();
atexit(ResumeSleep);
if (!gListTests)
{
// This takes a while, so prevent the machine from going to sleep.
PreventSleep();
atexit(ResumeSleep);
if (gSkipCorrectnessTesting)
vlog("*** Skipping correctness testing! ***\n\n");
else if (gStopOnError)
vlog("Stopping at first error.\n");
if (gSkipCorrectnessTesting)
vlog("*** Skipping correctness testing! ***\n\n");
else if (gStopOnError)
vlog("Stopping at first error.\n");
vlog(" \t ");
if (gWimpyMode) vlog(" ");
if (!gSkipCorrectnessTesting) vlog("\t max_ulps");
vlog(" \t ");
if (gWimpyMode) vlog(" ");
if (!gSkipCorrectnessTesting) vlog("\t max_ulps");
vlog("\n-------------------------------------------------------------------"
"----------------------------------------\n");
vlog("\n---------------------------------------------------------------"
"--------------------------------------------\n");
}
gMTdata = MTdataHolder(gRandomSeed);
@@ -426,6 +427,10 @@ int main(int argc, const char *argv[])
static int ParseArgs(int argc, const char **argv)
{
if (gListTests)
{
return 0;
}
// We only pass test names to runTestHarnessWithCheck, hence global command
// line options defined by the harness cannot be used by the user.
// To respect the implementation details of runTestHarnessWithCheck,
@@ -469,8 +474,6 @@ static int ParseArgs(int argc, const char **argv)
optionFound = 1;
switch (*arg)
{
case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
case 'd': gHasDouble ^= 1; break;
case 'e': gFastRelaxedDerived ^= 1; break;
@@ -498,10 +501,6 @@ static int ParseArgs(int argc, const char **argv)
case 'v': gVerboseBruteForce ^= 1; break;
case 'w': // wimpy mode
gWimpyMode ^= 1;
break;
case '[':
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
break;
@@ -581,14 +580,6 @@ static int ParseArgs(int argc, const char **argv)
}
}
// Check for the wimpy mode environment variable
if (getenv("CL_WIMPY_MODE"))
{
vlog("\n");
vlog("*** Detected CL_WIMPY_MODE env ***\n");
gWimpyMode = 1;
}
PrintArch();
if (gWimpyMode)
@@ -629,8 +620,6 @@ static void PrintUsage(void)
{
vlog("%s [-cglsz]: <optional: math function names>\n", appName);
vlog("\toptions:\n");
vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
"off)\n");
vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
"on)\n");
vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
@@ -645,7 +634,6 @@ static void PrintUsage(void)
"accuracy checks.)\n");
vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
vlog("\t\t-s\tStop on error\n");
vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
"1-10, default factor(%u)\n",
gWimpyReductionFactor);
@@ -942,13 +930,6 @@ test_status InitCL(cl_device_id device)
vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
no_yes[0
!= (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
if (gToggleCorrectlyRoundedDivideSqrt)
{
gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
}
vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
no_yes[0
!= (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
vlog("\tTesting with FTZ mode ON for floats? %s\n",
no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
@@ -1330,8 +1311,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference)
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
int ulp_exp =
DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
int ulp_exp = DBL_MANT_DIG - std::max(ilogbl(reference), DBL_MIN_EXP);
// allow correctly rounded results to pass through unmolested. (We might add
// error to it below.) There is something of a performance optimization here

View File

@@ -25,12 +25,10 @@
#include "utility.h"
#if defined(__SSE__) \
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
#if defined(__SSE__) || _M_IX86_FP == 1
#include <xmmintrin.h>
#endif
#if defined(__SSE2__) \
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
#if defined(__SSE2__) || _M_IX86_FP == 2 || defined(_M_X64)
#include <emmintrin.h>
#endif
@@ -721,9 +719,9 @@ double reference_tanpi(double x)
double z = reference_fabs(x);
// if big and even -- caution: only works if x only has single precision
if (z >= HEX_DBL(+, 1, 0, +, 24))
if (!(z < HEX_DBL(+, 1, 0, +, 24)))
{
if (z == INFINITY) return x - x; // nan
if (!isfinite(z)) return x - x; // nan
return reference_copysign(
0.0, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n.
@@ -855,8 +853,7 @@ double reference_add(double x, double y)
volatile float a = (float)x;
volatile float b = (float)y;
#if defined(__SSE__) \
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
#if defined(__SSE__) || _M_IX86_FP == 1
// defeat x87
__m128 va = _mm_set_ss((float)a);
__m128 vb = _mm_set_ss((float)b);
@@ -953,8 +950,7 @@ double reference_subtract(double x, double y)
{
volatile float a = (float)x;
volatile float b = (float)y;
#if defined(__SSE__) \
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
#if defined(__SSE__) || _M_IX86_FP == 1
// defeat x87
__m128 va = _mm_set_ss((float)a);
__m128 vb = _mm_set_ss((float)b);
@@ -970,8 +966,7 @@ double reference_multiply(double x, double y)
{
volatile float a = (float)x;
volatile float b = (float)y;
#if defined(__SSE__) \
|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
#if defined(__SSE__) || _M_IX86_FP == 1
// defeat x87
__m128 va = _mm_set_ss((float)a);
__m128 vb = _mm_set_ss((float)b);
@@ -1223,6 +1218,8 @@ double reference_relaxed_exp2(double x) { return reference_exp2(x); }
double reference_exp2(double x)
{ // Note: only suitable for verifying single precision. Doesn't have range of a
// full double exp2 implementation.
if (isnan(x)) return x;
if (x == 0.0) return 1.0;
// separate x into fractional and integer parts
@@ -2781,7 +2778,7 @@ static inline void shift_right_sticky_128(cl_ulong *hi, cl_ulong *lo, int shift)
sticky |= (0 != l);
l = 0;
}
else
else if (shift > 0)
{
sticky |= (0 != (l << (64 - shift)));
l >>= shift;
@@ -3088,9 +3085,9 @@ long double reference_tanpil(long double x)
long double z = reference_fabsl(x);
// if big and even -- caution: only works if x only has single precision
if (z >= HEX_LDBL(+, 1, 0, +, 53))
if (!(z < HEX_LDBL(+, 1, 0, +, 53)))
{
if (z == INFINITY) return x - x; // nan
if (!isfinite(z)) return x - x; // nan
return reference_copysignl(
0.0L, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n.
@@ -5027,8 +5024,9 @@ static double reference_scalbn(double x, int n)
u.d -= 1.0;
e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022;
}
if (n >= 2098) return reference_copysign(INFINITY, x);
e += n;
if (e >= 2047 || n >= 2098) return reference_copysign(INFINITY, x);
if (e >= 2047) return reference_copysign(INFINITY, x);
if (e < -51 || n < -2097) return reference_copysign(0.0, x);
if (e <= 0)
{

Some files were not shown because too many files have changed in this diff Show More