mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 14:09:03 +00:00
* Initial CTS for external sharing extensions Initial set of tests for below extensions with Vulkan as producer 1. cl_khr_external_memory 2. cl_khr_external_memory_win32 3. cl_khr_external_memory_opaque_fd 4. cl_khr_external_semaphore 5. cl_khr_external_semaphore_win32 6. cl_khr_external_semaphore_opaque_fd * Updates to external sharing CTS Updates to external sharing CTS 1. Fix some build issues to remove unnecessary, non-existent files 2. Add new tests for platform and device queries. 3. Some added checks for VK Support. * Update CTS build script for Vulkan Headers Update CTS build to clone Vulkan Headers repo and pass it to CTS build in preparation for external memory and semaphore tests * Fix Vulkan header path Fix Vulkan header include path. * Add Vulkan loader dependency Vulkan loader is required to build test_vulkan of OpenCL-CTS. Clone and build Vulkan loader as prerequisite to OpenCL-CTS. * Fix Vulkan loader path in test_vulkan Remove arch/os suffix in Vulkan loader path to match vulkan loader repo build. * Fix warnings around getHandle API. Return type of getHandle is defined differently based on win or linux builds. Use appropriate guards when using API at other places. While at it remove duplicate definition of ARRAY_SIZE. * Use ARRAY_SIZE in harness. Use already defined ARRAY_SIZE macro from test_harness. * Fix build issues for test_vulkan Fix build issues for test_vulkan 1. Add cl_ext.h in common files 2. Replace cl_mem_properties_khr with cl_mem_properties 3. Replace cl_external_mem_handle_type_khr with cl_external_memory_handle_type_khr 4. Type-cast malloc as required. * Fix code formatting. Fix code formatting to get CTS CI builds clean. * Fix formatting fixes part-2 Another set of formatting fixes. * Fix code formatting part-3 Some more code formatting fixes. * Fix code formatting issues part-4 More code formatting fixes. * Formatting fixes part-5 Some more formatting fixes * Fix formatting part-6 More formatting fixes continued. 
* Code formatting fixes part-7 Code formatting fixes for image * Code formatting fixes part-8 Fixes for platform and device query tests. * Code formatting fixes part-9 More formatting fixes for vulkan_wrapper * Code formatting fixes part-10 More fixes to wrapper header * Code formatting fixes part-11 Formatting fixes for api_list * Code formatting fixes part-12 Formatting fixes for api_list_map. * Code formatting changes part-13 Code formatting changes for utility. * Code formatting fixes part-15 Formatting fixes for wrapper. * Misc Code formatting fixes Some more misc code formatting fixes. * Fix build breaks due to code formatting Fix build issues arised with recent code formatting issues. * Fix presubmit script after merge Fix presubmit script after merge conflicts. * Fix Vulkan loader build in presubmit script. Use cmake ninja and appropriate toolchain for Vulkan loader dependency to fix linking issue on arm/aarch64. * Use static array sizes Use static array sizes to fix windows builds. * Some left-out formatting fixes. Fix remaining formatting issues. * Fix harness header path Fix harness header path While at it, remove Misc and test pragma. * Add/Fix license information Add Khronos License info for test_vulkan. Replace Apple license with Khronos as applicable. * Fix headers for Mac OSX builds. Use appropriate headers for Mac OSX builds * Fix Mac OSX builds. Use appropriate headers for Mac OSX builds. Also, fix some build issues due to type-casting. * Fix new code formatting issues Fix new code formatting issues with recent MacOS fixes. * Add back missing case statement Add back missing case statement that was accidentally removed. * Disable USE_GAS for Vulkan Loader build. Disable USE_GAS for Vulkan Loader build to fix aarch64 build. * Update Copyright Year. Update Copyright Year to 2022 for external memory sharing tests. * Android specific fixes Android specific fixes to external sharing tests.
1806 lines
62 KiB
C++
1806 lines
62 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "crc32.h"
|
|
#include "kernelHelpers.h"
|
|
#include "deviceInfo.h"
|
|
#include "errorHelpers.h"
|
|
#include "imageHelpers.h"
|
|
#include "typeWrappers.h"
|
|
#include "testHarness.h"
|
|
#include "parseParameters.h"
|
|
|
|
#include <cassert>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <iomanip>
|
|
#include <mutex>
|
|
#include <algorithm>
|
|
|
|
// Path separator used when composing compilation-cache file names:
// a backslash when _WIN32 is defined, a forward slash otherwise.
std::string slash =
#if defined(_WIN32)
    "\\";
#else
    "/";
#endif
|
|
|
|
static std::mutex gCompilerMutex;
|
|
|
|
static cl_int get_first_device_id(const cl_context context,
|
|
cl_device_id &device);
|
|
|
|
// Returns the size in bytes of the file called fileName, measured by
// seeking to the end of a binary input stream. Returns 0 when the file
// cannot be opened.
long get_file_size(const std::string &fileName)
{
    std::ifstream input(fileName.c_str(), std::ios::binary);
    if (input.good())
    {
        // The end-of-file offset is the file length.
        input.seekg(0, std::ios::end);
        const std::ios::pos_type endOffset = input.tellg();
        return static_cast<long>(endOffset);
    }
    return 0;
}
|
|
|
|
// Concatenates the first numKernelLines C strings of kernelProgram, in
// order and without any separator, into a single std::string.
static std::string get_kernel_content(unsigned int numKernelLines,
                                      const char *const *kernelProgram)
{
    std::string joined;
    const char *const *const lastLine = kernelProgram + numKernelLines;
    for (const char *const *line = kernelProgram; line != lastLine; ++line)
    {
        joined.append(*line);
    }
    return joined;
}
|
|
|
|
std::string get_kernel_name(const std::string &source)
|
|
{
|
|
// Create list of kernel names
|
|
std::string kernelsList;
|
|
size_t kPos = source.find("kernel");
|
|
while (kPos != std::string::npos)
|
|
{
|
|
// check for '__kernel'
|
|
size_t pos = kPos;
|
|
if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
|
|
pos -= 2;
|
|
|
|
// check character before 'kernel' (white space expected)
|
|
size_t wsPos = source.find_last_of(" \t\r\n", pos);
|
|
if (wsPos == std::string::npos || wsPos + 1 == pos)
|
|
{
|
|
// check character after 'kernel' (white space expected)
|
|
size_t akPos = kPos + sizeof("kernel") - 1;
|
|
wsPos = source.find_first_of(" \t\r\n", akPos);
|
|
if (!(wsPos == akPos))
|
|
{
|
|
kPos = source.find("kernel", kPos + 1);
|
|
continue;
|
|
}
|
|
|
|
bool attributeFound;
|
|
do
|
|
{
|
|
attributeFound = false;
|
|
// find '(' after kernel name name
|
|
size_t pPos = source.find("(", akPos);
|
|
if (!(pPos != std::string::npos)) continue;
|
|
|
|
// check for not empty kernel name before '('
|
|
pos = source.find_last_not_of(" \t\r\n", pPos - 1);
|
|
if (!(pos != std::string::npos && pos > akPos)) continue;
|
|
|
|
// find character before kernel name
|
|
wsPos = source.find_last_of(" \t\r\n", pos);
|
|
if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
|
|
|
|
std::string name =
|
|
source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
|
|
// check for kernel attribute
|
|
if (name == "__attribute__")
|
|
{
|
|
attributeFound = true;
|
|
int pCount = 1;
|
|
akPos = pPos + 1;
|
|
while (pCount > 0 && akPos != std::string::npos)
|
|
{
|
|
akPos = source.find_first_of("()", akPos + 1);
|
|
if (akPos != std::string::npos)
|
|
{
|
|
if (source[akPos] == '(')
|
|
pCount++;
|
|
else
|
|
pCount--;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
kernelsList += name + ".";
|
|
}
|
|
} while (attributeFound);
|
|
}
|
|
kPos = source.find("kernel", kPos + 1);
|
|
}
|
|
std::ostringstream oss;
|
|
if (MAX_LEN_FOR_KERNEL_LIST > 0)
|
|
{
|
|
if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
|
|
{
|
|
kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
|
|
kernelsList[kernelsList.size() - 1] = '.';
|
|
kernelsList[kernelsList.size() - 1] = '.';
|
|
}
|
|
oss << kernelsList;
|
|
}
|
|
return oss.str();
|
|
}
|
|
|
|
static std::string
|
|
get_offline_compilation_file_type_str(const CompilationMode compilationMode)
|
|
{
|
|
switch (compilationMode)
|
|
{
|
|
default: assert(0 && "Invalid compilation mode"); abort();
|
|
case kOnline:
|
|
assert(0 && "Invalid compilation mode for offline compilation");
|
|
abort();
|
|
case kBinary: return "binary";
|
|
case kSpir_v: return "SPIR-V";
|
|
}
|
|
}
|
|
|
|
static std::string get_unique_filename_prefix(unsigned int numKernelLines,
|
|
const char *const *kernelProgram,
|
|
const char *buildOptions)
|
|
{
|
|
std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
|
|
std::string kernelName = get_kernel_name(kernel);
|
|
cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
|
|
std::ostringstream oss;
|
|
oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
|
|
<< kernelCrc;
|
|
if (buildOptions)
|
|
{
|
|
cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
|
|
oss << '.' << std::hex << std::setfill('0') << std::setw(8)
|
|
<< bOptionsCrc;
|
|
}
|
|
return oss.str();
|
|
}
|
|
|
|
|
|
static std::string
|
|
get_cl_build_options_filename_with_path(const std::string &filePath,
|
|
const std::string &fileNamePrefix)
|
|
{
|
|
return filePath + slash + fileNamePrefix + ".options";
|
|
}
|
|
|
|
static std::string
|
|
get_cl_source_filename_with_path(const std::string &filePath,
|
|
const std::string &fileNamePrefix)
|
|
{
|
|
return filePath + slash + fileNamePrefix + ".cl";
|
|
}
|
|
|
|
static std::string
|
|
get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
|
|
const std::string &filePath,
|
|
const std::string &fileNamePrefix)
|
|
{
|
|
std::string binaryFilename = filePath + slash + fileNamePrefix;
|
|
if (kSpir_v == mode)
|
|
{
|
|
std::ostringstream extension;
|
|
extension << ".spv" << deviceAddrSpaceSize;
|
|
binaryFilename += extension.str();
|
|
}
|
|
return binaryFilename;
|
|
}
|
|
|
|
static bool file_exist_on_disk(const std::string &filePath,
|
|
const std::string &fileName)
|
|
{
|
|
std::string fileNameWithPath = filePath + slash + fileName;
|
|
bool exist = false;
|
|
std::ifstream ifs;
|
|
|
|
ifs.open(fileNameWithPath.c_str(), std::ios::binary);
|
|
if (ifs.good()) exist = true;
|
|
ifs.close();
|
|
return exist;
|
|
}
|
|
|
|
// Decides whether the kernel source has to be written to the cache
// directory before a program is created:
//  - kCacheModeCompileIfAbsent with an offline mode: only when the file
//    binaryPath/binaryName does not exist yet;
//  - kCacheModeDumpCl: always;
//  - kCacheModeOverwrite: whenever the mode is not kOnline;
//  - otherwise: never.
static bool should_save_kernel_source_to_disk(CompilationMode mode,
                                              CompilationCacheMode cacheMode,
                                              const std::string &binaryPath,
                                              const std::string &binaryName)
{
    const bool offline = (mode != kOnline);

    // The compile-if-absent rule has the final say when it applies.
    if (cacheMode == kCacheModeCompileIfAbsent && offline)
    {
        return !file_exist_on_disk(binaryPath, binaryName);
    }

    return cacheMode == kCacheModeDumpCl
        || (cacheMode == kCacheModeOverwrite && offline);
}
|
|
|
|
static int save_kernel_build_options_to_disk(const std::string &path,
|
|
const std::string &prefix,
|
|
const char *buildOptions)
|
|
{
|
|
std::string filename =
|
|
get_cl_build_options_filename_with_path(path, prefix);
|
|
std::ofstream ofs(filename.c_str(), std::ios::binary);
|
|
if (!ofs.good())
|
|
{
|
|
log_info("Can't save kernel build options: %s\n", filename.c_str());
|
|
return -1;
|
|
}
|
|
ofs.write(buildOptions, strlen(buildOptions));
|
|
ofs.close();
|
|
log_info("Saved kernel build options to file: %s\n", filename.c_str());
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
static int save_kernel_source_to_disk(const std::string &path,
|
|
const std::string &prefix,
|
|
const std::string &source)
|
|
{
|
|
std::string filename = get_cl_source_filename_with_path(path, prefix);
|
|
std::ofstream ofs(filename.c_str(), std::ios::binary);
|
|
if (!ofs.good())
|
|
{
|
|
log_info("Can't save kernel source: %s\n", filename.c_str());
|
|
return -1;
|
|
}
|
|
ofs.write(source.c_str(), source.size());
|
|
ofs.close();
|
|
log_info("Saved kernel source to file: %s\n", filename.c_str());
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
static int
|
|
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
|
|
const char *const *kernelProgram,
|
|
const char *buildOptions)
|
|
{
|
|
int error;
|
|
|
|
std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
|
|
std::string kernelNamePrefix =
|
|
get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
|
|
|
|
// save kernel source to disk
|
|
error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
|
|
kernel);
|
|
|
|
// save kernel build options to disk if exists
|
|
if (buildOptions != NULL)
|
|
error |= save_kernel_build_options_to_disk(
|
|
gCompilationCachePath, kernelNamePrefix, buildOptions);
|
|
|
|
return error;
|
|
}
|
|
|
|
static std::string
|
|
get_compilation_mode_str(const CompilationMode compilationMode)
|
|
{
|
|
switch (compilationMode)
|
|
{
|
|
default: assert(0 && "Invalid compilation mode"); abort();
|
|
case kOnline: return "online";
|
|
case kBinary: return "binary";
|
|
case kSpir_v: return "spir-v";
|
|
}
|
|
}
|
|
|
|
// Renders the text of the CL device information file handed to the offline
// compiler: one KEY=VALUE line per property (address bits, extensions,
// IL version for SPIR-V only, device version, image support, device name),
// preceded by a '#' comment line. The text is stored in clDeviceInfo and
// CL_SUCCESS is returned.
static cl_int get_cl_device_info_str(const cl_device_id device,
                                     const cl_uint device_address_space_size,
                                     const CompilationMode compilationMode,
                                     std::string &clDeviceInfo)
{
    const std::string extensions = get_device_extensions_string(device);
    const std::string deviceVersion = get_device_version_string(device);
    const std::string fileType =
        get_offline_compilation_file_type_str(compilationMode);

    std::ostringstream info;
    info << "# OpenCL device info affecting " << fileType
         << " offline compilation:" << '\n';
    info << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size << '\n';
    info << "CL_DEVICE_EXTENSIONS=\"" << extensions << "\"" << '\n';

    /* We only need the device's supported IL version(s) when compiling IL
     * that will be loaded with clCreateProgramWithIL() */
    if (compilationMode == kSpir_v)
    {
        const std::string ilVersion = get_device_il_version_string(device);
        info << "CL_DEVICE_IL_VERSION=\"" << ilVersion << "\"" << '\n';
    }

    info << "CL_DEVICE_VERSION=\"" << deviceVersion << "\"" << '\n';
    // Written as 1 when checkForImageSupport() returns 0, otherwise as 0.
    info << "CL_DEVICE_IMAGE_SUPPORT=" << (0 == checkForImageSupport(device))
         << '\n';
    info << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str() << "\""
         << '\n';

    clDeviceInfo = info.str();
    return CL_SUCCESS;
}
|
|
|
|
static int write_cl_device_info(const cl_device_id device,
|
|
const cl_uint device_address_space_size,
|
|
const CompilationMode compilationMode,
|
|
std::string &clDeviceInfoFilename)
|
|
{
|
|
std::string clDeviceInfo;
|
|
int error = get_cl_device_info_str(device, device_address_space_size,
|
|
compilationMode, clDeviceInfo);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
return error;
|
|
}
|
|
|
|
cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
|
|
|
|
/* Get the filename for the clDeviceInfo file.
|
|
* Note: the file includes the hash on its content, so it is usually
|
|
* unnecessary to delete it. */
|
|
std::ostringstream clDeviceInfoFilenameStream;
|
|
clDeviceInfoFilenameStream << gCompilationCachePath << slash
|
|
<< "clDeviceInfo-";
|
|
clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
|
|
<< crc << ".txt";
|
|
|
|
clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
|
|
|
|
if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
|
|
{
|
|
/* The CL device info file has already been created.
|
|
* Nothing to do. */
|
|
return 0;
|
|
}
|
|
|
|
/* The file does not exist or its length is not as expected.
|
|
* Create/overwrite it. */
|
|
std::ofstream ofs(clDeviceInfoFilename);
|
|
if (!ofs.good())
|
|
{
|
|
log_info("OfflineCompiler: can't create CL device info file: %s\n",
|
|
clDeviceInfoFilename.c_str());
|
|
return -1;
|
|
}
|
|
ofs << clDeviceInfo;
|
|
ofs.close();
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
static std::string get_offline_compilation_command(
|
|
const cl_uint device_address_space_size,
|
|
const CompilationMode compilationMode, const std::string &bOptions,
|
|
const std::string &sourceFilename, const std::string &outputFilename,
|
|
const std::string &clDeviceInfoFilename)
|
|
{
|
|
std::ostringstream wrapperOptions;
|
|
|
|
wrapperOptions << gCompilationProgram
|
|
<< " --mode=" << get_compilation_mode_str(compilationMode)
|
|
<< " --source=" << sourceFilename
|
|
<< " --output=" << outputFilename
|
|
<< " --cl-device-info=" << clDeviceInfoFilename;
|
|
|
|
if (bOptions != "")
|
|
{
|
|
// Add build options passed to this function
|
|
wrapperOptions << " -- " << bOptions;
|
|
}
|
|
|
|
return wrapperOptions.str();
|
|
}
|
|
|
|
static int invoke_offline_compiler(const cl_device_id device,
|
|
const cl_uint device_address_space_size,
|
|
const CompilationMode compilationMode,
|
|
const std::string &bOptions,
|
|
const std::string &sourceFilename,
|
|
const std::string &outputFilename)
|
|
{
|
|
std::string runString;
|
|
std::string clDeviceInfoFilename;
|
|
|
|
// See cl_offline_compiler-interface.txt for a description of the
|
|
// format of the CL device information file generated below, and
|
|
// the internal command line interface for invoking the offline
|
|
// compiler.
|
|
|
|
cl_int err = write_cl_device_info(device, device_address_space_size,
|
|
compilationMode, clDeviceInfoFilename);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
log_error("Failed writing CL device info file\n");
|
|
return err;
|
|
}
|
|
|
|
runString = get_offline_compilation_command(
|
|
device_address_space_size, compilationMode, bOptions, sourceFilename,
|
|
outputFilename, clDeviceInfoFilename);
|
|
|
|
// execute script
|
|
log_info("Executing command: %s\n", runString.c_str());
|
|
fflush(stdout);
|
|
int returnCode = system(runString.c_str());
|
|
if (returnCode != 0)
|
|
{
|
|
log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
|
|
return CL_COMPILE_PROGRAM_FAILURE;
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
// Queries the devices attached to `context` and stores the first one in
// `device`. Returns CL_SUCCESS on success and -1 when the context reports
// zero devices; failures of clGetContextInfo are handled by test_error.
static cl_int get_first_device_id(const cl_context context,
                                  cl_device_id &device)
{
    cl_uint deviceCount = 0;
    cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
                                    sizeof(deviceCount), &deviceCount, NULL);
    test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");

    if (deviceCount == 0)
    {
        log_error("ERROR: No CL devices found\n");
        return -1;
    }

    // Value-initialised, i.e. every entry starts out as a null handle.
    std::vector<cl_device_id> deviceList(deviceCount);
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                             deviceList.size() * sizeof(cl_device_id),
                             deviceList.data(), NULL);
    test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");

    device = deviceList.front();
    return CL_SUCCESS;
}
|
|
|
|
// Reads CL_DEVICE_ADDRESS_BITS of `device` into device_address_space_size.
// Returns CL_SUCCESS when the value is 32 or 64 and -1 for any other value;
// a failing clGetDeviceInfo is handled by test_error.
static cl_int get_device_address_bits(const cl_device_id device,
                                      cl_uint &device_address_space_size)
{
    cl_int error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
                                   sizeof(device_address_space_size),
                                   &device_address_space_size, NULL);
    test_error(error, "Unable to obtain device address bits");

    const bool supportedWidth =
        (device_address_space_size == 32 || device_address_space_size == 64);
    if (supportedWidth)
    {
        return CL_SUCCESS;
    }

    log_error("ERROR: Unexpected number of device address bits: %u\n",
              device_address_space_size);
    return -1;
}
|
|
|
|
static int get_offline_compiler_output(
|
|
std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
|
|
const CompilationMode compilationMode, const std::string &bOptions,
|
|
const std::string &kernelPath, const std::string &kernelNamePrefix)
|
|
{
|
|
std::string sourceFilename =
|
|
get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
|
|
std::string outputFilename = get_binary_filename_with_path(
|
|
compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
|
|
|
|
ifs.open(outputFilename.c_str(), std::ios::binary);
|
|
if (!ifs.good())
|
|
{
|
|
std::string file_type =
|
|
get_offline_compilation_file_type_str(compilationMode);
|
|
if (gCompilationCacheMode == kCacheModeForceRead)
|
|
{
|
|
log_info("OfflineCompiler: can't open cached %s file: %s\n",
|
|
file_type.c_str(), outputFilename.c_str());
|
|
return -1;
|
|
}
|
|
else
|
|
{
|
|
int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
|
|
compilationMode, bOptions,
|
|
sourceFilename, outputFilename);
|
|
if (error != CL_SUCCESS) return error;
|
|
|
|
// open output file for reading
|
|
ifs.open(outputFilename.c_str(), std::ios::binary);
|
|
if (!ifs.good())
|
|
{
|
|
log_info("OfflineCompiler: can't read generated %s file: %s\n",
|
|
file_type.c_str(), outputFilename.c_str());
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (compilationMode == kSpir_v && !gDisableSPIRVValidation)
|
|
{
|
|
std::string runString = gSPIRVValidator + " " + outputFilename;
|
|
|
|
int returnCode = system(runString.c_str());
|
|
if (returnCode == -1)
|
|
{
|
|
log_error("Error: failed to invoke SPIR-V validator\n");
|
|
return CL_COMPILE_PROGRAM_FAILURE;
|
|
}
|
|
else if (returnCode != 0)
|
|
{
|
|
log_error(
|
|
"Failed to validate SPIR-V file %s: system() returned 0x%x\n",
|
|
outputFilename.c_str(), returnCode);
|
|
return CL_COMPILE_PROGRAM_FAILURE;
|
|
}
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
// Creates *outProgram from the output of the offline compiler instead of
// from OpenCL C source.
//
// Parameters:
//   context          - context the program is created in.
//   device           - target device; when NULL the first device of the
//                      context is used.
//   outProgram       - receives the created program object.
//   numKernelLines / kernelProgram / buildOptions
//                    - identify the kernel; they determine the cache file
//                      name prefix and the options given to the compiler.
//   compilationMode  - kBinary loads the file with
//                      clCreateProgramWithBinary, kSpir_v with
//                      clCreateProgramWithIL (or clCreateProgramWithILKHR
//                      when gCoreILProgram is false). Any other mode creates
//                      no program.
//
// Returns CL_SUCCESS on success. Returns -1 when the cache mode is
// kCacheModeDumpCl, when the output file cannot be sized or read, or when
// the KHR entry point is unavailable; otherwise the error reported by the
// failing helper or OpenCL call.
static int create_single_kernel_helper_create_program_offline(
    cl_context context, cl_device_id device, cl_program *outProgram,
    unsigned int numKernelLines, const char *const *kernelProgram,
    const char *buildOptions, CompilationMode compilationMode)
{
    if (kCacheModeDumpCl == gCompilationCacheMode)
    {
        return -1;
    }

    // Get device CL_DEVICE_ADDRESS_BITS
    int error;
    cl_uint device_address_space_size = 0;
    if (device == NULL)
    {
        error = get_first_device_id(context, device);
        test_error(error, "Failed to get device ID for first device");
    }
    error = get_device_address_bits(device, device_address_space_size);
    if (error != CL_SUCCESS) return error;

    // set build options
    const std::string bOptions(buildOptions ? buildOptions : "");

    const std::string kernelName =
        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);

    std::ifstream ifs;
    error = get_offline_compiler_output(ifs, device, device_address_space_size,
                                        compilationMode, bOptions,
                                        gCompilationCachePath, kernelName);
    if (error != CL_SUCCESS) return error;

    // Only binary and SPIR-V output is turned into a program object.
    if (compilationMode != kBinary && compilationMode != kSpir_v)
    {
        return CL_SUCCESS;
    }

    // Determine the file length; tellg() reports -1 on failure, which must
    // not be converted into a huge unsigned size.
    ifs.seekg(0, ifs.end);
    const std::streamoff fileSize = ifs.tellg();
    ifs.seekg(0, ifs.beg);
    if (fileSize < 0)
    {
        log_error(
            "ERROR: unable to determine size of offline compiler output\n");
        return -1;
    }

    // Read the whole file. data() is used instead of &buf[0] so that an
    // empty file does not index into an empty vector; the OpenCL call below
    // then rejects the zero-length input itself.
    std::vector<unsigned char> modifiedKernelBuf(
        static_cast<size_t>(fileSize));
    if (!modifiedKernelBuf.empty())
    {
        ifs.read(reinterpret_cast<char *>(modifiedKernelBuf.data()),
                 static_cast<std::streamsize>(fileSize));
    }
    const bool readFailed = ifs.fail();
    ifs.close();
    if (readFailed)
    {
        log_error("ERROR: unable to read offline compiler output\n");
        return -1;
    }

    if (compilationMode == kBinary)
    {
        // treat the file content as input for clCreateProgramWithBinary
        size_t binaryLength = modifiedKernelBuf.size();
        const unsigned char *binary = modifiedKernelBuf.data();
        log_info("offlineCompiler: clCreateProgramWithSource replaced with "
                 "clCreateProgramWithBinary\n");
        *outProgram = clCreateProgramWithBinary(
            context, 1, &device, &binaryLength, &binary, NULL, &error);
        if (*outProgram == NULL || error != CL_SUCCESS)
        {
            print_error(error, "clCreateProgramWithBinary failed");
            return error;
        }
        return CL_SUCCESS;
    }

    // compilationMode == kSpir_v: treat the file content as input for
    // clCreateProgramWithIL
    const size_t ilLength = modifiedKernelBuf.size();
    log_info("offlineCompiler: clCreateProgramWithSource replaced with "
             "clCreateProgramWithIL\n");
    if (gCoreILProgram)
    {
        *outProgram = clCreateProgramWithIL(context, modifiedKernelBuf.data(),
                                            ilLength, &error);
    }
    else
    {
        // Fall back to the extension entry point of the device's platform.
        cl_platform_id platform;
        error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
                                sizeof(cl_platform_id), &platform, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");

        clCreateProgramWithILKHR_fn clCreateProgramWithILKHR =
            (clCreateProgramWithILKHR_fn)
                clGetExtensionFunctionAddressForPlatform(
                    platform, "clCreateProgramWithILKHR");
        if (clCreateProgramWithILKHR == NULL)
        {
            log_error(
                "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
            return -1;
        }
        *outProgram = clCreateProgramWithILKHR(
            context, modifiedKernelBuf.data(), ilLength, &error);
    }

    if (*outProgram == NULL || error != CL_SUCCESS)
    {
        if (gCoreILProgram)
        {
            print_error(error, "clCreateProgramWithIL failed");
        }
        else
        {
            print_error(error, "clCreateProgramWithILKHR failed");
        }
        return error;
    }

    return CL_SUCCESS;
}
|
|
|
|
// Internal worker behind the public program-creation helpers.
//
// Holds gCompilerMutex for its whole duration. Depending on the cache mode
// it first dumps the kernel source (and build options) to the compilation
// cache directory, then creates *outProgram either directly from source
// (kOnline) or from offline compiler output (every other mode).
//
// Returns CL_SUCCESS on success, -1 when the source cannot be dumped, or the
// error reported by clCreateProgramWithSource / the offline path.
//
// NOTE(review): the existence check made by
// should_save_kernel_source_to_disk() receives the bare file prefix, while
// SPIR-V output files carry a ".spv<bits>" suffix -- confirm that this is
// the file it is meant to look for.
static int create_single_kernel_helper_create_program(
    cl_context context, cl_device_id device, cl_program *outProgram,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *buildOptions, CompilationMode compilationMode)
{
    std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);

    const std::string filePrefix =
        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
    const bool shouldSaveToDisk = should_save_kernel_source_to_disk(
        compilationMode, gCompilationCacheMode, gCompilationCachePath,
        filePrefix);

    if (shouldSaveToDisk
        && CL_SUCCESS
            != save_kernel_source_and_options_to_disk(
                numKernelLines, kernelProgram, buildOptions))
    {
        // Newline-terminated like every other log message in this file.
        log_error("Unable to dump kernel source to disk\n");
        return -1;
    }

    if (compilationMode != kOnline)
    {
        return create_single_kernel_helper_create_program_offline(
            context, device, outProgram, numKernelLines, kernelProgram,
            buildOptions, compilationMode);
    }

    /* Create the program object from source */
    int error = CL_SUCCESS;
    *outProgram = clCreateProgramWithSource(context, numKernelLines,
                                            kernelProgram, NULL, &error);
    if (*outProgram == NULL || error != CL_SUCCESS)
    {
        print_error(error, "clCreateProgramWithSource failed");
        return error;
    }
    return CL_SUCCESS;
}
|
|
|
|
// Creates *outProgram for `context` using the globally selected compilation
// mode (gCompilationMode) without naming a device. A NULL device is passed
// on; the offline path then picks the first device of the context.
// Returns the status of the internal worker.
int create_single_kernel_helper_create_program(cl_context context,
                                               cl_program *outProgram,
                                               unsigned int numKernelLines,
                                               const char **kernelProgram,
                                               const char *buildOptions)
{
    cl_device_id const noExplicitDevice = NULL;
    const int status = create_single_kernel_helper_create_program(
        context, noExplicitDevice, outProgram, numKernelLines, kernelProgram,
        buildOptions, gCompilationMode);
    return status;
}
|
|
|
|
// Same as create_single_kernel_helper_create_program(), but forwards an
// explicit `device` to the internal worker. Uses the globally selected
// compilation mode (gCompilationMode) and returns the worker's status.
int create_single_kernel_helper_create_program_for_device(
    cl_context context, cl_device_id device, cl_program *outProgram,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *buildOptions)
{
    const int status = create_single_kernel_helper_create_program(
        context, device, outProgram, numKernelLines, kernelProgram,
        buildOptions, gCompilationMode);
    return status;
}
|
|
|
|
// Forwards every argument unchanged to create_single_kernel_helper() and
// returns its result.
int create_single_kernel_helper_with_build_options(
    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *kernelName, const char *buildOptions)
{
    const int status = create_single_kernel_helper(
        context, outProgram, outKernel, numKernelLines, kernelProgram,
        kernelName, buildOptions);
    return status;
}
|
|
|
|
// Creates and builds OpenCL C/C++ program, and creates a kernel
|
|
int create_single_kernel_helper(cl_context context, cl_program *outProgram,
|
|
cl_kernel *outKernel,
|
|
unsigned int numKernelLines,
|
|
const char **kernelProgram,
|
|
const char *kernelName,
|
|
const char *buildOptions)
|
|
{
|
|
// For the logic that automatically adds -cl-std it is much cleaner if the
|
|
// build options have RAII. This buffer will store the potentially updated
|
|
// build options, in which case buildOptions will point at the string owned
|
|
// by this buffer.
|
|
std::string build_options_internal{ buildOptions ? buildOptions : "" };
|
|
|
|
// Check the build options for the -cl-std option.
|
|
if (!buildOptions || !strstr(buildOptions, "-cl-std"))
|
|
{
|
|
// If the build option isn't present add it using the latest OpenCL-C
|
|
// version supported by the device. This allows calling code to force a
|
|
// particular CL C version if it is required, but also means that
|
|
// callers need not specify a version if they want to assume the most
|
|
// recent CL C.
|
|
|
|
auto version = get_max_OpenCL_C_for_context(context);
|
|
|
|
std::string cl_std{};
|
|
if (version >= Version(3, 0))
|
|
{
|
|
cl_std = "-cl-std=CL3.0";
|
|
}
|
|
else if (version >= Version(2, 0) && version < Version(3, 0))
|
|
{
|
|
cl_std = "-cl-std=CL2.0";
|
|
}
|
|
else
|
|
{
|
|
// If the -cl-std build option is not specified, the highest OpenCL
|
|
// C 1.x language version supported by each device is used when
|
|
// compiling the program for each device.
|
|
cl_std = "";
|
|
}
|
|
build_options_internal += ' ';
|
|
build_options_internal += cl_std;
|
|
buildOptions = build_options_internal.c_str();
|
|
}
|
|
int error = create_single_kernel_helper_create_program(
|
|
context, outProgram, numKernelLines, kernelProgram, buildOptions);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_error("Create program failed: %d, line: %d\n", error, __LINE__);
|
|
return error;
|
|
}
|
|
|
|
// Remove offline-compiler-only build options
|
|
std::string newBuildOptions;
|
|
if (buildOptions != NULL)
|
|
{
|
|
newBuildOptions = buildOptions;
|
|
std::string offlineCompierOptions[] = {
|
|
"-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
|
|
};
|
|
for (auto &s : offlineCompierOptions)
|
|
{
|
|
std::string::size_type i = newBuildOptions.find(s);
|
|
if (i != std::string::npos) newBuildOptions.erase(i, s.length());
|
|
}
|
|
}
|
|
// Build program and create kernel
|
|
return build_program_create_kernel_helper(
|
|
context, outProgram, outKernel, numKernelLines, kernelProgram,
|
|
kernelName, newBuildOptions.c_str());
|
|
}
|
|
|
|
// Builds OpenCL C/C++ program and creates
|
|
int build_program_create_kernel_helper(
|
|
cl_context context, cl_program *outProgram, cl_kernel *outKernel,
|
|
unsigned int numKernelLines, const char **kernelProgram,
|
|
const char *kernelName, const char *buildOptions)
|
|
{
|
|
int error;
|
|
/* Compile the program */
|
|
int buildProgramFailed = 0;
|
|
int printedSource = 0;
|
|
error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
unsigned int i;
|
|
print_error(error, "clBuildProgram failed");
|
|
buildProgramFailed = 1;
|
|
printedSource = 1;
|
|
log_error("Build options: %s\n", buildOptions);
|
|
log_error("Original source is: ------------\n");
|
|
for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
|
|
}
|
|
|
|
// Verify the build status on all devices
|
|
cl_uint deviceCount = 0;
|
|
error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
|
|
sizeof(deviceCount), &deviceCount, NULL);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
|
|
return error;
|
|
}
|
|
|
|
if (deviceCount == 0)
|
|
{
|
|
log_error("No devices found for program.\n");
|
|
return -1;
|
|
}
|
|
|
|
cl_device_id *devices =
|
|
(cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
|
|
if (NULL == devices) return -1;
|
|
BufferOwningPtr<cl_device_id> devicesBuf(devices);
|
|
|
|
memset(devices, 0, deviceCount * sizeof(cl_device_id));
|
|
error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
|
|
sizeof(cl_device_id) * deviceCount, devices, NULL);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
|
|
return error;
|
|
}
|
|
|
|
cl_uint z;
|
|
bool buildFailed = false;
|
|
for (z = 0; z < deviceCount; z++)
|
|
{
|
|
char deviceName[4096] = "";
|
|
error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
|
|
deviceName, NULL);
|
|
if (error != CL_SUCCESS || deviceName[0] == '\0')
|
|
{
|
|
log_error("Device \"%d\" failed to return a name\n", z);
|
|
print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
|
|
}
|
|
|
|
cl_build_status buildStatus;
|
|
error = clGetProgramBuildInfo(*outProgram, devices[z],
|
|
CL_PROGRAM_BUILD_STATUS,
|
|
sizeof(buildStatus), &buildStatus, NULL);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error,
|
|
"clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
|
|
return error;
|
|
}
|
|
|
|
if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
|
|
&& deviceCount == 1)
|
|
{
|
|
buildFailed = true;
|
|
log_error("clBuildProgram returned an error, but buildStatus is "
|
|
"marked as CL_BUILD_SUCCESS.\n");
|
|
}
|
|
|
|
if (buildStatus != CL_BUILD_SUCCESS)
|
|
{
|
|
|
|
char statusString[64] = "";
|
|
if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
|
|
sprintf(statusString, "CL_BUILD_SUCCESS");
|
|
else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
|
|
sprintf(statusString, "CL_BUILD_NONE");
|
|
else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
|
|
sprintf(statusString, "CL_BUILD_ERROR");
|
|
else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
|
|
sprintf(statusString, "CL_BUILD_IN_PROGRESS");
|
|
else
|
|
sprintf(statusString, "UNKNOWN (%d)", buildStatus);
|
|
|
|
if (buildStatus != CL_BUILD_SUCCESS)
|
|
log_error(
|
|
"Build not successful for device \"%s\", status: %s\n",
|
|
deviceName, statusString);
|
|
size_t paramSize = 0;
|
|
error = clGetProgramBuildInfo(*outProgram, devices[z],
|
|
CL_PROGRAM_BUILD_LOG, 0, NULL,
|
|
¶mSize);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
|
|
print_error(
|
|
error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
|
return error;
|
|
}
|
|
|
|
std::string log;
|
|
log.resize(paramSize / sizeof(char));
|
|
error = clGetProgramBuildInfo(*outProgram, devices[z],
|
|
CL_PROGRAM_BUILD_LOG, paramSize,
|
|
&log[0], NULL);
|
|
if (error != CL_SUCCESS || log[0] == '\0')
|
|
{
|
|
log_error("Device %d (%s) failed to return a build log\n", z,
|
|
deviceName);
|
|
if (error)
|
|
{
|
|
print_error(
|
|
error,
|
|
"clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
|
return error;
|
|
}
|
|
else
|
|
{
|
|
log_error("clGetProgramBuildInfo returned an empty log.\n");
|
|
return -1;
|
|
}
|
|
}
|
|
// In this case we've already printed out the code above.
|
|
if (!printedSource)
|
|
{
|
|
unsigned int i;
|
|
log_error("Original source is: ------------\n");
|
|
for (i = 0; i < numKernelLines; i++)
|
|
log_error("%s", kernelProgram[i]);
|
|
printedSource = 1;
|
|
}
|
|
log_error("Build log for device \"%s\" is: ------------\n",
|
|
deviceName);
|
|
log_error("%s\n", log.c_str());
|
|
log_error("\n----------\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (buildFailed)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
/* And create a kernel from it */
|
|
if (kernelName != NULL)
|
|
{
|
|
*outKernel = clCreateKernel(*outProgram, kernelName, &error);
|
|
if (*outKernel == NULL || error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "Unable to create kernel");
|
|
return error;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Computes the largest work-group size usable with `kernel` on every device
// of `context`, plus (optionally) the per-dimension work-item limits.
//
//   outMaxSize - receives the minimum over all devices of
//                CL_DEVICE_MAX_WORK_GROUP_SIZE and CL_KERNEL_WORK_GROUP_SIZE.
//   outLimits  - if non-NULL, receives three entries: the minimum over all
//                devices of the first three CL_DEVICE_MAX_WORK_ITEM_SIZES.
//
// Returns 0 on success, the OpenCL error code of a failing query, or -1 when
// the context reports no devices.
int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
                                    size_t *outMaxSize, size_t *outLimits)
{
    size_t devicesSize = 0;
    int error =
        clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &devicesSize);
    test_error(error, "Unable to obtain list of devices size for context");

    std::vector<cl_device_id> devices(devicesSize / sizeof(cl_device_id));
    if (devices.empty())
    {
        // Without a device the result would be 0, which callers use as a
        // divisor.
        log_error("No devices found for context.\n");
        return -1;
    }

    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                             devices.size() * sizeof(cl_device_id),
                             devices.data(), NULL);
    test_error(error, "Unable to obtain list of devices for context");

    size_t maxCommonSize = 0;
    for (size_t i = 0; i < devices.size(); i++)
    {
        size_t size = 0;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                sizeof(size), &size, NULL);
        test_error(error, "Unable to obtain max work group size for device");
        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;

        error = clGetKernelWorkGroupInfo(kernel, devices[i],
                                         CL_KERNEL_WORK_GROUP_SIZE,
                                         sizeof(size), &size, NULL);
        test_error(
            error,
            "Unable to obtain max work group size for device and kernel combo");
        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;

        cl_uint numDims = 0;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                                sizeof(numDims), &numDims, NULL);
        test_error(
            error,
            "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");

        // Size the buffer from the reported dimension count (never fewer
        // than three entries) so the query below cannot overrun it.
        std::vector<size_t> sizeLimit((numDims > 3) ? numDims : 3, 1);
        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                numDims * sizeof(size_t), sizeLimit.data(),
                                NULL);
        test_error(error,
                   "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

        if (outLimits != NULL)
        {
            if (i == 0)
            {
                for (int j = 0; j < 3; j++) outLimits[j] = sizeLimit[j];
            }
            else
            {
                // outLimits only has three entries; ignore extra dimensions.
                const cl_uint dimsToMerge = (numDims < 3) ? numDims : 3;
                for (cl_uint j = 0; j < dimsToMerge; j++)
                {
                    if (sizeLimit[j] < outLimits[j])
                        outLimits[j] = sizeLimit[j];
                }
            }
        }
    }

    *outMaxSize = maxCommonSize;
    return 0;
}
|
|
|
|
|
|
// Determines the largest 1D work-group size for `kernel` on `device`: the
// kernel's CL_KERNEL_WORK_GROUP_SIZE capped by the device's limit for the
// first work-item dimension.
//
// On success stores the value in *outSize and returns 0. Returns the OpenCL
// error code of a failing query, or -1 if the temporary buffer cannot be
// allocated; *outSize is not written on failure.
extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
                                                        cl_kernel kernel,
                                                        size_t *outSize)
{
    size_t kernelWgLimit;
    int status =
        clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
                                 sizeof(size_t), &kernelWgLimit, NULL);
    test_error(status,
               "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");

    cl_uint dimCount;
    status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                             sizeof(cl_uint), &dimCount, NULL);
    test_error(status,
               "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");

    size_t *perDimLimits = (size_t *)malloc(dimCount * sizeof(size_t));
    if (perDimLimits == NULL)
    {
        log_error("Unable to allocate maxWgSizePerDim\n");
        return -1;
    }

    status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                             dimCount * sizeof(size_t), perDimLimits, NULL);
    if (status != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
        free(perDimLimits);
        return status;
    }

    // Only the first dimension matters for a 1D launch.
    const size_t firstDimLimit = perDimLimits[0];
    free(perDimLimits);

    *outSize = (kernelWgLimit > firstDimLimit) ? firstDimLimit : kernelWgLimit;
    return 0;
}
|
|
|
|
|
|
// Finds the largest work-group size that is allowed on every device of
// `context` for `kernel`, does not exceed the first-dimension work-item
// limit, and evenly divides globalThreadSize. The result is stored in
// *outMaxSize. Returns 0 on success or the error reported by
// get_max_allowed_work_group_size.
int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
                                   size_t globalThreadSize, size_t *outMaxSize)
{
    size_t dimLimits[3];
    const int status =
        get_max_allowed_work_group_size(context, kernel, outMaxSize, dimLimits);
    if (status != 0) return status;

    // Walk downwards from the common maximum until we hit a divisor of the
    // global size that also respects the first-dimension limit. No explicit
    // lower bound is needed: at 1 the modulo test always succeeds.
    size_t &candidate = *outMaxSize;
    while ((globalThreadSize % candidate) != 0 || candidate > dimLimits[0])
    {
        candidate--;
    }
    return 0;
}
|
|
|
|
// Chooses a 2D local work size for `kernel` across all devices of `context`.
// Each outMaxSizes[i] divides globalThreadSizes[i], stays within the
// per-dimension limit, and is picked so the running product stays within the
// common maximum work-group size. Returns 0 on success or the error reported
// by get_max_allowed_work_group_size.
int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
                                      size_t *globalThreadSizes,
                                      size_t *outMaxSizes)
{
    size_t dimLimits[3];
    size_t totalLimit;
    const int status = get_max_allowed_work_group_size(context, kernel,
                                                       &totalLimit, dimLimits);
    if (status != 0) return status;

    /* Simple case: the whole global range fits in one work-group */
    if (globalThreadSizes[0] * globalThreadSizes[1] <= totalLimit
        && globalThreadSizes[0] <= dimLimits[0]
        && globalThreadSizes[1] <= dimLimits[1])
    {
        outMaxSizes[0] = globalThreadSizes[0];
        outMaxSizes[1] = globalThreadSizes[1];
        return 0;
    }

    /* Otherwise pick, dimension by dimension, the largest factor of the
       global size that fits in what is left of the total budget. */
    size_t budget = totalLimit;
    for (int dim = 0; dim < 2; dim++)
    {
        size_t candidate = (globalThreadSizes[dim] > budget)
            ? budget
            : globalThreadSizes[dim];
        while ((globalThreadSizes[dim] % candidate) != 0
               || candidate > dimLimits[dim])
        {
            candidate--;
        }
        outMaxSizes[dim] = candidate;
        // Budget left for the remaining dimensions.
        budget /= candidate;
    }

    return 0;
}
|
|
|
|
// Chooses a 3D local work size for `kernel` across all devices of `context`.
// Each outMaxSizes[i] divides globalThreadSizes[i], stays within the
// per-dimension limit, and is picked so the running product stays within the
// common maximum work-group size. Returns 0 on success or the error reported
// by get_max_allowed_work_group_size.
int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
                                      size_t *globalThreadSizes,
                                      size_t *outMaxSizes)
{
    size_t dimLimits[3];
    size_t totalLimit;
    const int status = get_max_allowed_work_group_size(context, kernel,
                                                       &totalLimit, dimLimits);
    if (status != 0) return status;

    /* Simple case: the whole global range fits in one work-group */
    if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
            <= totalLimit
        && globalThreadSizes[0] <= dimLimits[0]
        && globalThreadSizes[1] <= dimLimits[1]
        && globalThreadSizes[2] <= dimLimits[2])
    {
        outMaxSizes[0] = globalThreadSizes[0];
        outMaxSizes[1] = globalThreadSizes[1];
        outMaxSizes[2] = globalThreadSizes[2];
        return 0;
    }

    /* Otherwise pick, dimension by dimension, the largest factor of the
       global size that fits in what is left of the total budget. */
    size_t budget = totalLimit;
    for (int dim = 0; dim < 3; dim++)
    {
        size_t candidate = (globalThreadSizes[dim] > budget)
            ? budget
            : globalThreadSizes[dim];
        while ((globalThreadSizes[dim] % candidate) != 0
               || candidate > dimLimits[dim])
        {
            candidate--;
        }
        outMaxSizes[dim] = candidate;
        // Budget left for the remaining dimensions.
        budget /= candidate;
    }

    return 0;
}
|
|
|
|
/* Helper to determine if a device supports an image format */
|
|
int is_image_format_supported(cl_context context, cl_mem_flags flags,
|
|
cl_mem_object_type image_type,
|
|
const cl_image_format *fmt)
|
|
{
|
|
cl_image_format *list;
|
|
cl_uint count = 0;
|
|
cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
|
|
NULL, &count);
|
|
if (count == 0) return 0;
|
|
|
|
list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
|
|
if (NULL == list)
|
|
{
|
|
log_error("Error: unable to allocate %zu byte buffer for image format "
|
|
"list at %s:%d (err = %d)\n",
|
|
count * sizeof(cl_image_format), __FILE__, __LINE__, err);
|
|
return 0;
|
|
}
|
|
BufferOwningPtr<cl_image_format> listBuf(list);
|
|
|
|
|
|
cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
|
|
list, NULL);
|
|
if (error)
|
|
{
|
|
log_error("Error: failed to obtain supported image type list at %s:%d "
|
|
"(err = %d)\n",
|
|
__FILE__, __LINE__, err);
|
|
return 0;
|
|
}
|
|
|
|
// iterate looking for a match.
|
|
cl_uint i;
|
|
for (i = 0; i < count; i++)
|
|
{
|
|
if (fmt->image_channel_data_type == list[i].image_channel_data_type
|
|
&& fmt->image_channel_order == list[i].image_channel_order)
|
|
break;
|
|
}
|
|
|
|
return (i < count) ? 1 : 0;
|
|
}
|
|
|
|
size_t get_pixel_bytes(const cl_image_format *fmt);
|
|
size_t get_pixel_bytes(const cl_image_format *fmt)
|
|
{
|
|
size_t chanCount;
|
|
switch (fmt->image_channel_order)
|
|
{
|
|
case CL_R:
|
|
case CL_A:
|
|
case CL_Rx:
|
|
case CL_INTENSITY:
|
|
case CL_LUMINANCE:
|
|
case CL_DEPTH: chanCount = 1; break;
|
|
case CL_RG:
|
|
case CL_RA:
|
|
case CL_RGx: chanCount = 2; break;
|
|
case CL_RGB:
|
|
case CL_RGBx:
|
|
case CL_sRGB:
|
|
case CL_sRGBx: chanCount = 3; break;
|
|
case CL_RGBA:
|
|
case CL_ARGB:
|
|
case CL_BGRA:
|
|
case CL_sBGRA:
|
|
case CL_sRGBA:
|
|
#ifdef CL_1RGB_APPLE
|
|
case CL_1RGB_APPLE:
|
|
#endif
|
|
#ifdef CL_BGR1_APPLE
|
|
case CL_BGR1_APPLE:
|
|
#endif
|
|
chanCount = 4;
|
|
break;
|
|
default:
|
|
log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
switch (fmt->image_channel_data_type)
|
|
{
|
|
case CL_UNORM_SHORT_565:
|
|
case CL_UNORM_SHORT_555: return 2;
|
|
|
|
case CL_UNORM_INT_101010: return 4;
|
|
|
|
case CL_SNORM_INT8:
|
|
case CL_UNORM_INT8:
|
|
case CL_SIGNED_INT8:
|
|
case CL_UNSIGNED_INT8: return chanCount;
|
|
|
|
case CL_SNORM_INT16:
|
|
case CL_UNORM_INT16:
|
|
case CL_HALF_FLOAT:
|
|
case CL_SIGNED_INT16:
|
|
case CL_UNSIGNED_INT16:
|
|
#ifdef CL_SFIXED14_APPLE
|
|
case CL_SFIXED14_APPLE:
|
|
#endif
|
|
return chanCount * 2;
|
|
|
|
case CL_SIGNED_INT32:
|
|
case CL_UNSIGNED_INT32:
|
|
case CL_FLOAT: return chanCount * 4;
|
|
|
|
default:
|
|
log_error("Unknown channel data type at %s:%d!\n", __FILE__,
|
|
__LINE__);
|
|
abort();
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Maps the result of checkForImageSupport onto a test status:
// 0 -> TEST_PASS, CL_IMAGE_FORMAT_NOT_SUPPORTED -> TEST_SKIP (with a log
// message), anything else -> TEST_FAIL.
test_status verifyImageSupport(cl_device_id device)
{
    switch (checkForImageSupport(device))
    {
        case 0: return TEST_PASS;

        case CL_IMAGE_FORMAT_NOT_SUPPORTED:
            log_error("SKIPPED: Device does not supported images as required "
                      "by "
                      "this test!\n");
            return TEST_SKIP;

        default: return TEST_FAIL;
    }
}
|
|
|
|
// Reports whether `device` supports images at all.
// Returns 0 when CL_DEVICE_IMAGE_SUPPORT is non-zero,
// CL_IMAGE_FORMAT_NOT_SUPPORTED when it is zero, or the error code of the
// failed clGetDeviceInfo query.
int checkForImageSupport(cl_device_id device)
{
    cl_uint imageSupport;
    int error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
                                sizeof(imageSupport), &imageSupport, NULL);
    test_error(error, "Unable to query device for image support");

    return (imageSupport == 0) ? CL_IMAGE_FORMAT_NOT_SUPPORTED : 0;
}
|
|
|
|
// Reports whether `device` supports 3D images.
// Returns CL_IMAGE_FORMAT_NOT_SUPPORTED when the device has no image support
// at all, or when it is an EMBEDDED_PROFILE device whose maximum 3D image
// width, height and depth are all zero. Returns 0 otherwise, or the error
// code of a failing clGetDeviceInfo query.
int checkFor3DImageSupport(cl_device_id device)
{
    /* Images must be supported at all before 3D images can be */
    cl_uint imageSupport;
    int error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
                                sizeof(imageSupport), &imageSupport, NULL);
    test_error(error, "Unable to query device for image support");
    if (imageSupport == 0) return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    char profileName[128];
    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileName),
                            profileName, NULL);
    test_error(error, "Unable to query device for CL_DEVICE_PROFILE");

    const bool isEmbedded = (strcmp(profileName, "EMBEDDED_PROFILE") == 0);
    if (isEmbedded)
    {
        // Start from all-ones so a query that leaves the value untouched is
        // not mistaken for "zero".
        size_t maxWidth = -1L;
        size_t maxHeight = -1L;
        size_t maxDepth = -1L;

        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
                                sizeof(maxWidth), &maxWidth, NULL);
        test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");

        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
                                sizeof(maxHeight), &maxHeight, NULL);
        test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");

        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
                                sizeof(maxDepth), &maxDepth, NULL);
        test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");

        // Unsupported only when all three limits are zero.
        if ((maxHeight | maxWidth | maxDepth) == 0)
        {
            return CL_IMAGE_FORMAT_NOT_SUPPORTED;
        }
    }

    /* So our support is good */
    return 0;
}
|
|
|
|
// Reports whether `device` supports read-write image kernel arguments.
// Returns 0 when supported, CL_IMAGE_FORMAT_NOT_SUPPORTED when images or
// read-write images are unavailable, or the error code of a failing
// clGetDeviceInfo query.
int checkForReadWriteImageSupport(cl_device_id device)
{
    if (checkForImageSupport(device))
    {
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
    }

    auto device_cl_version = get_device_cl_version(device);
    if (device_cl_version >= Version(3, 0))
    {
        // In OpenCL 3.0, Read-Write images are optional.
        // Check if they are supported.
        cl_uint are_rw_images_supported{};
        // Run the query once and hand the stored result to the macro, so the
        // call cannot be repeated by macro expansion.
        const cl_int error = clGetDeviceInfo(
            device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
            sizeof(are_rw_images_supported), &are_rw_images_supported, nullptr);
        test_error(
            error,
            "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
        if (0 == are_rw_images_supported)
        {
            log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
            return CL_IMAGE_FORMAT_NOT_SUPPORTED;
        }
    }
    // READ_WRITE images are not supported on 1.X devices.
    else if (device_cl_version < Version(2, 0))
    {
        log_info("READ_WRITE_IMAGE tests skipped, OpenCL 2.0+ is required.\n");
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
    }
    // Support for read-write image arguments is required
    // for an 2.X device if the device supports images.

    /* So our support is good */
    return 0;
}
|
|
|
|
// Returns the largest CL_DEVICE_MEM_BASE_ADDR_ALIGN (converted from bits to
// bytes) over all devices of `context`, i.e. an alignment that satisfies
// every device. Returns 0 if the device list cannot be obtained.
//
// The result is cached in a function-local static once it is non-zero, so
// later calls return the cached value regardless of the context passed in.
size_t get_min_alignment(cl_context context)
{
    static cl_uint align_size = 0;

    if (0 == align_size)
    {
        size_t devices_size = 0;
        cl_int error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
                                        &devices_size);
        test_error_ret(error, "clGetContextInfo failed", 0);

        // A vector owns the list so the early return below cannot leak it.
        std::vector<cl_device_id> devices(devices_size / sizeof(cl_device_id));
        error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                                 devices.size() * sizeof(cl_device_id),
                                 devices.data(), NULL);
        test_error_ret(error, "clGetContextInfo failed", 0);

        cl_uint result = 0;
        for (size_t i = 0; i < devices.size(); i++)
        {
            cl_uint alignment = 0;

            error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                                    sizeof(cl_uint), (void *)&alignment, NULL);

            if (error == CL_SUCCESS)
            {
                alignment >>= 3; // convert bits to bytes
                result = (alignment > result) ? alignment : result;
            }
            else
                // A failing device is reported but does not abort the scan.
                print_error(error, "clGetDeviceInfo failed");
        }

        align_size = result;
    }

    return align_size;
}
|
|
|
|
// Returns the default single-precision rounding mode of `device`:
// CL_FP_ROUND_TO_NEAREST when the device reports it, otherwise
// CL_FP_ROUND_TO_ZERO, which is accepted only for EMBEDDED_PROFILE devices.
// Returns 0 when a query fails or the device reports neither an acceptable
// rounding mode nor the embedded profile.
cl_device_fp_config get_default_rounding_mode(cl_device_id device)
{
    char profileStr[128] = "";
    cl_device_fp_config single = 0;
    int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
                                sizeof(single), &single, NULL);
    test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0);

    if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;

    if (0 == (single & CL_FP_ROUND_TO_ZERO))
    {
        test_error_ret(-1,
                       "FAILURE: device must support either "
                       "CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST",
                       0);
    }

    // Make sure we are an embedded device before allowing a pass
    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
                            profileStr, NULL);
    test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);

    if (strcmp(profileStr, "EMBEDDED_PROFILE"))
    {
        // `error` is known to be success here, so an explicit failure code
        // is needed for the check to actually trigger.
        test_error_ret(-1,
                       "FAILURE: non-EMBEDDED_PROFILE devices must support "
                       "CL_FP_ROUND_TO_NEAREST",
                       0);
    }

    return CL_FP_ROUND_TO_ZERO;
}
|
|
|
|
// Returns 1 if any bit of `prop` is present in the device's
// CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, 0 otherwise (including when the query
// fails).
int checkDeviceForQueueSupport(cl_device_id device,
                               cl_command_queue_properties prop)
{
    cl_command_queue_properties supportedProps;
    cl_int status =
        clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
                        sizeof(supportedProps), &supportedProps, NULL);
    test_error_ret(status, "FAILURE: Unable to get device queue properties", 0);

    if ((supportedProps & prop) != 0)
    {
        return 1;
    }
    return 0;
}
|
|
|
|
// Logs a one-line summary of `device` (name, vendor, version and OpenCL C
// version) and, for devices reporting OpenCL 3.0 or newer, the latest
// conformance version passed. Returns CL_SUCCESS, or the error code of the
// first failing clGetDeviceInfo query.
int printDeviceHeader(cl_device_id device)
{
    char nameStr[512];
    char vendorStr[512];
    char versionStr[512];
    char cLangStr[512];

    int error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(nameStr),
                                nameStr, NULL);
    test_error(error, "Unable to get CL_DEVICE_NAME for device");

    error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendorStr),
                            vendorStr, NULL);
    test_error(error, "Unable to get CL_DEVICE_VENDOR for device");

    error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(versionStr),
                            versionStr, NULL);
    test_error(error, "Unable to get CL_DEVICE_VERSION for device");

    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
                            sizeof(cLangStr), cLangStr, NULL);
    test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");

    // The CL C version part is only printed when its query succeeded.
    const bool haveCLangVersion = (error == CL_SUCCESS);
    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
             "Device Version = %s%s%s\n",
             nameStr, vendorStr, versionStr,
             haveCLangVersion ? ", CL C Version = " : "",
             haveCLangVersion ? cLangStr : "");

    auto deviceClVersion = get_device_cl_version(device);
    if (deviceClVersion >= Version(3, 0))
    {
        auto conformanceVersion = get_device_info_string(
            device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
        log_info("Device latest conformance version passed: %s\n",
                 conformanceVersion.c_str());
    }

    return CL_SUCCESS;
}
|
|
|
|
// Returns the OpenCL C version reported by CL_DEVICE_OPENCL_C_VERSION for
// `device`. Devices whose OpenCL version is exactly 1.0 yield Version{1, 0}
// without querying. Returns Version{-1, 0} when the query fails or the
// returned string does not contain a "<digit>.<digit>" pair.
Version get_device_cl_c_version(cl_device_id device)
{
    auto device_cl_version = get_device_cl_version(device);

    // Special case: in OpenCL 1.0 the CL_DEVICE_OPENCL_C_VERSION query did
    // not exist, but since this is just the first version we can return 1.0.
    if (device_cl_version == Version{ 1, 0 })
    {
        return Version{ 1, 0 };
    }

    // Otherwise querying with CL_DEVICE_OPENCL_C_VERSION returns the OpenCL C
    // version string for the device.
    size_t opencl_c_version_size_in_bytes{};
    auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
                                 &opencl_c_version_size_in_bytes);
    test_error_ret(error,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
                   (Version{ -1, 0 }));

    std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
    error =
        clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
                        opencl_c_version.size(), &opencl_c_version[0], nullptr);
    test_error_ret(error,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
                   (Version{ -1, 0 }));

    // Scrape out the major, minor pair from the string: the characters
    // immediately before and after the first '.'.
    const size_t dot = opencl_c_version.find('.');
    if (dot == std::string::npos || dot == 0
        || dot + 1 >= opencl_c_version.size())
    {
        log_error("Unable to parse CL_DEVICE_OPENCL_C_VERSION string \"%s\"\n",
                  opencl_c_version.c_str());
        return Version{ -1, 0 };
    }

    const char major = opencl_c_version[dot - 1];
    const char minor = opencl_c_version[dot + 1];
    if (major < '0' || major > '9' || minor < '0' || minor > '9')
    {
        log_error("Unable to parse CL_DEVICE_OPENCL_C_VERSION string \"%s\"\n",
                  opencl_c_version.c_str());
        return Version{ -1, 0 };
    }

    return Version{ major - '0', minor - '0' };
}
|
|
|
|
// Returns the most recent OpenCL C version supported by `device`.
// For devices reporting OpenCL 3.0 or newer this is the maximum entry of the
// CL_DEVICE_OPENCL_C_ALL_VERSIONS query (Version{-1, 0} if that query
// fails); for older devices it defers to get_device_cl_c_version.
Version get_device_latest_cl_c_version(cl_device_id device)
{
    auto device_version = get_device_cl_version(device);

    // Pre-3.0 devices: fall back to the single-version query.
    if (!(device_version >= Version{ 3, 0 }))
    {
        return get_device_cl_c_version(device);
    }

    // 3.0+ devices must support CL_DEVICE_OPENCL_C_ALL_VERSIONS; fetch the
    // list size first, then the list itself.
    size_t bytes_needed{};
    auto status = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0,
                                  nullptr, &bytes_needed);
    test_error_ret(status,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
                   (Version{ -1, 0 }));

    std::vector<cl_name_version> supported(bytes_needed
                                           / sizeof(cl_name_version));
    status = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
                             bytes_needed, supported.data(), nullptr);
    test_error_ret(status,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
                   (Version{ -1, 0 }));

    // Keep the highest version seen.
    Version newest{};
    for (size_t idx = 0; idx < supported.size(); ++idx)
    {
        Version listed{
            static_cast<int>(CL_VERSION_MAJOR(supported[idx].version)),
            static_cast<int>(CL_VERSION_MINOR(supported[idx].version))
        };
        if (listed > newest)
        {
            newest = listed;
        }
    }
    return newest;
}
|
|
|
|
// Returns the highest OpenCL C version that every device in `context`
// supports. When the devices mix a 2.x and a 3.x latest version the result
// drops to 1.2, because OpenCL 3.0 is not backwards compatible with 2.0.
// Returns Version{-1, 0} if the device list cannot be obtained or is empty.
Version get_max_OpenCL_C_for_context(cl_context context)
{
    // Get all the devices in the context and find the maximum
    // universally supported OpenCL C version.
    size_t devices_size_in_bytes{};
    auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
                                  &devices_size_in_bytes);
    test_error_ret(error, "clGetContextInfo failed for CL_CONTEXT_DEVICES",
                   (Version{ -1, 0 }));

    std::vector<cl_device_id> devices(devices_size_in_bytes
                                      / sizeof(cl_device_id));
    if (devices.empty())
    {
        // Nothing to inspect; indexing devices[0] would be out of range.
        log_error("No devices found for context.\n");
        return Version{ -1, 0 };
    }

    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                             devices.size() * sizeof(cl_device_id),
                             devices.data(), nullptr);
    test_error_ret(error, "clGetContextInfo failed for CL_CONTEXT_DEVICES",
                   (Version{ -1, 0 }));

    auto current_version = get_device_latest_cl_c_version(devices[0]);
    for (size_t idx = 1; idx < devices.size(); ++idx)
    {
        auto device_version = get_device_latest_cl_c_version(devices[idx]);

        // OpenCL 3.0 is not backwards compatible with 2.0.
        // If we have 3.0 and 2.0 in the same driver we
        // use 1.2.
        if (((device_version >= Version(2, 0)
              && device_version < Version(3, 0))
             && current_version >= Version(3, 0))
            || (device_version >= Version(3, 0)
                && (current_version >= Version(2, 0)
                    && current_version < Version(3, 0))))
        {
            current_version = Version(1, 2);
        }
        else
        {
            current_version = std::min(device_version, current_version);
        }
    }
    return current_version;
}
|
|
|
|
// Reports whether `device` supports OpenCL C `version`.
//
// For devices reporting OpenCL 3.0 or newer, the
// CL_DEVICE_OPENCL_C_ALL_VERSIONS list is searched first and a match returns
// true (a failing query returns false). In every other case, including a 3.0+
// device whose list does not contain `version`, the answer is whether
// `version` is <= the version from get_device_cl_c_version.
// NOTE(review): the fall-through for 3.0+ devices whose list lacks `version`
// looks at odds with the stated rule that such a version is unsupported;
// confirm the intended behaviour.
bool device_supports_cl_c_version(cl_device_id device, Version version)
{
    auto device_version = get_device_cl_version(device);

    if (device_version >= Version{ 3, 0 })
    {
        // Query the list size, then the list of supported CL C versions.
        size_t bytes_needed{};
        auto status = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
                                      0, nullptr, &bytes_needed);
        test_error_ret(
            status,
            "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
            (false));

        std::vector<cl_name_version> supported(bytes_needed
                                               / sizeof(cl_name_version));
        status = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
                                 bytes_needed, supported.data(), nullptr);
        test_error_ret(
            status,
            "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
            (false));

        for (size_t idx = 0; idx < supported.size(); ++idx)
        {
            Version listed{
                static_cast<int>(CL_VERSION_MAJOR(supported[idx].version)),
                static_cast<int>(CL_VERSION_MINOR(supported[idx].version))
            };
            if (listed == version) return true;
        }
    }

    return version <= get_device_cl_c_version(device);
}
|
|
|
|
// Repeatedly calls `fn` until it returns true or roughly `timeout_ms`
// milliseconds of sleep time have accumulated, sleeping between attempts.
//
// Returns true as soon as `fn` returns true, false if the timeout budget is
// used up first. `fn` is never called when timeout_ms is 0. Elapsed time is
// tracked by summing the sleep interval; the time spent inside `fn` is not
// counted.
bool poll_until(unsigned timeout_ms, unsigned interval_ms,
                std::function<bool()> fn)
{
    // A zero interval would never advance the elapsed counter and could spin
    // forever; treat it as the smallest step of 1 ms.
    const unsigned step_ms = (interval_ms == 0) ? 1u : interval_ms;
    unsigned time_spent_ms = 0;

    while (time_spent_ms < timeout_ms)
    {
        if (fn())
        {
            return true;
        }
        usleep(step_ms * 1000);

        // Saturate at timeout_ms so the counter cannot wrap around.
        if (timeout_ms - time_spent_ms <= step_ms)
            time_spent_ms = timeout_ms;
        else
            time_spent_ms += step_ms;
    }

    return false;
}
|
|
|
|
// Reports whether `device` supports double precision: either it advertises
// the cl_khr_fp64 extension or its CL_DEVICE_DOUBLE_FP_CONFIG is non-zero.
// Returns false if the fp-config query fails.
bool device_supports_double(cl_device_id device)
{
    if (is_extension_available(device, "cl_khr_fp64"))
    {
        return true;
    }

    cl_device_fp_config double_fp_config = 0;
    cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
                                 sizeof(double_fp_config), &double_fp_config,
                                 nullptr);
    // Return an explicit false on failure; handing back the error code from
    // a bool function would read as "supported".
    test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed",
                   false);
    return double_fp_config != 0;
}
|
|
|
|
// Reports whether `device` advertises the cl_khr_fp16 extension.
bool device_supports_half(cl_device_id device)
{
    const bool has_fp16 = is_extension_available(device, "cl_khr_fp16");
    return has_fp16;
}
|