mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Initial open source release of OpenCL 2.0 CTS.
This commit is contained in:
151
CMakeLists.txt
Normal file
151
CMakeLists.txt
Normal file
@@ -0,0 +1,151 @@
|
||||
# cmake_minimum_required() has to run before project(): it establishes the
# policy defaults that project() and every later command are evaluated under.
cmake_minimum_required(VERSION 2.8)

# Suffix appended to the project name and to the helper target names.
set( CONFORMANCE_SUFFIX "" )

set(CLConform_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})

project(CLConform${CONFORMANCE_SUFFIX})

# Initial build flavor: only the exact (lower-case) value "release" selects
# the release flavor; every other value, including an empty one, means debug.
if(CMAKE_BUILD_TYPE STREQUAL "release")
    set (BUILD_FLAVOR "release")
else()
    set (BUILD_FLAVOR "debug")
endif()

# Version components and the strings assembled from them.
set(CLConform_VERSION_MAJOR "2")
set(CLConform_VERSION_MINOR "0")
set(CLConform_VERSION_MICRO "0")
set(CLConform_VERSION_EXTRA "")
set(CLConform_VERSION "${CLConform_VERSION_MAJOR}.${CLConform_VERSION_MINOR}")
set(CLConform_VERSION_FULL
    "${CLConform_VERSION}.${CLConform_VERSION_MICRO}${CLConform_VERSION_EXTRA}")
|
||||
|
||||
# Define the CL_USE_DEPRECATED_OPENCL_*_APIS macros for every target created
# below this point.
add_definitions(
    -DCL_USE_DEPRECATED_OPENCL_2_0_APIS=1
    -DCL_USE_DEPRECATED_OPENCL_1_1_APIS=1
)

# Extra location searched for cmake modules before ${CMAKE_ROOT}/Modules/.
set(CMAKE_MODULE_PATH
    ${CMAKE_MODULE_PATH}
    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules
)
|
||||
|
||||
|
||||
# Support both VS2008 and VS2012.
# BUILD_DIR is rooted at the ADRENO_DRIVER environment variable, which is read
# once, at configure time.
set(BUILD_DIR "$ENV{ADRENO_DRIVER}/build")
if(MSVC90)
    set(VS_BUILD_DIR "${BUILD_DIR}/vs2008")
else()
    # Unconditional fallback: everything that is not MSVC90 uses the vs2012
    # directory (the branch is not restricted to MSVC110).
    set(VS_BUILD_DIR "${BUILD_DIR}/vs2012")
endif()
|
||||
|
||||
#-----------------------------------------------------------
# Default Configurable Test Set
#-----------------------------------------------------------
# Reset the interop-support switches to "not set" before the vendor
# customization file gets a chance to define them.
unset(D3D10_IS_SUPPORTED)
unset(D3D11_IS_SUPPORTED)
unset(GL_IS_SUPPORTED)
|
||||
|
||||
#-----------------------------------------------------------
|
||||
# Vendor Customization
|
||||
#-----------------------------------------------------------
|
||||
#Vendor Customization File can be included here to provide a way to automatically
|
||||
#build driver as a dependency of the conformance tests, or other such CMake customization
|
||||
# Optional vendor hook; a missing CMakeVendor.txt is not an error.
include(CMakeVendor.txt OPTIONAL)

# Both the header directory and the library directory are mandatory.
# FATAL_ERROR stops processing, so the code after this guard only runs when
# both variables are set.
if(NOT CL_INCLUDE_DIR OR NOT CL_LIB_DIR)
    message(STATUS "OpenCL hasn't been found!")
    message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR")
endif()

set(OPENCL_INCLUDE_DIR ${CL_INCLUDE_DIR})
link_directories(${CL_LIB_DIR})
|
||||
|
||||
include(CheckFunctionExists)
|
||||
include(CheckIncludeFiles)
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
    # Flags common to every GCC/Clang build.
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -std=gnu99")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")

    if(NOT CMAKE_ARM_COMPILER AND NOT ANDROID)
        # -msse -mfpmath=sse to force gcc to use sse for float math,
        # avoiding excess precision problems that cause tests like int2float
        # to falsely fail. -ffloat-store also works, but WG suggested
        # that sse would be better.
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse -mfpmath=sse")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse -mfpmath=sse")
    endif()
else()
    # Any other compiler only gets the __SSE__ macro, in /D option syntax.
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
endif()
|
||||
|
||||
# Libraries collected in CLConform_LIBRARIES.
list(APPEND CLConform_LIBRARIES ${OPENCL_LIBRARIES})

if(ANDROID)
    # Android builds additionally list the math library.
    list(APPEND CLConform_LIBRARIES m)
endif()

if(APPLE)
    # macOS builds add the CoreFoundation and IOKit frameworks.
    find_library(corefoundation CoreFoundation)
    find_library(iokit IOKit)
    list(APPEND CLConform_LIBRARIES ${corefoundation} ${iokit})
endif()

list(APPEND CLConform_INCLUDE_DIR ${OPENCL_INCLUDE_DIR})

# Header search path shared by all targets: the common harness/GL helpers
# plus the OpenCL headers.
include_directories(
    ${CLConform_SOURCE_DIR}/test_common/harness
    ${CLConform_SOURCE_DIR}/test_common/gles
    ${CLConform_SOURCE_DIR}/test_common/gl
    ${CLConform_INCLUDE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/test_common/harness
)
|
||||
|
||||
# Re-derive BUILD_FLAVOR from the (lower-case) build type. Any other value
# leaves the flavor chosen earlier untouched.
if(CMAKE_BUILD_TYPE STREQUAL "debug")
    set (BUILD_FLAVOR "debug")
elseif(CMAKE_BUILD_TYPE STREQUAL "release")
    set (BUILD_FLAVOR "release")
endif()

# The conformance tests themselves live below test_conformance/.
add_subdirectory(test_conformance)
|
||||
|
||||
# Glob patterns handed to the copy helper targets.
set (PY_PATH "${CLConform_SOURCE_DIR}/test_conformance/*.py")
set (CSV_PATH "${CLConform_SOURCE_DIR}/test_conformance/*.csv")
# Support both VS2008 and VS2012.
set (DLL_FILES "${VS_BUILD_DIR}/Debug/*.dll")

# Destination for the copied files. When CMAKE_RUNTIME_OUTPUT_DIRECTORY is not
# set, the plain expansion would yield the absolute path "/Debug/", so fall
# back to a directory inside the build tree instead.
if(CMAKE_RUNTIME_OUTPUT_DIRECTORY)
    set (DST_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug/")
else()
    set (DST_DIR "${CMAKE_CURRENT_BINARY_DIR}/Debug/")
endif()

if (WIN32)
    # NOTE: COPY is "echo" here, so this target only prints the source and
    # destination; it does not copy anything.
    set (COPY "echo")
    add_custom_target( COPY_DLL${CONFORMANCE_SUFFIX} ALL
        COMMAND ${COPY} \"${DLL_FILES}\" \"${DST_DIR}\"
        COMMENT "Copying dll files.. ")
else ()
    # Empty placeholder target so the set_property() below always has a target.
    set (COPY cp)
    add_custom_target( COPY_DLL${CONFORMANCE_SUFFIX} )
endif()

set_property(TARGET COPY_DLL${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}")
|
||||
|
||||
# COPY_FILES target: an empty placeholder everywhere except Windows, where it
# runs ${COPY} over the python scripts, csv files and dlls on every build.
if(NOT WIN32)
    add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} )
else()
    add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ALL
        COMMAND ${COPY} ${PY_PATH} ${DST_DIR}
        COMMAND ${COPY} ${CSV_PATH} ${DST_DIR}
        COMMAND ${COPY} ${DLL_FILES} ${DST_DIR}
        COMMENT "Copying other files to output folder..." )
endif()
|
||||
|
||||
# Copy required CL include directories into the build directory
# as required for the compiler testing.
set(compiler_test_dir "${CLConform_SOURCE_DIR}/test_conformance/compiler")

# ... For running the compiler test on the command line.
file(COPY
    "${compiler_test_dir}/includeTestDirectory"
    "${compiler_test_dir}/secondIncludeTestDirectory"
    DESTINATION ${DST_DIR})

# ... For running the compiler test with VisualStudio.
# The copy goes into the actual binary tree rather than a hard-coded
# "<source>/build" directory, so it also works when the build directory has a
# different name (build_win.bat uses "build_win") and never writes into the
# source tree.
if(MSVC)
    file(COPY
        "${compiler_test_dir}/includeTestDirectory"
        "${compiler_test_dir}/secondIncludeTestDirectory"
        DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/test_conformance/compiler")
endif()

set_property(TARGET COPY_FILES${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}")
|
||||
21
CMakeVendor.txt
Normal file
21
CMakeVendor.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
# Set the prefix and suffix for the generated executables
|
||||
# For example, if you want the api executable to be test_conformance_api_12
|
||||
# Set prefix to "test_conformance_" and suffix to "_12"
|
||||
set(CONFORMANCE_PREFIX "test_conformance_" )
# The main CMakeLists.txt reads CONFORMANCE_SUFFIX, so that is the name to set.
set(CONFORMANCE_SUFFIX "" )
# Legacy misspelling, kept in sync for anything that still reads it.
set(CONFORMNACE_SUFFIX "${CONFORMANCE_SUFFIX}" )
|
||||
|
||||
# Include cmake files to build driver
|
||||
# to build driver as a dependency of tests
|
||||
# in this example environment variable $OPENCL_DRIVER points to driver base
|
||||
# Ex include($ENV{OPENCL_DRIVER}/driver.cmake)
|
||||
|
||||
# We intentionally hardcode "_win32" to ensure backwards compatibility (to avoid breaking HAAVE)
|
||||
# On Android, record the pointer width of the target ABI in ARCH.
if(ANDROID)
    if(ARM64_V8A)
        set(ARCH "64")
    else()
        set(ARCH "32")
    endif()
endif()

# Default header location inside the driver tree named by the OPENCL_DRIVER
# environment variable. A CL_INCLUDE_DIR already supplied by the caller
# (for example -DCL_INCLUDE_DIR=... from build_android.py) is left alone.
if(NOT CL_INCLUDE_DIR)
    set (CL_INCLUDE_DIR "$ENV{OPENCL_DRIVER}/include/public/")
endif()
|
||||
159
build_android.py
Normal file
159
build_android.py
Normal file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------#
|
||||
# android-cmake and android-ndk based build script for conformance
|
||||
#-------------------------------------------------------------------------------#
|
||||
"""
|
||||
Dependencies:
|
||||
|
||||
1) android-ndk version android-ndk-r10d or higher is required. Further, the environment
|
||||
variable ANDROID_NDK should be defined to point to it.
|
||||
|
||||
2) android-cmake should be installed (else the script can install it for you). If installed,
|
||||
the environment variable ANDROID_CMAKE should point to install location, unless it is in the current
|
||||
working directory in which case it is picked up by default.
|
||||
|
||||
3) CL_INCLUDE_DIR should be defined to point to CL headers. Alternately, this can be provided
|
||||
as an input (-I)
|
||||
|
||||
4) Path to opencl library to link against (libOpenCL.so) can be provided using -L. If this isn't
|
||||
available the script will try to use CL_LIB_DIR_64 or CL_LIB_DIR_32 environment variables -
|
||||
if available - to pick up the right library for the architecture being built.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import argparse
|
||||
import time
|
||||
import shlex
|
||||
|
||||
# Wall-clock time at start-up; the script reports the elapsed time on exit.
start = time.time()
# Base name of this script as it was invoked.
script = os.path.basename( sys.argv[ 0 ] )

def die (msg):
    """Print ``msg`` and terminate the process with exit status -1."""
    # print(...) with one argument produces the same output on Python 2 and 3,
    # matching the print() calls used in the rest of the file.
    print(msg)
    sys.exit(-1)
|
||||
|
||||
def execute (cmdline):
    """Run a command and fail loudly when it does not succeed.

    cmdline -- program name or argument list, handed unchanged to
               subprocess.call().

    Raises Exception carrying the command and its exit status when the status
    is non-zero.
    """
    retcode = subprocess.call(cmdline)
    if retcode != 0:
        # Report the command that was actually run (the old code referenced an
        # undefined name here and raised NameError instead).
        raise Exception("Failed to execute '%s', got %d" % (cmdline, retcode))
|
||||
|
||||
def build(args):
    """Run ``make`` for the whole tree, or for one test when args.testDir is set.

    The single-test variant expects the current directory to be args.bld_dir:
    it changes into test_conformance/<testDir>, builds there and changes back.
    An unknown test only produces an error message.
    """
    if not args.testDir:
        print("building...")
        execute("make")
        return

    test_path = os.path.join(args.bld_dir, "test_conformance", args.testDir)
    if not os.path.exists(test_path):
        print ("Error: %s test doesn't exist" %args.testDir)
        return

    os.chdir( os.path.join("test_conformance",args.testDir) )
    print("Building test: %s..." %args.testDir)
    execute("make")
    os.chdir(args.bld_dir)
|
||||
|
||||
|
||||
# configure: assemble the cmake command line and run it through execute().
# The command starts with the android-cmake toolchain file, then adds one
# "-D <value>" pair per entry of args.cmake_defs, the CL include/lib
# directories, the fixed API level "android-21" and finally the source
# directory.
# NOTE(review): indentation was lost in this copy of the file, so it cannot be
# told from here whether '-DANDROID_SO_UNDEFINED=ON' belongs to the
# `args.arch == "64"` branch or is always added - confirm against the original.
def configure (args):
|
||||
print("configuring...")
|
||||
cmdline = []
|
||||
cmdline.extend(['cmake', "-DCMAKE_TOOLCHAIN_FILE=" + os.path.join(args.android_cmake,"android.toolchain.cmake")])
|
||||
for var in args.cmake_defs :
|
||||
cmdline.extend([ '-D', var ])
|
||||
cmdline.extend(['-DCL_INCLUDE_DIR=' + args.inc_dir])
|
||||
cmdline.extend(['-DCL_LIB_DIR=' + args.lib_dir])
|
||||
cmdline.extend(['-DANDROID_NATIVE_API_LEVEL=' + "android-21"])
|
||||
if args.arch == "64":
|
||||
cmdline.extend(['-DANDROID_ABI=arm64-v8a'])
|
||||
cmdline.extend(['-DANDROID_SO_UNDEFINED=ON'])
|
||||
cmdline.extend([args.src_dir])
|
||||
execute(cmdline)
|
||||
|
||||
def check_var (parser, args, name):
    """Report a usage error through ``parser`` when option ``name`` is unset.

    parser -- object whose error() method is called with the message
    args   -- parsed options; the value is looked up by attribute name
    name   -- name of the option that must have a truthy value
    """
    value = vars(args)[name]
    if value:
        return
    parser.error("%s needs to be defined" % name)
|
||||
|
||||
def print_config(args):
    """Print the effective configuration (paths, cmake options, architecture)."""
    print("----------CONFIGURATION--------------\n")

    # (format, value) pairs printed in this order, one per line.
    rows = (
        ("android_cmake: %s", args.android_cmake),
        ("android_ndk: %s", args.android_ndk),
        ("lib_dir: %s", args.lib_dir),
        ("inc_dir: %s", args.inc_dir),
    )
    for fmt, value in rows:
        print(fmt % value)

    # The option list is only shown when at least one -D was given.
    if len(args.cmake_defs):
        quoted = [ " `%s'" % definition for definition in args.cmake_defs ]
        print("cmake options:" + "\n:".join(quoted))

    print("architecture: %s" % args.arch)
    print("-------------------------------------\n")
|
||||
|
||||
def get_input():
    """Read one line from the user and interpret it as a yes/no answer.

    Returns True for 'yes', 'y', 'ye' or an empty line and False for 'no' or
    'n' (case-insensitive). Any other answer prints a hint and exits.
    """
    affirmative = set(['yes','y', 'ye', ''])
    negative = set(['no','n'])

    answer = raw_input().lower()
    if answer in affirmative:
        return True
    if answer in negative:
        return False

    sys.stdout.write("Please respond with 'yes' or 'no'")
    exit()
|
||||
|
||||
def install_android_cmake():
    """Offer to download android-cmake and record where it was installed.

    Uses the module-level ``parser`` and ``args`` objects. When the user
    agrees, the repository is cloned to <args.src_dir>/android-cmake and
    args.android_cmake is pointed at it; otherwise the script exits.
    """
    parser.print_help()
    print("\nandroid-cmake doesn't seem to be installed - It should be provided as a) cmdline input b) environment variable $ANDROID_CMAKE or c) present in the current directory\n")
    print("if you would like to download and install it in the current directory please enter yes\n")
    print("if you would like to provide an environment variable($ANDROID_CMAKE) or command-line input(--android_cmake) rerun the script enter no\n")
    print("input: ")
    if not get_input():
        exit()

    print("installing android-cmake")
    # Clone to the exact location that is recorded below (and that the start-up
    # code probes), instead of whatever the current directory happens to be.
    target = os.path.join(args.src_dir,"android-cmake")
    status = subprocess.call(['git', 'clone', 'https://github.com/taka-no-me/android-cmake', target])
    if status != 0:
        # Do not continue with a path that was never populated.
        print("Failed to clone android-cmake into %s (git exited with %d)" % (target, status))
        sys.exit(-1)
    args.android_cmake = target
|
||||
|
||||
# Script body: parse the command line, fill in defaults from the environment,
# then configure and build inside bld_android_<arch>. The elapsed time is
# printed on every exit path.
try:
    parser = argparse.ArgumentParser()
    parser.add_argument('--android_cmake', dest='android_cmake', default=os.environ.get('ANDROID_CMAKE'), help="Path to android-cmake (can also be set using environment variable $ANDROID_CMAKE).")
    parser.add_argument('--android_ndk', dest='android_ndk', default=os.environ.get('ANDROID_NDK'), help="Path to android-ndk (can also be set using environment variable $ANDROID_NDK).")
    parser.add_argument('-L','--lib_dir', dest='lib_dir', default="", help="Path to libOpenCL to link against (can also be set using environment variable $CL_LIB_DIR_32 and $CL_LIB_DIR_64).")
    parser.add_argument('-I','--include_dir', dest='inc_dir', default=os.environ.get('CL_INCLUDE_DIR'), help="Path to headers (can also be set using environment variable $CL_INCLUDE_DIR).")
    parser.add_argument('-D', dest='cmake_defs', action='append', default=[], help="Define CMAKE variable")
    parser.add_argument('-a','--arch', default="32", help="Architecture to build for (32 or 64)")
    parser.add_argument('-t','--test', dest='testDir', default="", help="Builds the given test")

    args = parser.parse_args()

    # Directory that contains this script.
    args.src_dir = os.path.realpath(os.path.dirname( sys.argv[ 0 ]))

    # Without an explicit android-cmake location, use a copy next to the
    # script if there is one, else offer to install it.
    if not args.android_cmake:
        bundled = os.path.join(args.src_dir,"android-cmake")
        if os.path.exists(bundled):
            args.android_cmake = bundled
        else:
            install_android_cmake()

    # Without -L, take the library directory from CL_LIB_DIR_32 / CL_LIB_DIR_64.
    if not args.lib_dir:
        width = "32" if args.arch == "32" else "64"
        args.lib_dir = os.environ.get("CL_LIB_DIR_" + width)

    for required in ("android_cmake", "lib_dir", "inc_dir", "android_ndk"):
        check_var(parser, args, required)

    print_config(args)

    args.bld_dir = os.path.join(args.src_dir, 'bld_android_%s' % args.arch)
    if not os.path.exists(args.bld_dir):
        os.makedirs(args.bld_dir)
    os.chdir(args.bld_dir)

    configure(args)
    build(args)

    sys.exit( 0 )

finally:
    finish = time.time()
    print("Elapsed time: %.0f s." % ( finish - start ) )
|
||||
6
build_lnx.sh
Normal file
6
build_lnx.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
# Configure and build the conformance tests in ./build_lnx.

# Stop at the first failing step instead of running make in the wrong place
# or on an unconfigured tree.
set -e

mkdir -p build_lnx
cd build_lnx
# The generator is selected with upper-case -G; lower-case -g is not the
# generator option.
cmake -G "Unix Makefiles" ../
make --jobs 8
|
||||
35
build_win.bat
Normal file
35
build_win.bat
Normal file
@@ -0,0 +1,35 @@
|
||||
@ECHO off
rem Configure (first run only) and build CLConform.sln inside build_win.
setlocal ENABLEDELAYEDEXPANSION

IF DEFINED ProgramFiles(x86) SET ProgFilesDir=%ProgramFiles(x86)%
IF NOT DEFINED ProgFilesDir SET ProgFilesDir=%ProgramFiles%

rem -------------------------------- Update these to match what's on your PC ------------------------------------------------

SET VCPATH="%ProgFilesDir%\Microsoft Visual Studio 11.0\Common7\IDE\devenv.com"

rem CMAKEPATH is not set anywhere in this script; it has to come from the
rem calling environment if cmake is not already on PATH.
SET PATH=%CMAKEPATH%;%PATH%

rem -------------------------------------------------------------------------------------------------------------------------

call "%VS110COMNTOOLS%\vsvars32.bat"

rem Re-running the script must not fail on an existing build directory.
IF NOT EXIST build_win mkdir build_win
pushd build_win
IF NOT EXIST CLConform.sln (
    echo "Solution file not found, running Cmake"
    cmake -G "Visual Studio 11" ..\
    rem Without a generated solution there is nothing to build.
    IF ERRORLEVEL 1 (
        popd
        exit /b 1
    )
) else (
    echo "Solution file found CLConform.sln "
)

echo Building CLConform.sln...
%VCPATH% CLConform.sln /build

GOTO:EOF
|
||||
|
||||
104
clean_tests.py
Normal file
104
clean_tests.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import sys, os, re
|
||||
from subprocess import Popen, PIPE
|
||||
from optparse import OptionParser
|
||||
|
||||
# trail_spaces: strips trailing whitespace (spaces, tabs, line terminator)
# and re-appends a single line ending.
def trail_spaces(line):
    """Return ``line`` with its trailing whitespace collapsed to one line ending.

    A line containing "\\r\\n" gets "\\r\\n" back, any other line gets "\\n".
    A line without trailing whitespace is returned unchanged.
    """
    trailing = re.compile(r"\s+$")
    if trailing.search(line) is None:
        # Nothing to strip; this also covers a last line without a newline.
        return line

    ending = "\r\n" if "\r\n" in line else "\n"
    # Trailing tabs are whitespace too, so this substitution removes them.
    return trailing.sub(ending, line)
|
||||
|
||||
#convert_tabs: This method converts tabs to 4 spaces
def convert_tabs(line):
    """Return ``line`` with every tab character replaced by four spaces."""
    # Four spaces, as stated above and in the --notabs help text (the previous
    # replacement string was a single space).
    return line.replace("\t", "    ")
|
||||
|
||||
#convert_lineends: turns DOS line endings (CR LF) into Unix ones (LF)
def convert_lineends(line):
    """Return ``line`` with every "\\r\\n" sequence replaced by "\\n"."""
    return line.replace("\r\n", "\n")
|
||||
|
||||
#processfile: rewrites one file in place, applying the clean-ups that are
#             switched on by the flags
def processfile(file,tabs, lineends,trails,verbose):
    """Clean ``file`` line by line and write the result back to the same path.

    tabs     -- convert tabs to spaces (convert_tabs)
    lineends -- convert DOS line endings to Unix (convert_lineends)
    trails   -- strip trailing whitespace (trail_spaces)
    verbose  -- print the name of the file being processed
    """
    if verbose:
        print("processing file: "+file)

    def _clean(text):
        # The order matters: tabs first, then line endings, then trailing blanks.
        if tabs:
            text = convert_tabs(text)
        if lineends:
            text = convert_lineends(text)
        if trails:
            text = trail_spaces(text)
        return text

    with open(file,'r') as fr:
        processed_data = [_clean(line) for line in fr.readlines()]

    with open(file,'w') as fw:
        fw.writelines(processed_data)
|
||||
|
||||
#findfiles: collects every C/C++ source and header file below the current
#           directory and cleans each of them
def findfiles(tabs,lineends,trails,verbose):
    """Run processfile() on all .c/.cpp/.h/.hpp files under "./".

    The flags are passed through to processfile() unchanged. The file list is
    gathered completely before the first file is rewritten.
    """
    code_suffixes = ('.c','.cpp','.h','.hpp')
    testfiles = [
        os.path.join(root, name)
        for root, dirs, files in os.walk("./")
        for name in files
        if name.endswith(code_suffixes)
    ]
    for path in testfiles:
        processfile(path,tabs,lineends,trails,verbose)
|
||||
|
||||
# Main function
def main():
    """Parse the command line and clean either one commit's files or the tree.

    With --git <SHA1> only the files named by `git show --name-only` for that
    commit are processed; otherwise every code file below the current
    directory is.
    """
    parser = OptionParser()
    parser.add_option("--notabs", dest="tabs", action="store_false", default=True, help="Disable converting tabs to 4 spaces.")
    parser.add_option("--notrails", dest="trails", action="store_false", default=True, help="Disable removing trailing whitespaces and trailing tabs.")
    parser.add_option("--nolineends", dest="lineends", action="store_false", default=True, help=" Disable converting line endings to Unix from DOS.")
    parser.add_option("--verbose", dest="verbose", action="store_true", default=False, help="Prints out the files being processed.")
    parser.add_option("--git", dest="SHA1", default="", help="Processes only the files present in the particular <SHA1> commit.")
    parser.add_option('-o', action="store", default=True, help="Default: All the code files (.c,.cpp,.h,.hpp) in the current directory and subdirectories will be processed")

    (options, args) = parser.parse_args()

    if not options.SHA1:
        findfiles(options.tabs,options.lineends,options.trails,options.verbose)
        return

    # Ask git for the file names touched by the commit, one per output line.
    pl = Popen(["git","show", "--pretty=format:", "--name-only",options.SHA1], stdout=PIPE)
    cmdout = pl.communicate()[0]
    for file in cmdout.split("\n"):
        # Every output line is echoed, but only non-empty ones name a file.
        print(file)
        if file:
            processfile(file,options.tabs,options.lineends,options.trails,options.verbose)
|
||||
|
||||
# Start the process by calling main, but only when run as a script, so that
# importing this module does not rewrite any files.
if __name__ == "__main__":
    main()
|
||||
4
license.txt
Normal file
4
license.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
The code inside this directory and its subdirectories is
|
||||
"Open GL (including Open CL) Automated Test System - Common Code"
|
||||
and is subject to the license agreement between Apple and the licensee.
|
||||
|
||||
32
test_common/Makefile
Normal file
32
test_common/Makefile
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
# Sub-directories built by this Makefile. utils/ is currently commented out.
PRODUCTS = harness/
# utils/

# Directory make was started from.
TOP=$(shell pwd)

# Default goal: build every product directory.
all: $(PRODUCTS)
|
||||
|
||||
# Run "clean" in every product directory; a missing directory only produces a
# warning. $(MAKE) is used instead of a literal "make" so that command-line
# flags and the jobserver are passed on to the sub-make.
clean:
	@for testdir in $(dir $(PRODUCTS)) ; \
	do ( \
		echo "==================================================================================" ; \
		echo "Cleaning $$testdir" ; \
		echo "==================================================================================" ; \
		if test -d $$testdir; \
			then cd $$testdir && $(MAKE) clean; \
			else echo "Warning: Directory '$$testdir' Does Not Exist"; \
		fi; \
	); \
	done
|
||||
|
||||
# Build one product directory by running make inside it; a missing directory
# only produces a warning. $(MAKE) is used instead of a literal "make" so that
# command-line flags and the jobserver are passed on to the sub-make.
$(PRODUCTS):
	@echo "==================================================================================" ;
	@echo "(`date "+%H:%M:%S"`) Make $@" ;
	@echo "==================================================================================" ;
	@if test -d $@; \
		then cd $(dir $@) && $(MAKE); \
		else echo "Warning: Directory '$@' Does Not Exist"; \
	fi

# These goals never correspond to files that make should check for freshness.
.PHONY: clean $(PRODUCTS) all
|
||||
60
test_common/gl/gl_headers.h
Normal file
60
test_common/gl/gl_headers.h
Normal file
@@ -0,0 +1,60 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _gl_headers_h
#define _gl_headers_h

// Pulls in the OpenGL / GLUT headers appropriate for the platform being built.

#ifdef __APPLE__
    #include <OpenGL/OpenGL.h>
    // Use the GL3 headers when CGL_VERSION_1_3 is defined, the legacy ones otherwise.
    #ifdef CGL_VERSION_1_3
        #include <OpenGL/gl3.h>
        #include <OpenGL/gl3ext.h>
    #else
        #include <OpenGL/gl.h>
        #include <OpenGL/glext.h>
    #endif // CGL_VERSION_1_3
    #include <GLUT/glut.h>
#else // !__APPLE__
    #ifdef _WIN32
        #include <windows.h>
    #endif // _WIN32

    #ifdef __ANDROID__
        // Android uses the GLES headers, with GL_GLEXT_PROTOTYPES defined first.
        #ifndef GL_GLEXT_PROTOTYPES
            #define GL_GLEXT_PROTOTYPES
        #endif
        #include <GLES/gl.h>
        #include <GLES/glext.h>
    #else // !__ANDROID__
        #include <GL/glew.h>
        #include <GL/gl.h>
        #include <GL/glext.h>
    #endif // __ANDROID__

    // GLUT flavor: glut on Windows, freeglut elsewhere, none on Android.
    #ifdef _WIN32
        #include <GL/glut.h>
    #elif !defined(__ANDROID__)
        #include <GL/freeglut.h>
    #endif
#endif // __APPLE__

#ifdef _WIN32
GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString);
// No glutGetProcAddress in the standard glut v3.7.
#define glutGetProcAddress(procName) wglGetProcAddress(procName)
#endif // _WIN32

#endif // _gl_headers_h
|
||||
|
||||
2496
test_common/gl/helpers.cpp
Normal file
2496
test_common/gl/helpers.cpp
Normal file
File diff suppressed because it is too large
Load Diff
362
test_common/gl/helpers.h
Normal file
362
test_common/gl/helpers.h
Normal file
@@ -0,0 +1,362 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _helpers_h
|
||||
#define _helpers_h
|
||||
|
||||
#include "../harness/compat.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#if !defined (__APPLE__)
|
||||
#include <CL/cl.h>
|
||||
#include "gl_headers.h"
|
||||
#include <CL/cl_gl.h>
|
||||
#else
|
||||
#include "gl_headers.h"
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/threadTesting.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
GLuint bufobj,
|
||||
int * errcode_ret);
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture2D_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture3D_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLuint renderbuffer ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem memobj ,
|
||||
cl_gl_object_type * gl_object_type ,
|
||||
GLuint * gl_object_name) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clGetGLTextureInfo_fn)(cl_mem memobj ,
|
||||
cl_gl_texture_info param_name ,
|
||||
size_t param_value_size ,
|
||||
void * param_value ,
|
||||
size_t * param_value_size_ret) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue command_queue ,
|
||||
cl_uint num_objects ,
|
||||
const cl_mem * mem_objects ,
|
||||
cl_uint num_events_in_wait_list ,
|
||||
const cl_event * event_wait_list ,
|
||||
cl_event * event) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue command_queue ,
|
||||
cl_uint num_objects ,
|
||||
const cl_mem * mem_objects ,
|
||||
cl_uint num_events_in_wait_list ,
|
||||
const cl_event * event_wait_list ,
|
||||
cl_event * event) ;
|
||||
|
||||
|
||||
extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr;
|
||||
extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr;
|
||||
extern clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr;
|
||||
extern clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr;
|
||||
extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr;
|
||||
extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr;
|
||||
extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr;
|
||||
extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr;
|
||||
extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr;
|
||||
|
||||
|
||||
class glBufferWrapper
|
||||
{
|
||||
public:
|
||||
glBufferWrapper() { mBuffer = 0; }
|
||||
glBufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); }
|
||||
|
||||
glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glTextureWrapper
|
||||
{
|
||||
public:
|
||||
glTextureWrapper() { mHandle = 0; }
|
||||
glTextureWrapper( GLuint b ) { mHandle = b; }
|
||||
~glTextureWrapper() {
|
||||
if( mHandle != 0 ) glDeleteTextures( 1, &mHandle );
|
||||
}
|
||||
|
||||
glTextureWrapper & operator=( const GLuint &rhs ) { mHandle = rhs; return *this; }
|
||||
operator GLuint() { return mHandle; }
|
||||
operator GLuint *() { return &mHandle; }
|
||||
|
||||
GLuint * operator&() { return &mHandle; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mHandle == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
// The texture handle.
|
||||
GLuint mHandle;
|
||||
};
|
||||
|
||||
class glRenderbufferWrapper
|
||||
{
|
||||
public:
|
||||
glRenderbufferWrapper() { mBuffer = 0; }
|
||||
glRenderbufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glFramebufferWrapper
|
||||
{
|
||||
public:
|
||||
glFramebufferWrapper() { mBuffer = 0; }
|
||||
glFramebufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glVertexArraysWrapper
|
||||
{
|
||||
public:
|
||||
glVertexArraysWrapper() { mBuffer = 0; }
|
||||
glVertexArraysWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glVertexArraysWrapper() { if( mBuffer != 0 ) glDeleteVertexArrays( 1, &mBuffer ); }
|
||||
|
||||
glVertexArraysWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glProgramWrapper
|
||||
{
|
||||
public:
|
||||
glProgramWrapper() { mProgram = 0; }
|
||||
glProgramWrapper( GLuint b ) { mProgram = b; }
|
||||
~glProgramWrapper() { if( mProgram != 0 ) glDeleteProgram( mProgram ); }
|
||||
|
||||
glProgramWrapper & operator=( const GLuint &rhs ) { mProgram = rhs; return *this; }
|
||||
operator GLuint() { return mProgram; }
|
||||
operator GLuint *() { return &mProgram; }
|
||||
|
||||
GLuint * operator&() { return &mProgram; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mProgram == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mProgram;
|
||||
};
|
||||
|
||||
class glShaderWrapper
|
||||
{
|
||||
public:
|
||||
glShaderWrapper() { mShader = 0; }
|
||||
glShaderWrapper( GLuint b ) { mShader = b; }
|
||||
~glShaderWrapper() { if( mShader != 0 ) glDeleteShader( mShader ); }
|
||||
|
||||
glShaderWrapper & operator=( const GLuint &rhs ) { mShader = rhs; return *this; }
|
||||
operator GLuint() { return mShader; }
|
||||
operator GLuint *() { return &mShader; }
|
||||
|
||||
GLuint * operator&() { return &mShader; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mShader == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mShader;
|
||||
};
|
||||
|
||||
// Helper functions (defined in helpers.cpp)
|
||||
|
||||
extern void * CreateGLTexture1DArray( size_t width, size_t length,
|
||||
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID, int *outError,
|
||||
bool allocateMem, MTdata d);
|
||||
|
||||
extern void * CreateGLTexture2DArray( size_t width, size_t height, size_t length,
|
||||
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID, int *outError,
|
||||
bool allocateMem, MTdata d);
|
||||
|
||||
extern void * CreateGLTextureBuffer( size_t width,
|
||||
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTex, GLuint *outBuf, int *outError,
|
||||
bool allocateMem, MTdata d);
|
||||
|
||||
extern void * CreateGLTexture1D(size_t width,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d );
|
||||
|
||||
extern void * CreateGLTexture2D( size_t width, size_t height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d );
|
||||
|
||||
|
||||
extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, MTdata d, bool allocateMem = true );
|
||||
|
||||
#ifdef GL_VERSION_3_2
|
||||
extern void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d,
|
||||
bool fixedSampleLocations );
|
||||
|
||||
extern void * CreateGLTexture2DArrayMultisample( size_t width, size_t height,
|
||||
size_t length, size_t samples,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d,
|
||||
bool fixedSampleLocations );
|
||||
#endif
|
||||
|
||||
extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GLint width,
|
||||
GLenum glFormat, GLenum glInternalFormat,
|
||||
GLenum glType, ExplicitType typeToReadAs,
|
||||
size_t outWidth, size_t outHeight );
|
||||
|
||||
extern int CreateGLRenderbufferRaw( GLsizei width, GLsizei height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer );
|
||||
|
||||
extern void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer,
|
||||
int *outError, MTdata d, bool allocateMem );
|
||||
|
||||
extern void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer,
|
||||
GLenum attachment, GLenum glFormat,
|
||||
GLenum glInternalFormat, GLenum glType,
|
||||
ExplicitType typeToReadAs,
|
||||
size_t outWidth, size_t outHeight );
|
||||
|
||||
extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer);
|
||||
extern const char *GetGLTypeName( GLenum type );
|
||||
extern const char *GetGLAttachmentName( GLenum att );
|
||||
extern const char *GetGLTargetName( GLenum tgt );
|
||||
extern const char *GetGLBaseFormatName( GLenum baseformat );
|
||||
extern const char *GetGLFormatName( GLenum format );
|
||||
|
||||
extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d );
|
||||
|
||||
extern GLenum GetGLFormat(GLenum internalFormat);
|
||||
extern GLenum GetGLTypeForExplicitType(ExplicitType type);
|
||||
extern size_t GetGLTypeSize(GLenum type);
|
||||
extern ExplicitType GetExplicitTypeForGLType(GLenum type);
|
||||
|
||||
extern GLenum get_base_gl_target( GLenum target );
|
||||
|
||||
extern int init_clgl_ext( void );
|
||||
|
||||
extern GLint get_gl_max_samples( GLenum target, GLenum internalformat );
|
||||
|
||||
#endif // _helpers_h
|
||||
|
||||
|
||||
|
||||
48
test_common/gl/setup.h
Normal file
48
test_common/gl/setup.h
Normal file
@@ -0,0 +1,48 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _setup_h
|
||||
#define _setup_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "gl_headers.h"
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
|
||||
// Note: the idea here is to have every platform define its own setup.cpp file that implements a GLEnvironment
// subclass internally, then returns it from its definition of GLEnvironment::Instance
|
||||
|
||||
class GLEnvironment
|
||||
{
|
||||
public:
|
||||
GLEnvironment() {}
|
||||
virtual ~GLEnvironment() {}
|
||||
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0;
|
||||
virtual cl_context CreateCLContext( void ) = 0;
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type) = 0;
|
||||
|
||||
static GLEnvironment * Instance( void );
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif // _setup_h
|
||||
156
test_common/gl/setup_osx.cpp
Normal file
156
test_common/gl/setup_osx.cpp
Normal file
@@ -0,0 +1,156 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "setup.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include <OpenGL/CGLDevice.h>
|
||||
|
||||
class OSXGLEnvironment : public GLEnvironment
|
||||
{
|
||||
public:
|
||||
OSXGLEnvironment()
|
||||
{
|
||||
mCGLContext = NULL;
|
||||
}
|
||||
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 )
|
||||
{
|
||||
if (!use_opengl_32) {
|
||||
|
||||
// Create a GLUT window to render into
|
||||
glutInit( argc, argv );
|
||||
glutInitWindowSize( 512, 512 );
|
||||
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
|
||||
glutCreateWindow( "OpenCL <-> OpenGL Test" );
|
||||
}
|
||||
|
||||
else {
|
||||
|
||||
CGLPixelFormatAttribute attribs[] = {
|
||||
kCGLPFAOpenGLProfile, (CGLPixelFormatAttribute)kCGLOGLPVersion_3_2_Core,
|
||||
kCGLPFAAllowOfflineRenderers,
|
||||
kCGLPFANoRecovery,
|
||||
kCGLPFAAccelerated,
|
||||
kCGLPFADoubleBuffer,
|
||||
(CGLPixelFormatAttribute)0
|
||||
};
|
||||
|
||||
CGLError err;
|
||||
CGLPixelFormatObj pix;
|
||||
GLint npix;
|
||||
err = CGLChoosePixelFormat (attribs, &pix, &npix);
|
||||
if(err != kCGLNoError)
|
||||
{
|
||||
log_error("Failed to choose pixel format\n");
|
||||
return -1;
|
||||
}
|
||||
err = CGLCreateContext(pix, NULL, &mCGLContext);
|
||||
if(err != kCGLNoError)
|
||||
{
|
||||
log_error("Failed to create GL context\n");
|
||||
return -1;
|
||||
}
|
||||
CGLSetCurrentContext(mCGLContext);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
int error;
|
||||
|
||||
if( mCGLContext == NULL )
|
||||
mCGLContext = CGLGetCurrentContext();
|
||||
|
||||
CGLShareGroupObj share_group = CGLGetShareGroup(mCGLContext);
|
||||
cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)share_group, 0 };
|
||||
cl_context context = clCreateContext(properties, 0, 0, 0, 0, &error);
|
||||
if (error) {
|
||||
print_error(error, "clCreateContext failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Verify that all devices in the context support the required extension
|
||||
cl_device_id devices[64];
|
||||
size_t size_out;
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &size_out);
|
||||
if (error) {
|
||||
print_error(error, "clGetContextInfo failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)(size_out/sizeof(cl_device_id)); i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
|
||||
log_error("Device %d does not supporte required extension cl_APPLE_gl_sharing.\n", i);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return context;
|
||||
}
|
||||
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type )
|
||||
{
|
||||
int found_valid_device = 0;
|
||||
cl_device_id devices[64];
|
||||
cl_uint num_of_devices;
|
||||
int error;
|
||||
error = clGetDeviceIDs(NULL, device_type, 64, devices, &num_of_devices);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceIDs failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)num_of_devices; i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
|
||||
log_info("Device %d of %d does not support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
|
||||
} else {
|
||||
log_info("Device %d of %d does support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
|
||||
found_valid_device = 1;
|
||||
}
|
||||
}
|
||||
return found_valid_device;
|
||||
}
|
||||
|
||||
virtual ~OSXGLEnvironment()
|
||||
{
|
||||
CGLDestroyContext( mCGLContext );
|
||||
}
|
||||
|
||||
CGLContextObj mCGLContext;
|
||||
|
||||
};
|
||||
|
||||
// Returns the macOS GL environment, constructing it on the first call.
// The object is never deleted.
GLEnvironment * GLEnvironment::Instance( void )
{
    static OSXGLEnvironment * instance = NULL;
    if( instance != NULL )
        return instance;

    instance = new OSXGLEnvironment();
    return instance;
}
|
||||
209
test_common/gl/setup_win32.cpp
Normal file
209
test_common/gl/setup_win32.cpp
Normal file
@@ -0,0 +1,209 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
|
||||
#include "setup.h"
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glut.h>
|
||||
#include <GL/glext.h>
|
||||
#include <GL/glut.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties *properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
// Rename references to this dynamically linked function to avoid
|
||||
// collision with static link version
|
||||
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_proc
|
||||
static clGetGLContextInfoKHR_fn clGetGLContextInfoKHR;
|
||||
|
||||
#define MAX_DEVICES 32
|
||||
|
||||
class WGLEnvironment : public GLEnvironment
|
||||
{
|
||||
private:
|
||||
cl_device_id m_devices[MAX_DEVICES];
|
||||
int m_device_count;
|
||||
cl_platform_id m_platform;
|
||||
bool m_is_glut_init;
|
||||
|
||||
public:
|
||||
WGLEnvironment()
|
||||
{
|
||||
m_device_count = 0;
|
||||
m_platform = 0;
|
||||
m_is_glut_init = false;
|
||||
}
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 )
|
||||
{
|
||||
if (!m_is_glut_init)
|
||||
{
|
||||
// Create a GLUT window to render into
|
||||
glutInit( argc, argv );
|
||||
glutInitWindowSize( 512, 512 );
|
||||
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
|
||||
glutCreateWindow( "OpenCL <-> OpenGL Test" );
|
||||
glewInit();
|
||||
m_is_glut_init = true;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
HGLRC hGLRC = wglGetCurrentContext();
|
||||
HDC hDC = wglGetCurrentDC();
|
||||
cl_context_properties properties[] = {
|
||||
CL_CONTEXT_PLATFORM, (cl_context_properties) m_platform,
|
||||
CL_GL_CONTEXT_KHR, (cl_context_properties) hGLRC,
|
||||
CL_WGL_HDC_KHR, (cl_context_properties) hDC,
|
||||
0
|
||||
};
|
||||
cl_device_id devices[MAX_DEVICES];
|
||||
size_t dev_size;
|
||||
cl_int status;
|
||||
|
||||
if (!hGLRC || !hDC) {
|
||||
print_error(CL_INVALID_CONTEXT, "No GL context bound");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!clGetGLContextInfoKHR) {
|
||||
// As OpenCL for the platforms. Warn if more than one platform found,
|
||||
// since this might not be the platform we want. By default, we simply
|
||||
// use the first returned platform.
|
||||
|
||||
cl_uint nplatforms;
|
||||
cl_platform_id platform;
|
||||
clGetPlatformIDs(0, NULL, &nplatforms);
|
||||
clGetPlatformIDs(1, &platform, NULL);
|
||||
|
||||
if (nplatforms > 1) {
|
||||
log_info("clGetPlatformIDs returned multiple values. This is not "
|
||||
"an error, but might result in obtaining incorrect function "
|
||||
"pointers if you do not want the first returned platform.\n");
|
||||
|
||||
// Show them the platform name, in case it is a problem.
|
||||
|
||||
size_t size;
|
||||
char *name;
|
||||
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);
|
||||
name = (char*)malloc(size);
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL);
|
||||
|
||||
log_info("Using platform with name: %s \n", name);
|
||||
free(name);
|
||||
}
|
||||
|
||||
clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR");
|
||||
if (!clGetGLContextInfoKHR) {
|
||||
print_error(CL_INVALID_PLATFORM, "Failed to query proc address for clGetGLContextInfoKHR");
|
||||
}
|
||||
}
|
||||
|
||||
status = clGetGLContextInfoKHR(properties,
|
||||
CL_DEVICES_FOR_GL_CONTEXT_KHR,
|
||||
sizeof(devices),
|
||||
devices,
|
||||
&dev_size);
|
||||
if (status != CL_SUCCESS) {
|
||||
print_error(status, "clGetGLContextInfoKHR failed");
|
||||
return 0;
|
||||
}
|
||||
dev_size /= sizeof(cl_device_id);
|
||||
log_info("GL context supports %d compute devices\n", dev_size);
|
||||
|
||||
status = clGetGLContextInfoKHR(properties,
|
||||
CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
|
||||
sizeof(devices),
|
||||
devices,
|
||||
&dev_size);
|
||||
if (status != CL_SUCCESS) {
|
||||
print_error(status, "clGetGLContextInfoKHR failed");
|
||||
return 0;
|
||||
}
|
||||
|
||||
cl_device_id ctxDevice = m_devices[0];
|
||||
if (dev_size > 0) {
|
||||
log_info("GL context current device: 0x%x\n", devices[0]);
|
||||
for (int i = 0; i < m_device_count; i++) {
|
||||
if (m_devices[i] == devices[0]) {
|
||||
ctxDevice = devices[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log_info("GL context current device is not a CL device, using device %d.\n", ctxDevice);
|
||||
}
|
||||
|
||||
return clCreateContext(properties, 1, &ctxDevice, NULL, NULL, &status);
|
||||
}
|
||||
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type )
|
||||
{
|
||||
cl_device_id devices[MAX_DEVICES];
|
||||
cl_uint num_of_devices;
|
||||
int error;
|
||||
error = clGetPlatformIDs(1, &m_platform, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetPlatformIDs failed");
|
||||
return -1;
|
||||
}
|
||||
error = clGetDeviceIDs(m_platform, device_type, MAX_DEVICES, devices, &num_of_devices);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceIDs failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check all devices, search for one that supports cl_khr_gl_sharing
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)num_of_devices; i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_khr_gl_sharing") == NULL) {
|
||||
log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
} else {
|
||||
log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
m_devices[m_device_count++] = devices[i];
|
||||
}
|
||||
}
|
||||
return m_device_count > 0;
|
||||
}
|
||||
|
||||
virtual ~WGLEnvironment()
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the Windows GL environment, constructing it on the first call.
// The object is never deleted.
GLEnvironment * GLEnvironment::Instance( void )
{
    static WGLEnvironment * instance = NULL;
    if( instance != NULL )
        return instance;

    instance = new WGLEnvironment();
    return instance;
}
|
||||
122
test_common/gl/setup_x11.cpp
Normal file
122
test_common/gl/setup_x11.cpp
Normal file
@@ -0,0 +1,122 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
|
||||
#include "setup.h"
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glut.h>
|
||||
#include <GL/glext.h>
|
||||
#include <GL/freeglut.h>
|
||||
#include <GL/glx.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
class X11GLEnvironment : public GLEnvironment
|
||||
{
|
||||
private:
|
||||
cl_device_id m_devices[64];
|
||||
cl_uint m_device_count;
|
||||
|
||||
public:
|
||||
X11GLEnvironment()
|
||||
{
|
||||
m_device_count = 0;
|
||||
}
|
||||
virtual int Init( int *argc, char **argv, int use_opencl_32 )
|
||||
{
|
||||
// Create a GLUT window to render into
|
||||
glutInit( argc, argv );
|
||||
glutInitWindowSize( 512, 512 );
|
||||
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
|
||||
glutCreateWindow( "OpenCL <-> OpenGL Test" );
|
||||
glewInit();
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
GLXContext context = glXGetCurrentContext();
|
||||
Display *dpy = glXGetCurrentDisplay();
|
||||
|
||||
cl_context_properties properties[] = {
|
||||
CL_GL_CONTEXT_KHR, (cl_context_properties) context,
|
||||
CL_GLX_DISPLAY_KHR, (cl_context_properties) dpy,
|
||||
0
|
||||
};
|
||||
cl_int status;
|
||||
|
||||
if (!context || !dpy) {
|
||||
print_error(CL_INVALID_CONTEXT, "No GL context bound");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return clCreateContext(properties, 1, m_devices, NULL, NULL, &status);
|
||||
}
|
||||
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type )
|
||||
{
|
||||
int found_valid_device = 0;
|
||||
cl_platform_id platform;
|
||||
cl_device_id devices[64];
|
||||
cl_uint num_of_devices;
|
||||
int error;
|
||||
error = clGetPlatformIDs(1, &platform, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetPlatformIDs failed");
|
||||
return -1;
|
||||
}
|
||||
error = clGetDeviceIDs(platform, device_type, 64, devices, &num_of_devices);
|
||||
// If this platform doesn't have any of the requested device_type (namely GPUs) then return 0
|
||||
if (error == CL_DEVICE_NOT_FOUND)
|
||||
return 0;
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceIDs failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)num_of_devices; i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_khr_gl_sharing ") == NULL) {
|
||||
log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
} else {
|
||||
log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
found_valid_device = 1;
|
||||
m_devices[m_device_count++] = devices[i];
|
||||
}
|
||||
}
|
||||
return found_valid_device;
|
||||
}
|
||||
|
||||
virtual ~X11GLEnvironment()
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the X11 GL environment, constructing it on the first call.
// The object is never deleted.
GLEnvironment * GLEnvironment::Instance( void )
{
    static X11GLEnvironment * instance = NULL;
    if( instance != NULL )
        return instance;

    instance = new X11GLEnvironment();
    return instance;
}
|
||||
64
test_common/gles/gl_headers.h
Normal file
64
test_common/gles/gl_headers.h
Normal file
@@ -0,0 +1,64 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _gl_headers_h
|
||||
#define _gl_headers_h
|
||||
|
||||
#define GL_GLEXT_PROTOTYPES 1
|
||||
|
||||
#include <EGL/egl.h>
|
||||
|
||||
#ifdef GLES3
|
||||
#include <GLES3/gl3.h>
|
||||
#else
|
||||
#include <GLES2/gl2.h>
|
||||
#endif
|
||||
|
||||
#include <GLES2/gl2ext.h>
|
||||
#include <GLES2/gl2extQCOM.h>
|
||||
|
||||
// Some macros to minimize the changes in the tests from GL to GLES2
|
||||
#define glGenRenderbuffersEXT glGenRenderbuffers
|
||||
#define glDeleteRenderbuffersEXT glDeleteRenderbuffers
|
||||
#define glBindRenderbufferEXT glBindRenderbuffer
|
||||
#define glRenderbufferStorageEXT glRenderbufferStorage
|
||||
#define glGetRenderbufferParameterivEXT glGetRenderbufferParameteriv
|
||||
#define glCheckFramebufferStatusEXT glCheckFramebufferStatus
|
||||
#define glGenFramebuffersEXT glGenFramebuffers
|
||||
#define glDeleteFramebuffersEXT glDeleteFramebuffers
|
||||
#define glBindFramebufferEXT glBindFramebuffer
|
||||
#define glFramebufferRenderbufferEXT glFramebufferRenderbuffer
|
||||
#define glTexImage3D glTexImage3DOES
|
||||
#define glutGetProcAddress eglGetProcAddress
|
||||
|
||||
#define GL_FRAMEBUFFER_EXT GL_FRAMEBUFFER
|
||||
#define GL_FRAMEBUFFER_COMPLETE_EXT GL_FRAMEBUFFER_COMPLETE
|
||||
#define GL_RENDERBUFFER_INTERNAL_FORMAT_EXT GL_RENDERBUFFER_INTERNAL_FORMAT
|
||||
#define GL_RENDERBUFFER_EXT GL_RENDERBUFFER
|
||||
#define GL_COLOR_ATTACHMENT0_EXT GL_COLOR_ATTACHMENT0
|
||||
#define GL_DEPTH_ATTACHMENT_EXT GL_DEPTH_ATTACHMENT
|
||||
#define GL_TEXTURE_3D GL_TEXTURE_3D_OES
|
||||
#define GL_READ_ONLY GL_BUFFER_ACCESS_OES
|
||||
|
||||
#define GL_HALF_FLOAT_ARB GL_HALF_FLOAT_OES
|
||||
#define GL_BGRA GL_BGRA_EXT
|
||||
#define GL_RGBA32F_ARB GL_RGBA
|
||||
|
||||
typedef unsigned short GLhalf;
|
||||
|
||||
GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString);
|
||||
|
||||
#endif // __gl_headers_h
|
||||
|
||||
1282
test_common/gles/helpers.cpp
Normal file
1282
test_common/gles/helpers.cpp
Normal file
File diff suppressed because it is too large
Load Diff
247
test_common/gles/helpers.h
Normal file
247
test_common/gles/helpers.h
Normal file
@@ -0,0 +1,247 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _helpers_h
|
||||
#define _helpers_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#if !defined (__APPLE__)
|
||||
#include <CL/cl.h>
|
||||
#include "gl_headers.h"
|
||||
#include <CL/cl_gl.h>
|
||||
#else
|
||||
#include "gl_headers.h"
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/threadTesting.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
GLuint bufobj,
|
||||
int * errcode_ret);
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLuint renderbuffer ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem memobj ,
|
||||
cl_gl_object_type * gl_object_type ,
|
||||
GLuint * gl_object_name) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clGetGLTextureInfo_fn)(cl_mem memobj ,
|
||||
cl_gl_texture_info param_name ,
|
||||
size_t param_value_size ,
|
||||
void * param_value ,
|
||||
size_t * param_value_size_ret) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue command_queue ,
|
||||
cl_uint num_objects ,
|
||||
const cl_mem * mem_objects ,
|
||||
cl_uint num_events_in_wait_list ,
|
||||
const cl_event * event_wait_list ,
|
||||
cl_event * event) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue command_queue ,
|
||||
cl_uint num_objects ,
|
||||
const cl_mem * mem_objects ,
|
||||
cl_uint num_events_in_wait_list ,
|
||||
const cl_event * event_wait_list ,
|
||||
cl_event * event) ;
|
||||
|
||||
|
||||
extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr;
|
||||
extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr;
|
||||
extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr;
|
||||
extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr;
|
||||
extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr;
|
||||
extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr;
|
||||
extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr;
|
||||
|
||||
|
||||
class glBufferWrapper
|
||||
{
|
||||
public:
|
||||
glBufferWrapper() { mBuffer = 0; }
|
||||
glBufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); }
|
||||
|
||||
glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glTextureWrapper
|
||||
{
|
||||
public:
|
||||
glTextureWrapper() { mBuffer = 0; }
|
||||
glTextureWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glTextureWrapper() { if( mBuffer != 0 ) glDeleteTextures( 1, &mBuffer ); }
|
||||
|
||||
glTextureWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glRenderbufferWrapper
|
||||
{
|
||||
public:
|
||||
glRenderbufferWrapper() { mBuffer = 0; }
|
||||
glRenderbufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glFramebufferWrapper
|
||||
{
|
||||
public:
|
||||
glFramebufferWrapper() { mBuffer = 0; }
|
||||
glFramebufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
|
||||
// Helper functions (defined in helpers.cpp)
|
||||
extern void * CreateGLTexture2D( size_t width, size_t height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d );
|
||||
|
||||
|
||||
extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, MTdata d, bool allocateMem = true );
|
||||
|
||||
extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture,
|
||||
GLenum glFormat, GLenum glInternalFormat,
|
||||
GLenum glType, ExplicitType typeToReadAs,
|
||||
size_t outWidth, size_t outHeight );
|
||||
|
||||
void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
|
||||
GLenum attachment,
|
||||
GLenum rbFormat, GLenum rbType,
|
||||
GLenum texFormat, GLenum texType,
|
||||
ExplicitType type,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer,
|
||||
int *outError, MTdata d, bool allocateMem );
|
||||
|
||||
int CreateGLRenderbufferRaw( GLsizei width, GLsizei height,
|
||||
GLenum attachment,
|
||||
GLenum rbFormat, GLenum rbType,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer );
|
||||
|
||||
void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer,
|
||||
GLenum attachment,
|
||||
GLenum rbFormat, GLenum rbType,
|
||||
GLenum texFormat, GLenum texType,
|
||||
ExplicitType typeToReadAs,
|
||||
size_t outWidth, size_t outHeight );
|
||||
|
||||
extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer);
|
||||
extern const char *GetGLTypeName( GLenum type );
|
||||
extern const char *GetGLAttachmentName( GLenum att );
|
||||
extern const char *GetGLTargetName( GLenum tgt );
|
||||
extern const char *GetGLBaseFormatName( GLenum baseformat );
|
||||
extern const char *GetGLFormatName( GLenum format );
|
||||
|
||||
extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d );
|
||||
|
||||
extern GLenum GetGLFormat(GLenum internalFormat);
|
||||
extern GLenum GetGLTypeForExplicitType(ExplicitType type);
|
||||
extern size_t GetGLTypeSize(GLenum type);
|
||||
extern ExplicitType GetExplicitTypeForGLType(GLenum type);
|
||||
|
||||
extern GLenum get_base_gl_target( GLenum target );
|
||||
|
||||
extern int init_clgl_ext( cl_platform_id platform_id );
|
||||
|
||||
#endif // _helpers_h
|
||||
|
||||
|
||||
|
||||
46
test_common/gles/setup.h
Normal file
46
test_common/gles/setup.h
Normal file
@@ -0,0 +1,46 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _setup_h
|
||||
#define _setup_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "gl_headers.h"
|
||||
#include <CL/cl.h>
|
||||
|
||||
|
||||
// Note: the idea here is to have every platform define their own setup.cpp file that implements a GLEnvironment
|
||||
// subclass internally, then return it from that platform's definition of GLEnvironment::Instance
|
||||
|
||||
class GLEnvironment
|
||||
{
|
||||
public:
|
||||
GLEnvironment() {}
|
||||
virtual ~GLEnvironment() {}
|
||||
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0;
|
||||
virtual cl_context CreateCLContext( void ) = 0;
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type) = 0;
|
||||
|
||||
// cleanup EGL environment properly when the test exit.
|
||||
// This change does not affect any functionality of the test
|
||||
virtual void terminate_egl_display() = 0;
|
||||
|
||||
static GLEnvironment * Instance( void );
|
||||
};
|
||||
|
||||
#endif // _setup_h
|
||||
18
test_common/harness/Jamfile
Normal file
18
test_common/harness/Jamfile
Normal file
@@ -0,0 +1,18 @@
|
||||
# Project-wide settings for the harness sources:
#  - add this directory to the include path, both for building the harness
#    (requirements) and for targets that use it (usage-requirements);
#  - pass "-xc++" to gcc and "/TP" to msvc so every source is compiled as C++;
#  - do not treat warnings as errors.
project
    : requirements <include>.
      <toolset>gcc:<cflags>"-xc++"
      <toolset>msvc:<cflags>"/TP"
      <warnings-as-errors>off
    : usage-requirements <include>.
    ;

# Declare one obj target per .c / .cpp file found in this directory and
# collect them in harness.objs.
local harness.objs ;
for source in [ glob *.c *.cpp ]
{
    harness.objs += [ obj $(source:B).obj : $(source) ] ;
}

# "harness" is an alias for all of the object targets above. It uses
# /Runtime//OpenCL.lib itself and propagates it as a library to dependents.
alias harness : $(harness.objs)
    : <use>/Runtime//OpenCL.lib :
    : <library>/Runtime//OpenCL.lib
    ;
|
||||
41
test_common/harness/Makefile
Normal file
41
test_common/harness/Makefile
Normal file
@@ -0,0 +1,41 @@
|
||||
# Builds the test harness object files (no link step; "all" just compiles).

# Optional ATF support: only enabled when BUILD_WITH_ATF is defined.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

SRCS = conversions.c \
	errorHelpers.c \
	genericThread.cpp \
	imageHelpers.cpp \
	kernelHelpers.c \
	mt19937.c \
	rounding_mode.c \
	testHarness.c \
	testHarness.cpp \
	ThreadPool.c \
	threadTesting.c \
	typeWrappers.cpp

DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# The C sources are built with the C++ driver as well.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c / .cpp source to its .o object.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

# "all" and "clean" name actions, not files; declaring them phony keeps a stray
# file called "all" or "clean" from suppressing the rule.
.PHONY: all clean

all: $(OBJECTS)

clean:
	rm -f $(OBJECTS)

# Catch-all for targets that have no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
931
test_common/harness/ThreadPool.c
Normal file
931
test_common/harness/ThreadPool.c
Normal file
@@ -0,0 +1,931 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "ThreadPool.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "fpcontrol.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) // or any other POSIX system
|
||||
|
||||
#if defined( _WIN32 )
|
||||
#include <windows.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include "mingw_compat.h"
|
||||
#include <process.h>
|
||||
#else // !_WIN32
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/errno.h>
|
||||
#ifdef __linux__
|
||||
#include <sched.h>
|
||||
#endif
|
||||
#endif // !_WIN32
|
||||
|
||||
// declarations
|
||||
#ifdef _WIN32
|
||||
void ThreadPool_WorkerFunc( void *p );
|
||||
#else
|
||||
void *ThreadPool_WorkerFunc( void *p );
|
||||
#endif
|
||||
void ThreadPool_Init(void);
|
||||
void ThreadPool_Exit(void);
|
||||
|
||||
#if defined (__MINGW32__)
|
||||
// Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
|
||||
CRITICAL_SECTION gAtomicLock;
|
||||
#elif defined( __GNUC__ ) || defined( _MSC_VER)
|
||||
#else
|
||||
pthread_mutex_t gAtomicLock;
|
||||
#endif
|
||||
|
||||
// Atomic add operator with mem barrier. Mem barrier needed to protect state modified by the worker functions.
|
||||
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
|
||||
{
|
||||
#if defined (__MINGW32__)
|
||||
// No atomics on Mingw32
|
||||
EnterCriticalSection(&gAtomicLock);
|
||||
cl_int old = *a;
|
||||
*a = old + b;
|
||||
LeaveCriticalSection(&gAtomicLock);
|
||||
return old;
|
||||
#elif defined( __GNUC__ )
|
||||
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
return __sync_fetch_and_add( a, b );
|
||||
// do we need __sync_synchronize() here, too? GCC docs are unclear whether __sync_fetch_and_add does a synchronize
|
||||
#elif defined( _MSC_VER )
|
||||
return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
|
||||
#else
|
||||
#warning Please add a atomic add implementation here, with memory barrier. Fallback code is slow.
|
||||
if( pthread_mutex_lock(&gAtomicLock) )
|
||||
log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
|
||||
cl_int old = *a;
|
||||
*a = old + b;
|
||||
if( pthread_mutex_unlock(&gAtomicLock) )
|
||||
log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");
|
||||
return old;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined( _WIN32 )
|
||||
// Uncomment the following line if Windows XP support is not required.
|
||||
// #define HAS_INIT_ONCE_EXECUTE_ONCE 1
|
||||
|
||||
#if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
|
||||
#define _INIT_ONCE INIT_ONCE
|
||||
#define _PINIT_ONCE PINIT_ONCE
|
||||
#define _InitOnceExecuteOnce InitOnceExecuteOnce
|
||||
#else // !HAS_INIT_ONCE_EXECUTE_ONCE
|
||||
|
||||
typedef volatile LONG _INIT_ONCE;
|
||||
typedef _INIT_ONCE *_PINIT_ONCE;
|
||||
typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
|
||||
|
||||
#define _INIT_ONCE_UNINITIALIZED 0
|
||||
#define _INIT_ONCE_IN_PROGRESS 1
|
||||
#define _INIT_ONCE_DONE 2
|
||||
|
||||
static BOOL _InitOnceExecuteOnce(
|
||||
_PINIT_ONCE InitOnce,
|
||||
_PINIT_ONCE_FN InitFn,
|
||||
PVOID Parameter,
|
||||
LPVOID *Context
|
||||
)
|
||||
{
|
||||
while ( *InitOnce != _INIT_ONCE_DONE )
|
||||
{
|
||||
if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
|
||||
{
|
||||
InitFn( InitOnce, Parameter, Context );
|
||||
*InitOnce = _INIT_ONCE_DONE;
|
||||
return TRUE;
|
||||
}
|
||||
Sleep( 1 );
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
#endif // !HAS_INIT_ONCE_EXECUTE_ONCE
|
||||
|
||||
// Uncomment the following line if Windows XP support is not required.
|
||||
// #define HAS_CONDITION_VARIABLE 1
|
||||
|
||||
#if defined(HAS_CONDITION_VARIABLE)
|
||||
#define _CONDITION_VARIABLE CONDITION_VARIABLE
|
||||
#define _InitializeConditionVariable InitializeConditionVariable
|
||||
#define _SleepConditionVariableCS SleepConditionVariableCS
|
||||
#define _WakeAllConditionVariable WakeAllConditionVariable
|
||||
#else // !HAS_CONDITION_VARIABLE
|
||||
typedef struct
|
||||
{
|
||||
HANDLE mEvent; // Used to park the thread.
|
||||
CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
|
||||
volatile cl_int mWaiters; // Number of threads waiting on this cond var.
|
||||
volatile cl_int mGeneration; // Wait generation count.
|
||||
volatile cl_int mReleaseCount; // Number of releases to execute before reseting the event.
|
||||
} _CONDITION_VARIABLE;
|
||||
|
||||
typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
|
||||
|
||||
// Prepares cond_var for use: creates its manual-reset, initially non-signalled
// event, initialises the critical section guarding the counters, and zeroes
// all three counters.
//
// mReleaseCount is cleared unconditionally. It is read by
// _SleepConditionVariableCS in every build configuration, so it must not be
// left uninitialised when NDEBUG is defined.
static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
{
    cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
    InitializeCriticalSection( cond_var->mLock );
    cond_var->mWaiters = 0;
    cond_var->mGeneration = 0;
    cond_var->mReleaseCount = 0;
}
|
||||
|
||||
// Waits on cond_var. cond_lock must be held by the caller: it is released
// while the thread is parked and held again when the function returns.
// The third parameter is ignored; the wait has no timeout.
static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
{
    cl_int entryGeneration;
    BOOL released;

    // Register as a waiter and remember which generation we joined in.
    EnterCriticalSection( cond_var->mLock );
    entryGeneration = cond_var->mGeneration;
    ++cond_var->mWaiters;
    LeaveCriticalSection( cond_var->mLock );
    LeaveCriticalSection( cond_lock );

    // Park on the event until a wake-up that belongs to a later generation
    // (and still has releases outstanding) is observed.
    do
    {
        WaitForSingleObject( cond_var->mEvent, INFINITE );
        EnterCriticalSection( cond_var->mLock );
        released = ( cond_var->mReleaseCount > 0 ) && ( cond_var->mGeneration != entryGeneration );
        LeaveCriticalSection( cond_var->mLock );
    }
    while( !released );

    // Re-take the caller's lock, then consume one release. Whoever consumes
    // the last release resets the event.
    EnterCriticalSection( cond_lock );
    EnterCriticalSection( cond_var->mLock );
    if( 0 == --cond_var->mReleaseCount )
    {
        ResetEvent( cond_var->mEvent );
    }
    --cond_var->mWaiters;
    LeaveCriticalSection( cond_var->mLock );
}
|
||||
|
||||
// Wakes every thread currently waiting on cond_var. Does nothing when there
// are no waiters.
static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
{
    EnterCriticalSection( cond_var->mLock );

    if( 0 < cond_var->mWaiters )
    {
        // Open a new generation, grant one release per current waiter and
        // signal the event the waiters are parked on.
        ++cond_var->mGeneration;
        cond_var->mReleaseCount = cond_var->mWaiters;
        SetEvent( cond_var->mEvent );
    }

    LeaveCriticalSection( cond_var->mLock );
}
|
||||
#endif // !HAS_CONDITION_VARIABLE
|
||||
#endif // _WIN32
|
||||
|
||||
#define MAX_COUNT (1<<29)
|
||||
|
||||
// Global state to coordinate whether the threads have been launched successfully or not
|
||||
#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
|
||||
static _INIT_ONCE threadpool_init_control;
|
||||
#elif defined (_WIN32) // MingW of XP
|
||||
static int threadpool_init_control;
|
||||
#else // Posix platforms
|
||||
pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
|
||||
#endif
|
||||
cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch
|
||||
|
||||
// critical region lock around ThreadPool_Do. We can only run one ThreadPool_Do at a time,
|
||||
// because we are too lazy to set up a queue here, and don't expect to need one.
|
||||
#if defined( _WIN32 )
|
||||
CRITICAL_SECTION gThreadPoolLock[1];
|
||||
#else // !_WIN32
|
||||
pthread_mutex_t gThreadPoolLock;
|
||||
#endif // !_WIN32
|
||||
|
||||
// Condition variable to park ThreadPool threads when not working
|
||||
#if defined( _WIN32 )
|
||||
CRITICAL_SECTION cond_lock[1];
|
||||
_CONDITION_VARIABLE cond_var[1];
|
||||
#else // !_WIN32
|
||||
pthread_mutex_t cond_lock;
|
||||
pthread_cond_t cond_var;
|
||||
#endif // !_WIN32
|
||||
volatile cl_int gRunCount = 0; // Condition variable state. How many iterations on the function left to run.
|
||||
// set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
|
||||
|
||||
// State that only changes when the threadpool is not working.
|
||||
volatile TPFuncPtr gFunc_ptr = NULL;
|
||||
volatile void *gUserInfo = NULL;
|
||||
volatile cl_int gJobCount = 0;
|
||||
|
||||
// State that may change while the thread pool is working
|
||||
volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole
|
||||
|
||||
// Condition variable to park caller while waiting
|
||||
#if defined( _WIN32 )
|
||||
HANDLE caller_event;
|
||||
#else // !_WIN32
|
||||
pthread_mutex_t caller_cond_lock;
|
||||
pthread_cond_t caller_cond_var;
|
||||
#endif // !_WIN32
|
||||
volatile cl_int gRunning = 0; // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
|
||||
|
||||
// The total number of threads launched.
|
||||
volatile cl_int gThreadCount = 0;
|
||||
#ifdef _WIN32
|
||||
void ThreadPool_WorkerFunc( void *p )
|
||||
#else
|
||||
void *ThreadPool_WorkerFunc( void *p )
|
||||
#endif
|
||||
{
|
||||
cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
|
||||
cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
|
||||
|
||||
while( MAX_COUNT > item )
|
||||
{
|
||||
cl_int err;
|
||||
|
||||
// check for more work to do
|
||||
if( 0 >= item )
|
||||
{
|
||||
// log_info( "Thread %d has run out of work.\n", threadID );
|
||||
|
||||
// No work to do. Attempt to block waiting for work
|
||||
#if defined( _WIN32 )
|
||||
EnterCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_lock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
|
||||
// log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
|
||||
if( 1 == remaining )
|
||||
{ // last thread out signal the main thread to wake up
|
||||
#if defined( _WIN32 )
|
||||
SetEvent( caller_event );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_lock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
if( (err = pthread_cond_broadcast( &caller_cond_var )))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
// loop in case we are woken only to discover that some other thread already did all the work
|
||||
while( 0 >= item )
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
_SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
|
||||
pthread_mutex_unlock( &cond_lock);
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// try again to get a valid item id
|
||||
item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
if( MAX_COUNT <= item ) // exit if we are done
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
pthread_mutex_unlock( &cond_lock);
|
||||
#endif // !_WIN32
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
ThreadPool_AtomicAdd( &gRunning, 1 );
|
||||
// log_info( "Thread %d has found work.\n", threadID);
|
||||
|
||||
#if defined( _WIN32 )
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_unlock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
}
|
||||
|
||||
// we have a valid item, so do the work
|
||||
if( CL_SUCCESS == jobError ) // but only if we haven't already encountered an error
|
||||
{
|
||||
// log_info( "Thread %d doing job %d\n", threadID, item - 1);
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
|
||||
// Call the user's function with this item ID
|
||||
err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore FP state
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
if( err )
|
||||
{
|
||||
#if (__MINGW32__)
|
||||
EnterCriticalSection(&gAtomicLock);
|
||||
if( jobError == CL_SUCCESS );
|
||||
jobError = err;
|
||||
gRunCount = 0;
|
||||
LeaveCriticalSection(&gAtomicLock);
|
||||
#elif defined( __GNUC__ )
|
||||
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
// set the new error if we are the first one there.
|
||||
__sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );
|
||||
|
||||
// drop run count to 0
|
||||
gRunCount = 0;
|
||||
__sync_synchronize();
|
||||
#elif defined( _MSC_VER )
|
||||
// set the new error if we are the first one there.
|
||||
_InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );
|
||||
|
||||
// drop run count to 0
|
||||
gRunCount = 0;
|
||||
_mm_mfence();
|
||||
#else
|
||||
if( pthread_mutex_lock(&gAtomicLock) )
|
||||
log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
|
||||
if( jobError == CL_SUCCESS );
|
||||
jobError = err;
|
||||
gRunCount = 0;
|
||||
if( pthread_mutex_unlock(&gAtomicLock) )
|
||||
log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// get the next item
|
||||
item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
}
|
||||
|
||||
exit:
|
||||
log_info( "ThreadPool: thread %d exiting.\n", threadID );
|
||||
ThreadPool_AtomicAdd( &gThreadCount, -1 );
|
||||
#if !defined(_WIN32)
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// SetThreadCount() may be used to artificially set the number of worker threads
|
||||
// If the value is 0 (the default) the number of threads will be determined based on
|
||||
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
|
||||
// that we still get some testing for thread safety.
|
||||
//
|
||||
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
|
||||
// code will run single threaded, but will report an error to indicate that the test
|
||||
// is invalid. This option is intended for debugging purposes only. It is suggested
|
||||
// as a convention that test apps set the thread count to 1 in response to the -m flag.
|
||||
//
|
||||
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
|
||||
// otherwise the behavior is undefined.
|
||||
void SetThreadCount( int count )
|
||||
{
|
||||
if( threadPoolInitErr == CL_SUCCESS )
|
||||
{
|
||||
log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
|
||||
abort();
|
||||
}
|
||||
|
||||
gThreadCount = count;
|
||||
}
|
||||
|
||||
void ThreadPool_Init(void)
|
||||
{
|
||||
cl_int i;
|
||||
int err;
|
||||
volatile cl_uint threadID = 0;
|
||||
|
||||
// Check for manual override of multithreading code. We add this for better debuggability.
|
||||
if( getenv( "CL_TEST_SINGLE_THREADED" ) )
|
||||
{
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
// Figure out how many threads to run -- check first for non-zero to give the implementation the chance
|
||||
if( 0 == gThreadCount )
|
||||
{
|
||||
#if defined(_MSC_VER) || defined (__MINGW64__)
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
|
||||
DWORD length = 0;
|
||||
|
||||
GetLogicalProcessorInformation( NULL, &length );
|
||||
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
|
||||
if( buffer != NULL )
|
||||
{
|
||||
if ( GetLogicalProcessorInformation( buffer, &length ) == TRUE )
|
||||
{
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
|
||||
while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
|
||||
{
|
||||
if( ptr->Relationship == RelationProcessorCore )
|
||||
{
|
||||
// Count the number of bits in ProcessorMask (number of logical cores)
|
||||
ULONG mask = ptr->ProcessorMask;
|
||||
while( mask )
|
||||
{
|
||||
++gThreadCount;
|
||||
mask &= mask - 1; // Remove 1 bit at a time
|
||||
}
|
||||
}
|
||||
++ptr;
|
||||
}
|
||||
}
|
||||
free(buffer);
|
||||
}
|
||||
#elif defined (__MINGW32__)
|
||||
{
|
||||
#warning How about this, instead of hard coding it to 2?
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetSystemInfo( &sysinfo );
|
||||
gThreadCount = sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
#elif defined (__linux__) && !defined(__ANDROID__)
|
||||
cpu_set_t affinity;
|
||||
if ( 0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity) )
|
||||
{
|
||||
#if !(defined(CPU_COUNT))
|
||||
gThreadCount = 1;
|
||||
#else
|
||||
gThreadCount = CPU_COUNT(&affinity);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
|
||||
}
|
||||
#else // !_WIN32
|
||||
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
|
||||
#endif // !_WIN32
|
||||
|
||||
// Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
|
||||
if( 1 == gThreadCount )
|
||||
gThreadCount = 2;
|
||||
}
|
||||
|
||||
// When working in 32 bit limit the thread number to 12
|
||||
// This fix was made due to memory issues in integer_ops test
|
||||
// When running integer_ops, the test opens as many threads as the
|
||||
// machine has and each thread allocates a fixed amount of memory
|
||||
// When running this test on dual socket machine in 32-bit, the
|
||||
// process memory is not sufficient and the test fails
|
||||
#if defined(_WIN32) && !defined(_M_X64)
|
||||
if (gThreadCount > 12) {
|
||||
gThreadCount = 12;
|
||||
}
|
||||
#endif
|
||||
|
||||
//Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded.
|
||||
if( gThreadCount < 2 )
|
||||
{
|
||||
log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined( _WIN32 )
|
||||
InitializeCriticalSection( gThreadPoolLock );
|
||||
InitializeCriticalSection( cond_lock );
|
||||
_InitializeConditionVariable( cond_var );
|
||||
caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
|
||||
#elif defined (__GNUC__)
|
||||
// Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem
|
||||
// with some flavors of gcc compilers.
|
||||
pthread_cond_init(&cond_var, NULL);
|
||||
pthread_mutex_init(&cond_lock ,NULL);
|
||||
pthread_cond_init(&caller_cond_var, NULL);
|
||||
pthread_mutex_init(&caller_cond_lock, NULL);
|
||||
pthread_mutex_init(&gThreadPoolLock, NULL);
|
||||
#endif
|
||||
|
||||
#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
|
||||
pthread_mutex_initialize(gAtomicLock);
|
||||
#elif defined (__MINGW32__)
|
||||
InitializeCriticalSection(&gAtomicLock);
|
||||
#endif
|
||||
// Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
|
||||
// That would cause a deadlock.
|
||||
#if !defined( _WIN32 )
|
||||
if((err = pthread_mutex_lock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
gRunning = gThreadCount;
|
||||
// init threads
|
||||
for( i = 0; i < gThreadCount; i++ )
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
|
||||
err = ( handle == 0 );
|
||||
#else // !_WIN32
|
||||
pthread_t tid = 0;
|
||||
err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
|
||||
#endif // !_WIN32
|
||||
if( err )
|
||||
{
|
||||
log_error( "Error %d launching thread %d\n", err, i );
|
||||
threadPoolInitErr = err;
|
||||
gThreadCount = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
atexit( ThreadPool_Exit );
|
||||
|
||||
// block until they are done launching.
|
||||
do
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
WaitForSingleObject( caller_event, INFINITE );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
|
||||
pthread_mutex_unlock( &caller_cond_lock);
|
||||
return;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
}
|
||||
while( gRunCount != -gThreadCount );
|
||||
#if !defined( _WIN32 )
|
||||
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
|
||||
return;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
threadPoolInitErr = CL_SUCCESS;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// Adapter that gives ThreadPool_Init the callback signature expected by
// _InitOnceExecuteOnce. None of the three arguments is used; the result is
// always TRUE.
static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
{
    (void) InitOnce;
    (void) Parameter;
    (void) lpContex;

    ThreadPool_Init();

    return TRUE;
}
|
||||
#endif
|
||||
|
||||
void ThreadPool_Exit(void)
|
||||
{
|
||||
int err, count;
|
||||
gRunCount = CL_INT_MAX;
|
||||
|
||||
#if defined( __GNUC__ )
|
||||
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
__sync_synchronize();
|
||||
#elif defined( _MSC_VER )
|
||||
_mm_mfence();
|
||||
#else
|
||||
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
|
||||
#endif
|
||||
|
||||
// spin waiting for threads to die
|
||||
for (count = 0; 0 != gThreadCount && count < 1000; count++)
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
_WakeAllConditionVariable( cond_var );
|
||||
Sleep(1);
|
||||
#else // !_WIN32
|
||||
if( (err = pthread_cond_broadcast( &cond_var )))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
|
||||
break;
|
||||
}
|
||||
usleep(1000);
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
if( gThreadCount )
|
||||
log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
|
||||
else
|
||||
log_info( "Thread pool exited in a orderly fashion.\n" );
|
||||
}
|
||||
|
||||
|
||||
// Blocking API that farms out count jobs to a thread pool.
|
||||
// It may return with some work undone if func_ptr() returns a non-zero
|
||||
// result.
|
||||
//
|
||||
// This function obviously has its shortcomings. Only one call to ThreadPool_Do
|
||||
// can be running at a time. It is not intended for general purpose use.
|
||||
// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
|
||||
// all available then it would make more sense to use those features.
|
||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
cl_uint count,
|
||||
void *userInfo )
|
||||
{
|
||||
cl_int newErr;
|
||||
cl_int err = 0;
|
||||
// Lazily set up our threads
|
||||
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
|
||||
err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
|
||||
#elif defined (_WIN32)
|
||||
if (threadpool_init_control == 0) {
|
||||
#warning This is buggy and race prone. Find a better way.
|
||||
ThreadPool_Init();
|
||||
threadpool_init_control = 1;
|
||||
}
|
||||
#else //posix platform
|
||||
err = pthread_once( &threadpool_init_control, ThreadPool_Init );
|
||||
if( err )
|
||||
{
|
||||
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
// Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
|
||||
if( threadPoolInitErr )
|
||||
{
|
||||
cl_uint currentJob = 0;
|
||||
cl_int result = CL_SUCCESS;
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
for( currentJob = 0; currentJob < count; currentJob++ )
|
||||
if((result = func_ptr( currentJob, 0, userInfo )))
|
||||
{
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore FP state before leaving
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore FP state before leaving
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
if( count >= MAX_COUNT )
|
||||
{
|
||||
log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Enter critical region
|
||||
#if defined( _WIN32 )
|
||||
EnterCriticalSection( gThreadPoolLock );
|
||||
#else // !_WIN32
|
||||
if( (err = pthread_mutex_lock( &gThreadPoolLock )))
|
||||
{
|
||||
switch (err)
|
||||
{
|
||||
case EDEADLK:
|
||||
log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
|
||||
break;
|
||||
case EINVAL:
|
||||
log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// Start modifying the job state observable by worker threads
|
||||
#if defined( _WIN32 )
|
||||
EnterCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_lock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
|
||||
// That would cause a deadlock.
|
||||
#if !defined( _WIN32 )
|
||||
if((err = pthread_mutex_lock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// Prime the worker threads to get going
|
||||
jobError = CL_SUCCESS;
|
||||
gRunCount = gJobCount = count;
|
||||
gFunc_ptr = func_ptr;
|
||||
gUserInfo = userInfo;
|
||||
|
||||
#if defined( _WIN32 )
|
||||
ResetEvent(caller_event);
|
||||
_WakeAllConditionVariable( cond_var );
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if( (err = pthread_cond_broadcast( &cond_var )))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
if((err = pthread_mutex_unlock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// block until they are done. It would be slightly more efficient to do some of the work here though.
|
||||
do
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
WaitForSingleObject( caller_event, INFINITE );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
|
||||
pthread_mutex_unlock( &caller_cond_lock);
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
}
|
||||
while( gRunning );
|
||||
#if !defined(_WIN32)
|
||||
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
err = jobError;
|
||||
|
||||
exit:
|
||||
// exit critical region
|
||||
#if defined( _WIN32 )
|
||||
LeaveCriticalSection( gThreadPoolLock );
|
||||
#else // !_WIN32
|
||||
newErr = pthread_mutex_unlock( &gThreadPoolLock );
|
||||
if( newErr)
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
|
||||
return err;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
cl_uint GetThreadCount( void )
|
||||
{
|
||||
// Lazily set up our threads
|
||||
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
|
||||
cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
|
||||
#elif defined (_WIN32)
|
||||
if (threadpool_init_control == 0) {
|
||||
#warning This is buggy and race prone. Find a better way.
|
||||
ThreadPool_Init();
|
||||
threadpool_init_control = 1;
|
||||
}
|
||||
#else
|
||||
cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
|
||||
if( err )
|
||||
{
|
||||
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
|
||||
return err;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
if( gThreadCount < 1 )
|
||||
return 1;
|
||||
|
||||
return gThreadCount;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
|
||||
#error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
|
||||
#endif
|
||||
//
|
||||
// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
|
||||
// of OpenCL API, while also checking
|
||||
//
|
||||
// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
|
||||
// It is not okay to use this for conformance testing!!!
|
||||
//
|
||||
// Exception: If your operating system does not support multithreaded execution of any kind, then you may use this code.
|
||||
//
|
||||
|
||||
// Fallback (non-threaded) add: stores *a + b into *a and returns the value
// *a held beforehand. Nothing here is atomic; this build never runs more than
// one thread. If your operating system supports memory-barrier-atomics, use
// those here instead.
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
{
    // The sum is formed in unsigned arithmetic, then stored back as cl_int.
    const cl_uint previous = (cl_uint) *a;
    const cl_uint updated = previous + (cl_uint) b;

    *a = (cl_int) updated;
    return (cl_int) previous;
}
|
||||
|
||||
// Blocking API that farms out count jobs to a thread pool.
|
||||
// It may return with some work undone if func_ptr() returns a non-zero
|
||||
// result.
|
||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
cl_uint count,
|
||||
void *userInfo )
|
||||
{
|
||||
cl_uint currentJob = 0;
|
||||
cl_int result = CL_SUCCESS;
|
||||
|
||||
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
|
||||
// THIS FUNCTION IS NOT INTENDED FOR USE!!
|
||||
log_error( "ERROR: Test must be multithreaded!\n" );
|
||||
exit(-1);
|
||||
#else
|
||||
static int spewCount = 0;
|
||||
|
||||
if( 0 == spewCount )
|
||||
{
|
||||
log_info( "\nWARNING: The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
|
||||
spewCount = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// The multithreaded code should mimic this behavior:
|
||||
for( currentJob = 0; currentJob < count; currentJob++ )
|
||||
if((result = func_ptr( currentJob, 0, userInfo )))
|
||||
return result;
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Fallback build: work only ever runs on the calling thread, so the
// reported thread count is always 1.
cl_uint GetThreadCount( void )
{
    const cl_uint onlyCaller = 1;

    return onlyCaller;
}
|
||||
|
||||
// Fallback build: the thread count cannot be changed. A request for more
// than one thread is logged and otherwise ignored.
void SetThreadCount( int count )
{
    if( count <= 1 )
        return;

    log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
}
|
||||
|
||||
#endif
|
||||
76
test_common/harness/ThreadPool.h
Normal file
76
test_common/harness/ThreadPool.h
Normal file
@@ -0,0 +1,76 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef THREAD_POOL_H
|
||||
#define THREAD_POOL_H
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// An atomic add operator
|
||||
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ); // returns old value
|
||||
|
||||
// Your function prototype
|
||||
//
|
||||
// A function pointer to the function you want to execute in a multithreaded context. No
|
||||
// synchronization primitives are provided, other than the atomic add above. You may not
|
||||
// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
|
||||
// work, however.
|
||||
//
|
||||
// Job ids and thread ids are 0-based. If the number of jobs or threads is 8, they will be numbered 0 through 7.
|
||||
// Note that while every job will be run, it is not guaranteed that every thread will wake up before
|
||||
// the work is done.
|
||||
typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
|
||||
|
||||
// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
|
||||
// Some workitems may not run if a non-zero result is returned from func_ptr().
|
||||
// This function may not be called from a TPFuncPtr.
|
||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
cl_uint count,
|
||||
void *userInfo );
|
||||
|
||||
// Returns the number of worker threads that underlie the threadpool. The value passed
|
||||
// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive.
|
||||
// This is safe to call from a TPFuncPtr.
|
||||
cl_uint GetThreadCount( void );
|
||||
|
||||
// SetThreadCount() may be used to artificially set the number of worker threads.
|
||||
// If the value is 0 (the default) the number of threads will be determined based on
|
||||
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
|
||||
// that we still get some testing for thread safety.
|
||||
//
|
||||
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
|
||||
// code will run single threaded, but will report an error to indicate that the test
|
||||
// is invalid. This option is intended for debugging purposes only. It is suggested
|
||||
// as a convention that test apps set the thread count to 1 in response to the -m flag.
|
||||
//
|
||||
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
|
||||
// otherwise the behavior is undefined. It may not be called from a TPFuncPtr.
|
||||
void SetThreadCount( int count );
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* THREAD_POOL_H */
|
||||
290
test_common/harness/clImageHelper.h
Normal file
290
test_common/harness/clImageHelper.h
Normal file
@@ -0,0 +1,290 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef test_conformance_clImageHelper_h
|
||||
#define test_conformance_clImageHelper_h
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include "errorHelpers.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
// helper function to replace clCreateImage2D , to make the existing code use
|
||||
// the functions of version 1.2 and veriosn 1.1 respectively
|
||||
|
||||
static inline cl_mem create_image_2d (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_row_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage = NULL;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
cl_image_desc image_desc_dest;
|
||||
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
|
||||
image_desc_dest.image_width = image_width;
|
||||
image_desc_dest.image_height = image_height;
|
||||
image_desc_dest.image_depth= 0;// not usedfor 2d
|
||||
image_desc_dest.image_array_size = 0;// not used for 2d
|
||||
image_desc_dest.image_row_pitch = image_row_pitch;
|
||||
image_desc_dest.image_slice_pitch = 0;
|
||||
image_desc_dest.num_mip_levels = 0;
|
||||
image_desc_dest.num_samples = 0;
|
||||
image_desc_dest.mem_object = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
|
||||
mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
#else
|
||||
mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
#endif
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
// helper function to replace clCreateImage2D , to make the existing code use
|
||||
// the functions of version 1.2 and veriosn 1.1 respectively
|
||||
|
||||
static inline cl_mem create_image_2d_buffer (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_row_pitch,
|
||||
cl_mem buffer,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage = NULL;
|
||||
|
||||
cl_image_desc image_desc_dest;
|
||||
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
|
||||
image_desc_dest.image_width = image_width;
|
||||
image_desc_dest.image_height = image_height;
|
||||
image_desc_dest.image_depth= 0;// not usedfor 2d
|
||||
image_desc_dest.image_array_size = 0;// not used for 2d
|
||||
image_desc_dest.image_row_pitch = image_row_pitch;
|
||||
image_desc_dest.image_slice_pitch = 0;
|
||||
image_desc_dest.num_mip_levels = 0;
|
||||
image_desc_dest.num_samples = 0;
|
||||
image_desc_dest.mem_object = buffer;
|
||||
mImage = clCreateImage( context, flags, image_format, &image_desc_dest, NULL, errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static inline cl_mem create_image_3d (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_depth,
|
||||
size_t image_row_pitch,
|
||||
size_t image_slice_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = image_height;
|
||||
image_desc.image_depth = image_depth;
|
||||
image_desc.image_array_size = 0;// not used for one image
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = image_slice_pitch;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.mem_object = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
#else
|
||||
mImage = clCreateImage3D( context,
|
||||
flags, image_format,
|
||||
image_width,
|
||||
image_height,
|
||||
image_depth,
|
||||
image_row_pitch,
|
||||
image_slice_pitch,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
#endif
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
static inline cl_mem create_image_2d_array (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_array_size,
|
||||
size_t image_row_pitch,
|
||||
size_t image_slice_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = image_height;
|
||||
image_desc.image_depth = 1;
|
||||
image_desc.image_array_size = image_array_size;
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = image_slice_pitch;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.mem_object = NULL;
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
static inline cl_mem create_image_1d_array (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_array_size,
|
||||
size_t image_row_pitch,
|
||||
size_t image_slice_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = 1;
|
||||
image_desc.image_depth = 1;
|
||||
image_desc.image_array_size = image_array_size;
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = image_slice_pitch;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.mem_object = NULL;
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
static inline cl_mem create_image_1d (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_row_pitch,
|
||||
void *host_ptr,
|
||||
cl_mem buffer,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = 1;
|
||||
image_desc.image_depth = 1;
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = 0;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.mem_object = buffer;
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
382
test_common/harness/compat.h
Normal file
382
test_common/harness/compat.h
Normal file
@@ -0,0 +1,382 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _COMPAT_H_
|
||||
#define _COMPAT_H_
|
||||
|
||||
#if defined(_WIN32) && defined (_MSC_VER)
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define EXTERN_C extern "C"
|
||||
#else
|
||||
#define EXTERN_C
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// stdlib.h
|
||||
//
|
||||
|
||||
#include <stdlib.h> // On Windows, _MAX_PATH defined there.
|
||||
|
||||
// llabs appeared in MS C v16 (VS 10/2010); supply it for older MSVC.
#if defined( _MSC_VER ) && _MSC_VER <= 1500
EXTERN_C inline long long llabs(long long __x)
{
    if( __x >= 0 )
        return __x;
    return -__x;
}
#endif
|
||||
|
||||
|
||||
//
|
||||
// stdbool.h
|
||||
//
|
||||
|
||||
// stdbool.h appeared in MS C v18 (VS 12/2013).
// The version check must use _MSC_VER: a misspelled, undefined macro name
// evaluates to 0 in #if, which made every MSVC build take the shim branch
// and never include <stdbool.h>.
#if defined( _MSC_VER ) && _MSC_VER <= 1700
    #if !defined(__cplusplus)
        // C only; C++ already has bool/true/false built in.
        typedef char bool;
        #define true 1
        #define false 0
    #endif
#else
    #include <stdbool.h>
#endif
|
||||
|
||||
|
||||
|
||||
//
|
||||
// stdint.h
|
||||
//
|
||||
|
||||
// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
// Older compilers get hand-written fixed-width typedefs instead.
#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
typedef unsigned char uint8_t;
// Plain char has implementation-defined signedness, so spell out signed.
typedef signed char int8_t;
typedef unsigned short uint16_t;
typedef short int16_t;
typedef unsigned int uint32_t;
typedef int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#else
// Request the limit macros (INT32_MAX etc.) before including stdint.h.
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#include <stdint.h>
#endif
|
||||
|
||||
|
||||
|
||||
//
|
||||
// float.h
|
||||
//
|
||||
|
||||
#include <float.h>
|
||||
|
||||
|
||||
|
||||
//
|
||||
// fenv.h
|
||||
//
|
||||
|
||||
// fenv.h appeared in MS C v18 (VS 12/2013).
|
||||
#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
|
||||
// reimplement fenv.h because windows doesn't have it
|
||||
#define FE_INEXACT 0x0020
|
||||
#define FE_UNDERFLOW 0x0010
|
||||
#define FE_OVERFLOW 0x0008
|
||||
#define FE_DIVBYZERO 0x0004
|
||||
#define FE_INVALID 0x0001
|
||||
#define FE_ALL_EXCEPT 0x003D
|
||||
int fetestexcept(int excepts);
|
||||
int feclearexcept(int excepts);
|
||||
#else
|
||||
#include <fenv.h>
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// math.h
|
||||
//
|
||||
|
||||
#if defined( __INTEL_COMPILER )
|
||||
#include <mathimf.h>
|
||||
#else
|
||||
#include <math.h>
|
||||
#endif
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846264338327950288
|
||||
#endif
|
||||
|
||||
#if ! defined( __INTEL_COMPILER )
|
||||
|
||||
#ifndef NAN
|
||||
#define NAN (INFINITY - INFINITY)
|
||||
#endif
|
||||
#ifndef HUGE_VALF
|
||||
#define HUGE_VALF (float)HUGE_VAL
|
||||
#endif
|
||||
#ifndef INFINITY
|
||||
#define INFINITY (FLT_MAX + FLT_MAX)
|
||||
#endif
|
||||
#ifndef isfinite
|
||||
#define isfinite(x) _finite(x)
|
||||
#endif
|
||||
#ifndef isnan
|
||||
#define isnan( x ) ((x) != (x))
|
||||
#endif
|
||||
#ifndef isinf
|
||||
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
|
||||
#endif
|
||||
|
||||
double rint( double x);
|
||||
float rintf( float x);
|
||||
long double rintl( long double x);
|
||||
|
||||
float cbrtf( float );
|
||||
double cbrt( double );
|
||||
|
||||
int ilogb( double x);
|
||||
int ilogbf (float x);
|
||||
int ilogbl(long double x);
|
||||
|
||||
double fmax(double x, double y);
|
||||
double fmin(double x, double y);
|
||||
float fmaxf( float x, float y );
|
||||
float fminf(float x, float y);
|
||||
|
||||
double log2(double x);
|
||||
long double log2l(long double x);
|
||||
|
||||
double exp2(double x);
|
||||
long double exp2l(long double x);
|
||||
|
||||
double fdim(double x, double y);
|
||||
float fdimf(float x, float y);
|
||||
long double fdiml(long double x, long double y);
|
||||
|
||||
double remquo( double x, double y, int *quo);
|
||||
float remquof( float x, float y, int *quo);
|
||||
long double remquol( long double x, long double y, int *quo);
|
||||
|
||||
long double scalblnl(long double x, long n);
|
||||
|
||||
float hypotf(float x, float y);
|
||||
long double hypotl(long double x, long double y) ;
|
||||
double lgamma(double x);
|
||||
float lgammaf(float x);
|
||||
|
||||
double trunc(double x);
|
||||
float truncf(float x);
|
||||
|
||||
double log1p(double x);
|
||||
float log1pf(float x);
|
||||
long double log1pl(long double x);
|
||||
|
||||
double copysign(double x, double y);
|
||||
float copysignf(float x, float y);
|
||||
long double copysignl(long double x, long double y);
|
||||
|
||||
long lround(double x);
|
||||
long lroundf(float x);
|
||||
//long lroundl(long double x)
|
||||
|
||||
double round(double x);
|
||||
float roundf(float x);
|
||||
long double roundl(long double x);
|
||||
|
||||
int cf_signbit(double x);
|
||||
int cf_signbitf(float x);
|
||||
|
||||
// signbit()/signbitf() for this MSVC-only section: thin forwards to the
// cf_signbit()/cf_signbitf() functions declared just above.
static int signbit(double x)
{
    const int sign = cf_signbit(x);
    return sign;
}

static int signbitf(float x)
{
    const int sign = cf_signbitf(x);
    return sign;
}
|
||||
|
||||
long int lrint (double flt);
|
||||
long int lrintf (float flt);
|
||||
|
||||
float int2float (int32_t ix);
|
||||
int32_t float2int (float fx);
|
||||
|
||||
#endif
|
||||
|
||||
#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
|
||||
// These functions appeared in Intel C v13.
|
||||
float nanf( const char* str);
|
||||
double nan( const char* str);
|
||||
long double nanl( const char* str);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined( __ANDROID__ )
|
||||
#define log2(X) (log(X)/log(2))
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//
|
||||
// stdio.h
|
||||
//
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
#define snprintf sprintf_s
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//
|
||||
// unistd.h
|
||||
//
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
EXTERN_C unsigned int sleep( unsigned int sec );
|
||||
EXTERN_C int usleep( int usec );
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//
|
||||
// syscall.h
|
||||
//
|
||||
|
||||
#if defined( __ANDROID__ )
|
||||
// Android's bionic does not provide SYS_sysctl wrappers.
|
||||
#define SYS__sysctl __NR__sysctl
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Some tests use _malloca, which is defined in malloc.h.
|
||||
#if !defined (__APPLE__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// ???
|
||||
//
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
|
||||
#define MAXPATHLEN _MAX_PATH
|
||||
|
||||
EXTERN_C uint64_t ReadTime( void );
|
||||
EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||
|
||||
/** Returns the number of leading 0-bits in x,
|
||||
starting at the most significant bit position.
|
||||
If x is 0, the result is undefined.
|
||||
*/
|
||||
EXTERN_C int __builtin_clz(unsigned int pattern);
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(x,y) (((x)<(y))?(x):(y))
|
||||
#endif
|
||||
#ifndef MAX
|
||||
#define MAX(x,y) (((x)>(y))?(x):(y))
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
------------------------------------------------------------------------------------------------
|
||||
WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
|
||||
|
||||
This is a typical usage of the macros:
|
||||
|
||||
double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
|
||||
|
||||
(taken from math_brute_force/reference_math.c). There are two problems:
|
||||
|
||||
1. There is an error here. On Windows it will produce the incorrect result
|
||||
`0x1.5555555555555p+50'. To have a correct result it should be written as
|
||||
`MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
|
||||
third argument is not obvious -- sometimes it should be the same as exponent of the
|
||||
first argument, but sometimes not.
|
||||
|
||||
2. Information is duplicated. It is easy to make a mistake.
|
||||
|
||||
Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
|
||||
------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
|
||||
|
||||
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
|
||||
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
|
||||
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
|
||||
|
||||
#else
|
||||
|
||||
// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
|
||||
#define MAKE_HEX_FLOAT(x,y,z) x
|
||||
#define MAKE_HEX_DOUBLE(x,y,z) x
|
||||
#define MAKE_HEX_LONG(x,y,z) x
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
------------------------------------------------------------------------------------------------
|
||||
HEX_FLT, HEX_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
|
||||
double respectively. Arguments:
|
||||
|
||||
sm -- sign of number,
|
||||
int -- integer part of mantissa (without `0x' prefix),
|
||||
fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
|
||||
se -- sign of exponent,
|
||||
exp -- absolute value of (binary) exponent.
|
||||
|
||||
Example:
|
||||
|
||||
double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
|
||||
|
||||
Note:
|
||||
|
||||
We have to pass signs as separate arguments because gcc passes negative integer values
|
||||
(e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
|
||||
`0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
|
||||
literal.
|
||||
------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
|
||||
// If compiler does not support hex floating point literals:
|
||||
#define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) )
|
||||
#define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) )
|
||||
#define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
|
||||
#else
|
||||
// If compiler supports hex floating point literals: just concatenate all the parts into a literal.
|
||||
#define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
|
||||
#define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
|
||||
#define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
|
||||
#endif
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
#include <Windows.h>
|
||||
#define sleep(sec) Sleep((sec) * 1000)
|
||||
#endif
|
||||
|
||||
#endif // _COMPAT_H_
|
||||
1198
test_common/harness/conversions.c
Normal file
1198
test_common/harness/conversions.c
Normal file
File diff suppressed because it is too large
Load Diff
126
test_common/harness/conversions.h
Normal file
126
test_common/harness/conversions.h
Normal file
@@ -0,0 +1,126 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _conversions_h
|
||||
#define _conversions_h
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
#include "errorHelpers.h"
|
||||
#include "mt19937.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Note: the next three all have to match in size and order!! */
|
||||
|
||||
enum ExplicitTypes
|
||||
{
|
||||
kBool = 0,
|
||||
kChar,
|
||||
kUChar,
|
||||
kUnsignedChar,
|
||||
kShort,
|
||||
kUShort,
|
||||
kUnsignedShort,
|
||||
kInt,
|
||||
kUInt,
|
||||
kUnsignedInt,
|
||||
kLong,
|
||||
kULong,
|
||||
kUnsignedLong,
|
||||
kFloat,
|
||||
kHalf,
|
||||
kDouble,
|
||||
kNumExplicitTypes
|
||||
};
|
||||
|
||||
typedef enum ExplicitTypes ExplicitType;
|
||||
|
||||
enum RoundingTypes
|
||||
{
|
||||
kRoundToEven = 0,
|
||||
kRoundToZero,
|
||||
kRoundToPosInf,
|
||||
kRoundToNegInf,
|
||||
kRoundToNearest,
|
||||
|
||||
kNumRoundingTypes,
|
||||
|
||||
kDefaultRoundingType = kRoundToNearest
|
||||
};
|
||||
|
||||
typedef enum RoundingTypes RoundingType;
|
||||
|
||||
extern void print_type_to_string(ExplicitType type, void *data, char* string);
|
||||
extern size_t get_explicit_type_size( ExplicitType type );
|
||||
extern const char * get_explicit_type_name( ExplicitType type );
|
||||
extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
|
||||
|
||||
extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
|
||||
extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
|
||||
|
||||
extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType );
|
||||
extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType );
|
||||
extern float read_as_float( void *inRaw, ExplicitType inType );
|
||||
|
||||
extern float get_random_float(float low, float high, MTdata d);
|
||||
extern double get_random_double(double low, double high, MTdata d);
|
||||
extern float any_float( MTdata d );
|
||||
extern double any_double( MTdata d );
|
||||
|
||||
extern int random_in_range( int minV, int maxV, MTdata d );
|
||||
|
||||
size_t get_random_size_t(size_t low, size_t high, MTdata d);
|
||||
|
||||
// Note: this takes a float and is intended for use with single precision tests
|
||||
// Returns non-zero when x is a subnormal (denormal) single precision value, zero otherwise.
// Zero, normal numbers, infinities and NaNs all yield zero.
static inline int IsFloatSubnormal( float x )
{
#if 2 == FLT_RADIX
    // Inspect the raw bit pattern instead of comparing floats, so the answer does not
    // depend on whether the floating point unit flushes subnormals to zero.
    union { float value; uint32_t bits; } pun;
    pun.value = fabsf( x );

    // With the sign cleared, a subnormal has a zero exponent field and a non-zero
    // mantissa, i.e. a bit pattern in the range [1, 0x007fffff].
    return pun.bits != 0 && pun.bits <= 0x007fffffU;
#else
    // Non-radix-2 (non-IEEE-754) hardware: fall back to a floating point comparison.
    // This gives wrong answers if subnormals are flushed to zero.
    return x != 0.0 && fabs(x) < (double) FLT_MIN;
#endif
}
|
||||
|
||||
// Returns non-zero when x is a subnormal (denormal) double precision value, zero otherwise.
// Zero, normal numbers, infinities and NaNs all yield zero.
static inline int IsDoubleSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Inspect the raw bit pattern instead of comparing doubles, so the answer does not
    // depend on whether the floating point unit flushes subnormals to zero.
    union { double value; uint64_t bits; } pun;
    pun.value = fabs( x );

    // With the sign cleared, a subnormal has a zero exponent field and a non-zero
    // mantissa, i.e. a bit pattern in the range [1, 0x000fffffffffffff].
    return pun.bits != 0 && pun.bits <= 0x000fffffffffffffULL;
#else
    // Non-radix-2 (non-IEEE-754) hardware: fall back to a floating point comparison.
    // This gives wrong answers if subnormals are flushed to zero.
    return x != 0.0 && fabs(x) < (double) DBL_MIN;
#endif
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _conversions_h
|
||||
|
||||
|
||||
754
test_common/harness/errorHelpers.c
Normal file
754
test_common/harness/errorHelpers.c
Normal file
@@ -0,0 +1,754 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "compat.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "errorHelpers.h"
|
||||
|
||||
const char *IGetErrorString( int clErrorCode )
|
||||
{
|
||||
switch( clErrorCode )
|
||||
{
|
||||
case CL_SUCCESS: return "CL_SUCCESS";
|
||||
case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
|
||||
case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
|
||||
case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
|
||||
case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
|
||||
case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
|
||||
case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
|
||||
case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
|
||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
||||
case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
|
||||
case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
|
||||
case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
|
||||
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
|
||||
case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
|
||||
case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
|
||||
case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
|
||||
case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
|
||||
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
|
||||
case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
|
||||
case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
|
||||
case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
|
||||
case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
|
||||
case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
|
||||
case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
|
||||
case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
|
||||
case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
|
||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
||||
case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
|
||||
case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
|
||||
case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
|
||||
case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
|
||||
case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||
case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
|
||||
case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
|
||||
case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
|
||||
case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
|
||||
case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
|
||||
case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
|
||||
case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
|
||||
case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
|
||||
case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
|
||||
case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
|
||||
case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
|
||||
case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
|
||||
case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
|
||||
case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
|
||||
case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
|
||||
case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
|
||||
case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
|
||||
case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
|
||||
case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
|
||||
case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
|
||||
case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
|
||||
case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
|
||||
case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT";
|
||||
default: return "(unknown)";
|
||||
}
|
||||
}
|
||||
|
||||
const char *GetChannelOrderName( cl_channel_order order )
|
||||
{
|
||||
switch( order )
|
||||
{
|
||||
case CL_R: return "CL_R";
|
||||
case CL_A: return "CL_A";
|
||||
case CL_Rx: return "CL_Rx";
|
||||
case CL_RG: return "CL_RG";
|
||||
case CL_RA: return "CL_RA";
|
||||
case CL_RGx: return "CL_RGx";
|
||||
case CL_RGB: return "CL_RGB";
|
||||
case CL_RGBx: return "CL_RGBx";
|
||||
case CL_RGBA: return "CL_RGBA";
|
||||
case CL_ARGB: return "CL_ARGB";
|
||||
case CL_BGRA: return "CL_BGRA";
|
||||
case CL_INTENSITY: return "CL_INTENSITY";
|
||||
case CL_LUMINANCE: return "CL_LUMINANCE";
|
||||
#if defined CL_1RGB_APPLE
|
||||
case CL_1RGB_APPLE: return "CL_1RGB_APPLE";
|
||||
#endif
|
||||
#if defined CL_BGR1_APPLE
|
||||
case CL_BGR1_APPLE: return "CL_BGR1_APPLE";
|
||||
#endif
|
||||
#if defined CL_ABGR_APPLE
|
||||
case CL_ABGR_APPLE: return "CL_ABGR_APPLE";
|
||||
#endif
|
||||
case CL_DEPTH: return "CL_DEPTH";
|
||||
case CL_DEPTH_STENCIL: return "CL_DEPTH_STENCIL";
|
||||
case CL_sRGB: return "CL_sRGB";
|
||||
case CL_sRGBA: return "CL_sRGBA";
|
||||
case CL_sRGBx: return "CL_sRGBx";
|
||||
case CL_sBGRA: return "CL_sBGRA";
|
||||
case CL_ABGR: return "CL_ABGR";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int IsChannelOrderSupported( cl_channel_order order )
|
||||
{
|
||||
switch( order )
|
||||
{
|
||||
case CL_R:
|
||||
case CL_A:
|
||||
case CL_Rx:
|
||||
case CL_RG:
|
||||
case CL_RA:
|
||||
case CL_RGx:
|
||||
case CL_RGB:
|
||||
case CL_RGBx:
|
||||
case CL_RGBA:
|
||||
case CL_ARGB:
|
||||
case CL_BGRA:
|
||||
case CL_INTENSITY:
|
||||
case CL_LUMINANCE:
|
||||
case CL_ABGR:
|
||||
case CL_sRGB:
|
||||
case CL_sRGBx:
|
||||
case CL_sBGRA:
|
||||
case CL_sRGBA:
|
||||
case CL_DEPTH:
|
||||
return 1;
|
||||
#if defined CL_1RGB_APPLE
|
||||
case CL_1RGB_APPLE:
|
||||
return 1;
|
||||
#endif
|
||||
#if defined CL_BGR1_APPLE
|
||||
case CL_BGR1_APPLE:
|
||||
return 1;
|
||||
#endif
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
const char *GetChannelTypeName( cl_channel_type type )
|
||||
{
|
||||
switch( type )
|
||||
{
|
||||
case CL_SNORM_INT8: return "CL_SNORM_INT8";
|
||||
case CL_SNORM_INT16: return "CL_SNORM_INT16";
|
||||
case CL_UNORM_INT8: return "CL_UNORM_INT8";
|
||||
case CL_UNORM_INT16: return "CL_UNORM_INT16";
|
||||
case CL_UNORM_SHORT_565: return "CL_UNORM_SHORT_565";
|
||||
case CL_UNORM_SHORT_555: return "CL_UNORM_SHORT_555";
|
||||
case CL_UNORM_INT_101010: return "CL_UNORM_INT_101010";
|
||||
case CL_SIGNED_INT8: return "CL_SIGNED_INT8";
|
||||
case CL_SIGNED_INT16: return "CL_SIGNED_INT16";
|
||||
case CL_SIGNED_INT32: return "CL_SIGNED_INT32";
|
||||
case CL_UNSIGNED_INT8: return "CL_UNSIGNED_INT8";
|
||||
case CL_UNSIGNED_INT16: return "CL_UNSIGNED_INT16";
|
||||
case CL_UNSIGNED_INT32: return "CL_UNSIGNED_INT32";
|
||||
case CL_HALF_FLOAT: return "CL_HALF_FLOAT";
|
||||
case CL_FLOAT: return "CL_FLOAT";
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE: return "CL_SFIXED14_APPLE";
|
||||
#endif
|
||||
case CL_UNORM_INT24: return "CL_UNORM_INT24";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int IsChannelTypeSupported( cl_channel_type type )
|
||||
{
|
||||
switch( type )
|
||||
{
|
||||
case CL_SNORM_INT8:
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNORM_INT8:
|
||||
case CL_UNORM_INT16:
|
||||
case CL_UNORM_INT24:
|
||||
case CL_UNORM_SHORT_565:
|
||||
case CL_UNORM_SHORT_555:
|
||||
case CL_UNORM_INT_101010:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_UNSIGNED_INT8:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_HALF_FLOAT:
|
||||
case CL_FLOAT:
|
||||
return 1;
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
return 1;
|
||||
#endif
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
const char *GetAddressModeName( cl_addressing_mode mode )
|
||||
{
|
||||
switch( mode )
|
||||
{
|
||||
case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE";
|
||||
case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE";
|
||||
case CL_ADDRESS_CLAMP: return "CL_ADDRESS_CLAMP";
|
||||
case CL_ADDRESS_REPEAT: return "CL_ADDRESS_REPEAT";
|
||||
case CL_ADDRESS_MIRRORED_REPEAT: return "CL_ADDRESS_MIRRORED_REPEAT";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Maps a single cl_device_type value to the name of the corresponding
// CL_DEVICE_TYPE_* constant.
//
// type: exactly one of the CL_DEVICE_TYPE_* constants; combinations of several
//       type bits (other than CL_DEVICE_TYPE_ALL) are not recognized.
// Returns a pointer to a static string literal, or NULL when the value is not
// recognized, so callers must check the result before printing it.
const char *GetDeviceTypeName( cl_device_type type )
{
    switch( type )
    {
        case CL_DEVICE_TYPE_DEFAULT:        return "CL_DEVICE_TYPE_DEFAULT";
        case CL_DEVICE_TYPE_GPU:            return "CL_DEVICE_TYPE_GPU";
        case CL_DEVICE_TYPE_CPU:            return "CL_DEVICE_TYPE_CPU";
        case CL_DEVICE_TYPE_ACCELERATOR:    return "CL_DEVICE_TYPE_ACCELERATOR";
        case CL_DEVICE_TYPE_CUSTOM:         return "CL_DEVICE_TYPE_CUSTOM";
        case CL_DEVICE_TYPE_ALL:            return "CL_DEVICE_TYPE_ALL";
        default:                            return NULL;
    }
}
|
||||
|
||||
// Formats a vector of raw values as hexadecimal text, e.g. "0201 0403".
//
// dataBuffer: the raw bytes of the vector, vecSize elements of typeSize bytes each.
// typeSize:   size in bytes of one element.
// vecSize:    number of elements.
// buffer:     destination for the NUL-terminated text, or NULL to use an internal
//             static buffer. A caller-supplied buffer must hold at least
//             vecSize * (2 * typeSize + 1) bytes; its size cannot be checked here.
//
// Each element is written with its highest-addressed byte first (which is the most
// significant byte on a little-endian host), and elements are separated by one space.
//
// Returns the buffer that was written. When the internal static buffer is used the
// result is overwritten by the next such call (not reentrant), and output is cut off
// at an element boundary rather than written past the end of that buffer.
const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
{
    static char scratch[ 1024 ];
    static const char hexDigits[] = "0123456789abcdef";

    const unsigned char *element = (const unsigned char *)dataBuffer;
    // Only the capacity of the internal scratch buffer is known.
    size_t capacity = (size_t)-1;
    char *out;
    size_t i, j;

    if( buffer == NULL )
    {
        buffer = scratch;
        capacity = sizeof( scratch );
    }

    out = buffer;
    for( i = 0; i < vecSize; i++ )
    {
        // Room for an optional separator, two digits per byte, and the terminator.
        size_t needed = 2 * typeSize + ( i > 0 ? 1 : 0 ) + 1;
        if( needed > capacity - (size_t)( out - buffer ) )
            break;

        if( i > 0 )
            *out++ = ' ';

        // Walk the element from its last byte down to its first.
        for( j = typeSize; j > 0; j-- )
        {
            unsigned char byte = element[ j - 1 ];
            *out++ = hexDigits[ byte >> 4 ];
            *out++ = hexDigits[ byte & 0x0f ];
        }
        element += typeSize;
    }
    *out = 0;

    return buffer;
}
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
|
||||
#endif
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
#define scalbnf(_a, _i ) ldexpf( _a, _i )
|
||||
#define scalbn(_a, _i ) ldexp( _a, _i )
|
||||
#define scalbnl(_a, _i ) ldexpl( _a, _i )
|
||||
#endif
|
||||
|
||||
static float Ulp_Error_Half_Float( float test, double reference );
|
||||
static inline float half2float( cl_ushort half );
|
||||
|
||||
// taken from math tests
|
||||
#define HALF_MIN_EXP -13
|
||||
#define HALF_MANT_DIG 11
|
||||
static float Ulp_Error_Half_Float( float test, double reference )
|
||||
{
|
||||
union{ double d; uint64_t u; }u; u.d = reference;
|
||||
|
||||
// Note: This function presumes that someone has already tested whether the result is correctly,
|
||||
// rounded before calling this function. That test:
|
||||
//
|
||||
// if( (float) reference == test )
|
||||
// return 0.0f;
|
||||
//
|
||||
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
|
||||
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
|
||||
// results.
|
||||
|
||||
double testVal = test;
|
||||
if( u.u & 0x000fffffffffffffULL )
|
||||
{ // Non-power of two and NaN
|
||||
if( isnan( reference ) && isnan( test ) )
|
||||
return 0.0f; // if we are expecting a NaN, any NaN is fine
|
||||
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
if( isinf( reference ) )
|
||||
{
|
||||
if( (double) test == reference )
|
||||
return 0.0f;
|
||||
|
||||
return (float) (testVal - reference );
|
||||
}
|
||||
|
||||
// reference is a normal power of two or a zero
|
||||
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
// Taken from vLoadHalf test
|
||||
static inline float half2float( cl_ushort us )
|
||||
{
|
||||
uint32_t u = us;
|
||||
uint32_t sign = (u << 16) & 0x80000000;
|
||||
int32_t exponent = (u & 0x7c00) >> 10;
|
||||
uint32_t mantissa = (u & 0x03ff) << 13;
|
||||
union{ unsigned int u; float f;}uu;
|
||||
|
||||
if( exponent == 0 )
|
||||
{
|
||||
if( mantissa == 0 )
|
||||
return sign ? -0.0f : 0.0f;
|
||||
|
||||
int shift = __builtin_clz( mantissa ) - 8;
|
||||
exponent -= shift-1;
|
||||
mantissa <<= shift;
|
||||
mantissa &= 0x007fffff;
|
||||
}
|
||||
else
|
||||
if( exponent == 31)
|
||||
{
|
||||
uu.u = mantissa | sign;
|
||||
if( mantissa )
|
||||
uu.u |= 0x7fc00000;
|
||||
else
|
||||
uu.u |= 0x7f800000;
|
||||
|
||||
return uu.f;
|
||||
}
|
||||
|
||||
exponent += 127 - 15;
|
||||
exponent <<= 23;
|
||||
|
||||
exponent |= mantissa;
|
||||
uu.u = exponent | sign;
|
||||
|
||||
return uu.f;
|
||||
}
|
||||
|
||||
float Ulp_Error_Half( cl_ushort test, float reference )
|
||||
{
|
||||
return Ulp_Error_Half_Float( half2float(test), reference );
|
||||
}
|
||||
|
||||
|
||||
float Ulp_Error( float test, double reference )
|
||||
{
|
||||
union{ double d; uint64_t u; }u; u.d = reference;
|
||||
double testVal = test;
|
||||
|
||||
// Note: This function presumes that someone has already tested whether the result is correctly,
|
||||
// rounded before calling this function. That test:
|
||||
//
|
||||
// if( (float) reference == test )
|
||||
// return 0.0f;
|
||||
//
|
||||
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
|
||||
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
|
||||
// results.
|
||||
|
||||
|
||||
if( isinf( reference ) )
|
||||
{
|
||||
if( testVal == reference )
|
||||
return 0.0f;
|
||||
|
||||
return (float) (testVal - reference );
|
||||
}
|
||||
|
||||
if( isinf( testVal) )
|
||||
{ // infinite test value, but finite (but possibly overflowing in float) reference.
|
||||
//
|
||||
// The function probably overflowed prematurely here. Formally, the spec says this is
|
||||
// an infinite ulp error and should not be tolerated. Unfortunately, this would mean
|
||||
// that the internal precision of some half_pow implementations would have to be 29+ bits
|
||||
// at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
|
||||
// is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
|
||||
// after rounding to single is 4*32 = 128, which will ultimately result in premature
|
||||
// overflow, even though a good faith representation would be correct to within 2**-29
|
||||
// internally.
|
||||
|
||||
// In the interest of not requiring the implementation go to extraordinary lengths to
|
||||
// deliver a half precision function, we allow premature overflow within the limit
|
||||
// of the allowed ulp error. Towards, that end, we "pretend" the test value is actually
|
||||
// 2**128, the next value that would appear in the number line if float had sufficient range.
|
||||
testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
|
||||
|
||||
// Note that the same hack may not work in long double, which is not guaranteed to have
|
||||
// more range than double. It is not clear that premature overflow should be tolerated for
|
||||
// double.
|
||||
}
|
||||
|
||||
if( u.u & 0x000fffffffffffffULL )
|
||||
{ // Non-power of two and NaN
|
||||
if( isnan( reference ) && isnan( test ) )
|
||||
return 0.0f; // if we are expecting a NaN, any NaN is fine
|
||||
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
// reference is a normal power of two or a zero
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
float Ulp_Error_Double( double test, long double reference )
|
||||
{
|
||||
// Deal with long double = double
|
||||
// On most systems long double is a higher precision type than double. They provide either
|
||||
// a 80-bit or greater floating point type, or they provide a head-tail double double format.
|
||||
// That is sufficient to represent the accuracy of a floating point result to many more bits
|
||||
// than double and we can calculate sub-ulp errors. This is the standard system for which this
|
||||
// test suite is designed.
|
||||
//
|
||||
// On some systems double and long double are the same thing. Then we run into a problem,
|
||||
// because our representation of the infinitely precise result (passed in as reference above)
|
||||
// can be off by as much as a half double precision ulp itself. In this case, we inflate the
|
||||
// reported error by half an ulp to take this into account. A more correct and permanent fix
|
||||
// would be to undertake refactoring the reference code to return results in this format:
|
||||
//
|
||||
// typedef struct DoubleReference
|
||||
// { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult) (infinitely precise)
|
||||
// double correctlyRoundedResult; // as best we can
|
||||
// double ulps; // plus a fractional amount to account for the difference
|
||||
// }DoubleReference; // between infinitely precise result and correctlyRoundedResult, in units of ulps.
|
||||
//
|
||||
// This would provide a useful higher-than-double precision format for everyone that we can use,
|
||||
// and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
|
||||
// that use a head to tail double double for long double.
|
||||
|
||||
// Note: This function presumes that someone has already tested whether the result is correctly,
|
||||
// rounded before calling this function. That test:
|
||||
//
|
||||
// if( (double) reference == test )
|
||||
// return 0.0f;
|
||||
//
|
||||
// would ensure that cases like fabs(reference) > DBL_MAX are weeded out before we get here.
|
||||
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
|
||||
// results.
|
||||
|
||||
|
||||
int x;
|
||||
long double testVal = test;
|
||||
if( 0.5L != frexpl( reference, &x) )
|
||||
{ // Non-power of two and NaN
|
||||
if( isinf( reference ) )
|
||||
{
|
||||
if( testVal == reference )
|
||||
return 0.0f;
|
||||
|
||||
return (float) ( testVal - reference );
|
||||
}
|
||||
|
||||
if( isnan( reference ) && isnan( test ) )
|
||||
return 0.0f; // if we are expecting a NaN, any NaN is fine
|
||||
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
float result = (float) scalbnl( testVal - reference, ulp_exp );
|
||||
|
||||
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
|
||||
if( sizeof(long double) == sizeof( double ) )
|
||||
result += copysignf( 0.5f, result);
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
// reference is a normal power of two or a zero
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
float result = (float) scalbnl( testVal - reference, ulp_exp );
|
||||
|
||||
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
|
||||
if( sizeof(long double) == sizeof( double ) )
|
||||
result += copysignf( 0.5f, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Logs the build status and build log of every device for which `program` did not
// build successfully.
//
// program:     the program to inspect. NULL is accepted and treated as "nothing to do".
// num_devices: number of entries in device_list, or 0 to inspect every device of the
//              context the program belongs to.
// device_list: the devices to inspect; ignored when num_devices is 0.
//
// Returns CL_SUCCESS when all queries succeeded (regardless of the build status that
// was found), CL_OUT_OF_HOST_MEMORY when a host allocation failed, or the error code
// of the first OpenCL query that failed. Any memory allocated here is released on
// every path.
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
{
    int error = CL_SUCCESS;
    size_t size_ret = 0;
    cl_device_id *queried_devices = NULL;   // device list allocated by this function, if any
    char *build_log = NULL;
    unsigned int i;

    // Does the program object exist?
    if (program == NULL)
        return CL_SUCCESS;

    // If zero devices were specified then allocate and query the device list from the context
    if (num_devices == 0) {
        cl_context context;

        error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query program's context" );
            goto cleanup;
        }

        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query context's device size" );
            goto cleanup;
        }

        num_devices = (cl_uint) (size_ret / sizeof(cl_device_id));
        if (num_devices == 0)
            goto cleanup;   // no devices to report on

        queried_devices = (cl_device_id *) malloc(size_ret);
        if (queried_devices == NULL) {
            error = CL_OUT_OF_HOST_MEMORY;
            print_error( error, "malloc failed" );
            goto cleanup;
        }

        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, queried_devices, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query context's devices" );
            goto cleanup;
        }

        device_list = queried_devices;
    }

    // For each device in the device_list
    for (i = 0; i < num_devices; i++) {
        cl_build_status build_status;

        // Get the build status
        error = clGetProgramBuildInfo(program,
                                      device_list[i],
                                      CL_PROGRAM_BUILD_STATUS,
                                      sizeof(build_status),
                                      &build_status,
                                      NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build status" );
            goto cleanup;
        }

        if (build_status == CL_BUILD_SUCCESS)
            continue;

        // The build failed: log the status, then fetch and log the build log
        log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);

        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build log size" );
            goto cleanup;
        }

        // One extra byte so the log can always be NUL-terminated before printing
        build_log = (char *) malloc(size_ret + 1);
        if (build_log == NULL) {
            error = CL_OUT_OF_HOST_MEMORY;
            print_error( error, "malloc failed" );
            goto cleanup;
        }

        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, size_ret, build_log, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build log" );
            goto cleanup;
        }
        build_log[size_ret] = '\0';

        log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);

        free(build_log);
        build_log = NULL;
    }

cleanup:
    // free(NULL) is a no-op, so this is safe on every path
    free(build_log);
    free(queried_devices);

    return error;
}
|
||||
|
||||
// Names of the subtests that are only run against a device reporting at least
// OpenCL 1.2. check_opencl_version_with_testname() matches a subtest name
// against this table with strcmp(), so every entry must appear exactly once
// and must be spelled exactly like the registered subtest name.
const char * subtests_requiring_opencl_1_2[] = {
    "device_partition_equally",
    "device_partition_by_counts",
    "device_partition_by_affinity_domain_numa",
    "device_partition_by_affinity_domain_l4_cache",
    "device_partition_by_affinity_domain_l3_cache",
    "device_partition_by_affinity_domain_l2_cache",
    "device_partition_by_affinity_domain_l1_cache",
    "device_partition_by_affinity_domain_next_partitionable",
    "device_partition_all",
    "buffer_fill_int",
    "buffer_fill_uint",
    "buffer_fill_short",
    "buffer_fill_ushort",
    "buffer_fill_char",
    "buffer_fill_uchar",
    "buffer_fill_long",
    "buffer_fill_ulong",
    "buffer_fill_float",
    "buffer_fill_struct",
    "test_mem_host_write_only_buffer",
    "test_mem_host_write_only_subbuffer",
    "test_mem_host_no_access_buffer",
    "test_mem_host_no_access_subbuffer",
    "test_mem_host_read_only_image",
    "test_mem_host_write_only_image",
    "test_mem_host_no_access_image",
    // CL_MEM_HOST_{READ|WRITE}_ONLY api/
    "get_buffer_info",
    "get_image1d_info",
    "get_image1d_array_info",
    "get_image2d_array_info",
    // gl/
    "images_read_1D",
    "images_write_1D",
    "images_1D_getinfo",
    "images_read_1Darray",
    "images_write_1Darray",
    "images_1Darray_getinfo",
    "images_read_2Darray",
    "images_write_2Darray",
    "images_2Darray_getinfo",
    "buffer_migrate",
    "image_migrate",
    // compiler/
    "load_program_source",
    "load_multistring_source",
    "load_two_kernel_source",
    "load_null_terminated_source",
    "load_null_terminated_multi_line_source",
    "load_null_terminated_partial_multi_line_source",
    "load_discreet_length_source",
    "get_program_source",
    "get_program_build_info",
    "get_program_info",
    "large_compile",
    "async_build",
    "options_build_optimizations",
    "options_build_macro",
    "options_build_macro_existence",
    "options_include_directory",
    "options_denorm_cache",
    "preprocessor_define_udef",
    "preprocessor_include",
    "preprocessor_line_error",
    "preprocessor_pragma",
    "compiler_defines_for_extensions",
    "image_macro",
    "simple_compile_only",
    "simple_static_compile_only",
    "simple_extern_compile_only",
    "simple_compile_with_callback",
    "simple_embedded_header_compile",
    "simple_link_only",
    "two_file_regular_variable_access",
    "two_file_regular_struct_access",
    "two_file_regular_function_access",
    "simple_link_with_callback",
    "simple_embedded_header_link",
    "execute_after_simple_compile_and_link",
    "execute_after_simple_compile_and_link_no_device_info",
    "execute_after_simple_compile_and_link_with_defines",
    "execute_after_simple_compile_and_link_with_callbacks",
    "execute_after_simple_library_with_link",
    "execute_after_two_file_link",
    "execute_after_embedded_header_link",
    "execute_after_included_header_link",
    "execute_after_serialize_reload_object",
    "execute_after_serialize_reload_library",
    "simple_library_only",
    "simple_library_with_callback",
    "simple_library_with_link",
    "two_file_link",
    "multi_file_libraries",
    "multiple_files",
    "multiple_libraries",
    "multiple_files_multiple_libraries",
    "multiple_embedded_headers",
    "program_binary_type",
    "compile_and_link_status_options_log",
    // CL_PROGRAM_NUM_KERNELS, in api/
    "get_kernel_arg_info",
    "create_kernels_in_program",
    // clEnqueue..WithWaitList, in events/
    "event_enqueue_marker_with_event_list",
    "event_enqueue_barrier_with_event_list",
    "popcount"
};
|
||||
|
||||
|
||||
// Decides whether `subtestname` is gated on OpenCL 1.2.
//
// The name is compared (exact strcmp match) against every entry of
// subtests_requiring_opencl_1_2. On the first match the result of
// check_opencl_version(device, 1, 2) is returned unchanged. A name that is
// not in the table has no version requirement and yields 0.
int check_opencl_version_with_testname(const char *subtestname, cl_device_id device)
{
    const size_t tableLength =
        sizeof(subtests_requiring_opencl_1_2) / sizeof(subtests_requiring_opencl_1_2[0]);
    size_t idx = 0;

    while (idx < tableLength) {
        if (strcmp(subtestname, subtests_requiring_opencl_1_2[idx]) == 0)
            return check_opencl_version(device, 1, 2);
        ++idx;
    }

    return 0;
}
|
||||
|
||||
// Checks that `device` reports an OpenCL version of at least
// requestedMajorVersion.requestedMinorVersion.
//
// The version is taken from the CL_DEVICE_VERSION string; only the prefixes
// "OpenCL 1.1", "OpenCL 1.2" and "OpenCL 2.0" (followed by end of string or a
// space) are recognised.
//
// Returns:
//   0                  the device version is >= the requested version
//   1                  the device version is lower, or the version string is
//                      not one of the recognised forms (an error is logged)
//   CL error code      clGetDeviceInfo failed (returned through test_error)
int check_opencl_version(cl_device_id device, cl_uint requestedMajorVersion, cl_uint requestedMinorVersion) {
    int error;
    char device_version[1024];
    cl_uint majorVersion = 0, minorVersion = 0;

    memset( device_version, 0, sizeof( device_version ) );
    error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
    test_error(error, "unable to get CL_DEVICE_VERSION");

    if ( strncmp( device_version, "OpenCL 1.2", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
        majorVersion = 1;
        minorVersion = 2;
    } else if ( strncmp( device_version, "OpenCL 1.1", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
        majorVersion = 1;
        minorVersion = 1;
    } else if ( strncmp( device_version, "OpenCL 2.0", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
        majorVersion = 2;
        minorVersion = 0;
    } else {
        log_error( "ERROR: Unexpected version string: `%s'.\n", device_version );
        return 1;
    }

    // Compare as a (major, minor) pair. The minor number only matters when
    // the major numbers are equal; comparing the two fields independently
    // would accept a 1.1 device for a 1.2 request and a 1.2 device for a 2.0
    // request.
    if (majorVersion != requestedMajorVersion)
        return (majorVersion > requestedMajorVersion) ? 0 : 1;

    return (minorVersion >= requestedMinorVersion) ? 0 : 1;
}
|
||||
|
||||
|
||||
159
test_common/harness/errorHelpers.h
Normal file
159
test_common/harness/errorHelpers.h
Normal file
@@ -0,0 +1,159 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _errorHelpers_h
|
||||
#define _errorHelpers_h
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define LOWER_IS_BETTER 0
|
||||
#define HIGHER_IS_BETTER 1
|
||||
|
||||
// If USE_ATF is defined, all log_error and log_info calls can be routed to test library
|
||||
// functions as described below. This is helpful for integration into an automated testing
|
||||
// system.
|
||||
#if USE_ATF
|
||||
// export BUILD_WITH_ATF=1
|
||||
#include <ATF/ATF.h>
|
||||
#define test_start() ATFTestStart()
|
||||
#define log_info ATFLogInfo
|
||||
#define log_error ATFLogError
|
||||
#define log_missing_feature ATFLogMissingFeature
|
||||
#define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__)
|
||||
#define test_finish() ATFTestFinish()
|
||||
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
|
||||
#define vlog ATFLogInfo
|
||||
#define vlog_error ATFLogError
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#define test_start()
|
||||
#define log_info printf
|
||||
#define log_error printf
|
||||
#define log_missing_feature printf
|
||||
#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
|
||||
_higherBetter?"higher is better":"lower is better", _number )
|
||||
#define test_finish()
|
||||
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
|
||||
_higherBetter?"higher is better":"lower is better" , _number)
|
||||
#ifdef _WIN32
|
||||
#ifdef __MINGW32__
|
||||
// Use __mingw_printf since it supports "%a" format specifier
|
||||
#define vlog __mingw_printf
|
||||
#define vlog_error __mingw_printf
|
||||
#else
|
||||
// Use home-baked function that treats "%a" as "%f"
|
||||
static int vlog_win32(const char *format, ...);
|
||||
#define vlog vlog_win32
|
||||
#define vlog_error vlog_win32
|
||||
#endif
|
||||
#else
|
||||
#define vlog_error printf
|
||||
#define vlog printf
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define ct_assert(b) ct_assert_i(b, __LINE__)
|
||||
#define ct_assert_i(b, line) ct_assert_ii(b, line)
|
||||
#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
|
||||
|
||||
#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode)
|
||||
#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
|
||||
#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
|
||||
|
||||
#define test_missing_feature(errCode, msg) test_missing_feature_ret(errCode, msg, errCode)
|
||||
// this macro should always return CL_SUCCESS, but print the missing feature message
|
||||
#define test_missing_feature_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_missing_feature( errCode, msg ); return CL_SUCCESS ; } }
|
||||
#define print_missing_feature(errCode, msg) log_missing_feature("ERROR: Subtest %s tests a feature not supported by the device version! (from %s:%d)\n", msg, __FILE__, __LINE__ );
|
||||
|
||||
// expected error code vs. what we got
|
||||
#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
|
||||
#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
|
||||
#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
|
||||
#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
|
||||
#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
|
||||
#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
|
||||
|
||||
extern const char *IGetErrorString( int clErrorCode );
|
||||
|
||||
extern float Ulp_Error_Half( cl_ushort test, float reference );
|
||||
extern float Ulp_Error( float test, double reference );
|
||||
extern float Ulp_Error_Double( double test, long double reference );
|
||||
|
||||
extern const char *GetChannelTypeName( cl_channel_type type );
|
||||
extern int IsChannelTypeSupported( cl_channel_type type );
|
||||
extern const char *GetChannelOrderName( cl_channel_order order );
|
||||
extern int IsChannelOrderSupported( cl_channel_order order );
|
||||
extern const char *GetAddressModeName( cl_addressing_mode mode );
|
||||
|
||||
extern const char *GetDeviceTypeName( cl_device_type type );
|
||||
int check_opencl_version_with_testname(const char *subtestname, cl_device_id device);
|
||||
int check_opencl_version(cl_device_id device, cl_uint requestedMajorVersion, cl_uint requestedMinorVersion);
|
||||
|
||||
// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
|
||||
extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
|
||||
|
||||
#if defined (_WIN32) && !defined(__MINGW32__)
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
// printf replacement installed as vlog / vlog_error on Windows builds that are
// not MinGW. Every "%a" conversion in `format` is rewritten to "%f" on a
// private copy of the string, then the result is printed with vprintf.
//
// format  printf-style format string; it is never modified in place
// ...     arguments consumed by the format string
//
// Returns 0 after printing, or -1 (after printing a diagnostic) when the
// format string could not be duplicated.
static int vlog_win32(const char *format, ...)
{
    const char *new_format = format;

    if (strstr(format, "%a")) {
        char *temp;
        if ((temp = strdup(format)) == NULL) {
            printf("vlog_win32: Failed to allocate memory for strdup\n");
            return -1;
        }
        new_format = temp;
        while (*temp) {
            if (*temp == '%') {
                if (*(temp+1) == '%') {
                    // "%%" is an escaped percent sign, not the start of a
                    // conversion: step over both characters so that a
                    // literal "%%a" is not turned into "%%f".
                    temp += 2;
                    continue;
                }
                // replace %a with %f
                if (*(temp+1) == 'a') {
                    *(temp+1) = 'f';
                }
            }
            temp++;
        }
    }

    va_list args;
    va_start(args, format);
    vprintf(new_format, args);
    va_end(args);

    // Release the private copy, if one was made.
    if (new_format != format) {
        free((void*)new_format);
    }

    return 0;
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _errorHelpers_h
|
||||
|
||||
|
||||
104
test_common/harness/fpcontrol.h
Normal file
104
test_common/harness/fpcontrol.h
Normal file
@@ -0,0 +1,104 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _fpcontrol_h
|
||||
#define _fpcontrol_h
|
||||
|
||||
// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
|
||||
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
|
||||
// in integer code, and have found this is the only way to correctly verify operation.
|
||||
//
|
||||
// Non-Apple implementations will need to provide their own implementation for these features. If the reference hardware and device are both
|
||||
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default
|
||||
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
|
||||
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
|
||||
typedef int FPU_mode_type;
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
|
||||
#include <xmmintrin.h>
|
||||
#elif defined( __PPC__ )
|
||||
#include <fpu_control.h>
|
||||
extern __thread fpu_control_t fpu_control;
|
||||
#endif
|
||||
// Set the reference hardware floating point unit to FTZ mode
|
||||
static inline void ForceFTZ( FPU_mode_type *mode )
|
||||
{
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr( *mode | 0x8040);
|
||||
#elif defined( __PPC__ )
|
||||
*mode = fpu_control;
|
||||
fpu_control |= _FPU_MASK_NI;
|
||||
#elif defined ( __arm__ )
|
||||
unsigned fpscr;
|
||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
|
||||
// Add 64 bit support
|
||||
#elif defined (__aarch64__)
|
||||
unsigned fpscr;
|
||||
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24)));
|
||||
#else
|
||||
#error ForceFTZ needs an implentation
|
||||
#endif
|
||||
}
|
||||
|
||||
// Disable the denorm flush to zero
|
||||
static inline void DisableFTZ( FPU_mode_type *mode )
|
||||
{
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr( *mode & ~0x8040);
|
||||
#elif defined( __PPC__ )
|
||||
*mode = fpu_control;
|
||||
fpu_control &= ~_FPU_MASK_NI;
|
||||
#elif defined ( __arm__ )
|
||||
unsigned fpscr;
|
||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
|
||||
// Add 64 bit support
|
||||
#elif defined (__aarch64__)
|
||||
unsigned fpscr;
|
||||
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24)));
|
||||
#else
|
||||
#error DisableFTZ needs an implentation
|
||||
#endif
|
||||
}
|
||||
|
||||
// Restore the reference hardware to floating point state indicated by *mode
|
||||
static inline void RestoreFPState( FPU_mode_type *mode )
|
||||
{
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
||||
_mm_setcsr( *mode );
|
||||
#elif defined( __PPC__)
|
||||
fpu_control = *mode;
|
||||
#elif defined (__arm__)
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
|
||||
// Add 64 bit support
|
||||
#elif defined (__aarch64__)
|
||||
__asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
|
||||
#else
|
||||
#error RestoreFPState needs an implementation
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
#error ForceFTZ and RestoreFPState need implentations
|
||||
#endif
|
||||
|
||||
#endif
|
||||
53
test_common/harness/genericThread.cpp
Normal file
53
test_common/harness/genericThread.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "genericThread.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#else // !_WIN32
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
void * genericThread::IStaticReflector( void * data )
|
||||
{
|
||||
genericThread *t = (genericThread *)data;
|
||||
return t->IRun();
|
||||
}
|
||||
|
||||
bool genericThread::Start( void )
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
|
||||
return ( mHandle != NULL );
|
||||
#else // !_WIN32
|
||||
int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
|
||||
return ( error == 0 );
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
void * genericThread::Join( void )
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
WaitForSingleObject( (HANDLE)mHandle, INFINITE );
|
||||
return NULL;
|
||||
#else // !_WIN32
|
||||
void * retVal;
|
||||
int error = pthread_join( (pthread_t)mHandle, &retVal );
|
||||
if( error != 0 )
|
||||
retVal = NULL;
|
||||
return retVal;
|
||||
#endif // !_WIN32
|
||||
}
|
||||
42
test_common/harness/genericThread.h
Normal file
42
test_common/harness/genericThread.h
Normal file
@@ -0,0 +1,42 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _genericThread_h
|
||||
#define _genericThread_h
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// Minimal thread wrapper. Derive from it, implement IRun(), then call Start()
// to run IRun() on a new thread and Join() to wait for it to finish.
class genericThread
{
public:

    // Start with no thread attached, so mHandle never holds an
    // indeterminate value before Start() is called.
    genericThread() : mHandle( NULL ) {}

    virtual ~genericThread() {}

    // Creates the thread; returns true on success.
    bool Start( void );

    // Waits for the thread to finish.
    void * Join( void );

protected:

    // Body of the thread, supplied by the derived class.
    virtual void * IRun( void ) = 0;

private:

    // Native thread handle/id written by Start().
    void* mHandle;

    // Entry point given to the native thread API; forwards to IRun().
    static void * IStaticReflector( void * data );
};
|
||||
|
||||
#endif // _genericThread_h
|
||||
|
||||
3862
test_common/harness/imageHelpers.cpp
Normal file
3862
test_common/harness/imageHelpers.cpp
Normal file
File diff suppressed because it is too large
Load Diff
646
test_common/harness/imageHelpers.h
Normal file
646
test_common/harness/imageHelpers.h
Normal file
@@ -0,0 +1,646 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _imageHelpers_h
|
||||
#define _imageHelpers_h
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#include "errorHelpers.h"
|
||||
|
||||
#include "conversions.h"
|
||||
#include "typeWrappers.h"
|
||||
#include "kernelHelpers.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "mt19937.h"
|
||||
#include "rounding_mode.h"
|
||||
#include "clImageHelper.h"
|
||||
|
||||
extern int gTestCount;
|
||||
extern int gTestFailure;
|
||||
extern cl_device_type gDeviceType;
|
||||
|
||||
// Number of iterations per image format to test if not testing max images, rounding, or small images
|
||||
#define NUM_IMAGE_ITERATIONS 3
|
||||
|
||||
|
||||
// Error limits for sRGB <-> linear RGB (lRGB) conversions
|
||||
#define MAX_sRGB_TO_lRGB_CONVERSION_ERROR 0.5
|
||||
#define MAX_lRGB_TO_sRGB_CONVERSION_ERROR 0.6
|
||||
|
||||
// Definition for our own sampler type, to mirror the cl_sampler internals.
// Holds the three sampler settings as plain host-side values.
|
||||
typedef struct {
|
||||
// Addressing mode of the sampler (a cl_addressing_mode value)
cl_addressing_mode addressing_mode;
|
||||
// Filter mode of the sampler (a cl_filter_mode value)
cl_filter_mode filter_mode;
|
||||
// Presumably true when coordinates are normalized - confirm against the users of this struct
bool normalized_coords;
|
||||
} image_sampler_data;
|
||||
|
||||
int round_to_even( float v );
|
||||
|
||||
#define NORMALIZE( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : round_to_even( v * max ) ) )
|
||||
#define NORMALIZE_UNROUNDED( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : v * max ) )
|
||||
#define NORMALIZE_SIGNED( v, min, max ) ( v < -1.0f ? min : ( v > 1.f ? max : round_to_even( v * max ) ) )
|
||||
#define NORMALIZE_SIGNED_UNROUNDED( v, min, max ) ( v < -1.0f ? min : ( v > 1.f ? max : v * max ) )
|
||||
#define CONVERT_INT( v, min, max, max_val) ( v < min ? min : ( v > max ? max_val : round_to_even( v ) ) )
|
||||
#define CONVERT_UINT( v, max, max_val) ( v < 0 ? 0 : ( v > max ? max_val : round_to_even( v ) ) )
|
||||
|
||||
extern void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err = false, int t = 0 );
|
||||
extern void print_write_header( cl_image_format *format, bool err);
|
||||
extern void print_header( cl_image_format *format, bool err );
|
||||
extern bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind );
|
||||
extern bool check_minimum_supported( cl_image_format *formatList, unsigned int numFormats, cl_mem_flags flags );
|
||||
|
||||
extern size_t get_format_type_size( const cl_image_format *format );
|
||||
extern size_t get_channel_data_type_size( cl_channel_type channelType );
|
||||
extern size_t get_format_channel_count( const cl_image_format *format );
|
||||
extern size_t get_channel_order_channel_count( cl_channel_order order );
|
||||
cl_channel_type get_channel_type_from_name( const char *name );
|
||||
cl_channel_order get_channel_order_from_name( const char *name );
|
||||
extern int is_format_signed( const cl_image_format *format );
|
||||
extern size_t get_pixel_size( cl_image_format *format );
|
||||
|
||||
/* Helper to get any ol image format as long as it is 8-bits-per-channel */
|
||||
extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
|
||||
|
||||
/* Helper to get any ol image format as long as it is 32-bits-per-channel */
|
||||
extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
|
||||
|
||||
int random_in_range( int minV, int maxV, MTdata d );
|
||||
int random_log_in_range( int minV, int maxV, MTdata d );
|
||||
|
||||
// Host-side description of an image: its dimensions, pitches, format and
// object type. read_image_pixel() below uses it to locate a pixel in a host
// copy of the image data.
typedef struct
|
||||
{
|
||||
// Extent of the base level; read_image_pixel() shifts these right by the lod to get per-level sizes
size_t width;
|
||||
size_t height;
|
||||
size_t depth;
|
||||
// Pitches used by read_image_pixel() when num_mip_levels is 0 (otherwise it derives them from the level size)
size_t rowPitch;
|
||||
size_t slicePitch;
|
||||
// When non-zero, read_image_pixel() treats z outside [0, arraySize) as out of bounds
size_t arraySize;
|
||||
// Channel order and data type of the image
cl_image_format *format;
|
||||
// NOTE(review): purpose not visible in this part of the file - presumably a backing buffer object; confirm
cl_mem buffer;
|
||||
// Image object type (compared against CL_MEM_OBJECT_IMAGE* values)
cl_mem_object_type type;
|
||||
// Number of mip levels; 0 selects the non-mipmapped pitch handling in read_image_pixel()
cl_uint num_mip_levels;
|
||||
} image_descriptor;
|
||||
|
||||
// A pixel stored as four float values.
typedef struct
|
||||
{
|
||||
// Presumably one value per channel - confirm the channel order against the users of this struct
float p[4];
|
||||
}FloatPixel;
|
||||
|
||||
void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
|
||||
size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
|
||||
const cl_ulong maxIndividualAllocSize, const cl_ulong maxTotalAllocSize, cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSize=0);
|
||||
extern size_t get_format_max_int( cl_image_format *format );
|
||||
|
||||
extern cl_ulong get_image_size( image_descriptor const *imageInfo );
|
||||
extern cl_ulong get_image_size_mb( image_descriptor const *imageInfo );
|
||||
|
||||
extern char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &Owner, MTdata d );
|
||||
|
||||
extern int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
|
||||
void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod = 0 );
|
||||
|
||||
extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
|
||||
unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
|
||||
extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
|
||||
int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
|
||||
extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
|
||||
float *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
|
||||
|
||||
extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
|
||||
const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] );
|
||||
|
||||
int has_alpha(cl_image_format *format);
|
||||
|
||||
extern bool alpha_is_x(cl_image_format *format);
|
||||
|
||||
extern bool is_sRGBA_order(cl_channel_order image_channel_order);
|
||||
|
||||
inline float calculate_array_index( float coord, float extent );
|
||||
|
||||
cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth);
|
||||
cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo);
|
||||
size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod);
|
||||
|
||||
template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
|
||||
int x, int y, int z, T *outData, int lod )
|
||||
{
|
||||
float convert_half_to_float( unsigned short halfValue );
|
||||
size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth, slice_pitch_lod = 0/*imageInfo->slicePitch*/ , row_pitch_lod = 0/*imageInfo->rowPitch*/;
|
||||
width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod):1;
|
||||
|
||||
if ( imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
|
||||
height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod):1;
|
||||
|
||||
if(imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
|
||||
depth_lod = ( imageInfo->depth >> lod) ? ( imageInfo->depth >> lod) : 1;
|
||||
row_pitch_lod = (imageInfo->num_mip_levels > 0)? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch;
|
||||
slice_pitch_lod = (imageInfo->num_mip_levels > 0)? (row_pitch_lod * height_lod): imageInfo->slicePitch;
|
||||
|
||||
// correct depth_lod and height_lod for array image types in order to avoid
|
||||
// return
|
||||
if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1 && depth_lod == 1) {
|
||||
depth_lod = 0;
|
||||
height_lod = 0;
|
||||
|
||||
}
|
||||
|
||||
if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1) {
|
||||
depth_lod = 0;
|
||||
}
|
||||
|
||||
if ( x < 0 || x >= (int)width_lod
|
||||
|| ( height_lod != 0 && ( y < 0 || y >= (int)height_lod ) )
|
||||
|| ( depth_lod != 0 && ( z < 0 || z >= (int)depth_lod ) )
|
||||
|| ( imageInfo->arraySize != 0 && ( z < 0 || z >= (int)imageInfo->arraySize ) ) )
|
||||
{
|
||||
// Border color
|
||||
if (imageInfo->format->image_channel_order == CL_DEPTH)
|
||||
{
|
||||
outData[ 0 ] = 1;
|
||||
}
|
||||
else {
|
||||
outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
|
||||
if (!has_alpha(imageInfo->format))
|
||||
outData[3] = 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
cl_image_format *format = imageInfo->format;
|
||||
|
||||
unsigned int i;
|
||||
T tempData[ 4 ];
|
||||
|
||||
// Advance to the right spot
|
||||
char *ptr = (char *)imageData;
|
||||
size_t pixelSize = get_pixel_size( format );
|
||||
|
||||
ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
|
||||
|
||||
// OpenCL only supports reading floats from certain formats
|
||||
switch( format->image_channel_data_type )
|
||||
{
|
||||
case CL_SNORM_INT8:
|
||||
{
|
||||
cl_char *dPtr = (cl_char *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNORM_INT8:
|
||||
{
|
||||
cl_uchar *dPtr = (cl_uchar *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_SIGNED_INT8:
|
||||
{
|
||||
cl_char *dPtr = (cl_char *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNSIGNED_INT8:
|
||||
{
|
||||
cl_uchar *dPtr = (cl_uchar*)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_SNORM_INT16:
|
||||
{
|
||||
cl_short *dPtr = (cl_short *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNORM_INT16:
|
||||
{
|
||||
cl_ushort *dPtr = (cl_ushort *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_SIGNED_INT16:
|
||||
{
|
||||
cl_short *dPtr = (cl_short *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNSIGNED_INT16:
|
||||
{
|
||||
cl_ushort *dPtr = (cl_ushort *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_HALF_FLOAT:
|
||||
{
|
||||
cl_ushort *dPtr = (cl_ushort *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)convert_half_to_float( dPtr[ i ] );
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_SIGNED_INT32:
|
||||
{
|
||||
cl_int *dPtr = (cl_int *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNSIGNED_INT32:
|
||||
{
|
||||
cl_uint *dPtr = (cl_uint *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNORM_SHORT_565:
|
||||
{
|
||||
cl_ushort *dPtr = (cl_ushort*)ptr;
|
||||
tempData[ 0 ] = (T)( dPtr[ 0 ] >> 11 );
|
||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
|
||||
tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef OBSOLETE_FORMAT
|
||||
case CL_UNORM_SHORT_565_REV:
|
||||
{
|
||||
unsigned short *dPtr = (unsigned short *)ptr;
|
||||
tempData[ 2 ] = (T)( dPtr[ 0 ] >> 11 );
|
||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
|
||||
tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNORM_SHORT_555_REV:
|
||||
{
|
||||
unsigned short *dPtr = (unsigned short *)ptr;
|
||||
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
|
||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
|
||||
tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNORM_INT_8888:
|
||||
{
|
||||
unsigned int *dPtr = (unsigned int *)ptr;
|
||||
tempData[ 3 ] = (T)( dPtr[ 0 ] >> 24 );
|
||||
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
|
||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
|
||||
tempData[ 0 ] = (T)( dPtr[ 0 ] & 0xff );
|
||||
break;
|
||||
}
|
||||
case CL_UNORM_INT_8888_REV:
|
||||
{
|
||||
unsigned int *dPtr = (unsigned int *)ptr;
|
||||
tempData[ 0 ] = (T)( dPtr[ 0 ] >> 24 );
|
||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
|
||||
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
|
||||
tempData[ 3 ] = (T)( dPtr[ 0 ] & 0xff );
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNORM_INT_101010_REV:
|
||||
{
|
||||
unsigned int *dPtr = (unsigned int *)ptr;
|
||||
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
|
||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
|
||||
tempData[ 0 ] = (T)( dPtr[ 0 ] & 0x3ff );
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case CL_UNORM_SHORT_555:
|
||||
{
|
||||
cl_ushort *dPtr = (cl_ushort *)ptr;
|
||||
tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
|
||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
|
||||
tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_UNORM_INT_101010:
|
||||
{
|
||||
cl_uint *dPtr = (cl_uint *)ptr;
|
||||
tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
|
||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
|
||||
tempData[ 2 ] = (T)( dPtr[ 0 ] & 0x3ff );
|
||||
break;
|
||||
}
|
||||
|
||||
case CL_FLOAT:
|
||||
{
|
||||
cl_float *dPtr = (cl_float *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ];
|
||||
break;
|
||||
}
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
{
|
||||
cl_float *dPtr = (cl_float *)ptr;
|
||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
||||
tempData[ i ] = (T)dPtr[ i ] + 0x4000;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
|
||||
outData[ 3 ] = 1;
|
||||
|
||||
if( format->image_channel_order == CL_A )
|
||||
{
|
||||
outData[ 3 ] = tempData[ 0 ];
|
||||
}
|
||||
else if( format->image_channel_order == CL_R )
|
||||
{
|
||||
outData[ 0 ] = tempData[ 0 ];
|
||||
}
|
||||
else if( format->image_channel_order == CL_Rx )
|
||||
{
|
||||
outData[ 0 ] = tempData[ 0 ];
|
||||
}
|
||||
else if( format->image_channel_order == CL_RA )
|
||||
{
|
||||
outData[ 0 ] = tempData[ 0 ];
|
||||
outData[ 3 ] = tempData[ 1 ];
|
||||
}
|
||||
else if( format->image_channel_order == CL_RG )
|
||||
{
|
||||
outData[ 0 ] = tempData[ 0 ];
|
||||
outData[ 1 ] = tempData[ 1 ];
|
||||
}
|
||||
else if( format->image_channel_order == CL_RGx )
|
||||
{
|
||||
outData[ 0 ] = tempData[ 0 ];
|
||||
outData[ 1 ] = tempData[ 1 ];
|
||||
}
|
||||
else if(( format->image_channel_order == CL_RGB ) || ( format->image_channel_order == CL_sRGB ))
|
||||
{
|
||||
outData[ 0 ] = tempData[ 0 ];
|
||||
outData[ 1 ] = tempData[ 1 ];
|
||||
outData[ 2 ] = tempData[ 2 ];
|
||||
}
|
||||
else if(( format->image_channel_order == CL_RGBx ) || ( format->image_channel_order == CL_sRGBx ))
|
||||
{
|
||||
outData[ 0 ] = tempData[ 0 ];
|
||||
outData[ 1 ] = tempData[ 1 ];
|
||||
outData[ 2 ] = tempData[ 2 ];
|
||||
outData[ 3 ] = 0;
|
||||
}
|
||||
else if(( format->image_channel_order == CL_RGBA ) || ( format->image_channel_order == CL_sRGBA ))
|
||||
{
|
||||
outData[ 0 ] = tempData[ 0 ];
|
||||
outData[ 1 ] = tempData[ 1 ];
|
||||
outData[ 2 ] = tempData[ 2 ];
|
||||
outData[ 3 ] = tempData[ 3 ];
|
||||
}
|
||||
else if( format->image_channel_order == CL_ARGB )
|
||||
{
|
||||
outData[ 0 ] = tempData[ 1 ];
|
||||
outData[ 1 ] = tempData[ 2 ];
|
||||
outData[ 2 ] = tempData[ 3 ];
|
||||
outData[ 3 ] = tempData[ 0 ];
|
||||
}
|
||||
else if(( format->image_channel_order == CL_BGRA ) || ( format->image_channel_order == CL_sBGRA ))
|
||||
{
|
||||
outData[ 0 ] = tempData[ 2 ];
|
||||
outData[ 1 ] = tempData[ 1 ];
|
||||
outData[ 2 ] = tempData[ 0 ];
|
||||
outData[ 3 ] = tempData[ 3 ];
|
||||
}
|
||||
else if( format->image_channel_order == CL_INTENSITY )
|
||||
{
|
||||
outData[ 1 ] = tempData[ 0 ];
|
||||
outData[ 2 ] = tempData[ 0 ];
|
||||
outData[ 3 ] = tempData[ 0 ];
|
||||
}
|
||||
else if( format->image_channel_order == CL_LUMINANCE )
|
||||
{
|
||||
outData[ 1 ] = tempData[ 0 ];
|
||||
outData[ 2 ] = tempData[ 0 ];
|
||||
}
|
||||
else if( format->image_channel_order == CL_DEPTH )
|
||||
{
|
||||
outData[ 0 ] = tempData[ 0 ];
|
||||
}
|
||||
#ifdef CL_1RGB_APPLE
|
||||
else if( format->image_channel_order == CL_1RGB_APPLE )
|
||||
{
|
||||
outData[ 0 ] = tempData[ 1 ];
|
||||
outData[ 1 ] = tempData[ 2 ];
|
||||
outData[ 2 ] = tempData[ 3 ];
|
||||
outData[ 3 ] = 0xff;
|
||||
}
|
||||
#endif
|
||||
#ifdef CL_BGR1_APPLE
|
||||
else if( format->image_channel_order == CL_BGR1_APPLE )
|
||||
{
|
||||
outData[ 0 ] = tempData[ 2 ];
|
||||
outData[ 1 ] = tempData[ 1 ];
|
||||
outData[ 2 ] = tempData[ 0 ];
|
||||
outData[ 3 ] = 0xff;
|
||||
}
|
||||
#endif
|
||||
else
|
||||
{
|
||||
log_error("Invalid format:");
|
||||
print_header(format, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload: forwards to the seven-argument read_image_pixel<T>
// overload, passing 0 as the trailing argument.
template <class T>
void read_image_pixel( void *imageData, image_descriptor *imageInfo, int x, int y, int z, T *outData )
{
    read_image_pixel<T>( imageData,
                         imageInfo,
                         x, y, z,
                         outData,
                         0 );
}
|
||||
|
||||
// Stupid template rules
|
||||
bool get_integer_coords( float x, float y, float z,
|
||||
size_t width, size_t height, size_t depth,
|
||||
image_sampler_data *imageSampler, image_descriptor *imageInfo,
|
||||
int &outX, int &outY, int &outZ );
|
||||
bool get_integer_coords_offset( float x, float y, float z,
|
||||
float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
||||
size_t width, size_t height, size_t depth,
|
||||
image_sampler_data *imageSampler, image_descriptor *imageInfo,
|
||||
int &outX, int &outY, int &outZ );
|
||||
|
||||
|
||||
template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
|
||||
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
||||
image_sampler_data *imageSampler, T *outData, int lod )
|
||||
{
|
||||
int iX = 0, iY = 0, iZ = 0;
|
||||
|
||||
float max_w = imageInfo->width;
|
||||
float max_h;
|
||||
float max_d;
|
||||
|
||||
switch (imageInfo->type) {
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
max_h = imageInfo->arraySize;
|
||||
max_d = 0;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
max_h = imageInfo->height;
|
||||
max_d = imageInfo->arraySize;
|
||||
break;
|
||||
default:
|
||||
max_h = imageInfo->height;
|
||||
max_d = imageInfo->depth;
|
||||
break;
|
||||
}
|
||||
|
||||
if( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
|
||||
{
|
||||
switch (imageInfo->type) {
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
max_d = (float)((imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1);
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
max_h = (float)((imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1);
|
||||
break;
|
||||
default:
|
||||
;
|
||||
|
||||
}
|
||||
max_w = (float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
|
||||
}
|
||||
get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, max_w, max_h, max_d, imageSampler, imageInfo, iX, iY, iZ );
|
||||
|
||||
read_image_pixel<T>( imageData, imageInfo, iX, iY, iZ, outData, lod );
|
||||
}
|
||||
|
||||
template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
|
||||
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
||||
image_sampler_data *imageSampler, T *outData)
|
||||
{
|
||||
sample_image_pixel_offset<T>( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
|
||||
imageSampler, outData, 0);
|
||||
}
|
||||
|
||||
template <class T> void sample_image_pixel( void *imageData, image_descriptor *imageInfo,
|
||||
float x, float y, float z, image_sampler_data *imageSampler, T *outData )
|
||||
{
|
||||
return sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData);
|
||||
}
|
||||
|
||||
// Float sampling entry points (defined elsewhere). Each comes in two flavours:
// with and without a trailing `lod` argument.
// NOTE(review): the meaning of `verbose` and `containsDenorms` is set by the
// definitions, which are not visible from this header section.

FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                                     float x, float y, float z,
                                     image_sampler_data *imageSampler, float *outData,
                                     int verbose, int *containsDenorms );

FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                                     float x, float y, float z,
                                     image_sampler_data *imageSampler, float *outData,
                                     int verbose, int *containsDenorms, int lod );

// Variants that additionally take a per-axis address offset.
FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
                                            float x, float y, float z,
                                            float xAddressOffset, float yAddressOffset, float zAddressOffset,
                                            image_sampler_data *imageSampler, float *outData,
                                            int verbose, int *containsDenorms );

FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
                                            float x, float y, float z,
                                            float xAddressOffset, float yAddressOffset, float zAddressOffset,
                                            image_sampler_data *imageSampler, float *outData,
                                            int verbose, int *containsDenorms, int lod );
|
||||
|
||||
|
||||
extern void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData );
|
||||
extern void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData );
|
||||
extern void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData );
|
||||
extern void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors );
|
||||
|
||||
extern char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d, bool image2DFromBuffer = false );
|
||||
|
||||
// deprecated
|
||||
//extern bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue );
|
||||
|
||||
extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
|
||||
extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
|
||||
extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
|
||||
extern int issubnormal(float);
|
||||
|
||||
|
||||
// Maximum that propagates NaN: yields _x when _x is NaN (_x != _x), otherwise
// _x if it compares greater than _y, otherwise _y. Arguments are evaluated
// more than once, so do not pass expressions with side effects.
#define errMax( _x , _y ) ( ( (_x) != (_x) ) ? (_x) : ( ( (_x) > (_y) ) ? (_x) : (_y) ) )
|
||||
|
||||
// Absolute difference of two unsigned values: always subtracts the smaller
// from the larger, so the result never wraps.
static inline cl_uint abs_diff_uint( cl_uint x, cl_uint y )
{
    if( y > x )
        return y - x;

    return x - y;
}
|
||||
|
||||
// Absolute difference of two signed values, returned as unsigned.
//
// The subtraction is done in unsigned arithmetic: a signed `y - x` overflows
// (undefined behaviour) when the operands are far apart, e.g. the minimum and
// maximum cl_int. Unsigned subtraction is modular, and since the larger
// operand is always on the left, the result is the exact distance.
static inline cl_uint abs_diff_int( cl_int x, cl_int y )
{
    return ( y > x ) ? ( (cl_uint)y - (cl_uint)x )
                     : ( (cl_uint)x - (cl_uint)y );
}
|
||||
|
||||
// Relative error of `test` with respect to `expected`:
// (test - expected) / expected.
// When both values are zero the result is defined as 0 rather than the NaN
// that 0/0 would produce.
static inline cl_float relative_error( float test, float expected )
{
    const bool bothZero = ( test == 0.0f ) && ( expected == 0.0f );

    return bothZero ? 0.0f : ( test - expected ) / expected;
}
|
||||
|
||||
// Defined elsewhere. NOTE(review): presumably returns a random value between
// `low` and `high` -- confirm inclusivity against the definition.
extern float random_float( float low, float high );
|
||||
|
||||
class CoordWalker
|
||||
{
|
||||
public:
|
||||
CoordWalker( void * coords, bool useFloats, size_t vecSize );
|
||||
~CoordWalker();
|
||||
|
||||
cl_float Get( size_t idx, size_t el );
|
||||
|
||||
protected:
|
||||
cl_float * mFloatCoords;
|
||||
cl_int * mIntCoords;
|
||||
size_t mVecSize;
|
||||
};
|
||||
|
||||
extern int DetectFloatToHalfRoundingMode( cl_command_queue ); // Returns CL_SUCCESS on success
|
||||
|
||||
int inline is_half_nan( cl_ushort half ){ return (half & 0x7fff) > 0x7c00; }
|
||||
|
||||
cl_ushort convert_float_to_half( cl_float f );
|
||||
cl_float convert_half_to_float( cl_ushort h );
|
||||
|
||||
extern double sRGBmap(float fc);
|
||||
|
||||
#endif // _imageHelpers_h
|
||||
865
test_common/harness/kernelHelpers.c
Normal file
865
test_common/harness/kernelHelpers.c
Normal file
@@ -0,0 +1,865 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "kernelHelpers.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "imageHelpers.h"
|
||||
#include "typeWrappers.h"
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
#include "mingw_compat.h"
|
||||
#endif
|
||||
|
||||
/* Redundant forward declaration so this function can delegate regardless of
   definition order in this file. */
int create_single_kernel_helper_with_build_options( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines,
                                                    const char **kernelProgram, const char *kernelName, const char *buildOptions );

/*
 * Creates a program from source, builds it with no build options, verifies the
 * build status on every device of the program and creates the named kernel.
 *
 *   context         - context to create the program in
 *   outProgram      - receives the created program
 *   outKernel       - receives the created kernel
 *   numKernelLines  - number of strings in kernelProgram
 *   kernelProgram   - source strings
 *   kernelName      - name of the kernel to create
 *
 * Returns 0 on success, non-zero on failure.
 *
 * This is the build-options variant called with NULL options; delegating
 * removes a line-for-line duplicate of that function.
 */
int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName )
{
    return create_single_kernel_helper_with_build_options( context, outProgram, outKernel,
                                                           numKernelLines, kernelProgram,
                                                           kernelName, NULL );
}
|
||||
|
||||
|
||||
int create_single_kernel_helper_with_build_options( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines,
|
||||
const char **kernelProgram, const char *kernelName, const char *buildOptions )
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
/* Create the program object from source */
|
||||
*outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error );
|
||||
if( *outProgram == NULL || error != CL_SUCCESS)
|
||||
{
|
||||
print_error( error, "clCreateProgramWithSource failed" );
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Compile the program */
|
||||
int buildProgramFailed = 0;
|
||||
int printedSource = 0;
|
||||
error = clBuildProgram( *outProgram, 0, NULL, buildOptions, NULL, NULL );
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
unsigned int i;
|
||||
print_error(error, "clBuildProgram failed");
|
||||
buildProgramFailed = 1;
|
||||
printedSource = 1;
|
||||
log_error( "Original source is: ------------\n" );
|
||||
for( i = 0; i < numKernelLines; i++ )
|
||||
log_error( "%s", kernelProgram[ i ] );
|
||||
}
|
||||
|
||||
// Verify the build status on all devices
|
||||
cl_uint deviceCount = 0;
|
||||
error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
|
||||
if (error != CL_SUCCESS) {
|
||||
print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
if (deviceCount == 0) {
|
||||
log_error("No devices found for program.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
|
||||
if( NULL == devices )
|
||||
return -1;
|
||||
BufferOwningPtr<cl_device_id> devicesBuf(devices);
|
||||
|
||||
memset( devices, 0, deviceCount * sizeof( cl_device_id ));
|
||||
error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
|
||||
if (error != CL_SUCCESS) {
|
||||
print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
cl_uint z;
|
||||
for( z = 0; z < deviceCount; z++ )
|
||||
{
|
||||
char deviceName[4096] = "";
|
||||
error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
|
||||
if (error != CL_SUCCESS || deviceName[0] == '\0') {
|
||||
log_error("Device \"%d\" failed to return a name\n", z);
|
||||
print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
|
||||
}
|
||||
|
||||
cl_build_status buildStatus;
|
||||
error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
|
||||
if (error != CL_SUCCESS) {
|
||||
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
|
||||
char log[10240] = "";
|
||||
if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
|
||||
|
||||
char statusString[64] = "";
|
||||
if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
|
||||
sprintf(statusString, "CL_BUILD_SUCCESS");
|
||||
else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
|
||||
sprintf(statusString, "CL_BUILD_NONE");
|
||||
else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
|
||||
sprintf(statusString, "CL_BUILD_ERROR");
|
||||
else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
|
||||
sprintf(statusString, "CL_BUILD_IN_PROGRESS");
|
||||
else
|
||||
sprintf(statusString, "UNKNOWN (%d)", buildStatus);
|
||||
|
||||
if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
|
||||
error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL );
|
||||
if (error != CL_SUCCESS || log[0]=='\0'){
|
||||
log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
|
||||
if (error) {
|
||||
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
||||
return error;
|
||||
} else {
|
||||
log_error("clGetProgramBuildInfo returned an empty log.\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
// In this case we've already printed out the code above.
|
||||
if (!printedSource)
|
||||
{
|
||||
unsigned int i;
|
||||
log_error( "Original source is: ------------\n" );
|
||||
for( i = 0; i < numKernelLines; i++ )
|
||||
log_error( "%s", kernelProgram[ i ] );
|
||||
printedSource = 1;
|
||||
}
|
||||
log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
|
||||
log_error( "%s\n", log );
|
||||
log_error( "\n----------\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* And create a kernel from it */
|
||||
*outKernel = clCreateKernel( *outProgram, kernelName, &error );
|
||||
if( *outKernel == NULL || error != CL_SUCCESS)
|
||||
{
|
||||
print_error( error, "Unable to create kernel" );
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Parses the major and minor version numbers out of a device's
 * CL_DEVICE_VERSION string.
 *
 *   id     - device to query
 *   major  - receives the major version number
 *   minor  - receives the minor version number
 *
 * Returns 0 on success, non-zero if the query fails or the string does not
 * have the expected shape.
 */
int get_device_version( cl_device_id id, size_t* major, size_t* minor)
{
    static const char versionPrefix[] = "OpenCL ";
    cl_char buffer[ 4098 ];

    // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
    cl_int error = clGetDeviceInfo( id, CL_DEVICE_VERSION, sizeof( buffer ), buffer, NULL );
    test_error( error, "Unable to get device version string" );

    // Check the prefix before skipping over it: on a shorter string the
    // parser would otherwise start beyond the terminator.
    if( strncmp( (const char *)buffer, versionPrefix, strlen( versionPrefix ) ) != 0 )
    {
        log_error( "ERROR: Device version string must start with \"OpenCL \"!\n" );
        return -1;
    }

    char *p1 = (char *)buffer + strlen( versionPrefix );
    char *p2;
    while( *p1 == ' ' )
        p1++;

    *major = strtol( p1, &p2, 10 );
    if( p2 == p1 )
    {
        // strtol consumed nothing, so there is no major version number.
        log_error( "ERROR: Device version string does not contain a major version number!\n" );
        return -1;
    }

    error = *p2 != '.';
    test_error(error, "ERROR: Version number must contain a decimal point!");
    *minor = strtol( ++p2, NULL, 10 );
    return error;
}
|
||||
|
||||
/*
 * Computes the largest work-group size usable for `kernel` on every device of
 * `context`, and optionally the per-dimension limits common to all devices.
 *
 *   context     - context whose devices are examined
 *   kernel      - kernel to query CL_KERNEL_WORK_GROUP_SIZE for
 *   outMaxSize  - receives the smallest of all device and kernel limits
 *   outLimits   - if not NULL, a 3-element array receiving the smallest
 *                 CL_DEVICE_MAX_WORK_ITEM_SIZES values for the first three
 *                 dimensions
 *
 * Returns 0 on success, non-zero on failure.
 *
 * The per-dimension sizes are read into a buffer sized by the device's
 * reported dimension count and only the first three entries are used, so a
 * device with more than three dimensions cannot overrun the local array or
 * outLimits.
 */
int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
{
    cl_device_id *devices;
    size_t size, maxCommonSize = 0;
    int numDevices, i, j, error;
    cl_uint numDims;
    size_t outSize;
    size_t sizeLimit[]={1,1,1};

    /* Ask for the size of the device list first, then fetch the list. */
    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
    test_error( error, "Unable to obtain list of devices size for context" );
    devices = (cl_device_id *)malloc(outSize);
    if( outSize != 0 && NULL == devices )
    {
        log_error( "Unable to allocate device list\n" );
        return -1;
    }
    BufferOwningPtr<cl_device_id> devicesBuf(devices);

    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
    test_error( error, "Unable to obtain list of devices for context" );

    numDevices = (int)( outSize / sizeof( cl_device_id ) );

    for( i = 0; i < numDevices; i++ )
    {
        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
        test_error( error, "Unable to obtain max work group size for device" );
        if( size < maxCommonSize || maxCommonSize == 0)
            maxCommonSize = size;

        error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
        test_error( error, "Unable to obtain max work group size for device and kernel combo" );
        if( size < maxCommonSize || maxCommonSize == 0)
            maxCommonSize = size;

        error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
        test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
        if( numDims == 0 )
        {
            log_error( "Device reports zero work item dimensions\n" );
            return -1;
        }

        /* Size the buffer by what the device actually reports. */
        size_t *dimSizes = (size_t *)malloc( numDims * sizeof( size_t ) );
        if( NULL == dimSizes )
        {
            log_error( "Unable to allocate work item size list\n" );
            return -1;
        }
        BufferOwningPtr<size_t> dimSizesBuf(dimSizes);

        error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), dimSizes, NULL);
        test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

        /* Only the first three dimensions are tracked; missing ones count as 1. */
        const int usedDims = ( numDims < 3 ) ? (int)numDims : 3;
        for (j=0; j<3; j++)
            sizeLimit[j] = ( j < usedDims ) ? dimSizes[j] : 1;

        if (outLimits != NULL)
        {
            if (i == 0) {
                for (j=0; j<3; j++)
                    outLimits[j] = sizeLimit[j];
            } else {
                for (j=0; j<usedDims; j++) {
                    if (sizeLimit[j] < outLimits[j])
                        outLimits[j] = sizeLimit[j];
                }
            }
        }
    }

    *outMaxSize = (unsigned int)maxCommonSize;
    return 0;
}
|
||||
|
||||
|
||||
/*
 * Computes the largest one-dimensional work-group size for `kernel` on
 * `device`: the kernel's CL_KERNEL_WORK_GROUP_SIZE, capped by the device's
 * maximum work-item size in the first dimension.
 *
 * Returns 0 on success and stores the result in *outSize; otherwise returns
 * a non-zero value and leaves *outSize untouched.
 */
extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize )
{
    int error;
    size_t kernelLimit;
    cl_uint dimCount;

    error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( size_t ), &kernelLimit, NULL );
    test_error( error, "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed" );

    error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &dimCount, NULL );
    test_error( error, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed" );

    size_t *perDimLimits = (size_t*)malloc( dimCount * sizeof( size_t ) );
    if( NULL == perDimLimits )
    {
        log_error( "Unable to allocate maxWgSizePerDim\n" );
        return -1;
    }

    error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, dimCount * sizeof( size_t ), perDimLimits, NULL );
    if( error != CL_SUCCESS)
    {
        log_error( "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n" );
        free( perDimLimits );
        return error;
    }

    // Only the first dimension's limit matters for a 1D launch.
    const size_t firstDimLimit = perDimLimits[0];
    free( perDimLimits );

    *outSize = ( kernelLimit > firstDimLimit ) ? firstDimLimit : kernelLimit;
    return 0;
}
|
||||
|
||||
|
||||
/*
 * Finds the largest work-group size that is allowed on all devices of the
 * context, does not exceed the first-dimension work-item limit, and evenly
 * divides globalThreadSize. The result is stored in *outMaxSize.
 *
 * Returns 0 on success, or the error from get_max_allowed_work_group_size.
 */
int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
                                    size_t globalThreadSize, size_t *outMaxSize )
{
    size_t sizeLimit[3];

    const int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit );
    if( error != 0 )
        return error;

    /* Step down from the allowed maximum until the size both divides the
       global size and respects the first-dimension limit. No explicit lower
       bound is needed: at 1 the modulo test succeeds. */
    while( ( globalThreadSize % *outMaxSize ) != 0 || ( *outMaxSize > sizeLimit[0] ) )
        (*outMaxSize)--;

    return 0;
}
|
||||
|
||||
/*
 * Picks a 2D local work size: each entry of outMaxSizes divides the matching
 * entry of globalThreadSizes, stays within the per-dimension limit, and the
 * product stays within the maximum allowed work-group size.
 *
 * Returns 0 on success, or the error from get_max_allowed_work_group_size.
 */
int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
                                       size_t *globalThreadSizes, size_t *outMaxSizes )
{
    size_t sizeLimit[3];
    size_t maxSize;

    const int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
    if( error != 0 )
        return error;

    /* Simple case: the whole global range fits in a single work-group. */
    const bool fitsTotal = globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize;
    if( fitsTotal
        && globalThreadSizes[ 0 ] <= sizeLimit[0]
        && globalThreadSizes[ 1 ] <= sizeLimit[1] )
    {
        outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
        outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
        return 0;
    }

    /* General case: assign dimensions in order, each taking the largest
       divisor that fits into what is left of the work-group budget. */
    size_t budget = maxSize;
    for( int dim = 0; dim < 2; dim++ )
    {
        size_t candidate = ( globalThreadSizes[dim] > budget ) ? budget : globalThreadSizes[dim];

        while( ( globalThreadSizes[dim] % candidate ) != 0 || ( candidate > sizeLimit[dim] ) )
            candidate--;

        outMaxSizes[dim] = candidate;
        budget /= candidate;
    }

    return 0;
}
|
||||
|
||||
/*
 * Picks a 3D local work size: each entry of outMaxSizes divides the matching
 * entry of globalThreadSizes, stays within the per-dimension limit, and the
 * product stays within the maximum allowed work-group size.
 *
 * Returns 0 on success, or the error from get_max_allowed_work_group_size.
 */
int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
                                       size_t *globalThreadSizes, size_t *outMaxSizes )
{
    size_t sizeLimit[3];
    size_t maxSize;

    const int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
    if( error != 0 )
        return error;

    /* Simple case: the whole global range fits in a single work-group. */
    const bool fitsTotal =
        globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize;
    if( fitsTotal
        && globalThreadSizes[ 0 ] <= sizeLimit[0]
        && globalThreadSizes[ 1 ] <= sizeLimit[1]
        && globalThreadSizes[ 2 ] <= sizeLimit[2] )
    {
        outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
        outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
        outMaxSizes[ 2 ] = globalThreadSizes[ 2 ];
        return 0;
    }

    /* General case: assign dimensions in order, each taking the largest
       divisor that fits into what is left of the work-group budget. */
    size_t budget = maxSize;
    for( int dim = 0; dim < 3; dim++ )
    {
        size_t candidate = ( globalThreadSizes[dim] > budget ) ? budget : globalThreadSizes[dim];

        while( ( globalThreadSizes[dim] % candidate ) != 0 || ( candidate > sizeLimit[dim] ) )
            candidate--;

        outMaxSizes[dim] = candidate;
        budget /= candidate;
    }

    return 0;
}
|
||||
|
||||
/* Helper to determine if an extension is supported by a device */
|
||||
/* Returns 1 when extensionName appears as a complete, space-delimited entry of the
 * device's CL_DEVICE_EXTENSIONS string, 0 otherwise (including on any query or
 * allocation failure, and for a NULL or empty name).
 * A plain substring search is not enough: it would report a match whenever the
 * requested name is merely a prefix or substring of another extension's name. */
int is_extension_available( cl_device_id device, const char *extensionName )
{
    char *extString;
    size_t size = 0;
    size_t nameLen;
    const char *cursor;
    int err;

    if( NULL == extensionName || '\0' == extensionName[ 0 ] )
        return 0;

    if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size) ))
    {
        log_error( "Error: failed to determine size of device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
        return 0;
    }

    if( 0 == size )
        return 0;

    extString = (char*) malloc( size );

    if( NULL == extString )
    {
        /* size_t is not necessarily the width of long; cast to match %ld */
        log_error( "Error: unable to allocate %ld byte buffer for extension string at %s:%d (err = %d)\n", (long) size, __FILE__, __LINE__, err );
        return 0;
    }
    BufferOwningPtr<char> extStringBuf(extString);

    if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, extString, NULL) ))
    {
        log_error( "Error: failed to obtain device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
        return 0;
    }

    /* Guard the string scan below against a missing terminator */
    extString[ size - 1 ] = '\0';

    /* Accept a hit only when it is bounded by the string ends or by spaces */
    nameLen = strlen( extensionName );
    cursor = extString;
    while( ( cursor = strstr( cursor, extensionName ) ) != NULL )
    {
        const char *after = cursor + nameLen;
        int startsEntry = ( cursor == extString ) || ( cursor[ -1 ] == ' ' );
        int endsEntry = ( *after == '\0' ) || ( *after == ' ' );

        if( startsEntry && endsEntry )
            return 1;

        cursor = after;
    }

    return 0;
}
|
||||
|
||||
/* Helper to determine if a device supports an image format */
|
||||
/* Returns 1 when fmt (channel order + channel data type) is in the list that
 * clGetSupportedImageFormats reports for the given context, flags and image type.
 * Returns 0 when it is not listed, when the list is empty, or when a query or
 * allocation fails. */
int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
{
    cl_image_format *list;
    cl_uint count = 0;
    cl_uint i;

    /* Size query: there is no output array, so ask for zero entries */
    cl_int err = clGetSupportedImageFormats( context, flags, image_type, 0, NULL, &count );
    if( err != CL_SUCCESS || count == 0 )
        return 0;

    list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
    if( NULL == list )
    {
        /* cast so the argument matches %ld on every platform */
        log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", (long)( count * sizeof( cl_image_format ) ), __FILE__, __LINE__, err );
        return 0;
    }
    BufferOwningPtr<cl_image_format> listBuf(list);

    err = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
    if( err )
    {
        /* report the status of this call, not of the earlier size query */
        log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
        return 0;
    }

    // iterate looking for a match.
    for( i = 0; i < count; i++ )
    {
        if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
            fmt->image_channel_order == list[ i ].image_channel_order )
            return 1;
    }

    return 0;
}
|
||||
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt );
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt )
|
||||
{
|
||||
size_t chanCount;
|
||||
switch( fmt->image_channel_order )
|
||||
{
|
||||
case CL_R:
|
||||
case CL_A:
|
||||
case CL_Rx:
|
||||
case CL_INTENSITY:
|
||||
case CL_LUMINANCE:
|
||||
case CL_DEPTH:
|
||||
chanCount = 1;
|
||||
break;
|
||||
case CL_RG:
|
||||
case CL_RA:
|
||||
case CL_RGx:
|
||||
chanCount = 2;
|
||||
break;
|
||||
case CL_RGB:
|
||||
case CL_RGBx:
|
||||
case CL_sRGB:
|
||||
case CL_sRGBx:
|
||||
chanCount = 3;
|
||||
break;
|
||||
case CL_RGBA:
|
||||
case CL_ARGB:
|
||||
case CL_BGRA:
|
||||
case CL_sBGRA:
|
||||
case CL_sRGBA:
|
||||
#ifdef CL_1RGB_APPLE
|
||||
case CL_1RGB_APPLE:
|
||||
#endif
|
||||
#ifdef CL_BGR1_APPLE
|
||||
case CL_BGR1_APPLE:
|
||||
#endif
|
||||
chanCount = 4;
|
||||
break;
|
||||
default:
|
||||
log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
|
||||
switch( fmt->image_channel_data_type )
|
||||
{
|
||||
case CL_UNORM_SHORT_565:
|
||||
case CL_UNORM_SHORT_555:
|
||||
return 2;
|
||||
|
||||
case CL_UNORM_INT_101010:
|
||||
return 4;
|
||||
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
return chanCount;
|
||||
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNORM_INT16:
|
||||
case CL_HALF_FLOAT:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
return chanCount * 2;
|
||||
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_FLOAT:
|
||||
return chanCount * 4;
|
||||
|
||||
default:
|
||||
log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Returns 0 when checkForImageSupport() reports success for the device; otherwise
 * logs an error and returns CL_IMAGE_FORMAT_NOT_SUPPORTED. */
int verifyImageSupport( cl_device_id device )
{
    const int unsupported = checkForImageSupport( device );

    if( !unsupported )
        return 0;

    log_error( "ERROR: Device does not supported images as required by this test!\n" );
    return CL_IMAGE_FORMAT_NOT_SUPPORTED;
}
|
||||
|
||||
/* Queries CL_DEVICE_IMAGE_SUPPORT for the device.
 * Returns 0 when the device reports image support and
 * CL_IMAGE_FORMAT_NOT_SUPPORTED when it reports none; a failed query is handled
 * by the test_error macro. */
int checkForImageSupport( cl_device_id device )
{
    cl_uint hasImages;
    int status;

    /* Check the device props to see if images are supported at all */
    status = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( hasImages ), &hasImages, NULL );
    test_error( status, "Unable to query device for image support" );

    return ( hasImages != 0 ) ? 0 : CL_IMAGE_FORMAT_NOT_SUPPORTED;
}
|
||||
|
||||
/* Returns 0 when the device supports 3D images, CL_IMAGE_FORMAT_NOT_SUPPORTED
 * when it does not. General image support is checked first; on an
 * EMBEDDED_PROFILE device 3D images count as unsupported when the maximum 3D
 * width, height and depth are all reported as zero. */
int checkFor3DImageSupport( cl_device_id device )
{
    char profile[128];
    int error;

    /* Check the device props to see if images are supported at all first */
    int imageStatus = checkForImageSupport( device );
    if( imageStatus != 0 )
        return imageStatus;

    error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
    test_error( error, "Unable to query device for CL_DEVICE_PROFILE" );

    if( strcmp( profile, "EMBEDDED_PROFILE" ) != 0 )
        return 0;   /* only the embedded profile needs the size check */

    {
        size_t maxWidth = -1L;
        size_t maxHeight = -1L;
        size_t maxDepth = -1L;

        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(maxWidth), &maxWidth, NULL );
        test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );
        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(maxHeight), &maxHeight, NULL );
        test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );
        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(maxDepth), &maxDepth, NULL );
        test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );

        if( maxWidth == 0 && maxHeight == 0 && maxDepth == 0 )
            return CL_IMAGE_FORMAT_NOT_SUPPORTED;
    }

    /* So our support is good */
    return 0;
}
|
||||
|
||||
/* Allocates size bytes aligned to alignment using the platform's aligned
 * allocator. On the Linux/Apple path an alignment smaller than sizeof(void *)
 * is raised to sizeof(void *). Returns NULL on failure. Release the memory with
 * align_free(). */
void * align_malloc(size_t size, size_t alignment)
{
#if defined(_WIN32) && defined(_MSC_VER)
    return _aligned_malloc(size, alignment);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    // alignment must be a power of two and multiple of sizeof(void *).
    const size_t effectiveAlignment = ( alignment < sizeof( void * ) ) ? sizeof( void * ) : alignment;
#if defined(__ANDROID__)
    return memalign(effectiveAlignment, size);
#else
    void * block = NULL;
    return ( posix_memalign(&block, effectiveAlignment, size) == 0 ) ? block : NULL;
#endif
#elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
#else
    #error "Please add support OS for aligned malloc"
#endif
}
|
||||
|
||||
/* Releases memory obtained from align_malloc() with the matching platform call. */
void align_free(void * ptr)
{
#if defined(_WIN32) && defined(_MSC_VER)
    _aligned_free(ptr);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    free(ptr);
#elif defined(__MINGW32__)
    __mingw_aligned_free(ptr);
#else
    #error "Please add support OS for aligned free"
#endif
}
|
||||
|
||||
/* Returns the largest CL_DEVICE_MEM_BASE_ADDR_ALIGN (converted from bits to
 * bytes) over all devices in the context. The value is cached in a function
 * static after the first successful computation. Returns 0 when the device list
 * cannot be obtained. */
size_t get_min_alignment(cl_context context)
{
    static cl_uint align_size = 0;

    if( 0 == align_size )
    {
        cl_device_id * devices;
        size_t devices_size = 0;
        cl_uint result = 0;
        cl_int error;
        int i;

        /* First call only asks how many bytes the device list needs */
        error = clGetContextInfo (context,
                                  CL_CONTEXT_DEVICES,
                                  0,
                                  NULL,
                                  &devices_size);
        test_error_ret(error, "clGetContextInfo failed", 0);

        devices = (cl_device_id*)malloc(devices_size);
        if (devices == NULL) {
            print_error( error, "malloc failed" );
            return 0;
        }

        error = clGetContextInfo (context,
                                  CL_CONTEXT_DEVICES,
                                  devices_size,
                                  (void*)devices,
                                  NULL);
        if (error != CL_SUCCESS) {
            /* spelled out instead of test_error_ret so the list is released
               before the early return */
            print_error( error, "clGetContextInfo failed" );
            free(devices);
            return 0;
        }

        for (i = 0; i < (int)(devices_size/sizeof(cl_device_id)); i++)
        {
            cl_uint alignment = 0;

            error = clGetDeviceInfo (devices[i],
                                     CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                                     sizeof(cl_uint),
                                     (void*)&alignment,
                                     NULL);

            if (error == CL_SUCCESS)
            {
                alignment >>= 3;    // convert bits to bytes
                result = (alignment > result) ? alignment : result;
            }
            else
                print_error( error, "clGetDeviceInfo failed" );
        }

        align_size = result;
        free(devices);
    }

    return align_size;
}
|
||||
|
||||
/* Returns the default single-precision rounding mode of the device:
 * CL_FP_ROUND_TO_NEAREST when CL_DEVICE_SINGLE_FP_CONFIG has that bit, otherwise
 * CL_FP_ROUND_TO_ZERO, which is accepted only for EMBEDDED_PROFILE devices.
 * Returns 0 on any query failure or when the device offers neither mode. */
cl_device_fp_config get_default_rounding_mode( cl_device_id device )
{
    char profileStr[128] = "";
    cl_device_fp_config single = 0;
    int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
    if( error )
        test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );

    if( single & CL_FP_ROUND_TO_NEAREST )
        return CL_FP_ROUND_TO_NEAREST;

    if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
        test_error_ret( -1, "FAILURE: device must support either CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST", 0 );

    // Make sure we are an embedded device before allowing a pass
    if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) ))
        test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );

    // error is CL_SUCCESS at this point, so a non-zero code is passed explicitly
    // (as for the check above) to make the failure actually take effect.
    if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
        test_error_ret( -1, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );

    return CL_FP_ROUND_TO_ZERO;
}
|
||||
|
||||
/* Returns 1 when any bit of prop is set in the device's
 * CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, 0 when none is set or the query fails. */
int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
{
    cl_command_queue_properties supported;
    cl_int status;

    status = clGetDeviceInfo( device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( supported ), &supported, NULL );
    test_error_ret( status, "FAILURE: Unable to get device queue properties", 0 );

    if( ( supported & prop ) != 0 )
        return 1;

    return 0;
}
|
||||
|
||||
/* Logs one line with the device's name, vendor, version and OpenCL C version.
 * Each value is read with clGetDeviceInfo; a failed query is handled by the
 * test_error macro. Returns CL_SUCCESS once the line has been logged. */
int printDeviceHeader( cl_device_id device )
{
    char name[ 512 ];
    char vendor[ 512 ];
    char version[ 512 ];
    char cVersion[ 512 ];
    const char *cVersionLabel;
    const char *cVersionText;
    int status;

    status = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( name ), name, NULL );
    test_error( status, "Unable to get CL_DEVICE_NAME for device" );

    status = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( vendor ), vendor, NULL );
    test_error( status, "Unable to get CL_DEVICE_VENDOR for device" );

    status = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( version ), version, NULL );
    test_error( status, "Unable to get CL_DEVICE_VERSION for device" );

    status = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cVersion ), cVersion, NULL );
    test_error( status, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );

    /* The C version part is printed only when its query succeeded */
    cVersionLabel = ( status == CL_SUCCESS ) ? ", CL C Version = " : "";
    cVersionText = ( status == CL_SUCCESS ) ? cVersion : "";

    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
             name, vendor, version, cVersionLabel, cVersionText );

    return CL_SUCCESS;
}
|
||||
134
test_common/harness/kernelHelpers.h
Normal file
134
test_common/harness/kernelHelpers.h
Normal file
@@ -0,0 +1,134 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _kernelHelpers_h
|
||||
#define _kernelHelpers_h
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined (__MINGW32__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
/*
|
||||
* The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
|
||||
*
|
||||
* const char *source = {
|
||||
* INIT_OPENCL_DEBUG_INFO
|
||||
* "__kernel void foo( int x )\n"
|
||||
* "{\n"
|
||||
* " ...\n"
|
||||
* "}\n"
|
||||
* };
|
||||
*/
|
||||
#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
|
||||
#define SET_OPENCL_LINE_INFO(_line, _file) "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
|
||||
#ifndef STRINGIFY_VALUE
|
||||
#define STRINGIFY_VALUE(_x) STRINGIFY(_x)
|
||||
#endif
|
||||
#ifndef STRINGIFY
|
||||
#define STRINGIFY(_x) #_x
|
||||
#endif
|
||||
|
||||
/* Helper that creates a single program and kernel from a single-kernel program source */
|
||||
extern int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName );
|
||||
|
||||
extern int create_single_kernel_helper_with_build_options( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines,
|
||||
const char **kernelProgram, const char *kernelName, const char *buildOptions );
|
||||
|
||||
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
|
||||
extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
|
||||
|
||||
/* Helper to obtain the biggest fit 2D work group size for all the devices in a given group and for the given 2D global thread size */
|
||||
extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
|
||||
|
||||
/* Helper to obtain the biggest fit 3D work group size for all the devices in a given group and for the given 3D global thread size (globalThreadSize and outSizes are 3-element arrays) */
|
||||
extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
|
||||
|
||||
/* Helper to get major/minor number for a device */
|
||||
extern int get_device_version( cl_device_id id, size_t* major, size_t* minor);
|
||||
|
||||
/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
|
||||
extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
|
||||
|
||||
/* Helper to obtain the biggest allowed 1D work group size on a given device */
|
||||
extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize );
|
||||
|
||||
/* Helper to determine if an extension is supported by a device */
|
||||
extern int is_extension_available( cl_device_id device, const char *extensionName );
|
||||
|
||||
/* Helper to determine if a device supports an image format */
|
||||
extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
|
||||
|
||||
/* Helper to get pixel size for a pixel format */
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt );
|
||||
|
||||
/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
|
||||
extern int verifyImageSupport( cl_device_id device );
|
||||
|
||||
/* Checks that the given device supports images. Same as verify, but doesn't print an error */
|
||||
extern int checkForImageSupport( cl_device_id device );
|
||||
extern int checkFor3DImageSupport( cl_device_id device );
|
||||
|
||||
/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
|
||||
extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
|
||||
|
||||
/* Helper for aligned memory allocation */
|
||||
void * align_malloc(size_t size, size_t alignment);
|
||||
void align_free(void *);
|
||||
|
||||
/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/
|
||||
size_t get_min_alignment(cl_context context);
|
||||
|
||||
/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
|
||||
cl_device_fp_config get_default_rounding_mode( cl_device_id device );
|
||||
|
||||
#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \
|
||||
if( checkForImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \
|
||||
if( checkFor3DImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
/* Prints out the standard device header for all tests given the device to print for */
|
||||
extern int printDeviceHeader( cl_device_id device );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // _kernelHelpers_h
|
||||
59
test_common/harness/mingw_compat.c
Normal file
59
test_common/harness/mingw_compat.c
Normal file
@@ -0,0 +1,59 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#if defined(__MINGW32__)
|
||||
|
||||
#include "mingw_compat.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
//This function is unavailable on various mingw compilers,
|
||||
//especially 64 bit so implementing it here
|
||||
const char *basename_dot=".";

/* Returns a pointer to the last component of a drive-qualified Windows path.
 *   - NULL input yields ".".
 *   - A path whose second character is not ':' (i.e. not an absolute "X:..."
 *     path) is returned unchanged.
 *   - Trailing '\\' or '/' separators are removed by writing terminators into
 *     path, so the buffer must be writable.
 * The result points into path (or at the static "." string). */
char*
basename(char *path)
{
    char *base;
    char *p;
    size_t len;

    /* Must be tested before the string is measured or indexed */
    if (path == NULL) {
        return (char*)basename_dot;
    }

    len = strlen(path);

    // Not absolute path on windows (also covers strings shorter than "X:")
    if (len < 2 || path[1] != ':') {
        return path;
    }

    // Trim trailing path separators, but never into the "X:" drive prefix
    while (len > 2 &&
           (path[len - 1] == '\\' || path[len - 1] == '/')) {
        len--;
        path[len] = '\0';
    }

    // The base name starts right after the last remaining separator
    base = path;
    for (p = path; *p != '\0'; p++) {
        if (*p == '\\' || *p == '/') {
            base = p + 1;
        }
    }

    return base;
}
|
||||
|
||||
#endif
|
||||
31
test_common/harness/mingw_compat.h
Normal file
31
test_common/harness/mingw_compat.h
Normal file
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef MINGW_COMPAT_H
|
||||
#define MINGW_COMPAT_H
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
char *basename(char *path);
|
||||
#include <malloc.h>
|
||||
|
||||
#if defined(__MINGW64__)
|
||||
//mingw-w64 does not have __mingw_aligned_malloc; it provides _aligned_malloc instead
|
||||
#define __mingw_aligned_malloc _aligned_malloc
|
||||
#define __mingw_aligned_free _aligned_free
|
||||
#include <stddef.h>
|
||||
#endif //(__MINGW64__)
|
||||
|
||||
#endif //(__MINGW32__)
|
||||
#endif // MINGW_COMPAT_H
|
||||
772
test_common/harness/msvc9.c
Normal file
772
test_common/harness/msvc9.c
Normal file
@@ -0,0 +1,772 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "compat.h"
|
||||
|
||||
#if defined ( _MSC_VER )
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#if ! defined( __INTEL_COMPILER )
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// rint, rintf
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
float copysignf( float x, float y )
|
||||
{
|
||||
union{ cl_uint u; float f; }ux, uy;
|
||||
|
||||
ux.f = x;
|
||||
uy.f = y;
|
||||
|
||||
ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U);
|
||||
|
||||
return ux.f;
|
||||
}
|
||||
|
||||
double copysign( double x, double y )
|
||||
{
|
||||
union{ cl_ulong u; double f; }ux, uy;
|
||||
|
||||
ux.f = x;
|
||||
uy.f = y;
|
||||
|
||||
ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL);
|
||||
|
||||
return ux.f;
|
||||
}
|
||||
|
||||
long double copysignl( long double x, long double y )
|
||||
{
|
||||
union
|
||||
{
|
||||
long double f;
|
||||
struct{ cl_ulong m; cl_ushort sexp; }u;
|
||||
}ux, uy;
|
||||
|
||||
ux.f = x;
|
||||
uy.f = y;
|
||||
|
||||
ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000);
|
||||
|
||||
return ux.f;
|
||||
}
|
||||
|
||||
/* Rounds x to an integral value in the current rounding mode (single precision).
 * Adding and then subtracting 2^23 (with the sign of x) makes the hardware drop
 * the fraction bits; values with magnitude >= 2^23 and NaN are returned as is. */
float rintf(float x)
{
    const float twoPow23 = 8388608.0f;   /* 0x1.0p23f */
    float magic, shifted;

    if( !( fabsf(x) < twoPow23 ) )
        return x;

    magic = copysignf( twoPow23, x );
    shifted = x + magic;
    shifted -= magic;

    /* restore the sign so that e.g. -0.25 yields -0.0 */
    return copysignf( shifted, x );
}
|
||||
|
||||
/* Rounds x to an integral value in the current rounding mode (double precision).
 * Adding and then subtracting 2^52 (with the sign of x) makes the hardware drop
 * the fraction bits; values with magnitude >= 2^52 and NaN are returned as is. */
double rint(double x)
{
    const double twoPow52 = 4503599627370496.0;   /* 0x1.0p52 */
    double magic, shifted;

    if( !( fabs(x) < twoPow52 ) )
        return x;

    magic = copysign( twoPow52, x );
    shifted = x + magic;
    shifted -= magic;

    /* restore the sign so that e.g. -0.25 yields -0.0 */
    return copysign( shifted, x );
}
|
||||
|
||||
/* Rounds x to an integral value in the current rounding mode (long double). */
long double rintl(long double x)
{
    long double absx;

    /* When long double is really a 64-bit double, the 2^63 magic constant below
       would discard low integer bits as well as the fraction; use the double
       implementation instead. */
    if( sizeof( long double ) == sizeof( double ) )
        return (long double) rint( (double) x );

    /* Extended format (64-bit mantissa): adding and subtracting 2^63 removes the
       fraction bits. fabsl keeps the comparison in long double precision. */
    absx = fabsl(x);

    if( absx < 9223372036854775808.0L /* 0x1.0p63L */ )
    {
        long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
        long double rounded = x + magic;
        rounded -= magic;
        x = copysignl( rounded, x );
    }

    return x;
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// ilogb, ilogbf, ilogbl
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
#ifndef FP_ILOGB0
|
||||
#define FP_ILOGB0 INT_MIN
|
||||
#endif
|
||||
|
||||
#ifndef FP_ILOGBNAN
|
||||
#define FP_ILOGBNAN INT_MIN
|
||||
#endif
|
||||
|
||||
int ilogb (double x)
|
||||
{
|
||||
union{ double f; cl_ulong u;} u;
|
||||
u.f = x;
|
||||
|
||||
cl_ulong absx = u.u & CL_LONG_MAX;
|
||||
if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
|
||||
{
|
||||
switch( absx )
|
||||
{
|
||||
case 0:
|
||||
return FP_ILOGB0;
|
||||
case 0x7ff0000000000000ULL:
|
||||
return INT_MAX;
|
||||
default:
|
||||
if( absx > 0x7ff0000000000000ULL )
|
||||
return FP_ILOGBNAN;
|
||||
|
||||
// subnormal
|
||||
u.u = absx | 0x3ff0000000000000ULL;
|
||||
u.f -= 1.0;
|
||||
return (u.u >> 52) - (1023 + 1022);
|
||||
}
|
||||
}
|
||||
|
||||
return (absx >> 52) - 1023;
|
||||
}
|
||||
|
||||
|
||||
int ilogbf (float x)
|
||||
{
|
||||
union{ float f; cl_uint u;} u;
|
||||
u.f = x;
|
||||
|
||||
cl_uint absx = u.u & 0x7fffffff;
|
||||
if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
|
||||
{
|
||||
switch( absx )
|
||||
{
|
||||
case 0:
|
||||
return FP_ILOGB0;
|
||||
case 0x7f800000U:
|
||||
return INT_MAX;
|
||||
default:
|
||||
if( absx > 0x7f800000 )
|
||||
return FP_ILOGBNAN;
|
||||
|
||||
// subnormal
|
||||
u.u = absx | 0x3f800000U;
|
||||
u.f -= 1.0f;
|
||||
return (u.u >> 23) - (127 + 126);
|
||||
}
|
||||
}
|
||||
|
||||
return (absx >> 23) - 127;
|
||||
}
|
||||
|
||||
int ilogbl (long double x)
|
||||
{
|
||||
union
|
||||
{
|
||||
long double f;
|
||||
struct{ cl_ulong m; cl_ushort sexp; }u;
|
||||
} u;
|
||||
u.f = x;
|
||||
|
||||
int exp = u.u.sexp & 0x7fff;
|
||||
if( 0 == exp )
|
||||
{
|
||||
if( 0 == u.u.m )
|
||||
return FP_ILOGB0;
|
||||
|
||||
//subnormal
|
||||
u.u.sexp = 0x3fff;
|
||||
u.f -= 1.0f;
|
||||
exp = u.u.sexp & 0x7fff;
|
||||
|
||||
return exp - (0x3fff + 0x3ffe);
|
||||
}
|
||||
else if( 0x7fff == exp )
|
||||
{
|
||||
if( u.u.m & CL_LONG_MAX )
|
||||
return FP_ILOGBNAN;
|
||||
|
||||
return INT_MAX;
|
||||
}
|
||||
|
||||
return exp - 0x3fff;
|
||||
}
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// fmax, fmin, fmaxf, fminf
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Stores the raw 32-bit pattern of the float fx into *ux. */
static void GET_BITS_SP32(float fx, unsigned int* ux)
{
    volatile union {float asFloat; unsigned int asBits;} pun;

    pun.asFloat = fx;
    *ux = pun.asBits;
}
|
||||
/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
|
||||
/* { */
|
||||
/* volatile union {float f; unsigned int i;} _bitsy; */
|
||||
/* _bitsy.f = (fx); */
|
||||
/* *ux = _bitsy.i; */
|
||||
/* } */
|
||||
/* Stores into *fx the float whose raw 32-bit pattern is ux. */
static void PUT_BITS_SP32(unsigned int ux, float* fx)
{
    volatile union {float asFloat; unsigned int asBits;} pun;

    pun.asBits = ux;
    *fx = pun.asFloat;
}
|
||||
/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
|
||||
/* { */
|
||||
/* volatile union {float f; unsigned int i;} _bitsy; */
|
||||
/* _bitsy.i = (ux); */
|
||||
/* *fx = _bitsy.f; */
|
||||
/* } */
|
||||
/* Stores the raw 64-bit pattern of the double dx into *lx. */
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
{
    volatile union {double asDouble; unsigned __int64 asBits;} pun;

    pun.asDouble = dx;
    *lx = pun.asBits;
}
|
||||
/* Stores into *dx the double whose raw 64-bit pattern is lx. */
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
{
    volatile union {double asDouble; unsigned __int64 asBits;} pun;

    pun.asBits = lx;
    *dx = pun.asDouble;
}
|
||||
|
||||
#if 0
|
||||
int SIGNBIT_DP64(double x )
|
||||
{
|
||||
int hx;
|
||||
_GET_HIGH_WORD(hx,x);
|
||||
return((hx>>31));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* fmax(x, y) returns the larger (more positive) of x and y.
|
||||
NaNs are treated as missing values: if one argument is NaN,
|
||||
the other argument is returned. If both arguments are NaN,
|
||||
the first argument is returned. */
|
||||
|
||||
/* This works so long as the compiler knows that (x != x) means
|
||||
that x is NaN; gcc does. */
|
||||
/* Larger of x and y; a NaN argument is ignored in favour of the other one, and
   x is returned when both are NaN. */
double fmax(double x, double y)
{
    /* y is chosen only when it is a number and x is not >= y (x smaller or NaN) */
    if( !isnan(y) && !( x >= y ) )
        return y;

    return x;
}
|
||||
|
||||
|
||||
/* fmin(x, y) returns the smaller (more negative) of x and y.
|
||||
NaNs are treated as missing values: if one argument is NaN,
|
||||
the other argument is returned. If both arguments are NaN,
|
||||
the first argument is returned. */
|
||||
|
||||
/* Smaller of x and y; a NaN argument is ignored in favour of the other one, and
   x is returned when both are NaN. */
double fmin(double x, double y)
{
    /* y is chosen only when it is a number and x is not <= y (x larger or NaN) */
    if( !isnan(y) && !( x <= y ) )
        return y;

    return x;
}
|
||||
|
||||
|
||||
/* Larger of x and y (single precision); a NaN argument is ignored in favour of
   the other one, and x is returned when both are NaN. */
float fmaxf( float x, float y )
{
    /* y is chosen only when it is a number and x is not >= y (x smaller or NaN) */
    if( !isnan(y) && !( x >= y ) )
        return y;

    return x;
}
|
||||
|
||||
/* fminf(x, y) returns the smaller (more negative) of x and y.
|
||||
NaNs are treated as missing values: if one argument is NaN,
|
||||
the other argument is returned. If both arguments are NaN,
|
||||
the first argument is returned. */
|
||||
|
||||
/* Smaller of x and y (single precision); a NaN argument is ignored in favour of
   the other one, and x is returned when both are NaN. */
float fminf(float x, float y)
{
    /* y is chosen only when it is a number and x is not <= y (x larger or NaN) */
    if( !isnan(y) && !( x <= y ) )
        return y;

    return x;
}
|
||||
|
||||
long double scalblnl(long double x, long n)
|
||||
{
|
||||
union
|
||||
{
|
||||
long double d;
|
||||
struct{ cl_ulong m; cl_ushort sexp;}u;
|
||||
}u;
|
||||
u.u.m = CL_LONG_MIN;
|
||||
|
||||
if( x == 0.0L || n < -2200)
|
||||
return copysignl( 0.0L, x );
|
||||
|
||||
if( n > 2200 )
|
||||
return INFINITY;
|
||||
|
||||
if( n < 0 )
|
||||
{
|
||||
u.u.sexp = 0x3fff - 1022;
|
||||
while( n <= -1022 )
|
||||
{
|
||||
x *= u.d;
|
||||
n += 1022;
|
||||
}
|
||||
u.u.sexp = 0x3fff + n;
|
||||
x *= u.d;
|
||||
return x;
|
||||
}
|
||||
|
||||
if( n > 0 )
|
||||
{
|
||||
u.u.sexp = 0x3fff + 1023;
|
||||
while( n >= 1023 )
|
||||
{
|
||||
x *= u.d;
|
||||
n -= 1023;
|
||||
}
|
||||
u.u.sexp = 0x3fff + n;
|
||||
x *= u.d;
|
||||
return x;
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// log2
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
const static cl_double log_e_base2 = 1.4426950408889634074;
|
||||
const static cl_double log_10_base2 = 3.3219280948873623478;
|
||||
|
||||
//double log10(double x);
|
||||
|
||||
double log2(double x)
|
||||
{
|
||||
return 1.44269504088896340735992468100189214 * log(x);
|
||||
}
|
||||
|
||||
long double log2l(long double x)
|
||||
{
|
||||
return 1.44269504088896340735992468100189214L * log(x);
|
||||
}
|
||||
|
||||
double trunc(double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
|
||||
{
|
||||
cl_long rounded = x;
|
||||
x = copysign( (double) rounded, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
float truncf(float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( absx < 8388608.0f /* 0x1.0p23f */ )
|
||||
{
|
||||
cl_int rounded = x;
|
||||
x = copysignf( (float) rounded, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
long lround(double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( absx < 0.5 )
|
||||
return 0;
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */)
|
||||
{
|
||||
absx += 0.5;
|
||||
cl_long rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysign( absx, x );
|
||||
}
|
||||
|
||||
if( x >= (double) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
return (long) x;
|
||||
}
|
||||
|
||||
long lroundf(float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( absx < 0.5f )
|
||||
return 0;
|
||||
|
||||
if( absx < 8388608.0f )
|
||||
{
|
||||
absx += 0.5f;
|
||||
cl_int rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysignf( absx, x );
|
||||
}
|
||||
|
||||
if( x >= (float) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
return (long) x;
|
||||
}
|
||||
|
||||
double round(double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( absx < 0.5 )
|
||||
return copysign( 0.0, x);
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */)
|
||||
{
|
||||
absx += 0.5;
|
||||
cl_long rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysign( absx, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
float roundf(float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( absx < 0.5f )
|
||||
return copysignf( 0.0f, x);
|
||||
|
||||
if( absx < 8388608.0f )
|
||||
{
|
||||
absx += 0.5f;
|
||||
cl_int rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysignf( absx, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
long double roundl(long double x)
|
||||
{
|
||||
long double absx = fabsl(x);
|
||||
|
||||
if( absx < 0.5L )
|
||||
return copysignl( 0.0L, x);
|
||||
|
||||
if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
|
||||
{
|
||||
absx += 0.5L;
|
||||
cl_ulong rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysignl( absx, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
float cbrtf( float x )
|
||||
{
|
||||
float z = pow( fabs((double) x), 1.0 / 3.0 );
|
||||
return copysignf( z, x );
|
||||
}
|
||||
|
||||
double cbrt( double x )
|
||||
{
|
||||
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
|
||||
}
|
||||
|
||||
/*
 * Rounds x to an integer using the current floating-point rounding mode and
 * returns it as a long.  Values outside the range of long are clamped to
 * LONG_MAX / LONG_MIN instead of being converted out of range.
 */
long int lrint (double x)
{
    double absx = fabs(x);

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    /* Mirror of the upper clamp; converting a value below LONG_MIN would be
       undefined behaviour. */
    if( x <= (double) LONG_MIN )
        return LONG_MIN;

    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
    {
        /* Adding and then subtracting 2^52 (with the sign of x) leaves no
           room for fraction bits, so the FPU rounds x to an integer in
           whatever rounding mode is active. */
        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
        double rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    /* Already integral. */
    return (long int) x;
}
|
||||
|
||||
/*
 * Single-precision counterpart of lrint(): rounds in the current rounding
 * mode and clamps to LONG_MAX / LONG_MIN when x is out of range.
 */
long int lrintf (float x)
{
    float absx = fabsf(x);

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    /* Mirror of the upper clamp; converting a value below LONG_MIN would be
       undefined behaviour. */
    if( x <= (float) LONG_MIN )
        return LONG_MIN;

    if( absx < 8388608.0f /* 0x1.0p23f */ )
    {
        /* Adding and then subtracting 2^23 (with the sign of x) leaves no
           room for fraction bits, so the FPU rounds x to an integer in
           whatever rounding mode is active. */
        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
        float rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    /* Already integral. */
    return (long int) x;
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// fenv functions
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
int fetestexcept(int excepts)
|
||||
{
|
||||
unsigned int status = _statusfp();
|
||||
return excepts & (
|
||||
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
|
||||
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
|
||||
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
|
||||
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
|
||||
((status & _SW_INVALID) ? FE_INVALID : 0)
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * Clears the floating-point status word via _clearfp() and reports success.
 * `excepts` is not consulted: the whole status word is reset, not only the
 * requested flags.
 */
int feclearexcept(int excepts)
{
    (void) excepts;
    (void) _clearfp();
    return 0;
}
|
||||
|
||||
#endif // __INTEL_COMPILER
|
||||
|
||||
#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
|
||||
|
||||
/*
 * Returns a positive quiet NaN.
 *
 * IEEE 754 single-precision layout, least significant bits first:
 *     mantissa  : 22 bits
 *     quiet_nan :  1 bit
 *     exponent  :  8 bits
 *     negative  :  1 bit
 * 0x7fc00000 therefore has every exponent bit and the quiet bit set.
 */
float make_nan()
{
    /* Reinterpret through a union, as the other bit-pattern helpers in this
       file do; dereferencing a cast pointer breaks strict aliasing. */
    union
    {
        int32_t i;
        float   f;
    } bits;

    bits.i = 0x7fc00000;
    return bits.f;
}
|
||||
|
||||
float nanf( const char* str)
|
||||
{
|
||||
cl_uint u = atoi( str );
|
||||
u |= 0x7fc00000U;
|
||||
return *( float*)(&u);
|
||||
}
|
||||
|
||||
|
||||
double nan( const char* str)
|
||||
{
|
||||
cl_ulong u = atoi( str );
|
||||
u |= 0x7ff8000000000000ULL;
|
||||
return *( double*)(&u);
|
||||
}
|
||||
|
||||
// double check this implementatation
|
||||
long double nanl( const char* str)
|
||||
{
|
||||
union
|
||||
{
|
||||
long double f;
|
||||
struct { cl_ulong m; cl_ushort sexp; }u;
|
||||
}u;
|
||||
u.u.sexp = 0x7fff;
|
||||
u.u.m = 0x8000000000000000ULL | atoi( str );
|
||||
|
||||
return u.f;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// misc functions
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
// This function is commented out because the Windows implementation should never call munmap.
|
||||
// If it is calling it, we have a bug. Please file a bugzilla.
|
||||
int munmap(void *addr, size_t len)
|
||||
{
|
||||
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
|
||||
|
||||
return (int)VirtualAlloc( (LPVOID)addr, len,
|
||||
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
|
||||
}
|
||||
*/
|
||||
|
||||
uint64_t ReadTime( void )
|
||||
{
|
||||
LARGE_INTEGER current;
|
||||
QueryPerformanceCounter(¤t);
|
||||
return (uint64_t)current.QuadPart;
|
||||
}
|
||||
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
||||
{
|
||||
static double PerformanceFrequency = 0.0;
|
||||
|
||||
if (PerformanceFrequency == 0.0) {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
PerformanceFrequency = (double) frequency.QuadPart;
|
||||
}
|
||||
|
||||
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
|
||||
}
|
||||
|
||||
int cf_signbit(double x)
|
||||
{
|
||||
union
|
||||
{
|
||||
double f;
|
||||
cl_ulong u;
|
||||
}u;
|
||||
u.f = x;
|
||||
return u.u >> 63;
|
||||
}
|
||||
|
||||
int cf_signbitf(float x)
|
||||
{
|
||||
union
|
||||
{
|
||||
float f;
|
||||
cl_uint u;
|
||||
}u;
|
||||
u.f = x;
|
||||
return u.u >> 31;
|
||||
}
|
||||
|
||||
/* Reinterprets the 32 bits of ix as a float; no numeric conversion takes place. */
float int2float (int32_t ix)
{
    union {
        float   asFloat;
        int32_t asInt;
    } pun;

    pun.asInt = ix;
    return pun.asFloat;
}
|
||||
|
||||
/* Returns the raw 32-bit pattern of fx; no numeric conversion takes place. */
int32_t float2int (float fx)
{
    union {
        float   asFloat;
        int32_t asInt;
    } pun;

    pun.asFloat = fx;
    return pun.asInt;
}
|
||||
|
||||
#if !defined(_WIN64)
|
||||
/** Returns the number of leading 0-bits in pattern,
    starting at the most significant bit position.
    Built on _BitScanReverse; when pattern is 0 this implementation
    returns the bit width of int.
*/
int __builtin_clz(unsigned int pattern)
{
    unsigned long index;

    /* _BitScanReverse reports failure when no bit is set at all. */
    if (!_BitScanReverse( &index, pattern))
        return 8*sizeof(int);

    /* index is the position of the highest set bit, counted from bit 0. */
    return 8*sizeof(int) - 1 - index;
}
|
||||
#else
|
||||
/* Counts the leading zero bits of a 32-bit pattern by binary search over
   the bit positions; a pattern of 0 yields 32. */
int __builtin_clz(unsigned int pattern)
{
    int zeros = 31;
    int step;

    if (pattern == 0u)
        return 32;

    /* Halve the search window each round: 16, 8, 4, 2, 1 bits. */
    for (step = 16; step >= 1; step >>= 1) {
        if (pattern >= (1u << step)) {
            pattern >>= step;
            zeros -= step;
        }
    }

    return zeros;
}
|
||||
|
||||
#endif // !defined(_WIN64)
|
||||
|
||||
#include <intrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
/* Suspends the calling thread for usec microseconds, rounded up to whole
   milliseconds because Sleep() takes milliseconds. Always returns 0. */
int usleep(int usec)
{
    const int millis = (usec + 999) / 1000;

    Sleep(millis);
    return 0;
}
|
||||
|
||||
/* Suspends the calling thread for sec seconds via Sleep(). Always returns 0. */
unsigned int sleep( unsigned int sec )
{
    const unsigned int millis = sec * 1000;

    Sleep( millis );
    return 0;
}
|
||||
|
||||
#endif // defined( _MSC_VER )
|
||||
280
test_common/harness/mt19937.c
Normal file
280
test_common/harness/mt19937.c
Normal file
@@ -0,0 +1,280 @@
|
||||
/*
|
||||
A C-program for MT19937, with initialization improved 2002/1/26.
|
||||
Coded by Takuji Nishimura and Makoto Matsumoto.
|
||||
|
||||
Before using, initialize the state by using init_genrand(seed)
|
||||
or init_by_array(init_key, key_length).
|
||||
|
||||
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. The names of its contributors may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Any feedback is very welcome.
|
||||
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
|
||||
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
|
||||
|
||||
Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "mt19937.h"
|
||||
#include "mingw_compat.h"
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
/*
 * Allocates `size` bytes aligned to `alignment` with the platform's native
 * aligned allocator.  Returns NULL on failure.  The block must be released
 * with align_free().
 */
static void * align_malloc(size_t size, size_t alignment)
{
#if defined(_WIN32) && defined(_MSC_VER)
    return _aligned_malloc(size, alignment);
#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
#if defined(__ANDROID__)
    /* memalign's result, including a NULL on failure, is passed straight through. */
    return memalign(alignment, size);
#else
    void * block = NULL;

    /* posix_memalign reports success with a zero return value. */
    return (0 == posix_memalign(&block, alignment, size)) ? block : NULL;
#endif
#elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
#else
    #error "Please add support OS for aligned malloc"
#endif
}
|
||||
|
||||
/* Releases a block obtained from align_malloc() with the matching
   platform deallocator. */
static void align_free(void * ptr)
{
#if defined(_WIN32) && defined(_MSC_VER)
    _aligned_free(ptr);
#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
    /* Plain statement: a void function may not `return` an expression in ISO C. */
    free(ptr);
#elif defined(__MINGW32__)
    __mingw_aligned_free(ptr);
#else
    #error "Please add support OS for aligned free"
#endif
}
|
||||
|
||||
|
||||
/* Period parameters */
|
||||
#define N 624 /* vector code requires multiple of 4 here */
|
||||
#define M 397
|
||||
#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */
|
||||
#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */
|
||||
#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */
|
||||
|
||||
typedef struct _MTdata
|
||||
{
|
||||
cl_uint mt[N];
|
||||
#ifdef __SSE2__
|
||||
cl_uint cache[N];
|
||||
#endif
|
||||
cl_int mti;
|
||||
}_MTdata;
|
||||
|
||||
/* initializes mt[N] with a seed */
|
||||
/*
 * Allocates a generator and seeds its state vector from `s`.
 * Returns NULL when the allocation fails; release with free_mtdata().
 */
MTdata init_genrand(cl_uint s)
{
    MTdata state = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
    int i;

    if( NULL == state )
        return state;

    state->mt[0] = s;
    for (i = 1; i < N; i++) {
        const cl_uint prev = state->mt[i-1];

        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier.       */
        /* In the previous versions, MSBs of the seed affect only    */
        /* MSBs of the array mt[].                                    */
        /* 2002/01/09 modified by Makoto Matsumoto                    */
        /* The cast keeps the low 32 bits on machines with a wider    */
        /* unsigned long.                                             */
        state->mt[i] = (cl_uint)
            (1812433253UL * (prev ^ (prev >> 30)) + i);
    }

    /* i == N here, so the first draw regenerates the whole state. */
    state->mti = i;

    return state;
}
|
||||
|
||||
/* Releases a generator created by init_genrand(); a NULL handle is ignored. */
void free_mtdata( MTdata d )
{
    if( NULL == d )
        return;

    align_free(d);
}
|
||||
|
||||
/* generates a random number on [0,0xffffffff]-interval */
|
||||
/*
 * Returns the next 32-bit output of the generator.
 *
 * When every word of the current state has been handed out (mti == N) the
 * whole state vector is regenerated first.  With SSE2 the regeneration is
 * vectorised and all N outputs are tempered in advance into d->cache;
 * without SSE2 each output is tempered as it is returned.
 *
 * The SIMD constants are plain locals, so the function touches no shared
 * mutable state and different MTdata objects can be used from different
 * threads.
 */
cl_uint genrand_int32( MTdata d)
{
    /* mag01[x] = x * MATRIX_A  for x=0,1 */
    static const cl_uint mag01[2]={0x0UL, MATRIX_A};

    cl_uint *mt = d->mt;
    cl_uint y;

    if (d->mti == N)
    { /* generate N words at one time */
        int kk = 0;

#ifdef __SSE2__
        /* Constants broadcast to all four lanes. */
        const __m128i upper_mask = _mm_set1_epi32( (int) UPPER_MASK );
        const __m128i lower_mask = _mm_set1_epi32( (int) LOWER_MASK );
        const __m128i one        = _mm_set1_epi32( 1 );
        const __m128i matrix_a   = _mm_set1_epi32( (int) MATRIX_A );
        const __m128i c0         = _mm_set1_epi32( (int) (cl_uint) 0x9d2c5680UL );
        const __m128i c1         = _mm_set1_epi32( (int) (cl_uint) 0xefc60000UL );

        // vector loop
        for( ; kk + 4 <= N-M; kk += 4 )
        {
            __m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask ),
                                       _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask ));     // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))

            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one), one );                                        // y & 1 ? -1 : 0
            __m128i vmag01 = _mm_and_si128( mask, matrix_a );                                                      // y & 1 ? MATRIX_A, 0  =  mag01[y & (cl_uint) 0x1UL]
            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), _mm_srli_epi32( vy, 1 ) );      // mt[kk+M] ^ (y >> 1)
            vr = _mm_xor_si128( vr, vmag01 );                                                                      // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
            _mm_store_si128( (__m128i*) (mt + kk ), vr );
        }
#endif
        /* scalar remainder of the first N-M words */
        for ( ;kk<N-M;kk++) {
            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
        }

#ifdef __SSE2__
        // advance to next aligned location
        for (;kk<N-1 && (kk & 3);kk++) {
            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
        }

        // vector loop
        for( ; kk + 4 <= N-1; kk += 4 )
        {
            __m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask ),
                                       _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask ));     // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))

            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one), one );                                        // y & 1 ? -1 : 0
            __m128i vmag01 = _mm_and_si128( mask, matrix_a );                                                      // y & 1 ? MATRIX_A, 0  =  mag01[y & (cl_uint) 0x1UL]
            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) );  // mt[kk+M-N] ^ (y >> 1)
            vr = _mm_xor_si128( vr, vmag01 );                                                                      // mt[kk+M-N] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
            _mm_store_si128( (__m128i*) (mt + kk ), vr );
        }
#endif

        /* scalar remainder up to, but not including, the last word */
        for (;kk<N-1;kk++) {
            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
        }
        /* the last word wraps around to mt[0] */
        y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];

#ifdef __SSE2__
        // Do the tempering ahead of time in vector code
        for( kk = 0; kk + 4 <= N; kk += 4 )
        {
            __m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) );                        // y = mt[k];
            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) );                         // y ^= (y >> 11);
            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0) );      // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1) );     // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) );                         // y ^= (y >> 18);
            _mm_store_si128( (__m128i*)(d->cache+kk), vy );
        }
#endif

        d->mti = 0;
    }
#ifdef __SSE2__
    /* already tempered during the refill */
    y = d->cache[d->mti++];
#else
    y = mt[d->mti++];

    /* Tempering */
    y ^= (y >> 11);
    y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
    y ^= (y << 15) & (cl_uint) 0xefc60000UL;
    y ^= (y >> 18);
#endif

    return y;
}
|
||||
|
||||
/*
 * Returns a random number on [0,0xffffffffffffffff] built from two
 * consecutive 32-bit draws.
 *
 * The draws are taken in separate statements so that the first one is
 * always the high word; as operands of a single `|` expression their
 * evaluation order would be up to the compiler.
 */
cl_ulong genrand_int64( MTdata d)
{
    const cl_ulong high = genrand_int32(d);
    const cl_ulong low  = genrand_int32(d);

    return (high << 32) | low;
}
|
||||
|
||||
/* generates a random number on [0,1]-real-interval */
|
||||
double genrand_real1(MTdata d)
{
    /* One 32-bit draw divided by 2^32-1, so both 0.0 and 1.0 can be returned. */
    const double scale = 1.0/4294967295.0;

    return genrand_int32(d) * scale;
}
|
||||
|
||||
/* generates a random number on [0,1)-real-interval */
|
||||
double genrand_real2(MTdata d)
{
    /* One 32-bit draw divided by 2^32, so 1.0 is never returned. */
    const double scale = 1.0/4294967296.0;

    return genrand_int32(d) * scale;
}
|
||||
|
||||
/* generates a random number on (0,1)-real-interval */
|
||||
double genrand_real3(MTdata d)
{
    /* Shift the draw by one half before dividing by 2^32, which keeps the
       result strictly between 0.0 and 1.0. */
    const double draw = (double) genrand_int32(d);

    return (draw + 0.5) * (1.0/4294967296.0);
}
|
||||
|
||||
/* generates a random number on [0,1) with 53-bit resolution*/
|
||||
double genrand_res53(MTdata d)
{
    /* Two draws in a fixed order: the top 27 bits of the first and the top
       26 bits of the second are combined into a 53-bit fraction. */
    unsigned long a = genrand_int32(d) >> 5;
    unsigned long b = genrand_int32(d) >> 6;

    /* a * 2^26 + b, divided by 2^53 */
    return (a * 67108864.0 + b) * (1.0/9007199254740992.0);
}
|
||||
99
test_common/harness/mt19937.h
Normal file
99
test_common/harness/mt19937.h
Normal file
@@ -0,0 +1,99 @@
|
||||
|
||||
/*
|
||||
* mt19937.h
|
||||
*
|
||||
* Mersenne Twister.
|
||||
*
|
||||
A C-program for MT19937, with initialization improved 2002/1/26.
|
||||
Coded by Takuji Nishimura and Makoto Matsumoto.
|
||||
|
||||
Before using, initialize the state by using init_genrand(seed)
|
||||
or init_by_array(init_key, key_length).
|
||||
|
||||
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. The names of its contributors may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Any feedback is very welcome.
|
||||
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
|
||||
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
|
||||
*/
|
||||
|
||||
#ifndef MT19937_H
|
||||
#define MT19937_H 1
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/cl_platform.h>
|
||||
#else
|
||||
#include <CL/cl_platform.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Interfaces here have been modified from original sources so that they
|
||||
* are safe to call reentrantly, so long as a different MTdata is used
|
||||
* on each thread.
|
||||
*/
|
||||
|
||||
typedef struct _MTdata *MTdata;
|
||||
|
||||
/* Create the random number generator with seed */
|
||||
MTdata init_genrand( cl_uint /*seed*/ );
|
||||
|
||||
/* release memory used by a MTdata private data */
|
||||
void free_mtdata( MTdata /*data*/ );
|
||||
|
||||
/* generates a random number on [0,0xffffffff]-interval */
|
||||
cl_uint genrand_int32( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,0xffffffffffffffffULL]-interval */
|
||||
cl_ulong genrand_int64( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,1]-real-interval */
|
||||
double genrand_real1( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,1)-real-interval */
|
||||
double genrand_real2( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on (0,1)-real-interval */
|
||||
double genrand_real3( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,1) with 53-bit resolution*/
|
||||
double genrand_res53( MTdata /*data*/ );
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MT19937_H */
|
||||
564
test_common/harness/os_helpers.cpp
Normal file
564
test_common/harness/os_helpers.cpp
Normal file
@@ -0,0 +1,564 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "os_helpers.h"
|
||||
#include "errorHelpers.h"
|
||||
|
||||
// =================================================================================================
|
||||
// C++ interface.
|
||||
// =================================================================================================
|
||||
|
||||
#include <cerrno> // errno, error constants
|
||||
#include <climits> // PATH_MAX
|
||||
#include <cstdlib> // abort, _splitpath, _makepath
|
||||
#include <cstring> // strdup, strerror_r
|
||||
#include <sstream>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#define CHECK_PTR( ptr ) \
|
||||
if ( (ptr) == NULL ) { \
|
||||
abort(); \
|
||||
}
|
||||
|
||||
typedef std::vector< char > buffer_t;
|
||||
|
||||
#if ! defined( PATH_MAX )
|
||||
#define PATH_MAX 1000
|
||||
#endif
|
||||
|
||||
int const _size = PATH_MAX + 1; // Initial buffer size for path.
|
||||
int const _count = 8; // How many times we will try to double buffer size.
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// MacOS X
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
|
||||
|
||||
#include <mach-o/dyld.h> // _NSGetExecutablePath
|
||||
#include <libgen.h> // dirname
|
||||
|
||||
|
||||
static
|
||||
std::string
|
||||
_err_msg(
|
||||
int err, // Error number (e. g. errno).
|
||||
int level // Nesting level, for avoiding infinite recursion.
|
||||
) {
|
||||
|
||||
/*
|
||||
There are 3 incompatible versions of strerror_r:
|
||||
|
||||
char * strerror_r( int, char *, size_t ); // GNU version
|
||||
int strerror_r( int, char *, size_t ); // BSD version
|
||||
int strerror_r( int, char *, size_t ); // XSI version
|
||||
|
||||
BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
|
||||
|
||||
*/
|
||||
|
||||
// BSD version of strerror_r.
|
||||
buffer_t buffer( 100 );
|
||||
int count = _count;
|
||||
for ( ; ; ) {
|
||||
int rc = strerror_r( err, & buffer.front(), buffer.size() );
|
||||
if ( rc == EINVAL ) {
|
||||
// Error code is not recognized, but anyway we got the message.
|
||||
return & buffer.front();
|
||||
} else if ( rc == ERANGE ) {
|
||||
// Buffer is not enough.
|
||||
if ( count > 0 ) {
|
||||
// Enlarge the buffer.
|
||||
-- count;
|
||||
buffer.resize( buffer.size() * 2 );
|
||||
} else {
|
||||
std::stringstream ostr;
|
||||
ostr
|
||||
<< "Error " << err << " "
|
||||
<< "(Getting error message failed: "
|
||||
<< "Buffer of " << buffer.size() << " bytes is still too small"
|
||||
<< ")";
|
||||
return ostr.str();
|
||||
}; // if
|
||||
} else if ( rc == 0 ) {
|
||||
// We got the message.
|
||||
return & buffer.front();
|
||||
} else {
|
||||
std::stringstream ostr;
|
||||
ostr
|
||||
<< "Error " << err << " "
|
||||
<< "(Getting error message failed: "
|
||||
<< ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" )
|
||||
<< ")";
|
||||
return ostr.str();
|
||||
}; // if
|
||||
}; // forever
|
||||
|
||||
} // _err_msg
|
||||
|
||||
|
||||
// Directory separator used in paths on this platform.
std::string
dir_sep(
) {
    return std::string( "/" );
} // dir_sep
|
||||
|
||||
|
||||
std::string
|
||||
exe_path(
|
||||
) {
|
||||
buffer_t path( _size );
|
||||
int count = _count;
|
||||
for ( ; ; ) {
|
||||
uint32_t size = path.size();
|
||||
int rc = _NSGetExecutablePath( & path.front(), & size );
|
||||
if ( rc == 0 ) {
|
||||
break;
|
||||
}; // if
|
||||
if ( count > 0 ) {
|
||||
-- count;
|
||||
path.resize( size );
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n",
|
||||
(unsigned long) path.size()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
}; // forever
|
||||
return & path.front();
|
||||
} // exe_path
|
||||
|
||||
|
||||
std::string
|
||||
exe_dir(
|
||||
) {
|
||||
std::string path = exe_path();
|
||||
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
|
||||
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
|
||||
return dirname( & buffer.front() );
|
||||
} // exe_dir
|
||||
|
||||
|
||||
#endif // __APPLE__
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// Linux
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
#if defined( __linux__ )
|
||||
|
||||
|
||||
#include <cerrno> // errno
|
||||
#include <libgen.h> // dirname
|
||||
#include <unistd.h> // readlink
|
||||
|
||||
|
||||
// Builds a readable message for error number `err`.  `level` is the nesting
// depth; it stops the function from describing its own failures forever.
static
std::string
_err_msg(
    int err,
    int level
) {

    /*
        There are 3 incompatible versions of strerror_r:

            char * strerror_r( int, char *, size_t );  // GNU version
            int    strerror_r( int, char *, size_t );  // BSD version
            int    strerror_r( int, char *, size_t );  // XSI version

        BSD version returns error code.  XSI version returns 0 on success; on
        failure it either returns -1 and sets errno (older C libraries) or
        returns the error number itself (POSIX.1-2008).  Both failure
        conventions are handled below.
    */

    #if defined(__ANDROID__) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE )

        // XSI version of strerror_r.
        #warning Not tested!
        buffer_t buffer( 200 );
        int      count = _count;
        for ( ; ; ) {
            int rc   = strerror_r( err, & buffer.front(), buffer.size() );
            // Normalise the two failure conventions to one error number.
            int _err = ( rc == -1 ? errno : rc );
            if ( _err == ERANGE ) {
                // Buffer is not enough.
                if ( count > 0 ) {
                    // Enlarge the buffer.
                    -- count;
                    buffer.resize( buffer.size() * 2 );
                } else {
                    std::stringstream ostr;
                    ostr
                        << "Error " << err << " "
                        << "(Getting error message failed: "
                        << "Buffer of " << buffer.size() << " bytes is still too small"
                        << ")";
                    return ostr.str();
                }
            } else if ( rc == -1 ) {
                // Failed for another reason; describe that failure instead.
                std::stringstream ostr;
                ostr
                    << "Error " << err << " "
                    << "(Getting error message failed: "
                    << ( level < 2 ? _err_msg( _err, level + 1 ) : "Oops" )
                    << ")";
                return ostr.str();
            } else {
                // We got the message.
                return & buffer.front();
            }
        } // forever

    #else

        // GNU version of strerror_r: returns a pointer to the message, which
        // is copied into the std::string before `buffer' goes out of scope.
        char buffer[ 2000 ];
        return strerror_r( err, buffer, sizeof( buffer ) );

    #endif

} // _err_msg
|
||||
|
||||
|
||||
// Directory separator used in paths on this platform.
std::string
dir_sep(
) {
    return std::string( "/" );
} // dir_sep
|
||||
|
||||
|
||||
std::string
|
||||
exe_path(
|
||||
) {
|
||||
|
||||
static std::string const exe = "/proc/self/exe";
|
||||
|
||||
buffer_t path( _size );
|
||||
int count = _count; // Max number of iterations.
|
||||
|
||||
for ( ; ; ) {
|
||||
|
||||
ssize_t len = readlink( exe.c_str(), & path.front(), path.size() );
|
||||
|
||||
if ( len < 0 ) {
|
||||
// Oops.
|
||||
int err = errno;
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Reading symlink `%s' failed: %s\n",
|
||||
exe.c_str(), err_msg( err ).c_str()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
|
||||
if ( len < path.size() ) {
|
||||
// We got the path.
|
||||
path.resize( len );
|
||||
break;
|
||||
}; // if
|
||||
|
||||
// Oops, buffer is too small.
|
||||
if ( count > 0 ) {
|
||||
-- count;
|
||||
// Enlarge the buffer.
|
||||
path.resize( path.size() * 2 );
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n",
|
||||
exe.c_str(),
|
||||
(unsigned long) path.size()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
|
||||
}; // forever
|
||||
|
||||
return std::string( & path.front(), path.size() );
|
||||
|
||||
} // exe_path
|
||||
|
||||
|
||||
std::string
|
||||
exe_dir(
|
||||
) {
|
||||
std::string path = exe_path();
|
||||
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
|
||||
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
|
||||
return dirname( & buffer.front() );
|
||||
} // exe_dir
|
||||
|
||||
#endif // __linux__
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// MS Windows
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
#if defined( _WIN32 )
|
||||
|
||||
|
||||
#include <windows.h>
|
||||
#if defined( max )
|
||||
#undef max
|
||||
#endif
|
||||
|
||||
#include <cctype>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
static
|
||||
std::string
|
||||
_err_msg(
|
||||
int err,
|
||||
int level
|
||||
) {
|
||||
|
||||
std::string msg;
|
||||
|
||||
LPSTR buffer = NULL;
|
||||
DWORD flags =
|
||||
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||
FORMAT_MESSAGE_IGNORE_INSERTS;
|
||||
|
||||
DWORD len =
|
||||
FormatMessageA(
|
||||
flags,
|
||||
NULL,
|
||||
err,
|
||||
LANG_USER_DEFAULT,
|
||||
reinterpret_cast< LPSTR >( & buffer ),
|
||||
0,
|
||||
NULL
|
||||
);
|
||||
|
||||
if ( buffer == NULL || len == 0 ) {
|
||||
|
||||
int _err = GetLastError();
|
||||
char str[1024] = { 0 };
|
||||
snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" ));
|
||||
msg = std::string(str);
|
||||
|
||||
} else {
|
||||
|
||||
// Trim trailing whitespace (including `\r' and `\n').
|
||||
while ( len > 0 && isspace( buffer[ len - 1 ] ) ) {
|
||||
-- len;
|
||||
}; // while
|
||||
|
||||
// Drop trailing full stop.
|
||||
if ( len > 0 && buffer[ len - 1 ] == '.' ) {
|
||||
-- len;
|
||||
}; // if
|
||||
|
||||
msg.assign( buffer, len );
|
||||
|
||||
}; //if
|
||||
|
||||
if ( buffer != NULL ) {
|
||||
LocalFree( buffer );
|
||||
}; // if
|
||||
|
||||
return msg;
|
||||
|
||||
} // _get_err_msg
|
||||
|
||||
|
||||
// Returns the directory separator used in paths on MS Windows.
std::string
dir_sep(
) {
    static char const separator[] = "\\";
    return std::string( separator );
} // dir_sep
|
||||
|
||||
|
||||
std::string
|
||||
exe_path(
|
||||
) {
|
||||
|
||||
buffer_t path( _size );
|
||||
int count = _count;
|
||||
|
||||
for ( ; ; ) {
|
||||
|
||||
DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() );
|
||||
|
||||
if ( len == 0 ) {
|
||||
int err = GetLastError();
|
||||
log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() );
|
||||
exit( 2 );
|
||||
}; // if
|
||||
|
||||
if ( len < path.size() ) {
|
||||
path.resize( len );
|
||||
break;
|
||||
}; // if
|
||||
|
||||
// Buffer too small.
|
||||
if ( count > 0 ) {
|
||||
-- count;
|
||||
path.resize( path.size() * 2 );
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Buffer of %lu bytes is still too small\n",
|
||||
(unsigned long) path.size()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
|
||||
}; // forever
|
||||
|
||||
return std::string( & path.front(), path.size() );
|
||||
|
||||
} // exe_path
|
||||
|
||||
|
||||
std::string
|
||||
exe_dir(
|
||||
) {
|
||||
|
||||
std::string exe = exe_path();
|
||||
int count = 0;
|
||||
|
||||
// Splitting path into components.
|
||||
buffer_t drv( _MAX_DRIVE );
|
||||
buffer_t dir( _MAX_DIR );
|
||||
count = _count;
|
||||
#if defined(_MSC_VER)
|
||||
for ( ; ; ) {
|
||||
int rc =
|
||||
_splitpath_s(
|
||||
exe.c_str(),
|
||||
& drv.front(), drv.size(),
|
||||
& dir.front(), dir.size(),
|
||||
NULL, 0, // We need neither name
|
||||
NULL, 0 // nor extension
|
||||
);
|
||||
if ( rc == 0 ) {
|
||||
break;
|
||||
} else if ( rc == ERANGE ) {
|
||||
if ( count > 0 ) {
|
||||
-- count;
|
||||
// Buffer is too small, but it is not clear which one.
|
||||
// So we have to enlarge all.
|
||||
drv.resize( drv.size() * 2 );
|
||||
dir.resize( dir.size() * 2 );
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Splitting path `%s' to components failed: "
|
||||
"Buffers of %lu and %lu bytes are still too small\n",
|
||||
exe.c_str(),
|
||||
(unsigned long) drv.size(),
|
||||
(unsigned long) dir.size()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Splitting path `%s' to components failed: %s\n",
|
||||
exe.c_str(),
|
||||
err_msg( rc ).c_str()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
}; // forever
|
||||
|
||||
#else // __MINGW32__
|
||||
|
||||
// MinGW does not have the "secure" _splitpath_s, use the insecure version instead.
|
||||
_splitpath(
|
||||
exe.c_str(),
|
||||
& drv.front(),
|
||||
& dir.front(),
|
||||
NULL, // We need neither name
|
||||
NULL // nor extension
|
||||
);
|
||||
#endif // __MINGW32__
|
||||
|
||||
// Combining components back to path.
|
||||
// I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning
|
||||
// ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh!
|
||||
// So let us try to guess the size of result and go with insecure `_makepath'.
|
||||
buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 );
|
||||
_makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL );
|
||||
|
||||
return & path.front();
|
||||
|
||||
} // exe_dir
|
||||
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
|
||||
std::string
|
||||
err_msg(
|
||||
int err
|
||||
) {
|
||||
|
||||
return _err_msg( err, 0 );
|
||||
|
||||
} // err_msg
|
||||
|
||||
|
||||
// =================================================================================================
|
||||
// C interface.
|
||||
// =================================================================================================
|
||||
|
||||
|
||||
char *
|
||||
get_err_msg(
|
||||
int err
|
||||
) {
|
||||
char * msg = strdup( err_msg( err ).c_str() );
|
||||
CHECK_PTR( msg );
|
||||
return msg;
|
||||
} // get_err_msg
|
||||
|
||||
|
||||
char *
|
||||
get_dir_sep(
|
||||
) {
|
||||
char * sep = strdup( dir_sep().c_str() );
|
||||
CHECK_PTR( sep );
|
||||
return sep;
|
||||
} // get_dir_sep
|
||||
|
||||
|
||||
char *
|
||||
get_exe_path(
|
||||
) {
|
||||
char * path = strdup( exe_path().c_str() );
|
||||
CHECK_PTR( path );
|
||||
return path;
|
||||
} // get_exe_path
|
||||
|
||||
|
||||
char *
|
||||
get_exe_dir(
|
||||
) {
|
||||
char * dir = strdup( exe_dir().c_str() );
|
||||
CHECK_PTR( dir );
|
||||
return dir;
|
||||
} // get_exe_dir
|
||||
|
||||
|
||||
// end of file //
|
||||
53
test_common/harness/os_helpers.h
Normal file
53
test_common/harness/os_helpers.h
Normal file
@@ -0,0 +1,53 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __os_helpers_h__
#define __os_helpers_h__

#include "compat.h"

// -------------------------------------------------------------------------------------------------
// C++ interface.
// -------------------------------------------------------------------------------------------------

#ifdef __cplusplus

    #include <string>

    std::string err_msg( int err );   // Returns system error message for error code `err'.
    std::string dir_sep();            // Returns dir separator.
    std::string exe_path();           // Returns path of current executable.
    std::string exe_dir();            // Returns dir of current executable.

#endif // __cplusplus

// -------------------------------------------------------------------------------------------------
// C interface.
// -------------------------------------------------------------------------------------------------

#ifdef __cplusplus
    extern "C" {
#endif // __cplusplus

// Every function below returns a string allocated with strdup; the caller must free() it.
// The parameterless functions are declared with `( void )' so that C callers get a real
// prototype (in C an empty parameter list leaves the parameters unspecified).
char * get_err_msg( int err );  // Returns system error message. Subject to free.
char * get_dir_sep( void );     // Returns dir separator. Subject to free.
char * get_exe_path( void );    // Returns path of current executable. Subject to free.
char * get_exe_dir( void );     // Returns dir of current executable. Subject to free.

#ifdef __cplusplus
    } // extern "C"
#endif // __cplusplus

#endif // __os_helpers_h__
|
||||
49
test_common/harness/ref_counting.h
Normal file
49
test_common/harness/ref_counting.h
Normal file
@@ -0,0 +1,49 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _ref_counting_h
#define _ref_counting_h

// Reference-count checking helpers.
//
// MARK_REF_COUNT_BASE( c, type, bigType ) declares a `cl_uint c_refCount' variable in the current
// scope and fills it by calling clGet<type>Info( c, CL_<bigType>_REFERENCE_COUNT, ... ).
// The macro expands to several statements, assigns to a variable named `error' that must already
// exist in the calling scope, and passes it to test_error.
#define MARK_REF_COUNT_BASE( c, type, bigType ) \
    cl_uint c##_refCount; \
    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
    test_error( error, "Unable to check reference count for " #type );

// TEST_REF_COUNT_BASE( c, type, bigType ) queries the reference count of `c' again into a new
// `cl_uint c_refCount_new' variable and compares it with the value recorded by the matching
// MARK_REF_COUNT_BASE (which must appear earlier in the same scope). On a mismatch it logs an
// error and makes the enclosing function return -1. Both counts are cl_uint, so they are
// printed with %u.
#define TEST_REF_COUNT_BASE( c, type, bigType ) \
    cl_uint c##_refCount_new; \
    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
    test_error( error, "Unable to check reference count for " #type ); \
    if( c##_refCount != c##_refCount_new ) \
    { \
        log_error( "ERROR: Reference count for " #type " changed! (was %u, now %u)\n", c##_refCount, c##_refCount_new ); \
        return -1; \
    }

// Per-object-type convenience wrappers.
#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )

#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE )
#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE )

#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )

#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM )
#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM )

#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )

#endif // _ref_counting_h
|
||||
241
test_common/harness/rounding_mode.c
Normal file
241
test_common/harness/rounding_mode.c
Normal file
@@ -0,0 +1,241 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "rounding_mode.h"
|
||||
|
||||
#if (defined( __arm__ ) || defined(__aarch64__))
|
||||
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
|
||||
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
|
||||
|
||||
#define _ARM_FE_FTZ 0x1000000
|
||||
#define _ARM_FE_NFTZ 0x0
|
||||
#if defined(__aarch64__)
|
||||
#define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
|
||||
#define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
|
||||
#else
|
||||
#define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
|
||||
#define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
|
||||
#define _ARM_FE_TONEAREST 0x0
|
||||
#define _ARM_FE_UPWARD 0x400000
|
||||
#define _ARM_FE_DOWNWARD 0x800000
|
||||
#define _ARM_FE_TOWARDZERO 0xc00000
|
||||
// ARM (GNU toolchain) version.
// Writes the rounding-mode bits of the floating-point control register according to `r' and
// returns the rounding mode that was active before the call. For float/double output types
// kDefaultRoundingMode maps to round-to-nearest; for all other output types it maps to
// round-toward-zero.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int floatModes[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
        _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
    static const int integerModes[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
        _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };

    const int *table = ( outType == kfloat || outType == kdouble ) ? floatModes : integerModes;

    int controlWord = 0;
    RoundingMode previous = get_round();

    // Replace only the rounding-mode field; every other control bit is preserved.
    _FPU_GETCW( controlWord );
    _FPU_SETCW( table[r] | (controlWord & ~FPSCR_ROUND_MASK) );

    return previous;
}
|
||||
|
||||
RoundingMode get_round( void )
|
||||
{
|
||||
int fpscr;
|
||||
int oldRound;
|
||||
|
||||
_FPU_GETCW(fpscr);
|
||||
oldRound = (fpscr & FPSCR_ROUND_MASK);
|
||||
|
||||
switch( oldRound )
|
||||
{
|
||||
case _ARM_FE_TONEAREST:
|
||||
return kRoundToNearestEven;
|
||||
case _ARM_FE_UPWARD:
|
||||
return kRoundUp;
|
||||
case _ARM_FE_DOWNWARD:
|
||||
return kRoundDown;
|
||||
case _ARM_FE_TOWARDZERO:
|
||||
return kRoundTowardZero;
|
||||
}
|
||||
|
||||
return kDefaultRoundingMode;
|
||||
}
|
||||
|
||||
#elif !(defined(_WIN32) && defined(_MSC_VER))
|
||||
// <fenv.h> version (everything that is neither ARM/GNU nor Windows/MSVC).
// Installs the rounding mode selected by `r' with fesetround and returns the mode that was
// active before the call. For float/double output types kDefaultRoundingMode maps to
// FE_TONEAREST; for all other output types it maps to FE_TOWARDZERO.
// Aborts if fegetround reports a mode that is not one of the four known ones.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int floatModes[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
    static const int integerModes[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };

    const int *table = ( outType == kfloat || outType == kdouble ) ? floatModes : integerModes;

    // Capture the current mode first, then switch to the new one.
    const int previous = fegetround();
    fesetround( table[r] );

    if( previous == FE_TONEAREST )
        return kRoundToNearestEven;
    if( previous == FE_UPWARD )
        return kRoundUp;
    if( previous == FE_DOWNWARD )
        return kRoundDown;
    if( previous == FE_TOWARDZERO )
        return kRoundTowardZero;

    abort();    // unknown rounding mode reported by fegetround
    return kDefaultRoundingMode;    // never happens
}
|
||||
|
||||
RoundingMode get_round( void )
|
||||
{
|
||||
int oldRound = fegetround();
|
||||
|
||||
switch( oldRound )
|
||||
{
|
||||
case FE_TONEAREST:
|
||||
return kRoundToNearestEven;
|
||||
case FE_UPWARD:
|
||||
return kRoundUp;
|
||||
case FE_DOWNWARD:
|
||||
return kRoundDown;
|
||||
case FE_TOWARDZERO:
|
||||
return kRoundTowardZero;
|
||||
}
|
||||
|
||||
return kDefaultRoundingMode;
|
||||
}
|
||||
|
||||
#else
|
||||
// Windows/MSVC version.
// Installs the rounding mode selected by `r' with _controlfp_s and returns the mode that was
// active before the call. For float/double output types kDefaultRoundingMode maps to _RC_NEAR;
// for all other output types it maps to _RC_CHOP.
// If the current mode cannot be read, an error is logged, nothing is changed and
// kDefaultRoundingMode is returned.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int floatModes[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    static const int integerModes[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    const int *table = integerModes;
    unsigned int controlWord;
    RoundingMode previous = kDefaultRoundingMode;

    if( outType == kfloat || outType == kdouble )
        table = floatModes;

    // A zero mask makes _controlfp_s only report the current control word.
    if( _controlfp_s(&controlWord, 0, 0) ) {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    switch( controlWord & _MCW_RC )
    {
        case _RC_NEAR:  previous = kRoundToNearestEven; break;
        case _RC_UP:    previous = kRoundUp;            break;
        case _RC_DOWN:  previous = kRoundDown;          break;
        case _RC_CHOP:  previous = kRoundTowardZero;    break;
        default:        previous = kDefaultRoundingMode; break;
    }

    _controlfp_s(&controlWord, table[r], _MCW_RC);  // install the new rounding mode
    return previous;
}
|
||||
|
||||
RoundingMode get_round( void )
|
||||
{
|
||||
unsigned int oldRound;
|
||||
|
||||
int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
|
||||
oldRound &= _MCW_RC;
|
||||
return
|
||||
(oldRound == _RC_NEAR)? kRoundToNearestEven :
|
||||
(oldRound == _RC_UP)? kRoundUp :
|
||||
(oldRound == _RC_DOWN)? kRoundDown :
|
||||
(oldRound == _RC_CHOP)? kRoundTowardZero:
|
||||
kDefaultRoundingMode;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//
|
||||
// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in
|
||||
// basic_test_conversions.c. Some host processors may not support this mode, in which case you'll need to do some clamping in
|
||||
// software by testing against FLT_MIN or DBL_MIN in that file.
|
||||
//
|
||||
// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of
|
||||
// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
|
||||
// operators do (e.g. add, subtract, multiply, divide, etc.)
|
||||
//
|
||||
// Configuring hardware to FTZ mode varies by platform.
|
||||
// CAUTION: Some C implementations may also fail to behave properly in this mode.
|
||||
//
|
||||
// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
|
||||
// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
|
||||
// is used for floating point computation! If your OS uses x87, you'll need to figure out how
|
||||
// to turn that off for the conversions code in basic_test_conversions.c so that they flush to
|
||||
// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
|
||||
// in which case, these functions are at liberty to do nothing.
|
||||
//
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
|
||||
#include <xmmintrin.h>
|
||||
#elif defined( __PPC__ )
|
||||
#include <fpu_control.h>
|
||||
#endif
|
||||
// Switches the host into flush-to-zero mode.
// On x86 the previous MXCSR value is returned packed into the result pointer, so that
// UnFlushToZero can restore it; on ARM and PowerPC NULL is returned.
void *FlushToZero( void )
{
#if !( defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) )
    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#elif defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
    // Keep the old MXCSR in the union and turn on the 0x8040 bits on top of it.
    union{ int i;  void *p; }saved = { _mm_getcsr() };
    _mm_setcsr( saved.i | 0x8040 );
    return saved.p;
#elif defined( __arm__ ) || defined(__aarch64__)
    int controlWord;
    _FPU_GETCW(controlWord);
    _FPU_SETCW(controlWord | FPSCR_FZ);
    return NULL;
#elif defined( __PPC__ )
    fpu_control_t controlWord = 0;
    _FPU_GETCW(controlWord);
    controlWord |= _FPU_MASK_NI;
    _FPU_SETCW(controlWord);
    return NULL;
#else
    #error Unknown arch
#endif
}
|
||||
|
||||
// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
|
||||
// Reverts FlushToZero.
// On x86 the MXCSR value packed into `p' by FlushToZero is written back; on ARM and PowerPC `p'
// is ignored and the flush-to-zero bit is simply cleared.
void UnFlushToZero( void *p)
{
#if !( defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) )
    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#elif defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
    union{ void *p;  int i;  }saved = { p };
    _mm_setcsr( saved.i );
#elif defined( __arm__ ) || defined(__aarch64__)
    int controlWord;
    _FPU_GETCW(controlWord);
    _FPU_SETCW(controlWord & ~FPSCR_FZ);
#elif defined( __PPC__)
    fpu_control_t controlWord = 0;
    _FPU_GETCW(controlWord);
    controlWord &= ~_FPU_MASK_NI;
    _FPU_SETCW(controlWord);
#else
    #error Unknown arch
#endif
}
|
||||
69
test_common/harness/rounding_mode.h
Normal file
69
test_common/harness/rounding_mode.h
Normal file
@@ -0,0 +1,69 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __ROUNDING_MODE_H__
|
||||
#define __ROUNDING_MODE_H__
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
#if (defined(_WIN32) && defined (_MSC_VER))
|
||||
#include "errorHelpers.h"
|
||||
#include "testHarness.h"
|
||||
#endif
|
||||
|
||||
// Rounding modes understood by set_round / get_round.
// The values are used as indices into per-mode lookup tables of size kRoundingModeCount.
typedef enum
{
    kDefaultRoundingMode    = 0,
    kRoundToNearestEven     = 1,
    kRoundUp                = 2,
    kRoundDown              = 3,
    kRoundTowardZero        = 4,

    // Number of rounding modes; keep last.
    kRoundingModeCount      = 5
}RoundingMode;
|
||||
|
||||
// Element types; set_round uses this to tell float/double outputs apart from all other outputs.
typedef enum
{
    kuchar = 0,     // 0
    kchar,          // 1
    kushort,        // 2
    kshort,         // 3
    kuint,          // 4
    kint,           // 5
    kfloat,         // 6
    kdouble,        // 7
    kulong,         // 8
    klong,          // 9

    // This goes last
    kTypeCount      // 10
}Type;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern RoundingMode set_round( RoundingMode r, Type outType );
|
||||
extern RoundingMode get_round( void );
|
||||
extern void *FlushToZero( void );
|
||||
extern void UnFlushToZero( void *p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif /* __ROUNDING_MODE_H__ */
|
||||
840
test_common/harness/testHarness.c
Normal file
840
test_common/harness/testHarness.c
Normal file
@@ -0,0 +1,840 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testHarness.h"
|
||||
#include "compat.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "threadTesting.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "kernelHelpers.h"
|
||||
#include "fpcontrol.h"
|
||||
#include "typeWrappers.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#if !defined (__APPLE__)
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
int gTestsPassed = 0;
|
||||
int gTestsFailed = 0;
|
||||
cl_uint gRandomSeed = 0;
|
||||
cl_uint gReSeed = 0;
|
||||
|
||||
int gFlushDenormsToZero = 0;
|
||||
int gInfNanSupport = 1;
|
||||
int gIsEmbedded = 0;
|
||||
int gIsOpenCL_C_1_0_Device = 0;
|
||||
int gIsOpenCL_1_0_Device = 0;
|
||||
int gHasLong = 1;
|
||||
|
||||
#define DEFAULT_NUM_ELEMENTS 0x4000
|
||||
|
||||
int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
|
||||
{
|
||||
return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired, forceNoContextCreation, queueProps,
|
||||
( imageSupportRequired ) ? verifyImageSupport : NULL );
|
||||
}
|
||||
|
||||
int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
|
||||
DeviceCheckFn deviceCheckFn )
|
||||
{
|
||||
test_start();
|
||||
|
||||
cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
cl_uint num_platforms = 0;
|
||||
cl_platform_id *platforms;
|
||||
cl_device_id device;
|
||||
int num_elements = DEFAULT_NUM_ELEMENTS;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_uint choosen_device_index = 0;
|
||||
cl_uint choosen_platform_index = 0;
|
||||
|
||||
int err, ret;
|
||||
char *endPtr;
|
||||
unsigned int i;
|
||||
int based_on_env_var = 0;
|
||||
|
||||
|
||||
/* Check for environment variable to set device type */
|
||||
char *env_mode = getenv( "CL_DEVICE_TYPE" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
based_on_env_var = 1;
|
||||
if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_GPU;
|
||||
else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_CPU;
|
||||
else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
else
|
||||
{
|
||||
log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
{
|
||||
// report on any unusual library search path indirection
|
||||
char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
|
||||
if( libSearchPath )
|
||||
log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
|
||||
|
||||
// report on any unusual framework search path indirection
|
||||
char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
|
||||
if( libSearchPath )
|
||||
log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
|
||||
}
|
||||
#endif
|
||||
|
||||
env_mode = getenv( "CL_DEVICE_INDEX" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
choosen_device_index = atoi(env_mode);
|
||||
}
|
||||
|
||||
env_mode = getenv( "CL_PLATFORM_INDEX" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
choosen_platform_index = atoi(env_mode);
|
||||
}
|
||||
|
||||
/* Process the command line arguments */
|
||||
|
||||
/* Special case: just list the tests */
|
||||
if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
|
||||
{
|
||||
log_info( "Usage: %s [<function name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
|
||||
log_info( "\t<function name>\tOne or more of: (wildcard character '*') (default *)\n");
|
||||
log_info( "\tpid<num>\t\tIndicates platform at index <num> should be used (default 0).\n" );
|
||||
log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
|
||||
log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
|
||||
|
||||
for( i = 0; i < num_fns - 1; i++ )
|
||||
{
|
||||
log_info( "\t\t%s\n", fnNames[ i ] );
|
||||
}
|
||||
test_finish();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* How are we supposed to seed the random # generators? */
|
||||
if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
|
||||
{
|
||||
gRandomSeed = (cl_uint) time( NULL );
|
||||
log_info( "Random seed: %u.\n", gRandomSeed );
|
||||
gReSeed = 1;
|
||||
argc--;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(" Initializing random seed to 0.\n");
|
||||
}
|
||||
|
||||
/* Do we have an integer to specify the number of elements to pass to tests? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
|
||||
if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
|
||||
{
|
||||
/* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
|
||||
/* (hence why we stored the result in ret first) */
|
||||
num_elements = ret;
|
||||
log_info( "Testing with num_elements of %d\n", num_elements );
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Do we have a CPU/GPU specification? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_GPU;
|
||||
argc--;
|
||||
}
|
||||
else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_CPU;
|
||||
argc--;
|
||||
}
|
||||
else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
argc--;
|
||||
}
|
||||
else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Did we choose a specific device index? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
|
||||
{
|
||||
choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Did we choose a specific platform index? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
|
||||
{
|
||||
choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
switch( device_type )
|
||||
{
|
||||
case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break;
|
||||
case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break;
|
||||
case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break;
|
||||
case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break;
|
||||
default: log_error( "Requesting unknown device "); return -1;
|
||||
}
|
||||
log_info( based_on_env_var ? "based on environment variable " : "based on command line " );
|
||||
log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#if defined( __i386__ ) || defined( __x86_64__ )
|
||||
#define kHasSSE3 0x00000008
|
||||
#define kHasSupplementalSSE3 0x00000100
|
||||
#define kHasSSE4_1 0x00000400
|
||||
#define kHasSSE4_2 0x00000800
|
||||
/* check our environment for a hint to disable SSE variants */
|
||||
{
|
||||
const char *env = getenv( "CL_MAX_SSE" );
|
||||
if( env )
|
||||
{
|
||||
extern int _cpu_capabilities;
|
||||
int mask = 0;
|
||||
if( 0 == strcasecmp( env, "SSE4.1" ) )
|
||||
mask = kHasSSE4_2;
|
||||
else if( 0 == strcasecmp( env, "SSSE3" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1;
|
||||
else if( 0 == strcasecmp( env, "SSE3" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
|
||||
else if( 0 == strcasecmp( env, "SSE2" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
|
||||
else
|
||||
{
|
||||
log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
|
||||
return -2;
|
||||
}
|
||||
|
||||
log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
|
||||
_cpu_capabilities &= ~mask;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Get the platform */
|
||||
err = clGetPlatformIDs(0, NULL, &num_platforms);
|
||||
if (err) {
|
||||
print_error(err, "clGetPlatformIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
|
||||
if (!platforms || choosen_platform_index >= num_platforms) {
|
||||
log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
BufferOwningPtr<cl_platform_id> platformsBuf(platforms);
|
||||
|
||||
err = clGetPlatformIDs(num_platforms, platforms, NULL);
|
||||
if (err) {
|
||||
print_error(err, "clGetPlatformIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Get the number of requested devices */
|
||||
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0, NULL, &num_devices );
|
||||
if (err) {
|
||||
print_error(err, "clGetDeviceIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
|
||||
if (!devices || choosen_device_index >= num_devices) {
|
||||
log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
BufferOwningPtr<cl_device_id> devicesBuf(devices);
|
||||
|
||||
|
||||
/* Get the requested device */
|
||||
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, num_devices, devices, NULL );
|
||||
if (err) {
|
||||
print_error(err, "clGetDeviceIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
device = devices[choosen_device_index];
|
||||
|
||||
if( printDeviceHeader( device ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_device_fp_config fpconfig = 0;
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
|
||||
if (err) {
|
||||
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
|
||||
log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
|
||||
log_info( "sizeof( void*) = %d (host)\n", (int) sizeof( void* ) );
|
||||
|
||||
//detect whether profile of the device is embedded
|
||||
char profile[1024] = "";
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
|
||||
if (err)
|
||||
{
|
||||
print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
|
||||
|
||||
//detect the floating point capabilities
|
||||
cl_device_fp_config floatCapabilities = 0;
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
|
||||
if (err)
|
||||
{
|
||||
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check for problems that only embedded will have
|
||||
if( gIsEmbedded )
|
||||
{
|
||||
//If the device is embedded, we need to detect if the device supports Infinity and NaN
|
||||
if ((floatCapabilities & CL_FP_INF_NAN) == 0)
|
||||
gInfNanSupport = 0;
|
||||
|
||||
// check the extensions list to see if ulong and long are supported
|
||||
size_t extensionsStringSize = 0;
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) ))
|
||||
{
|
||||
print_error( err, "Unable to get extensions string size for embedded device" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
char *extensions_string = (char*) malloc(extensionsStringSize);
|
||||
if( NULL == extensions_string )
|
||||
{
|
||||
print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
BufferOwningPtr<char> extensions_stringBuf(extensions_string);
|
||||
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) ))
|
||||
{
|
||||
print_error( err, "Unable to get extensions string for embedded device" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( extensions_string[extensionsStringSize-1] != '\0' )
|
||||
{
|
||||
log_error( "FAILURE: extensions string for embedded device is not NUL terminated" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( NULL == strstr( extensions_string, "cles_khr_int64" ))
|
||||
gHasLong = 0;
|
||||
}
|
||||
|
||||
if( getenv( "OPENCL_1_0_DEVICE" ) )
|
||||
{
|
||||
char c_version[1024];
|
||||
gIsOpenCL_1_0_Device = 1;
|
||||
memset( c_version, 0, sizeof( c_version ) );
|
||||
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
|
||||
{
|
||||
log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
|
||||
{
|
||||
gIsOpenCL_C_1_0_Device = 1;
|
||||
log_info( "Device is a OpenCL C 1.0 device\n" );
|
||||
}
|
||||
else
|
||||
log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
|
||||
}
|
||||
|
||||
cl_uint device_address_bits = 0;
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
|
||||
{
|
||||
print_error( err, "Unable to obtain device address bits" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
if( device_address_bits )
|
||||
log_info( "sizeof( void*) = %d (device)\n", device_address_bits/8 );
|
||||
else
|
||||
{
|
||||
log_error("Invalid device address bit size returned by device.\n");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* If we have a device checking function, run it */
|
||||
if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (num_elements <= 0)
|
||||
num_elements = DEFAULT_NUM_ELEMENTS;
|
||||
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
|
||||
int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements );
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore the old FP mode before leaving.
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static int find_wildcard_matching_functions( const char *fnNames[], unsigned char fnsToCall[], unsigned int num_fns,
|
||||
const char *wildcard )
|
||||
{
|
||||
int found_tests = 0;
|
||||
size_t wildcard_length = strlen( wildcard ) - 1; /* -1 for the asterisk */
|
||||
|
||||
for( unsigned int fnIndex = 0; fnIndex < num_fns; fnIndex++ )
|
||||
{
|
||||
if( strncmp( fnNames[ fnIndex ], wildcard, wildcard_length ) == 0 )
|
||||
{
|
||||
if( fnsToCall[ fnIndex ] )
|
||||
{
|
||||
log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] );
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
fnsToCall[ fnIndex ] = 1;
|
||||
found_tests = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if( !found_tests )
|
||||
{
|
||||
log_error( "ERROR: The wildcard '%s' did not match any test names.\n", wildcard );
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int find_argument_matching_function( const char *fnNames[], unsigned char *fnsToCall, unsigned int num_fns,
|
||||
const char *argument )
|
||||
{
|
||||
unsigned int fnIndex;
|
||||
|
||||
for( fnIndex = 0; fnIndex < num_fns; fnIndex++ )
|
||||
{
|
||||
if( strcmp( argument, fnNames[ fnIndex ] ) == 0 )
|
||||
{
|
||||
if( fnsToCall[ fnIndex ] )
|
||||
{
|
||||
log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] );
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
else
|
||||
{
|
||||
fnsToCall[ fnIndex ] = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( fnIndex == num_fns )
|
||||
{
|
||||
log_error( "ERROR: The argument '%s' did not match any test names.\n", argument );
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[], int forceNoContextCreation,
|
||||
cl_command_queue_properties queueProps, int num_elements )
|
||||
{
|
||||
int ret = EXIT_SUCCESS;
|
||||
|
||||
unsigned char *fnsToCall = ( unsigned char* ) calloc( num_fns, 1 );
|
||||
|
||||
if( argc == 1 )
|
||||
{
|
||||
/* No actual arguments, all tests will be run. */
|
||||
memset( fnsToCall, 1, num_fns );
|
||||
}
|
||||
else
|
||||
{
|
||||
for( int argIndex = 1; argIndex < argc; argIndex++ )
|
||||
{
|
||||
if( strchr( argv[ argIndex ], '*' ) != NULL )
|
||||
{
|
||||
ret = find_wildcard_matching_functions( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( strcmp( argv[ argIndex ], "all" ) == 0 )
|
||||
{
|
||||
memset( fnsToCall, 1, num_fns );
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = find_argument_matching_function( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
|
||||
}
|
||||
}
|
||||
|
||||
if( ret == EXIT_FAILURE )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( ret == EXIT_SUCCESS )
|
||||
{
|
||||
ret = callTestFunctions( fnList, fnNames, fnsToCall, num_fns, device, forceNoContextCreation, num_elements, queueProps );
|
||||
|
||||
if( gTestsFailed == 0 )
|
||||
{
|
||||
if( gTestsPassed > 1 )
|
||||
{
|
||||
log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed);
|
||||
}
|
||||
else if( gTestsPassed > 0 )
|
||||
{
|
||||
log_info("PASSED test.\n");
|
||||
}
|
||||
}
|
||||
else if( gTestsFailed > 0 )
|
||||
{
|
||||
if( gTestsFailed+gTestsPassed > 1 )
|
||||
{
|
||||
log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error("FAILED test.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test_finish();
|
||||
|
||||
free( fnsToCall );
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Runs every selected test in functionList.
 *
 * For each index i below numFunctions whose functionsToCall[i] flag is set,
 * the test is run through callSingleTestFunction. Entries whose function
 * pointer is NULL are only logged as not implemented.
 *
 * Returns the sum of the values returned by callSingleTestFunction.
 */
int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
                       int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
                       int numElementsToUse, cl_command_queue_properties queueProps )
{
    int totalErrors = 0;
    int idx;

    for( idx = 0; idx < numFunctions; idx++ )
    {
        /* Not selected: nothing to do for this entry. */
        if( !functionsToCall[ idx ] )
            continue;

        /* Skip any unimplemented tests. */
        if( functionList[ idx ] == NULL )
        {
            log_info( "%s test currently not implemented\n", functionNames[ idx ] );
            continue;
        }

        totalErrors += callSingleTestFunction( functionList[ idx ], functionNames[ idx ], deviceToUse,
                                               forceNoContextCreation, numElementsToUse, queueProps );
    }

    return totalErrors;
}
|
||||
|
||||
/*
 * Context notification callback: logs the error text it is given.
 * Only errinfo is used; the other arguments are ignored.
 */
void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
    (void) private_info;
    (void) cb;
    (void) user_data;

    log_info( "%s\n", errinfo );
}
|
||||
|
||||
// Actual function execution
|
||||
/*
 * Runs one test function and records its result.
 *
 * Unless forceNoContextCreation is set, a context and a command queue are
 * created on deviceToUse for the test and released afterwards; otherwise the
 * test is called with a NULL context and queue.
 *
 * functionToCall   - the test to run
 * functionName     - name used in log output
 * numElementsToUse - passed through to the test
 * queueProps       - command queue properties; when non-zero they are passed
 *                    as the CL_QUEUE_PROPERTIES entry of the property list
 *
 * Updates gTestsPassed / gTestsFailed. Returns 1 if the context or queue could
 * not be created; otherwise the number of errors seen (a failing test and a
 * failing clFinish each count as one).
 */
int callSingleTestFunction( basefn functionToCall, const char *functionName,
                            cl_device_id deviceToUse, int forceNoContextCreation,
                            int numElementsToUse, const cl_queue_properties queueProps )
{
    int numErrors = 0, ret;
    cl_int error;
    cl_context context = NULL;
    cl_command_queue queue = NULL;

    /* Zero-terminated property list. With no properties requested the first
       entry is 0, so the list is empty from the API's point of view. */
    const cl_command_queue_properties cmd_queueProps = (queueProps)?CL_QUEUE_PROPERTIES:0;
    cl_command_queue_properties queueCreateProps[] = {cmd_queueProps, queueProps, 0};

    /* Create a context to work with, unless we're told not to */
    if( !forceNoContextCreation )
    {
        context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
        if (!context)
        {
            print_error( error, "Unable to create testing context" );
            return 1;
        }

        queue = clCreateCommandQueueWithProperties( context, deviceToUse, &queueCreateProps[0], &error );
        if( queue == NULL )
        {
            print_error( error, "Unable to create testing command queue" );
            /* Do not leak the context that was just created. */
            clReleaseContext( context );
            return 1;
        }
    }

    /* Run the test and print the result */
    log_info( "%s...\n", functionName );
    fflush( stdout );

    error = check_opencl_version_with_testname(functionName, deviceToUse);
    /* NOTE(review): test_missing_feature is defined outside this block; if it
       returns early on error, the context and queue created above are not
       released on that path -- confirm. */
    test_missing_feature(error, functionName);

    ret = functionToCall( deviceToUse, context, queue, numElementsToUse);        //test_threaded_function( ptr_basefn_list[i], group, context, num_elements);
    if( ret == TEST_NOT_IMPLEMENTED )
    {
        /* Tests can also let us know they're not implemented yet */
        log_info("%s test currently not implemented\n\n", functionName);
    }
    else
    {
        /* Print result */
        if( ret == 0 ) {
            log_info( "%s passed\n", functionName );
            gTestsPassed++;
        }
        else
        {
            numErrors++;
            log_error( "%s FAILED\n", functionName );
            gTestsFailed++;
        }
    }

    /* Release the context */
    if( !forceNoContextCreation )
    {
        /* Drain the queue first so errors from outstanding work are reported. */
        error = clFinish(queue);
        if (error) {
            log_error("clFinish failed: %d\n", error);
            numErrors++;
        }
        clReleaseCommandQueue( queue );
        clReleaseContext( context );
    }

    return numErrors;
}
|
||||
|
||||
/*
 * Applies the CL_DEVICE_TYPE environment override to *inOutType and logs the
 * resulting choice.
 *
 * Recognised values are "gpu", "cpu", "accelerator" and the corresponding
 * CL_DEVICE_TYPE_* names (plus CL_DEVICE_TYPE_DEFAULT); any other value leaves
 * *inOutType untouched. The log states whether the environment variable was
 * set. On Apple platforms the DYLD_LIBRARY_PATH and DYLD_FRAMEWORK_PATH
 * settings are also reported when present.
 */
void checkDeviceTypeOverride( cl_device_type *inOutType )
{
    /* Check if we are forced to CPU mode */
    char *force_cpu = getenv( "CL_DEVICE_TYPE" );
    if( force_cpu != NULL )
    {
        if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_GPU;
        else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_CPU;
        else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_ACCELERATOR;
        else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_DEFAULT;
    }

    switch( *inOutType )
    {
        case CL_DEVICE_TYPE_GPU:            log_info( "Requesting GPU device " ); break;
        case CL_DEVICE_TYPE_CPU:            log_info( "Requesting CPU device " ); break;
        case CL_DEVICE_TYPE_ACCELERATOR:    log_info( "Requesting Accelerator device " ); break;
        case CL_DEVICE_TYPE_DEFAULT:        log_info( "Requesting Default device " ); break;
        default: break;
    }
    log_info( force_cpu != NULL ? "based on environment variable\n" : "based on command line\n" );

#if defined( __APPLE__ )
    {
        // report on any unusual library search path indirection
        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
        if( libSearchPath )
            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );

        // report on any unusual framework search path indirection
        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
        // Guard on the framework path itself (not the library path) so a NULL
        // pointer is never passed to %s and a set value is always reported.
        if( frameworkSearchPath )
            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
    }
#endif

}
|
||||
|
||||
#if ! defined( __APPLE__ )
|
||||
/*
 * Fills 'bytes' bytes at dest with the 4-byte pattern found at src_pattern.
 *
 * The pattern is repeated bytes / 4 times; if bytes is not a multiple of 4 the
 * remaining 1-3 bytes are filled with the leading bytes of the pattern.
 *
 * All accesses go through memcpy, so neither dest nor src_pattern needs to be
 * 4-byte aligned.
 */
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
{
    unsigned char pattern[4];
    unsigned char *out = (unsigned char *) dest;
    const size_t wholeWords = bytes / 4;
    const size_t tailBytes = bytes & 3;
    size_t i;

    /* Take a private copy of the pattern before writing anything. */
    memcpy( pattern, src_pattern, sizeof( pattern ) );

    for( i = 0; i < wholeWords; i++ )
    {
        memcpy( out, pattern, sizeof( pattern ) );
        out += sizeof( pattern );
    }

    /* Partial trailing word: copy only the bytes that still fit. */
    if( tailBytes )
        memcpy( out, pattern, tailBytes );
}
|
||||
#endif
|
||||
|
||||
/*
 * Queries CL_DEVICE_TYPE for the given device.
 *
 * Returns the reported device type. If the query fails an error is logged and
 * the initial value (-1 converted to cl_device_type) is returned.
 */
extern cl_device_type GetDeviceType( cl_device_id d )
{
    cl_device_type deviceType = -1;

    if( clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL ) != CL_SUCCESS )
        log_error( "ERROR: Unable to get device type for device %p\n", d );

    return deviceType;
}
|
||||
|
||||
|
||||
/*
 * Finds a device on the same platform that is different from 'device'.
 *
 * Returns:
 *   - another device of the platform, when one exists,
 *   - 'device' itself when it is the only device on the platform,
 *   - NULL when any OpenCL query or the allocation fails.
 */
cl_device_id GetOpposingDevice( cl_device_id device )
{
    cl_int error;
    cl_device_id *otherDevices;
    cl_uint actualCount;
    cl_platform_id plat;

    // Get the platform of the device to use for getting a list of devices
    error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get device's platform" );
        return NULL;
    }

    // Get a list of all devices
    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get list of devices size" );
        return NULL;
    }
    otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
    if (NULL == otherDevices) {
        // 'error' still holds CL_SUCCESS here, so report the allocation
        // failure with an explicit code instead.
        print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate list of other devices." );
        return NULL;
    }
    // Owns the list from here on, so the early returns below need no manual free.
    BufferOwningPtr<cl_device_id> otherDevicesBuf(otherDevices);

    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get list of devices" );
        return NULL;
    }

    if( actualCount == 1 )
    {
        return device;    // NULL means error, returning self means we couldn't find another one
    }

    // Loop and just find one that isn't the one we were given
    cl_uint i;
    for( i = 0; i < actualCount; i++ )
    {
        if( otherDevices[ i ] != device )
        {
            // The type itself is not used; the query only confirms that the
            // candidate device answers clGetDeviceInfo.
            cl_device_type newType;
            error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
            if( error != CL_SUCCESS )
            {
                print_error( error, "Unable to get device type for other device" );
                return NULL;
            }
            cl_device_id result = otherDevices[ i ];
            return result;
        }
    }

    // Should never get here
    return NULL;
}
|
||||
|
||||
|
||||
100
test_common/harness/testHarness.h
Normal file
100
test_common/harness/testHarness.h
Normal file
@@ -0,0 +1,100 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testHarness_h
|
||||
#define _testHarness_h
|
||||
|
||||
#include "threadTesting.h"
|
||||
#include "clImageHelper.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern cl_uint gReSeed;
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
|
||||
// setup work, and then call each function in turn as dictated by the passed arguments.
|
||||
extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
|
||||
|
||||
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
|
||||
typedef int (*DeviceCheckFn)( cl_device_id device );
|
||||
|
||||
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
|
||||
extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
|
||||
|
||||
// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
|
||||
extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
|
||||
basefn *fnList, const char *fnNames[],
|
||||
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
|
||||
|
||||
// Call this function if you need to do all the setup work yourself, and just need the function list called/
|
||||
// managed.
|
||||
// functionList is the actual array of functions
|
||||
// functionNames is an array of strings representing the name of each function
|
||||
// functionsToCall is an array of integers (treated as bools) which tell which function is to be called,
|
||||
// each element at index i, corresponds to the element in functionList at index i
|
||||
// numFunctions is the number of elements in the arrays
|
||||
// queueProps are the command queue properties used to create the testing queue for each test
|
||||
// deviceToUse and numElementsToUse are all just passed to each test function
|
||||
extern int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
|
||||
int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse, cl_command_queue_properties queueProps );
|
||||
|
||||
// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
|
||||
extern int callSingleTestFunction( basefn functionToCall, const char *functionName,
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse, cl_command_queue_properties queueProps );
|
||||
|
||||
///// Miscellaneous steps
|
||||
|
||||
// Given a pre-existing device type choice, check the environment for an override, then print what
|
||||
// choice was made and how (and return the overridden choice, if there is one)
|
||||
extern void checkDeviceTypeOverride( cl_device_type *inOutType );
|
||||
|
||||
// standard callback function for context pfn_notify
|
||||
extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
|
||||
|
||||
extern cl_device_type GetDeviceType( cl_device_id );
|
||||
|
||||
// Given a device (most likely passed in by the harness, but not required), will attempt to find
|
||||
// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
|
||||
// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
|
||||
// is the only device available, the SAME device is returned, so check!
|
||||
extern cl_device_id GetOpposingDevice( cl_device_id device );
|
||||
|
||||
|
||||
extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
|
||||
extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs
|
||||
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
|
||||
extern int gHasLong; // This is set to 1 if the device supports long and ulong types in OpenCL C.
|
||||
extern int gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
|
||||
|
||||
#if ! defined( __APPLE__ )
|
||||
void memset_pattern4(void *, const void *, size_t);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _testHarness_h
|
||||
|
||||
|
||||
51
test_common/harness/test_mt19937.c
Normal file
51
test_common/harness/test_mt19937.c
Normal file
@@ -0,0 +1,51 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "mt19937.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main( void )
|
||||
{
|
||||
MTdata d = init_genrand(42);
|
||||
int i;
|
||||
const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
|
||||
0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
|
||||
0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
|
||||
0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
|
||||
int errcount = 0;
|
||||
|
||||
for( i = 0; i < 65536; i++ )
|
||||
{
|
||||
cl_uint u = genrand_int32( d );
|
||||
if( 0 == (i & 4095) )
|
||||
{
|
||||
if( u != reference[i>>12] )
|
||||
{
|
||||
printf("ERROR: expected *0x%8.8x at %d. Got 0x%8.8x\n", reference[i>>12], i, u );
|
||||
errcount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free_mtdata(d);
|
||||
|
||||
if( errcount )
|
||||
printf("mt19937 test failed.\n");
|
||||
else
|
||||
printf("mt19937 test passed.\n");
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
100
test_common/harness/threadTesting.c
Normal file
100
test_common/harness/threadTesting.c
Normal file
@@ -0,0 +1,100 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "compat.h"
|
||||
#include "threadTesting.h"
|
||||
#include "errorHelpers.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
#if 0 // Disabled for now
|
||||
|
||||
typedef struct
|
||||
{
|
||||
basefn mFunction;
|
||||
cl_device_id mDevice;
|
||||
cl_context mContext;
|
||||
int mNumElements;
|
||||
} TestFnArgs;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Thread-based testing. Spawns a new thread to run the given test function,
|
||||
// then waits for it to complete. The entire idea is that, if the thread crashes,
|
||||
// we can catch it and report it as a failure instead of crashing the entire suite
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void *test_thread_wrapper( void *data )
|
||||
{
|
||||
TestFnArgs *args;
|
||||
int retVal;
|
||||
cl_context context;
|
||||
|
||||
args = (TestFnArgs *)data;
|
||||
|
||||
/* Create a new context to use (contexts can't cross threads) */
|
||||
context = clCreateContext(NULL, args->mDeviceGroup);
|
||||
if( context == NULL )
|
||||
{
|
||||
log_error("clCreateContext failed for new thread\n");
|
||||
return (void *)(-1);
|
||||
}
|
||||
|
||||
/* Call function */
|
||||
retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
|
||||
|
||||
clReleaseContext( context );
|
||||
|
||||
return (void *)retVal;
|
||||
}
|
||||
|
||||
int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
|
||||
{
|
||||
int error;
|
||||
pthread_t threadHdl;
|
||||
void *retVal;
|
||||
TestFnArgs args;
|
||||
|
||||
|
||||
args.mFunction = fnToTest;
|
||||
args.mDeviceGroup = deviceGroup;
|
||||
args.mDevice = device;
|
||||
args.mContext = context;
|
||||
args.mNumElements = numElements;
|
||||
|
||||
|
||||
error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
|
||||
if( error != 0 )
|
||||
{
|
||||
log_error( "ERROR: Unable to create thread for testing!\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Thread has been started, now just wait for it to complete (or crash) */
|
||||
error = pthread_join( threadHdl, &retVal );
|
||||
if( error != 0 )
|
||||
{
|
||||
log_error( "ERROR: Unable to join testing thread!\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
return (int)((intptr_t)retVal);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
32
test_common/harness/threadTesting.h
Normal file
32
test_common/harness/threadTesting.h
Normal file
@@ -0,0 +1,32 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _threadTesting_h
|
||||
#define _threadTesting_h
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
#define TEST_NOT_IMPLEMENTED -99
|
||||
|
||||
typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
|
||||
|
||||
#endif // _threadTesting_h
|
||||
|
||||
|
||||
481
test_common/harness/typeWrappers.cpp
Normal file
481
test_common/harness/typeWrappers.cpp
Normal file
@@ -0,0 +1,481 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "typeWrappers.h"
|
||||
#include "kernelHelpers.h"
|
||||
#include "errorHelpers.h"
|
||||
#include <stdlib.h>
|
||||
#include "clImageHelper.h"
|
||||
|
||||
#define ROUND_SIZE_UP( _size, _align ) (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#define kPageSize 4096
|
||||
#include <sys/mman.h>
|
||||
#include <stdlib.h>
|
||||
#elif defined(__linux__)
|
||||
#include <unistd.h>
|
||||
#define kPageSize (getpagesize())
|
||||
#endif
|
||||
|
||||
// Builds a 1D protected image by delegating to Create(); the resulting status
// is written to *errcode_ret when a non-NULL pointer is supplied.
//
// The members are given defined values first so that the destructor never
// reads indeterminate handles if Create() leaves before assigning them
// (e.g. through test_error, which presumably returns on failure -- confirm).
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
    : backingStore( NULL ), backingStoreSize( 0 ), image( NULL )
{
    cl_int err = Create( context, mem_flags, fmt, width );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
|
||||
|
||||
// Creates a 1D image of `width` pixels and stores it in `image`.
//
// On Apple hosts, when no device in the context is a GPU, the image is created
// with CL_MEM_USE_HOST_PTR on top of an mmap'd region laid out as:
//   4 inaccessible rows | image row + 1 inaccessible page | 4 inaccessible rows
// so that accesses outside the row fault. Setting CL_ALIGN_RIGHT in the
// environment shifts the row so its last pixel touches the trailing guard page.
// In every other case a plain image is created and backingStore is NULL.
//
// Returns the OpenCL status of the image creation, or CL_OUT_OF_HOST_MEMORY
// when the backing store cannot be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
{
    cl_int error;

#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // clGetContextInfo reports a byte count; convert it to a device count.
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        if (type == CL_DEVICE_TYPE_GPU) {
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        const size_t rows = 1;
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store
        backingStoreSize = rows * rowStride + 8 * rowStride;
        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
        if( backingStore == MAP_FAILED )
        {
            // Never hand MAP_FAILED to mprotect/munmap.
            log_error( "ERROR: mmap failed in clProtectedImage.\n" );
            backingStore = NULL;
            backingStoreSize = 0;
            image = NULL;
            return CL_OUT_OF_HOST_MEMORY;
        }

        // add guard pages
        size_t row;
        char *base = (char*) backingStore;
        char *firstRow = base + 4 * rowStride;
        char *tail = firstRow + rows * rowStride;
        for( row = 0; row < 4; row++ )
            mprotect( base + row * rowStride, rowStride, PROT_NONE );
        for( row = 0; row < rows; row++ )
            mprotect( firstRow + row * rowStride + rowBytes, kPageSize, PROT_NONE );
        for( row = 0; row < 4; row++ )
            mprotect( tail + row * rowStride, rowStride, PROT_NONE );

        char *imagePtr = firstRow;
        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
        return error;
    }
#endif

    // Unprotected path: no host backing store.
    backingStore = NULL;
    image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
    return error;
}
|
||||
|
||||
|
||||
// Builds a 2D protected image by delegating to Create(); the resulting status
// is written to *errcode_ret when a non-NULL pointer is supplied.
//
// The members are given defined values first so that the destructor never
// reads indeterminate handles if Create() leaves before assigning them
// (e.g. through test_error, which presumably returns on failure -- confirm).
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
    : backingStore( NULL ), backingStoreSize( 0 ), image( NULL )
{
    cl_int err = Create( context, mem_flags, fmt, width, height );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
|
||||
|
||||
// Creates a width x height 2D image and stores it in `image`.
//
// On Apple hosts, when no device in the context is a GPU, the image is created
// with CL_MEM_USE_HOST_PTR on top of an mmap'd region laid out as:
//   4 inaccessible rows | `height` rows, each followed by 1 inaccessible page |
//   4 inaccessible rows
// so that accesses outside a row fault. Setting CL_ALIGN_RIGHT in the
// environment shifts the rows so their last pixel touches the guard page.
// In every other case a plain image is created and backingStore is NULL.
//
// Returns the OpenCL status of the image creation, or CL_OUT_OF_HOST_MEMORY
// when the backing store cannot be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
{
    cl_int error;

#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // clGetContextInfo reports a byte count; convert it to a device count.
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        if (type == CL_DEVICE_TYPE_GPU) {
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        const size_t rows = height;
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store
        backingStoreSize = rows * rowStride + 8 * rowStride;
        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
        if( backingStore == MAP_FAILED )
        {
            // Never hand MAP_FAILED to mprotect/munmap.
            log_error( "ERROR: mmap failed in clProtectedImage.\n" );
            backingStore = NULL;
            backingStoreSize = 0;
            image = NULL;
            return CL_OUT_OF_HOST_MEMORY;
        }

        // add guard pages
        size_t row;
        char *base = (char*) backingStore;
        char *firstRow = base + 4 * rowStride;
        char *tail = firstRow + rows * rowStride;
        for( row = 0; row < 4; row++ )
            mprotect( base + row * rowStride, rowStride, PROT_NONE );
        for( row = 0; row < rows; row++ )
            mprotect( firstRow + row * rowStride + rowBytes, kPageSize, PROT_NONE );
        for( row = 0; row < 4; row++ )
            mprotect( tail + row * rowStride, rowStride, PROT_NONE );

        char *imagePtr = firstRow;
        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
        return error;
    }
#endif

    // Unprotected path: no host backing store.
    backingStore = NULL;
    image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
    return error;
}
|
||||
|
||||
// Builds a 3D protected image by delegating to Create(); the resulting status
// is written to *errcode_ret when a non-NULL pointer is supplied.
//
// The members are given defined values first so that the destructor never
// reads indeterminate handles if Create() leaves before assigning them
// (e.g. through test_error, which presumably returns on failure -- confirm).
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
    : backingStore( NULL ), backingStoreSize( 0 ), image( NULL )
{
    cl_int err = Create( context, mem_flags, fmt, width, height, depth );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
|
||||
|
||||
// Creates a width x height x depth 3D image and stores it in `image`.
//
// On Apple hosts, when no device in the context is a GPU, the image is created
// with CL_MEM_USE_HOST_PTR on top of an mmap'd region laid out as:
//   4 inaccessible rows | height*depth rows, each followed by 1 inaccessible
//   page | 4 inaccessible rows
// so that accesses outside a row fault. Setting CL_ALIGN_RIGHT in the
// environment shifts the rows so their last pixel touches the guard page.
// In every other case a plain image is created and backingStore is NULL.
//
// Returns the OpenCL status of the image creation, or CL_OUT_OF_HOST_MEMORY
// when the backing store cannot be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
{
    cl_int error;

#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // clGetContextInfo reports a byte count; convert it to a device count.
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        if (type == CL_DEVICE_TYPE_GPU) {
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        const size_t rows = height * depth;
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store
        backingStoreSize = rows * rowStride + 8 * rowStride;
        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
        if( backingStore == MAP_FAILED )
        {
            // Never hand MAP_FAILED to mprotect/munmap.
            log_error( "ERROR: mmap failed in clProtectedImage.\n" );
            backingStore = NULL;
            backingStoreSize = 0;
            image = NULL;
            return CL_OUT_OF_HOST_MEMORY;
        }

        // add guard pages
        size_t row;
        char *base = (char*) backingStore;
        char *firstRow = base + 4 * rowStride;
        char *tail = firstRow + rows * rowStride;
        for( row = 0; row < 4; row++ )
            mprotect( base + row * rowStride, rowStride, PROT_NONE );
        for( row = 0; row < rows; row++ )
            mprotect( firstRow + row * rowStride + rowBytes, kPageSize, PROT_NONE );
        for( row = 0; row < 4; row++ )
            mprotect( tail + row * rowStride, rowStride, PROT_NONE );

        char *imagePtr = firstRow;
        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
        return error;
    }
#endif

    // Unprotected path: no host backing store.
    backingStore = NULL;
    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
    return error;
}
|
||||
|
||||
|
||||
// Builds a protected image of the requested imageType by delegating to
// Create(); the resulting status is written to *errcode_ret when a non-NULL
// pointer is supplied.
//
// The members are given defined values first so that the destructor never
// reads indeterminate handles if Create() leaves before assigning them
// (e.g. through test_error, which presumably returns on failure -- confirm).
clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
    : backingStore( NULL ), backingStoreSize( 0 ), image( NULL )
{
    cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
|
||||
|
||||
// Creates an image of the given imageType (1D, 2D, 3D, 1D array or 2D array)
// and stores it in `image`.
//
// On Apple hosts, when no device in the context is a GPU, the image is created
// with CL_MEM_USE_HOST_PTR on top of an mmap'd region laid out as:
//   4 inaccessible rows | N rows, each followed by 1 inaccessible page |
//   4 inaccessible rows
// where N is derived from imageType. The same N sizes the mapping and drives
// the guard-page loop, so the two can never disagree. Setting CL_ALIGN_RIGHT
// in the environment shifts the rows so their last pixel touches the guard
// page. In every other case a plain image is created and backingStore is NULL.
//
// Returns the OpenCL status of the image creation,
// CL_INVALID_IMAGE_DESCRIPTOR for an unsupported imageType, or
// CL_OUT_OF_HOST_MEMORY when the backing store cannot be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
{
    cl_int error;

#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // clGetContextInfo reports a byte count; convert it to a device count.
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        if (type == CL_DEVICE_TYPE_GPU) {
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        // Number of image rows that live in the backing store.
        size_t rows;
        switch (imageType)
        {
            case CL_MEM_OBJECT_IMAGE1D:
                rows = 1;
                break;
            case CL_MEM_OBJECT_IMAGE2D:
                rows = height;
                break;
            case CL_MEM_OBJECT_IMAGE3D:
                rows = height * depth;
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                rows = arraySize;
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                rows = height * arraySize;
                break;
            default:
                log_error( "ERROR: Unsupported image type passed to clProtectedImage::Create.\n" );
                backingStore = NULL;
                image = NULL;
                return CL_INVALID_IMAGE_DESCRIPTOR;
        }

        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store
        backingStoreSize = rows * rowStride + 8 * rowStride;
        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
        if( backingStore == MAP_FAILED )
        {
            // Never hand MAP_FAILED to mprotect/munmap.
            log_error( "ERROR: mmap failed in clProtectedImage.\n" );
            backingStore = NULL;
            backingStoreSize = 0;
            image = NULL;
            return CL_OUT_OF_HOST_MEMORY;
        }

        // add guard pages
        size_t row;
        char *base = (char*) backingStore;
        char *firstRow = base + 4 * rowStride;
        char *tail = firstRow + rows * rowStride;
        for( row = 0; row < 4; row++ )
            mprotect( base + row * rowStride, rowStride, PROT_NONE );
        for( row = 0; row < rows; row++ )
            mprotect( firstRow + row * rowStride + rowBytes, kPageSize, PROT_NONE );
        for( row = 0; row < 4; row++ )
            mprotect( tail + row * rowStride, rowStride, PROT_NONE );

        char *imagePtr = firstRow;
        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        switch (imageType)
        {
            case CL_MEM_OBJECT_IMAGE1D:
                image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
                break;
            case CL_MEM_OBJECT_IMAGE2D:
                image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
                break;
            case CL_MEM_OBJECT_IMAGE3D:
                image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
                break;
            default:
                // Unreachable: imageType was validated when `rows` was computed.
                break;
        }
        return error;
    }
#endif

    // Unprotected path: no host backing store.
    backingStore = NULL;
    switch (imageType)
    {
        case CL_MEM_OBJECT_IMAGE1D:
            image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
            break;
        case CL_MEM_OBJECT_IMAGE2D:
            image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
            break;
        case CL_MEM_OBJECT_IMAGE3D:
            image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
            break;
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
            image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
            break;
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
            image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
            break;
        default:
            // Without this, `image` and `error` would be returned uninitialised.
            log_error( "ERROR: Unsupported image type passed to clProtectedImage::Create.\n" );
            image = NULL;
            error = CL_INVALID_IMAGE_DESCRIPTOR;
            break;
    }
    return error;
}
|
||||
|
||||
|
||||
|
||||
/*******
|
||||
* clProtectedArray implementation
|
||||
*******/
|
||||
// Creates an empty array: both buffer pointers start out NULL and nothing is
// allocated until Allocate() is called.
clProtectedArray::clProtectedArray()
    : mBuffer( NULL ), mValidBuffer( NULL )
{
}
|
||||
|
||||
// Creates the array and immediately allocates sizeInBytes of storage for it
// through Allocate().
clProtectedArray::clProtectedArray( size_t sizeInBytes )
    : mBuffer( NULL ), mValidBuffer( NULL )
{
    Allocate( sizeInBytes );
}
|
||||
|
||||
// Releases the storage obtained by Allocate(): munmap on Apple (logging a
// warning when it fails), free() everywhere else. Does nothing when no buffer
// was ever allocated.
clProtectedArray::~clProtectedArray()
{
    if( mBuffer == NULL )
        return;

#if defined( __APPLE__ )
    if( munmap( mBuffer, mRealSize ) )
        log_error("WARNING: munmap failed in clProtectedArray.\n");
#else
    free( mBuffer );
#endif
}
|
||||
|
||||
void clProtectedArray::Allocate( size_t sizeInBytes )
|
||||
{
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
|
||||
// Allocate enough space to: round up our actual allocation to an even number of pages
|
||||
// and allocate two pages on either side
|
||||
mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
|
||||
mRealSize = mRoundedSize + kPageSize * 2;
|
||||
|
||||
// Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
|
||||
mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
|
||||
mValidBuffer = mBuffer + kPageSize;
|
||||
|
||||
// Protect guard area from access
|
||||
mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
|
||||
mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
|
||||
#else
|
||||
mRoundedSize = mRealSize = sizeInBytes;
|
||||
mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
332
test_common/harness/typeWrappers.h
Normal file
332
test_common/harness/typeWrappers.h
Normal file
@@ -0,0 +1,332 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _typeWrappers_h
|
||||
#define _typeWrappers_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include "compat.h"
|
||||
#include <stdio.h>
|
||||
#include "mt19937.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "kernelHelpers.h"
|
||||
|
||||
extern "C" cl_uint gReSeed;
|
||||
extern "C" cl_uint gRandomSeed;
|
||||
|
||||
/* cl_context wrapper */
|
||||
|
||||
class clContextWrapper
|
||||
{
|
||||
public:
|
||||
clContextWrapper() { mContext = NULL; }
|
||||
clContextWrapper( cl_context program ) { mContext = program; }
|
||||
~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
|
||||
|
||||
clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
|
||||
operator cl_context() const { return mContext; }
|
||||
|
||||
cl_context * operator&() { return &mContext; }
|
||||
|
||||
bool operator==( const cl_context &rhs ) { return mContext == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_context mContext;
|
||||
};
|
||||
|
||||
/* cl_program wrapper */
|
||||
|
||||
class clProgramWrapper
|
||||
{
|
||||
public:
|
||||
clProgramWrapper() { mProgram = NULL; }
|
||||
clProgramWrapper( cl_program program ) { mProgram = program; }
|
||||
~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
|
||||
|
||||
clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
|
||||
operator cl_program() const { return mProgram; }
|
||||
|
||||
cl_program * operator&() { return &mProgram; }
|
||||
|
||||
bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_program mProgram;
|
||||
};
|
||||
|
||||
/* cl_kernel wrapper */
|
||||
|
||||
class clKernelWrapper
|
||||
{
|
||||
public:
|
||||
clKernelWrapper() { mKernel = NULL; }
|
||||
clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
|
||||
~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
|
||||
|
||||
clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
|
||||
operator cl_kernel() const { return mKernel; }
|
||||
|
||||
cl_kernel * operator&() { return &mKernel; }
|
||||
|
||||
bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_kernel mKernel;
|
||||
};
|
||||
|
||||
/* cl_mem (stream) wrapper */
|
||||
|
||||
class clMemWrapper
|
||||
{
|
||||
public:
|
||||
clMemWrapper() { mMem = NULL; }
|
||||
clMemWrapper( cl_mem mem ) { mMem = mem; }
|
||||
~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
|
||||
|
||||
clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_mem() const { return mMem; }
|
||||
|
||||
cl_mem * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_mem mMem;
|
||||
};
|
||||
|
||||
class clProtectedImage
|
||||
{
|
||||
public:
|
||||
clProtectedImage() { image = NULL; backingStore = NULL; }
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
|
||||
~clProtectedImage()
|
||||
{
|
||||
if( image != NULL )
|
||||
clReleaseMemObject( image );
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
if(backingStore)
|
||||
munmap(backingStore, backingStoreSize);
|
||||
#endif
|
||||
}
|
||||
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
|
||||
cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
|
||||
|
||||
clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
|
||||
operator cl_mem() { return image; }
|
||||
|
||||
cl_mem * operator&() { return ℑ }
|
||||
|
||||
bool operator==( const cl_mem &rhs ) { return image == rhs; }
|
||||
|
||||
protected:
|
||||
void *backingStore;
|
||||
size_t backingStoreSize;
|
||||
cl_mem image;
|
||||
};
|
||||
|
||||
/* cl_command_queue wrapper */
|
||||
class clCommandQueueWrapper
|
||||
{
|
||||
public:
|
||||
clCommandQueueWrapper() { mMem = NULL; }
|
||||
clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
|
||||
~clCommandQueueWrapper() { if( mMem != NULL ) { clReleaseCommandQueue( mMem ); } }
|
||||
|
||||
clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_command_queue() const { return mMem; }
|
||||
|
||||
cl_command_queue * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_command_queue mMem;
|
||||
};
|
||||
|
||||
/* cl_sampler wrapper */
|
||||
class clSamplerWrapper
|
||||
{
|
||||
public:
|
||||
clSamplerWrapper() { mMem = NULL; }
|
||||
clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
|
||||
~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
|
||||
|
||||
clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_sampler() const { return mMem; }
|
||||
|
||||
cl_sampler * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_sampler mMem;
|
||||
};
|
||||
|
||||
/* cl_event wrapper */
|
||||
class clEventWrapper
|
||||
{
|
||||
public:
|
||||
clEventWrapper() { mMem = NULL; }
|
||||
clEventWrapper( cl_event mem ) { mMem = mem; }
|
||||
~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
|
||||
|
||||
clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_event() const { return mMem; }
|
||||
|
||||
cl_event * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_event &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_event mMem;
|
||||
};
|
||||
|
||||
/* Generic protected memory buffer, for verifying access within bounds */
|
||||
// Byte buffer used to verify that accesses stay within bounds.
// The member functions are defined in typeWrappers.cpp; converting the object
// to (const) void * yields mValidBuffer, the start of the usable region.
class clProtectedArray
{
public:
    clProtectedArray();
    clProtectedArray( size_t sizeInBytes );
    virtual ~clProtectedArray();

    void Allocate( size_t sizeInBytes );

    operator void *() { return (void *)mValidBuffer; }
    operator const void *() const { return (const void *)mValidBuffer; }

protected:
    char * mBuffer;       // start of the whole allocation
    char * mValidBuffer;  // start of the caller-usable bytes
    size_t mRealSize, mRoundedSize;
};
|
||||
|
||||
class RandomSeed
|
||||
{
|
||||
public:
|
||||
RandomSeed( cl_uint seed ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); }
|
||||
~RandomSeed()
|
||||
{
|
||||
if( gReSeed )
|
||||
gRandomSeed = genrand_int32( mtData );
|
||||
free_mtdata(mtData);
|
||||
}
|
||||
|
||||
operator MTdata () {return mtData;}
|
||||
|
||||
protected:
|
||||
MTdata mtData;
|
||||
};
|
||||
|
||||
|
||||
template <typename T> class BufferOwningPtr
|
||||
{
|
||||
BufferOwningPtr(BufferOwningPtr const &); // do not implement
|
||||
void operator=(BufferOwningPtr const &); // do not implement
|
||||
|
||||
void *ptr;
|
||||
void *map;
|
||||
size_t mapsize; // Bytes allocated total, pointed to by map.
|
||||
size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
|
||||
bool aligned;
|
||||
public:
|
||||
explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {}
|
||||
explicit BufferOwningPtr(void *p, void *m, size_t s)
|
||||
: ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
|
||||
{
|
||||
#if ! defined( __APPLE__ )
|
||||
if(m)
|
||||
{
|
||||
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
~BufferOwningPtr() {
|
||||
if (map) {
|
||||
#if defined( __APPLE__ )
|
||||
int error = munmap(map, mapsize);
|
||||
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
||||
#endif
|
||||
} else {
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) {
|
||||
if (map){
|
||||
#if defined( __APPLE__ )
|
||||
int error = munmap(map, mapsize);
|
||||
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
||||
#else
|
||||
log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
|
||||
abort();
|
||||
#endif
|
||||
} else {
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
ptr = p;
|
||||
map = m;
|
||||
mapsize = mapsize_;
|
||||
allocsize = (ptr != NULL) ? allocsize_ : 0; // Force allocsize to zero if ptr is NULL.
|
||||
aligned = aligned_;
|
||||
#if ! defined( __APPLE__ )
|
||||
if(m)
|
||||
{
|
||||
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
operator T*() { return (T*)ptr; }
|
||||
|
||||
size_t getSize() const { return allocsize; };
|
||||
};
|
||||
|
||||
#endif // _typeWrappers_h
|
||||
|
||||
8
test_common/miniz/CMakeLists.txt
Normal file
8
test_common/miniz/CMakeLists.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
# Static library built from the bundled miniz sources.
set(TARGET_NAME miniz)

add_library(${TARGET_NAME} STATIC
    miniz.c
    miniz.h
)
|
||||
4153
test_common/miniz/miniz.c
Normal file
4153
test_common/miniz/miniz.c
Normal file
File diff suppressed because it is too large
Load Diff
749
test_common/miniz/miniz.h
Normal file
749
test_common/miniz/miniz.h
Normal file
@@ -0,0 +1,749 @@
|
||||
#ifndef MINIZ_HEADER_INCLUDED
|
||||
#define MINIZ_HEADER_INCLUDED
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
|
||||
// TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux
|
||||
#define MINIZ_NO_TIME
|
||||
#endif
|
||||
|
||||
#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__)
|
||||
// MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
|
||||
#define MINIZ_X86_OR_X64_CPU 1
|
||||
#endif
|
||||
|
||||
// Only trust the byte-order comparison when the compiler actually defines both
// macros: undefined identifiers evaluate to 0 inside #if, so a bare comparison
// would be 0==0 and wrongly select little endian on any compiler lacking them.
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)) || MINIZ_X86_OR_X64_CPU
// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
#define MINIZ_LITTLE_ENDIAN 1
#endif
|
||||
|
||||
#if MINIZ_X86_OR_X64_CPU
|
||||
// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses.
|
||||
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
|
||||
#endif
|
||||
|
||||
#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
|
||||
// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions).
|
||||
#define MINIZ_HAS_64BIT_REGISTERS 1
|
||||
#endif
|
||||
|
||||
// Return status codes. MZ_PARAM_ERROR is non-standard.
|
||||
enum {
|
||||
MZ_OK = 0,
|
||||
MZ_STREAM_END = 1,
|
||||
MZ_NEED_DICT = 2,
|
||||
MZ_ERRNO = -1,
|
||||
MZ_STREAM_ERROR = -2,
|
||||
MZ_DATA_ERROR = -3,
|
||||
MZ_MEM_ERROR = -4,
|
||||
MZ_BUF_ERROR = -5,
|
||||
MZ_VERSION_ERROR = -6,
|
||||
MZ_PARAM_ERROR = -10000
|
||||
};
|
||||
|
||||
typedef unsigned long mz_ulong;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// ------------------- zlib-style API Definitions.
|
||||
|
||||
// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap.
|
||||
void mz_free(void *p);
|
||||
|
||||
#define MZ_ADLER32_INIT (1)
|
||||
// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL.
|
||||
mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);
|
||||
|
||||
#define MZ_CRC32_INIT (0)
|
||||
// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL.
|
||||
mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);
|
||||
|
||||
// Compression strategies.
|
||||
enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 };
|
||||
|
||||
// Method
|
||||
#define MZ_DEFLATED 8
|
||||
|
||||
#ifndef MINIZ_NO_ZLIB_APIS
|
||||
|
||||
// Heap allocation callbacks.
|
||||
// Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long.
|
||||
typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
|
||||
typedef void (*mz_free_func)(void *opaque, void *address);
|
||||
typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size);
|
||||
|
||||
#define MZ_VERSION "9.1.15"
|
||||
#define MZ_VERNUM 0x91F0
|
||||
#define MZ_VER_MAJOR 9
|
||||
#define MZ_VER_MINOR 1
|
||||
#define MZ_VER_REVISION 15
|
||||
#define MZ_VER_SUBREVISION 0
|
||||
|
||||
// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs).
|
||||
enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 };
|
||||
|
||||
// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
|
||||
enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 };
|
||||
|
||||
// Window bits
|
||||
#define MZ_DEFAULT_WINDOW_BITS 15
|
||||
|
||||
struct mz_internal_state;
|
||||
|
||||
// Compression/decompression stream struct.
|
||||
typedef struct mz_stream_s
|
||||
{
|
||||
const unsigned char *next_in; // pointer to next byte to read
|
||||
unsigned int avail_in; // number of bytes available at next_in
|
||||
mz_ulong total_in; // total number of bytes consumed so far
|
||||
|
||||
unsigned char *next_out; // pointer to next byte to write
|
||||
unsigned int avail_out; // number of bytes that can be written to next_out
|
||||
mz_ulong total_out; // total number of bytes produced so far
|
||||
|
||||
char *msg; // error msg (unused)
|
||||
struct mz_internal_state *state; // internal state, allocated by zalloc/zfree
|
||||
|
||||
mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc)
|
||||
mz_free_func zfree; // optional heap free function (defaults to free)
|
||||
void *opaque; // heap alloc function user pointer
|
||||
|
||||
int data_type; // data_type (unused)
|
||||
mz_ulong adler; // adler32 of the source or uncompressed data
|
||||
mz_ulong reserved; // not used
|
||||
} mz_stream;
|
||||
|
||||
typedef mz_stream *mz_streamp;
|
||||
|
||||
// Returns the version string of miniz.c.
|
||||
const char *mz_version(void);
|
||||
|
||||
// mz_deflateInit() initializes a compressor with default options:
|
||||
// Parameters:
|
||||
// pStream must point to an initialized mz_stream struct.
|
||||
// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
|
||||
// level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio.
|
||||
// (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
|
||||
// Return values:
|
||||
// MZ_OK on success.
|
||||
// MZ_STREAM_ERROR if the stream is bogus.
|
||||
// MZ_PARAM_ERROR if the input parameters are bogus.
|
||||
// MZ_MEM_ERROR on out of memory.
|
||||
int mz_deflateInit(mz_streamp pStream, int level);
|
||||
|
||||
// mz_deflateInit2() is like mz_deflateInit(), except with more control:
|
||||
// Additional parameters:
|
||||
// method must be MZ_DEFLATED
|
||||
// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer)
|
||||
// mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
|
||||
int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);
|
||||
|
||||
// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
|
||||
int mz_deflateReset(mz_streamp pStream);
|
||||
|
||||
// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible.
|
||||
// Parameters:
|
||||
// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
|
||||
// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH.
|
||||
// Return values:
|
||||
// MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full).
|
||||
// MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore.
|
||||
// MZ_STREAM_ERROR if the stream is bogus.
|
||||
// MZ_PARAM_ERROR if one of the parameters is invalid.
|
||||
// MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.)
|
||||
int mz_deflate(mz_streamp pStream, int flush);
|
||||
|
||||
// mz_deflateEnd() deinitializes a compressor:
|
||||
// Return values:
|
||||
// MZ_OK on success.
|
||||
// MZ_STREAM_ERROR if the stream is bogus.
|
||||
int mz_deflateEnd(mz_streamp pStream);
|
||||
|
||||
// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH.
|
||||
mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);
|
||||
|
||||
// Single-call compression functions mz_compress() and mz_compress2():
|
||||
// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure.
|
||||
int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
|
||||
int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);
|
||||
|
||||
// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress().
|
||||
mz_ulong mz_compressBound(mz_ulong source_len);
|
||||
|
||||
// Initializes a decompressor.
|
||||
int mz_inflateInit(mz_streamp pStream);
|
||||
|
||||
// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer:
|
||||
// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate).
|
||||
int mz_inflateInit2(mz_streamp pStream, int window_bits);
|
||||
|
||||
// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible.
|
||||
// Parameters:
|
||||
// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
|
||||
// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
|
||||
// On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster).
|
||||
// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data.
|
||||
// Return values:
|
||||
// MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full.
|
||||
// MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified.
|
||||
// MZ_STREAM_ERROR if the stream is bogus.
|
||||
// MZ_DATA_ERROR if the deflate stream is invalid.
|
||||
// MZ_PARAM_ERROR if one of the parameters is invalid.
|
||||
// MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again
|
||||
// with more input data, or with more room in the output buffer (except when using single call decompression, described above).
|
||||
int mz_inflate(mz_streamp pStream, int flush);
|
||||
|
||||
// Deinitializes a decompressor.
|
||||
int mz_inflateEnd(mz_streamp pStream);
|
||||
|
||||
// Single-call decompression.
|
||||
// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure.
|
||||
int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
|
||||
|
||||
// Returns a string description of the specified error code, or NULL if the error code is invalid.
|
||||
const char *mz_error(int err);
|
||||
|
||||
// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports.
|
||||
// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project.
|
||||
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
|
||||
typedef unsigned char Byte;
|
||||
typedef unsigned int uInt;
|
||||
typedef mz_ulong uLong;
|
||||
typedef Byte Bytef;
|
||||
typedef uInt uIntf;
|
||||
typedef char charf;
|
||||
typedef int intf;
|
||||
typedef void *voidpf;
|
||||
typedef uLong uLongf;
|
||||
typedef void *voidp;
|
||||
typedef void *const voidpc;
|
||||
#define Z_NULL 0
|
||||
#define Z_NO_FLUSH MZ_NO_FLUSH
|
||||
#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
|
||||
#define Z_SYNC_FLUSH MZ_SYNC_FLUSH
|
||||
#define Z_FULL_FLUSH MZ_FULL_FLUSH
|
||||
#define Z_FINISH MZ_FINISH
|
||||
#define Z_BLOCK MZ_BLOCK
|
||||
#define Z_OK MZ_OK
|
||||
#define Z_STREAM_END MZ_STREAM_END
|
||||
#define Z_NEED_DICT MZ_NEED_DICT
|
||||
#define Z_ERRNO MZ_ERRNO
|
||||
#define Z_STREAM_ERROR MZ_STREAM_ERROR
|
||||
#define Z_DATA_ERROR MZ_DATA_ERROR
|
||||
#define Z_MEM_ERROR MZ_MEM_ERROR
|
||||
#define Z_BUF_ERROR MZ_BUF_ERROR
|
||||
#define Z_VERSION_ERROR MZ_VERSION_ERROR
|
||||
#define Z_PARAM_ERROR MZ_PARAM_ERROR
|
||||
#define Z_NO_COMPRESSION MZ_NO_COMPRESSION
|
||||
#define Z_BEST_SPEED MZ_BEST_SPEED
|
||||
#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
|
||||
#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
|
||||
#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
|
||||
#define Z_FILTERED MZ_FILTERED
|
||||
#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
|
||||
#define Z_RLE MZ_RLE
|
||||
#define Z_FIXED MZ_FIXED
|
||||
#define Z_DEFLATED MZ_DEFLATED
|
||||
#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
|
||||
#define alloc_func mz_alloc_func
|
||||
#define free_func mz_free_func
|
||||
#define internal_state mz_internal_state
|
||||
#define z_stream mz_stream
|
||||
#define deflateInit mz_deflateInit
|
||||
#define deflateInit2 mz_deflateInit2
|
||||
#define deflateReset mz_deflateReset
|
||||
#define deflate mz_deflate
|
||||
#define deflateEnd mz_deflateEnd
|
||||
#define deflateBound mz_deflateBound
|
||||
#define compress mz_compress
|
||||
#define compress2 mz_compress2
|
||||
#define compressBound mz_compressBound
|
||||
#define inflateInit mz_inflateInit
|
||||
#define inflateInit2 mz_inflateInit2
|
||||
#define inflate mz_inflate
|
||||
#define inflateEnd mz_inflateEnd
|
||||
#define uncompress mz_uncompress
|
||||
#define crc32 mz_crc32
|
||||
#define adler32 mz_adler32
|
||||
#define MAX_WBITS 15
|
||||
#define MAX_MEM_LEVEL 9
|
||||
#define zError mz_error
|
||||
#define ZLIB_VERSION MZ_VERSION
|
||||
#define ZLIB_VERNUM MZ_VERNUM
|
||||
#define ZLIB_VER_MAJOR MZ_VER_MAJOR
|
||||
#define ZLIB_VER_MINOR MZ_VER_MINOR
|
||||
#define ZLIB_VER_REVISION MZ_VER_REVISION
|
||||
#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION
|
||||
#define zlibVersion mz_version
|
||||
#define zlib_version mz_version()
|
||||
#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
|
||||
|
||||
#endif // MINIZ_NO_ZLIB_APIS
|
||||
|
||||
// ------------------- Types and macros
|
||||
|
||||
typedef unsigned char mz_uint8;
|
||||
typedef signed short mz_int16;
|
||||
typedef unsigned short mz_uint16;
|
||||
typedef unsigned int mz_uint32;
|
||||
typedef unsigned int mz_uint;
|
||||
typedef long long mz_int64;
|
||||
typedef unsigned long long mz_uint64;
|
||||
typedef int mz_bool;
|
||||
|
||||
#define MZ_FALSE (0)
|
||||
#define MZ_TRUE (1)
|
||||
|
||||
// MZ_MACRO_END closes the do { ... } body of multi-statement macros (see tinfl_init()).
// Under MSVC the loop condition is written as the comma expression (0, 0) instead of a plain 0, as an attempt to
// work around MSVC's spammy "warning C4127: conditional expression is constant" message.
#ifdef _MSC_VER
#define MZ_MACRO_END while (0, 0)
#else
#define MZ_MACRO_END while (0)
#endif
|
||||
|
||||
// ------------------- ZIP archive reading/writing
|
||||
|
||||
#ifndef MINIZ_NO_ARCHIVE_APIS
|
||||
|
||||
// Size limits used by the ZIP archive API.
// The two *_SIZE limits below dimension the m_filename and m_comment arrays of mz_zip_archive_file_stat.
enum
{
    MZ_ZIP_MAX_IO_BUF_SIZE               = 64 * 1024,
    MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE     = 260,
    MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
mz_uint32 m_file_index;
|
||||
mz_uint32 m_central_dir_ofs;
|
||||
mz_uint16 m_version_made_by;
|
||||
mz_uint16 m_version_needed;
|
||||
mz_uint16 m_bit_flag;
|
||||
mz_uint16 m_method;
|
||||
#ifndef MINIZ_NO_TIME
|
||||
time_t m_time;
|
||||
#endif
|
||||
mz_uint32 m_crc32;
|
||||
mz_uint64 m_comp_size;
|
||||
mz_uint64 m_uncomp_size;
|
||||
mz_uint16 m_internal_attr;
|
||||
mz_uint32 m_external_attr;
|
||||
mz_uint64 m_local_header_ofs;
|
||||
mz_uint32 m_comment_size;
|
||||
char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];
|
||||
char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];
|
||||
} mz_zip_archive_file_stat;
|
||||
|
||||
typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n);
|
||||
typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n);
|
||||
|
||||
struct mz_zip_internal_state_tag;
|
||||
typedef struct mz_zip_internal_state_tag mz_zip_internal_state;
|
||||
|
||||
// Mode of an mz_zip_archive object (stored in its m_zip_mode member).
typedef enum
{
    MZ_ZIP_MODE_INVALID                    = 0,
    MZ_ZIP_MODE_READING                    = 1,
    MZ_ZIP_MODE_WRITING                    = 2,
    MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
} mz_zip_mode;
|
||||
|
||||
typedef struct mz_zip_archive_tag
|
||||
{
|
||||
mz_uint64 m_archive_size;
|
||||
mz_uint64 m_central_directory_file_ofs;
|
||||
mz_uint m_total_files;
|
||||
mz_zip_mode m_zip_mode;
|
||||
|
||||
mz_uint m_file_offset_alignment;
|
||||
|
||||
mz_alloc_func m_pAlloc;
|
||||
mz_free_func m_pFree;
|
||||
mz_realloc_func m_pRealloc;
|
||||
void *m_pAlloc_opaque;
|
||||
|
||||
mz_file_read_func m_pRead;
|
||||
mz_file_write_func m_pWrite;
|
||||
void *m_pIO_opaque;
|
||||
|
||||
mz_zip_internal_state *m_pState;
|
||||
|
||||
} mz_zip_archive;
|
||||
|
||||
// Flag bits for the ZIP reader/writer APIs.
// The writer functions accept these logically OR'd with a compression level (0-10) in level_and_flags,
// which is why the values start at 0x0100.
typedef enum
{
    MZ_ZIP_FLAG_CASE_SENSITIVE                = 0x0100,
    MZ_ZIP_FLAG_IGNORE_PATH                   = 0x0200,
    MZ_ZIP_FLAG_COMPRESSED_DATA               = 0x0400,
    MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800
} mz_zip_flags;
|
||||
|
||||
// ZIP archive reading
|
||||
|
||||
// Inits a ZIP archive reader.
|
||||
// These functions read and validate the archive's central directory.
|
||||
mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags);
|
||||
mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags);
|
||||
|
||||
#ifndef MINIZ_NO_STDIO
|
||||
mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags);
|
||||
#endif
|
||||
|
||||
// Returns the total number of files in the archive.
|
||||
mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip);
|
||||
|
||||
// Returns detailed information about an archive file entry.
|
||||
mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat);
|
||||
|
||||
// Determines if an archive file entry is a directory entry.
|
||||
mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index);
|
||||
mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index);
|
||||
|
||||
// Retrieves the filename of an archive file entry.
|
||||
// Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename.
|
||||
mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size);
|
||||
|
||||
// Attempts to locate a file in the archive's central directory.
|
||||
// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH
|
||||
// Returns -1 if the file cannot be found.
|
||||
int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
|
||||
|
||||
// Extracts an archive file to a memory buffer using no memory allocation.
|
||||
mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
|
||||
mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
|
||||
|
||||
// Extracts an archive file to a memory buffer.
|
||||
mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags);
|
||||
mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags);
|
||||
|
||||
// Extracts an archive file to a dynamically allocated heap buffer.
|
||||
void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags);
|
||||
void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags);
|
||||
|
||||
// Extracts an archive file using a callback function to output the file's data.
|
||||
mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
|
||||
mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
|
||||
|
||||
#ifndef MINIZ_NO_STDIO
|
||||
// Extracts an archive file to a disk file and sets its last accessed and modified times.
|
||||
// This function only extracts files, not archive directory records.
|
||||
mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags);
|
||||
mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags);
|
||||
#endif
|
||||
|
||||
// Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used.
|
||||
mz_bool mz_zip_reader_end(mz_zip_archive *pZip);
|
||||
|
||||
// ZIP archive writing
|
||||
|
||||
#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
|
||||
|
||||
// Inits a ZIP archive writer.
|
||||
mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size);
|
||||
mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size);
|
||||
|
||||
#ifndef MINIZ_NO_STDIO
|
||||
mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning);
|
||||
#endif
|
||||
|
||||
// Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive.
|
||||
// For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called.
|
||||
// For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it).
|
||||
// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL.
|
||||
// Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before
|
||||
// the archive is finalized the file's central directory will be hosed.
|
||||
mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename);
|
||||
|
||||
// Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive.
|
||||
// To add a directory entry, call this method with an archive name ending in a forward slash and an empty buffer.
|
||||
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
|
||||
mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags);
|
||||
mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32);
|
||||
|
||||
#ifndef MINIZ_NO_STDIO
|
||||
// Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive.
|
||||
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
|
||||
mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
|
||||
#endif
|
||||
|
||||
// Adds a file to an archive by fully cloning the data from another archive.
|
||||
// This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields.
|
||||
mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index);
|
||||
|
||||
// Finalizes the archive by writing the central directory records followed by the end of central directory record.
|
||||
// After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end().
|
||||
// An archive must be manually finalized by calling this function for it to be valid.
|
||||
mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip);
|
||||
mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize);
|
||||
|
||||
// Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used.
|
||||
// Note for the archive to be valid, it must have been finalized before ending.
|
||||
mz_bool mz_zip_writer_end(mz_zip_archive *pZip);
|
||||
|
||||
// Misc. high-level helper functions:
|
||||
|
||||
// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive.
|
||||
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
|
||||
mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
|
||||
|
||||
// Reads a single file from an archive into a heap block.
|
||||
// Returns NULL on failure.
|
||||
void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags);
|
||||
|
||||
#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
|
||||
|
||||
#endif // #ifndef MINIZ_NO_ARCHIVE_APIS
|
||||
|
||||
// ------------------- Low-level Decompression API Definitions
|
||||
|
||||
// Decompression flags used by tinfl_decompress().
enum
{
    // If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream).
    // Otherwise, the input is a raw deflate stream.
    TINFL_FLAG_PARSE_ZLIB_HEADER = 1,

    // If set, there are more input bytes available beyond the end of the supplied input buffer.
    // If clear, the input buffer contains all remaining input.
    TINFL_FLAG_HAS_MORE_INPUT = 2,

    // If set, the output buffer is large enough to hold the entire decompressed stream.
    // If clear, the output buffer is at least the size of the dictionary (typically 32KB).
    TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,

    // Force adler-32 checksum computation of the decompressed bytes.
    TINFL_FLAG_COMPUTE_ADLER32 = 8
};
|
||||
|
||||
// High level decompression functions:
|
||||
// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc().
|
||||
// On entry:
|
||||
// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress.
|
||||
// On return:
|
||||
// Function returns a pointer to the decompressed data, or NULL on failure.
|
||||
// *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data.
|
||||
// The caller must call mz_free() on the returned block when it's no longer needed.
|
||||
void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
|
||||
|
||||
// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory.
|
||||
// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success.
|
||||
#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
|
||||
size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
|
||||
|
||||
// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer.
|
||||
// Returns 1 on success or 0 on failure.
|
||||
typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
|
||||
int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
|
||||
|
||||
struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor;
|
||||
|
||||
// Max size of LZ dictionary.
|
||||
#define TINFL_LZ_DICT_SIZE 32768
|
||||
|
||||
// Return status of tinfl_decompress(). Negative values are failures, zero is completion,
// and positive values indicate the call should be repeated with more input or more output space.
typedef enum
{
    TINFL_STATUS_BAD_PARAM        = -3,
    TINFL_STATUS_ADLER32_MISMATCH = -2,
    TINFL_STATUS_FAILED           = -1,
    TINFL_STATUS_DONE             = 0,
    TINFL_STATUS_NEEDS_MORE_INPUT = 1,
    TINFL_STATUS_HAS_MORE_OUTPUT  = 2
} tinfl_status;
|
||||
|
||||
// Initializes the decompressor to its initial state.
|
||||
#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END
|
||||
#define tinfl_get_adler32(r) (r)->m_check_adler32
|
||||
|
||||
// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability.
|
||||
// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output.
|
||||
tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);
|
||||
|
||||
// Internal/private bits follow.
// Sizing constants for the decompressor's Huffman tables and fast lookup table.
enum
{
    TINFL_MAX_HUFF_TABLES    = 3,
    TINFL_MAX_HUFF_SYMBOLS_0 = 288,
    TINFL_MAX_HUFF_SYMBOLS_1 = 32,
    TINFL_MAX_HUFF_SYMBOLS_2 = 19,
    TINFL_FAST_LOOKUP_BITS   = 10,
    TINFL_FAST_LOOKUP_SIZE   = 1 << TINFL_FAST_LOOKUP_BITS  // number of fast lookup entries (2^bits)
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];
|
||||
mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
|
||||
} tinfl_huff_table;
|
||||
|
||||
#if MINIZ_HAS_64BIT_REGISTERS
|
||||
#define TINFL_USE_64BIT_BITBUF 1
|
||||
#endif
|
||||
|
||||
#if TINFL_USE_64BIT_BITBUF
|
||||
typedef mz_uint64 tinfl_bit_buf_t;
|
||||
#define TINFL_BITBUF_SIZE (64)
|
||||
#else
|
||||
typedef mz_uint32 tinfl_bit_buf_t;
|
||||
#define TINFL_BITBUF_SIZE (32)
|
||||
#endif
|
||||
|
||||
struct tinfl_decompressor_tag
|
||||
{
|
||||
mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES];
|
||||
tinfl_bit_buf_t m_bit_buf;
|
||||
size_t m_dist_from_out_buf_start;
|
||||
tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];
|
||||
mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
|
||||
};
|
||||
|
||||
// ------------------- Low-level Compression API Definitions
|
||||
|
||||
// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently).
|
||||
#define TDEFL_LESS_MEMORY 0
|
||||
|
||||
// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search):
// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search.
// Probe counts: 0 = Huffman only, 1 = Huffman+LZ (fastest, worst compression), 4095 = Huffman+LZ (slowest, best compression).
enum
{
    TDEFL_HUFFMAN_ONLY       = 0,
    TDEFL_DEFAULT_MAX_PROBES = 128,
    TDEFL_MAX_PROBES_MASK    = 0xFFF  // selects the low 12 bits that hold the probe count
};
|
||||
|
||||
// Compressor flag bits. The low 12 bits are reserved to control the max # of hash probes per dictionary lookup
// (see TDEFL_MAX_PROBES_MASK), so these flags start at bit 12.
enum
{
    // If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data
    // at the end. Otherwise, you'll get raw deflate data.
    TDEFL_WRITE_ZLIB_HEADER = 0x01000,

    // Always compute the adler-32 of the input data (even when not writing zlib headers).
    TDEFL_COMPUTE_ADLER32 = 0x02000,

    // Set to use faster greedy parsing, instead of more efficient lazy parsing.
    TDEFL_GREEDY_PARSING_FLAG = 0x04000,

    // Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to
    // run given the same input (depending on the contents of memory).
    TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,

    // Only look for RLE matches (matches with a distance of 1).
    TDEFL_RLE_MATCHES = 0x10000,

    // Discards matches <= 5 chars if enabled.
    TDEFL_FILTER_MATCHES = 0x20000,

    // Disable usage of optimized Huffman tables.
    TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000,

    // Only use raw (uncompressed) deflate blocks.
    TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000
};
|
||||
|
||||
// High level compression functions:
|
||||
// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc().
|
||||
// On entry:
|
||||
// pSrc_buf, src_buf_len: Pointer and size of source block to compress.
|
||||
// flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression.
|
||||
// On return:
|
||||
// Function returns a pointer to the compressed data, or NULL on failure.
|
||||
// *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data.
|
||||
// The caller must free() the returned block when it's no longer needed.
|
||||
void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
|
||||
|
||||
// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory.
|
||||
// Returns 0 on failure.
|
||||
size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
|
||||
|
||||
// Compresses an image to a compressed PNG file in memory.
|
||||
// On entry:
|
||||
// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4.
|
||||
// The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory.
|
||||
// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
|
||||
// If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps).
|
||||
// On return:
|
||||
// Function returns a pointer to the compressed data, or NULL on failure.
|
||||
// *pLen_out will be set to the size of the PNG image file.
|
||||
// The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed.
|
||||
void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
|
||||
void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);
|
||||
|
||||
// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called with TDEFL_OUT_BUF_SIZE bytes at a time.
|
||||
typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
|
||||
|
||||
// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally.
|
||||
mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
|
||||
|
||||
// Fixed limits of the compressor: Huffman table counts/sizes, LZ dictionary size and match length bounds.
enum
{
  TDEFL_MAX_HUFF_TABLES    = 3,
  TDEFL_MAX_HUFF_SYMBOLS_0 = 288,
  TDEFL_MAX_HUFF_SYMBOLS_1 = 32,
  TDEFL_MAX_HUFF_SYMBOLS_2 = 19,
  TDEFL_LZ_DICT_SIZE       = 32768,
  TDEFL_LZ_DICT_SIZE_MASK  = TDEFL_LZ_DICT_SIZE - 1,
  TDEFL_MIN_MATCH_LEN      = 3,
  TDEFL_MAX_MATCH_LEN      = 258
};

// Only the LZ code buffer size and the hash width depend on TDEFL_LESS_MEMORY.
#if TDEFL_LESS_MEMORY
enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_LZ_HASH_BITS = 12 };
#else
enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_LZ_HASH_BITS = 15 };
#endif

// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes).
enum
{
  TDEFL_OUT_BUF_SIZE          = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10,
  TDEFL_MAX_HUFF_SYMBOLS      = 288,
  TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
  TDEFL_LZ_HASH_SHIFT         = (TDEFL_LZ_HASH_BITS + 2) / 3,
  TDEFL_LZ_HASH_SIZE          = 1 << TDEFL_LZ_HASH_BITS
};
|
||||
|
||||
// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions.
|
||||
// Status codes returned by the low-level tdefl functions. Negative values are failures.
typedef enum
{
  TDEFL_STATUS_BAD_PARAM = -2,
  TDEFL_STATUS_PUT_BUF_FAILED = -1,
  TDEFL_STATUS_OKAY = 0,
  TDEFL_STATUS_DONE = 1   // no trailing comma: strict C89/C++03 compilers reject one after the last enumerator
} tdefl_status;
|
||||
|
||||
// Flush modes accepted by tdefl_compress() / tdefl_compress_buffer().
// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums (note that the value 1 is not used here).
typedef enum
{
  TDEFL_NO_FLUSH   = 0,
  TDEFL_SYNC_FLUSH = 2,
  TDEFL_FULL_FLUSH = 3,
  TDEFL_FINISH     = 4
} tdefl_flush;
|
||||
|
||||
// tdefl's compression state structure.
|
||||
typedef struct
|
||||
{
|
||||
tdefl_put_buf_func_ptr m_pPut_buf_func;
|
||||
void *m_pPut_buf_user;
|
||||
mz_uint m_flags, m_max_probes[2];
|
||||
int m_greedy_parsing;
|
||||
mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
|
||||
mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
|
||||
mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer;
|
||||
mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish;
|
||||
tdefl_status m_prev_return_status;
|
||||
const void *m_pIn_buf;
|
||||
void *m_pOut_buf;
|
||||
size_t *m_pIn_buf_size, *m_pOut_buf_size;
|
||||
tdefl_flush m_flush;
|
||||
const mz_uint8 *m_pSrc;
|
||||
size_t m_src_buf_left, m_out_buf_ofs;
|
||||
mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
|
||||
mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
|
||||
mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
|
||||
mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
|
||||
mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
|
||||
mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
|
||||
mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
|
||||
mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
|
||||
} tdefl_compressor;
|
||||
|
||||
// Initializes the compressor.
|
||||
// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory.
|
||||
// pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression.
|
||||
// If pPut_buf_func is NULL the user should always call the tdefl_compress() API.
|
||||
// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.)
|
||||
tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
|
||||
|
||||
// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible.
|
||||
tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);
|
||||
|
||||
// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr.
|
||||
// tdefl_compress_buffer() always consumes the entire input buffer.
|
||||
tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);
|
||||
|
||||
tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
|
||||
mz_uint32 tdefl_get_adler32(tdefl_compressor *d);
|
||||
|
||||
// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS is defined, because it uses some of its macros.
|
||||
#ifndef MINIZ_NO_ZLIB_APIS
|
||||
// Create tdefl_compress() flags given zlib-style compression parameters.
|
||||
// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files)
|
||||
// window_bits may be -15 (raw deflate) or 15 (zlib)
|
||||
// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED
|
||||
mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);
|
||||
#endif // #ifndef MINIZ_NO_ZLIB_APIS
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINIZ_HEADER_INCLUDED
|
||||
14
test_conformance/CMakeCommon.txt
Normal file
14
test_conformance/CMakeCommon.txt
Normal file
@@ -0,0 +1,14 @@
|
||||
# Shared build recipe for one conformance test module.
#
# Included by a module's CMakeLists.txt after it has set:
#   MODULE_NAME             - module identifier (e.g. SVM)
#   ${MODULE_NAME}_SOURCES  - list of source files that make up the module
#
# Defines the executable
#   ${CONFORMANCE_PREFIX}<lower-cased MODULE_NAME>${CONFORMANCE_SUFFIX}
# and stores that name in ${MODULE_NAME}_OUT.

# Fail with a clear message instead of a confusing string()/add_executable() error.
if(NOT MODULE_NAME)
  message(FATAL_ERROR "CMakeCommon.txt: MODULE_NAME must be set before including this file")
endif()

# NOTE(review): this file used to start with
#   set_source_files_properties(COMPILE_FLAGS -msse2)
# which names no source files and therefore applied the flag to nothing. It is dropped here;
# if SSE2 is really wanted, add it per target and only for x86 GCC/Clang builds.

string(TOLOWER "${MODULE_NAME}" MODULE_NAME_LOWER)

set(${MODULE_NAME}_OUT "${CONFORMANCE_PREFIX}${MODULE_NAME_LOWER}${CONFORMANCE_SUFFIX}")

add_executable(${${MODULE_NAME}_OUT} ${${MODULE_NAME}_SOURCES})

# Compile every listed source, including the harness .c files, as C++.
set_source_files_properties(${${MODULE_NAME}_SOURCES} PROPERTIES LANGUAGE CXX)

# IDE folder used to group all conformance executables.
set_property(TARGET ${${MODULE_NAME}_OUT} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}")

# Keyword-less signature kept on purpose: the project requires only CMake 2.8, and the plain
# and keyword signatures must not be mixed on one target.
target_link_libraries(${${MODULE_NAME}_OUT} ${CLConform_LIBRARIES})
|
||||
62
test_conformance/CMakeLists.txt
Normal file
62
test_conformance/CMakeLists.txt
Normal file
@@ -0,0 +1,62 @@
|
||||
# Remember the current source directory (`test_conformance').
set(CLConf_Install_Base_Dir "${CMAKE_CURRENT_SOURCE_DIR}")

# Test suites are added in a fixed order. Unconditional runs are looped over; the suites that
# depend on an optional API sit between the runs, behind their *_IS_SUPPORTED switch.
foreach(clconf_suite_dir
    allocations
    api
    atomics
    basic
    buffers
    commonfns
    compatibility
    compiler
    computeinfo
    contractions
    conversions)
  add_subdirectory(${clconf_suite_dir})
endforeach()

if(D3D10_IS_SUPPORTED)
  add_subdirectory(d3d10)
endif()

if(D3D11_IS_SUPPORTED)
  add_subdirectory(d3d11)
endif()

foreach(clconf_suite_dir
    device_partition
    events
    geometrics)
  add_subdirectory(${clconf_suite_dir})
endforeach()

if(GL_IS_SUPPORTED)
  add_subdirectory(gl)
endif()

if(GLES_IS_SUPPORTED)
  add_subdirectory(gles)
endif()

foreach(clconf_suite_dir
    half
    headers
    images
    integer_ops
    math_brute_force
    mem_host_flags
    multiple_device_context
    printf
    profiling
    relationals
    select
    thread_dimensions
    vec_align
    vec_step
    c11_atomics
    device_execution
    non_uniform_work_group
    SVM
    generic_address_space
    subgroups
    workgroups
    pipes)
  add_subdirectory(${clconf_suite_dir})
endforeach()

# Test lists and the runner script shipped next to the test binaries.
# NOTE(review): install_files() is a deprecated command whose first argument is the destination
# directory; no destination is given here, so the first .csv name takes that role - confirm the
# intended install location.
install_files(
  opencl_conformance_tests_conversions.csv
  opencl_conformance_tests_d3d.csv
  opencl_conformance_tests_full.csv
  opencl_conformance_tests_full_no_math_or_conversions.csv
  opencl_conformance_tests_math.csv
  opencl_conformance_tests_quick.csv
  run_conformance.py
)
|
||||
|
||||
24
test_conformance/Jamfile
Normal file
24
test_conformance/Jamfile
Normal file
@@ -0,0 +1,24 @@
|
||||
# Requirements applied to every project below: use the shared test harness library and
# build with compiler warnings switched off.
project
    : requirements
      <library>/harness//harness
      <warnings>off
    ;

# Location of the project that the /harness id above refers to.
use-project /harness : ../test_common/harness ;

# Sub-projects (one directory each) built from this Jamfile.
proj_lst =
    allocations
    api
    atomics
    basic
    buffers
    commonfns
    compiler
    computeinfo
    contractions
    conversions
    events
    geometrics
    gl
    half
    images
    integer_ops
    math_brute_force
    multiple_device_context
    profiling
    relationals
    select
    thread_dimensions
    ;

for proj in $(proj_lst)
{
    build-project $(proj) ;
}

# Copy the .csv and .py files of this directory into the per-variant test_conformance
# directory under $(DIST).
install data
    : [ glob *.csv ] [ glob *.py ]
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance
      <variant>release:<location>$(DIST)/release/tests/test_conformance
    ;
|
||||
|
||||
61
test_conformance/Makefile
Normal file
61
test_conformance/Makefile
Normal file
@@ -0,0 +1,61 @@
|
||||
|
||||
# Test suite directories driven by this Makefile. Every entry is a directory path with a
# trailing slash; each one is expected to contain its own Makefile.
PRODUCTS = \
    allocations/ \
    api/ \
    atomics/ \
    basic/ \
    buffers/ \
    commonfns/ \
    compatibility/test_conformance/ \
    compiler/ \
    computeinfo/ \
    contractions/ \
    conversions/ \
    device_partition/ \
    events/ \
    geometrics/ \
    gl/ \
    half/ \
    headers/ \
    images/ \
    integer_ops/ \
    math_brute_force/ \
    mem_host_flags/ \
    multiple_device_context/ \
    printf/ \
    profiling/ \
    relationals/ \
    select/ \
    thread_dimensions/ \
    vec_align/ \
    vec_step/ \
    workgroups/

TOP=$(shell pwd)

all: $(PRODUCTS)

# Run the "clean" target of every suite directory that exists; missing directories only
# produce a warning. Sub-makes are started through $(MAKE) so that command-line flags and the
# jobserver are passed down to them.
clean:
	@for testdir in $(dir $(PRODUCTS)) ; \
	    do ( \
	    echo "==================================================================================" ; \
	    echo "Cleaning $$testdir" ; \
	    echo "==================================================================================" ; \
	    if test -d $$testdir; \
	    then cd $$testdir && $(MAKE) clean; \
	    else echo "Warning: Directory '$$testdir' Does Not Exist"; \
	    fi; \
	    ); \
	done

# Build one suite. -i keeps the sub-make going past failing commands, as before.
$(PRODUCTS):
	@echo "==================================================================================" ;
	@echo "(`date "+%H:%M:%S"`) Make $@" ;
	@echo "==================================================================================" ;
	@if test -d $@; \
	    then cd $(dir $@) && $(MAKE) -i; \
	    else echo "Warning: Directory '$@' Does Not Exist"; \
	    fi

.PHONY: clean $(PRODUCTS) all
|
||||
24
test_conformance/SVM/CMakeLists.txt
Normal file
24
test_conformance/SVM/CMakeLists.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
# Build description of the SVM conformance module; the actual target is created by
# ../CMakeCommon.txt from MODULE_NAME and ${MODULE_NAME}_SOURCES.
set(MODULE_NAME SVM)

# Test sources that live in this directory.
set(SVM_TEST_SOURCES
  main.cpp
  test_allocate_shared_buffer.cpp
  test_byte_granularity.cpp
  test_cross_buffer_pointers.cpp
  test_enqueue_api.cpp
  test_fine_grain_memory_consistency.cpp
  test_fine_grain_sync_buffers.cpp
  test_pointer_passing.cpp
  test_set_kernel_exec_info_svm_ptrs.cpp
  test_shared_address_space_coarse_grain.cpp
  test_shared_address_space_fine_grain.cpp
  test_shared_address_space_fine_grain_buffers.cpp
  test_shared_sub_buffers.cpp
)

# Shared harness sources compiled directly into this module.
# NOTE(review): the hand-written Makefile of this module also builds threadTesting.c and
# typeWrappers.cpp and does not build msvc9.c - confirm whether the two lists should agree.
set(SVM_HARNESS_SOURCES
  ../../test_common/harness/testHarness.c
  ../../test_common/harness/errorHelpers.c
  ../../test_common/harness/kernelHelpers.c
  ../../test_common/harness/mt19937.c
  ../../test_common/harness/msvc9.c
)

set(${MODULE_NAME}_SOURCES ${SVM_TEST_SOURCES} ${SVM_HARNESS_SOURCES})

include(../CMakeCommon.txt)
|
||||
53
test_conformance/SVM/Makefile
Normal file
53
test_conformance/SVM/Makefile
Normal file
@@ -0,0 +1,53 @@
|
||||
# Optional ATF framework support, enabled by defining BUILD_WITH_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Sources of the SVM test binary. The entry point is main.cpp (it was listed as main.c, a file
# that is not part of this directory).
SRCS = main.cpp \
    test_allocate_shared_buffer.cpp \
    test_byte_granularity.cpp \
    test_cross_buffer_pointers.cpp \
    test_enqueue_api.cpp \
    test_fine_grain_memory_consistency.cpp \
    test_fine_grain_sync_buffers.cpp \
    test_pointer_passing.cpp \
    test_set_kernel_exec_info_svm_ptrs.cpp \
    test_shared_address_space_coarse_grain.cpp \
    test_shared_address_space_fine_grain_buffers.cpp \
    test_shared_address_space_fine_grain.cpp \
    test_shared_sub_buffers.cpp \
    ../../test_common/harness/errorHelpers.c \
    ../../test_common/harness/threadTesting.c \
    ../../test_common/harness/testHarness.c \
    ../../test_common/harness/kernelHelpers.c \
    ../../test_common/harness/typeWrappers.cpp \
    ../../test_common/harness/mt19937.c

DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
TARGET = test_SVM
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# The C++ driver is used for .c and .cpp sources alike, and for linking.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# One object per source: first map *.c to *.o, then *.cpp to *.o.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =
all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule in this Makefile.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
100
test_conformance/SVM/common.h
Normal file
100
test_conformance/SVM/common.h
Normal file
@@ -0,0 +1,100 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __COMMON_H__
|
||||
#define __COMMON_H__
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
memory_order_relaxed,
|
||||
memory_order_acquire,
|
||||
memory_order_release,
|
||||
memory_order_acq_rel,
|
||||
memory_order_seq_cst
|
||||
} cl_memory_order;
|
||||
|
||||
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order);
|
||||
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o);
|
||||
|
||||
template <typename T>
|
||||
bool AtomicCompareExchangeStrongExplicit(volatile T *a, T *expected, T desired,
|
||||
cl_memory_order order_success,
|
||||
cl_memory_order order_failure)
|
||||
{
|
||||
T tmp;
|
||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||
tmp = (T)InterlockedCompareExchange((volatile LONG *)a, (LONG)desired, *(LONG *)expected);
|
||||
#elif defined(__GNUC__)
|
||||
tmp = (T)__sync_val_compare_and_swap((volatile intptr_t*)a, (intptr_t)(*expected), (intptr_t)desired);
|
||||
#else
|
||||
log_info("Host function not implemented: atomic_compare_exchange\n");
|
||||
tmp = 0;
|
||||
#endif
|
||||
if(tmp == *expected)
|
||||
return true;
|
||||
*expected = tmp;
|
||||
return false;
|
||||
}
|
||||
|
||||
// test_error2 checks for an error code and/or a NULL pointer:
//   - if error_code is non-zero it is handed to test_error() together with msg;
//   - otherwise, if ptr is NULL, the problem is logged through print_null_error() and the
//     enclosing function returns -1.
// The condition tests the error_code argument itself; it used to test a variable literally named
// `error`, so the check only worked for callers that happened to use that name. error_code may be
// evaluated twice, so pass a plain variable.
#define test_error2(error_code, ptr, msg) { if((error_code) != 0) { test_error(error_code, msg); } else { if(NULL == (ptr)) {print_null_error(msg); return -1;} } }
// Logs msg with the source location. The expansion already ends with a semicolon.
#define print_null_error(msg) log_error("ERROR: %s! (NULL pointer detected %s:%d)\n", msg, __FILE__, __LINE__ );
|
||||
|
||||
// max possible number of queues needed, 1 for each device in platform.
|
||||
#define MAXQ 32
|
||||
|
||||
typedef struct Node{
|
||||
cl_int global_id;
|
||||
cl_int position_in_list;
|
||||
struct Node* pNext;
|
||||
} Node;
|
||||
|
||||
extern void create_linked_lists(Node* pNodes, size_t num_lists, int list_length);
|
||||
extern cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length);
|
||||
|
||||
extern cl_int create_linked_lists_on_device(int qi, cl_command_queue q, cl_mem allocator, cl_kernel k, size_t numLists );
|
||||
extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, cl_mem num_correct, cl_kernel k, cl_int ListLength, size_t numLists );
|
||||
extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists );
|
||||
extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists );
|
||||
|
||||
extern int test_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps);
|
||||
|
||||
extern const char *linked_list_create_and_verify_kernels[];
|
||||
|
||||
#endif // #ifndef __COMMON_H__
|
||||
|
||||
338
test_conformance/SVM/main.cpp
Normal file
338
test_conformance/SVM/main.cpp
Normal file
@@ -0,0 +1,338 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#include "common.h"
|
||||
|
||||
// SVM Atomic wrappers.
|
||||
// Platforms that support SVM atomics (atomics that work across the host and devices) need to implement these host side functions correctly.
|
||||
// Platforms that do not support SVM atomics can simpy implement these functions as empty stubs since the functions will not be called.
|
||||
// For now only Windows x86 is implemented, add support for other platforms as needed.
|
||||
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order)
|
||||
{
|
||||
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
|
||||
return *pValue; // provided the value is aligned x86 doesn't need anything more than this for seq_cst.
|
||||
#elif defined(__GNUC__)
|
||||
return __sync_add_and_fetch(pValue, 0);
|
||||
#else
|
||||
log_error("ERROR: AtomicLoadExplicit function not implemented\n");
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
// all the x86 atomics are seq_cst, so don't need to do anything with the memory order parameter.
|
||||
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o)
|
||||
{
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
|
||||
return InterlockedExchangeAdd( (volatile LONG*) object, operand);
|
||||
#elif defined(__GNUC__)
|
||||
return __sync_fetch_and_add(object, operand);
|
||||
#else
|
||||
log_error("ERROR: AtomicFetchAddExplicit function not implemented\n");
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Atomically replaces *object with desired and returns the value *object held before.
// The memory order argument is ignored. Returns -1 (after logging an error) when no
// implementation is available.
cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo)
{
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
    return InterlockedExchange( (volatile LONG*) object, desired);
#elif defined(__GNUC__)
    // __sync_lock_test_and_set is documented as an acquire barrier only, so earlier stores are
    // not guaranteed to be visible when the exchange happens. A full barrier in front of it
    // restores that ordering on targets where the exchange alone is not a full fence.
    __sync_synchronize();
    return __sync_lock_test_and_set(object, desired);
#else
    log_error("ERROR: AtomicExchangeExplicit function not implemented\n");
    return -1;
#endif
}
|
||||
|
||||
|
||||
const char *linked_list_create_and_verify_kernels[] = {
|
||||
"typedef struct Node {\n"
|
||||
" int global_id;\n"
|
||||
" int position_in_list;\n"
|
||||
" __global struct Node* pNext;\n"
|
||||
"} Node;\n"
|
||||
"\n"
|
||||
// The allocation_index parameter must be initialized on the host to N work-items
|
||||
// The first N nodes in pNodes will be the heads of the lists.
|
||||
"__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" __global Node *pNode = &pNodes[i];\n"
|
||||
"\n"
|
||||
" pNode->global_id = i;\n"
|
||||
" pNode->position_in_list = 0;\n"
|
||||
"\n"
|
||||
" __global Node *pNew;\n"
|
||||
" for(int j=1; j < list_length; j++)\n"
|
||||
" {\n"
|
||||
" pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n"
|
||||
" pNew->global_id = i;\n"
|
||||
" pNew->position_in_list = j;\n"
|
||||
" pNode->pNext = pNew; // link new node onto end of list\n"
|
||||
" pNode = pNew; // move to end of list\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
|
||||
"__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" __global Node *pNode = &pNodes[i];\n"
|
||||
"\n"
|
||||
" for(int j=0; j < list_length; j++)\n"
|
||||
" {\n"
|
||||
" if( pNode->global_id == i && pNode->position_in_list == j)\n"
|
||||
" {\n"
|
||||
" atomic_inc(num_correct);\n"
|
||||
" } \n"
|
||||
" else {\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" pNode = pNode->pNext;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
// The first N nodes in pNodes will be the heads of the lists.
|
||||
void create_linked_lists(Node* pNodes, size_t num_lists, int list_length)
|
||||
{
|
||||
size_t allocation_index = num_lists; // heads of lists are in first num_lists nodes.
|
||||
|
||||
for(cl_uint i = 0; i < num_lists; i++)
|
||||
{
|
||||
Node *pNode = &pNodes[i];
|
||||
pNode->global_id = i;
|
||||
pNode->position_in_list = 0;
|
||||
Node *pNew;
|
||||
for(int j=1; j < list_length; j++)
|
||||
{
|
||||
pNew = &pNodes[ allocation_index++ ];// allocate a new node
|
||||
pNew->global_id = i;
|
||||
pNew->position_in_list = j;
|
||||
pNode->pNext = pNew; // link new node onto end of list
|
||||
pNode = pNew; // move to end of list
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Walks the num_lists lists whose heads are the first num_lists entries of pNodes and counts
// the nodes that carry the expected list index and position. A list is abandoned at its first
// mismatching node. Logs the outcome and returns CL_SUCCESS when all
// list_length * num_lists nodes matched, -1 otherwise.
cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length)
{
    int matched = 0;

    log_info(" and verifying on host ");
    for(cl_uint list = 0; list < num_lists; ++list)
    {
        Node *cursor = &pNodes[list];
        for(int pos = 0; pos < list_length; ++pos)
        {
            if(!(cursor->global_id == list && cursor->position_in_list == pos))
                break;
            ++matched;
            cursor = cursor->pNext;
        }
    }

    if(matched == list_length * (cl_uint)num_lists)
    {
        log_info("Passed\n");
        return CL_SUCCESS;
    }

    log_info("Failed\n");
    return -1;
}
|
||||
|
||||
// Note that we don't use the context provided by the test harness since it doesn't support multiple devices,
|
||||
// so we create our own context here that has all devices, we use the same platform that the harness used.
|
||||
// Creates the context, the per-device command queues and (optionally) the
// program shared by an SVM test.
//
//   device_from_harness  device picked by the harness; it is always placed
//                        first in the candidate list.
//   ppCodeString         pointer to the kernel source string, or NULL when the
//                        test needs no program (*program is then not touched).
//   context, program     receive the created objects.
//   queues               caller-provided array that receives one queue per
//                        capable device.
//                        NOTE(review): no capacity is passed in, so the array
//                        must be able to hold one entry per platform device.
//   num_devices          receives the number of capable devices.
//   required_svm_caps    SVM capability bits every selected device must offer.
//
// Returns 0 on success and 1 when no device offers the requested capabilities
// (callers are expected to skip the test and count it as passing). Failures
// return -1 from the explicit checks below, or whatever the test_error macro
// returns on the paths that use it.
cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps)
{
    cl_int error;

    // Find out which platform the harness is using.
    cl_platform_id platform_id;
    error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform_id, NULL);
    test_error(error, "clGetDeviceInfo failed");

    error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, num_devices);
    test_error(error, "clGetDeviceIDs failed");

    std::vector<cl_device_id> devicesTmp(*num_devices), devices, capable_devices;

    error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL);
    test_error(error, "clGetDeviceIDs failed");

    // Harness device first, followed by every other device of the platform.
    devices.push_back(device_from_harness);
    for (size_t i = 0; i < devicesTmp.size(); ++i)
    {
        if (device_from_harness != devicesTmp[i])
            devices.push_back(devicesTmp[i]);
    }

    // Select only the devices that support the SVM level needed for the test.
    // Note that if the requested SVM capabilities are not supported by any
    // device then the test still passes (even though it does not execute).
    cl_device_svm_capabilities caps;
    cl_uint num_capable_devices = 0;
    for (cl_uint i = 0; i < *num_devices; i++)
    {
        size_t ret_len = 0;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 0, 0, &ret_len);
        if (error != CL_SUCCESS)
        {
            log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
            return -1;
        }

        // One extra zeroed element guarantees NUL termination.
        std::vector<char> oclVersion(ret_len + 1);
        error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(char) * oclVersion.size(), &oclVersion[0], 0);
        if (error != CL_SUCCESS)
        {
            log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
            return -1;
        }

        // The version number follows the 7-character "OpenCL " prefix. Only
        // index past the prefix when the string is long enough; a shorter
        // string leaves the version at 0.0.
        const size_t versionOffset = 7;
        double version = 0.0;
        if (oclVersion.size() > versionOffset)
        {
            std::string versionStr(&oclVersion[versionOffset]);
            std::stringstream stream;
            stream << versionStr;
            stream >> version;
        }

        // Devices other than the harness device are only used when they
        // report OpenCL 2.0 or newer.
        if (device_from_harness != devices[i] && version < 2.0)
        {
            continue;
        }

        error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
        test_error(error, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
        if (caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
        {
            log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value");
            return -1;
        }
        if ((caps & required_svm_caps) == required_svm_caps)
        {
            capable_devices.push_back(devices[i]);
            ++num_capable_devices;
        }
    }

    // From here on only the devices capable of the requested SVM level matter.
    devices = capable_devices;
    *num_devices = num_capable_devices;
    if (num_capable_devices == 0)
    {
        log_info("Requested SVM level not supported by any device on this platform, test not executed.\n");
        return 1; // 1 indicates do not execute, but counts as passing.
    }

    cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 };
    *context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error);
    test_error(error, "Unable to create context");

    for (cl_uint i = 0; i < *num_devices; i++)
    {
        queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error);
        test_error(error, "clCreateCommandQueue failed");
    }

    if (ppCodeString)
    {
        *program = clCreateProgramWithSource(*context, 1, ppCodeString, NULL, &error);
        test_error(error, "clCreateProgramWithSource failed");

        error = clBuildProgram(*program, 0, NULL, "-cl-std=CL2.0", NULL, NULL);
        if (error != CL_SUCCESS)
        {
            print_error(error, "clBuildProgram failed");

            // Fetch the build log into a buffer sized from the reported
            // length, so a long log cannot overflow a fixed-size array.
            size_t buildLogSize = 0;
            error = clGetProgramBuildInfo(*program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize);
            if (error)
            {
                print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                return -1;
            }

            // The extra zeroed byte keeps the log NUL-terminated.
            std::vector<char> buildLog(buildLogSize + 1, 0);
            error = clGetProgramBuildInfo(*program, devices[0], CL_PROGRAM_BUILD_LOG, buildLog.size(), &buildLog[0], NULL);
            if (error)
            {
                print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                return -1;
            }

            log_info("%s", &buildLog[0]);
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
// Test entry points handed to the harness. The order must match
// basefn_names: entry i here is reported under name i there.
basefn basefn_list[] = {
    // byte granularity and kernel exec info
    test_byte_granularity,
    test_set_kernel_exec_info_svm_ptrs,

    // fine-grain tests
    test_fine_grain_memory_consistency,
    test_fine_grain_sync_buffers,
    test_shared_address_space_fine_grain,
    test_shared_sub_buffers,
    test_shared_address_space_fine_grain_buffers,

    // allocation and coarse-grain tests
    test_allocate_shared_buffer,
    test_shared_address_space_coarse_grain_old_api,
    test_shared_address_space_coarse_grain_new_api,
    test_cross_buffer_pointers_coarse_grain,

    // pointer passing and enqueue API
    test_svm_pointer_passing,
    test_enqueue_api,
};
|
||||
|
||||
// Names under which the tests are registered with the harness. The order must
// match basefn_list: name i here belongs to function i there.
const char *basefn_names[] = {
    // byte granularity and kernel exec info
    "svm_byte_granularity",
    "svm_set_kernel_exec_info_svm_ptrs",

    // fine-grain tests
    "svm_fine_grain_memory_consistency",
    "svm_fine_grain_sync_buffers",
    "svm_shared_address_space_fine_grain",
    "svm_shared_sub_buffers",
    "svm_shared_address_space_fine_grain_buffers",

    // allocation and coarse-grain tests
    "svm_allocate_shared_buffer",
    "svm_shared_address_space_coarse_grain_old_api",
    "svm_shared_address_space_coarse_grain_new_api",
    "svm_cross_buffer_pointers_coarse_grain",

    // pointer passing and enqueue API
    "svm_pointer_passing",
    "svm_enqueue_api",
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, true, 0 );
|
||||
}
|
||||
|
||||
|
||||
|
||||
107
test_conformance/SVM/test_allocate_shared_buffer.cpp
Normal file
107
test_conformance/SVM/test_allocate_shared_buffer.cpp
Normal file
@@ -0,0 +1,107 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// Flag combinations passed to clSVMAlloc by test_allocate_shared_buffer.
// Entry i is described by flag_set_names[i]; keep both tables in step.
const cl_mem_flags flag_set[] = {
    // plain (coarse-grain) allocations
    CL_MEM_READ_WRITE,
    CL_MEM_WRITE_ONLY,
    CL_MEM_READ_ONLY,

    // fine-grain buffer allocations
    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,

    // fine-grain buffer allocations with atomics
    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,

    // no flags at all
    0
};
|
||||
// Printable descriptions of the flag_set entries, used in log output.
// Entry i describes flag_set[i]; keep both tables in step.
const char* flag_set_names[] = {
    // plain (coarse-grain) allocations
    "CL_MEM_READ_WRITE",
    "CL_MEM_WRITE_ONLY",
    "CL_MEM_READ_ONLY",

    // fine-grain buffer allocations
    "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER",
    "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",
    "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",

    // fine-grain buffer allocations with atomics
    "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
    "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
    "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",

    // no flags at all
    "0"
};
|
||||
|
||||
|
||||
// For every flag combination in flag_set that the device supports: allocates
// SVM memory with clSVMAlloc, wraps it in a buffer created with
// CL_MEM_USE_HOST_PTR and checks that mapping the buffer yields the very
// pointer clSVMAlloc returned.
//
// context2, queue and num_elements are not used; the test builds its own
// context and queues through create_cl_objects.
//
// Returns 0 on success and -1 on failure (paths that go through the
// test_error macro return whatever that macro returns).
int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    cl_device_svm_capabilities caps;
    err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
    test_error(err,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");

    // No program is needed (NULL source). Any non-zero result, including the
    // "no capable device" value 1, is treated as a failure here.
    err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(err) return -1;

    size_t size = 1024;

    // iteration over flag combos
    int num_flags = sizeof(flag_set)/sizeof(cl_mem_flags);
    for(int i = 0; i < num_flags; i++)
    {
        // Skip combinations that need an SVM capability the device lacks.
        if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
            || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0))
        {
            log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]);
            continue;
        }

        log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);
        cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0);
        if(pBufData1 == NULL)
        {
            log_error("SVMalloc returned NULL");
            return -1;
        }

        {
            // Inner scope: the buffer wrapper goes out of scope (releasing the
            // buffer) before the SVM allocation backing it is freed below.
            clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err);
            test_error(err,"clCreateBuffer failed");

            cl_char *pBufData2 = NULL;

            // Start with read+write access and remove whatever a host-access
            // restriction forbids. The previous code started from
            // CL_MAP_READ | CL_MAP_READ, so the XOR that followed would have
            // added a bit instead of removing one.
            cl_map_flags flags = CL_MAP_READ | CL_MAP_WRITE;
            if(flag_set[i] & CL_MEM_HOST_READ_ONLY) flags &= ~(cl_map_flags)CL_MAP_WRITE;
            if(flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags &= ~(cl_map_flags)CL_MAP_READ;

            if(!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
            {
                pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err);
                test_error(err, "clEnqueueMapBuffer failed");

                // pBufData1 is known to be non-NULL here (checked above), so
                // comparing the two pointers is sufficient.
                if(pBufData2 != pBufData1)
                {
                    log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc");
                    return -1;
                }
            }
        }

        clSVMFree(context, pBufData1);
    }

    return 0;
}
|
||||
148
test_conformance/SVM/test_byte_granularity.cpp
Normal file
148
test_conformance/SVM/test_byte_granularity.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C source for the byte-granularity test. The adjacent string literals
// below concatenate into a single source string holding two kernels.
const char *byte_manipulation_kernels[] = {
    // write_owned_locations:
    // Each device writes its id into the bytes that it "owns"; ownership is
    // round robin (global_id % num_id).
    // num_id is the number of SVM devices in the system plus one (for the host code).
    // id is the index of the device that this kernel is executing on.
    // For example, with 2 SVM devices and the host, the buffer looks like this
    // once both devices and the host have written their ids:
    //   0, 1, 2, 0, 1, 2, 0, 1, 2...
    "__kernel void write_owned_locations(__global char* a, uint num_id, uint id)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " int owner = i % num_id;\n"
    " if(id == owner) \n"
    " a[i] = id;\n" // modify the location only if it belongs to this device; write the id
    "}\n"

    // sum_neighbor_locations:
    // Verifies that a device can see the byte-sized updates made by the other
    // devices: sums up the ids found to its right and compares against the
    // expected value.
    // Note: this must be launched with a reduced NDRange so that the neighbor
    // accesses don't go past the end of the buffer.
    // For example, with two SVM devices and the host (3 writers in total) the
    // buffer looks like this:
    //   0,1,2,0,1,2...
    // and the expected sum at each point is 0+1+2 = 3.
    "__kernel void sum_neighbor_locations(__global char* a, uint num_devices, volatile __global uint* error_count)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " uint expected_sum = (num_devices * (num_devices - 1))/2;\n"
    " uint sum = 0;\n"
    " for(uint j=0; j<num_devices; j++) {\n"
    " sum += a[i + j];\n" // add my neighbors to the right
    " }\n"
    " if(sum != expected_sum)\n"
    " atomic_inc(error_count);\n"
    "}\n"
};
|
||||
|
||||
|
||||
|
||||
// Checks byte-granular sharing of a fine-grain SVM buffer: every capable
// device and the host concurrently write their own id into the bytes they
// "own" (round robin), then every device and the host verify that the sum of
// each run of neighboring bytes matches the expected sum of all ids.
//
// c and queue are not used; the test builds its own context and queues through
// create_cl_objects. num_elements is the size of the shared buffer in bytes.
//
// Returns 0 when the test passes or is skipped because no device offers
// fine-grain buffer SVM, and -1 on failure (paths that go through the
// test_error macro return whatever that macro returns).
int test_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    clContextWrapper context;
    clProgramWrapper program;
    clKernelWrapper k1,k2;
    clCommandQueueWrapper queues[MAXQ];

    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;

    err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
    if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(err < 0) return -1; // fail test.

    cl_uint num_devices_plus_host = num_devices + 1;

    // The range computations below subtract the writer count from
    // num_elements in unsigned arithmetic; reject sizes that would wrap.
    if(num_elements <= 0 || (cl_uint)num_elements < num_devices_plus_host)
    {
        log_error("num_elements (%d) is too small for %u devices plus the host\n", num_elements, num_devices);
        return -1;
    }

    k1 = clCreateKernel(program, "write_owned_locations", &err);
    test_error(err, "clCreateKernel failed");
    k2 = clCreateKernel(program, "sum_neighbor_locations", &err);
    test_error(err, "clCreateKernel failed");

    cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0);
    if(pA == NULL)
    {
        log_error("clSVMAlloc failed for the shared byte buffer\n");
        return -1;
    }

    // One error counter per device, each in its own fine-grain SVM allocation.
    cl_uint **error_counts = (cl_uint**) malloc(sizeof(cl_uint*) * num_devices);
    if(error_counts == NULL)
    {
        log_error("malloc failed for the error counter table\n");
        clSVMFree(context, pA);
        return -1;
    }

    for(cl_uint i=0; i < num_devices; i++) {
        error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0);
        if(error_counts[i] == NULL)
        {
            log_error("clSVMAlloc failed for an error counter\n");
            for(cl_uint j=0; j < i; j++) clSVMFree(context, error_counts[j]);
            free(error_counts);
            clSVMFree(context, pA);
            return -1;
        }
        *error_counts[i] = 0;
    }
    for(int i=0; i < num_elements; i++) pA[i] = -1;

    // NOTE(review): the test_error paths below return without releasing pA and
    // the error counters.
    err |= clSetKernelArgSVMPointer(k1, 0, pA);
    err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host);
    test_error(err, "clSetKernelArg failed");

    // get all the devices going simultaneously
    size_t element_num = num_elements;
    for(cl_uint d=0; d < num_devices; d++) // device ids start at 0; the host uses id num_devices.
    {
        err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
        test_error(err, "clSetKernelArg failed");
        err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL);
        test_error(err,"clEnqueueNDRangeKernel failed");
    }

    for(cl_uint d=0; d < num_devices; d++) clFlush(queues[d]);

    // The host writes its own locations while the devices are running.
    cl_uint host_id = num_devices; // host code will take the id above the devices.
    for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id;

    for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);

    // now check that each device can see the byte writes made by the other devices.
    err |= clSetKernelArgSVMPointer(k2, 0, pA);
    err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
    test_error(err, "clSetKernelArg failed");

    // adjusted so k2 doesn't read past end of buffer
    size_t adjusted_num_elements = num_elements - num_devices;
    for(cl_uint id = 0; id < num_devices; id++)
    {
        err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
        test_error(err, "clSetKernelArg failed");

        err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL);
        test_error(err,"clEnqueueNDRangeKernel failed");
    }

    for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);

    bool failed = false;

    // see if any of the devices found errors
    for(cl_uint i=0; i < num_devices; i++) {
        if(*error_counts[i] > 0)
            failed = true;
    }

    // check that host can see the byte writes made by the devices.
    cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2;
    cl_uint host_check_count = (cl_uint)num_elements - num_devices_plus_host; // cannot wrap, see guard above
    for(cl_uint i = 0; i < host_check_count; i++)
    {
        int sum = 0;
        for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j];
        if((cl_uint)sum != expected)
            failed = true;
    }

    clSVMFree(context, pA);
    for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]);
    free(error_counts); // the pointer table itself was previously leaked

    if(failed)
        return -1;
    return 0;
}
|
||||
219
test_conformance/SVM/test_cross_buffer_pointers.cpp
Normal file
219
test_conformance/SVM/test_cross_buffer_pointers.cpp
Normal file
@@ -0,0 +1,219 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// create linked lists that use nodes from two different buffers.
|
||||
// OpenCL C source for the cross-buffer pointer test. The adjacent string
// literals below concatenate into a single source string that declares the
// Node type, a node allocator and the create/verify kernels.
const char *SVMCrossBufferPointers_test_kernel[] = {
    // Node type used by the kernels.
    "\n"
    "typedef struct Node {\n"
    " int global_id;\n"
    " int position_in_list;\n"
    " __global struct Node* pNext;\n"
    "} Node;\n"
    "\n"

    // allocate_node: hands out the next free node. Odd and even work items
    // take their nodes from different buffers, so adjacent work items build
    // lists whose links cross buffers. Both buffers share one allocation index.
    "__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
    "{\n"
    " if(i & 0x1)\n"
    " return &pNodes1[atomic_inc(allocation_index)];\n"
    " else\n"
    " return &pNodes2[atomic_inc(allocation_index)];\n"
    "}\n"
    "\n"

    // create_linked_lists: each work item builds one list of list_length nodes.
    // The allocation_index parameter must be initialized on the host to N work-items.
    // The first N nodes in pNodes will be the heads of the lists.
    "__kernel void create_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global int* allocation_index, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes[i];\n"
    "\n"
    " pNode->global_id = i;\n"
    " pNode->position_in_list = 0;\n"
    "\n"
    " __global Node *pNew;\n"
    " for(int j=1; j < list_length; j++)\n"
    " {\n"
    " pNew = allocate_node(pNodes, pNodes2, allocation_index, i);\n"
    " pNew->global_id = i;\n"
    " pNew->position_in_list = j;\n"
    " pNode->pNext = pNew; // link new node onto end of list\n"
    " pNode = pNew; // move to end of list\n"
    " }\n"
    "}\n"
    "\n"

    // verify_linked_lists: each work item walks its own list and counts, in
    // num_correct, every node that carries the expected id and position. The
    // walk stops at the first node that does not match.
    "__kernel void verify_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global uint* num_correct, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes[i];\n"
    "\n"
    " for(int j=0; j < list_length; j++)\n"
    " {\n"
    " if( pNode->global_id == i && pNode->position_in_list == j)\n"
    " {\n"
    " atomic_inc(num_correct);\n"
    " }\n"
    " else {\n"
    " break;\n"
    " }\n"
    " pNode = pNode->pNext;\n"
    " }\n"
    "}\n"
};
|
||||
|
||||
|
||||
// Creates linked list using host code.
|
||||
cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
log_info("SVM: creating linked list on host ");
|
||||
|
||||
Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
|
||||
Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes2, "clEnqueueMapBuffer failed");
|
||||
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
// Verify correctness of the linked list using host code.
|
||||
cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
//log_info(" and verifying on host ");
|
||||
|
||||
Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
// This tests that shared buffers are able to contain pointers that point to other shared buffers.
|
||||
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host.
|
||||
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
|
||||
// This basic test is performed for all combinations of devices and the host.
|
||||
int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
|
||||
|
||||
{
|
||||
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the index into the nodes buffer that is used for node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the count of correct nodes which is computed by the verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
|
||||
//error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
} // inner loop, vi
|
||||
} // outer loop, ci
|
||||
}
|
||||
|
||||
clSVMFree(context, pNodes2);
|
||||
clSVMFree(context, pNodes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
254
test_conformance/SVM/test_enqueue_api.cpp
Normal file
254
test_conformance/SVM/test_enqueue_api.cpp
Normal file
@@ -0,0 +1,254 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
typedef struct
|
||||
{
|
||||
cl_uint status;
|
||||
cl_uint num_svm_pointers;
|
||||
std::vector<void *> svm_pointers;
|
||||
} CallbackData;
|
||||
|
||||
void generate_data(std::vector<cl_uchar> &data, size_t size, MTdata seed)
|
||||
{
|
||||
cl_uint randomData = genrand_int32(seed);
|
||||
cl_uint bitsLeft = 32;
|
||||
|
||||
for( size_t i = 0; i < size; i++ )
|
||||
{
|
||||
if( 0 == bitsLeft)
|
||||
{
|
||||
randomData = genrand_int32(seed);
|
||||
bitsLeft = 32;
|
||||
}
|
||||
data[i] = (cl_uchar)( randomData & 255 );
|
||||
randomData >>= 8; randomData -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
//callback which will be passed to clEnqueueSVMFree command
|
||||
void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_pointers, void * svm_pointers[], void * user_data)
|
||||
{
|
||||
CallbackData *data = (CallbackData *)user_data;
|
||||
data->num_svm_pointers = num_svm_pointers;
|
||||
data->svm_pointers.resize(num_svm_pointers, 0);
|
||||
|
||||
cl_context context;
|
||||
if(clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, 0) != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetCommandQueueInfo failed in the callback\n");
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_svm_pointers; ++i)
|
||||
{
|
||||
data->svm_pointers[i] = svm_pointers[i];
|
||||
clSVMFree(context, svm_pointers[i]);
|
||||
}
|
||||
|
||||
data->status = 1;
|
||||
}
|
||||
|
||||
int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
cl_uint num_devices = 0;
|
||||
const size_t elementNum = 1024;
|
||||
const size_t numSVMBuffers = 32;
|
||||
cl_int error = CL_SUCCESS;
|
||||
RandomSeed seed(0);
|
||||
|
||||
error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
|
||||
queue = queues[0];
|
||||
|
||||
//all possible sizes of vectors and scalars
|
||||
size_t typeSizes[] = {
|
||||
sizeof(cl_uchar),
|
||||
sizeof(cl_uchar2),
|
||||
sizeof(cl_uchar3),
|
||||
sizeof(cl_uchar4),
|
||||
sizeof(cl_uchar8),
|
||||
sizeof(cl_uchar16),
|
||||
sizeof(cl_ushort),
|
||||
sizeof(cl_ushort2),
|
||||
sizeof(cl_ushort3),
|
||||
sizeof(cl_ushort4),
|
||||
sizeof(cl_ushort8),
|
||||
sizeof(cl_ushort16),
|
||||
sizeof(cl_uint),
|
||||
sizeof(cl_uint2),
|
||||
sizeof(cl_uint3),
|
||||
sizeof(cl_uint4),
|
||||
sizeof(cl_uint8),
|
||||
sizeof(cl_uint16),
|
||||
sizeof(cl_ulong),
|
||||
sizeof(cl_ulong2),
|
||||
sizeof(cl_ulong3),
|
||||
sizeof(cl_ulong4),
|
||||
sizeof(cl_ulong8),
|
||||
sizeof(cl_ulong16),
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < ( sizeof(typeSizes) / sizeof(typeSizes[0]) ); ++i)
|
||||
{
|
||||
//generate initial data
|
||||
std::vector<cl_uchar> fillData0(typeSizes[i]), fillData1(typeSizes[i], 0), fillData2(typeSizes[i]);
|
||||
generate_data(fillData0, typeSizes[i], seed);
|
||||
generate_data(fillData2, typeSizes[i], seed);
|
||||
|
||||
cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0);
|
||||
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0);
|
||||
|
||||
clEventWrapper userEvent = clCreateUserEvent(context, &error);
|
||||
test_error(error, "clCreateUserEvent failed");
|
||||
|
||||
clEventWrapper eventMemFill;
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], elementNum * typeSizes[i], 1, &userEvent, &eventMemFill);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
clEventWrapper eventMemcpy;
|
||||
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i], 1, &eventMemFill, &eventMemcpy);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
error = clSetUserEventStatus(userEvent, CL_COMPLETE);
|
||||
test_error(error, "clSetUserEventStatus failed");
|
||||
|
||||
clEventWrapper eventMap;
|
||||
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, dstBuffer, elementNum * typeSizes[i], 1, &eventMemcpy, &eventMap);
|
||||
test_error(error, "clEnqueueSVMMap failed");
|
||||
|
||||
error = clWaitForEvents(1, &eventMap);
|
||||
test_error(error, "clWaitForEvents failed");
|
||||
|
||||
//data verification
|
||||
for (size_t j = 0; j < elementNum * typeSizes[i]; ++j)
|
||||
{
|
||||
if (dstBuffer[j] != fillData0[j % typeSizes[i]])
|
||||
{
|
||||
log_error("Invalid data at index %ld, expected %d, got %d\n", j, fillData0[j % typeSizes[i]], dstBuffer[j]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
clEventWrapper eventUnmap;
|
||||
error = clEnqueueSVMUnmap(queue, dstBuffer, 0, 0, &eventUnmap);
|
||||
test_error(error, "clEnqueueSVMUnmap failed");
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clEnqueueSVMMemFill(queue, dstBuffer + elementNum * typeSizes[i] / 2, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemFill failed");
|
||||
|
||||
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i] / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + elementNum * typeSizes[i] / 2, srcBuffer + elementNum * typeSizes[i] / 2, elementNum * typeSizes[i] / 2, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMMemcpy failed");
|
||||
|
||||
void *ptrs[] = {(void *)srcBuffer, (void *)dstBuffer};
|
||||
|
||||
clEventWrapper eventFree;
|
||||
error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree);
|
||||
test_error(error, "clEnqueueSVMFree failed");
|
||||
|
||||
error = clWaitForEvents(1, &eventFree);
|
||||
test_error(error, "clWaitForEvents failed");
|
||||
|
||||
//event info verification for new SVM commands
|
||||
cl_command_type commandType;
|
||||
error = clGetEventInfo(eventMemFill, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MEMFILL)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMMemFill\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MEMCPY)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMMemcpy\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetEventInfo(eventMap, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_MAP)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMMap\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetEventInfo(eventUnmap, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_UNMAP)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMUnmap\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
|
||||
test_error(error, "clGetEventInfo failed");
|
||||
if (commandType != CL_COMMAND_SVM_FREE)
|
||||
{
|
||||
log_error("Invalid command type returned for clEnqueueSVMFree\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<void *> buffers(numSVMBuffers, 0);
|
||||
for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
|
||||
|
||||
//verify if callback is triggered correctly
|
||||
CallbackData data;
|
||||
data.status = 0;
|
||||
|
||||
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0);
|
||||
test_error(error, "clEnqueueSVMFree failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
//wait for the callback
|
||||
while(data.status == 0) { }
|
||||
|
||||
//check if number of SVM pointers returned in the callback matches with expected
|
||||
if (data.num_svm_pointers != buffers.size())
|
||||
{
|
||||
log_error("Invalid number of SVM pointers returned in the callback, expected: %ld, got: %d\n", buffers.size(), data.num_svm_pointers);
|
||||
return -1;
|
||||
}
|
||||
|
||||
//check if pointers returned in callback are correct
|
||||
for (size_t i = 0; i < buffers.size(); ++i)
|
||||
{
|
||||
if (data.svm_pointers[i] != buffers[i])
|
||||
{
|
||||
log_error("Invalid SVM pointer returned in the callback, idx: %ld\n", i);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
176
test_conformance/SVM/test_fine_grain_memory_consistency.cpp
Normal file
176
test_conformance/SVM/test_fine_grain_memory_consistency.cpp
Normal file
@@ -0,0 +1,176 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C source for the build_hash_table kernel.
//
// The array is deliberately mutable: the host test overwrites character [4]
// (the '0' of the leading "#if 0") to switch the 64-bit atomics pragmas on
// for 64-bit builds, so the first line must stay exactly "#if 0\n".
static char hash_table_kernel[] =
    // Extension pragmas, disabled by default (see note above).
    "#if 0\n"
    "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
    "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
    "#endif\n"
    // Device-side node layout; the host mirrors it with its own BinNode struct.
    "typedef struct BinNode {\n"
    " int value;\n"
    " atomic_uintptr_t pNext;\n"
    "} BinNode;\n"

    // Each work-item grabs a fresh node from the pool and pushes it onto the
    // head of the bin selected by (input value % numBins).
    "__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
    "{\n"
    " __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
    " uint i = get_global_id(0);\n"
    " uint b = input[i] % numBins;\n"
    " pNew->value = input[i];\n"
    " uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n"
    " do\n"
    " {\n"
    // The new node always becomes the head of the list.
    " atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n"
    " } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n"
    "}\n";
|
||||
|
||||
typedef struct BinNode{
|
||||
cl_uint value;
|
||||
struct BinNode* pNext;
|
||||
} BinNode;
|
||||
|
||||
// Host-side counterpart of the build_hash_table kernel: inserts every element
// of `input` into the shared hash table rooted at `pNodes`.
//
//   c          - unused here
//   input      - values to insert (inputSize entries)
//   pNodes     - node pool; entries [0, numBins) are the list heads
//   pNumNodes  - shared counter used to claim the next free node in the pool
//   numBins    - number of bins; a value lands in bin (value % numBins)
//
// NOTE(review): the kernel passes (seq_cst, relaxed) as the success/failure
// orders of its compare-exchange while this call passes (relaxed, seq_cst);
// confirm the parameter order expected by AtomicCompareExchangeStrongExplicit.
void build_hash_table_on_host(cl_context c, cl_uint* input, size_t inputSize, BinNode* pNodes, cl_int volatile *pNumNodes, cl_uint numBins)
{
    for (cl_uint idx = 0; idx < inputSize; ++idx)
    {
        // Claim a fresh node from the pool and fill in its payload.
        BinNode *node = &pNodes[ AtomicFetchAddExplicit(pNumNodes, 1, memory_order_relaxed) ];
        const cl_uint bin = input[idx] % numBins;
        node->value = input[idx];

        // Push the node onto the head of its bin; retry until the CAS wins.
        // On failure the helper is expected to refresh `head` with the
        // current list head, so the next attempt links against it.
        BinNode *head = pNodes[bin].pNext;
        for (;;)
        {
            node->pNext = head;
            if (AtomicCompareExchangeStrongExplicit(&(pNodes[bin].pNext), &head, node, memory_order_relaxed, memory_order_seq_cst))
                break;
        }
    }
}
|
||||
|
||||
|
||||
int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper* queues, clKernelWrapper &kernel, cl_uint num_devices, cl_uint numBins, size_t num_pixels)
|
||||
{
|
||||
int err = CL_SUCCESS;
|
||||
cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
|
||||
BinNode *pNodes = (BinNode*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(BinNode) * (num_pixels * (num_devices + 1) + numBins), 0);
|
||||
cl_int *pNumNodes = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int), 0);
|
||||
|
||||
*pNumNodes = numBins; // using the first numBins nodes to hold the list heads.
|
||||
for(cl_uint i=0;i<numBins;i++) {
|
||||
pNodes[i].pNext = NULL;
|
||||
};
|
||||
|
||||
for(cl_uint i=0; i < num_pixels; i++) pInputImage[i] = i;
|
||||
|
||||
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 1, pNodes);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 2, pNumNodes);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof(cl_uint), (void*) &numBins);
|
||||
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
cl_event done;
|
||||
// get all the devices going simultaneously, each device (and the host) will insert all the pixels.
|
||||
for(cl_uint d=0; d<num_devices; d++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queues[d], kernel, 1, NULL, &num_pixels, 0, 0, NULL, &done);
|
||||
test_error(err,"clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
for(cl_uint d=0; d<num_devices; d++) clFlush(queues[d]);
|
||||
|
||||
// wait until we see some activity from a device (try to run host side simultaneously).
|
||||
while(numBins == AtomicLoadExplicit(pNumNodes, memory_order_relaxed));
|
||||
|
||||
build_hash_table_on_host(context, pInputImage, num_pixels, pNodes, pNumNodes, numBins);
|
||||
|
||||
for(cl_uint d=0; d<num_devices; d++) clFinish(queues[d]);
|
||||
|
||||
cl_uint num_items = 0;
|
||||
// check correctness of each bin in the hash table.
|
||||
for(cl_uint i = 0; i < numBins; i++)
|
||||
{
|
||||
BinNode *pNode = pNodes[i].pNext;
|
||||
while(pNode)
|
||||
{
|
||||
if((pNode->value % numBins) != i)
|
||||
{
|
||||
log_error("Something went wrong, item is in wrong hash bucket\n");
|
||||
break;
|
||||
}
|
||||
num_items++;
|
||||
pNode = pNode->pNext;
|
||||
}
|
||||
}
|
||||
|
||||
clSVMFree(context, pInputImage);
|
||||
clSVMFree(context, pNodes);
|
||||
clSVMFree(context, pNumNodes);
|
||||
// each device and the host inserted all of the pixels, check that none are missing.
|
||||
if(num_items != num_pixels * (num_devices + 1) )
|
||||
{
|
||||
log_error("The hash table is not correct, num items %d, expected num items: %d\n", num_items, num_pixels * (num_devices + 1));
|
||||
return -1; // test did not pass
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This tests for memory consistency across devices and the host.
|
||||
// Each device and the host simultaneously insert values into a single hash table.
|
||||
// Each bin in the hash table is a linked list. Each bin is protected against simultaneous
|
||||
// update using a lock-free technique. The correctness of the list is verified on the host.
|
||||
// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering.
|
||||
// Entry point of the fine-grain memory consistency test.
//
//   deviceID     - device used to query extensions and to create the CL objects
//   c, queue     - unused here; the test creates its own context and queues
//   num_elements - number of values every device and the host insert per round
//
// Returns 0 when the test passes or is skipped, non-zero on failure.
int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    clContextWrapper context;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clCommandQueueWrapper queues[MAXQ];

    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;

    if (sizeof(void *) == 8 && (!is_extension_available(deviceID, "cl_khr_int64_base_atomics") || !is_extension_available(deviceID, "cl_khr_int64_extended_atomics")))
    {
        log_info("WARNING: test skipped. 'cl_khr_int64_base_atomics' and 'cl_khr_int64_extended_atomics' extensions are not supported\n");
        return 0;
    }

    // Make pragmas visible for 64-bit addresses: character [4] of the kernel
    // source is the digit of its leading "#if 0".
    hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0';

    char *source[] = { hash_table_kernel };

    err = create_cl_objects(deviceID, (const char**)source, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
    if (err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if (err < 0) return -1; // fail test.

    kernel = clCreateKernel(program, "build_hash_table", &err);
    test_error(err, "clCreateKernel failed");
    size_t num_pixels = num_elements;

    // Bin counts exercised, in order:
    //    1 - all work groups in all devices and the host code hammer on one lock.
    //    2 - 2 locks within the same cache line get hit from different devices and host.
    //   29 - locks span a few cache lines.
    static const cl_uint binCounts[] = { 1, 2, 29 };

    int result = 0;
    for (size_t round = 0; round < sizeof(binCounts) / sizeof(binCounts[0]); ++round)
    {
        result = launch_kernels_and_verify(context, queues, kernel, num_devices, binCounts[round], num_pixels);
        // Any non-zero value is a failure. The previous "== -1" test let other
        // error codes fall through and be overwritten by a later passing round.
        if (result != 0) return result;
    }

    return result;
}
|
||||
105
test_conformance/SVM/test_fine_grain_sync_buffers.cpp
Normal file
105
test_conformance/SVM/test_fine_grain_sync_buffers.cpp
Normal file
@@ -0,0 +1,105 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C source of the find_targets kernel (a single source string).
//
// Every work-item compares one element of `image` with `target`. On a match
// it reserves a slot by atomically incrementing *numTargetsFound and then
// atomically stores its global id into targetLocations[slot].
const char *find_targets_kernel[] = {
    "__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " uint index;\n"
    " if(image[i] == target) {\n"
    " index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n"
    " atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n"
    " }\n"
    "}\n"
};
|
||||
|
||||
|
||||
// Stand-in for the host-side work that would analyse a target reported by the
// device. It intentionally does nothing; `location` is the value the device
// stored for the target.
void spawnAnalysisTask(int location)
{
    (void)location; // deliberately unused
}
|
||||
|
||||
// Number of cl_int slots in the pTargetLocations SVM array used by test_fine_grain_sync_buffers.
#define MAX_TARGETS 1024
|
||||
|
||||
// Goals: demonstrate use of SVM's atomics to do fine grain synchronization between the device and host.
|
||||
// Concept: a device kernel is used to search an input image for regions that match a target pattern.
|
||||
// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices).
|
||||
// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets.
|
||||
int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
|
||||
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(err < 0) return -1; // fail test.
|
||||
|
||||
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
|
||||
size_t num_pixels = num_elements;
|
||||
//cl_uint num_pixels = 1024*1024*32;
|
||||
|
||||
cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
|
||||
cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0);
|
||||
cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0);
|
||||
|
||||
cl_uint targetDescriptor = 777;
|
||||
*pNumTargetsFound = 0;
|
||||
cl_uint i;
|
||||
for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1;
|
||||
for(i=0; i < num_pixels; i++) pInputImage[i] = 0;
|
||||
pInputImage[0] = targetDescriptor;
|
||||
pInputImage[3] = targetDescriptor;
|
||||
pInputImage[num_pixels - 1] = targetDescriptor;
|
||||
|
||||
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
cl_event done;
|
||||
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done);
|
||||
test_error(err,"clEnqueueNDRangeKernel failed");
|
||||
clFlush(queues[0]);
|
||||
|
||||
|
||||
i=0;
|
||||
cl_int status;
|
||||
// check for new targets, if found spawn a task to analyze target.
|
||||
do {
|
||||
err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
|
||||
test_error(err,"clGetEventInfo failed");
|
||||
if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet.
|
||||
{
|
||||
spawnAnalysisTask(pTargetLocations[i]);
|
||||
i++;
|
||||
}
|
||||
} while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1);
|
||||
|
||||
clSVMFree(context, pInputImage);
|
||||
clSVMFree(context, pNumTargetsFound);
|
||||
clSVMFree(context, pTargetLocations);
|
||||
|
||||
if(i != 3) return -1;
|
||||
return 0;
|
||||
}
|
||||
115
test_conformance/SVM/test_pointer_passing.cpp
Normal file
115
test_conformance/SVM/test_pointer_passing.cpp
Normal file
@@ -0,0 +1,115 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C source of the verify_char kernel (a single source string).
//
// Only work-item 0 does any work: it writes 1 to *num_correct when the byte
// at pChar equals `expected`, and 0 otherwise.
const char *SVMPointerPassing_test_kernel[] = {
    "__kernel void verify_char(__global uchar* pChar, volatile __global uint* num_correct, uchar expected)\n"
    "{\n"
    " if(0 == get_global_id(0))\n"
    " {\n"
    " *num_correct = 0;\n"
    " if(*pChar == expected)\n"
    " {\n"
    " *num_correct=1;\n"
    " }\n"
    " }\n"
    "}\n"
};
|
||||
|
||||
|
||||
// Test that arbitrarily aligned char pointers into shared buffers can be passed directly to a kernel.
|
||||
// This iterates through a buffer passing a pointer to each location to the kernel.
|
||||
// The buffer is initialized to known values at each location.
|
||||
// The kernel checks that it finds the expected value at each location.
|
||||
// TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--.
|
||||
int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
|
||||
clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error);
|
||||
test_error(error,"clCreateKernel failed");
|
||||
|
||||
size_t bufSize = 256;
|
||||
char *pbuf = (char*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0);
|
||||
|
||||
cl_int *pNumCorrect = NULL;
|
||||
pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
|
||||
|
||||
{
|
||||
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them.
|
||||
cl_command_queue cmdq = queues[0];
|
||||
cl_uchar* pBuf = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error);
|
||||
test_error2(error, pBuf, "clEnqueueMapBuffer failed");
|
||||
for(int i = 0; i<(int)bufSize; i++)
|
||||
{
|
||||
pBuf[i]= (cl_uchar)i;
|
||||
}
|
||||
error = clEnqueueUnmapMemObject(cmdq, buf, pBuf, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
|
||||
for (cl_uint ii = 0; ii<num_devices; ++ii) // iterate over all devices in the platform.
|
||||
{
|
||||
cmdq = queues[ii];
|
||||
for(int i = 0; i<(int)bufSize; i++)
|
||||
{
|
||||
cl_uchar* pChar = &pBuf[i];
|
||||
error = clSetKernelArgSVMPointer(kernel_verify_char, 0, pChar); // pass a pointer to a location within the buffer
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar), (void *) &i ); // pass the expected value at the above location.
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL, &bufSize, NULL, 0, NULL, NULL);
|
||||
test_error(error,"clEnqueueNDRangeKernel failed");
|
||||
|
||||
pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
|
||||
test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
|
||||
cl_int correct_count = *pNumCorrect;
|
||||
error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
|
||||
if(correct_count != 1)
|
||||
{
|
||||
log_error("Passing pointer directly to kernel for byte #%d failed on device %d\n", i, ii);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
}
|
||||
|
||||
|
||||
clSVMFree(context, pbuf);
|
||||
clSVMFree(context, pNumCorrect);
|
||||
|
||||
return 0;
|
||||
}
|
||||
153
test_conformance/SVM/test_set_kernel_exec_info_svm_ptrs.cpp
Normal file
153
test_conformance/SVM/test_set_kernel_exec_info_svm_ptrs.cpp
Normal file
@@ -0,0 +1,153 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
typedef struct {
|
||||
cl_int *pA;
|
||||
cl_int *pB;
|
||||
cl_int *pC;
|
||||
} BufPtrs;
|
||||
|
||||
// OpenCL C source of the set_kernel_exec_info_test kernel (a single source
// string). The kernel receives only a pointer to a BufPtrs struct and
// increments element get_global_id(0) of each of the three arrays that the
// struct points to.
const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
    "struct BufPtrs;\n"
    "\n"
    "typedef struct {\n"
    " __global int *pA;\n"
    " __global int *pB;\n"
    " __global int *pC;\n"
    "} BufPtrs;\n"
    "\n"
    "__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n"
    "{\n"
    " size_t i;\n"
    " i = get_global_id(0);\n"
    " pBufs->pA[i]++;\n"
    " pBufs->pB[i]++;\n"
    " pBufs->pC[i]++;\n"
    "}\n"
};
|
||||
|
||||
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
|
||||
//
|
||||
int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper c = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
//error = create_cl_objects(deviceID, &set_kernel_exec_info_svm_ptrs_kernel[0], &context, &program, &q, &num_devices, CL_DEVICE_SVM_FINE_GRAIN);
|
||||
error = create_cl_objects(deviceID, &set_kernel_exec_info_svm_ptrs_kernel[0], &c, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(error < 0) return -1; // fail test.
|
||||
|
||||
|
||||
clKernelWrapper k = clCreateKernel(program, "set_kernel_exec_info_test", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
size_t size = num_elements*sizeof(int);
|
||||
//int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
|
||||
//int* pB = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
|
||||
//int* pC = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
|
||||
int* pA = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
|
||||
int* pB = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
|
||||
int* pC = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
|
||||
BufPtrs* pBuf = (BufPtrs*) clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0);
|
||||
|
||||
bool failed = false;
|
||||
{
|
||||
clMemWrapper ba,bb,bc,bBuf;
|
||||
ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(BufPtrs), 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
|
||||
for(int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;
|
||||
|
||||
pBuf->pA = pA;
|
||||
pBuf->pB = pB;
|
||||
pBuf->pC = pC;
|
||||
|
||||
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bBuf, pBuf, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
|
||||
|
||||
error = clSetKernelArgSVMPointer(k, 0, pBuf);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(BufPtrs), pBuf);
|
||||
test_error(error, "clSetKernelExecInfo failed");
|
||||
|
||||
size_t range = num_elements;
|
||||
error = clEnqueueNDRangeKernel(queues[0], k, 1, NULL, &range, NULL, 0, NULL, NULL);
|
||||
test_error(error,"clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clFinish(queues[0]);
|
||||
test_error(error, "clFinish failed.");
|
||||
|
||||
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
|
||||
test_error(error, "clEnqueueMapBuffer failed");
|
||||
|
||||
for(int i = 0; i < num_elements; i++)
|
||||
{
|
||||
if(pA[i] + pB[i] + pC[i] != 3)
|
||||
failed = true;
|
||||
}
|
||||
|
||||
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
|
||||
test_error(error, " clEnqueueUnmapMemObject failed.");
|
||||
}
|
||||
|
||||
error = clFinish(queues[0]);
|
||||
test_error(error, " clFinish failed.");
|
||||
|
||||
clSVMFree(c, pA);
|
||||
clSVMFree(c, pB);
|
||||
clSVMFree(c, pC);
|
||||
clSVMFree(c, pBuf);
|
||||
|
||||
if(failed) return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
282
test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
Normal file
282
test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
Normal file
@@ -0,0 +1,282 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// Creates linked list using host code
|
||||
cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI )
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
log_info("SVM: creating linked list on host ");
|
||||
|
||||
Node *pNodes;
|
||||
if (useNewAPI == CL_FALSE)
|
||||
{
|
||||
pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqMapBuffer failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
pNodes = pNodes2;
|
||||
error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, sizeof(Node)*ListLength*numLists, 0, NULL,NULL);
|
||||
test_error2(error, pNodes, "clEnqueueSVMMap failed");
|
||||
}
|
||||
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
|
||||
if (useNewAPI == CL_FALSE)
|
||||
{
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueSVMUnmap(cmdq, pNodes2, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueSVMUnmap failed.");
|
||||
}
|
||||
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed.");
|
||||
return error;
|
||||
}
|
||||
|
||||
// Purpose: uses host code to verify correctness of the linked list
|
||||
cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI )
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_int correct_count;
|
||||
|
||||
Node *pNodes;
|
||||
if (useNewAPI == CL_FALSE)
|
||||
{
|
||||
pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
pNodes = pNodes2;
|
||||
error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, sizeof(Node)*ListLength * numLists, 0, NULL,NULL);
|
||||
test_error2(error, pNodes, "clEnqueueSVMMap failed");
|
||||
}
|
||||
|
||||
correct_count = 0;
|
||||
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
|
||||
if (useNewAPI == CL_FALSE)
|
||||
{
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueSVMUnmap(cmdq, pNodes2, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueSVMUnmap failed.");
|
||||
}
|
||||
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed.");
|
||||
return error;
|
||||
}
|
||||
|
||||
// Creates the linked lists on a device.
//
// Resets the allocation index stored in `allocator` to numLists, launches
// kernel_create_lists with one work-item per list on `cmdq`, and waits for the
// queue to finish. `ci` is only used for logging.
//
// Returns CL_SUCCESS, or a non-zero value if any OpenCL call fails.
cl_int create_linked_lists_on_device(int ci, cl_command_queue cmdq, cl_mem allocator, cl_kernel kernel_create_lists, size_t numLists )
{
  cl_int error = CL_SUCCESS;
  log_info("SVM: creating linked list on device: %d ", ci);

  // Only sizeof(cl_int) bytes are mapped, so the index must be accessed as a
  // cl_int. Storing a size_t through this pointer would write past the end of
  // the mapped region on hosts where size_t is wider than cl_int.
  cl_int *pAllocator = (cl_int*) clEnqueueMapBuffer(cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
  test_error2(error, pAllocator, "clEnqueueMapBuffer failed");
  // reset allocator index
  *pAllocator = (cl_int)numLists;   // the first numLists elements of the nodes array are already allocated (they hold the head of each list).
  error = clEnqueueUnmapMemObject(cmdq, allocator, pAllocator, 0,NULL,NULL);
  test_error(error, " clEnqueueUnmapMemObject failed.");

  error = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
  test_error(error, "clEnqueueNDRange failed.");
  error = clFinish(cmdq);
  test_error(error, "clFinish failed.");

  return error;
}
|
||||
|
||||
// Verifies the linked lists on a device.
//
// Zeroes the counter stored in `num_correct`, launches kernel_verify_lists with
// one work-item per list, reads the counter back and compares it against
// ListLength * numLists. `vi` is only used for logging.
//
// Returns CL_SUCCESS when the count matches, -1 when it does not, and a
// non-zero value if any OpenCL call fails.
cl_int verify_linked_lists_on_device(int vi, cl_command_queue cmdq,cl_mem num_correct, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists  )
{
  cl_int error = CL_SUCCESS;

  log_info(" and verifying on device: %d ", vi);

  cl_int *pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
  test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");

  *pNumCorrect = 0;     // reset numCorrect to zero

  error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
  test_error(error, "clEnqueueUnmapMemObject failed.");

  error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
  test_error(error,"clEnqueueNDRangeKernel failed");

  // Map the counter again (blocking) to read the kernel's result.
  pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
  test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
  cl_int correct_count = *pNumCorrect;
  error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
  test_error(error, "clEnqueueUnmapMemObject failed");

  // Capture the clFinish status; previously the return value was dropped and
  // the check below re-tested the (already successful) unmap status.
  error = clFinish(cmdq);
  test_error(error,"clFinish failed");

  if(correct_count != ListLength * (cl_uint)numLists)
  {
    error = -1;
    log_info("Failed\n");
  }
  else
    log_info("Passed\n");

  return error;
}
|
||||
|
||||
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
|
||||
// the platform. The test passes only if every combination passes.
|
||||
int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements, cl_bool useNewAPI)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this buffer holds the linked list nodes.
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
|
||||
|
||||
{
|
||||
cl_bool usesSVMpointer = CL_FALSE;
|
||||
clMemWrapper nodes;
|
||||
if (useNewAPI == CL_FALSE)
|
||||
{
|
||||
nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// verify if buffer uses SVM pointer
|
||||
size_t paramSize = 0;
|
||||
error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, 0, 0, ¶mSize);
|
||||
test_error(error, "clGetMemObjectInfo failed.");
|
||||
|
||||
if (paramSize != sizeof(cl_bool))
|
||||
{
|
||||
log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned wrong size.");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0);
|
||||
test_error(error, "clGetMemObjectInfo failed.");
|
||||
|
||||
if (usesSVMpointer != CL_TRUE)
|
||||
{
|
||||
log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_FALSE for buffer created from SVM pointer.");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// this buffer holds an index into the nodes buffer, it is used for node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error = clGetMemObjectInfo(allocator, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0);
|
||||
test_error(error, "clGetMemObjectInfo failed.");
|
||||
|
||||
if (usesSVMpointer != CL_FALSE)
|
||||
{
|
||||
log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_TRUE for non-SVM buffer.");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// this buffer holds the count of correct nodes, which is computed by the verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
if (useNewAPI == CL_TRUE)
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
else
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host(queues[0], nodes, pNodes, ListLength, numLists, useNewAPI);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host(vi, queues[0], nodes, pNodes, ListLength, numLists, useNewAPI);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clSVMFree(context, pNodes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Test entry point: runs the coarse-grain shared address space test with
// useNewAPI set to CL_FALSE.
int test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    const cl_bool useNewAPI = CL_FALSE;
    const int result = shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, useNewAPI);
    return result;
}
|
||||
|
||||
// Test entry point: runs the coarse-grain shared address space test with
// useNewAPI set to CL_TRUE.
int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    const cl_bool useNewAPI = CL_TRUE;
    const int result = shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, useNewAPI);
    return result;
}
|
||||
101
test_conformance/SVM/test_shared_address_space_fine_grain.cpp
Normal file
101
test_conformance/SVM/test_shared_address_space_fine_grain.cpp
Normal file
@@ -0,0 +1,101 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
|
||||
// This tests that all devices and the host share a common address space using fine-grain mode with no buffers.
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
|
||||
// the platform. The test passes only if every combination passes.
|
||||
int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(error < 0) return -1; // fail test.
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this allocation holds the linked list nodes.
|
||||
// FIXME: remove the alignment once prototype can handle it
|
||||
Node* pNodes = (Node*) align_malloc(numLists*ListLength*sizeof(Node),128);
|
||||
test_error2(error, pNodes, "malloc failed");
|
||||
|
||||
// this allocation holds an index into the nodes buffer, it is used for node allocation
|
||||
size_t* pAllocator = (size_t*) align_malloc(sizeof(cl_int), 128);
|
||||
test_error2(error, pAllocator, "malloc failed");
|
||||
|
||||
// this allocation holds the count of correct nodes, which is computed by the verify kernel.
|
||||
cl_int* pNum_correct = (cl_int*) align_malloc(sizeof(cl_int), 128);
|
||||
test_error2(error, pNum_correct, "malloc failed");
|
||||
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),(void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
log_info("creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNum_correct, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
align_free(pNodes);
|
||||
align_free(pAllocator);
|
||||
align_free(pNum_correct);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,138 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
|
||||
|
||||
|
||||
// Device-side list creation for allocations the host can write without a
// map/unmap step: the allocation index behind pAllocator is reset directly,
// then kernel_create_lists is launched with one work-item per list and the
// queue is drained. `ci` is only used for logging.
cl_int create_linked_lists_on_device_no_map(int ci, cl_command_queue cmdq, size_t* pAllocator, cl_kernel kernel_create_lists, size_t numLists  )
{
    log_info("SVM: creating linked list on device: %d ", ci);

    // Reset the allocator index. The first numLists entries of the nodes array
    // are already taken: they hold the head of each list.
    pAllocator[0] = numLists;

    const size_t globalWorkSize = numLists;
    cl_int error = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL);
    test_error(error, "clEnqueueNDRange failed.");

    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");

    return error;
}
|
||||
|
||||
// Verifies the linked lists on a device without any map/unmap step.
//
// Zeroes *pNumCorrect, launches kernel_verify_lists with one work-item per
// list, waits for the queue to finish and compares the counter against
// ListLength * numLists. `vi` is only used for logging.
//
// Returns CL_SUCCESS when the count matches, -1 when it does not, and a
// non-zero value if an OpenCL call fails.
cl_int verify_linked_lists_on_device_no_map(int vi, cl_command_queue cmdq,cl_int* pNumCorrect, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists  )
{
  cl_int error = CL_SUCCESS;

  log_info(" and verifying on device: %d ", vi);

  *pNumCorrect = 0;     // reset numCorrect to zero

  error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
  test_error(error,"clEnqueueNDRangeKernel failed");

  // Capture the clFinish status; previously the return value was dropped and
  // the check below re-tested the (already successful) enqueue status, so a
  // failed kernel execution could go unnoticed.
  error = clFinish(cmdq);
  test_error(error,"clFinish failed");

  cl_int correct_count = *pNumCorrect;
  if(correct_count != ListLength * (cl_uint)numLists)
  {
    error = -1;
    log_info("Failed\n");
  }
  else
    log_info("Passed\n");

  return error;
}
|
||||
|
||||
// This tests that all devices and the host share a common address space; using only the fine-grain with buffers mode.
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
|
||||
// the platform. The test passes only if every combination passes.
|
||||
int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(error < 0) return -1; // fail test.
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this buffer holds the linked list nodes.
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(Node)*ListLength*numLists, 0);
|
||||
|
||||
// this buffer holds an index into the nodes buffer, it is used for node allocation
|
||||
size_t *pAllocator = (size_t*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(size_t), 0);
|
||||
|
||||
// this buffer holds the count of correct nodes, which is computed by the verify kernel.
|
||||
cl_int *pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_int), 0);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
log_info("SVM: creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNumCorrect, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clSVMFree(context, pNodes);
|
||||
clSVMFree(context, pAllocator);
|
||||
clSVMFree(context, pNumCorrect);
|
||||
|
||||
return 0;
|
||||
}
|
||||
241
test_conformance/SVM/test_shared_sub_buffers.cpp
Normal file
241
test_conformance/SVM/test_shared_sub_buffers.cpp
Normal file
@@ -0,0 +1,241 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C source for the sub-buffer test. The adjacent string literals below
// concatenate into a single program string, so the array has one element.
const char *shared_sub_buffers_test_kernel[] = {
    // --- list node layout -------------------------------------------------
    "typedef struct Node {\n"
    " int global_id;\n"
    " int position_in_list;\n"
    " __global struct Node* pNext;\n"
    "} Node;\n"

    // --- allocate_node ----------------------------------------------------
    // Hands out the next node from one of two buffers, so the resulting lists
    // contain nodes from both.
    "__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
    "{\n"
    // Odd and even work-items draw from different buffers, which mixes the
    // two buffers between adjacent work-items.
    " if(i & 0x1)\n"
    " return &pNodes1[atomic_inc(allocation_index)];\n"
    " else\n"
    " return &pNodes2[atomic_inc(allocation_index)];\n"
    "}\n"

    // --- create_linked_lists ----------------------------------------------
    // allocation_index has to be initialized by the host to N, the number of
    // work-items; the first N nodes of pNodes are the list heads.
    // Four sub-buffers taken from two parent buffers are passed in. Some of
    // the arguments look unused, but passing them is what tells the system to
    // make every sub-buffer available on the device.
    "__kernel void create_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global int* allocation_index, int list_length) \n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes_sub1[i];\n"
    " pNode->global_id = i;\n"
    " pNode->position_in_list = 0;\n"
    " __global Node *pNew;\n"
    " for(int j=1; j < list_length; j++) {\n"
    " pNew = allocate_node(pNodes_sub1, pNodes2_sub1, allocation_index, i);\n"
    " pNew->global_id = i;\n"
    " pNew->position_in_list = j;\n"
    " pNode->pNext = pNew; // link new node onto end of list\n"
    " pNode = pNew; // move to end of list\n"
    " }\n"
    "}\n"

    // --- verify_linked_lists ----------------------------------------------
    // As above, the seemingly unused arguments are required so that all
    // sub-buffers are made available on the device.
    "__kernel void verify_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global uint* num_correct, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes_sub1[i];\n"
    " for(int j=0; j < list_length; j++) {\n"
    " if( pNode->global_id == i && pNode->position_in_list == j)\n"
    " atomic_inc(num_correct);\n"
    " else \n"
    " break;\n"
    " pNode = pNode->pNext;\n"
    " }\n"
    "}\n"
};
|
||||
|
||||
|
||||
// Creates linked list using host code.
|
||||
cl_int create_linked_lists_on_host_sb(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
log_info("SVM: creating linked list on host ");
|
||||
|
||||
Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
|
||||
Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes2, "clEnqueueMapBuffer failed");
|
||||
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
// Verify correctness of the linked list using host code.
|
||||
cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
//log_info(" and verifying on host ");
|
||||
|
||||
Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// This tests that shared sub-buffers can be created and that they inherit the flags from the parent buffer when no flags are specified.
|
||||
// This tests that passing only the sub-buffers to a kernel works.
|
||||
// The test is derived from the cross-buffer pointers test which
|
||||
// tests that shared buffers are able to contain pointers that point to other shared buffers.
|
||||
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host.
|
||||
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
|
||||
// This basic test is performed for all combinations of devices and the host.
|
||||
int test_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
|
||||
size_t numLists = num_elements;
|
||||
if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size.
|
||||
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
size_t nodes_bufsize = sizeof(Node)*ListLength*numLists;
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
|
||||
{
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
cl_buffer_region r;
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
|
||||
|
||||
// this buffer holds the index into the nodes buffer that is used for node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the count of correct nodes which is computed by the verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
} // inner loop, vi
|
||||
} // outer loop, ci
|
||||
}
|
||||
clSVMFree(context, pNodes2);
|
||||
clSVMFree(context, pNodes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
18
test_conformance/allocations/CMakeLists.txt
Normal file
18
test_conformance/allocations/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
# Build description for the "allocations" conformance test.
# NOTE(review): MODULE_NAME and ${MODULE_NAME}_SOURCES are presumably consumed
# by ../CMakeCommon.txt, which is not visible here -- confirm before renaming.
set(MODULE_NAME ALLOCATIONS)

# Sources that live in this directory.
set(${MODULE_NAME}_SOURCES
    main.cpp
    allocation_execute.cpp
    allocation_fill.cpp
    allocation_functions.cpp
    allocation_utils.cpp
)

# Shared test-harness sources, appended in the same order as before.
list(APPEND ${MODULE_NAME}_SOURCES
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/msvc9.c
)

include(../CMakeCommon.txt)
|
||||
19
test_conformance/allocations/Jamfile
Normal file
19
test_conformance/allocations/Jamfile
Normal file
@@ -0,0 +1,19 @@
|
||||
# Boost.Build (bjam) description of the allocations conformance test.

# Project-wide requirements; the two toolset-specific flags below are
# commented out, so no requirement is currently applied.
project
    : requirements
#     <toolset>gcc:<cflags>-xc++
#     <toolset>msvc:<cflags>"/TP"
    ;

# The test executable, built from the sources in this directory.
exe test_allocations
    : allocation_execute.cpp
      allocation_fill.cpp
      allocation_functions.cpp
      allocation_utils.cpp
      main.cpp
    ;

# Install step: copies test_allocations below $(DIST), into a debug or release
# sub-tree depending on the build variant.
install dist
    : test_allocations
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/allocations
      <variant>release:<location>$(DIST)/release/tests/test_conformance/allocations
    ;
|
||||
46
test_conformance/allocations/Makefile
Normal file
46
test_conformance/allocations/Makefile
Normal file
@@ -0,0 +1,46 @@
|
||||
# Makefile for the allocations conformance test.
# NOTE(review): the framework flags and the OpenCL.framework library path
# suggest this targets Mac OS X -- confirm.

# Optional ATF support: defining BUILD_WITH_ATF links the ATF framework and
# compiles with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources followed by the shared harness sources.
SRCS = main.cpp \
	allocation_functions.cpp \
	allocation_fill.cpp \
	allocation_utils.cpp \
	allocation_execute.cpp \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/threadTesting.c \
	../../test_common/harness/kernelHelpers.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/mt19937.c \
	../../test_common/harness/typeWrappers.cpp

# Each word in DEFINES is turned into a -D flag in CFLAGS/CXXFLAGS below.
DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_allocations
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
# CC is set to the C++ driver; it is used for the link step below.
# NOTE(review): presumably make's built-in .c rule also picks it up, so the
# .c harness files would be compiled by c++ -- confirm.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c and .cpp source to its .o object file (two substitution passes).
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =
all: $(TARGET)

# Link step; the object files come from make's implicit compile rules.
$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule: print a message instead of failing
# with make's default error.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
373
test_conformance/allocations/allocation_execute.cpp
Normal file
373
test_conformance/allocations/allocation_execute.cpp
Normal file
@@ -0,0 +1,373 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "allocation_execute.h"
|
||||
#include "allocation_functions.h"
|
||||
|
||||
|
||||
// printf-style templates from which execute_kernel assembles its OpenCL C
// kernel source.

// Buffer test kernel.  Placeholders, in order: the generated buffer argument
// list, six occurrences of the index type name ("ulong" or "uint"), and the
// generated per-buffer accumulation statements.
const char *buffer_kernel_pattern =
    "__kernel void sample_test(%s __global uint *result, __global %s *array_sizes, uint per_item)\n"
    "{\n"
    "\tint tid = get_global_id(0);\n"
    "\tuint r = 0;\n"
    "\t%s i;\n"
    "\tfor(i=(%s)tid*(%s)per_item; i<(%s)(1+tid)*(%s)per_item; i++) {\n"
    "%s"
    "\t}\n"
    "\tresult[tid] = r;\n"
    "}\n";

// Image test kernel.  Placeholders, in order: the generated image argument
// list, a declaration line (sampler_pattern or offset_pattern), and the
// generated per-image access code (read_pattern or write_pattern instances).
const char *image_kernel_pattern =
    "__kernel void sample_test(%s __global uint *result)\n"
    "{\n"
    "\tuint4 color;\n"
    "\tcolor = (uint4)(0);\n"
    "%s"
    "\tint x, y;\n"
    "%s"
    "\tresult[get_global_id(0)] += color.x + color.y + color.z + color.w;\n"
    "}\n";

// Per-image read loop.  Placeholders: image index (%d), the modulo operator
// passed in as the string "%" (%s), then the image index twice more (%d, %d).
const char *read_pattern =
    "\tfor(y=0; y<get_image_height(image%d); y++)\n"
    "\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
    "\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
    "\t\t\t\tcolor += read_imageui(image%d, sampler, (int2)(x,y));\n"
    "\t\t\t}\n";

// Declaration used by the image write kernel: per-component offset added to x*y.
const char *offset_pattern =
    "\tconst uint4 offset = (uint4)(0,1,2,3);\n";

// Declaration used by the image read kernel: the sampler passed to read_imageui.
const char *sampler_pattern =
    "\tconst sampler_t sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n";

// Per-image write loop.  Same placeholders as read_pattern.  Each pixel is
// written as x*y + offset, which is the value check_image expects to read back.
const char *write_pattern =
    "\tfor(y=0; y<get_image_height(image%d); y++)\n"
    "\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
    "\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
    "\t\t\t\tcolor = (uint4)x*(uint4)y+offset;\n"
    "\t\t\t\twrite_imageui(image%d, (int2)(x,y), color);\n"
    "\t\t\t}\n"
    "\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
|
||||
|
||||
|
||||
// Reads a 2D image back one row at a time and checks that every pixel holds
// the value the write kernel stores: component j of pixel (x, y) must equal
// x*y + j.
//
// Returns 0 when all pixels match.  Returns -1 when mem is not a 2D image, an
// info query fails, or a pixel mismatches; FAILED_ABORT when the host row
// buffer cannot be allocated; the OpenCL error code when clEnqueueReadImage
// fails.
int check_image(cl_command_queue queue, cl_mem mem) {
    int error;
    cl_mem_object_type type;
    size_t width = 0, height = 0;
    size_t origin[3], region[3], x, j;
    cl_uint *data;

    error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
    if (error) {
        print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
        return -1;
    }

    // Only 2D images are handled.  Reject everything else up front so width and
    // height are never used without having been queried.
    if (type == CL_MEM_OBJECT_BUFFER) {
        log_error("Expected image object, not buffer.\n");
        return -1;
    }
    if (type != CL_MEM_OBJECT_IMAGE2D) {
        log_error("Expected 2D image object, got CL_MEM_TYPE 0x%x.\n", (unsigned int)type);
        return -1;
    }

    error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
    if (error) {
        print_error(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
        return -1;
    }
    error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
    if (error) {
        print_error(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
        return -1;
    }

    // One row of pixels, four cl_uint components per pixel.
    data = (cl_uint*)malloc(width*4*sizeof(cl_uint));
    if (data == NULL) {
        log_error("Failed to malloc host buffer for writing into image.\n");
        return FAILED_ABORT;
    }

    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;
    region[0] = width;
    region[1] = 1;
    region[2] = 1;

    // origin[1] doubles as the current row index y.
    for (origin[1] = 0; origin[1] < height; origin[1]++) {
        error = clEnqueueReadImage(queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
        if (error) {
            print_error(error, "clEnqueueReadImage failed");
            free(data);
            return error;
        }

        for (x=0; x<width; x++) {
            for (j=0; j<4; j++) {
                const cl_uint expected = (cl_uint)(x*origin[1]+j);
                if (data[x*4+j] != expected) {
                    log_error("Pixel %d, %d, component %d, expected %u, got %u.\n",
                              (int)x, (int)origin[1], (int)j, expected, data[x*4+j]);
                    free(data);  // do not leak the row buffer on a mismatch
                    return -1;
                }
            }
        }
    }

    free(data);
    return 0;
}
|
||||
|
||||
|
||||
#define NUM_OF_WORK_ITEMS 8192*2
|
||||
|
||||
int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum) {
|
||||
|
||||
char *argument_string;
|
||||
char *access_string;
|
||||
char *kernel_string;
|
||||
int i, error, result;
|
||||
clKernelWrapper kernel;
|
||||
clProgramWrapper program;
|
||||
clMemWrapper result_mem;
|
||||
char *ptr;
|
||||
size_t global_dims[3];
|
||||
cl_uint per_item;
|
||||
cl_uint per_item_uint;
|
||||
cl_uint returned_results[NUM_OF_WORK_ITEMS], final_result;
|
||||
clEventWrapper event;
|
||||
cl_int event_status;
|
||||
|
||||
// Allocate memory for the kernel source
|
||||
argument_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*64);
|
||||
access_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10));
|
||||
kernel_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10+64)+1024);
|
||||
argument_string[0] = '\0';
|
||||
access_string[0] = '\0';
|
||||
kernel_string[0] = '\0';
|
||||
|
||||
// Zero the results.
|
||||
for (i=0; i<NUM_OF_WORK_ITEMS; i++)
|
||||
returned_results[i] = 0;
|
||||
|
||||
// detect if device supports ulong/int64
|
||||
//detect whether profile of the device is embedded
|
||||
bool support64 = true;
|
||||
char profile[1024] = "";
|
||||
error = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
|
||||
if ((NULL != strstr(profile, "EMBEDDED_PROFILE")) &&
|
||||
(!is_extension_available(device_id, "cles_khr_int64"))) {
|
||||
support64 = false;
|
||||
}
|
||||
|
||||
// Build the kernel source
|
||||
if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
|
||||
for(i=0; i<number_of_mems_used; i++) {
|
||||
sprintf(argument_string + strlen(argument_string), " __global uint *buffer%d, ", i);
|
||||
sprintf(access_string + strlen( access_string), "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
|
||||
}
|
||||
char type[10];
|
||||
if (support64) {
|
||||
sprintf(type, "ulong");
|
||||
}
|
||||
else {
|
||||
sprintf(type, "uint");
|
||||
}
|
||||
sprintf(kernel_string, buffer_kernel_pattern, argument_string, type, type, type, type, type, type, access_string);
|
||||
}
|
||||
else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING) {
|
||||
for(i=0; i<number_of_mems_used; i++) {
|
||||
sprintf(argument_string + strlen(argument_string), " read_only image2d_t image%d, ", i);
|
||||
sprintf(access_string + strlen(access_string), read_pattern, i, "%", i, i);
|
||||
}
|
||||
sprintf(kernel_string, image_kernel_pattern, argument_string, sampler_pattern, access_string);
|
||||
}
|
||||
else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING) {
|
||||
for(i=0; i<number_of_mems_used; i++) {
|
||||
sprintf(argument_string + strlen(argument_string), " write_only image2d_t image%d, ", i);
|
||||
sprintf(access_string + strlen( access_string), write_pattern, i, "%", i, i);
|
||||
}
|
||||
sprintf(kernel_string, image_kernel_pattern, argument_string, offset_pattern, access_string);
|
||||
}
|
||||
ptr = kernel_string;
|
||||
|
||||
// Create the kernel
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" );
|
||||
|
||||
free(argument_string);
|
||||
free(access_string);
|
||||
free(kernel_string);
|
||||
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tCreate kernel failed: %s.\n", IGetErrorString(error));
|
||||
else
|
||||
print_error(error, "Create kernel and program failed");
|
||||
return result;
|
||||
}
|
||||
|
||||
// Set the arguments
|
||||
for (i=0; i<number_of_mems_used; i++) {
|
||||
error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mems[i]);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
}
|
||||
|
||||
// Set the result
|
||||
result_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
error = clSetKernelArg(kernel, i, sizeof(result_mem), &result_mem);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Thread dimensions for execution
|
||||
global_dims[0] = NUM_OF_WORK_ITEMS; global_dims[1] = 1; global_dims[2] = 1;
|
||||
|
||||
// We have extra arguments for the buffer kernel because we need to pass in the buffer sizes
|
||||
cl_ulong *ulSizes = NULL;
|
||||
cl_uint *uiSizes = NULL;
|
||||
if (support64) {
|
||||
ulSizes = (cl_ulong*)malloc(sizeof(cl_ulong)*number_of_mems_used);
|
||||
}
|
||||
else {
|
||||
uiSizes = (cl_uint*)malloc(sizeof(cl_uint)*number_of_mems_used);
|
||||
}
|
||||
cl_ulong max_size = 0;
|
||||
clMemWrapper buffer_sizes;
|
||||
if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
|
||||
for (i=0; i<number_of_mems_used; i++) {
|
||||
size_t size;
|
||||
error = clGetMemObjectInfo(mems[i], CL_MEM_SIZE, sizeof(size), &size, NULL);
|
||||
test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
|
||||
if (support64) {
|
||||
ulSizes[i] = size/sizeof(cl_uint);
|
||||
}
|
||||
else {
|
||||
uiSizes[i] = (cl_uint)size/sizeof(cl_uint);
|
||||
}
|
||||
if (size/sizeof(cl_uint) > max_size)
|
||||
max_size = size/sizeof(cl_uint);
|
||||
}
|
||||
if (support64) {
|
||||
buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_ulong)*number_of_mems_used, ulSizes, &error);
|
||||
}
|
||||
else {
|
||||
buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*number_of_mems_used, uiSizes, &error);
|
||||
}
|
||||
test_error_abort(error, "clCreateBuffer failed");
|
||||
error = clSetKernelArg(kernel, number_of_mems_used+1, sizeof(cl_mem), &buffer_sizes);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
per_item = (cl_uint)ceil((double)max_size/global_dims[0]);
|
||||
if (per_item > CL_UINT_MAX)
|
||||
log_error("Size is too large for a uint parameter to the kernel. Expect invalid results.\n");
|
||||
per_item_uint = (cl_uint)per_item;
|
||||
error = clSetKernelArg(kernel, number_of_mems_used+2, sizeof(per_item_uint), &per_item_uint);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
}
|
||||
if (ulSizes) {
|
||||
free(ulSizes);
|
||||
}
|
||||
if (uiSizes) {
|
||||
free(uiSizes);
|
||||
}
|
||||
|
||||
size_t local_dims[3] = {1,1,1};
|
||||
error = get_max_common_work_group_size(context, kernel, global_dims[0], &local_dims[0]);
|
||||
test_error(error, "get_max_common_work_group_size failed");
|
||||
|
||||
// Execute the kernel
|
||||
error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims, local_dims, 0, NULL, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tExecute kernel failed: %s (global dim: %ld, local dim: %ld)\n", IGetErrorString(error), global_dims[0], local_dims[0]);
|
||||
else
|
||||
print_error(error, "clEnqueueNDRangeKernel failed");
|
||||
return result;
|
||||
}
|
||||
|
||||
// Finish the test
|
||||
error = clFinish(*queue);
|
||||
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
|
||||
else
|
||||
print_error(error, "clFinish failed");
|
||||
return result;
|
||||
}
|
||||
|
||||
// Verify that the event from the execution did not have an error
|
||||
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
|
||||
test_error_abort(error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
||||
if (event_status < 0) {
|
||||
result = check_allocation_error(context, device_id, event_status, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tEvent returned from kernel execution indicates failure: %s.\n", IGetErrorString(event_status));
|
||||
else
|
||||
print_error(event_status, "clEnqueueNDRangeKernel failed");
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// If we are not verifying the checksum return here
|
||||
if (!verify_checksum) {
|
||||
log_info("Note: Allocations were not initialized so kernel execution can not verify correct results.\n");
|
||||
return SUCCEEDED;
|
||||
}
|
||||
|
||||
// Verify the checksum.
|
||||
// Read back the result
|
||||
error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, 0, NULL, NULL);
|
||||
test_error_abort(error, "clEnqueueReadBuffer failed");
|
||||
final_result = 0;
|
||||
if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING || test == IMAGE_READ_NON_BLOCKING) {
|
||||
// For buffers or read images we are just looking at the sum of what each thread summed up
|
||||
for (i=0; i<NUM_OF_WORK_ITEMS; i++) {
|
||||
final_result += returned_results[i];
|
||||
}
|
||||
if (final_result != checksum) {
|
||||
log_error("\t\tChecksum failed to verify. Expected %u got %u.\n", checksum, final_result);
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
log_info("\t\tChecksum verified (%u == %u).\n", checksum, final_result);
|
||||
} else {
|
||||
// For write images we need to verify the values
|
||||
for (i=0; i<number_of_mems_used; i++) {
|
||||
if (check_image(*queue, mems[i])) {
|
||||
log_error("\t\tImage contents failed to verify for image %d.\n", (int)i);
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
}
|
||||
log_info("\t\tImage contents verified.\n");
|
||||
}
|
||||
|
||||
// Finish the test
|
||||
error = clFinish(*queue);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
|
||||
else
|
||||
print_error(error, "clFinish failed");
|
||||
return result;
|
||||
}
|
||||
|
||||
return SUCCEEDED;
|
||||
}
|
||||
|
||||
|
||||
22
test_conformance/allocations/allocation_execute.h
Normal file
22
test_conformance/allocations/allocation_execute.h
Normal file
@@ -0,0 +1,22 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Include guard: lets this header be included more than once per translation unit.
#ifndef ALLOCATION_EXECUTE_H
#define ALLOCATION_EXECUTE_H

#include "testBase.h"
#include "allocation_utils.h"

// Builds and runs a generated kernel over the first number_of_mems_used
// entries of mems.  `test` selects the buffer, image-read or image-write
// kernel variant; when verify_checksum is non-zero the results are also
// verified.  Returns SUCCEEDED on success; other values indicate failure
// (see allocation_execute.cpp).
int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum);

#endif // ALLOCATION_EXECUTE_H
|
||||
|
||||
|
||||
329
test_conformance/allocations/allocation_fill.cpp
Normal file
329
test_conformance/allocations/allocation_fill.cpp
Normal file
@@ -0,0 +1,329 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "allocation_fill.h"
|
||||
|
||||
// Maximum number of bytes written to a buffer per clEnqueueWriteBuffer call.
// Parenthesized so the macro expands safely inside larger expressions.
#define BUFFER_CHUNK_SIZE (8*1024*1024)
// Maximum number of image rows written per clEnqueueWriteImage call.
#define IMAGE_LINES 8
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
int fill_buffer_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, size_t size, MTdata d, cl_bool blocking_write) {
|
||||
size_t i, j;
|
||||
cl_uint *data;
|
||||
int error, result;
|
||||
cl_uint checksum_delta = 0;
|
||||
cl_event event;
|
||||
|
||||
size_t size_to_use = BUFFER_CHUNK_SIZE;
|
||||
if (size_to_use > size)
|
||||
size_to_use = size;
|
||||
|
||||
data = (cl_uint*)malloc(size_to_use);
|
||||
if (data == NULL) {
|
||||
log_error("Failed to malloc host buffer for writing into buffer.\n");
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
for (i=0; i<size-size_to_use; i+=size_to_use) {
|
||||
// Put values in the data, and keep a checksum as we go along.
|
||||
for (j=0; j<size_to_use/sizeof(cl_uint); j++) {
|
||||
data[j] = genrand_int32(d);
|
||||
checksum_delta += data[j];
|
||||
}
|
||||
if (blocking_write) {
|
||||
error = clEnqueueWriteBuffer(*queue, mem, CL_TRUE, i, size_to_use, data, 0, NULL, NULL);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clEnqueueWriteBuffer failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
free(data);
|
||||
clReleaseMemObject(mem);
|
||||
return result;
|
||||
}
|
||||
} else {
|
||||
error = clEnqueueWriteBuffer(*queue, mem, CL_FALSE, i, size_to_use, data, 0, NULL, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clEnqueueWriteBuffer failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
free(data);
|
||||
clReleaseMemObject(mem);
|
||||
return result;
|
||||
}
|
||||
|
||||
error = clWaitForEvents(1, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue, &event);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clWaitForEvents failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
clReleaseEvent(event);
|
||||
free(data);
|
||||
clReleaseMemObject(mem);
|
||||
return result;
|
||||
}
|
||||
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
}
|
||||
|
||||
// Deal with any leftover bits
|
||||
if (i < size) {
|
||||
// Put values in the data, and keep a checksum as we go along.
|
||||
for (j=0; j<(size-i)/sizeof(cl_uint); j++) {
|
||||
data[j] = (cl_uint)genrand_int32(d);
|
||||
checksum_delta += data[j];
|
||||
}
|
||||
|
||||
if (blocking_write) {
|
||||
error = clEnqueueWriteBuffer(*queue, mem, CL_TRUE, i, size-i, data, 0, NULL, NULL);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clEnqueueWriteBuffer failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
clReleaseMemObject(mem);
|
||||
free(data);
|
||||
return result;
|
||||
}
|
||||
} else {
|
||||
error = clEnqueueWriteBuffer(*queue, mem, CL_FALSE, i, size-i, data, 0, NULL, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clEnqueueWriteBuffer failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
clReleaseMemObject(mem);
|
||||
free(data);
|
||||
return result;
|
||||
}
|
||||
|
||||
error = clWaitForEvents(1, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue, &event);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clWaitForEvents failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
clReleaseEvent(event);
|
||||
free(data);
|
||||
clReleaseMemObject(mem);
|
||||
return result;
|
||||
}
|
||||
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
}
|
||||
|
||||
free(data);
|
||||
// Only update the checksum if this succeeded.
|
||||
checksum += checksum_delta;
|
||||
return SUCCEEDED;
|
||||
}
|
||||
|
||||
|
||||
int fill_image_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, size_t width, size_t height, MTdata d, cl_bool blocking_write) {
|
||||
size_t origin[3], region[3], j;
|
||||
int error, result;
|
||||
cl_uint *data;
|
||||
cl_uint checksum_delta = 0;
|
||||
cl_event event;
|
||||
|
||||
size_t image_lines_to_use;
|
||||
image_lines_to_use = IMAGE_LINES;
|
||||
if (image_lines_to_use > height)
|
||||
image_lines_to_use = height;
|
||||
|
||||
data = (cl_uint*)malloc(width*4*sizeof(cl_uint)*image_lines_to_use);
|
||||
if (data == NULL) {
|
||||
log_error("Failed to malloc host buffer for writing into image.\n");
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
origin[0] = 0;
|
||||
origin[1] = 0;
|
||||
origin[2] = 0;
|
||||
region[0] = width;
|
||||
region[1] = image_lines_to_use;
|
||||
region[2] = 1;
|
||||
for (origin[1] = 0; origin[1] < height - image_lines_to_use; origin[1] += image_lines_to_use) {
|
||||
// Put values in the data, and keep a checksum as we go along.
|
||||
for (j=0; j<width*4*image_lines_to_use; j++) {
|
||||
data[j] = (cl_uint)genrand_int32(d);
|
||||
checksum_delta += data[j];
|
||||
}
|
||||
|
||||
if (blocking_write) {
|
||||
error = clEnqueueWriteImage(*queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clEnqueueWriteImage failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
clReleaseMemObject(mem);
|
||||
free(data);
|
||||
return result;
|
||||
}
|
||||
result = clFinish(*queue);
|
||||
if (result != SUCCEEDED)
|
||||
{
|
||||
print_error(error, "clFinish failed after successful enquing filling buffer with data.");
|
||||
return result;
|
||||
}
|
||||
} else {
|
||||
error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clEnqueueWriteImage failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
clReleaseMemObject(mem);
|
||||
free(data);
|
||||
return result;
|
||||
}
|
||||
|
||||
error = clWaitForEvents(1, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue, &event);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clWaitForEvents failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clReleaseEvent(event);
|
||||
free(data);
|
||||
clReleaseMemObject(mem);
|
||||
return result;
|
||||
}
|
||||
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
}
|
||||
|
||||
// Deal with any leftover bits
|
||||
if (origin[1] < height) {
|
||||
// Put values in the data, and keep a checksum as we go along.
|
||||
for (j=0; j<width*4*(height-origin[1]); j++) {
|
||||
data[j] = (cl_uint)genrand_int32(d);
|
||||
checksum_delta += data[j];
|
||||
}
|
||||
|
||||
region[1] = height-origin[1];
|
||||
if(blocking_write) {
|
||||
error = clEnqueueWriteImage(*queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clEnqueueWriteImage failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
clReleaseMemObject(mem);
|
||||
free(data);
|
||||
return result;
|
||||
}
|
||||
} else {
|
||||
error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clEnqueueWriteImage failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
clReleaseMemObject(mem);
|
||||
free(data);
|
||||
return result;
|
||||
}
|
||||
|
||||
error = clWaitForEvents(1, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue, &event);
|
||||
|
||||
if (result == FAILED_ABORT) {
|
||||
print_error(error, "clWaitForEvents failed.");
|
||||
}
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
clFinish(*queue);
|
||||
clReleaseEvent(event);
|
||||
free(data);
|
||||
clReleaseMemObject(mem);
|
||||
return result;
|
||||
}
|
||||
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
}
|
||||
|
||||
free(data);
|
||||
// Only update the checksum if this succeeded.
|
||||
checksum += checksum_delta;
|
||||
return SUCCEEDED;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Fills a memory object with random data, dispatching on the object's type.
//
// Queries CL_MEM_TYPE for `mem` and forwards to fill_buffer_with_data (buffers,
// using the object's CL_MEM_SIZE) or fill_image_with_data (2D images, using the
// image's width and height). Any other object type is reported and rejected.
//
// Returns the result of the fill routine, or FAILED_ABORT for an unsupported
// object type. A failing info query is handled by test_error_abort.
int fill_mem_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, MTdata d, cl_bool blocking_write) {
    cl_mem_object_type object_type;
    int status = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(object_type), &object_type, NULL);
    test_error_abort(status, "clGetMemObjectInfo failed for CL_MEM_TYPE.");

    switch (object_type) {
        case CL_MEM_OBJECT_BUFFER: {
            size_t byte_count;
            status = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(byte_count), &byte_count, NULL);
            test_error_abort(status, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
            return fill_buffer_with_data(context, device_id, queue, mem, byte_count, d, blocking_write);
        }

        case CL_MEM_OBJECT_IMAGE2D: {
            size_t image_width, image_height;
            status = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(image_width), &image_width, NULL);
            test_error_abort(status, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
            status = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(image_height), &image_height, NULL);
            test_error_abort(status, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
            return fill_image_with_data(context, device_id, queue, mem, image_width, image_height, d, blocking_write);
        }

        default:
            break;
    }

    // Only buffers and 2D images are supported by this test.
    log_error("Invalid CL_MEM_TYPE: %d\n", object_type);
    return FAILED_ABORT;
}
|
||||
|
||||
|
||||
|
||||
19
test_conformance/allocations/allocation_fill.h
Normal file
19
test_conformance/allocations/allocation_fill.h
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Include guard, in the same style as allocation_utils.h, so this header can be
// pulled in more than once per translation unit.
#ifndef _allocation_fill_h
#define _allocation_fill_h

#include "testBase.h"
#include "allocation_utils.h"

// Fills `mem` (a buffer or a 2D image) with random data drawn from `d`, using
// blocking or non-blocking writes according to `blocking_write`.
int fill_mem_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, MTdata d, cl_bool blocking_write);

#endif // _allocation_fill_h
|
||||
287
test_conformance/allocations/allocation_functions.cpp
Normal file
287
test_conformance/allocations/allocation_functions.cpp
Normal file
@@ -0,0 +1,287 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "allocation_functions.h"
|
||||
#include "allocation_fill.h"
|
||||
|
||||
|
||||
// Pixel format used for every image created in this file: four channels of
// 32-bit unsigned integers (the size math elsewhere uses sizeof(cl_uint)*4 per pixel).
static cl_image_format image_format = {
    CL_RGBA,            // channel order
    CL_UNSIGNED_INT32   // channel data type
};
|
||||
|
||||
// Creates a read/write buffer of `size_to_allocate` bytes and stores it in *mem.
//
// The creation status is classified by check_allocation_error, whose result is
// returned. `blocking_write` is accepted for signature parity with the image
// allocators but is not used here.
int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
    int create_status;
    cl_mem new_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, size_to_allocate, NULL, &create_status);
    *mem = new_buffer;
    return check_allocation_error(context, device_id, create_status, queue);
}
|
||||
|
||||
|
||||
// Picks 2D image dimensions whose RGBA/uint32 footprint approximates
// `size_to_allocate` bytes while respecting the device's 2D image limits.
//
// Parameters:
//   device_id        - device whose image limits are queried.
//   size_to_allocate - requested size in bytes; must be non-zero.
//   width, height    - receive the chosen dimensions on success.
//   max_size         - optional (may be NULL). On success receives the byte size
//                      of the chosen image; on FAILED_TOO_BIG receives the byte
//                      size of the largest image the device limits allow.
//
// Returns SUCCEEDED, FAILED_TOO_BIG when the request exceeds
// max_width * max_height pixels, or FAILED_ABORT when images are unsupported,
// the request is zero, or a device query fails.
int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height, size_t* max_size) {
    size_t max_width, max_height, num_pixels, found_width, found_height;
    int error;

    // A non-zero result is treated as "images not supported", matching the message.
    if (checkForImageSupport(device_id)) {
        log_info("Can not allocate an image on this device because it does not support images.\n");
        return FAILED_ABORT;
    }

    if (size_to_allocate == 0) {
        log_error("Trying to allocate a zero sized image.\n");
        return FAILED_ABORT;
    }

    error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL );
    test_error_abort(error, "clGetDeviceInfo failed.");
    error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL );
    test_error_abort(error, "clGetDeviceInfo failed.");

    // Each pixel is four cl_uint channels.
    num_pixels = size_to_allocate / (sizeof(cl_uint)*4);

    if (num_pixels > (max_width*max_height)) {
        if(NULL != max_size) {
            *max_size = max_width * max_height * sizeof(cl_uint) * 4;
        }
        return FAILED_TOO_BIG;
    }

    // Start from a close-to-square aspect ratio.
    found_width = (size_t)sqrt( (double) num_pixels );
    if( found_width > max_width ) {
        found_width = max_width;
    }
    if (found_width == 0)
        found_width = 1;

    found_height = num_pixels/found_width;
    if (found_height > max_height) {
        found_height = max_height;
        // The square guess was too tall for this device. Widen the image so the
        // pixel count still approximates the request instead of silently
        // shrinking it. num_pixels <= max_width*max_height was checked above,
        // so the widened width normally fits; clamp anyway for safety.
        if (found_height != 0) {
            found_width = num_pixels/found_height;
            if (found_width > max_width) {
                found_width = max_width;
            }
        }
    }
    if (found_height == 0)
        found_height = 1;

    *width = found_width;
    *height = found_height;

    if(NULL != max_size) {
        *max_size = found_width * found_height * sizeof(cl_uint) * 4;
    }

    return SUCCEEDED;
}
|
||||
|
||||
|
||||
// Creates a CL_MEM_READ_ONLY 2D image of roughly `size_to_allocate` bytes and
// stores it in *mem.
//
// Dimensions come from find_good_image_size; if that fails its result is
// returned unchanged. Otherwise the creation status is classified by
// check_allocation_error. `blocking_write` only affects the log message.
int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
    size_t img_width, img_height;

    int status = find_good_image_size(device_id, size_to_allocate, &img_width, &img_height, NULL);
    if (status != SUCCEEDED) {
        return status;
    }

    const char *write_mode = blocking_write ? "blocking" : "non-blocking";
    log_info("\t\tAttempting to allocate a %gMB read-only image (%d x %d) and fill with %s writes.\n",
             (size_to_allocate/(1024.0*1024.0)), (int)img_width, (int)img_height, write_mode);

    *mem = create_image_2d(context, CL_MEM_READ_ONLY, &image_format, img_width, img_height, 0, NULL, &status);
    return check_allocation_error(context, device_id, status, queue);
}
|
||||
|
||||
|
||||
// Creates a CL_MEM_WRITE_ONLY 2D image of roughly `size_to_allocate` bytes and
// stores it in *mem.
//
// Dimensions come from find_good_image_size; if that fails its result is
// returned unchanged. Otherwise the creation status is classified by
// check_allocation_error. `blocking_write` is not used by this function.
int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
    size_t img_width, img_height;

    int status = find_good_image_size(device_id, size_to_allocate, &img_width, &img_height, NULL);
    if (status != SUCCEEDED) {
        return status;
    }

    *mem = create_image_2d(context, CL_MEM_WRITE_ONLY, &image_format, img_width, img_height, 0, NULL, &status);
    return check_allocation_error(context, device_id, status, queue);
}
|
||||
|
||||
// Dispatches a single allocation of `size_to_allocate` bytes to the allocator
// matching `type`, passing true for the blocking variants and false for the
// *_NON_BLOCKING variants. Returns the allocator's result, or FAILED_ABORT for
// an unrecognised type.
int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem) {
    switch (type) {
        case BUFFER:
            return allocate_buffer(context, queue, device_id, mem, size_to_allocate, true);
        case IMAGE_READ:
            return allocate_image2d_read(context, queue, device_id, mem, size_to_allocate, true);
        case IMAGE_WRITE:
            return allocate_image2d_write(context, queue, device_id, mem, size_to_allocate, true);
        case BUFFER_NON_BLOCKING:
            return allocate_buffer(context, queue, device_id, mem, size_to_allocate, false);
        case IMAGE_READ_NON_BLOCKING:
            return allocate_image2d_read(context, queue, device_id, mem, size_to_allocate, false);
        case IMAGE_WRITE_NON_BLOCKING:
            return allocate_image2d_write(context, queue, device_id, mem, size_to_allocate, false);
        default:
            break;
    }

    log_error("Invalid allocation type: %d\n", type);
    return FAILED_ABORT;
}
|
||||
|
||||
|
||||
// Allocates up to `size_to_allocate` bytes of device memory as one or more
// memory objects of the given `type`, optionally filling each with random data.
//
// Parameters:
//   multiple_allocations - non-zero allows up to MAX_NUMBER_TO_ALLOCATE objects;
//                          zero restricts the attempt to a single object.
//   size_to_allocate     - total number of bytes requested.
//   type                 - one of the BUFFER / IMAGE_* allocation kinds.
//   mems                 - receives the created objects; the caller releases the
//                          first *number_of_mems entries.
//   number_of_mems       - receives the number of objects created (0 on early failure).
//   final_size           - receives the total bytes allocated so far (0 on early failure).
//   force_fill           - non-zero fills each object via fill_mem_with_data.
//   d                    - random source handed to fill_mem_with_data.
//
// Returns SUCCEEDED, FAILED_TOO_BIG when an object could not be created even
// after shrinking the request to nothing, or FAILED_ABORT on any other error.
// Note that SUCCEEDED is also returned when the object-count cap is reached
// before the full amount has been allocated; *final_size reports the real total.
int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate,
                  int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d) {

    cl_ulong max_individual_allocation_size, global_mem_size;
    int error, result;
    size_t amount_allocated;
    size_t reduction_amount;
    int current_allocation;
    size_t allocation_this_time, actual_allocation;

    // Initialise both outputs up front so that a failure before the first
    // successful allocation never leaves the caller reading garbage.
    *number_of_mems = 0;
    *final_size = 0;

    error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
    test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
    error = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
    test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");

    // Sizes are tracked in size_t below, so cap the reported global size to what size_t can hold.
    if (global_mem_size > (cl_ulong)SIZE_MAX) {
        global_mem_size = (cl_ulong)SIZE_MAX;
    }

    if (size_to_allocate > global_mem_size) {
        log_error("Can not allocate more than the global memory size.\n");
        return FAILED_ABORT;
    }

    amount_allocated = 0;
    current_allocation = 0;

    // If allocating for images, reduce the maximum allocation size to the maximum image size.
    // If we don't do this, then the value of CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4 can be higher
    // than the maximum image size on systems with 16GB of RAM or more. In this case, we
    // succeed in allocating an image but its size is less than CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4
    // (min_allocation_allowed) and thus we fail the allocation below.
    if(type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) {
        size_t width;
        size_t height;
        size_t max_size;
        error = find_good_image_size(device_id, size_to_allocate, &width, &height, &max_size);
        // FAILED_TOO_BIG is acceptable here: max_size is still filled in with the device limit.
        if (!(error == SUCCEEDED || error == FAILED_TOO_BIG))
            return error;
        if(max_size < max_individual_allocation_size)
            max_individual_allocation_size = max_size;
    }

    // Step by which a failing single allocation is shrunk on each retry.
    reduction_amount = (size_t)max_individual_allocation_size/16;

    if (type == BUFFER || type == BUFFER_NON_BLOCKING) log_info("\tAttempting to allocate a buffer of size %gMB.\n", toMB(size_to_allocate));
    else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) log_info("\tAttempting to allocate a read-only image of size %gMB.\n", toMB(size_to_allocate));
    else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) log_info("\tAttempting to allocate a write-only image of size %gMB.\n", toMB(size_to_allocate));

    // If we are only doing a single allocation, only allow 1
    int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1;

    // Make sure that the maximum number of images allocated is constrained by the
    // maximum that may be passed to a kernel
    if (type != BUFFER && type != BUFFER_NON_BLOCKING) {
        cl_device_info param_name = (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) ?
            CL_DEVICE_MAX_READ_IMAGE_ARGS : CL_DEVICE_MAX_WRITE_IMAGE_ARGS;

        cl_uint max_image_args;
        error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args), &max_image_args, NULL);
        // Use the same abort-style check as the other device queries in this function.
        test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");

        if ((int)max_image_args < max_to_allocate) {
            log_info("\t\tMaximum number of images per kernel limited to %d\n",(int)max_image_args);
            max_to_allocate = max_image_args;
        }
    }

    // Try to allocate the requested amount.
    while (amount_allocated != size_to_allocate && current_allocation < max_to_allocate) {

        // Determine how much more is needed
        allocation_this_time = size_to_allocate - amount_allocated;

        // Bound by the individual allocation size
        if (allocation_this_time > max_individual_allocation_size)
            allocation_this_time = (size_t)max_individual_allocation_size;

        // Allocate the largest object possible, shrinking the request on each FAILED_TOO_BIG.
        result = FAILED_TOO_BIG;
        while (result == FAILED_TOO_BIG && allocation_this_time != 0) {

            // Create the object
            result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]);
            if (result == SUCCEEDED) {
                // Allocation succeeded, another memory object was added to the array
                *number_of_mems = (current_allocation+1);

                // Verify the size is correct to within 1MB.
                actual_allocation = get_actual_allocation_size(mems[current_allocation]);
                if (fabs((double)allocation_this_time - (double)actual_allocation) > 1024.0*1024.0) {
                    log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB( actual_allocation));
                    return FAILED_ABORT;
                }

                // If we are filling the allocation for verification do so
                if (force_fill) {
                    cl_bool blocking_write = true;
                    if (type == BUFFER_NON_BLOCKING || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE_NON_BLOCKING) {
                        blocking_write = false;
                    }
                    result = fill_mem_with_data(context, device_id, queue, mems[current_allocation], d, blocking_write);
                }
            }

            // If creation failed, try to create a smaller object
            if (result == FAILED_TOO_BIG) {
                if (allocation_this_time > reduction_amount)
                    allocation_this_time -= reduction_amount;
                else if (reduction_amount > 1) {
                    // The step is now larger than the request: halve the step and retry.
                    reduction_amount /= 2;
                }
                else {
                    // Nothing left to shrink; give up on this object.
                    allocation_this_time = 0;
                }
            }
        }

        if (result == FAILED_ABORT) {
            log_error("\t\tAllocation failed.\n");
            return FAILED_ABORT;
        }

        if (!allocation_this_time) {
            log_info("\t\tFailed to allocate %gMB across several objects.\n", toMB(size_to_allocate));
            return FAILED_TOO_BIG;
        }

        // Otherwise we succeeded
        if (result != SUCCEEDED) {
            log_error("Test logic error.");
            test_finish();
            exit(-1);
        }
        amount_allocated += allocation_this_time;

        *final_size = amount_allocated;

        current_allocation++;
    }

    log_info("\t\tSucceeded in allocating %gMB using %d memory objects.\n", toMB(amount_allocated), current_allocation);
    return SUCCEEDED;
}
|
||||
24
test_conformance/allocations/allocation_functions.h
Normal file
24
test_conformance/allocations/allocation_functions.h
Normal file
@@ -0,0 +1,24 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Include guard, in the same style as allocation_utils.h.
#ifndef _allocation_functions_h
#define _allocation_functions_h

#include "testBase.h"
#include "allocation_utils.h"

// Dispatches one allocation of `size_to_allocate` bytes to the allocator matching `type`.
int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem);

// Single-object allocators. The prototypes carry the trailing `blocking_write`
// parameter so that they match the definitions in allocation_functions.cpp.
int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write);
int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write);
int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write);

// Allocates up to `size_to_allocate` bytes as one or more objects, optionally filling them.
int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate,
                  int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d);

#endif // _allocation_functions_h
|
||||
104
test_conformance/allocations/allocation_utils.cpp
Normal file
104
test_conformance/allocations/allocation_utils.cpp
Normal file
@@ -0,0 +1,104 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "allocation_utils.h"
|
||||
|
||||
// Releases *queue and replaces it with a newly created command queue on the
// same context and device. The creation status is written to *error, and the
// new queue handle is both stored in *queue and returned.
cl_command_queue reset_queue(cl_context context, cl_device_id device_id, cl_command_queue *queue, int *error)
{
    log_info("Invalid command queue. Releasing and recreating the command queue.\n");

    clReleaseCommandQueue(*queue);

    cl_command_queue fresh_queue = clCreateCommandQueueWithProperties(context, device_id, 0, error);
    *queue = fresh_queue;
    return fresh_queue;
}
|
||||
|
||||
// Classifies an OpenCL status code produced by an allocation or transfer call.
//
// Parameters:
//   error - the OpenCL status to classify.
//   queue - command queue in use; it is replaced via reset_queue when the
//           status is CL_INVALID_COMMAND_QUEUE.
//   event - optional event. When `error` is
//           CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST and an event is given,
//           the event's own execution status is fetched and classified instead.
//
// Returns:
//   SUCCEEDED      - status is CL_SUCCESS.
//   FAILED_TOO_BIG - out-of-memory/out-of-resources style failures, an invalid
//                    image size, or an invalid queue that was successfully
//                    recreated; the caller may retry with a smaller size.
//   FAILED_ABORT   - any other failure.
int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue, cl_event *event) {
    if (error == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST && event != 0)
    {
        // check for errors from clWaitForEvents (e.g after clEnqueueWriteBuffer)
        cl_int eventError;
        // Size the query by the variable that actually receives the value.
        error = clGetEventInfo(*event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventError), &eventError, 0);
        if (CL_SUCCESS != error)
        {
            log_error("Failed to get event execution status: %s\n", IGetErrorString(error));
            return FAILED_ABORT;
        }
        if (eventError >= 0)
        {
            // `error` is CL_SUCCESS at this point, so report the event status itself.
            log_error("Non-negative event execution status after CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: %d\n", (int)eventError);
            return FAILED_ABORT;
        }
        // A negative execution status is the error code of the failed command.
        error = eventError;
    }

    if ((error == CL_MEM_OBJECT_ALLOCATION_FAILURE ) || (error == CL_OUT_OF_RESOURCES ) || (error == CL_OUT_OF_HOST_MEMORY) || (error == CL_INVALID_IMAGE_SIZE)) {
        return FAILED_TOO_BIG;
    } else if (error == CL_INVALID_COMMAND_QUEUE) {
        *queue = reset_queue(context, device_id, queue, &error);
        if (CL_SUCCESS != error)
        {
            log_error("Failed to reset command queue after corrupted queue: %s\n", IGetErrorString(error));
            return FAILED_ABORT;
        }
        // Try again with smaller resources.
        return FAILED_TOO_BIG;
    } else if (error != CL_SUCCESS) {
        log_error("Allocation failed with %s.\n", IGetErrorString(error));
        return FAILED_ABORT;
    }
    return SUCCEEDED;
}
|
||||
|
||||
|
||||
// Converts a byte count to megabytes (1 MB = 1024 * 1024 bytes).
double toMB(cl_ulong size_in) {
    const double bytes_per_mb = 1024.0*1024.0;
    return (double)size_in / bytes_per_mb;
}
|
||||
|
||||
// Returns the size in bytes of a memory object as reported by the runtime.
//
// For buffers this is CL_MEM_SIZE. For 2D images it is
// width * height * 4 * sizeof(cl_uint), i.e. it assumes the four-channel
// 32-bit format this test creates its images with.
//
// Returns 0 when a query fails or the object is neither a buffer nor a 2D image.
size_t get_actual_allocation_size(cl_mem mem) {
    int error;
    cl_mem_object_type type;
    size_t size, width, height;

    error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
    if (error) {
        print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
        return 0;
    }

    if (type == CL_MEM_OBJECT_BUFFER) {
        error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size), &size, NULL);
        if (error) {
            print_error(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
            return 0;
        }
        return size;
    } else if (type == CL_MEM_OBJECT_IMAGE2D) {
        // The image queries go through clGetImageInfo, so the messages name that call.
        error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
        if (error) {
            print_error(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
            return 0;
        }
        error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
        if (error) {
            print_error(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
            return 0;
        }
        return width*height*4*sizeof(cl_uint);
    }

    log_error("Invalid CL_MEM_TYPE: %d\n", type);
    return 0;
}
|
||||
|
||||
|
||||
27
test_conformance/allocations/allocation_utils.h
Normal file
27
test_conformance/allocations/allocation_utils.h
Normal file
@@ -0,0 +1,27 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _allocation_utils_h
#define _allocation_utils_h

#include "testBase.h"

// Running checksum of the random data written into allocations; defined in main.cpp.
extern cl_uint checksum;

// Maps an OpenCL status code to SUCCEEDED / FAILED_TOO_BIG / FAILED_ABORT.
// `event` is optional and defaults to none.
int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue, cl_event *event = 0);

// Converts a byte count to megabytes.
double toMB(cl_ulong size_in);

// Returns the byte size of a buffer or 2D image, or 0 on failure.
size_t get_actual_allocation_size(cl_mem mem);

#endif // _allocation_utils_h
|
||||
429
test_conformance/allocations/main.cpp
Normal file
429
test_conformance/allocations/main.cpp
Normal file
@@ -0,0 +1,429 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
#include "allocation_functions.h"
|
||||
#include "allocation_fill.h"
|
||||
#include "allocation_execute.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include <time.h>
|
||||
|
||||
typedef long long unsigned llu;
|
||||
|
||||
cl_device_id g_device_id;
|
||||
cl_device_type g_device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
clContextWrapper g_context;
|
||||
clCommandQueueWrapper g_queue;
|
||||
int g_repetition_count = 1;
|
||||
int g_reduction_percentage = 100;
|
||||
int g_write_allocations = 1;
|
||||
int g_multiple_allocations = 0;
|
||||
int g_execute_kernel = 1;
|
||||
|
||||
static size_t g_max_size;
|
||||
static RandomSeed g_seed( gRandomSeed );
|
||||
|
||||
cl_long g_max_individual_allocation_size;
|
||||
cl_long g_global_mem_size;
|
||||
|
||||
cl_uint checksum;
|
||||
|
||||
static void printUsage( const char *execName );
|
||||
|
||||
int init_cl() {
|
||||
cl_platform_id platform;
|
||||
int error;
|
||||
|
||||
error = clGetPlatformIDs(1, &platform, NULL);
|
||||
test_error(error, "clGetPlatformIDs failed");
|
||||
|
||||
error = clGetDeviceIDs(platform, g_device_type, 1, &g_device_id, NULL);
|
||||
test_error(error, "clGetDeviceIDs failed");
|
||||
|
||||
/* Create a context */
|
||||
g_context = clCreateContext( NULL, 1, &g_device_id, notify_callback, NULL, &error );
|
||||
test_error(error, "clCreateContext failed");
|
||||
|
||||
/* Create command queue */
|
||||
g_queue = clCreateCommandQueueWithProperties( g_context, g_device_id, 0, &error );
|
||||
test_error(error, "clCreateCommandQueue failed");
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
// Runs the allocation test for one allocation kind.
//
// The target size is the device's maximum single allocation (or its global
// memory size when g_multiple_allocations is set), limited to the largest 2D
// image for image tests, scaled by g_reduction_percentage and truncated to a
// whole number of megabytes. For each of g_repetition_count repetitions the
// target is attempted, and on FAILED_TOO_BIG retried in steps of 1/16th of the
// target while the attempted size stays above 1/8th of it.
//
// Returns the number of failures recorded; 0 also when an image test is
// skipped because the device has no image support.
int doTest( AllocType alloc_type )
{
    int error;
    int failure_counts = 0;
    size_t final_size = 0;
    size_t current_test_size;
    cl_mem mems[MAX_NUMBER_TO_ALLOCATE];
    int number_of_mems_used = 0;
    cl_ulong max_individual_allocation_size = g_max_individual_allocation_size;
    cl_ulong global_mem_size = g_global_mem_size ;

    // Indexed by alloc_type.
    static const char* alloc_description[] = {
        "buffer(s)",
        "read-only image(s)",
        "write-only image(s)",
        "buffer(s)",
        "read-only image(s)",
        "write-only image(s)",
    };

    // Every kind other than the two buffer kinds allocates images. Testing
    // "alloc_type > BUFFER" here would also match BUFFER_NON_BLOCKING and skip
    // that buffer test on devices without image support.
    const bool is_image_test = ( alloc_type != BUFFER ) && ( alloc_type != BUFFER_NON_BLOCKING );

    // Skip image tests if we don't support images on the device
    if( is_image_test && checkForImageSupport( g_device_id ) )
    {
        log_info( "Can not test image allocation because device does not support images.\n" );
        return 0;
    }

    // If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much greater than
    // CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT, the requested size would
    // exceed the maximum size allowed for an image, so cap it to the largest 2D image.
    if( is_image_test )
    {
        size_t max_width, max_height;

        error = clGetDeviceInfo( g_device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL );
        test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_WIDTH" );

        error = clGetDeviceInfo( g_device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL );
        test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_HEIGHT" );

        cl_ulong max_image2d_size = (cl_ulong)max_height * max_width * 4 * sizeof(cl_uint);

        if( max_individual_allocation_size > max_image2d_size )
        {
            max_individual_allocation_size = max_image2d_size;
        }
    }

    // Pick the baseline size based on whether we are doing a single large or multiple allocations
    g_max_size = g_multiple_allocations ? (size_t)global_mem_size : (size_t)max_individual_allocation_size;

    // Adjust based on the percentage
    if( g_reduction_percentage != 100 )
    {
        log_info( "NOTE: reducing max allocations to %d%%.\n", g_reduction_percentage );
        g_max_size = (size_t)( (double)g_max_size * (double)g_reduction_percentage / 100.0 );
    }

    // Truncate to a whole number of MB by clearing the low 20 bits.
    // NOTE(review): the mask literal has 15 hex digits, so it also clears bits 60-63.
    g_max_size &= (size_t)(0xFFFFFFFFFF00000ULL);

    log_info( "** Target allocation size (rounded to nearest MB) is: %llu bytes (%gMB).\n", llu( g_max_size ), toMB( g_max_size ) );
    log_info( "** Allocating %s to size %gMB.\n", alloc_description[alloc_type], toMB( g_max_size ) );

    for( int count = 0; count < g_repetition_count; count++ )
    {
        current_test_size = g_max_size;
        error = FAILED_TOO_BIG;
        log_info( "  => Allocation %d\n", count + 1 );

        while( ( error == FAILED_TOO_BIG ) && ( current_test_size > g_max_size / 8 ) )
        {
            // Reset our checksum for each allocation
            checksum = 0;

            // Start from zero so the size check below never reads an
            // uninitialised or stale value when allocate_size fails early.
            final_size = 0;

            // Do the allocation
            error = allocate_size( g_context, &g_queue, g_device_id, g_multiple_allocations, current_test_size, alloc_type,
                                   mems, &number_of_mems_used, &final_size, g_write_allocations, g_seed );

            // If we succeeded and we're supposed to execute a kernel, do so.
            if( error == SUCCEEDED && g_execute_kernel )
            {
                log_info( "\tExecuting kernel with memory objects.\n" );
                error = execute_kernel( g_context, &g_queue, g_device_id, alloc_type, mems, number_of_mems_used,
                                        g_write_allocations );
            }

            // If we failed to allocate more than 1/8th of the requested amount return a failure.
            if( final_size < (size_t)g_max_size / 8 )
            {
                log_error( "===> Allocation %d failed to allocate more than 1/8th of the requested size.\n", count + 1 );
                failure_counts++;
            }

            // Clean up.
            for( int i = 0; i < number_of_mems_used; i++ )
            {
                clReleaseMemObject( mems[i] );
            }

            if( error == FAILED_ABORT )
            {
                log_error( "  => Allocation %d failed.\n", count + 1 );
                failure_counts++;
            }

            if( error == FAILED_TOO_BIG )
            {
                // Cannot underflow: the loop condition guarantees
                // current_test_size > g_max_size / 8 >= g_max_size / 16.
                current_test_size -= g_max_size / 16;
                log_info( "\tFailed at this size; trying a smaller size of %gMB.\n", toMB( current_test_size ) );
            }
        }

        if( error == SUCCEEDED && current_test_size == g_max_size )
        {
            log_info("\tPASS: Allocation succeeded.\n");
        }
        else if( error == SUCCEEDED && current_test_size > g_max_size / 8 )
        {
            log_info("\tPASS: Allocation succeeded at reduced size.\n");
        }
        else
        {
            log_error("\tFAIL: Allocation failed.\n");
            failure_counts++;
        }
    }

    return failure_counts;
}
|
||||
|
||||
// Harness entry point for the buffer allocation test.
// The harness-supplied arguments are not used; doTest works on the g_* globals.
int test_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const AllocType kind = BUFFER;
    return doTest( kind );
}
|
||||
// Harness entry point for the read-only 2D image allocation test.
// The harness-supplied arguments are not used; doTest works on the g_* globals.
int test_image2d_read(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const AllocType kind = IMAGE_READ;
    return doTest( kind );
}
|
||||
// Harness entry point for the write-only 2D image allocation test.
// The harness-supplied arguments are not used; doTest works on the g_* globals.
int test_image2d_write(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const AllocType kind = IMAGE_WRITE;
    return doTest( kind );
}
|
||||
// Harness entry point registered under the name "buffer_non_blocking".
// Runs the shared allocation test with the BUFFER_NON_BLOCKING allocation type;
// the harness-supplied device, context, queue and element count are not used here.
// Returns whatever doTest() reports.
int test_buffer_non_blocking(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int result = doTest( BUFFER_NON_BLOCKING );
    return result;
}
|
||||
// Harness entry point registered under the name "image2d_read_non_blocking".
// Runs the shared allocation test with the IMAGE_READ_NON_BLOCKING allocation
// type; the harness-supplied device, context, queue and element count are not
// used here. Returns whatever doTest() reports.
int test_image2d_read_non_blocking(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int result = doTest( IMAGE_READ_NON_BLOCKING );
    return result;
}
|
||||
// Harness entry point registered under the name "image2d_write_non_blocking".
// Runs the shared allocation test with the IMAGE_WRITE_NON_BLOCKING allocation
// type; the harness-supplied device, context, queue and element count are not
// used here. Returns whatever doTest() reports.
int test_image2d_write_non_blocking(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int result = doTest( IMAGE_WRITE_NON_BLOCKING );
    return result;
}
|
||||
|
||||
basefn basefn_list[] = {
|
||||
test_buffer,
|
||||
test_image2d_read,
|
||||
test_image2d_write,
|
||||
test_buffer_non_blocking,
|
||||
test_image2d_read_non_blocking,
|
||||
test_image2d_write_non_blocking,
|
||||
};
|
||||
|
||||
const char *basefn_names[] = {
|
||||
"buffer",
|
||||
"image2d_read",
|
||||
"image2d_write",
|
||||
"buffer_non_blocking",
|
||||
"image2d_read_non_blocking",
|
||||
"image2d_write_non_blocking",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
int error;
|
||||
char *endPtr;
|
||||
int r;
|
||||
int randomize = 0;
|
||||
|
||||
test_start();
|
||||
|
||||
const char ** argList = (const char **)calloc( argc, sizeof( char*) );
|
||||
|
||||
if( NULL == argList )
|
||||
{
|
||||
log_error( "Failed to allocate memory for argList array.\n" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
argList[0] = argv[0];
|
||||
size_t argCount = 1;
|
||||
|
||||
// Parse arguments
|
||||
checkDeviceTypeOverride( &g_device_type );
|
||||
for( int i = 1; i < argc; i++ )
|
||||
{
|
||||
if( strcmp( argv[i], "cpu" ) == 0 || strcmp( argv[i], "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
g_device_type = CL_DEVICE_TYPE_CPU;
|
||||
else if( strcmp( argv[i], "gpu" ) == 0 || strcmp( argv[i], "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
g_device_type = CL_DEVICE_TYPE_GPU;
|
||||
else if( strcmp( argv[i], "accelerator" ) == 0 || strcmp( argv[i], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
g_device_type = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
else if( strcmp( argv[i], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
g_device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
|
||||
else if( strcmp( argv[i], "multiple" ) == 0 )
|
||||
g_multiple_allocations = 1;
|
||||
else if( strcmp( argv[i], "randomize" ) == 0 )
|
||||
randomize = 1;
|
||||
else if( strcmp( argv[i], "single" ) == 0 )
|
||||
g_multiple_allocations = 0;
|
||||
|
||||
else if( ( r = (int)strtol( argv[i], &endPtr, 10 ) ) && ( endPtr != argv[i] ) && ( *endPtr == 0 ) )
|
||||
{
|
||||
// By spec, that means the entire string was an integer, so take it as a repetition count
|
||||
g_repetition_count = r;
|
||||
}
|
||||
|
||||
else if( strchr( argv[i], '%' ) != NULL )
|
||||
{
|
||||
// Reduction percentage (let strtol ignore the percentage)
|
||||
g_reduction_percentage = (int)strtol( argv[i], NULL, 10 );
|
||||
}
|
||||
|
||||
else if( strcmp( argv[i], "do_not_force_fill" ) == 0 )
|
||||
{
|
||||
g_write_allocations = 0;
|
||||
}
|
||||
|
||||
else if( strcmp( argv[i], "do_not_execute" ) == 0 )
|
||||
{
|
||||
g_execute_kernel = 0;
|
||||
}
|
||||
|
||||
else if ( strcmp( argv[i], "--help" ) == 0 || strcmp( argv[i], "-h" ) == 0 )
|
||||
{
|
||||
printUsage( argv[0] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
argList[argCount] = argv[i];
|
||||
argCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if( randomize )
|
||||
{
|
||||
gRandomSeed = (cl_uint) time( NULL );
|
||||
log_info( "Random seed: %u.\n", gRandomSeed );
|
||||
gReSeed = 1;
|
||||
g_seed = RandomSeed( gRandomSeed );
|
||||
}
|
||||
|
||||
// All ready to go, so set up an environment
|
||||
error = init_cl();
|
||||
if (error) {
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( printDeviceHeader( g_device_id ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
error = clGetDeviceInfo(g_device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(g_max_individual_allocation_size), &g_max_individual_allocation_size, NULL);
|
||||
if ( error ) {
|
||||
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
error = clGetDeviceInfo(g_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(g_global_mem_size), &g_global_mem_size, NULL);
|
||||
if ( error ) {
|
||||
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
|
||||
llu( g_max_individual_allocation_size ), toMB( g_max_individual_allocation_size ),
|
||||
llu( g_global_mem_size ), toMB( g_global_mem_size ) );
|
||||
|
||||
if( g_global_mem_size > (cl_ulong)SIZE_MAX )
|
||||
{
|
||||
g_global_mem_size = (cl_ulong)SIZE_MAX;
|
||||
}
|
||||
|
||||
if( g_max_individual_allocation_size > g_global_mem_size )
|
||||
{
|
||||
log_error( "FAILURE: CL_DEVICE_MAX_MEM_ALLOC_SIZE (%llu) is greater than the CL_DEVICE_GLOBAL_MEM_SIZE (%llu)\n",
|
||||
llu( g_max_individual_allocation_size ), llu( g_global_mem_size ) );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// We may need to back off the global_mem_size on unified memory devices to leave room for application and operating system code
|
||||
// and associated data in the working set, so we dont start pathologically paging.
|
||||
// Check to see if we are a unified memory device
|
||||
cl_bool hasUnifiedMemory = CL_FALSE;
|
||||
if( ( error = clGetDeviceInfo( g_device_id, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( hasUnifiedMemory ), &hasUnifiedMemory, NULL )))
|
||||
{
|
||||
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
// we share unified memory so back off to 1/2 the global memory size.
|
||||
if( CL_TRUE == hasUnifiedMemory )
|
||||
{
|
||||
g_global_mem_size -= g_global_mem_size /2;
|
||||
log_info( "Device shares memory with the host, so backing off the maximum combined allocation size to be %gMB to avoid rampant paging.\n", toMB( g_global_mem_size ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Lets just use 60% of total available memory as framework/driver may not allow using all of it
|
||||
// e.g. vram on GPU is used by window server and even for this test, we need some space for context,
|
||||
// queue, kernel code on GPU.
|
||||
g_global_mem_size *= 0.60;
|
||||
}
|
||||
|
||||
int ret = parseAndCallCommandLineTests( argCount, argList, NULL, num_fns, basefn_list, basefn_names, true, 0, 0 );
|
||||
|
||||
free(argList);
|
||||
|
||||
test_finish();
|
||||
return ret;
|
||||
}
|
||||
|
||||
void printUsage( const char *execName )
|
||||
{
|
||||
const char *p = strrchr( execName, '/' );
|
||||
if( p != NULL )
|
||||
execName = p + 1;
|
||||
|
||||
log_info( "Usage: %s [options] [test_names]\n", execName );
|
||||
log_info( "Options:\n" );
|
||||
log_info( "\trandomize - Uses random seed\n" );
|
||||
log_info( "\tsingle - Tests using a single allocation as large as possible\n" );
|
||||
log_info( "\tmultiple - Tests using as many allocations as possible\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tnumReps - Optional integer specifying the number of repetitions to run and average the result (defaults to 1)\n" );
|
||||
log_info( "\treduction%% - Optional integer, followed by a %% sign, that acts as a multiplier for the target amount of memory.\n" );
|
||||
log_info( "\t Example: target amount of 512MB and a reduction of 75%% will result in a target of 384MB.\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tdo_not_force_fill - Disable explicitly write data to all memory objects after creating them.\n" );
|
||||
log_info( "\t Without this, the kernel execution can not verify its checksum.\n" );
|
||||
log_info( "\tdo_not_execute - Disable executing a kernel that accesses all of the memory objects.\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "Test names (Allocation Types):\n" );
|
||||
for( int i = 0; i < num_fns; i++ )
|
||||
{
|
||||
log_info( "\t%s\n", basefn_names[i] );
|
||||
}
|
||||
}
|
||||
65
test_conformance/allocations/testBase.h
Normal file
65
test_conformance/allocations/testBase.h
Normal file
@@ -0,0 +1,65 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testBase_h
|
||||
#define _testBase_h
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
|
||||
// Value 100; by its name an upper bound on the number of memory objects a test
// allocates (its use is outside this header).
#define MAX_NUMBER_TO_ALLOCATE 100

// Status codes shared by the allocation test helpers.
// Negative values are parenthesized so the macros stay a single operand in any
// expression they are pasted into.
#define FAILED_CORRUPTED_QUEUE (-2)
#define FAILED_ABORT (-1)
#define FAILED_TOO_BIG 1
// On Windows macro `SUCCEEDED' is defined in `WinError.h'. It causes compiler warnings. Let us avoid them.
#if defined( _WIN32 ) && defined( SUCCEEDED )
#undef SUCCEEDED
#endif
#define SUCCEEDED 0
|
||||
|
||||
// Kind of memory object (and transfer mode) an allocation test works with.
// One enumerator exists per registered test function in the allocations test;
// the numeric values are the ones the compiler assigned implicitly before.
enum AllocType
{
    BUFFER                   = 0,  // passed to doTest() by test_buffer
    IMAGE_READ               = 1,  // passed by test_image2d_read
    IMAGE_WRITE              = 2,  // passed by test_image2d_write
    BUFFER_NON_BLOCKING      = 3,  // passed by test_buffer_non_blocking
    IMAGE_READ_NON_BLOCKING  = 4,  // passed by test_image2d_read_non_blocking
    IMAGE_WRITE_NON_BLOCKING = 5,  // passed by test_image2d_write_non_blocking
};
|
||||
|
||||
// test_error_abort(errCode, msg):
//   If errCode is not CL_SUCCESS, report it with print_error() and make the
//   enclosing function return FAILED_ABORT.
// test_error_ret_abort(errCode, msg, retValue):
//   Same check. NOTE: retValue is accepted for signature compatibility but is
//   not used; the macro always returns FAILED_ABORT.
// errCode is parenthesized so that expressions such as `a | b` compare as a
// whole. It is still evaluated more than once, so pass a plain variable rather
// than a call with side effects.
#define test_error_abort(errCode,msg) test_error_ret_abort(errCode,msg,errCode)
#define test_error_ret_abort(errCode,msg,retValue) { if( (errCode) != CL_SUCCESS ) { print_error( (errCode), msg ); return FAILED_ABORT ; } }
|
||||
|
||||
|
||||
#endif // _testBase_h
|
||||
|
||||
|
||||
|
||||
34
test_conformance/api/CMakeLists.txt
Normal file
34
test_conformance/api/CMakeLists.txt
Normal file
@@ -0,0 +1,34 @@
|
||||
# Conformance test module "API".
# Sets the module name and its source list, then includes the shared
# CMakeCommon.txt from the parent directory. That file is not visible here;
# presumably it consumes MODULE_NAME and ${MODULE_NAME}_SOURCES to define the
# test target - TODO confirm.
set(MODULE_NAME "API")

set(${MODULE_NAME}_SOURCES
    # Tests that live in this directory
    main.c
    test_bool.c
    test_retain.cpp
    test_retain_program.c
    test_queries.cpp
    test_create_kernels.c
    test_kernels.c
    test_api_min_max.c
    test_kernel_arg_changes.cpp
    test_kernel_arg_multi_setup.cpp
    test_binary.cpp
    test_native_kernel.cpp
    test_mem_objects.cpp
    test_create_context_from_type.cpp
    test_device_min_data_type_align_size_alignment.cpp
    test_platform.cpp
    test_kernel_arg_info.c
    test_null_buffer_arg.c
    test_mem_object_info.cpp
    # Shared harness sources compiled into this module
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/conversions.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/msvc9.c
    ../../test_common/harness/imageHelpers.cpp
)

include(../CMakeCommon.txt)
|
||||
27
test_conformance/api/Jamfile
Normal file
27
test_conformance/api/Jamfile
Normal file
@@ -0,0 +1,27 @@
|
||||
# Project-wide requirements for the api tests: the .c sources are built as C++,
# using -xc++ with the gcc toolset and /TP with the msvc toolset.
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;
|
||||
|
||||
|
||||
# Builds the test_api executable from the test sources in this directory.
exe test_api
    : main.c
      test_api_min_max.c
      test_binary.cpp
      test_create_kernels.c
      test_create_context_from_type.cpp
      test_kernel_arg_changes.cpp
      test_kernel_arg_multi_setup.cpp
      test_kernels.c
      test_native_kernel.cpp
      test_queries.cpp
      test_retain_program.c
      test_platform.cpp
      # The sources below are listed by the CMakeLists.txt and Makefile of this
      # directory but were missing here, although main.c registers tests such as
      # test_for_bool_type, test_retain_queue_single and test_null_buffer_arg.
      # NOTE(review): the file-to-test mapping is inferred from the file names -
      # confirm. Harness sources are deliberately not added; how this Jam build
      # obtains them is not visible from this file.
      test_bool.c
      test_retain.cpp
      test_kernel_arg_info.c
      test_mem_objects.cpp
      test_device_min_data_type_align_size_alignment.cpp
      test_null_buffer_arg.c
      test_mem_object_info.cpp
    ;
|
||||
|
||||
# Installs test_api into the per-variant distribution tree under $(DIST).
install dist
    : test_api #test.lst
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/api
      <variant>release:<location>$(DIST)/release/tests/test_conformance/api
    ;
|
||||
61
test_conformance/api/Makefile
Normal file
61
test_conformance/api/Makefile
Normal file
@@ -0,0 +1,61 @@
|
||||
# Makefile for the test_api conformance test (links against the OpenCL, OpenGL,
# GLUT and AppKit frameworks).

# Optional ATF support: define BUILD_WITH_ATF to link the framework and pass
# -DUSE_ATF to the compiler.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources of this directory followed by the shared harness sources.
SRCS = main.c \
	test_retain_program.c \
	test_queries.cpp \
	test_create_kernels.c \
	test_kernels.c \
	test_kernel_arg_info.c \
	test_api_min_max.c \
	test_kernel_arg_changes.cpp \
	test_kernel_arg_multi_setup.cpp \
	test_binary.cpp \
	test_native_kernel.cpp \
	test_create_context_from_type.cpp \
	test_platform.cpp \
	test_retain.cpp \
	test_device_min_data_type_align_size_alignment.cpp \
	test_mem_objects.cpp \
	test_bool.c \
	test_null_buffer_arg.c \
	test_mem_object_info.cpp \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/threadTesting.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/imageHelpers.cpp \
	../../test_common/harness/kernelHelpers.c \
	../../test_common/harness/typeWrappers.cpp \
	../../test_common/harness/mt19937.c \
	../../test_common/harness/conversions.c

# Each word is turned into a -D<word> option through $(DEFINES:%=-D%) below.
DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
TARGET = test_api
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# CC is the C++ driver, so the .c sources (built by make's implicit rule, which
# uses $(CC)) go through it as well, and it is also used for linking.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Object list: map .c and then .cpp suffixes to .o.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# all and clean name actions, not files; declare them phony so a stray file
# called "all" or "clean" cannot stop them from running.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
215
test_conformance/api/main.c
Normal file
215
test_conformance/api/main.c
Normal file
@@ -0,0 +1,215 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
|
||||
// (for example, generate_random_image_data()), the tests are required to declare
|
||||
// the following variables (<rdar://problem/11111245>):
|
||||
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
|
||||
bool gTestRounding = false;
|
||||
|
||||
// Test functions of the api suite, in registration order.
// Entry i here is run under the name basefn_names[i]; keep both tables in the
// same order and of the same length.
basefn basefn_list[] = {
    // get_*_info queries, task enqueue, program binaries
    test_get_platform_info,
    test_get_sampler_info,
    test_get_command_queue_info,
    test_get_context_info,
    test_get_device_info,
    test_enqueue_task,
    test_binary_get,
    test_program_binary_create,
    test_kernel_required_group_size,

    // release ordering
    test_release_kernel_order,
    test_release_during_execute,

    // kernel loading, kernel info and kernel arguments
    test_load_single_kernel,
    test_load_two_kernels,
    test_load_two_kernels_in_one,
    test_load_two_kernels_manually,
    test_get_program_info_kernel_names,
    test_get_kernel_arg_info,
    test_create_kernels_in_program,
    test_get_kernel_info,
    test_execute_kernel_local_sizes,
    test_set_kernel_arg_by_index,
    test_set_kernel_arg_constant,
    test_set_kernel_arg_struct_array,
    test_kernel_global_constant,

    // min_max_* tests
    test_min_max_thread_dimensions,
    test_min_max_work_items_sizes,
    test_min_max_work_group_size,
    test_min_max_read_image_args,
    test_min_max_write_image_args,
    test_min_max_mem_alloc_size,
    test_min_max_image_2d_width,
    test_min_max_image_2d_height,
    test_min_max_image_3d_width,
    test_min_max_image_3d_height,
    test_min_max_image_3d_depth,
    test_min_max_image_array_size,
    test_min_max_image_buffer_size,
    test_min_max_parameter_size,
    test_min_max_samplers,
    test_min_max_constant_buffer_size,
    test_min_max_constant_args,
    test_min_max_compute_units,
    test_min_max_address_bits,
    test_min_max_single_fp_config,
    test_min_max_double_fp_config,
    test_min_max_local_mem_size,
    test_min_max_kernel_preferred_work_group_size_multiple,
    test_min_max_execution_capabilities,
    test_min_max_queue_properties,
    test_min_max_device_version,
    test_min_max_language_version,

    // kernel argument changes
    test_kernel_arg_changes,
    test_kernel_arg_multi_setup_random,

    test_native_kernel,

    test_create_context_from_type,

    // platform tests and bool type
    test_platform_extensions,
    test_get_platform_ids,
    test_for_bool_type,

    test_repeated_setup_cleanup,

    // retain tests and data type alignment
    test_retain_queue_single,
    test_retain_queue_multiple,
    test_retain_mem_object_single,
    test_retain_mem_object_multiple,
    test_min_data_type_align_size_alignment,

    // memory object tests
    test_mem_object_destructor_callback,
    test_null_buffer_arg,
    test_get_buffer_info,
    test_get_image2d_info,
    test_get_image3d_info,
    test_get_image1d_info,
    test_get_image1d_array_info,
    test_get_image2d_array_info,
};
|
||||
|
||||
|
||||
// Command-line names of the api tests, in registration order.
// Entry i here names the function basefn_list[i]; keep both tables in the same
// order and of the same length.
const char *basefn_names[] = {
    // get_*_info queries, task enqueue, program binaries
    "get_platform_info",
    "get_sampler_info",
    "get_command_queue_info",
    "get_context_info",
    "get_device_info",
    "enqueue_task",
    "binary_get",
    "binary_create",
    "kernel_required_group_size",

    // release ordering
    "release_kernel_order",
    "release_during_execute",

    // kernel loading, kernel info and kernel arguments
    "load_single_kernel",
    "load_two_kernels",
    "load_two_kernels_in_one",
    "load_two_kernels_manually",
    "get_program_info_kernel_names",
    "get_kernel_arg_info",
    "create_kernels_in_program",
    "get_kernel_info",
    "execute_kernel_local_sizes",
    "set_kernel_arg_by_index",
    "set_kernel_arg_constant",
    "set_kernel_arg_struct_array",
    "kernel_global_constant",

    // min_max_* tests
    "min_max_thread_dimensions",
    "min_max_work_items_sizes",
    "min_max_work_group_size",
    "min_max_read_image_args",
    "min_max_write_image_args",
    "min_max_mem_alloc_size",
    "min_max_image_2d_width",
    "min_max_image_2d_height",
    "min_max_image_3d_width",
    "min_max_image_3d_height",
    "min_max_image_3d_depth",
    "min_max_image_array_size",
    "min_max_image_buffer_size",
    "min_max_parameter_size",
    "min_max_samplers",
    "min_max_constant_buffer_size",
    "min_max_constant_args",
    "min_max_compute_units",
    "min_max_address_bits",
    "min_max_single_fp_config",
    "min_max_double_fp_config",
    "min_max_local_mem_size",
    "min_max_kernel_preferred_work_group_size_multiple",
    "min_max_execution_capabilities",
    "min_max_queue_properties",
    "min_max_device_version",
    "min_max_language_version",

    // kernel argument changes
    "kernel_arg_changes",
    "kernel_arg_multi_setup_random",

    "native_kernel",

    "create_context_from_type",
    "platform_extensions",

    // platform ids and bool type
    "get_platform_ids",
    "bool_type",

    "repeated_setup_cleanup",

    // retain tests
    "retain_queue_single",
    "retain_queue_multiple",
    "retain_mem_object_single",
    "retain_mem_object_multiple",

    "min_data_type_align_size_alignment",

    // memory object tests
    "mem_object_destructor_callback",
    "null_buffer_arg",
    "get_buffer_info",
    "get_image2d_info",
    "get_image3d_info",
    "get_image1d_info",
    "get_image1d_array_info",
    "get_image2d_array_info",
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
}
|
||||
|
||||
|
||||
108
test_conformance/api/procs.h
Normal file
108
test_conformance/api/procs.h
Normal file
@@ -0,0 +1,108 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/clImageHelper.h"
|
||||
#include "../../test_common/harness/imageHelpers.h"
|
||||
extern float calculate_ulperror(float a, float b);
|
||||
|
||||
extern int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_for_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_kernel_arg_changes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
|
||||
31
test_conformance/api/testBase.h
Normal file
31
test_conformance/api/testBase.h
Normal file
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testBase_h
|
||||
#define _testBase_h
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
#endif // _testBase_h
|
||||
|
||||
|
||||
|
||||
2122
test_conformance/api/test_api_min_max.c
Normal file
2122
test_conformance/api/test_api_min_max.c
Normal file
File diff suppressed because it is too large
Load Diff
226
test_conformance/api/test_binary.cpp
Normal file
226
test_conformance/api/test_binary.cpp
Normal file
@@ -0,0 +1,226 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
// OpenCL C source for the kernel "sample_test" used by the program-binary
// tests in this file: each work-item converts src[tid] to int, adds 1 and
// stores the result in dst[tid]. The adjacent literals form one source string.
static const char *sample_binary_kernel_source[] = {
|
||||
"__kernel void sample_test(__global float *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (int)src[tid] + 1;\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
|
||||
namespace {

// Owns one malloc'ed host allocation and frees it when it goes out of scope.
// The tests below leave through many early-exit paths (explicit returns and
// the test_error checks, which are expected to bail out of the caller on
// failure), so tying the free() to scope exit keeps those paths leak-free.
class ScopedHostAlloc
{
public:
    explicit ScopedHostAlloc( size_t byteCount ) : mPtr( malloc( byteCount ) ) {}
    ~ScopedHostAlloc() { free( mPtr ); }

    // Returns the allocation, or NULL if malloc failed.
    void *get() const { return mPtr; }

private:
    // Not copyable: exactly one owner per allocation.
    ScopedHostAlloc( const ScopedHostAlloc & );
    ScopedHostAlloc &operator=( const ScopedHostAlloc & );

    void *mPtr;
};

} // anonymous namespace

// Builds the sample kernel from source for deviceID and checks that the
// program binary can be queried: the reported binary size must be non-zero,
// the size reported for CL_PROGRAM_BINARIES must equal the size of a
// one-entry pointer array, and fetching the binary itself must succeed.
// The binary contents are not inspected.
// queue and num_elements are unused.
// Returns 0 on success, non-zero on failure.
int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    size_t binarySize;


    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
    test_error( error, "Unable to create program from source" );

    // Build so we have a binary to get
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // Create a buffer and get the actual binary. The buffer is released
    // automatically on every return path below.
    ScopedHostAlloc binaryStorage( sizeof(unsigned char)*binarySize );
    unsigned char *binary = (unsigned char*)binaryStorage.get();
    if( binary == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the program binary\n" );
        return -1;
    }
    unsigned char *buffers[ 1 ] = { binary };

    // Do another sanity check here first
    size_t size;
    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
    test_error( error, "Unable to get expected size of binaries array" );
    if( size != sizeof( buffers ) )
    {
        log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
        return -1;
    }

    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary" );

    // No way to verify the binary is correct, so just be good with that
    return 0;
}


// Round-trips a program through its binary form:
//   1. builds the sample kernel from source and fetches its binary;
//   2. creates and builds a second program from that binary (with a
//      binary_status array) and fetches its binary;
//   3. creates and builds a third program from the second binary with a NULL
//      binary_status pointer and fetches its binary;
//   4. runs "sample_test" from the source-built program and from the first
//      binary-built program on the same input and requires identical output.
// num_elements is unused.
// Returns 0 on success, non-zero on failure.
int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    /* To test this in a self-contained fashion, we have to create a program with
       source, then get the binary, then use that binary to reload the program, and then verify */

    int error;
    clProgramWrapper program, program_from_binary;
    size_t binarySize;


    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
    test_error( error, "Unable to create program from source" );

    // Build so we have a binary to get
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // Create a buffer and get the actual binary
    ScopedHostAlloc binaryStorage( binarySize );
    unsigned char *binary = (unsigned char*)binaryStorage.get();
    if( binary == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the program binary\n" );
        return -1;
    }
    const unsigned char *buffers[ 1 ] = { binary };

    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary" );

    cl_int loadErrors[ 1 ];
    program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
    test_error( error, "Unable to load valid program binary" );
    test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );

    error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program" );

    // Get the size of the binary built from the first binary
    size_t binary2Size;
    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARY_SIZES, sizeof( binary2Size ), &binary2Size, NULL );
    test_error( error, "Unable to get size for the binary program" );

    // Same sanity check as for the first binary; it also keeps malloc(0)
    // from being mistaken for an allocation failure below.
    if( binary2Size == 0 )
    {
        log_error( "ERROR: Binary size of program created from binary is zero\n" );
        return -1;
    }

    // Now get the binary one more time and verify it loaded the right binary
    ScopedHostAlloc binary2Storage( binary2Size );
    unsigned char *binary2 = (unsigned char*)binary2Storage.get();
    if( binary2 == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the second program binary\n" );
        return -1;
    }
    buffers[ 0 ] = binary2;
    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary second time" );

    // Try again, this time without passing the status ptr in, to make sure we still
    // get a valid binary
    clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binary2Size, buffers, NULL, &error );
    test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );

    error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program created without binary_status" );

    // Get the size of the binary created without passing binary_status
    size_t binary3Size;
    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARY_SIZES, sizeof( binary3Size ), &binary3Size, NULL );
    test_error( error, "Unable to get size for the binary program created without binary_status" );

    if( binary3Size == 0 )
    {
        log_error( "ERROR: Binary size of program created without binary_status is zero\n" );
        return -1;
    }

    // Now get the binary one more time
    ScopedHostAlloc binary3Storage( binary3Size );
    unsigned char *binary3 = (unsigned char*)binary3Storage.get();
    if( binary3 == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the third program binary\n" );
        return -1;
    }
    buffers[ 0 ] = binary3;
    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary from the program created without binary_status" );

    // The intermediate binaries are not used past this point; their storage
    // is released when the function returns.

    // Now execute them both to see that they both do the same thing.
    clMemWrapper in, out, out_binary;
    clKernelWrapper kernel, kernel_binary;
    size_t size_to_run = 1000;

    // Allocate some data (released automatically on every return path)
    ScopedHostAlloc inStorage( sizeof(cl_float)*size_to_run );
    ScopedHostAlloc outStorage( sizeof(cl_int)*size_to_run );
    ScopedHostAlloc outBinaryStorage( sizeof(cl_int)*size_to_run );
    cl_float *in_data = (cl_float*)inStorage.get();
    cl_int *out_data = (cl_int*)outStorage.get();
    cl_int *out_data_binary = (cl_int*)outBinaryStorage.get();
    if( in_data == NULL || out_data == NULL || out_data_binary == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the kernel input/output data\n" );
        return -1;
    }
    memset(out_data, 0, sizeof(cl_int)*size_to_run);
    memset(out_data_binary, 0, sizeof(cl_int)*size_to_run);
    for (size_t i=0; i<size_to_run; i++)
        in_data[i] = (cl_float)i;

    // Create the buffers
    in = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*size_to_run, in_data, &error);
    test_error( error, "clCreateBuffer failed");
    out = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data, &error);
    test_error( error, "clCreateBuffer failed");
    out_binary = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data_binary, &error);
    test_error( error, "clCreateBuffer failed");

    // Create the kernels
    kernel = clCreateKernel(program, "sample_test", &error);
    test_error( error, "clCreateKernel failed");
    kernel_binary = clCreateKernel(program_from_binary, "sample_test", &error);
    test_error( error, "clCreateKernel from binary failed");

    // Set the arguments
    error = clSetKernelArg(kernel, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel, 1, sizeof(out), &out);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 1, sizeof(out_binary), &out_binary);
    test_error( error, "clSetKernelArg failed");

    // Execute the kernels
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed");
    error = clEnqueueNDRangeKernel(queue, kernel_binary, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel for binary kernel failed");

    // Finish up
    error = clFinish(queue);
    test_error( error, "clFinish failed");

    // Get the results back
    error = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");
    error = clEnqueueReadBuffer(queue, out_binary, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data_binary, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");

    // Compare the results
    if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
    {
        log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
        return -1;
    }

    // All done!
    return 0;
}
|
||||
|
||||
|
||||
52
test_conformance/api/test_bool.c
Normal file
52
test_conformance/api/test_bool.c
Normal file
@@ -0,0 +1,52 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
|
||||
// OpenCL C source for the kernel "kernel_with_bool", which declares and
// branches on a variable of type bool: dst[tid] receives (int)src[tid] when
// src[tid] lies strictly between -0.5f and 0.5f, and 0 otherwise.
// The adjacent literals form a single source string.
const char *kernel_with_bool[] = {
|
||||
"__kernel void kernel_with_bool(__global float *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" bool myBool = (src[tid] < 0.5f) && (src[tid] > -0.5f);\n"
|
||||
" if(myBool)\n"
|
||||
" {\n"
|
||||
" dst[tid] = (int)src[tid];\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" dst[tid] = 0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
int test_for_bool_type(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
int err = create_single_kernel_helper(context,
|
||||
&program,
|
||||
&kernel,
|
||||
1, kernel_with_bool,
|
||||
"kernel_with_bool" );
|
||||
return err;
|
||||
}
|
||||
|
||||
130
test_conformance/api/test_create_context_from_type.cpp
Normal file
130
test_conformance/api/test_create_context_from_type.cpp
Normal file
@@ -0,0 +1,130 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
// Creates a fresh context with clCreateContextFromType, using the device type
// and platform queried from deviceID, then proves the context is usable:
// a command queue is created on it for deviceID, a float->int conversion
// kernel is built and run over 10 random floats, and the device results are
// compared with the same conversion done on the host.
// The context, queue and num_elements arguments are unused; all objects are
// created on the new context.
// Returns 0 on success, non-zero on failure.
int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    clContextWrapper context_to_test;
    clCommandQueueWrapper queue_to_test;
    size_t threads[1], localThreads[1];
    cl_float inputData[10];
    cl_int outputData[10];
    int i;
    RandomSeed seed( gRandomSeed );

    const char *sample_single_test_kernel[] = {
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n" };

    cl_device_type type;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n");

    cl_platform_id platform;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n");

    // The property list is an array of integers terminated by 0; a pointer
    // constant such as NULL is the wrong type for the terminator.
    cl_context_properties properties[3] = {
      (cl_context_properties)CL_CONTEXT_PLATFORM,
      (cl_context_properties)platform,
      0
    };

    context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error);
    test_error(error, "clCreateContextFromType failed");
    if (context_to_test == NULL) {
        log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    queue_to_test = clCreateCommandQueueWithProperties(context_to_test, deviceID, NULL, &error);
    test_error(error, "clCreateCommandQueueWithProperties failed");
    if (queue_to_test == NULL) {
        log_error("clCreateCommandQueueWithProperties returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Write some test data */
    memset( outputData, 0, sizeof( outputData ) );

    for (i=0; i<10; i++)
        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);

    error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
    test_error( error, "Unable to set testing kernel data" );

    /* Test setting the arguments by index manually */
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );


    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;

    error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    /* The host applies the same (int) cast as the kernel */
    for (i=0; i<10; i++)
    {
        if (outputData[i] != (int)inputData[i])
        {
            log_error( "ERROR: Data did not verify on first pass!\n" );
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
643
test_conformance/api/test_create_kernels.c
Normal file
643
test_conformance/api/test_create_kernels.c
Normal file
@@ -0,0 +1,643 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
|
||||
// One source string defining a single kernel, "sample_test", which stores
// (int)src[tid] into dst[tid].
const char *sample_single_kernel[] = {
|
||||
"__kernel void sample_test(__global float *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (int)src[tid];\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
// Length of sample_single_kernel[0]; filled in with strlen() by
// test_load_single_kernel before it is passed to clCreateProgramWithSource.
size_t sample_single_kernel_lengths[1];
|
||||
|
||||
// Two separate source strings (note the comma after the first "}\n"), one
// kernel each: "sample_test" (float -> int) and "sample_test2" (int -> float).
const char *sample_two_kernels[] = {
|
||||
"__kernel void sample_test(__global float *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (int)src[tid];\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (float)src[tid];\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
// Lengths of the two strings in sample_two_kernels; filled in with strlen()
// by test_load_two_kernels.
size_t sample_two_kernel_lengths[2];
|
||||
|
||||
// The same two kernels as sample_two_kernels, but in ONE source string: there
// is no comma after the first "}\n", so all the literals are concatenated.
const char *sample_two_kernels_in_1[] = {
|
||||
"__kernel void sample_test(__global float *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (int)src[tid];\n"
|
||||
"\n"
|
||||
"}\n"
|
||||
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (float)src[tid];\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
// Length of sample_two_kernels_in_1[0]; filled in with strlen() by
// test_load_two_kernels_in_one.
size_t sample_two_kernels_in_1_lengths[1];
|
||||
|
||||
|
||||
// Source for a kernel "test_kernel" that writes src[gid]+1 into dst[gid].
// NOTE(review): not referenced by the tests visible in this part of the
// file -- presumably used further down; confirm.
const char *repeate_test_kernel =
|
||||
"__kernel void test_kernel(__global int *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
|
||||
// Builds a program containing one kernel, creates its kernel object with
// clCreateKernelsInProgram and validates what clGetKernelInfo reports for it:
//   - exactly one kernel was reported for the program;
//   - CL_KERNEL_PROGRAM and CL_KERNEL_CONTEXT match the objects used to
//     create the kernel, with the expected returned sizes;
//   - CL_KERNEL_NUM_ARGS has the size of a cl_uint and the value 2;
//   - CL_KERNEL_FUNCTION_NAME is "sample_test" and the returned size counts
//     the terminating NUL.
// queue and num_elements are unused.
// Returns 0 on success, non-zero on failure.
int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    cl_program testProgram;
    clKernelWrapper kernel;
    cl_context testContext;
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;
    size_t realSize;


    /* Preprocess: calc the length of each source file line */
    sample_single_kernel_lengths[ 0 ] = strlen( sample_single_kernel[ 0 ] );

    /* Create a program */
    program = clCreateProgramWithSource( context, 1, sample_single_kernel, sample_single_kernel_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create single kernel program" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build single kernel program" );
    error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels);
    test_error( error, "Unable to create single kernel program" );

    /* The source defines exactly one kernel, so exactly one must be reported
       (mirrors the numKernels check in the two-kernel tests) */
    if( numKernels != 1 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check program and context pointers */
    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize );
    test_error( error, "Unable to get kernel's program" );
    if( (cl_program)testProgram != (cl_program)program )
    {
        log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program );
        return -1;
    }
    if( realSize != sizeof( cl_program ) )
    {
        log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize );
    test_error( error, "Unable to get kernel's context" );
    if( (cl_context)testContext != (cl_context)context )
    {
        log_error( "ERROR: Returned kernel's context does not match program used to create it! (Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context );
        return -1;
    }
    if( realSize != sizeof( cl_context ) )
    {
        log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize );
        return -1;
    }

    /* Test arg count: first the size-only query, then the value */
    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize );
    test_error( error, "Unable to get size of arg count info from kernel" );

    if( realSize != sizeof( testArgCount ) )
    {
        log_error( "ERROR: size of arg count not valid! %d\n", (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );

    if( testArgCount != 2 )
    {
        log_error( "ERROR: Kernel arg count does not match!\n" );
        return -1;
    }


    /* Test function name */
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize );
    test_error( error, "Unable to get name from kernel" );

    if( strcmp( (char *)testName, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel names do not match!\n" );
        return -1;
    }
    /* The returned size must include the terminating NUL */
    if( realSize != strlen( (char *)testName ) + 1 )
    {
        log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize );
        return -1;
    }

    /* All done */

    return 0;
}
|
||||
|
||||
// Builds a program from two separate source strings (one kernel each) and
// checks clCreateKernelsInProgram: it must report exactly two kernels, their
// function names must be "sample_test" and "sample_test2" in either order
// with no duplicates, and the first returned kernel must take 2 arguments.
// queue and num_elements are unused.
// Returns 0 on success, non-zero on failure.
int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[ 512 ];
    cl_uint testArgCount;


    /* Preprocess: calc the length of each source file line */
    sample_two_kernel_lengths[ 0 ] = strlen( sample_two_kernels[ 0 ] );
    sample_two_kernel_lengths[ 1 ] = strlen( sample_two_kernels[ 1 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 2, sample_two_kernels, sample_two_kernel_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program!" );
        return -1;
    }
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );
    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    /* The kernels may come back in either order, so track which names were seen */
    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from second kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        /* Report the name that is actually missing */
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );

    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* All done */
    return 0;
}
|
||||
|
||||
// Builds a program from ONE source string that defines two kernels and checks
// clCreateKernelsInProgram: it must report exactly two kernels, the first
// returned kernel must take 2 arguments, and the function names must be
// "sample_test" and "sample_test2" in either order with no duplicates.
// queue and num_elements are unused.
// Returns 0 on success, non-zero on failure.
int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;


    /* Preprocess: calc the length of each source file line */
    sample_two_kernels_in_1_lengths[ 0 ] = strlen( sample_two_kernels_in_1[ 0 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, sample_two_kernels_in_1_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );
    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    /* The kernels may come back in either order, so track which names were seen */
    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );

    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* Check second kernel */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        /* Report the name that is actually missing */
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    /* All done */
    return 0;
}
|
||||
|
||||
/*
 * Builds the two-kernel sample program (sample_two_kernels_in_1) for the given
 * device and creates both of its kernels explicitly by name.
 *
 * deviceID     - device the program is built for.
 * context      - context the program is created in.
 * queue        - unused by this test.
 * num_elements - unused by this test.
 *
 * Returns 0 when both kernels could be created, non-zero otherwise.
 */
int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Try manually creating kernels (backwards just in case) */
    kernel1 = clCreateKernel( program, "sample_test2", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    /* Check the error code as well as the handle, exactly as for kernel 1. */
    kernel2 = clCreateKernel( program, "sample_test", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
|
||||
|
||||
/*
 * Builds the two-kernel sample program and verifies what clGetProgramInfo
 * reports about it:
 *   - CL_PROGRAM_NUM_KERNELS must be 2;
 *   - CL_PROGRAM_KERNEL_NAMES must be a null-terminated string equal to one of
 *     the two accepted orderings of "sample_test" and "sample_test2".
 * Afterwards both kernels are created by name.
 *
 * deviceID     - device the program is built for.
 * context      - context the program is created in.
 * queue        - unused by this test.
 * num_elements - unused by this test.
 *
 * Returns 0 on success, non-zero on any failure.
 */
int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;
    size_t i;

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Lookup the number of kernels in the program. */
    size_t total_kernels = 0;
    error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t), &total_kernels, NULL);
    test_error( error, "Unable to get program info num kernels");

    if (total_kernels != 2)
    {
        print_error( error, "Program did not contain two kernels" );
        return -1;
    }

    /* Lookup the kernel names. Either ordering of the two names is accepted. */
    const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ;
    const size_t num_actual_names = sizeof( actual_names ) / sizeof( actual_names[0] );

    size_t kernel_names_len = 0;
    error = clGetProgramInfo(program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len);
    test_error( error, "Unable to get length of kernel names list." );

    /* Both accepted strings have the same length, so comparing against the first is enough. */
    if (kernel_names_len != (strlen(actual_names[0])+1))
    {
        print_error( error, "Kernel names length did not match");
        return -1;
    }

    const size_t len = (kernel_names_len+1)*sizeof(char);
    char* kernel_names = (char*)malloc(len);
    if (kernel_names == NULL)
    {
        log_error( "ERROR: Unable to allocate %d bytes for the kernel names list.\n", (int)len );
        return -1;
    }

    error = clGetProgramInfo(program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len);
    if (error != CL_SUCCESS)
    {
        /* Release the buffer before bailing out so the failure path does not leak it. */
        print_error( error, "Unable to get kernel names list." );
        free(kernel_names);
        return -1;
    }

    /* Check to see if the kernel name array is null terminated. The reported
       length is validated first so the index below cannot leave the buffer. */
    if (kernel_names_len == 0 || kernel_names_len > len || kernel_names[kernel_names_len-1] != '\0')
    {
        free(kernel_names);
        print_error( error, "Kernel name list was not null terminated");
        return -1;
    }

    /* Check to see if the correct kernel name string was returned. */
    for( i = 0; i < num_actual_names; i++ )
        if( 0 == strcmp(actual_names[i], kernel_names) )
            break;

    if (i == num_actual_names)
    {
        /* Log while the buffer is still alive, and list every accepted string. */
        log_error( "Kernel names \"%s\" did not match:\n", kernel_names );
        for( i = 0; i < num_actual_names; i++ )
            log_error( "\t\t\"%s\"\n", actual_names[i] );
        free(kernel_names);
        return -1;
    }
    free(kernel_names);

    /* Finally make sure both kernels can be created by name. */
    kernel1 = clCreateKernel( program, "sample_test", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    kernel2 = clCreateKernel( program, "sample_test2", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
|
||||
|
||||
/*
 * OpenCL C source passed to create_single_kernel_helper by test_enqueue_task.
 * The kernel "sample_test" stores tid + i into dst[i] for every i in
 * [0, count), where tid is get_global_id(0).
 */
static const char *single_task_kernel[] =
{
    "__kernel void sample_test(__global int *dst, int count)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " for( int i = 0; i < count; i++ )\n"
    " dst[i] = tid + i;\n"
    "\n"
    "}\n"
};
|
||||
|
||||
/*
 * Runs the single_task_kernel program through clEnqueueTask and checks that
 * the output buffer contains 0, 1, ..., count-1.
 *
 * deviceID     - unused by this test.
 * context      - context used to build the program and create the buffer.
 * queue        - command queue the task and the read-back are enqueued on.
 * num_elements - unused by this test.
 *
 * Returns 0 when every element validates, non-zero otherwise.
 */
int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper output;
    cl_int count;

    if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) )
        return -1;

    // Create args
    count = 100;
    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count );
    test_error( error, "Unable to set kernel argument" );

    // Run task
    error = clEnqueueTask( queue, kernel, 0, NULL, NULL );
    test_error( error, "Unable to run task" );

    // Read results
    cl_int *results = (cl_int*)malloc(sizeof(cl_int)*count);
    if( results == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffer for task results\n" );
        return -1;
    }

    error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL );
    if( error != CL_SUCCESS )
    {
        // Handled by hand so the host buffer is released on failure.
        print_error( error, "Unable to read results" );
        free(results);
        return -1;
    }

    // Validate: the kernel writes tid + i, so element i is expected to hold i
    for( cl_int i = 0; i < count; i++ )
    {
        if( results[ i ] != i )
        {
            log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] );
            free(results);
            return -1;
        }
    }

    /* All done */
    free(results);
    return 0;
}
|
||||
|
||||
|
||||
|
||||
#define TEST_SIZE 1000
|
||||
int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
|
||||
cl_context local_context;
|
||||
cl_command_queue local_queue;
|
||||
cl_program local_program;
|
||||
cl_kernel local_kernel;
|
||||
cl_mem local_mem_in, local_mem_out;
|
||||
cl_event local_event;
|
||||
size_t global_dim[3];
|
||||
int i, j, error;
|
||||
global_dim[0] = TEST_SIZE;
|
||||
global_dim[1] = 1; global_dim[2] = 1;
|
||||
cl_int *inData, *outData;
|
||||
cl_int status;
|
||||
|
||||
inData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
|
||||
outData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
|
||||
for (i=0; i<TEST_SIZE; i++) {
|
||||
inData[i] = i;
|
||||
}
|
||||
|
||||
|
||||
for (i=0; i<100; i++) {
|
||||
memset(outData, 0, sizeof(cl_int)*TEST_SIZE);
|
||||
|
||||
local_context = clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &error);
|
||||
test_error( error, "clCreateContext failed");
|
||||
|
||||
local_queue = clCreateCommandQueueWithProperties(local_context, deviceID, 0, &error);
|
||||
test_error( error, "clCreateCommandQueue failed");
|
||||
|
||||
local_program = clCreateProgramWithSource(local_context, 1, &repeate_test_kernel, NULL, &error);
|
||||
test_error( error, "clCreateProgramWithSource failed");
|
||||
|
||||
error = clBuildProgram(local_program, 0, NULL, NULL, NULL, NULL);
|
||||
test_error( error, "clBuildProgram failed");
|
||||
|
||||
local_kernel = clCreateKernel(local_program, "test_kernel", &error);
|
||||
test_error( error, "clCreateKernel failed");
|
||||
|
||||
local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
|
||||
test_error( error, "clCreateBuffer failed");
|
||||
|
||||
local_mem_out = clCreateBuffer(local_context, CL_MEM_WRITE_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
|
||||
test_error( error, "clCreateBuffer failed");
|
||||
|
||||
error = clEnqueueWriteBuffer(local_queue, local_mem_in, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), inData, 0, NULL, NULL);
|
||||
test_error( error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
error = clEnqueueWriteBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
|
||||
test_error( error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
error = clSetKernelArg(local_kernel, 0, sizeof(local_mem_in), &local_mem_in);
|
||||
test_error( error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelArg(local_kernel, 1, sizeof(local_mem_out), &local_mem_out);
|
||||
test_error( error, "clSetKernelArg failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(local_queue, local_kernel, 1, NULL, global_dim, NULL, 0, NULL, &local_event);
|
||||
test_error( error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clWaitForEvents(1, &local_event);
|
||||
test_error( error, "clWaitForEvents failed");
|
||||
|
||||
error = clGetEventInfo(local_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL);
|
||||
test_error( error, "clGetEventInfo failed");
|
||||
|
||||
if (status != CL_COMPLETE) {
|
||||
log_error( "Kernel execution not complete: status %d.\n", status);
|
||||
free(inData);
|
||||
free(outData);
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clEnqueueReadBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
|
||||
test_error( error, "clEnqueueReadBuffer failed");
|
||||
|
||||
clReleaseEvent(local_event);
|
||||
clReleaseMemObject(local_mem_in);
|
||||
clReleaseMemObject(local_mem_out);
|
||||
clReleaseKernel(local_kernel);
|
||||
clReleaseProgram(local_program);
|
||||
clReleaseCommandQueue(local_queue);
|
||||
clReleaseContext(local_context);
|
||||
|
||||
for (j=0; j<TEST_SIZE; j++) {
|
||||
if (outData[j] != inData[j] + 1) {
|
||||
log_error("Results failed to validate at iteration %d. %d != %d.\n", i, outData[j], inData[j] + 1);
|
||||
free(inData);
|
||||
free(outData);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(inData);
|
||||
free(outData);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user