mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Initial open source release of OpenCL 2.2 CTS.
This commit is contained in:
17
test_conformance/half/CMakeLists.txt
Normal file
17
test_conformance/half/CMakeLists.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
# Build description for the half-precision conformance test module.
# NOTE(review): MODULE_NAME and ${MODULE_NAME}_SOURCES are presumably consumed
# by ../CMakeCommon.txt (included below) -- confirm against that file.
set(MODULE_NAME HALF)

set(${MODULE_NAME}_SOURCES
    # Sources that live in this directory.
    cl_utils.c
    Test_vLoadHalf.c
    Test_roundTrip.c
    Test_vStoreHalf.c
    main.c
    # Shared test-harness sources compiled directly into this module.
    ../../test_common/harness/msvc9.c
    ../../test_common/harness/mingw_compat.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/ThreadPool.c
    ../../test_common/harness/parseParameters.cpp
    ../../test_common/harness/kernelHelpers.c
)

# Pull in the build logic shared by the conformance test directories.
include(../CMakeCommon.txt)
|
||||
|
||||
22
test_conformance/half/Jamfile
Normal file
22
test_conformance/half/Jamfile
Normal file
@@ -0,0 +1,22 @@
|
||||
# Requirements applied to every target declared in this Jamfile.
project
    : requirements
      -<library>/harness//harness
      <use>/harness//harness
#     <toolset>gcc:<cflags>-xc++
      # /TP tells MSVC to compile the .c sources as C++.
      <toolset>msvc:<cflags>"/TP"
    ;

# The test executable: local sources plus harness sources referenced by path.
exe Test_half
    : cl_utils.c
      main.c
      Test_roundTrip.c
      Test_vLoadHalf.c
      Test_vStoreHalf.c
      /harness//errorHelpers.c
    : <target-os>windows:<source>/harness//msvc9.c
    ;

# Copy the built executable into the per-variant distribution tree under $(DIST).
install dist
    : Test_half
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/half
      <variant>release:<location>$(DIST)/release/tests/test_conformance/half
    ;
|
||||
45
test_conformance/half/Makefile
Normal file
45
test_conformance/half/Makefile
Normal file
@@ -0,0 +1,45 @@
|
||||
# Makefile for the half-precision conformance test (Test_half).
#
# Targets:
#   all / release  build ./Test_half directly from the sources with -Os
#   debug          build ./Test_half_debug from object files with -O0 -D_DEBUG=1
#   test / test64  launch the release binary in the background via `arch`,
#                  logging to cpu*.log / gpu*.log
#   clean          remove the binaries and the object files
#
# Setting BUILD_WITH_ATF in the environment or on the command line links
# against the ATF framework and defines USE_ATF.

ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

SRCFILES = cl_utils.c Test_vLoadHalf.c Test_roundTrip.c \
           Test_vStoreHalf.c main.c

# The .c sources are deliberately compiled with the C++ driver.
CC = c++
CFLAGS = -g -Wall -Wshorten-64-to-32 $(COMPILERFLAGS) ${RC_CFLAGS} \
         ${USE_ATF}

LIBRARIES = -framework OpenCL ${RC_CFLAGS} ${ATF}

OBJECTS := ${SRCFILES:.c=.o}

# None of these targets name a file they create, so keep make from being
# confused by a stray file called e.g. "release" or "clean".
.PHONY: all release debug test test64 clean

all: release

release:
	echo "Build Release"
	$(CC) $(SRCFILES) -Os $(CFLAGS) -o Test_half $(LIBRARIES)

# Target-specific flags are inherited by the prerequisites, so the object
# files themselves are compiled with -O0 -D_DEBUG=1.  Passing those flags
# only on the link line (as before) had no effect on the compiled code.
debug: CFLAGS += -O0 -D_DEBUG=1
debug: $(OBJECTS)
	echo "Build Debug"
	$(CC) $(OBJECTS) $(CFLAGS) -o Test_half_debug $(LIBRARIES)

test: release
	arch -i386 ./Test_half -c > cpu.log &
	arch -i386 ./Test_half -g > gpu.log &
	echo "Testing 32-bit mode in progress.  See cpu.log and gpu.log for results."

test64: release
	arch -x86_64 ./Test_half -c > cpu64.log &
	arch -x86_64 ./Test_half -g > gpu64.log &
	echo "Testing 64-bit mode in progress.  See cpu64.log and gpu64.log for results."

clean:
	rm -f ./Test_half_debug
	rm -f ./Test_half
	rm -f $(OBJECTS)

# Last-resort rule: report unknown targets instead of failing silently.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
292
test_conformance/half/Test_half.xcodeproj/project.pbxproj
Normal file
292
test_conformance/half/Test_half.xcodeproj/project.pbxproj
Normal file
@@ -0,0 +1,292 @@
|
||||
// !$*UTF8*$!
|
||||
{
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 44;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
3B1B765F0DE342BC00837A59 /* Test_vStoreHalf.c in Sources */ = {isa = PBXBuildFile; fileRef = 3B1B765E0DE342BC00837A59 /* Test_vStoreHalf.c */; };
|
||||
3B6173C40DE2B14800384A2C /* Test_roundTrip.c in Sources */ = {isa = PBXBuildFile; fileRef = 3B6173C30DE2B14800384A2C /* Test_roundTrip.c */; };
|
||||
3BA6BFB80DE21DB9008685CF /* Test_vLoadHalf.c in Sources */ = {isa = PBXBuildFile; fileRef = 3BA6BFB70DE21DB9008685CF /* Test_vLoadHalf.c */; };
|
||||
3BA6BFBB0DE21EFA008685CF /* cl_utils.c in Sources */ = {isa = PBXBuildFile; fileRef = 3BA6BFBA0DE21EFA008685CF /* cl_utils.c */; };
|
||||
3BA6BFF30DE229C5008685CF /* OpenCL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 3BA6BFF20DE229C5008685CF /* OpenCL.framework */; };
|
||||
8DD76FAC0486AB0100D96B5E /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 08FB7796FE84155DC02AAC07 /* main.c */; settings = {ATTRIBUTES = (); }; };
|
||||
8DD76FB00486AB0100D96B5E /* Test_half.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6A0FF2C0290799A04C91782 /* Test_half.1 */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXCopyFilesBuildPhase section */
|
||||
8DD76FAF0486AB0100D96B5E /* CopyFiles */ = {
|
||||
isa = PBXCopyFilesBuildPhase;
|
||||
buildActionMask = 8;
|
||||
dstPath = /usr/share/man/man1/;
|
||||
dstSubfolderSpec = 0;
|
||||
files = (
|
||||
8DD76FB00486AB0100D96B5E /* Test_half.1 in CopyFiles */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 1;
|
||||
};
|
||||
/* End PBXCopyFilesBuildPhase section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
08FB7796FE84155DC02AAC07 /* main.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; };
|
||||
3B1B765E0DE342BC00837A59 /* Test_vStoreHalf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = Test_vStoreHalf.c; sourceTree = "<group>"; };
|
||||
3B1B77910DE3896E00837A59 /* builtins.cl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = builtins.cl; path = ../../../compute/OpenCL/cl_headers/private/builtins.cl; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.c; };
|
||||
3B6173C30DE2B14800384A2C /* Test_roundTrip.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = Test_roundTrip.c; sourceTree = "<group>"; };
|
||||
3BA6BFB70DE21DB9008685CF /* Test_vLoadHalf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = Test_vLoadHalf.c; sourceTree = "<group>"; };
|
||||
3BA6BFB90DE21EFA008685CF /* cl_utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cl_utils.h; sourceTree = "<group>"; };
|
||||
3BA6BFBA0DE21EFA008685CF /* cl_utils.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cl_utils.c; sourceTree = "<group>"; };
|
||||
3BA6BFF20DE229C5008685CF /* OpenCL.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = OpenCL.framework; path = /System/Library/Frameworks/OpenCL.framework; sourceTree = "<absolute>"; };
|
||||
3BA6C00A0DE22A95008685CF /* test_config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = test_config.h; sourceTree = "<group>"; };
|
||||
3BA6C0770DE24F41008685CF /* tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tests.h; sourceTree = "<group>"; };
|
||||
8DD76FB20486AB0100D96B5E /* Test_half */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Test_half; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
C6A0FF2C0290799A04C91782 /* Test_half.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = Test_half.1; sourceTree = "<group>"; };
|
||||
FFB9F1420E5E155400F45584 /* ATF.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = ATF.framework; path = /Library/Frameworks/ATF.framework; sourceTree = "<absolute>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
8DD76FAD0486AB0100D96B5E /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
3BA6BFF30DE229C5008685CF /* OpenCL.framework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXFrameworksBuildPhase section */
|
||||
|
||||
/* Begin PBXGroup section */
|
||||
08FB7794FE84155DC02AAC07 /* Test_half */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
3BA6C00A0DE22A95008685CF /* test_config.h */,
|
||||
3BA6C0770DE24F41008685CF /* tests.h */,
|
||||
08FB7796FE84155DC02AAC07 /* main.c */,
|
||||
08FB7795FE84155DC02AAC07 /* Source */,
|
||||
3BA6BFF80DE229CC008685CF /* Resources */,
|
||||
3B1B77910DE3896E00837A59 /* builtins.cl */,
|
||||
C6A0FF2B0290797F04C91782 /* Documentation */,
|
||||
1AB674ADFE9D54B511CA2CBB /* Products */,
|
||||
FFB9F1420E5E155400F45584 /* ATF.framework */,
|
||||
);
|
||||
name = Test_half;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
08FB7795FE84155DC02AAC07 /* Source */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
3BA6BFB90DE21EFA008685CF /* cl_utils.h */,
|
||||
3BA6BFBA0DE21EFA008685CF /* cl_utils.c */,
|
||||
3BA6BFB70DE21DB9008685CF /* Test_vLoadHalf.c */,
|
||||
3B6173C30DE2B14800384A2C /* Test_roundTrip.c */,
|
||||
3B1B765E0DE342BC00837A59 /* Test_vStoreHalf.c */,
|
||||
);
|
||||
name = Source;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1AB674ADFE9D54B511CA2CBB /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8DD76FB20486AB0100D96B5E /* Test_half */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
3BA6BFF80DE229CC008685CF /* Resources */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
3BA6BFF20DE229C5008685CF /* OpenCL.framework */,
|
||||
);
|
||||
name = Resources;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
C6A0FF2B0290797F04C91782 /* Documentation */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
C6A0FF2C0290799A04C91782 /* Test_half.1 */,
|
||||
);
|
||||
name = Documentation;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
8DD76FA90486AB0100D96B5E /* Test_half */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "Test_half" */;
|
||||
buildPhases = (
|
||||
8DD76FAB0486AB0100D96B5E /* Sources */,
|
||||
8DD76FAD0486AB0100D96B5E /* Frameworks */,
|
||||
8DD76FAF0486AB0100D96B5E /* CopyFiles */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = Test_half;
|
||||
productInstallPath = "$(HOME)/bin";
|
||||
productName = Test_half;
|
||||
productReference = 8DD76FB20486AB0100D96B5E /* Test_half */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
||||
/* Begin PBXProject section */
|
||||
08FB7793FE84155DC02AAC07 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Test_half" */;
|
||||
compatibilityVersion = "Xcode 3.0";
|
||||
hasScannedForEncodings = 1;
|
||||
mainGroup = 08FB7794FE84155DC02AAC07 /* Test_half */;
|
||||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
8DD76FA90486AB0100D96B5E /* Test_half */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
8DD76FAB0486AB0100D96B5E /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
8DD76FAC0486AB0100D96B5E /* main.c in Sources */,
|
||||
3BA6BFB80DE21DB9008685CF /* Test_vLoadHalf.c in Sources */,
|
||||
3BA6BFBB0DE21EFA008685CF /* cl_utils.c in Sources */,
|
||||
3B6173C40DE2B14800384A2C /* Test_roundTrip.c in Sources */,
|
||||
3B1B765F0DE342BC00837A59 /* Test_vStoreHalf.c in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
1DEB928608733DD80010E9CD /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
COPY_PHASE_STRIP = NO;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_ENABLE_FIX_AND_CONTINUE = YES;
|
||||
GCC_MODEL_TUNING = G5;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
INSTALL_PATH = /usr/local/bin;
|
||||
PRODUCT_NAME = Test_half;
|
||||
ZERO_LINK = YES;
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1DEB928708733DD80010E9CD /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
GCC_MODEL_TUNING = G5;
|
||||
INSTALL_PATH = /usr/local/bin;
|
||||
PRODUCT_NAME = Test_half;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
1DEB928A08733DD80010E9CD /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ARCHS = "$(ARCHS_STANDARD_32_64_BIT_PRE_XCODE_3_1)";
|
||||
ARCHS_STANDARD_32_64_BIT_PRE_XCODE_3_1 = "x86_64 i386 ppc";
|
||||
ATF_Config_Comment = "Set ATF_DEFAULT to non-empty to link to ATF iff the BUILD_WITH_ATF env var is set";
|
||||
ATF_DEFAULT = SKIPPING;
|
||||
ATF_DEFINES = "$(ATF_DEFINES_$(SKIPPING_ATF))";
|
||||
ATF_DEFINES_ = "USE_ATF=1";
|
||||
ATF_LINK = "$(ATF_LINK_$(SKIPPING_ATF))";
|
||||
ATF_LINK_ = "-framework ATF";
|
||||
GCC_C_LANGUAGE_STANDARD = c99;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = _DEBUG;
|
||||
GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
|
||||
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_CHECK_SWITCH_STATEMENTS = YES;
|
||||
GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED = YES;
|
||||
GCC_WARN_MISSING_PARENTHESES = NO;
|
||||
GCC_WARN_PEDANTIC = YES;
|
||||
GCC_WARN_SHADOW = YES;
|
||||
GCC_WARN_SIGN_COMPARE = YES;
|
||||
GCC_WARN_TYPECHECK_CALLS_TO_PRINTF = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = NO;
|
||||
GCC_WARN_UNKNOWN_PRAGMAS = YES;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_LABEL = YES;
|
||||
GCC_WARN_UNUSED_PARAMETER = YES;
|
||||
GCC_WARN_UNUSED_VALUE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
PREBINDING = NO;
|
||||
SKIPPING_ATF = SKIPPING;
|
||||
VALID_ARCHS = "i386 x86_64";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1DEB928B08733DD80010E9CD /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ARCHS = (
|
||||
ppc,
|
||||
i386,
|
||||
);
|
||||
ATF_Config_Comment = "Set ATF_DEFAULT to non-empty to link to ATF iff the BUILD_WITH_ATF env var is set";
|
||||
ATF_DEFAULT = SKIPPING;
|
||||
ATF_DEFINES = "$(ATF_DEFINES_$(SKIPPING_ATF))";
|
||||
ATF_DEFINES_ = "USE_ATF=1";
|
||||
ATF_LINK = "$(ATF_LINK_$(SKIPPING_ATF))";
|
||||
ATF_LINK_ = "-framework ATF";
|
||||
GCC_C_LANGUAGE_STANDARD = c99;
|
||||
GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
|
||||
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_CHECK_SWITCH_STATEMENTS = YES;
|
||||
GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED = YES;
|
||||
GCC_WARN_MISSING_PARENTHESES = NO;
|
||||
GCC_WARN_PEDANTIC = NO;
|
||||
GCC_WARN_SHADOW = YES;
|
||||
GCC_WARN_SIGN_COMPARE = YES;
|
||||
GCC_WARN_TYPECHECK_CALLS_TO_PRINTF = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = NO;
|
||||
GCC_WARN_UNKNOWN_PRAGMAS = YES;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_LABEL = YES;
|
||||
GCC_WARN_UNUSED_PARAMETER = YES;
|
||||
GCC_WARN_UNUSED_VALUE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
PREBINDING = NO;
|
||||
SKIPPING_ATF = SKIPPING;
|
||||
VALID_ARCHS = "i386 x86_64";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "Test_half" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1DEB928608733DD80010E9CD /* Debug */,
|
||||
1DEB928708733DD80010E9CD /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Test_half" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1DEB928A08733DD80010E9CD /* Debug */,
|
||||
1DEB928B08733DD80010E9CD /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
|
||||
}
|
||||
398
test_conformance/half/Test_roundTrip.c
Normal file
398
test_conformance/half/Test_roundTrip.c
Normal file
@@ -0,0 +1,398 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <string.h>
|
||||
#include "cl_utils.h"
|
||||
#include "tests.h"
|
||||
|
||||
|
||||
|
||||
int Test_roundTrip( void )
|
||||
{
|
||||
int vectorSize, error;
|
||||
uint64_t i, j;
|
||||
cl_program programs[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
|
||||
cl_kernel kernels[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
|
||||
cl_program doublePrograms[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
|
||||
cl_kernel doubleKernels[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
|
||||
uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
|
||||
uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
|
||||
uint64_t doubleTime[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
|
||||
uint64_t min_double_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
|
||||
memset( min_time, -1, sizeof( min_time ) );
|
||||
memset( min_double_time, -1, sizeof( min_double_time ) );
|
||||
|
||||
vlog( "Testing roundTrip\n" );
|
||||
fflush( stdout );
|
||||
|
||||
|
||||
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
|
||||
{
|
||||
const char *source[] = {
|
||||
"__kernel void test( const __global half *in, __global half *out )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" vstore_half",vector_size_name_extensions[vectorSize],"( vload_half",vector_size_name_extensions[vectorSize],"(i, in), i, out);\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char *doubleSource[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
|
||||
"__kernel void test( const __global half *in, __global half *out )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" vstore_half",vector_size_name_extensions[vectorSize],"( convert_double", vector_size_name_extensions[vectorSize], "( vload_half",vector_size_name_extensions[vectorSize],"(i, in)), i, out);\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char *sourceV3[] = {
|
||||
"__kernel void test( const __global half *in, __global half *out,"
|
||||
" uint extra_last_thread )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" size_t last_i = get_global_size(0)-1;\n"
|
||||
" size_t adjust = 0;\n"
|
||||
" if(i == last_i && extra_last_thread != 0) { \n"
|
||||
" adjust = 3-extra_last_thread;\n"
|
||||
" }\n"
|
||||
" vstore_half3( vload_half3(i, in-adjust), i, out-adjust);\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char *doubleSourceV3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
|
||||
"__kernel void test( const __global half *in, __global half *out,"
|
||||
" uint extra_last_thread )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" size_t last_i = get_global_size(0)-1;\n"
|
||||
" size_t adjust = 0;\n"
|
||||
" if(i == last_i && extra_last_thread != 0) { \n"
|
||||
" adjust = 3-extra_last_thread;\n"
|
||||
" }\n"
|
||||
" vstore_half3( vload_half3(i, in-adjust), i, out-adjust);\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
/*
|
||||
const char *sourceV3aligned[] = {
|
||||
"__kernel void test( const __global half *in, __global half *out )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" vstorea_half3( vloada_half3(i, in), i, out);\n"
|
||||
" vstore_half(vload_half(4*i+3, in), 4*i+3, out);\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char *doubleSourceV3aligned[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
|
||||
"__kernel void test( const __global half *in, __global half *out )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" vstorea_half3( vloada_half3(i, in), i, out);\n"
|
||||
" vstore_half(vload_half(4*i+3, in), 4*i+3, out);\n"
|
||||
"}\n"
|
||||
};
|
||||
*/
|
||||
|
||||
if(g_arrVecSizes[vectorSize] == 3) {
|
||||
programs[vectorSize] = MakeProgram( sourceV3, sizeof( sourceV3) / sizeof( sourceV3[0]) );
|
||||
if( NULL == programs[ vectorSize ] )
|
||||
{
|
||||
gFailCount++;
|
||||
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
programs[vectorSize] = MakeProgram( source, sizeof( source) / sizeof( source[0]) );
|
||||
if( NULL == programs[ vectorSize ] )
|
||||
{
|
||||
gFailCount++;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
kernels[ vectorSize ] = clCreateKernel( programs[ vectorSize ], "test", &error );
|
||||
if( NULL == kernels[vectorSize] )
|
||||
{
|
||||
gFailCount++;
|
||||
vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error );
|
||||
return error;
|
||||
}
|
||||
|
||||
if( gTestDouble )
|
||||
{
|
||||
if(g_arrVecSizes[vectorSize] == 3) {
|
||||
doublePrograms[vectorSize] = MakeProgram( doubleSourceV3, sizeof( doubleSourceV3) / sizeof( doubleSourceV3[0]) );
|
||||
if( NULL == programs[ vectorSize ] )
|
||||
{
|
||||
gFailCount++;
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
doublePrograms[vectorSize] = MakeProgram( doubleSource, sizeof( doubleSource) / sizeof( doubleSource[0]) );
|
||||
if( NULL == programs[ vectorSize ] )
|
||||
{
|
||||
gFailCount++;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
doubleKernels[ vectorSize ] = clCreateKernel( doublePrograms[ vectorSize ], "test", &error );
|
||||
if( NULL == kernels[vectorSize] )
|
||||
{
|
||||
gFailCount++;
|
||||
vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Figure out how many elements are in a work block
|
||||
size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
|
||||
size_t blockCount = (size_t)getBufferSize(gDevice) / elementSize; //elementSize is a power of two
|
||||
uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half
|
||||
size_t stride = blockCount;
|
||||
|
||||
error = 0;
|
||||
uint64_t printMask = (lastCase >> 4) - 1;
|
||||
uint32_t count;
|
||||
size_t loopCount;
|
||||
|
||||
for( i = 0; i < (uint64_t)lastCase; i += stride )
|
||||
{
|
||||
count = (uint32_t) MIN( blockCount, lastCase - i );
|
||||
|
||||
//Init the input stream
|
||||
uint16_t *p = (uint16_t *)gIn_half;
|
||||
for( j = 0; j < count; j++ )
|
||||
p[j] = j + i;
|
||||
|
||||
if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_half, CL_TRUE, 0, count * sizeof( cl_half ), gIn_half, 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Failure in clWriteArray\n" );
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
//Check the vector lengths
|
||||
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
|
||||
{ // here we loop through vector sizes -- 3 is last.
|
||||
uint32_t pattern = 0xdeaddead;
|
||||
memset_pattern4( gOut_half, &pattern, (size_t)getBufferSize(gDevice)/2);
|
||||
|
||||
if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Failure in clWriteArray\n" );
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// here is where "3" starts to cause problems.
|
||||
error = RunKernel( kernels[vectorSize], gInBuffer_half, gOutBuffer_half, numVecs(count, vectorSize, false) ,
|
||||
runsOverBy(count, vectorSize, false) );
|
||||
if(error)
|
||||
{
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Failure in clReadArray\n" );
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if( (memcmp( gOut_half, gIn_half, count * sizeof(cl_half))) )
|
||||
{
|
||||
uint16_t *u1 = (uint16_t *)gOut_half;
|
||||
uint16_t *u2 = (uint16_t *)gIn_half;
|
||||
for( j = 0; j < count; j++ )
|
||||
{
|
||||
if( u1[j] != u2[j] )
|
||||
{
|
||||
uint16_t abs1 = u1[j] & 0x7fff;
|
||||
uint16_t abs2 = u2[j] & 0x7fff;
|
||||
if( abs1 > 0x7c00 && abs2 > 0x7c00 )
|
||||
continue; //any NaN is okay if NaN is input
|
||||
|
||||
// if reference result is sub normal, test if the output is flushed to zero
|
||||
if( IsHalfSubnormal(u2[j]) && ( (u1[j] == 0) || (u1[j] == 0x8000) ) )
|
||||
continue;
|
||||
|
||||
vlog_error( "%lld) (of %lld) Failure at 0x%4.4x: 0x%4.4x vector_size = %d \n", j, (uint64_t)count, u2[j], u1[j], (g_arrVecSizes[vectorSize]) );
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( gTestDouble )
|
||||
{
|
||||
memset_pattern4( gOut_half, &pattern, (size_t)getBufferSize(gDevice)/2);
|
||||
if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Failure in clWriteArray\n" );
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
if( (error = RunKernel( doubleKernels[vectorSize], gInBuffer_half, gOutBuffer_half, numVecs(count, vectorSize, false) ,
|
||||
runsOverBy(count, vectorSize, false) ) ) )
|
||||
{
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Failure in clReadArray\n" );
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if( (memcmp( gOut_half, gIn_half, count * sizeof(cl_half))) )
|
||||
{
|
||||
uint16_t *u1 = (uint16_t *)gOut_half;
|
||||
uint16_t *u2 = (uint16_t *)gIn_half;
|
||||
for( j = 0; j < count; j++ )
|
||||
{
|
||||
if( u1[j] != u2[j] )
|
||||
{
|
||||
uint16_t abs1 = u1[j] & 0x7fff;
|
||||
uint16_t abs2 = u2[j] & 0x7fff;
|
||||
if( abs1 > 0x7c00 && abs2 > 0x7c00 )
|
||||
continue; //any NaN is okay if NaN is input
|
||||
|
||||
// if reference result is sub normal, test if the output is flushed to zero
|
||||
if( IsHalfSubnormal(u2[j]) && ( (u1[j] == 0) || (u1[j] == 0x8000) ) )
|
||||
continue;
|
||||
|
||||
vlog_error( "%lld) Failure at 0x%4.4x: 0x%4.4x vector_size = %d (double precsion)\n", j, u2[j], u1[j], (g_arrVecSizes[vectorSize]) );
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( ((i+blockCount) & ~printMask) == (i+blockCount) )
|
||||
{
|
||||
vlog( "." );
|
||||
fflush( stdout );
|
||||
}
|
||||
}
|
||||
|
||||
vlog( "\tPassed\n" );
|
||||
|
||||
loopCount = 100;
|
||||
if( gReportTimes )
|
||||
{
|
||||
//Run again for timing
|
||||
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
|
||||
{
|
||||
uint64_t bestTime = -1ULL;
|
||||
|
||||
for( j = 0; j < loopCount; j++ )
|
||||
{
|
||||
uint64_t startTime = ReadTime();
|
||||
if( (error = RunKernel( kernels[vectorSize], gInBuffer_half, gOutBuffer_half,numVecs(count, vectorSize, false) ,
|
||||
runsOverBy(count, vectorSize, false)) ) )
|
||||
{
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if( (error = clFinish(gQueue)) )
|
||||
{
|
||||
vlog_error( "Failure in clFinish\n" );
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
uint64_t currentTime = ReadTime() - startTime;
|
||||
if( currentTime < bestTime )
|
||||
bestTime = currentTime;
|
||||
time[ vectorSize ] += currentTime;
|
||||
}
|
||||
if( bestTime < min_time[ vectorSize ] )
|
||||
min_time[ vectorSize ] = bestTime;
|
||||
|
||||
if( gTestDouble )
|
||||
{
|
||||
bestTime = -1ULL;
|
||||
for( j = 0; j < loopCount; j++ )
|
||||
{
|
||||
uint64_t startTime = ReadTime();
|
||||
if( (error = RunKernel( doubleKernels[vectorSize], gInBuffer_half, gOutBuffer_half, numVecs(count, vectorSize, false) ,
|
||||
runsOverBy(count, vectorSize, false)) ) )
|
||||
{
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if( (error = clFinish(gQueue)) )
|
||||
{
|
||||
vlog_error( "Failure in clFinish\n" );
|
||||
gFailCount++;
|
||||
goto exit;
|
||||
}
|
||||
uint64_t currentTime = ReadTime() - startTime;
|
||||
if( currentTime < bestTime )
|
||||
bestTime = currentTime;
|
||||
doubleTime[ vectorSize ] += currentTime;
|
||||
}
|
||||
if( bestTime < min_double_time[ vectorSize ] )
|
||||
min_double_time[ vectorSize ] = bestTime;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( gReportTimes )
|
||||
{
|
||||
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
|
||||
vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, "average us/elem", "roundTrip avg. (vector size: %d)", (g_arrVecSizes[vectorSize]) );
|
||||
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
|
||||
vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, "best us/elem", "roundTrip best (vector size: %d)", (g_arrVecSizes[vectorSize]) );
|
||||
if( gTestDouble )
|
||||
{
|
||||
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
|
||||
vlog_perf( SubtractTime( doubleTime[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, "average us/elem (double)", "roundTrip avg. d (vector size: %d)", (g_arrVecSizes[vectorSize]) );
|
||||
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
|
||||
vlog_perf( SubtractTime( min_double_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, "best us/elem (double)", "roundTrip best d (vector size: %d)", (g_arrVecSizes[vectorSize]) );
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
//clean up
|
||||
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
|
||||
{
|
||||
clReleaseKernel( kernels[ vectorSize ] );
|
||||
clReleaseProgram( programs[ vectorSize ] );
|
||||
if( gTestDouble )
|
||||
{
|
||||
clReleaseKernel( doubleKernels[ vectorSize ] );
|
||||
clReleaseProgram( doublePrograms[ vectorSize ] );
|
||||
}
|
||||
}
|
||||
|
||||
gTestCount++;
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
628
test_conformance/half/Test_vLoadHalf.c
Normal file
628
test_conformance/half/Test_vLoadHalf.c
Normal file
@@ -0,0 +1,628 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <string.h>
|
||||
#include "cl_utils.h"
|
||||
#include "tests.h"
|
||||
|
||||
extern const char *addressSpaceNames[];
|
||||
|
||||
/*
 * Convert an IEEE-754 binary16 bit pattern to the equivalent binary32 value.
 *
 * us: raw 16-bit half pattern (1 sign bit, 5 exponent bits, 10 mantissa bits).
 * Returns the float with the same value. Subnormal halves are renormalized,
 * infinities are preserved, and any NaN comes back with the quiet bit set
 * (payload bits are kept, shifted into the float mantissa).
 */
static inline float half2float( cl_ushort us )
{
    const uint32_t bits = us;
    const uint32_t signBit = (bits << 16) & 0x80000000;
    int32_t biasedExp = (bits >> 10) & 0x1f;
    uint32_t fraction = (bits & 0x03ff) << 13;      // already aligned to the float mantissa
    union{ unsigned int u; float f;} result;

    // Inf / NaN: all exponent bits set.
    if( biasedExp == 31 )
    {
        result.u = fraction | signBit | ( fraction ? 0x7fc00000 : 0x7f800000 );
        return result.f;
    }

    if( biasedExp == 0 )
    {
        // Signed zero.
        if( fraction == 0 )
            return signBit ? -0.0f : 0.0f;

        // Subnormal: shift the leading one up to the implicit-bit position
        // (bit 23), drop it, and lower the exponent by the distance moved.
        const int shift = __builtin_clz( fraction ) - 8;
        biasedExp = 1 - shift;
        fraction = ( fraction << shift ) & 0x007fffff;
    }

    // Re-bias from half (15) to float (127) and assemble the word.
    result.u = ( (uint32_t)( biasedExp + (127 - 15) ) << 23 ) | fraction | signBit;
    return result.f;
}
|
||||
|
||||
int Test_vLoadHalf_private( bool aligned );
|
||||
|
||||
/*
 * Exercise vload_half / vloada_half for every tested vector size.
 *
 * aligned: true  -> test the vloada_half family,
 *          false -> test the vload_half family.
 *
 * For each vector size four kernels are built, indexed 0..3 in the
 * kernels/programs tables: a direct load from the __global input, a load
 * staged through __private memory, a load staged through __local memory and
 * a load from a __constant input. Every 16-bit half pattern is then fed
 * through each kernel in blocks and the float output is compared bit-for-bit
 * (NaNs excepted) against half2float().
 *
 * Returns 0 when all commands ran, -1 if a program failed to build, -2 if a
 * kernel could not be created, or the OpenCL error code of the command that
 * failed. Mismatches are recorded in gFailCount rather than the return value.
 *
 * NOTE(review): the early "return -1/-2" paths still skip the cleanup at
 * "exit:"; they cannot simply jump there because that would bypass
 * initialized declarations when this file is compiled as C++.
 */
int Test_vLoadHalf_private( bool aligned )
{
    cl_int error;
    int vectorSize;
    cl_program programs[kVectorSizeCount+kStrangeVectorSizeCount][4] = {{0}};
    cl_kernel kernels[kVectorSizeCount+kStrangeVectorSizeCount][4] = {{0}};
    uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
    uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
    size_t q;

    // Start every "best time" at the maximum value so the first sample wins.
    memset( min_time, -1, sizeof( min_time ) );

    vlog( "Testing vload%s_half\n", aligned ? "a" : "" );
    fflush( stdout );
    const char *vector_size_names[] = {"1", "2", "4", "8", "16", "3"};

    int minVectorSize = kMinVectorSize;
    // There is no aligned scalar vloada_half in CL 1.1
#if ! defined( CL_VERSION_1_1 ) && ! defined(__APPLE__)
    vlog("Note: testing vloada_half.\n");
    if (aligned && minVectorSize == 0)
        minVectorSize = 1;
#endif

    for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
    {

        // The aligned 3-element variant is sized like a 4-element vector.
        int effectiveVectorSize = g_arrVecSizes[vectorSize];
        if(effectiveVectorSize == 3 && aligned) {
            effectiveVectorSize = 4;
        }
        const char *source[] = {
            "__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
            "{\n"
            " size_t i = get_global_id(0);\n"
            " f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( i, p );\n"
            "}\n"
        };

        const char *sourceV3[] = {
            "__kernel void test( const __global half *p, __global float *f,\n"
            " uint extra_last_thread)\n"
            "{\n"
            " size_t i = get_global_id(0);\n"
            " size_t last_i = get_global_size(0)-1;\n"
            " if(last_i == i && extra_last_thread != 0) {\n"
            " if(extra_last_thread ==2) {\n"
            " f[3*i+1] = vload_half(3*i+1, p);\n"
            " }\n"
            " f[3*i] = vload_half(3*i, p);\n"
            " } else {\n"
            " vstore3(vload_half3( i, p ),i,f);\n"
            " }\n"
            "}\n"
        };

        const char *sourceV3aligned[] = {
            "__kernel void test( const __global half *p, __global float3 *f )\n"
            "{\n"
            " size_t i = get_global_id(0);\n"
            " f[i] = vloada_half3( i, p );\n"
            " ((__global float *)f)[4*i+3] = vloada_half(4*i+3,p);\n"
            "}\n"
        };

        const char *source_private1[] = {
            "__kernel void test( const __global half *p, __global float *f )\n"
            "{\n"
            " __private ushort data[1];\n"
            " __private half* hdata_p = (__private half*) data;\n"
            " size_t i = get_global_id(0);\n"
            " data[0] = ((__global ushort*)p)[i];\n"
            " f[i] = vload", (aligned ? "a" : ""), "_half( 0, hdata_p );\n"
            "}\n"
        };

        const char *source_private2[] = {
            "__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
            "{\n"
            " __private ", align_types[vectorSize], " data[", vector_size_names[vectorSize], "/", align_divisors[vectorSize], "];\n"
            " __private half* hdata_p = (__private half*) data;\n"
            " __global ", align_types[vectorSize], "* i_p = (__global ", align_types[vectorSize], "*)p;\n"
            " size_t i = get_global_id(0);\n"
            " int k;\n"
            " for (k=0; k<",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"; k++)\n"
            " data[k] = i_p[i+k];\n"
            " f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( 0, hdata_p );\n"
            "}\n"
        };

        const char *source_privateV3[] = {
            "__kernel void test( const __global half *p, __global float *f,"
            " uint extra_last_thread )\n"
            "{\n"
            " __private ushort data[3];\n"
            " __private half* hdata_p = (__private half*) data;\n"
            " __global ushort* i_p = (__global ushort*)p;\n"
            " size_t i = get_global_id(0);\n"
            " int k;\n"
            // " data = vload3(i, i_p);\n"
            " size_t last_i = get_global_size(0)-1;\n"
            " if(last_i == i && extra_last_thread != 0) {\n"
            " if(extra_last_thread ==2) {\n"
            " f[3*i+1] = vload_half(3*i+1, p);\n"
            " }\n"
            " f[3*i] = vload_half(3*i, p);\n"
            " } else {\n"
            " for (k=0; k<3; k++)\n"
            " data[k] = i_p[i*3+k];\n"
            " vstore3(vload_half3( 0, hdata_p ), i, f);\n"
            " }\n"
            "}\n"
        };

        const char *source_privateV3aligned[] = {
            "__kernel void test( const __global half *p, __global float3 *f )\n"
            "{\n"
            " ushort4 data[4];\n" // declare as vector for alignment. Make four to check to see vloada_half3 index is working.
            " half* hdata_p = (half*) &data;\n"
            " size_t i = get_global_id(0);\n"
            " global ushort* i_p = (global ushort*)p + i * 4;\n"
            " int offset = i & 3;\n"
            " data[offset] = (ushort4)( i_p[0], i_p[1], i_p[2], USHRT_MAX ); \n"
            " data[offset^1] = USHRT_MAX; \n"
            " data[offset^2] = USHRT_MAX; \n"
            " data[offset^3] = USHRT_MAX; \n"
            // test vloada_half3
            " f[i] = vloada_half3( offset, hdata_p );\n"
            // Fill in the 4th value so we don't have to special case this code elsewhere in the test.
            " mem_fence(CLK_GLOBAL_MEM_FENCE );\n"
            " ((__global float *)f)[4*i+3] = vload_half(4*i+3, p);\n"
            "}\n"
        };

        // Element count of the __local staging array, spliced into the kernel
        // sources below as text. The value is unsigned, so format it as such,
        // and leave room for any 64-bit value plus the terminator.
        char local_buf_size[32];

        sprintf(local_buf_size, "%llu", (unsigned long long)effectiveVectorSize * gWorkGroupSize);
        const char *source_local1[] = {
            "__kernel void test( const __global half *p, __global float *f )\n"
            "{\n"
            " __local ushort data[",local_buf_size,"];\n"
            " __local half* hdata_p = (__local half*) data;\n"
            " size_t i = get_global_id(0);\n"
            " size_t lid = get_local_id(0);\n"
            " data[lid] = ((__global ushort*)p)[i];\n"
            " f[i] = vload", aligned ? "a" : "", "_half( lid, hdata_p );\n"
            "}\n"
        };

        const char *source_local2[] = {
            "__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
            "{\n"
            " __local ", align_types[vectorSize], " data[", local_buf_size, "/", align_divisors[vectorSize], "];\n"
            " __local half* hdata_p = (__local half*) data;\n"
            " __global ", align_types[vectorSize], "* i_p = (__global ", align_types[vectorSize],"*)p;\n"
            " size_t i = get_global_id(0);\n"
            " size_t lid = get_local_id(0);\n"
            " int k;\n"
            " for (k=0; k<",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"; k++)\n"
            " data[lid*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k] = i_p[i*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k];\n"
            " f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( lid, hdata_p );\n"
            "}\n"
        };

        const char *source_localV3[] = {
            "__kernel void test( const __global half *p, __global float *f,\n"
            " uint extra_last_thread)\n"
            "{\n"
            " __local ushort data[", local_buf_size,"];\n"
            " __local half* hdata_p = (__local half*) data;\n"
            " __global ushort* i_p = (__global ushort*)p;\n"
            " size_t i = get_global_id(0);\n"
            " size_t last_i = get_global_size(0)-1;\n"
            " size_t lid = get_local_id(0);\n"
            " int k;\n"
            " if(last_i == i && extra_last_thread != 0) {\n"
            " if(extra_last_thread ==2) {\n"
            " f[3*i+1] = vload_half(3*i+1, p);\n"
            " }\n"
            " f[3*i] = vload_half(3*i, p);\n"
            " } else {\n"
            " for (k=0; k<3; k++)\n"
            " data[lid*3+k] = i_p[i*3+k];\n"
            " vstore3( vload_half3( lid, hdata_p ),i,f);\n"
            " };\n"
            "}\n"
        };

        const char *source_localV3aligned[] = {
            "__kernel void test( const __global half *p, __global float3 *f )\n"
            "{\n"
            " __local ushort data[", local_buf_size,"];\n"
            " __local half* hdata_p = (__local half*) data;\n"
            " __global ushort* i_p = (__global ushort*)p;\n"
            " size_t i = get_global_id(0);\n"
            " size_t lid = get_local_id(0);\n"
            " int k;\n"
            " for (k=0; k<4; k++)\n"
            " data[lid*4+k] = i_p[i*4+k];\n"
            " f[i] = vloada_half3( lid, hdata_p );\n"
            " ((__global float *)f)[4*i+3] = vload_half(lid*4+3, hdata_p);\n"
            "}\n"
        };

        const char *source_constant[] = {
            "__kernel void test( __constant half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
            "{\n"
            " size_t i = get_global_id(0);\n"
            " f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( i, p );\n"
            "}\n"
        };

        const char *source_constantV3[] = {
            "__kernel void test( __constant half *p, __global float *f,\n"
            " uint extra_last_thread)\n"
            "{\n"
            " size_t i = get_global_id(0);\n"
            " size_t last_i = get_global_size(0)-1;\n"
            " if(last_i == i && extra_last_thread != 0) {\n"
            " if(extra_last_thread ==2) {\n"
            " f[3*i+1] = vload_half(3*i+1, p);\n"
            " }\n"
            " f[3*i] = vload_half(3*i, p);\n"
            " } else {\n"
            " vstore3(vload_half",vector_size_name_extensions[vectorSize],"( i, p ), i, f);\n"
            " }\n"
            "}\n"
        };

        const char *source_constantV3aligned[] = {
            "__kernel void test( __constant half *p, __global float3 *f )\n"
            "{\n"
            " size_t i = get_global_id(0);\n"
            " f[i] = vloada_half3( i, p );\n"
            " ((__global float *)f)[4*i+3] = vload_half(4*i+3,p);\n"
            "}\n"
        };


        // ---- variant 0: load directly from the __global input ----
        if(g_arrVecSizes[vectorSize] != 3) {
            programs[vectorSize][0] = MakeProgram( source, sizeof( source) / sizeof( source[0]) );
            if( NULL == programs[ vectorSize ][0] ) {
                gFailCount++;
                vlog_error( "\t\tFAILED -- Failed to create program.\n" );
                for ( q= 0; q < sizeof( source) / sizeof( source[0]); q++)
                    vlog_error("%s", source[q]);
                return -1;
            }
        } else if(aligned) {
            programs[vectorSize][0] = MakeProgram( sourceV3aligned, sizeof( sourceV3aligned) / sizeof( sourceV3aligned[0]) );
            if( NULL == programs[ vectorSize ][0] ) {
                gFailCount++;
                vlog_error( "\t\tFAILED -- Failed to create program.\n" );
                for ( q= 0; q < sizeof( sourceV3aligned) / sizeof( sourceV3aligned[0]); q++)
                    vlog_error("%s", sourceV3aligned[q]);
                return -1;
            }
        } else {
            programs[vectorSize][0] = MakeProgram( sourceV3, sizeof( sourceV3) / sizeof( sourceV3[0]) );
            if( NULL == programs[ vectorSize ][0] ) {
                gFailCount++;
                vlog_error( "\t\tFAILED -- Failed to create program.\n" );
                for ( q= 0; q < sizeof( sourceV3) / sizeof( sourceV3[0]); q++)
                    vlog_error("%s", sourceV3[q]);
                return -1;
            }
        }

        kernels[ vectorSize ][0] = clCreateKernel( programs[ vectorSize ][0], "test", &error );
        if( NULL == kernels[vectorSize][0] )
        {
            gFailCount++;
            vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error );
            return -2;
        }

        // ---- variant 1: load staged through __private memory ----
        const char** source_ptr;
        uint32_t source_size;
        if (vectorSize == 0) {
            source_ptr = source_private1;
            source_size = sizeof( source_private1) / sizeof( source_private1[0]);
        } else if(g_arrVecSizes[vectorSize] == 3) {
            if(aligned) {
                source_ptr = source_privateV3aligned;
                source_size = sizeof( source_privateV3aligned) / sizeof( source_privateV3aligned[0]);
            } else {
                source_ptr = source_privateV3;
                source_size = sizeof( source_privateV3) / sizeof( source_privateV3[0]);
            }
        } else {
            source_ptr = source_private2;
            source_size = sizeof( source_private2) / sizeof( source_private2[0]);
        }
        programs[vectorSize][1] = MakeProgram( source_ptr, source_size );
        if( NULL == programs[ vectorSize ][1] )
        {
            gFailCount++;
            vlog_error( "\t\tFAILED -- Failed to create private program.\n" );
            for ( q= 0; q < source_size; q++)
                vlog_error("%s", source_ptr[q]);
            return -1;
        }

        kernels[ vectorSize ][1] = clCreateKernel( programs[ vectorSize ][1], "test", &error );
        if( NULL == kernels[vectorSize][1] )
        {
            gFailCount++;
            vlog_error( "\t\tFAILED -- Failed to create private kernel. (%d)\n", error );
            return -2;
        }

        // ---- variant 2: load staged through __local memory ----
        if (vectorSize == 0) {
            source_ptr = source_local1;
            source_size = sizeof( source_local1) / sizeof( source_local1[0]);
        } else if(g_arrVecSizes[vectorSize] == 3) {
            if(aligned) {
                source_ptr = source_localV3aligned;
                source_size = sizeof(source_localV3aligned)/sizeof(source_localV3aligned[0]);
            } else {
                source_ptr = source_localV3;
                source_size = sizeof(source_localV3)/sizeof(source_localV3[0]);
            }
        } else {
            source_ptr = source_local2;
            source_size = sizeof( source_local2) / sizeof( source_local2[0]);
        }
        programs[vectorSize][2] = MakeProgram( source_ptr, source_size );
        if( NULL == programs[ vectorSize ][2] )
        {
            gFailCount++;
            vlog_error( "\t\tFAILED -- Failed to create local program.\n" );
            for ( q= 0; q < source_size; q++)
                vlog_error("%s", source_ptr[q]);
            return -1;
        }

        kernels[ vectorSize ][2] = clCreateKernel( programs[ vectorSize ][2], "test", &error );
        if( NULL == kernels[vectorSize][2] )
        {
            gFailCount++;
            vlog_error( "\t\tFAILED -- Failed to create local kernel. (%d)\n", error );
            return -2;
        }

        // ---- variant 3: load from a __constant input ----
        if(g_arrVecSizes[vectorSize] == 3) {
            if(aligned) {
                programs[vectorSize][3] = MakeProgram( source_constantV3aligned, sizeof(source_constantV3aligned) / sizeof( source_constantV3aligned[0]) );
                if( NULL == programs[ vectorSize ][3] )
                {
                    gFailCount++;
                    vlog_error( "\t\tFAILED -- Failed to create constant program.\n" );
                    for ( q= 0; q < sizeof( source_constantV3aligned) / sizeof( source_constantV3aligned[0]); q++)
                        vlog_error("%s", source_constantV3aligned[q]);
                    return -1;
                }
            } else {
                programs[vectorSize][3] = MakeProgram( source_constantV3, sizeof(source_constantV3) / sizeof( source_constantV3[0]) );
                if( NULL == programs[ vectorSize ][3] )
                {
                    gFailCount++;
                    vlog_error( "\t\tFAILED -- Failed to create constant program.\n" );
                    for ( q= 0; q < sizeof( source_constantV3) / sizeof( source_constantV3[0]); q++)
                        vlog_error("%s", source_constantV3[q]);
                    return -1;
                }
            }
        } else {
            programs[vectorSize][3] = MakeProgram( source_constant, sizeof(source_constant) / sizeof( source_constant[0]) );
            if( NULL == programs[ vectorSize ][3] )
            {
                gFailCount++;
                vlog_error( "\t\tFAILED -- Failed to create constant program.\n" );
                for ( q= 0; q < sizeof( source_constant) / sizeof( source_constant[0]); q++)
                    vlog_error("%s", source_constant[q]);
                return -1;
            }
        }

        kernels[ vectorSize ][3] = clCreateKernel( programs[ vectorSize ][3], "test", &error );
        if( NULL == kernels[vectorSize][3] )
        {
            gFailCount++;
            vlog_error( "\t\tFAILED -- Failed to create constant kernel. (%d)\n", error );
            return -2;
        }
    }

    // Figure out how many elements are in a work block
    size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
    size_t blockCount = getBufferSize(gDevice) / elementSize; // elementSize is power of 2
    uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half

    // we handle 64-bit types a bit differently.
    if( lastCase == 0 )
        lastCase = 0x100000000ULL;


    uint64_t i, j;
    uint64_t printMask = (lastCase >> 4) - 1;
    uint32_t count = 0;
    error = 0;
    int addressSpace;

    for( i = 0; i < (uint64_t)lastCase; i += blockCount )
    {
        count = (uint32_t) MIN( blockCount, lastCase - i );

        //Init the input stream
        uint16_t *p = (uint16_t *)gIn_half;
        for( j = 0; j < count; j++ )
            p[j] = j + i;

        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_half, CL_TRUE, 0, count * sizeof( cl_half ), gIn_half, 0, NULL, NULL)))
        {
            vlog_error( "Failure in clWriteArray\n" );
            gFailCount++;
            goto exit;
        }

        //create the reference result
        const unsigned short *s = (const unsigned short *)gIn_half;
        float *d = (float *)gOut_single_reference;
        for( j = 0; j < count; j++ )
            d[j] = half2float( s[j] );

        //Check the vector lengths
        for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
        { // here we loop through vector sizes, 3 is last

            for ( addressSpace = 0; addressSpace < 4; addressSpace++) {
                // Poison the output so stale data cannot pass for a result.
                uint32_t pattern = 0x7fffdead;

                memset_pattern4( gOut_single, &pattern, getBufferSize(gDevice));
                if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) )
                {
                    vlog_error( "Failure in clWriteArray\n" );
                    gFailCount++;
                    goto exit;
                }

                // For the unaligned 3-element case the extra argument telling
                // the last work-item how many elements to handle is passed
                // below as runsOverBy(...).

                // okay, here is where we have to be careful
                if( (error = RunKernel( kernels[vectorSize][addressSpace], gInBuffer_half, gOutBuffer_single, numVecs(count, vectorSize, aligned) ,
                                        runsOverBy(count, vectorSize, aligned) ) ) )
                {
                    gFailCount++;
                    goto exit;
                }

                if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) )
                {
                    vlog_error( "Failure in clReadArray\n" );
                    gFailCount++;
                    goto exit;
                }

                if( memcmp( gOut_single, gOut_single_reference, count * sizeof( float )) )
                {
                    uint32_t *u1 = (uint32_t *)gOut_single;
                    uint32_t *u2 = (uint32_t *)gOut_single_reference;
                    float *f1 = (float *)gOut_single;
                    float *f2 = (float *)gOut_single_reference;
                    for( j = 0; j < count; j++ )
                    {
                        if(isnan(f1[j]) && isnan(f2[j])) // both are nan dont compare them
                            continue;
                        if( u1[j] != u2[j])
                        {
                            vlog_error( " %lld) (of %lld) Failure at 0x%4.4x: %a vs *%a (0x%8.8x vs *0x%8.8x) vector_size = %d (%s) address space = %s, load is %s\n",
                                        j, (uint64_t)count, ((unsigned short*)gIn_half)[j], f1[j], f2[j], u1[j], u2[j], (g_arrVecSizes[vectorSize]),
                                        vector_size_names[vectorSize], addressSpaceNames[addressSpace],
                                        (aligned?"aligned":"unaligned"));
                            gFailCount++;
                            break; // report only the first mismatch of this run
                        }
                    }
                }

                if( gReportTimes && addressSpace == 0)
                {
                    //Run again for timing
                    for( j = 0; j < 100; j++ )
                    {
                        uint64_t startTime = ReadTime();
                        error =
                            RunKernel( kernels[vectorSize][addressSpace], gInBuffer_half, gOutBuffer_single, numVecs(count, vectorSize, aligned) ,
                                       runsOverBy(count, vectorSize, aligned));
                        if(error)
                        {
                            gFailCount++;
                            goto exit;
                        }

                        if( (error = clFinish(gQueue)) )
                        {
                            vlog_error( "Failure in clFinish\n" );
                            gFailCount++;
                            goto exit;
                        }
                        uint64_t currentTime = ReadTime() - startTime;
                        time[ vectorSize ] += currentTime;
                        if( currentTime < min_time[ vectorSize ] )
                            min_time[ vectorSize ] = currentTime ;
                    }
                }
            }
        }

        // Progress indicator
        if( ((i+blockCount) & ~printMask) == (i+blockCount) )
        {
            vlog( "." );
            fflush( stdout );
        }
    }

    vlog( "\tPassed\n" );

    if( gReportTimes )
    {
        for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
            vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * 100), 0,
                       "average us/elem", "vLoad%sHalf avg. (%s, vector size: %d)", ( (aligned) ? "a" : ""), addressSpaceNames[0], (g_arrVecSizes[vectorSize]) );
        for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
            vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0,
                       "best us/elem", "vLoad%sHalf best (%s vector size: %d)", ( (aligned) ? "a" : ""), addressSpaceNames[0], (g_arrVecSizes[vectorSize]) );
    }

exit:
    //clean up
    for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
    {
        // Four variants are created per vector size (indices 0..3), so all
        // four must be released; stopping at 3 leaked the __constant one.
        for ( addressSpace = 0; addressSpace < 4; addressSpace++) {
            clReleaseKernel( kernels[ vectorSize ][addressSpace] );
            clReleaseProgram( programs[ vectorSize ][addressSpace] );
        }
    }

    gTestCount++;
    return error;
}
|
||||
|
||||
/* Test entry point for the unaligned vload_half family. */
int Test_vload_half( void )
{
    const bool useAlignedLoads = false;

    return Test_vLoadHalf_private( useAlignedLoads );
}
|
||||
|
||||
/* Test entry point for the aligned vloada_half family. */
int Test_vloada_half( void )
{
    const bool useAlignedLoads = true;

    return Test_vLoadHalf_private( useAlignedLoads );
}
|
||||
|
||||
1947
test_conformance/half/Test_vStoreHalf.c
Normal file
1947
test_conformance/half/Test_vStoreHalf.c
Normal file
File diff suppressed because it is too large
Load Diff
493
test_conformance/half/cl_utils.c
Normal file
493
test_conformance/half/cl_utils.c
Normal file
@@ -0,0 +1,493 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "cl_utils.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined (_WIN32)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include "test_config.h"
|
||||
#include "string.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
|
||||
#define HALF_MIN 1.0p-14
|
||||
|
||||
|
||||
const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount] = { "", "2", "4", "8", "16", "3" };
|
||||
const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "3" };
|
||||
const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "4" };
|
||||
const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount] = { "half", "int", "int2", "int4", "int8", "int2" };
|
||||
|
||||
|
||||
void *gIn_half = NULL;
|
||||
void *gOut_half = NULL;
|
||||
void *gOut_half_reference = NULL;
|
||||
void *gOut_half_reference_double = NULL;
|
||||
void *gIn_single = NULL;
|
||||
void *gOut_single = NULL;
|
||||
void *gOut_single_reference = NULL;
|
||||
void *gIn_double = NULL;
|
||||
// void *gOut_double = NULL;
|
||||
// void *gOut_double_reference = NULL;
|
||||
cl_mem gInBuffer_half = NULL;
|
||||
cl_mem gOutBuffer_half = NULL;
|
||||
cl_mem gInBuffer_single = NULL;
|
||||
cl_mem gOutBuffer_single = NULL;
|
||||
cl_mem gInBuffer_double = NULL;
|
||||
// cl_mem gOutBuffer_double = NULL;
|
||||
|
||||
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
|
||||
cl_device_id gDevice = NULL;
|
||||
cl_context gContext = NULL;
|
||||
cl_command_queue gQueue = NULL;
|
||||
uint32_t gDeviceFrequency = 0;
|
||||
uint32_t gComputeDevices = 0;
|
||||
size_t gMaxThreadGroupSize = 0;
|
||||
size_t gWorkGroupSize = 0;
|
||||
int gTestCount = 0;
|
||||
int gFailCount = 0;
|
||||
bool gWimpyMode = false;
|
||||
int gTestDouble = 0;
|
||||
uint32_t gDeviceIndex = 0;
|
||||
int gIsEmbedded = 0;
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
int gReportTimes = 1;
|
||||
#else
|
||||
int gReportTimes = 0;
|
||||
#endif
|
||||
|
||||
#pragma mark -
|
||||
|
||||
/*
 * Context error callback handed to clCreateContext(): writes the
 * runtime-provided message to the test log. The other callback arguments
 * are not used.
 */
static void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
    (void) private_info;
    (void) cb;
    (void) user_data;

    vlog( "%s\n", errinfo );
}
|
||||
|
||||
int InitCL( void )
|
||||
{
|
||||
cl_platform_id platform = NULL;
|
||||
size_t configSize = sizeof( gComputeDevices );
|
||||
int error;
|
||||
|
||||
if( (error = clGetPlatformIDs(1, &platform, NULL) ) )
|
||||
return error;
|
||||
|
||||
// gDeviceType & gDeviceIndex are globals set in ParseArgs
|
||||
|
||||
cl_uint ndevices;
|
||||
if ( (error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &ndevices)) )
|
||||
return error;
|
||||
|
||||
cl_device_id *gDeviceList = (cl_device_id *)malloc(ndevices*sizeof( cl_device_id ));
|
||||
if ( gDeviceList == 0 )
|
||||
{
|
||||
log_error("Unable to allocate memory for devices\n");
|
||||
return -1;
|
||||
}
|
||||
if( (error = clGetDeviceIDs(platform, gDeviceType, ndevices, gDeviceList, NULL )) )
|
||||
{
|
||||
free( gDeviceList );
|
||||
return error;
|
||||
}
|
||||
|
||||
gDevice = gDeviceList[gDeviceIndex];
|
||||
free( gDeviceList );
|
||||
|
||||
#if MULTITHREAD
|
||||
if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) )
|
||||
#endif
|
||||
gComputeDevices = 1;
|
||||
|
||||
configSize = sizeof( gMaxThreadGroupSize );
|
||||
if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, configSize, &gMaxThreadGroupSize, NULL )) )
|
||||
gMaxThreadGroupSize = 1;
|
||||
|
||||
// Use only one-eighth the work group size
|
||||
if (gMaxThreadGroupSize > 8)
|
||||
gWorkGroupSize = gMaxThreadGroupSize / 8;
|
||||
else
|
||||
gWorkGroupSize = gMaxThreadGroupSize;
|
||||
|
||||
configSize = sizeof( gDeviceFrequency );
|
||||
if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) )
|
||||
gDeviceFrequency = 1;
|
||||
|
||||
// Check extensions
|
||||
size_t extSize = 0;
|
||||
int hasDouble = 0;
|
||||
if((error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, 0, NULL, &extSize)))
|
||||
{ vlog_error( "Unable to get device extension string to see if double present. (%d) \n", error ); }
|
||||
else
|
||||
{
|
||||
char *ext = (char *)malloc( extSize );
|
||||
if( NULL == ext )
|
||||
{ vlog_error( "malloc failed at %s:%d\nUnable to determine if double present.\n", __FILE__, __LINE__ ); }
|
||||
else
|
||||
{
|
||||
if((error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, extSize, ext, NULL)))
|
||||
{ vlog_error( "Unable to get device extension string to see if double present. (%d) \n", error ); }
|
||||
else
|
||||
{
|
||||
if( strstr( ext, "cl_khr_fp64" ))
|
||||
hasDouble = 1;
|
||||
}
|
||||
free(ext);
|
||||
}
|
||||
}
|
||||
gTestDouble ^= hasDouble;
|
||||
|
||||
|
||||
|
||||
//detect whether profile of the device is embedded
|
||||
char profile[64] = "";
|
||||
if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) )
|
||||
{
|
||||
vlog_error( "Unable to get device CL DEVICE PROFILE string. (%d) \n", error );
|
||||
}
|
||||
else if( strstr(profile, "EMBEDDED_PROFILE" ) )
|
||||
{
|
||||
gIsEmbedded = 1;
|
||||
}
|
||||
|
||||
vlog( "%d compute devices at %f GHz\n", gComputeDevices, (double) gDeviceFrequency / 1000. );
|
||||
vlog( "Max thread group size is %lld.\n", (uint64_t) gMaxThreadGroupSize );
|
||||
|
||||
gContext = clCreateContext( NULL, 1, &gDevice, notify_callback, NULL, &error );
|
||||
if( NULL == gContext )
|
||||
{
|
||||
vlog_error( "clCreateDeviceGroup failed. (%d)\n", error );
|
||||
return -1;
|
||||
}
|
||||
|
||||
gQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error);
|
||||
if( NULL == gQueue )
|
||||
{
|
||||
vlog_error( "clCreateContext failed. (%d)\n", error );
|
||||
return -2;
|
||||
}
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
// FIXME: use clProtectedArray
|
||||
#endif
|
||||
//Allocate buffers
|
||||
gIn_half = malloc( getBufferSize(gDevice)/2 );
|
||||
gOut_half = malloc( BUFFER_SIZE/2 );
|
||||
gOut_half_reference = malloc( BUFFER_SIZE/2 );
|
||||
gOut_half_reference_double = malloc( BUFFER_SIZE/2 );
|
||||
gIn_single = malloc( BUFFER_SIZE );
|
||||
gOut_single = malloc( getBufferSize(gDevice) );
|
||||
gOut_single_reference = malloc( getBufferSize(gDevice) );
|
||||
gIn_double = malloc( 2*BUFFER_SIZE );
|
||||
// gOut_double = malloc( (2*getBufferSize(gDevice)) );
|
||||
// gOut_double_reference = malloc( (2*getBufferSize(gDevice)) );
|
||||
|
||||
if ( NULL == gIn_half ||
|
||||
NULL == gOut_half ||
|
||||
NULL == gOut_half_reference ||
|
||||
NULL == gOut_half_reference_double ||
|
||||
NULL == gIn_single ||
|
||||
NULL == gOut_single ||
|
||||
NULL == gOut_single_reference ||
|
||||
NULL == gIn_double // || NULL == gOut_double || NULL == gOut_double_reference
|
||||
)
|
||||
return -3;
|
||||
|
||||
gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, getBufferSize(gDevice) / 2, NULL, &error);
|
||||
if( gInBuffer_half == NULL )
|
||||
{
|
||||
vlog_error( "clCreateArray failed for input (%d)\n", error );
|
||||
return -4;
|
||||
}
|
||||
|
||||
gInBuffer_single = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &error );
|
||||
if( gInBuffer_single == NULL )
|
||||
{
|
||||
vlog_error( "clCreateArray failed for input (%d)\n", error );
|
||||
return -4;
|
||||
}
|
||||
|
||||
gInBuffer_double = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE*2, NULL, &error );
|
||||
if( gInBuffer_double == NULL )
|
||||
{
|
||||
vlog_error( "clCreateArray failed for input (%d)\n", error );
|
||||
return -4;
|
||||
}
|
||||
|
||||
gOutBuffer_half = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, BUFFER_SIZE/2, NULL, &error );
|
||||
if( gOutBuffer_half == NULL )
|
||||
{
|
||||
vlog_error( "clCreateArray failed for output (%d)\n", error );
|
||||
return -5;
|
||||
}
|
||||
|
||||
gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(gDevice), NULL, &error );
|
||||
if( gOutBuffer_single == NULL )
|
||||
{
|
||||
vlog_error( "clCreateArray failed for output (%d)\n", error );
|
||||
return -5;
|
||||
}
|
||||
|
||||
#if 0
|
||||
gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(gDevice)), NULL, &error );
|
||||
if( gOutBuffer_double == NULL )
|
||||
{
|
||||
vlog_error( "clCreateArray failed for output (%d)\n", error );
|
||||
return -5;
|
||||
}
|
||||
#endif
|
||||
|
||||
char string[16384];
|
||||
vlog( "\nCompute Device info:\n" );
|
||||
error = clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(string), string, NULL);
|
||||
vlog( "\tDevice Name: %s\n", string );
|
||||
error = clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(string), string, NULL);
|
||||
vlog( "\tVendor: %s\n", string );
|
||||
error = clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(string), string, NULL);
|
||||
vlog( "\tDevice Version: %s\n", string );
|
||||
error = clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(string), string, NULL);
|
||||
vlog( "\tOpenCL C Version: %s\n", string );
|
||||
error = clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(string), string, NULL);
|
||||
vlog( "\tDriver Version: %s\n", string );
|
||||
vlog( "\tProcessing with %d devices\n", gComputeDevices );
|
||||
vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
|
||||
vlog( "\tHas double? %s\n", hasDouble ? "YES" : "NO" );
|
||||
vlog( "\tTest double? %s\n", gTestDouble ? "YES" : "NO" );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
cl_program MakeProgram( const char *source[], int count )
|
||||
{
|
||||
int error;
|
||||
int i;
|
||||
|
||||
//create the program
|
||||
cl_program program;
|
||||
error = create_single_kernel_helper_create_program(gContext, &program, (cl_uint)count, source);
|
||||
if( NULL == program )
|
||||
{
|
||||
vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// build it
|
||||
if( (error = clBuildProgram( program, 1, &gDevice, NULL, NULL, NULL )) )
|
||||
{
|
||||
size_t len;
|
||||
char buffer[16384];
|
||||
|
||||
vlog_error("\t\tFAILED -- clBuildProgramExecutable() failed:\n");
|
||||
clGetProgramBuildInfo(program, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
|
||||
vlog_error("Log: %s\n", buffer);
|
||||
vlog_error("Source :\n");
|
||||
for(i = 0; i < count; ++i) {
|
||||
vlog_error("%s", source[i]);
|
||||
}
|
||||
vlog_error("\n");
|
||||
|
||||
clReleaseProgram( program );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
void ReleaseCL(void)
|
||||
{
|
||||
clReleaseMemObject(gInBuffer_half);
|
||||
clReleaseMemObject(gOutBuffer_half);
|
||||
clReleaseMemObject(gInBuffer_single);
|
||||
clReleaseMemObject(gOutBuffer_single);
|
||||
clReleaseMemObject(gInBuffer_double);
|
||||
// clReleaseMemObject(gOutBuffer_double);
|
||||
clReleaseCommandQueue(gQueue);
|
||||
clReleaseContext(gContext);
|
||||
}
|
||||
|
||||
// Number of vectors needed to cover `count` scalar elements for the vector
// size selected by `vectorSizeIdx` (an index into g_arrVecSizes).
//
// For the aligned 3-element case the result is count/4 (rounded down),
// presumably because aligned 3-vectors occupy four element slots -- confirm
// against the kernels. In every other case the count is rounded up to a
// whole number of vectors.
cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned)
{
    const int width = g_arrVecSizes[vectorSizeIdx];

    if( width == 3 && aligned )
        return count / 4;

    // ceil(count / width), evaluated in unsigned arithmetic
    return (count + width - 1) / width;
}
|
||||
|
||||
// Remainder of `count` modulo the vector width, reported only for the
// unaligned 3-element vector size.
//
// For every other combination (aligned access, or any width other than 3)
// the function returns -1, which the cl_uint return type turns into the
// largest unsigned value; callers see that as the "not applicable" marker.
cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned)
{
    if( !aligned && g_arrVecSizes[vectorSizeIdx] == 3 )
        return count % 3;

    return -1;
}
|
||||
|
||||
// Write the `len` source fragments in `src` to the log, back to back, with no
// separator added between them. Nothing is printed when len <= 0.
void printSource(const char * src[], int len)
{
    int idx = 0;

    while( idx < len )
    {
        vlog("%s", src[idx]);
        idx++;
    }
}
|
||||
|
||||
// Bind the kernel arguments and enqueue `kernel` on gQueue as a
// one-dimensional range of `blockCount` work-items.
//
//   inBuf, outBuf  passed by value as kernel arguments 0 and 1 (callers hand
//                  in cl_mem handles through these void* parameters).
//   blockCount     global work size.
//   extraArg       when >= 0, passed as kernel argument 2 (as a cl_uint);
//                  a negative value means the kernel has no third argument.
//
// Returns 0 on success, -3 if the arguments could not be set, -4 if no usable
// work-group size could be determined, and -5 if the enqueue failed. The call
// does not wait for the kernel to finish.
int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg)
{
    // Despite its name this is the *global* work size handed to the enqueue.
    size_t localCount = blockCount;
    size_t wg_size;
    int error;

    error = clSetKernelArg(kernel, 0, sizeof inBuf, &inBuf);
    error |= clSetKernelArg(kernel, 1, sizeof outBuf, &outBuf);

    if(extraArg >= 0) {
        // Pass an object whose type matches the declared argument size.
        cl_uint extra = (cl_uint) extraArg;
        error |= clSetKernelArg(kernel, 2, sizeof(extra), &extra);
    }

    if( error )
    {
        vlog_error( "FAILED -- could not set kernel args\n" );
        return -3;
    }

    error = clGetKernelWorkGroupInfo(kernel, gDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof( wg_size ), &wg_size, NULL);
    if (error)
    {
        vlog_error( "FAILED -- could not get kernel work group info\n" );
        return -4;
    }

    // Never exceed the globally configured work-group size.
    wg_size = (wg_size > gWorkGroupSize) ? gWorkGroupSize : wg_size;

    // A zero work-group size would make the modulo below divide by zero.
    if( 0 == wg_size )
    {
        vlog_error( "FAILED -- kernel work group size is zero\n" );
        return -4;
    }

    // Shrink the work-group size until it evenly divides the global size;
    // this terminates at 1 at the latest.
    while( localCount % wg_size )
        wg_size--;

    if( (error = clEnqueueNDRangeKernel( gQueue, kernel, 1, NULL, &localCount, &wg_size, 0, NULL, NULL )) )
    {
        vlog_error( "FAILED -- could not execute kernel\n" );
        return -5;
    }

    return 0;
}
|
||||
|
||||
#if defined (__APPLE__ )

#include <mach/mach_time.h>

// Current timestamp in mach_absolute_time() ticks (time since boot; ticks
// have better than microsecond precision).
uint64_t ReadTime( void )
{
    const uint64_t ticks = mach_absolute_time();
    return ticks;
}

// Difference between two ReadTime() values, in seconds.
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
    // Seconds per tick. Remains 0.0 (and is looked up again on the next call)
    // for as long as mach_timebase_info() has not succeeded.
    static double secondsPerTick = 0.0;

    if( 0.0 == secondsPerTick )
    {
        mach_timebase_info_data_t timebase;

        if( 0 == mach_timebase_info( &timebase ) )
            secondsPerTick = 1e-9 * (double) timebase.numer / (double) timebase.denom;
    }

    return (double) (endTime - startTime) * secondsPerTick;
}

#elif defined( _WIN32 ) && defined (_MSC_VER)

// functions are defined in compat.h

#else

//
// No timing facility is implemented for this platform; substitute your own
// here. Until then the stubs below make every reported time meaningless.
//

#warning Times are meaningless. No timing facility in place for this platform.
// Stub: always reports timestamp zero.
uint64_t ReadTime( void )
{
    return 0ULL;
}

// Stub for the difference between two ReadTime() values in seconds: always
// reports INFINITY, ignoring both arguments.
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
    return INFINITY;
}

#endif
|
||||
|
||||
#if !defined( __APPLE__ )
// Portable stand-in for the memset_pattern4() that Apple platforms provide.
//
// Fills `bytes` bytes at `dest` with repetitions of the 4-byte pattern at
// `src_pattern`. If `bytes` is not a multiple of four, the final partial
// repetition receives the leading bytes of the pattern.
//
// All stores go through memcpy on byte pointers, so neither `dest` nor
// `src_pattern` has to be 4-byte aligned and no object is accessed through an
// incompatible pointer type. As before, four bytes are always read from
// `src_pattern`.
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
{
    unsigned char pattern[4];
    unsigned char *out = (unsigned char *) dest;
    const size_t tail = bytes & 3;          // bytes in the trailing partial repetition
    const size_t whole = bytes - tail;      // bytes covered by complete repetitions
    size_t offset;

    // Snapshot the pattern first so the fill stays well defined even if the
    // pattern lives inside the destination range.
    memcpy( pattern, src_pattern, sizeof( pattern ) );

    for( offset = 0; offset < whole; offset += sizeof( pattern ) )
        memcpy( out + offset, pattern, sizeof( pattern ) );

    if( tail )
        memcpy( out + whole, pattern, tail );
}
#endif
|
||||
|
||||
// Size in bytes of the per-device test buffers: half of the device's
// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE.
//
// The value is cached for the most recently queried device. If the device
// query fails, the error is logged, 64 KiB is returned, and nothing is
// cached, so the next call queries the device again. The process is aborted
// if the resulting size does not fit in size_t.
size_t getBufferSize(cl_device_id device_id)
{
    static int s_initialized = 0;
    static cl_device_id s_device_id;
    static cl_ulong s_result = 64*1024;

    if(s_initialized == 0 || s_device_id != device_id)
    {
        cl_ulong result = 0;
        cl_int err = clGetDeviceInfo (device_id,
                                      CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
                                      sizeof(result), (void *)&result,
                                      NULL);
        if(err)
        {
            vlog_error("clGetDeviceInfo() failed\n");
            s_result = 64*1024;
        }
        else
        {
            result = result / 2;
            // cl_ulong is not `unsigned long long` on every platform; cast so
            // the arguments match the %llx / %llu conversions.
            log_info("Const buffer size is %llx (%llu)\n",
                     (unsigned long long) result, (unsigned long long) result);
            s_initialized = 1;
            s_device_id = device_id;
            s_result = result;
        }
    }

    if( s_result > SIZE_MAX )
    {
        vlog_error( "ERROR: clGetDeviceInfo is reporting a CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE larger than addressable memory on the host.\n It seems highly unlikely that this is usable, due to the API design.\n" );
        fflush(stdout);
        abort();
    }

    return (size_t) s_result;
}
|
||||
|
||||
// Number of vectors of `vecSize` elements, each element `typeSize` bytes
// wide, that fit in a buffer of getBufferSize(device_id) bytes. A 3-element
// vector is counted as taking four element slots.
cl_ulong getBufferCount(cl_device_id device_id, size_t vecSize, size_t typeSize)
{
    const cl_ulong totalBytes = getBufferSize(device_id);
    const size_t slotsPerVector = (3 == vecSize) ? 4 : vecSize;

    return totalBytes / (cl_ulong)(slotsPerVector * typeSize);
}
|
||||
162
test_conformance/half/cl_utils.h
Normal file
162
test_conformance/half/cl_utils.h
Normal file
@@ -0,0 +1,162 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef CL_UTILS_H
|
||||
#define CL_UTILS_H
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __MINGW32__
|
||||
#define __mingw_printf printf
|
||||
#endif
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#include "../../test_common/harness/ThreadPool.h"
|
||||
|
||||
|
||||
|
||||
#include "test_config.h"
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
extern void *gIn_half;
|
||||
extern void *gOut_half;
|
||||
extern void *gOut_half_reference;
|
||||
extern void *gOut_half_reference_double;
|
||||
extern void *gIn_single;
|
||||
extern void *gOut_single;
|
||||
extern void *gOut_single_reference;
|
||||
extern void *gIn_double;
|
||||
// extern void *gOut_double;
|
||||
// extern void *gOut_double_reference;
|
||||
extern cl_mem gInBuffer_half;
|
||||
extern cl_mem gOutBuffer_half;
|
||||
extern cl_mem gInBuffer_single;
|
||||
extern cl_mem gOutBuffer_single;
|
||||
extern cl_mem gInBuffer_double;
|
||||
// extern cl_mem gOutBuffer_double;
|
||||
|
||||
extern uint32_t gDeviceIndex;
|
||||
extern cl_device_type gDeviceType;
|
||||
extern cl_device_id gDevice;
|
||||
extern cl_context gContext;
|
||||
extern cl_command_queue gQueue;
|
||||
extern uint32_t gDeviceFrequency;
|
||||
extern uint32_t gComputeDevices;
|
||||
extern size_t gMaxThreadGroupSize;
|
||||
extern size_t gWorkGroupSize;
|
||||
extern int gTestCount;
|
||||
extern int gFailCount;
|
||||
extern int gTestDouble;
|
||||
extern int gReportTimes;
|
||||
extern int gIsEmbedded;
|
||||
|
||||
// gWimpyMode indicates if we run the test in wimpy mode where we limit the
|
||||
// size of 32 bit ranges to a much smaller set. This is meant to be used
|
||||
// as a smoke test
|
||||
extern bool gWimpyMode;
|
||||
|
||||
uint64_t ReadTime( void );
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||
|
||||
cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned);
|
||||
cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned);
|
||||
|
||||
void printSource(const char * src[], int len);
|
||||
|
||||
extern const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount];
|
||||
extern const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount];
|
||||
extern const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount];
|
||||
extern const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount];
|
||||
|
||||
int InitCL( void );
|
||||
void ReleaseCL( void );
|
||||
int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg);
|
||||
cl_program MakeProgram( const char *source[], int count );
|
||||
|
||||
#if ! defined( __APPLE__ )
|
||||
extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
|
||||
#endif
|
||||
|
||||
#define STRING( _x ) STRINGIFY( _x )
|
||||
#define STRINGIFY(x) #x
|
||||
|
||||
static inline float as_float(cl_uint u) { union { cl_uint u; float f; }v; v.u = u; return v.f; }
|
||||
static inline double as_double(cl_ulong u) { union { cl_ulong u; double d; }v; v.u = u; return v.d; }
|
||||
|
||||
// used to convert a bucket of bits into a search pattern through double
|
||||
static inline cl_ulong DoubleFromUInt( cl_uint bits );
|
||||
static inline cl_ulong DoubleFromUInt( cl_uint bits )
|
||||
{
|
||||
// split 0x89abcdef to 0x89abcd00000000ef
|
||||
cl_ulong u = ((cl_ulong)(bits & ~0xffU) << 32) | ((cl_ulong)(bits & 0xffU));
|
||||
|
||||
// sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s
|
||||
u -= (cl_ulong)((bits & 0x80U) << 1);
|
||||
|
||||
return u;
|
||||
}
|
||||
|
||||
// Returns non-zero when the 16-bit pattern `x`, read as a half-precision
// value, is subnormal: sign ignored, exponent field zero, mantissa non-zero.
static inline int IsHalfSubnormal( uint16_t x )
{
    const unsigned int magnitude = x & 0x7fffU;

    // Subtracting one wraps zero around to a huge value, so a single unsigned
    // compare accepts exactly the magnitudes 0x0001..0x03ff.
    return (magnitude - 1U) < 0x03ffU;
}
|
||||
|
||||
// prevent silent failures due to missing FLT_RADIX
|
||||
#ifndef FLT_RADIX
|
||||
#error FLT_RADIX is not defined by float.h
|
||||
#endif
|
||||
|
||||
// Returns non-zero when `x`, after conversion to single precision, is a
// subnormal float.
static inline int IsFloatSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Classify on the bit pattern rather than with floating-point compares,
    // so that flush-to-zero behavior cannot hide subnormal values.
    union{ float value; uint32_t bits; } pun;

    pun.value = fabsf((float) x);

    // Zero wraps to a huge value; subnormals (0x00000001..0x007fffff) pass.
    return (pun.bits - 1) < 0x007fffffU;
#else
    // Non-radix-2 hardware: fall back to floating-point compares. This gives
    // wrong answers if subnormals are flushed to zero.
    return fabs(x) < (double) FLT_MIN && x != 0.0;
#endif
}
|
||||
|
||||
// Returns non-zero when `x`, after conversion to double precision, is a
// subnormal double.
static inline int IsDoubleSubnormal( long double x )
{
#if 2 == FLT_RADIX
    // Classify on the bit pattern rather than with floating-point compares,
    // so that flush-to-zero behavior cannot hide subnormal values.
    union{ double value; uint64_t bits; } pun;

    pun.value = fabs((double)x);

    // Zero wraps to a huge value; subnormals (1..0x000fffffffffffff) pass.
    return (pun.bits - 1) < 0x000fffffffffffffULL;
#else
    // Non-radix-2 hardware: fall back to floating-point compares. This gives
    // wrong answers if subnormals are flushed to zero.
    return fabs(x) < (double) DBL_MIN && x != 0.0;
#endif
}
|
||||
|
||||
#endif /* CL_UTILS_H */
|
||||
|
||||
|
||||
|
||||
434
test_conformance/half/main.c
Normal file
434
test_conformance/half/main.c
Normal file
@@ -0,0 +1,434 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined (_WIN32)
|
||||
#include <sys/resource.h>
|
||||
#if !defined(__ANDROID__)
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
#include <libgen.h>
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/mingw_compat.h"
|
||||
#include "../../test_common/harness/parseParameters.h"
|
||||
#if defined (__MINGW32__)
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
|
||||
#include "cl_utils.h"
|
||||
#include "tests.h"
|
||||
|
||||
const char ** argList = NULL;
|
||||
size_t argCount = 0;
|
||||
char appName[64] = "ctest";
|
||||
const char *addressSpaceNames[] = {"global", "private", "local", "constant"};
|
||||
|
||||
#pragma mark -
|
||||
#pragma mark Declarations
|
||||
|
||||
|
||||
static int ParseArgs( int argc, const char **argv );
|
||||
static void PrintUsage( void );
|
||||
static void PrintArch(void);
|
||||
static void PrintDevice(void);
|
||||
static int DoTest( void);
|
||||
|
||||
|
||||
int g_arrVecSizes[kVectorSizeCount+kStrangeVectorSizeCount];
|
||||
int g_arrVecAligns[kLargestVectorSize+1];
|
||||
static int arrStrangeVecSizes[kStrangeVectorSizeCount] = {3};
|
||||
|
||||
int main (int argc, const char **argv )
|
||||
{
|
||||
int error;
|
||||
int i;
|
||||
int alignbound;
|
||||
|
||||
for(i = 0; i < kVectorSizeCount; ++i) {
|
||||
g_arrVecSizes[i] = (1<<i);
|
||||
}
|
||||
for(i = 0; i < kStrangeVectorSizeCount; ++i) {
|
||||
g_arrVecSizes[i+kVectorSizeCount] =
|
||||
arrStrangeVecSizes[i];
|
||||
}
|
||||
|
||||
for(i = 0, alignbound=1; i <= kLargestVectorSize; ++i) {
|
||||
while(alignbound < i) {
|
||||
alignbound = alignbound<<1;
|
||||
}
|
||||
g_arrVecAligns[i] = alignbound;
|
||||
}
|
||||
|
||||
test_start();
|
||||
|
||||
argc = parseCustomParam(argc, argv);
|
||||
if (argc == -1)
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( (error = ParseArgs( argc, argv )) )
|
||||
goto exit;
|
||||
|
||||
if( (error = InitCL()) )
|
||||
goto exit;
|
||||
|
||||
if (gIsEmbedded) {
|
||||
vlog( "\tProfile: Embedded\n" );
|
||||
}else
|
||||
{
|
||||
vlog( "\tProfile: Full\n" );
|
||||
}
|
||||
|
||||
fflush( stdout );
|
||||
error = DoTest();
|
||||
|
||||
exit:
|
||||
|
||||
if (gFailCount == 0) {
|
||||
if (gTestCount > 1)
|
||||
vlog("PASSED %d of %d tests.\n", gTestCount, gTestCount);
|
||||
else
|
||||
vlog("PASSED test.\n");
|
||||
} else if (gFailCount > 0) {
|
||||
if (gFailCount+gTestCount > 1)
|
||||
vlog_error("FAILED %d of %d tests.\n", gFailCount, gTestCount+gFailCount);
|
||||
else
|
||||
vlog_error("FAILED test.\n");
|
||||
}
|
||||
|
||||
if (gQueue) {
|
||||
int flush_error = clFinish(gQueue);
|
||||
if (flush_error)
|
||||
vlog_error("clFinish failed: %d\n", flush_error);
|
||||
}
|
||||
|
||||
ReleaseCL();
|
||||
test_finish();
|
||||
|
||||
if (gFailCount)
|
||||
return gFailCount;
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
#pragma mark -
|
||||
#pragma mark setup
|
||||
|
||||
static int ParseArgs( int argc, const char **argv )
|
||||
{
|
||||
int i;
|
||||
argList = (const char **)calloc( argc - 1, sizeof( char*) );
|
||||
|
||||
argCount = 0;
|
||||
|
||||
if( NULL == argList && argc > 1 )
|
||||
return -1;
|
||||
|
||||
#if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__))
|
||||
{ // Extract the app name
|
||||
char baseName[ MAXPATHLEN ];
|
||||
strncpy( baseName, argv[0], MAXPATHLEN );
|
||||
char *base = basename( baseName );
|
||||
if( NULL != base )
|
||||
{
|
||||
strncpy( appName, base, sizeof( appName ) );
|
||||
appName[ sizeof( appName ) -1 ] = '\0';
|
||||
}
|
||||
}
|
||||
#elif defined (_WIN32)
|
||||
{
|
||||
char fname[_MAX_FNAME + _MAX_EXT + 1];
|
||||
char ext[_MAX_EXT];
|
||||
|
||||
errno_t err = _splitpath_s( argv[0], NULL, 0, NULL, 0,
|
||||
fname, _MAX_FNAME, ext, _MAX_EXT );
|
||||
if (err == 0) { // no error
|
||||
strcat (fname, ext); //just cat them, size of frame can keep both
|
||||
strncpy (appName, fname, sizeof(appName));
|
||||
appName[ sizeof( appName ) -1 ] = '\0';
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Check for environment variable to set device type */
|
||||
char *env_mode = getenv( "CL_DEVICE_TYPE" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
gDeviceType = CL_DEVICE_TYPE_GPU;
|
||||
else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
gDeviceType = CL_DEVICE_TYPE_CPU;
|
||||
else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
gDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
gDeviceType = CL_DEVICE_TYPE_DEFAULT;
|
||||
else
|
||||
{
|
||||
vlog_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int num_devices;
|
||||
cl_platform_id platform = NULL;
|
||||
clGetPlatformIDs(1, &platform, NULL);
|
||||
clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices);
|
||||
|
||||
const char* device_index_env = getenv("CL_DEVICE_INDEX");
|
||||
if (device_index_env) {
|
||||
if (device_index_env) {
|
||||
gDeviceIndex = atoi(device_index_env);
|
||||
}
|
||||
|
||||
if (gDeviceIndex >= num_devices) {
|
||||
vlog("Specified CL_DEVICE_INDEX=%d out of range, using index 0.\n",
|
||||
gDeviceIndex);
|
||||
gDeviceIndex = 0;
|
||||
}
|
||||
}
|
||||
|
||||
vlog( "\n%s", appName );
|
||||
for( i = 1; i < argc; i++ )
|
||||
{
|
||||
const char *arg = argv[i];
|
||||
if( NULL == arg )
|
||||
break;
|
||||
|
||||
vlog( "\t%s", arg );
|
||||
if( arg[0] == '-' )
|
||||
{
|
||||
arg++;
|
||||
while( *arg != '\0' )
|
||||
{
|
||||
switch( *arg )
|
||||
{
|
||||
case 'd':
|
||||
gTestDouble ^= 1;
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
PrintUsage();
|
||||
return -1;
|
||||
|
||||
case 't':
|
||||
gReportTimes ^= 1;
|
||||
break;
|
||||
|
||||
case 'w': // Wimpy mode
|
||||
gWimpyMode = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
|
||||
PrintUsage();
|
||||
return -1;
|
||||
}
|
||||
arg++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( 0 == strcmp( arg, "CL_DEVICE_TYPE_CPU" ) )
|
||||
gDeviceType = CL_DEVICE_TYPE_CPU;
|
||||
else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_GPU" ) )
|
||||
gDeviceType = CL_DEVICE_TYPE_GPU;
|
||||
else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_ACCELERATOR" ) )
|
||||
gDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_DEFAULT" ) )
|
||||
gDeviceType = CL_DEVICE_TYPE_DEFAULT;
|
||||
else
|
||||
{
|
||||
argList[ argCount ] = arg;
|
||||
argCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (getenv("CL_WIMPY_MODE")) {
|
||||
vlog( "\n" );
|
||||
vlog( "*** Detected CL_WIMPY_MODE env ***\n" );
|
||||
gWimpyMode = 1;
|
||||
}
|
||||
|
||||
vlog( "Test binary built %s %s\n", __DATE__, __TIME__ );
|
||||
PrintArch();
|
||||
PrintDevice();
|
||||
if( gWimpyMode )
|
||||
{
|
||||
vlog( "\n" );
|
||||
vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
|
||||
vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
|
||||
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Print the command-line synopsis and a one-line description of each flag
// handled by ParseArgs().
static void PrintUsage( void )
{
    vlog( "%s [-dthw]: <optional: test names>\n", appName );
    vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" );
    vlog( "\t\t-t\tToggle reporting performance data.\n" );
    vlog( "\t\t-w\tRun in wimpy mode\n" );
    vlog( "\t\t-h\tHelp\n" );
    vlog( "\n" );
}
|
||||
|
||||
// Log the host pointer size and, on Apple platforms, the CPU architecture the
// binary was built for plus the CPU type/subtype reported by sysctl.
static void PrintArch( void )
{
    // size_t has no portable printf conversion across all the toolchains this
    // file targets, so convert explicitly to match %ld.
    vlog( "sizeof( void*) = %ld\n", (long) sizeof( void *) );

#if defined( __APPLE__ )
    #if defined( __ppc__ )
        vlog( "ARCH:\tppc\n" );
    #elif defined( __ppc64__ )
        vlog( "ARCH:\tppc64\n" );
    #elif defined( __i386__ )
        vlog( "ARCH:\ti386\n" );
    #elif defined( __x86_64__ )
        vlog( "ARCH:\tx86_64\n" );
    #elif defined( __arm64__ ) || defined( __aarch64__ )
        // 64-bit ARM builds previously fell through to the #error below.
        vlog( "ARCH:\tarm64\n" );
    #elif defined( __arm__ )
        vlog( "ARCH:\tarm\n" );
    #else
        #error unknown arch
    #endif

    // If a sysctl query fails, `type` keeps its previous value and that is
    // what gets printed.
    int type = 0;
    size_t typeSize = sizeof( type );
    sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 );
    vlog( "cpu type:\t%d\n", type );
    typeSize = sizeof( type );
    sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 );
    vlog( "cpu subtype:\t%d\n", type );
#endif
}
|
||||
|
||||
static void PrintDevice( void)
|
||||
{
|
||||
switch(gDeviceType) {
|
||||
case CL_DEVICE_TYPE_CPU:
|
||||
vlog( "DEVICE:\tcpu\n" );
|
||||
break;
|
||||
case CL_DEVICE_TYPE_GPU:
|
||||
vlog( "DEVICE:\tgpu\n" );
|
||||
break;
|
||||
case CL_DEVICE_TYPE_ACCELERATOR:
|
||||
vlog( "DEVICE:\taccelerator\n" );
|
||||
break;
|
||||
default:
|
||||
vlog_error( "DEVICE:\tunknown\n" );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int DoTest( void )
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
if( 0 == argCount )
|
||||
{ // test all
|
||||
if( (error = Test_vload_half()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vloada_half()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstore_half()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstorea_half()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstore_half_rte()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstorea_half_rte()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstore_half_rtz()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstorea_half_rtz()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstore_half_rtp()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstorea_half_rtp()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstore_half_rtn()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_vstorea_half_rtn()) )
|
||||
return error;
|
||||
|
||||
if( (error = Test_roundTrip()) )
|
||||
return error;
|
||||
}
|
||||
else
|
||||
{
|
||||
typedef struct{ int (*f)(void); const char *name; }TestItem;
|
||||
#define ENTRY( _x ) { Test_ ## _x, STRINGIFY(_x) }
|
||||
static const TestItem list[] =
|
||||
{
|
||||
ENTRY(vload_half),
|
||||
ENTRY(vloada_half),
|
||||
ENTRY(vstore_half),
|
||||
ENTRY(vstorea_half),
|
||||
ENTRY(vstore_half_rte),
|
||||
ENTRY(vstorea_half_rte),
|
||||
ENTRY(vstore_half_rtz),
|
||||
ENTRY(vstorea_half_rtz),
|
||||
ENTRY(vstore_half_rtp),
|
||||
ENTRY(vstorea_half_rtp),
|
||||
ENTRY(vstore_half_rtn),
|
||||
ENTRY(vstorea_half_rtn),
|
||||
ENTRY(roundTrip)
|
||||
};
|
||||
static const size_t list_count = sizeof( list ) / sizeof( list[0] );
|
||||
|
||||
size_t i, j;
|
||||
for( i = 0; i < argCount; i++ )
|
||||
{
|
||||
const char *argp = argList[i];
|
||||
for( j = 0; j < list_count; j++ )
|
||||
{
|
||||
if( 0 == strcmp(argp, list[j].name) )
|
||||
{
|
||||
if( (error = list[j].f()) )
|
||||
return error;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( j == list_count )
|
||||
{
|
||||
vlog_error( "Unknown test name: %s\n. Exiting...\n", argp );
|
||||
return -5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
41
test_conformance/half/test_config.h
Normal file
41
test_conformance/half/test_config.h
Normal file
@@ -0,0 +1,41 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFIG_H
|
||||
#define TEST_CONFIG_H
|
||||
|
||||
#define MULTITHREAD 1
|
||||
|
||||
#define kVectorSizeCount 5
|
||||
#define kStrangeVectorSizeCount 1
|
||||
#define kMinVectorSize 0
|
||||
#define kLargestVectorSize (1 << (kVectorSizeCount-1))
|
||||
|
||||
#define kLastVectorSizeToTest (kVectorSizeCount + kStrangeVectorSizeCount)
|
||||
|
||||
#define BUFFER_SIZE ((size_t)2 * 1024 * 1024)
|
||||
|
||||
extern size_t getBufferSize(cl_device_id device_id);
|
||||
extern cl_ulong getBufferCount(cl_device_id device_id, size_t vecSize, size_t typeSize);
|
||||
// could call
|
||||
// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
|
||||
#define kPageSize 4096
|
||||
|
||||
extern int g_arrVecSizes[kVectorSizeCount+kStrangeVectorSizeCount];
|
||||
extern int g_arrVecAligns[kLargestVectorSize+1];
|
||||
|
||||
#endif /* TEST_CONFIG_H */
|
||||
|
||||
|
||||
41
test_conformance/half/tests.h
Normal file
41
test_conformance/half/tests.h
Normal file
@@ -0,0 +1,41 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TESTS_H
|
||||
#define TESTS_H
|
||||
|
||||
|
||||
int Test_vload_half( void );
|
||||
int Test_vloada_half( void );
|
||||
int Test_vstore_half( void );
|
||||
int Test_vstorea_half( void );
|
||||
int Test_vstore_half_rte( void );
|
||||
int Test_vstorea_half_rte( void );
|
||||
int Test_vstore_half_rtz( void );
|
||||
int Test_vstorea_half_rtz( void );
|
||||
int Test_vstore_half_rtp( void );
|
||||
int Test_vstorea_half_rtp( void );
|
||||
int Test_vstore_half_rtn( void );
|
||||
int Test_vstorea_half_rtn( void );
|
||||
int Test_roundTrip( void );
|
||||
|
||||
typedef cl_ushort (*f2h)( float );
|
||||
typedef cl_ushort (*d2h)( double );
|
||||
int Test_vStoreHalf_private( f2h referenceFunc, d2h referenceDoubleFunc, const char *roundName );
|
||||
int Test_vStoreaHalf_private( f2h referenceFunc, d2h referenceDoubleFunc, const char *roundName );
|
||||
|
||||
#endif /* TESTS_H */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user