Initial open source release of OpenCL 2.2 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:25:37 +05:30
parent 6911ba5116
commit 2821bf1323
1035 changed files with 343518 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
# Name of this conformance-test module; it is also the prefix of the
# <MODULE_NAME>_SOURCES variable defined below.
set(MODULE_NAME HALF)

# Sources compiled into the module, one file per line: the test's own
# translation units first, then the shared harness sources it is built with.
set(${MODULE_NAME}_SOURCES
    cl_utils.c
    Test_vLoadHalf.c
    Test_roundTrip.c
    Test_vStoreHalf.c
    main.c
    ../../test_common/harness/msvc9.c
    ../../test_common/harness/mingw_compat.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/ThreadPool.c
    ../../test_common/harness/parseParameters.cpp
    ../../test_common/harness/kernelHelpers.c
)

# Shared build logic for the conformance tests.
# NOTE(review): presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES to
# declare the executable -- confirm against ../CMakeCommon.txt.
include(../CMakeCommon.txt)

View File

@@ -0,0 +1,22 @@
# Boost.Build Jamfile for the Test_half conformance executable.
#
# Project-wide requirements applied to the targets declared in this file.
project
: requirements
# Removes the <library> reference to the harness and replaces it with <use>.
# NOTE(review): presumably so the harness contributes usage requirements
# without being linked as a library -- confirm against the harness Jamfile.
-<library>/harness//harness <use>/harness//harness
# <toolset>gcc:<cflags>-xc++
# With the msvc toolset, pass /TP so the .c sources are compiled as C++.
<toolset>msvc:<cflags>"/TP"
;
# The test executable: local test sources plus the harness error helpers.
exe Test_half
: cl_utils.c
main.c
Test_roundTrip.c
Test_vLoadHalf.c
Test_vStoreHalf.c
/harness//errorHelpers.c
# Windows targets additionally compile the harness msvc9.c source.
: <target-os>windows:<source>/harness//msvc9.c
;
# Install the executable under $(DIST), in a per-variant (debug/release) directory.
install dist
: Test_half
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/half
<variant>release:<location>$(DIST)/release/tests/test_conformance/half
;

View File

@@ -0,0 +1,45 @@
# Makefile for the Test_half conformance executable (links -framework OpenCL).
#
# Targets:
#   all / release  build ./Test_half straight from the sources with -Os
#   debug          build ./Test_half_debug from per-source objects with -O0 -D_DEBUG=1
#   test, test64   start ./Test_half in the background under `arch -i386` /
#                  `arch -x86_64`, once with -c and once with -g, logging to files
#   clean          remove both executables and the debug object files
#
# Optional inputs: BUILD_WITH_ATF (any value) adds the ATF framework and
# -DUSE_ATF; COMPILERFLAGS and RC_CFLAGS are appended to the compile flags.

ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

SRCFILES = cl_utils.c Test_vLoadHalf.c Test_roundTrip.c \
           Test_vStoreHalf.c main.c
# The .c sources are deliberately driven through the C++ compiler.
CC = c++
CFLAGS = -g -Wall -Wshorten-64-to-32 $(COMPILERFLAGS) ${RC_CFLAGS} \
         ${USE_ATF}
LIBRARIES = -framework OpenCL ${RC_CFLAGS} ${ATF}
OBJECTS := ${SRCFILES:.c=.o}

# None of these targets produce a file with their own name; declaring them
# phony keeps a stray file called e.g. "test" or "clean" from masking them.
.PHONY: all release debug test test64 clean

all: release

release:
	echo "Build Release"
	$(CC) $(SRCFILES) -Os $(CFLAGS) -o Test_half $(LIBRARIES)

# Target-specific flags are inherited by the prerequisites, so the objects
# built by the implicit .c -> .o rule are compiled with -O0 -D_DEBUG=1 too
# (previously these flags were only present on the link line).
debug: CFLAGS += -O0 -D_DEBUG=1
debug: $(OBJECTS)
	echo "Build Debug"
	$(CC) $(OBJECTS) $(CFLAGS) -o Test_half_debug $(LIBRARIES)

# Both runs are started in the background; results land in the log files.
test: release
	arch -i386 ./Test_half -c > cpu.log &
	arch -i386 ./Test_half -g > gpu.log &
	echo "Testing 32-bit mode in progress. See cpu.log and gpu.log for results."

test64: release
	arch -x86_64 ./Test_half -c > cpu64.log &
	arch -x86_64 ./Test_half -g > gpu64.log &
	echo "Testing 64-bit mode in progress. See cpu64.log and gpu64.log for results."

clean:
	rm -f ./Test_half_debug
	rm -f ./Test_half
	rm -f $(OBJECTS)

# Fallback for any target that has no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,292 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 44;
objects = {
/* Begin PBXBuildFile section */
3B1B765F0DE342BC00837A59 /* Test_vStoreHalf.c in Sources */ = {isa = PBXBuildFile; fileRef = 3B1B765E0DE342BC00837A59 /* Test_vStoreHalf.c */; };
3B6173C40DE2B14800384A2C /* Test_roundTrip.c in Sources */ = {isa = PBXBuildFile; fileRef = 3B6173C30DE2B14800384A2C /* Test_roundTrip.c */; };
3BA6BFB80DE21DB9008685CF /* Test_vLoadHalf.c in Sources */ = {isa = PBXBuildFile; fileRef = 3BA6BFB70DE21DB9008685CF /* Test_vLoadHalf.c */; };
3BA6BFBB0DE21EFA008685CF /* cl_utils.c in Sources */ = {isa = PBXBuildFile; fileRef = 3BA6BFBA0DE21EFA008685CF /* cl_utils.c */; };
3BA6BFF30DE229C5008685CF /* OpenCL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 3BA6BFF20DE229C5008685CF /* OpenCL.framework */; };
8DD76FAC0486AB0100D96B5E /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 08FB7796FE84155DC02AAC07 /* main.c */; settings = {ATTRIBUTES = (); }; };
8DD76FB00486AB0100D96B5E /* Test_half.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6A0FF2C0290799A04C91782 /* Test_half.1 */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
8DD76FAF0486AB0100D96B5E /* CopyFiles */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 8;
dstPath = /usr/share/man/man1/;
dstSubfolderSpec = 0;
files = (
8DD76FB00486AB0100D96B5E /* Test_half.1 in CopyFiles */,
);
runOnlyForDeploymentPostprocessing = 1;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
08FB7796FE84155DC02AAC07 /* main.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; };
3B1B765E0DE342BC00837A59 /* Test_vStoreHalf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = Test_vStoreHalf.c; sourceTree = "<group>"; };
3B1B77910DE3896E00837A59 /* builtins.cl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = builtins.cl; path = ../../../compute/OpenCL/cl_headers/private/builtins.cl; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.c; };
3B6173C30DE2B14800384A2C /* Test_roundTrip.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = Test_roundTrip.c; sourceTree = "<group>"; };
3BA6BFB70DE21DB9008685CF /* Test_vLoadHalf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = Test_vLoadHalf.c; sourceTree = "<group>"; };
3BA6BFB90DE21EFA008685CF /* cl_utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cl_utils.h; sourceTree = "<group>"; };
3BA6BFBA0DE21EFA008685CF /* cl_utils.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cl_utils.c; sourceTree = "<group>"; };
3BA6BFF20DE229C5008685CF /* OpenCL.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = OpenCL.framework; path = /System/Library/Frameworks/OpenCL.framework; sourceTree = "<absolute>"; };
3BA6C00A0DE22A95008685CF /* test_config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = test_config.h; sourceTree = "<group>"; };
3BA6C0770DE24F41008685CF /* tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tests.h; sourceTree = "<group>"; };
8DD76FB20486AB0100D96B5E /* Test_half */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Test_half; sourceTree = BUILT_PRODUCTS_DIR; };
C6A0FF2C0290799A04C91782 /* Test_half.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = Test_half.1; sourceTree = "<group>"; };
FFB9F1420E5E155400F45584 /* ATF.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = ATF.framework; path = /Library/Frameworks/ATF.framework; sourceTree = "<absolute>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
8DD76FAD0486AB0100D96B5E /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
3BA6BFF30DE229C5008685CF /* OpenCL.framework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
08FB7794FE84155DC02AAC07 /* Test_half */ = {
isa = PBXGroup;
children = (
3BA6C00A0DE22A95008685CF /* test_config.h */,
3BA6C0770DE24F41008685CF /* tests.h */,
08FB7796FE84155DC02AAC07 /* main.c */,
08FB7795FE84155DC02AAC07 /* Source */,
3BA6BFF80DE229CC008685CF /* Resources */,
3B1B77910DE3896E00837A59 /* builtins.cl */,
C6A0FF2B0290797F04C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */,
FFB9F1420E5E155400F45584 /* ATF.framework */,
);
name = Test_half;
sourceTree = "<group>";
};
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
3BA6BFB90DE21EFA008685CF /* cl_utils.h */,
3BA6BFBA0DE21EFA008685CF /* cl_utils.c */,
3BA6BFB70DE21DB9008685CF /* Test_vLoadHalf.c */,
3B6173C30DE2B14800384A2C /* Test_roundTrip.c */,
3B1B765E0DE342BC00837A59 /* Test_vStoreHalf.c */,
);
name = Source;
sourceTree = "<group>";
};
1AB674ADFE9D54B511CA2CBB /* Products */ = {
isa = PBXGroup;
children = (
8DD76FB20486AB0100D96B5E /* Test_half */,
);
name = Products;
sourceTree = "<group>";
};
3BA6BFF80DE229CC008685CF /* Resources */ = {
isa = PBXGroup;
children = (
3BA6BFF20DE229C5008685CF /* OpenCL.framework */,
);
name = Resources;
sourceTree = "<group>";
};
C6A0FF2B0290797F04C91782 /* Documentation */ = {
isa = PBXGroup;
children = (
C6A0FF2C0290799A04C91782 /* Test_half.1 */,
);
name = Documentation;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
8DD76FA90486AB0100D96B5E /* Test_half */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "Test_half" */;
buildPhases = (
8DD76FAB0486AB0100D96B5E /* Sources */,
8DD76FAD0486AB0100D96B5E /* Frameworks */,
8DD76FAF0486AB0100D96B5E /* CopyFiles */,
);
buildRules = (
);
dependencies = (
);
name = Test_half;
productInstallPath = "$(HOME)/bin";
productName = Test_half;
productReference = 8DD76FB20486AB0100D96B5E /* Test_half */;
productType = "com.apple.product-type.tool";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
08FB7793FE84155DC02AAC07 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Test_half" */;
compatibilityVersion = "Xcode 3.0";
hasScannedForEncodings = 1;
mainGroup = 08FB7794FE84155DC02AAC07 /* Test_half */;
projectDirPath = "";
projectRoot = "";
targets = (
8DD76FA90486AB0100D96B5E /* Test_half */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
8DD76FAB0486AB0100D96B5E /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
8DD76FAC0486AB0100D96B5E /* main.c in Sources */,
3BA6BFB80DE21DB9008685CF /* Test_vLoadHalf.c in Sources */,
3BA6BFBB0DE21EFA008685CF /* cl_utils.c in Sources */,
3B6173C40DE2B14800384A2C /* Test_roundTrip.c in Sources */,
3B1B765F0DE342BC00837A59 /* Test_vStoreHalf.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
1DEB928608733DD80010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
COPY_PHASE_STRIP = NO;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_FIX_AND_CONTINUE = YES;
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0;
INSTALL_PATH = /usr/local/bin;
PRODUCT_NAME = Test_half;
ZERO_LINK = YES;
};
name = Debug;
};
1DEB928708733DD80010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_MODEL_TUNING = G5;
INSTALL_PATH = /usr/local/bin;
PRODUCT_NAME = Test_half;
};
name = Release;
};
1DEB928A08733DD80010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ARCHS = "$(ARCHS_STANDARD_32_64_BIT_PRE_XCODE_3_1)";
ARCHS_STANDARD_32_64_BIT_PRE_XCODE_3_1 = "x86_64 i386 ppc";
ATF_Config_Comment = "Set ATF_DEFAULT to non-empty to link to ATF iff the BUILD_WITH_ATF env var is set";
ATF_DEFAULT = SKIPPING;
ATF_DEFINES = "$(ATF_DEFINES_$(SKIPPING_ATF))";
ATF_DEFINES_ = "USE_ATF=1";
ATF_LINK = "$(ATF_LINK_$(SKIPPING_ATF))";
ATF_LINK_ = "-framework ATF";
GCC_C_LANGUAGE_STANDARD = c99;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = _DEBUG;
GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_CHECK_SWITCH_STATEMENTS = YES;
GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED = YES;
GCC_WARN_MISSING_PARENTHESES = NO;
GCC_WARN_PEDANTIC = YES;
GCC_WARN_SHADOW = YES;
GCC_WARN_SIGN_COMPARE = YES;
GCC_WARN_TYPECHECK_CALLS_TO_PRINTF = YES;
GCC_WARN_UNINITIALIZED_AUTOS = NO;
GCC_WARN_UNKNOWN_PRAGMAS = YES;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_LABEL = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VALUE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PREBINDING = NO;
SKIPPING_ATF = SKIPPING;
VALID_ARCHS = "i386 x86_64";
};
name = Debug;
};
1DEB928B08733DD80010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ARCHS = (
ppc,
i386,
);
ATF_Config_Comment = "Set ATF_DEFAULT to non-empty to link to ATF iff the BUILD_WITH_ATF env var is set";
ATF_DEFAULT = SKIPPING;
ATF_DEFINES = "$(ATF_DEFINES_$(SKIPPING_ATF))";
ATF_DEFINES_ = "USE_ATF=1";
ATF_LINK = "$(ATF_LINK_$(SKIPPING_ATF))";
ATF_LINK_ = "-framework ATF";
GCC_C_LANGUAGE_STANDARD = c99;
GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_CHECK_SWITCH_STATEMENTS = YES;
GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED = YES;
GCC_WARN_MISSING_PARENTHESES = NO;
GCC_WARN_PEDANTIC = NO;
GCC_WARN_SHADOW = YES;
GCC_WARN_SIGN_COMPARE = YES;
GCC_WARN_TYPECHECK_CALLS_TO_PRINTF = YES;
GCC_WARN_UNINITIALIZED_AUTOS = NO;
GCC_WARN_UNKNOWN_PRAGMAS = YES;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_LABEL = YES;
GCC_WARN_UNUSED_PARAMETER = YES;
GCC_WARN_UNUSED_VALUE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PREBINDING = NO;
SKIPPING_ATF = SKIPPING;
VALID_ARCHS = "i386 x86_64";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "Test_half" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB928608733DD80010E9CD /* Debug */,
1DEB928708733DD80010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Test_half" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB928A08733DD80010E9CD /* Debug */,
1DEB928B08733DD80010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
}

View File

@@ -0,0 +1,398 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <string.h>
#include "cl_utils.h"
#include "tests.h"
int Test_roundTrip( void )
{
int vectorSize, error;
uint64_t i, j;
cl_program programs[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
cl_kernel kernels[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
cl_program doublePrograms[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
cl_kernel doubleKernels[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
uint64_t doubleTime[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
uint64_t min_double_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
memset( min_time, -1, sizeof( min_time ) );
memset( min_double_time, -1, sizeof( min_double_time ) );
vlog( "Testing roundTrip\n" );
fflush( stdout );
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
{
const char *source[] = {
"__kernel void test( const __global half *in, __global half *out )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" vstore_half",vector_size_name_extensions[vectorSize],"( vload_half",vector_size_name_extensions[vectorSize],"(i, in), i, out);\n"
"}\n"
};
const char *doubleSource[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"__kernel void test( const __global half *in, __global half *out )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" vstore_half",vector_size_name_extensions[vectorSize],"( convert_double", vector_size_name_extensions[vectorSize], "( vload_half",vector_size_name_extensions[vectorSize],"(i, in)), i, out);\n"
"}\n"
};
const char *sourceV3[] = {
"__kernel void test( const __global half *in, __global half *out,"
" uint extra_last_thread )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" size_t last_i = get_global_size(0)-1;\n"
" size_t adjust = 0;\n"
" if(i == last_i && extra_last_thread != 0) { \n"
" adjust = 3-extra_last_thread;\n"
" }\n"
" vstore_half3( vload_half3(i, in-adjust), i, out-adjust);\n"
"}\n"
};
const char *doubleSourceV3[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"__kernel void test( const __global half *in, __global half *out,"
" uint extra_last_thread )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" size_t last_i = get_global_size(0)-1;\n"
" size_t adjust = 0;\n"
" if(i == last_i && extra_last_thread != 0) { \n"
" adjust = 3-extra_last_thread;\n"
" }\n"
" vstore_half3( vload_half3(i, in-adjust), i, out-adjust);\n"
"}\n"
};
/*
const char *sourceV3aligned[] = {
"__kernel void test( const __global half *in, __global half *out )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" vstorea_half3( vloada_half3(i, in), i, out);\n"
" vstore_half(vload_half(4*i+3, in), 4*i+3, out);\n"
"}\n"
};
const char *doubleSourceV3aligned[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"__kernel void test( const __global half *in, __global half *out )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" vstorea_half3( vloada_half3(i, in), i, out);\n"
" vstore_half(vload_half(4*i+3, in), 4*i+3, out);\n"
"}\n"
};
*/
if(g_arrVecSizes[vectorSize] == 3) {
programs[vectorSize] = MakeProgram( sourceV3, sizeof( sourceV3) / sizeof( sourceV3[0]) );
if( NULL == programs[ vectorSize ] )
{
gFailCount++;
return -1;
}
} else {
programs[vectorSize] = MakeProgram( source, sizeof( source) / sizeof( source[0]) );
if( NULL == programs[ vectorSize ] )
{
gFailCount++;
return -1;
}
}
kernels[ vectorSize ] = clCreateKernel( programs[ vectorSize ], "test", &error );
if( NULL == kernels[vectorSize] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error );
return error;
}
if( gTestDouble )
{
if(g_arrVecSizes[vectorSize] == 3) {
doublePrograms[vectorSize] = MakeProgram( doubleSourceV3, sizeof( doubleSourceV3) / sizeof( doubleSourceV3[0]) );
if( NULL == programs[ vectorSize ] )
{
gFailCount++;
return -1;
}
} else {
doublePrograms[vectorSize] = MakeProgram( doubleSource, sizeof( doubleSource) / sizeof( doubleSource[0]) );
if( NULL == programs[ vectorSize ] )
{
gFailCount++;
return -1;
}
}
doubleKernels[ vectorSize ] = clCreateKernel( doublePrograms[ vectorSize ], "test", &error );
if( NULL == kernels[vectorSize] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error );
return error;
}
}
}
// Figure out how many elements are in a work block
size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
size_t blockCount = (size_t)getBufferSize(gDevice) / elementSize; //elementSize is a power of two
uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half
size_t stride = blockCount;
error = 0;
uint64_t printMask = (lastCase >> 4) - 1;
uint32_t count;
size_t loopCount;
for( i = 0; i < (uint64_t)lastCase; i += stride )
{
count = (uint32_t) MIN( blockCount, lastCase - i );
//Init the input stream
uint16_t *p = (uint16_t *)gIn_half;
for( j = 0; j < count; j++ )
p[j] = j + i;
if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_half, CL_TRUE, 0, count * sizeof( cl_half ), gIn_half, 0, NULL, NULL)) )
{
vlog_error( "Failure in clWriteArray\n" );
gFailCount++;
goto exit;
}
//Check the vector lengths
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
{ // here we loop through vector sizes -- 3 is last.
uint32_t pattern = 0xdeaddead;
memset_pattern4( gOut_half, &pattern, (size_t)getBufferSize(gDevice)/2);
if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) )
{
vlog_error( "Failure in clWriteArray\n" );
gFailCount++;
goto exit;
}
// here is where "3" starts to cause problems.
error = RunKernel( kernels[vectorSize], gInBuffer_half, gOutBuffer_half, numVecs(count, vectorSize, false) ,
runsOverBy(count, vectorSize, false) );
if(error)
{
gFailCount++;
goto exit;
}
if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) )
{
vlog_error( "Failure in clReadArray\n" );
gFailCount++;
goto exit;
}
if( (memcmp( gOut_half, gIn_half, count * sizeof(cl_half))) )
{
uint16_t *u1 = (uint16_t *)gOut_half;
uint16_t *u2 = (uint16_t *)gIn_half;
for( j = 0; j < count; j++ )
{
if( u1[j] != u2[j] )
{
uint16_t abs1 = u1[j] & 0x7fff;
uint16_t abs2 = u2[j] & 0x7fff;
if( abs1 > 0x7c00 && abs2 > 0x7c00 )
continue; //any NaN is okay if NaN is input
// if reference result is sub normal, test if the output is flushed to zero
if( IsHalfSubnormal(u2[j]) && ( (u1[j] == 0) || (u1[j] == 0x8000) ) )
continue;
vlog_error( "%lld) (of %lld) Failure at 0x%4.4x: 0x%4.4x vector_size = %d \n", j, (uint64_t)count, u2[j], u1[j], (g_arrVecSizes[vectorSize]) );
gFailCount++;
goto exit;
}
}
}
if( gTestDouble )
{
memset_pattern4( gOut_half, &pattern, (size_t)getBufferSize(gDevice)/2);
if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) )
{
vlog_error( "Failure in clWriteArray\n" );
gFailCount++;
goto exit;
}
if( (error = RunKernel( doubleKernels[vectorSize], gInBuffer_half, gOutBuffer_half, numVecs(count, vectorSize, false) ,
runsOverBy(count, vectorSize, false) ) ) )
{
gFailCount++;
goto exit;
}
if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) )
{
vlog_error( "Failure in clReadArray\n" );
gFailCount++;
goto exit;
}
if( (memcmp( gOut_half, gIn_half, count * sizeof(cl_half))) )
{
uint16_t *u1 = (uint16_t *)gOut_half;
uint16_t *u2 = (uint16_t *)gIn_half;
for( j = 0; j < count; j++ )
{
if( u1[j] != u2[j] )
{
uint16_t abs1 = u1[j] & 0x7fff;
uint16_t abs2 = u2[j] & 0x7fff;
if( abs1 > 0x7c00 && abs2 > 0x7c00 )
continue; //any NaN is okay if NaN is input
// if reference result is sub normal, test if the output is flushed to zero
if( IsHalfSubnormal(u2[j]) && ( (u1[j] == 0) || (u1[j] == 0x8000) ) )
continue;
vlog_error( "%lld) Failure at 0x%4.4x: 0x%4.4x vector_size = %d (double precsion)\n", j, u2[j], u1[j], (g_arrVecSizes[vectorSize]) );
gFailCount++;
goto exit;
}
}
}
}
}
if( ((i+blockCount) & ~printMask) == (i+blockCount) )
{
vlog( "." );
fflush( stdout );
}
}
vlog( "\tPassed\n" );
loopCount = 100;
if( gReportTimes )
{
//Run again for timing
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
{
uint64_t bestTime = -1ULL;
for( j = 0; j < loopCount; j++ )
{
uint64_t startTime = ReadTime();
if( (error = RunKernel( kernels[vectorSize], gInBuffer_half, gOutBuffer_half,numVecs(count, vectorSize, false) ,
runsOverBy(count, vectorSize, false)) ) )
{
gFailCount++;
goto exit;
}
if( (error = clFinish(gQueue)) )
{
vlog_error( "Failure in clFinish\n" );
gFailCount++;
goto exit;
}
uint64_t currentTime = ReadTime() - startTime;
if( currentTime < bestTime )
bestTime = currentTime;
time[ vectorSize ] += currentTime;
}
if( bestTime < min_time[ vectorSize ] )
min_time[ vectorSize ] = bestTime;
if( gTestDouble )
{
bestTime = -1ULL;
for( j = 0; j < loopCount; j++ )
{
uint64_t startTime = ReadTime();
if( (error = RunKernel( doubleKernels[vectorSize], gInBuffer_half, gOutBuffer_half, numVecs(count, vectorSize, false) ,
runsOverBy(count, vectorSize, false)) ) )
{
gFailCount++;
goto exit;
}
if( (error = clFinish(gQueue)) )
{
vlog_error( "Failure in clFinish\n" );
gFailCount++;
goto exit;
}
uint64_t currentTime = ReadTime() - startTime;
if( currentTime < bestTime )
bestTime = currentTime;
doubleTime[ vectorSize ] += currentTime;
}
if( bestTime < min_double_time[ vectorSize ] )
min_double_time[ vectorSize ] = bestTime;
}
}
}
if( gReportTimes )
{
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, "average us/elem", "roundTrip avg. (vector size: %d)", (g_arrVecSizes[vectorSize]) );
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, "best us/elem", "roundTrip best (vector size: %d)", (g_arrVecSizes[vectorSize]) );
if( gTestDouble )
{
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
vlog_perf( SubtractTime( doubleTime[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, "average us/elem (double)", "roundTrip avg. d (vector size: %d)", (g_arrVecSizes[vectorSize]) );
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
vlog_perf( SubtractTime( min_double_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, "best us/elem (double)", "roundTrip best d (vector size: %d)", (g_arrVecSizes[vectorSize]) );
}
}
exit:
//clean up
for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
{
clReleaseKernel( kernels[ vectorSize ] );
clReleaseProgram( programs[ vectorSize ] );
if( gTestDouble )
{
clReleaseKernel( doubleKernels[ vectorSize ] );
clReleaseProgram( doublePrograms[ vectorSize ] );
}
}
gTestCount++;
return error;
}

View File

@@ -0,0 +1,628 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <string.h>
#include "cl_utils.h"
#include "tests.h"
extern const char *addressSpaceNames[];
// Widen a 16-bit half-precision bit pattern to the float with the same value.
// Zeros keep their sign, subnormal halves are renormalised (every half
// subnormal is a normal float), infinities stay infinities and any NaN input
// yields a NaN with the quiet bit set.
static inline float half2float( cl_ushort us )
{
    union{ unsigned int u; float f;} pun;
    const uint32_t halfBits = us;
    const uint32_t signBit = (halfBits << 16) & 0x80000000;   // sign moved to bit 31
    int32_t biasedExp = (halfBits & 0x7c00) >> 10;            // 5-bit exponent field
    uint32_t fraction = (halfBits & 0x03ff) << 13;            // 10-bit mantissa, left-aligned in 23 bits

    if( biasedExp == 31 )
    {
        // Inf (empty mantissa) or NaN (non-empty mantissa, forced quiet).
        pun.u = fraction | signBit;
        pun.u |= fraction ? 0x7fc00000 : 0x7f800000;
        return pun.f;
    }

    if( biasedExp == 0 )
    {
        if( fraction == 0 )
            return signBit ? -0.0f : 0.0f;

        // Subnormal: shift the leading one up into the implicit-bit position
        // (bit 23), lower the exponent to match, then drop the implicit bit.
        const int normShift = __builtin_clz( fraction ) - 8;
        biasedExp -= normShift - 1;
        fraction = (fraction << normShift) & 0x007fffff;
    }

    // Re-bias from half (15) to float (127) and assemble the float bit pattern.
    biasedExp += 127 - 15;
    biasedExp <<= 23;
    pun.u = ((uint32_t) biasedExp | fraction) | signBit;
    return pun.f;
}
int Test_vLoadHalf_private( bool aligned );
int Test_vLoadHalf_private( bool aligned )
{
cl_int error;
int vectorSize;
cl_program programs[kVectorSizeCount+kStrangeVectorSizeCount][4] = {{0}};
cl_kernel kernels[kVectorSizeCount+kStrangeVectorSizeCount][4] = {{0}};
uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
size_t q;
memset( min_time, -1, sizeof( min_time ) );
vlog( "Testing vload%s_half\n", aligned ? "a" : "" );
fflush( stdout );
const char *vector_size_names[] = {"1", "2", "4", "8", "16", "3"};
int minVectorSize = kMinVectorSize;
// There is no aligned scalar vloada_half in CL 1.1
#if ! defined( CL_VERSION_1_1 ) && ! defined(__APPLE__)
vlog("Note: testing vloada_half.\n");
if (aligned && minVectorSize == 0)
minVectorSize = 1;
#endif
for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
{
int effectiveVectorSize = g_arrVecSizes[vectorSize];
if(effectiveVectorSize == 3 && aligned) {
effectiveVectorSize = 4;
}
const char *source[] = {
"__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( i, p );\n"
"}\n"
};
const char *sourceV3[] = {
"__kernel void test( const __global half *p, __global float *f,\n"
" uint extra_last_thread)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" size_t last_i = get_global_size(0)-1;\n"
" if(last_i == i && extra_last_thread != 0) {\n"
" if(extra_last_thread ==2) {\n"
" f[3*i+1] = vload_half(3*i+1, p);\n"
" }\n"
" f[3*i] = vload_half(3*i, p);\n"
" } else {\n"
" vstore3(vload_half3( i, p ),i,f);\n"
" }\n"
"}\n"
};
const char *sourceV3aligned[] = {
"__kernel void test( const __global half *p, __global float3 *f )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" f[i] = vloada_half3( i, p );\n"
" ((__global float *)f)[4*i+3] = vloada_half(4*i+3,p);\n"
"}\n"
};
const char *source_private1[] = {
"__kernel void test( const __global half *p, __global float *f )\n"
"{\n"
" __private ushort data[1];\n"
" __private half* hdata_p = (__private half*) data;\n"
" size_t i = get_global_id(0);\n"
" data[0] = ((__global ushort*)p)[i];\n"
" f[i] = vload", (aligned ? "a" : ""), "_half( 0, hdata_p );\n"
"}\n"
};
const char *source_private2[] = {
"__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
"{\n"
" __private ", align_types[vectorSize], " data[", vector_size_names[vectorSize], "/", align_divisors[vectorSize], "];\n"
" __private half* hdata_p = (__private half*) data;\n"
" __global ", align_types[vectorSize], "* i_p = (__global ", align_types[vectorSize], "*)p;\n"
" size_t i = get_global_id(0);\n"
" int k;\n"
" for (k=0; k<",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"; k++)\n"
" data[k] = i_p[i+k];\n"
" f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( 0, hdata_p );\n"
"}\n"
};
const char *source_privateV3[] = {
"__kernel void test( const __global half *p, __global float *f,"
" uint extra_last_thread )\n"
"{\n"
" __private ushort data[3];\n"
" __private half* hdata_p = (__private half*) data;\n"
" __global ushort* i_p = (__global ushort*)p;\n"
" size_t i = get_global_id(0);\n"
" int k;\n"
// " data = vload3(i, i_p);\n"
" size_t last_i = get_global_size(0)-1;\n"
" if(last_i == i && extra_last_thread != 0) {\n"
" if(extra_last_thread ==2) {\n"
" f[3*i+1] = vload_half(3*i+1, p);\n"
" }\n"
" f[3*i] = vload_half(3*i, p);\n"
" } else {\n"
" for (k=0; k<3; k++)\n"
" data[k] = i_p[i*3+k];\n"
" vstore3(vload_half3( 0, hdata_p ), i, f);\n"
" }\n"
"}\n"
};
const char *source_privateV3aligned[] = {
"__kernel void test( const __global half *p, __global float3 *f )\n"
"{\n"
" ushort4 data[4];\n" // declare as vector for alignment. Make four to check to see vloada_half3 index is working.
" half* hdata_p = (half*) &data;\n"
" size_t i = get_global_id(0);\n"
" global ushort* i_p = (global ushort*)p + i * 4;\n"
" int offset = i & 3;\n"
" data[offset] = (ushort4)( i_p[0], i_p[1], i_p[2], USHRT_MAX ); \n"
" data[offset^1] = USHRT_MAX; \n"
" data[offset^2] = USHRT_MAX; \n"
" data[offset^3] = USHRT_MAX; \n"
// test vloada_half3
" f[i] = vloada_half3( offset, hdata_p );\n"
// Fill in the 4th value so we don't have to special case this code elsewhere in the test.
" mem_fence(CLK_GLOBAL_MEM_FENCE );\n"
" ((__global float *)f)[4*i+3] = vload_half(4*i+3, p);\n"
"}\n"
};
char local_buf_size[10];
sprintf(local_buf_size, "%lld", (uint64_t)((effectiveVectorSize))*gWorkGroupSize);
const char *source_local1[] = {
"__kernel void test( const __global half *p, __global float *f )\n"
"{\n"
" __local ushort data[",local_buf_size,"];\n"
" __local half* hdata_p = (__local half*) data;\n"
" size_t i = get_global_id(0);\n"
" size_t lid = get_local_id(0);\n"
" data[lid] = ((__global ushort*)p)[i];\n"
" f[i] = vload", aligned ? "a" : "", "_half( lid, hdata_p );\n"
"}\n"
};
const char *source_local2[] = {
"__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
"{\n"
" __local ", align_types[vectorSize], " data[", local_buf_size, "/", align_divisors[vectorSize], "];\n"
" __local half* hdata_p = (__local half*) data;\n"
" __global ", align_types[vectorSize], "* i_p = (__global ", align_types[vectorSize],"*)p;\n"
" size_t i = get_global_id(0);\n"
" size_t lid = get_local_id(0);\n"
" int k;\n"
" for (k=0; k<",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"; k++)\n"
" data[lid*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k] = i_p[i*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k];\n"
" f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( lid, hdata_p );\n"
"}\n"
};
const char *source_localV3[] = {
"__kernel void test( const __global half *p, __global float *f,\n"
" uint extra_last_thread)\n"
"{\n"
" __local ushort data[", local_buf_size,"];\n"
" __local half* hdata_p = (__local half*) data;\n"
" __global ushort* i_p = (__global ushort*)p;\n"
" size_t i = get_global_id(0);\n"
" size_t last_i = get_global_size(0)-1;\n"
" size_t lid = get_local_id(0);\n"
" int k;\n"
" if(last_i == i && extra_last_thread != 0) {\n"
" if(extra_last_thread ==2) {\n"
" f[3*i+1] = vload_half(3*i+1, p);\n"
" }\n"
" f[3*i] = vload_half(3*i, p);\n"
" } else {\n"
" for (k=0; k<3; k++)\n"
" data[lid*3+k] = i_p[i*3+k];\n"
" vstore3( vload_half3( lid, hdata_p ),i,f);\n"
" };\n"
"}\n"
};
const char *source_localV3aligned[] = {
"__kernel void test( const __global half *p, __global float3 *f )\n"
"{\n"
" __local ushort data[", local_buf_size,"];\n"
" __local half* hdata_p = (__local half*) data;\n"
" __global ushort* i_p = (__global ushort*)p;\n"
" size_t i = get_global_id(0);\n"
" size_t lid = get_local_id(0);\n"
" int k;\n"
" for (k=0; k<4; k++)\n"
" data[lid*4+k] = i_p[i*4+k];\n"
" f[i] = vloada_half3( lid, hdata_p );\n"
" ((__global float *)f)[4*i+3] = vload_half(lid*4+3, hdata_p);\n"
"}\n"
};
const char *source_constant[] = {
"__kernel void test( __constant half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( i, p );\n"
"}\n"
};
const char *source_constantV3[] = {
"__kernel void test( __constant half *p, __global float *f,\n"
" uint extra_last_thread)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" size_t last_i = get_global_size(0)-1;\n"
" if(last_i == i && extra_last_thread != 0) {\n"
" if(extra_last_thread ==2) {\n"
" f[3*i+1] = vload_half(3*i+1, p);\n"
" }\n"
" f[3*i] = vload_half(3*i, p);\n"
" } else {\n"
" vstore3(vload_half",vector_size_name_extensions[vectorSize],"( i, p ), i, f);\n"
" }\n"
"}\n"
};
const char *source_constantV3aligned[] = {
"__kernel void test( __constant half *p, __global float3 *f )\n"
"{\n"
" size_t i = get_global_id(0);\n"
" f[i] = vloada_half3( i, p );\n"
" ((__global float *)f)[4*i+3] = vload_half(4*i+3,p);\n"
"}\n"
};
if(g_arrVecSizes[vectorSize] != 3) {
programs[vectorSize][0] = MakeProgram( source, sizeof( source) / sizeof( source[0]) );
if( NULL == programs[ vectorSize ][0] ) {
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create program.\n" );
for ( q= 0; q < sizeof( source) / sizeof( source[0]); q++)
vlog_error("%s", source[q]);
return -1;
} else {
}
} else if(aligned) {
programs[vectorSize][0] = MakeProgram( sourceV3aligned, sizeof( sourceV3aligned) / sizeof( sourceV3aligned[0]) );
if( NULL == programs[ vectorSize ][0] ) {
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create program.\n" );
for ( q= 0; q < sizeof( sourceV3aligned) / sizeof( sourceV3aligned[0]); q++)
vlog_error("%s", sourceV3aligned[q]);
return -1;
} else {
}
} else {
programs[vectorSize][0] = MakeProgram( sourceV3, sizeof( sourceV3) / sizeof( sourceV3[0]) );
if( NULL == programs[ vectorSize ][0] ) {
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create program.\n" );
for ( q= 0; q < sizeof( sourceV3) / sizeof( sourceV3[0]); q++)
vlog_error("%s", sourceV3[q]);
return -1;
}
}
kernels[ vectorSize ][0] = clCreateKernel( programs[ vectorSize ][0], "test", &error );
if( NULL == kernels[vectorSize][0] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error );
return -2;
}
const char** source_ptr;
uint32_t source_size;
if (vectorSize == 0) {
source_ptr = source_private1;
source_size = sizeof( source_private1) / sizeof( source_private1[0]);
} else if(g_arrVecSizes[vectorSize] == 3) {
if(aligned) {
source_ptr = source_privateV3aligned;
source_size = sizeof( source_privateV3aligned) / sizeof( source_privateV3aligned[0]);
} else {
source_ptr = source_privateV3;
source_size = sizeof( source_privateV3) / sizeof( source_privateV3[0]);
}
} else {
source_ptr = source_private2;
source_size = sizeof( source_private2) / sizeof( source_private2[0]);
}
programs[vectorSize][1] = MakeProgram( source_ptr, source_size );
if( NULL == programs[ vectorSize ][1] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create private program.\n" );
for ( q= 0; q < source_size; q++)
vlog_error("%s", source_ptr[q]);
return -1;
}
kernels[ vectorSize ][1] = clCreateKernel( programs[ vectorSize ][1], "test", &error );
if( NULL == kernels[vectorSize][1] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create private kernel. (%d)\n", error );
return -2;
}
if (vectorSize == 0) {
source_ptr = source_local1;
source_size = sizeof( source_local1) / sizeof( source_local1[0]);
} else if(g_arrVecSizes[vectorSize] == 3) {
if(aligned) {
source_ptr = source_localV3aligned;
source_size = sizeof(source_localV3aligned)/sizeof(source_localV3aligned[0]);
} else {
source_ptr = source_localV3;
source_size = sizeof(source_localV3)/sizeof(source_localV3[0]);
}
} else {
source_ptr = source_local2;
source_size = sizeof( source_local2) / sizeof( source_local2[0]);
}
programs[vectorSize][2] = MakeProgram( source_ptr, source_size );
if( NULL == programs[ vectorSize ][2] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create local program.\n" );
for ( q= 0; q < source_size; q++)
vlog_error("%s", source_ptr[q]);
return -1;
}
kernels[ vectorSize ][2] = clCreateKernel( programs[ vectorSize ][2], "test", &error );
if( NULL == kernels[vectorSize][2] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create local kernel. (%d)\n", error );
return -2;
}
if(g_arrVecSizes[vectorSize] == 3) {
if(aligned) {
programs[vectorSize][3] = MakeProgram( source_constantV3aligned, sizeof(source_constantV3aligned) / sizeof( source_constantV3aligned[0]) );
if( NULL == programs[ vectorSize ][3] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create constant program.\n" );
for ( q= 0; q < sizeof( source_constantV3aligned) / sizeof( source_constantV3aligned[0]); q++)
vlog_error("%s", source_constantV3aligned[q]);
return -1;
}
} else {
programs[vectorSize][3] = MakeProgram( source_constantV3, sizeof(source_constantV3) / sizeof( source_constantV3[0]) );
if( NULL == programs[ vectorSize ][3] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create constant program.\n" );
for ( q= 0; q < sizeof( source_constantV3) / sizeof( source_constantV3[0]); q++)
vlog_error("%s", source_constantV3[q]);
return -1;
}
}
} else {
programs[vectorSize][3] = MakeProgram( source_constant, sizeof(source_constant) / sizeof( source_constant[0]) );
if( NULL == programs[ vectorSize ][3] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create constant program.\n" );
for ( q= 0; q < sizeof( source_constant) / sizeof( source_constant[0]); q++)
vlog_error("%s", source_constant[q]);
return -1;
}
}
kernels[ vectorSize ][3] = clCreateKernel( programs[ vectorSize ][3], "test", &error );
if( NULL == kernels[vectorSize][3] )
{
gFailCount++;
vlog_error( "\t\tFAILED -- Failed to create constant kernel. (%d)\n", error );
return -2;
}
}
// Figure out how many elements are in a work block
size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
size_t blockCount = getBufferSize(gDevice) / elementSize; // elementSize is power of 2
uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half
// we handle 64-bit types a bit differently.
if( lastCase == 0 )
lastCase = 0x100000000ULL;
uint64_t i, j;
uint64_t printMask = (lastCase >> 4) - 1;
uint32_t count = 0;
error = 0;
int addressSpace;
// int reported_vector_skip = 0;
for( i = 0; i < (uint64_t)lastCase; i += blockCount )
{
count = (uint32_t) MIN( blockCount, lastCase - i );
//Init the input stream
uint16_t *p = (uint16_t *)gIn_half;
for( j = 0; j < count; j++ )
p[j] = j + i;
if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_half, CL_TRUE, 0, count * sizeof( cl_half ), gIn_half, 0, NULL, NULL)))
{
vlog_error( "Failure in clWriteArray\n" );
gFailCount++;
goto exit;
}
//create the reference result
const unsigned short *s = (const unsigned short *)gIn_half;
float *d = (float *)gOut_single_reference;
for( j = 0; j < count; j++ )
d[j] = half2float( s[j] );
//Check the vector lengths
for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
{ // here we loop through vector sizes, 3 is last
for ( addressSpace = 0; addressSpace < 4; addressSpace++) {
uint32_t pattern = 0x7fffdead;
/*
if (addressSpace == 3) {
vlog("Note: skipping address space %s due to small buffer size.\n", addressSpaceNames[addressSpace]);
continue;
}
*/
memset_pattern4( gOut_single, &pattern, getBufferSize(gDevice));
if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) )
{
vlog_error( "Failure in clWriteArray\n" );
gFailCount++;
goto exit;
}
if(g_arrVecSizes[vectorSize] == 3 && !aligned) {
// now we need to add the extra const argument for how
// many elements the last thread should take care of.
}
// okay, here is where we have to be careful
if( (error = RunKernel( kernels[vectorSize][addressSpace], gInBuffer_half, gOutBuffer_single, numVecs(count, vectorSize, aligned) ,
runsOverBy(count, vectorSize, aligned) ) ) )
{
gFailCount++;
goto exit;
}
if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) )
{
vlog_error( "Failure in clReadArray\n" );
gFailCount++;
goto exit;
}
if( memcmp( gOut_single, gOut_single_reference, count * sizeof( float )) )
{
uint32_t *u1 = (uint32_t *)gOut_single;
uint32_t *u2 = (uint32_t *)gOut_single_reference;
float *f1 = (float *)gOut_single;
float *f2 = (float *)gOut_single_reference;
for( j = 0; j < count; j++ )
{
if(isnan(f1[j]) && isnan(f2[j])) // both are nan dont compare them
continue;
if( u1[j] != u2[j])
{
vlog_error( " %lld) (of %lld) Failure at 0x%4.4x: %a vs *%a (0x%8.8x vs *0x%8.8x) vector_size = %d (%s) address space = %s, load is %s\n",
j, (uint64_t)count, ((unsigned short*)gIn_half)[j], f1[j], f2[j], u1[j], u2[j], (g_arrVecSizes[vectorSize]),
vector_size_names[vectorSize], addressSpaceNames[addressSpace],
(aligned?"aligned":"unaligned"));
gFailCount++;
break; // goto exit;
}
}
}
if( gReportTimes && addressSpace == 0)
{
//Run again for timing
for( j = 0; j < 100; j++ )
{
uint64_t startTime = ReadTime();
error =
RunKernel( kernels[vectorSize][addressSpace], gInBuffer_half, gOutBuffer_single, numVecs(count, vectorSize, aligned) ,
runsOverBy(count, vectorSize, aligned));
if(error)
{
gFailCount++;
goto exit;
}
if( (error = clFinish(gQueue)) )
{
vlog_error( "Failure in clFinish\n" );
gFailCount++;
goto exit;
}
uint64_t currentTime = ReadTime() - startTime;
time[ vectorSize ] += currentTime;
if( currentTime < min_time[ vectorSize ] )
min_time[ vectorSize ] = currentTime ;
}
}
}
}
if( ((i+blockCount) & ~printMask) == (i+blockCount) )
{
vlog( "." );
fflush( stdout );
}
}
vlog( "\tPassed\n" );
if( gReportTimes )
{
for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * 100), 0,
"average us/elem", "vLoad%sHalf avg. (%s, vector size: %d)", ( (aligned) ? "a" : ""), addressSpaceNames[0], (g_arrVecSizes[vectorSize]) );
for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0,
"best us/elem", "vLoad%sHalf best (%s vector size: %d)", ( (aligned) ? "a" : ""), addressSpaceNames[0], (g_arrVecSizes[vectorSize]) );
}
exit:
//clean up
for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
{
for ( addressSpace = 0; addressSpace < 3; addressSpace++) {
clReleaseKernel( kernels[ vectorSize ][addressSpace] );
clReleaseProgram( programs[ vectorSize ][addressSpace] );
}
}
gTestCount++;
return error;
}
// Test entry point for the unaligned loads: runs the shared test body with
// aligned == false, which makes the generated kernels call vload_half*.
int Test_vload_half( void )
{
    const bool use_aligned_loads = false;
    return Test_vLoadHalf_private( use_aligned_loads );
}
// Test entry point for the aligned loads: runs the shared test body with
// aligned == true, which makes the generated kernels call vloada_half*.
int Test_vloada_half( void )
{
    const bool use_aligned_loads = true;
    return Test_vLoadHalf_private( use_aligned_loads );
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,493 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "cl_utils.h"
#include <stdlib.h>
#if !defined (_WIN32)
#include <sys/mman.h>
#endif
#include "test_config.h"
#include "string.h"
#include "../../test_common/harness/kernelHelpers.h"
// 2^-14, the smallest positive normal half-precision magnitude. Spelled as an
// exact decimal constant: the earlier form "1.0p-14" was not a valid floating
// constant (a binary 'p' exponent is only legal after a 0x prefix), so any use
// of the macro would fail to compile.
#define HALF_MIN 6.103515625e-05
// Per-vector-size string tables spliced into generated kernel source. They are
// indexed by vector-size index; the entries follow the order 1, 2, 4, 8, 16, 3
// (power-of-two sizes first, then the "strange" size 3).
// Suffix appended to type / builtin names ("" for scalar, "2", "4", ...).
const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount] = { "", "2", "4", "8", "16", "3" };
// Vector width spelled as a decimal string.
const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "3" };
// Divisor text paired with align_types when sizing / indexing staging arrays in kernel source.
const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "4" };
// Element type used to declare staging arrays in kernel source for each vector size.
const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount] = { "half", "int", "int2", "int4", "int8", "int2" };
// Host-side staging buffers; all are allocated with malloc() in InitCL().
void *gIn_half = NULL;
void *gOut_half = NULL;
void *gOut_half_reference = NULL;
void *gOut_half_reference_double = NULL;
void *gIn_single = NULL;
void *gOut_single = NULL;
void *gOut_single_reference = NULL;
void *gIn_double = NULL;
// void *gOut_double = NULL;
// void *gOut_double_reference = NULL;
// Device buffers; created in InitCL() and released in ReleaseCL().
cl_mem gInBuffer_half = NULL;
cl_mem gOutBuffer_half = NULL;
cl_mem gInBuffer_single = NULL;
cl_mem gOutBuffer_single = NULL;
cl_mem gInBuffer_double = NULL;
// cl_mem gOutBuffer_double = NULL;
// Device selection and the OpenCL objects built on it (see InitCL()).
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
cl_device_id gDevice = NULL;
cl_context gContext = NULL;
cl_command_queue gQueue = NULL;
// CL_DEVICE_MAX_CLOCK_FREQUENCY as queried in InitCL() (logged there as MHz); 1 if the query fails.
uint32_t gDeviceFrequency = 0;
// Compute unit count; InitCL() forces this to 1 unless MULTITHREAD is set and the query succeeds.
uint32_t gComputeDevices = 0;
// CL_DEVICE_MAX_WORK_GROUP_SIZE as queried in InitCL(); 1 if the query fails.
size_t gMaxThreadGroupSize = 0;
// Work-group size cap used by RunKernel(): one eighth of gMaxThreadGroupSize when that exceeds 8.
size_t gWorkGroupSize = 0;
// Running totals reported by main() at exit.
int gTestCount = 0;
int gFailCount = 0;
bool gWimpyMode = false;
// XOR-ed with the detected cl_khr_fp64 support in InitCL().
int gTestDouble = 0;
// Index into the device list returned by clGetDeviceIDs() in InitCL().
uint32_t gDeviceIndex = 0;
// Set to 1 by InitCL() when CL_DEVICE_PROFILE contains "EMBEDDED_PROFILE".
int gIsEmbedded = 0;
// Timing reports default to on for Apple builds and off everywhere else.
#if defined( __APPLE__ )
int gReportTimes = 1;
#else
int gReportTimes = 0;
#endif
#pragma mark -
// Error callback registered with clCreateContext() in InitCL(): writes the
// runtime's message to the test log. The remaining arguments are not used.
static void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
    const char *message = errinfo;

    vlog( "%s\n", message );
}
// Bring up the OpenCL state shared by the tests in this executable.
//
// Selects device number gDeviceIndex of type gDeviceType on the first platform,
// caches its limits in the g* globals, creates gContext and gQueue, allocates
// the host staging buffers and the device buffers, then logs a device summary.
//
// Returns 0 on success. On failure returns either the OpenCL error code of the
// failing platform/device-list query, or a negative code:
//   -1  device list allocation, out-of-range device index, or context creation
//   -2  command queue creation
//   -3  host buffer allocation
//   -4  input device buffer creation
//   -5  output device buffer creation
// Objects created before a failure are left in their globals.
int InitCL( void )
{
    cl_platform_id platform = NULL;
    size_t configSize = sizeof( gComputeDevices );
    int error;

    if( (error = clGetPlatformIDs(1, &platform, NULL) ) )
        return error;

    // gDeviceType & gDeviceIndex are globals set in ParseArgs
    cl_uint ndevices;
    if ( (error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &ndevices)) )
        return error;

    // Validate the requested index before it is used to subscript the device
    // list; previously an index >= ndevices read past the end of the array.
    if( gDeviceIndex >= ndevices )
    {
        vlog_error( "Device index %u is out of range: only %u device(s) of the requested type are available.\n",
                    (unsigned int) gDeviceIndex, (unsigned int) ndevices );
        return -1;
    }

    cl_device_id *gDeviceList = (cl_device_id *)malloc(ndevices*sizeof( cl_device_id ));
    if ( gDeviceList == 0 )
    {
        log_error("Unable to allocate memory for devices\n");
        return -1;
    }
    if( (error = clGetDeviceIDs(platform, gDeviceType, ndevices, gDeviceList, NULL )) )
    {
        free( gDeviceList );
        return error;
    }

    gDevice = gDeviceList[gDeviceIndex];
    free( gDeviceList );

    // Without MULTITHREAD the assignment below is unconditional; with it, the
    // assignment is only the fallback for a failed query.
#if MULTITHREAD
    if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) )
#endif
    gComputeDevices = 1;

    configSize = sizeof( gMaxThreadGroupSize );
    if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, configSize, &gMaxThreadGroupSize, NULL )) )
        gMaxThreadGroupSize = 1;

    // Use only one-eighth the work group size
    if (gMaxThreadGroupSize > 8)
        gWorkGroupSize = gMaxThreadGroupSize / 8;
    else
        gWorkGroupSize = gMaxThreadGroupSize;

    configSize = sizeof( gDeviceFrequency );
    if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) )
        gDeviceFrequency = 1;

    // Check extensions
    size_t extSize = 0;
    int hasDouble = 0;
    if((error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, 0, NULL, &extSize)))
    {   vlog_error( "Unable to get device extension string to see if double present. (%d) \n", error );  }
    else
    {
        char *ext = (char *)malloc( extSize );
        if( NULL == ext )
        { vlog_error( "malloc failed at %s:%d\nUnable to determine if double present.\n", __FILE__, __LINE__ ); }
        else
        {
            if((error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, extSize, ext, NULL)))
            {    vlog_error( "Unable to get device extension string to see if double present. (%d) \n", error );  }
            else
            {
                if( strstr( ext, "cl_khr_fp64" ))
                    hasDouble = 1;
            }
            free(ext);
        }
    }
    gTestDouble ^= hasDouble;

    //detect whether profile of the device is embedded
    char profile[64] = "";
    if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) )
    {
        vlog_error( "Unable to get device CL DEVICE PROFILE string. (%d) \n", error );
    }
    else if( strstr(profile, "EMBEDDED_PROFILE" ) )
    {
        gIsEmbedded = 1;
    }

    vlog( "%d compute devices at %f GHz\n", gComputeDevices, (double) gDeviceFrequency / 1000. );
    vlog( "Max thread group size is %lld.\n", (uint64_t) gMaxThreadGroupSize );

    gContext = clCreateContext( NULL, 1, &gDevice, notify_callback, NULL, &error );
    if( NULL == gContext )
    {
        // Message now names the call that actually failed.
        vlog_error( "clCreateContext failed. (%d)\n", error );
        return -1;
    }

    gQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error);
    if( NULL == gQueue )
    {
        // Message now names the call that actually failed.
        vlog_error( "clCreateCommandQueueWithProperties failed. (%d)\n", error );
        return -2;
    }

#if defined( __APPLE__ )
    // FIXME: use clProtectedArray
#endif
    //Allocate buffers
    gIn_half = malloc( getBufferSize(gDevice)/2 );
    gOut_half = malloc( BUFFER_SIZE/2 );
    gOut_half_reference = malloc( BUFFER_SIZE/2 );
    gOut_half_reference_double = malloc( BUFFER_SIZE/2 );
    gIn_single = malloc( BUFFER_SIZE );
    gOut_single = malloc( getBufferSize(gDevice) );
    gOut_single_reference = malloc( getBufferSize(gDevice) );
    gIn_double = malloc( 2*BUFFER_SIZE );
    // gOut_double = malloc( (2*getBufferSize(gDevice)) );
    // gOut_double_reference = malloc( (2*getBufferSize(gDevice)) );

    if ( NULL == gIn_half ||
         NULL == gOut_half ||
         NULL == gOut_half_reference ||
         NULL == gOut_half_reference_double ||
         NULL == gIn_single ||
         NULL == gOut_single ||
         NULL == gOut_single_reference ||
         NULL == gIn_double // || NULL == gOut_double || NULL == gOut_double_reference
       )
        return -3;

    gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, getBufferSize(gDevice) / 2, NULL, &error);
    if( gInBuffer_half == NULL )
    {
        vlog_error( "clCreateArray failed for input (%d)\n", error );
        return -4;
    }

    gInBuffer_single = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &error );
    if( gInBuffer_single == NULL )
    {
        vlog_error( "clCreateArray failed for input (%d)\n", error );
        return -4;
    }

    gInBuffer_double = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE*2, NULL, &error );
    if( gInBuffer_double == NULL )
    {
        vlog_error( "clCreateArray failed for input (%d)\n", error );
        return -4;
    }

    gOutBuffer_half = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, BUFFER_SIZE/2, NULL, &error );
    if( gOutBuffer_half == NULL )
    {
        vlog_error( "clCreateArray failed for output (%d)\n", error );
        return -5;
    }

    gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(gDevice), NULL, &error );
    if( gOutBuffer_single == NULL )
    {
        vlog_error( "clCreateArray failed for output (%d)\n", error );
        return -5;
    }

#if 0
    gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(gDevice)), NULL, &error );
    if( gOutBuffer_double == NULL )
    {
        vlog_error( "clCreateArray failed for output (%d)\n", error );
        return -5;
    }
#endif

    // Start from an empty string so a failed query below can never cause
    // uninitialised stack contents to be printed.
    char string[16384] = "";
    vlog( "\nCompute Device info:\n" );
    error = clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(string), string, NULL);
    vlog( "\tDevice Name: %s\n", string );
    error = clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(string), string, NULL);
    vlog( "\tVendor: %s\n", string );
    error = clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(string), string, NULL);
    vlog( "\tDevice Version: %s\n", string );
    error = clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(string), string, NULL);
    vlog( "\tOpenCL C Version: %s\n", string );
    error = clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(string), string, NULL);
    vlog( "\tDriver Version: %s\n", string );
    vlog( "\tProcessing with %d devices\n", gComputeDevices );
    vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
    vlog( "\tHas double? %s\n", hasDouble ? "YES" : "NO" );
    vlog( "\tTest double? %s\n", gTestDouble ? "YES" : "NO" );

    // Failures of the informational queries above are not treated as fatal.
    return 0;
}
cl_program MakeProgram( const char *source[], int count )
{
int error;
int i;
//create the program
cl_program program;
error = create_single_kernel_helper_create_program(gContext, &program, (cl_uint)count, source);
if( NULL == program )
{
vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error );
return NULL;
}
// build it
if( (error = clBuildProgram( program, 1, &gDevice, NULL, NULL, NULL )) )
{
size_t len;
char buffer[16384];
vlog_error("\t\tFAILED -- clBuildProgramExecutable() failed:\n");
clGetProgramBuildInfo(program, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
vlog_error("Log: %s\n", buffer);
vlog_error("Source :\n");
for(i = 0; i < count; ++i) {
vlog_error("%s", source[i]);
}
vlog_error("\n");
clReleaseProgram( program );
return NULL;
}
return program;
}
// Release the device buffers, command queue and context created by InitCL().
//
// main() reaches this on every exit path, including ones where InitCL() never
// ran or stopped part-way, so each handle is released only if it was actually
// created and is then reset to NULL, which also makes a repeated call harmless.
void ReleaseCL(void)
{
    if( gInBuffer_half )    { clReleaseMemObject(gInBuffer_half);    gInBuffer_half = NULL; }
    if( gOutBuffer_half )   { clReleaseMemObject(gOutBuffer_half);   gOutBuffer_half = NULL; }
    if( gInBuffer_single )  { clReleaseMemObject(gInBuffer_single);  gInBuffer_single = NULL; }
    if( gOutBuffer_single ) { clReleaseMemObject(gOutBuffer_single); gOutBuffer_single = NULL; }
    if( gInBuffer_double )  { clReleaseMemObject(gInBuffer_double);  gInBuffer_double = NULL; }
    // clReleaseMemObject(gOutBuffer_double);

    // Buffers first, then the queue, then the context they all belong to.
    if( gQueue )   { clReleaseCommandQueue(gQueue); gQueue = NULL; }
    if( gContext ) { clReleaseContext(gContext);    gContext = NULL; }
}
// Number of work-items needed to cover `count` scalar elements with the vector
// width selected by vectorSizeIdx. Aligned 3-vectors occupy four element slots
// each, so they use count/4; every other case is count/width rounded up.
cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) {
    const int width = g_arrVecSizes[vectorSizeIdx];

    if( 3 == width && aligned )
        return count/4;

    return (count + width - 1) / width;
}
// For unaligned 3-vectors, the number of leftover scalar elements
// (count modulo the width). In every other case returns -1 converted to cl_uint.
cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned) {
    const int width = g_arrVecSizes[vectorSizeIdx];
    const bool unaligned_vec3 = !aligned && (3 == width);

    if( !unaligned_vec3 )
        return -1;

    return count % width;
}
// Write the first `len` source fragments of `src` to the test log, in order,
// with nothing inserted between them.
void printSource(const char * src[], int len) {
    int remaining = len;
    const char * const *fragment = src;

    while( remaining > 0 ) {
        vlog("%s", *fragment);
        ++fragment;
        --remaining;
    }
}
// Bind the buffers to `kernel` and enqueue it over a 1-D range of blockCount
// work-items on gQueue.
//
// inBuf / outBuf: passed as kernel arguments 0 and 1.
// extraArg:       when >= 0, passed as kernel argument 2 (size of cl_uint);
//                 a negative value means the kernel takes no third argument.
// Returns 0 on success, -3 if an argument could not be set, -4 if the kernel's
// work-group size could not be queried, -5 if the enqueue failed. The call
// does not wait for the kernel to finish.
int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg)
{
    size_t global_size = blockCount;
    size_t local_size;
    int status;

    status  = clSetKernelArg(kernel, 0, sizeof inBuf, &inBuf);
    status |= clSetKernelArg(kernel, 1, sizeof outBuf, &outBuf);
    if( extraArg >= 0 )
        status |= clSetKernelArg(kernel, 2, sizeof(cl_uint), &extraArg);

    if( status )
    {
        vlog_error( "FAILED -- could not set kernel args\n" );
        return -3;
    }

    status = clGetKernelWorkGroupInfo(kernel, gDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof( local_size ), &local_size, NULL);
    if( status )
    {
        vlog_error( "FAILED -- could not get kernel work group info\n" );
        return -4;
    }

    // Cap at the test-wide limit, then shrink until the local size divides
    // the global size evenly.
    if( local_size > gWorkGroupSize )
        local_size = gWorkGroupSize;
    for( ; global_size % local_size; --local_size )
    {
    }

    status = clEnqueueNDRangeKernel( gQueue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL );
    if( status )
    {
        vlog_error( "FAILED -- could not execute kernel\n" );
        return -5;
    }

    return 0;
}
#if defined (__APPLE__ )
#include <mach/mach_time.h>
// Current value of the Mach absolute-time counter, in ticks. Convert a
// difference of two readings to seconds with SubtractTime().
uint64_t ReadTime( void )
{
    const uint64_t ticks = mach_absolute_time();    // time since boot; ticks have better than microsecond precision
    return ticks;
}
// Seconds elapsed between two ReadTime() readings.
// The tick-to-seconds factor is fetched from mach_timebase_info() on first use
// and cached; while that query keeps failing the factor stays 0.0 (so the
// result is 0.0) and the query is retried on the next call.
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
    static double seconds_per_tick = 0.0;

    if( 0.0 == seconds_per_tick )
    {
        mach_timebase_info_data_t timebase;
        if( 0 == mach_timebase_info( &timebase ) )
            seconds_per_tick = 1e-9 * (double) timebase.numer / (double) timebase.denom;
    }

    const uint64_t elapsed_ticks = endTime - startTime;
    return (double) elapsed_ticks * seconds_per_tick;
}
#elif defined( _WIN32 ) && defined (_MSC_VER)
// functions are defined in compat.h
#else
//
// Please feel free to substitute your own timing facility here.
//
#warning Times are meaningless. No timing facility in place for this platform.
// Placeholder for platforms without a timing facility: always reports 0.
uint64_t ReadTime( void )
{
    const uint64_t no_clock_available = 0ULL;
    return no_clock_available;
}
// Placeholder for platforms without a timing facility. A real implementation
// would return the difference between two ReadTime() readings in seconds; this
// one ignores both arguments and always returns INFINITY.
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
    const double unknown_duration = INFINITY;
    return unknown_duration;
}
#endif
#if !defined( __APPLE__ )
// Fill `bytes` bytes at dest with the 4-byte pattern found at src_pattern.
// Whole 32-bit words are stored first; if bytes is not a multiple of 4, the
// remaining 1-3 bytes are copied from the start of the pattern.
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
{
    const uint32_t word = *(const uint32_t *) src_pattern;
    uint32_t *cursor = (uint32_t *) dest;
    uint32_t * const words_end = cursor + bytes / 4;
    const size_t tail = bytes & 3;

    while( cursor != words_end )
        *cursor++ = word;

    if( tail )
        memcpy( cursor, src_pattern, tail );
}
#endif
// Test buffer size in bytes for device_id: half of the device's
// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE.
// The value is cached and re-queried only when a different device is passed.
// If the query fails, 64 KiB is returned and nothing is cached, so the next
// call queries again. Aborts the process if the value does not fit in size_t.
size_t getBufferSize(cl_device_id device_id)
{
    static int s_initialized = 0;
    static cl_device_id s_device_id;
    static cl_ulong s_result = 64*1024;

    const int needs_query = (0 == s_initialized) || (s_device_id != device_id);

    if( needs_query )
    {
        cl_ulong queried;
        const cl_int status = clGetDeviceInfo (device_id,
                                               CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
                                               sizeof(queried), (void *)&queried,
                                               NULL);
        if( status )
        {
            vlog_error("clGetDeviceInfo() failed\n");
            s_result = 64*1024;
        }
        else
        {
            queried = queried / 2;
            log_info("Const buffer size is %llx (%llu)\n", queried, queried);
            s_initialized = 1;
            s_device_id = device_id;
            s_result = queried;
        }
    }

    if( s_result > SIZE_MAX )
    {
        vlog_error( "ERROR: clGetDeviceInfo is reporting a CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE larger than addressable memory on the host.\n It seems highly unlikely that this is usable, due to the API design.\n" );
        fflush(stdout);
        abort();
    }

    return (size_t) s_result;
}
// Number of vectors of vecSize elements, each typeSize bytes, that fit in the
// buffer size reported by getBufferSize(). A 3-element vector is counted as
// occupying four element slots.
cl_ulong getBufferCount(cl_device_id device_id, size_t vecSize, size_t typeSize)
{
    const cl_ulong total_bytes = getBufferSize(device_id);
    const size_t slots_per_vector = (3 == vecSize) ? 4 : vecSize;

    return total_bytes/(cl_ulong)(slots_per_vector*typeSize);
}

View File

@@ -0,0 +1,162 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef CL_UTILS_H
#define CL_UTILS_H
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#if !defined(_WIN32)
#include <sys/param.h>
#endif
#ifdef __MINGW32__
#define __mingw_printf printf
#endif
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/ThreadPool.h"
#include "test_config.h"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
// Host-side staging buffers shared by the tests.
extern void *gIn_half;
extern void *gOut_half;
extern void *gOut_half_reference;
extern void *gOut_half_reference_double;
extern void *gIn_single;
extern void *gOut_single;
extern void *gOut_single_reference;
extern void *gIn_double;
// extern void *gOut_double;
// extern void *gOut_double_reference;
// Device buffers shared by the tests.
extern cl_mem gInBuffer_half;
extern cl_mem gOutBuffer_half;
extern cl_mem gInBuffer_single;
extern cl_mem gOutBuffer_single;
extern cl_mem gInBuffer_double;
// extern cl_mem gOutBuffer_double;
// Device selection and the OpenCL objects created for it.
extern uint32_t gDeviceIndex;
extern cl_device_type gDeviceType;
extern cl_device_id gDevice;
extern cl_context gContext;
extern cl_command_queue gQueue;
// Cached device properties.
extern uint32_t gDeviceFrequency;
extern uint32_t gComputeDevices;
extern size_t gMaxThreadGroupSize;
extern size_t gWorkGroupSize;
// Run-wide counters and mode flags.
extern int gTestCount;
extern int gFailCount;
extern int gTestDouble;
extern int gReportTimes;
extern int gIsEmbedded;
// gWimpyMode indicates if we run the test in wimpy mode where we limit the
// size of 32 bit ranges to a much smaller set. This is meant to be used
// as a smoke test
extern bool gWimpyMode;
// Timing: ReadTime() returns an opaque timestamp; SubtractTime() converts the
// difference of two timestamps to seconds.
uint64_t ReadTime( void );
double SubtractTime( uint64_t endTime, uint64_t startTime );
// Work-size helpers; vectorSizeIdx is an index into the vector-size tables, not a width.
cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned);
cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned);
// Logs `len` source fragments back to back.
void printSource(const char * src[], int len);
// String tables used when generating kernel source, indexed by vector-size index.
extern const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount];
extern const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount];
extern const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount];
extern const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount];
// OpenCL setup / teardown and kernel helpers.
int InitCL( void );
void ReleaseCL( void );
int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg);
cl_program MakeProgram( const char *source[], int count );
// Declared here only for non-Apple builds (cl_utils.c defines it under the same condition).
#if ! defined( __APPLE__ )
extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
#endif
// Two-level stringification: STRING(x) macro-expands x first, then STRINGIFY
// turns the result into a string literal.
#define STRING( _x ) STRINGIFY( _x )
#define STRINGIFY(x) #x
// Reinterpret the 32 bits of u as a float (no numeric conversion).
static inline float as_float(cl_uint u)
{
    union { cl_uint bits; float value; } pun;

    pun.bits = u;
    return pun.value;
}
// Reinterpret the 64 bits of u as a double (no numeric conversion).
static inline double as_double(cl_ulong u)
{
    union { cl_ulong bits; double value; } pun;

    pun.bits = u;
    return pun.value;
}
// used to convert a bucket of bits into a search pattern through double
// used to convert a bucket of bits into a search pattern through double
static inline cl_ulong DoubleFromUInt( cl_uint bits );
// Spreads a 32-bit value over 64 bits: the top 24 bits of `bits` land in the
// top 24 bits of the result and the low 8 bits stay in the low byte
// (0x89abcdef -> 0x89abcd00000000ef). When bit 7 of `bits` is set, 0x100 is
// then subtracted, which borrows through the empty middle and turns it into
// all ones; otherwise the middle stays all zeros.
static inline cl_ulong DoubleFromUInt( cl_uint bits )
{
    const cl_ulong high_part = (cl_ulong)(bits & ~0xffU) << 32;
    const cl_ulong low_byte  = (cl_ulong)(bits & 0xffU);
    const cl_ulong borrow    = (cl_ulong)((bits & 0x80U) << 1);

    return (high_part | low_byte) - borrow;
}
// Returns 1 when the half-precision bit pattern x is subnormal: with the sign
// bit masked off, the remaining bits are in the range 0x0001..0x03ff.
// Returns 0 otherwise, including for both zeros.
static inline int IsHalfSubnormal( uint16_t x )
{
    const unsigned int magnitude = x & 0x7fffU;

    return magnitude >= 1U && magnitude <= 0x03ffU;
}
// prevent silent failures due to missing FLT_RADIX
#ifndef FLT_RADIX
#error FLT_RADIX is not defined by float.h
#endif
// Returns nonzero when x, converted to float, is a subnormal single-precision
// value. Zero is not subnormal.
static inline int IsFloatSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Inspect the bit pattern as an integer so the answer does not depend on
    // flush-to-zero behaviour of the floating-point unit.
    union{ float value; uint32_t bits; } pun;
    pun.value = fabsf((float) x);
    return pun.bits >= 1U && pun.bits <= 0x007fffffU;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return x != 0.0 && fabs(x) < (double) FLT_MIN;
#endif
}
// Returns nonzero when x, converted to double, is a subnormal double-precision
// value. Zero is not subnormal.
static inline int IsDoubleSubnormal( long double x )
{
#if 2 == FLT_RADIX
    // Inspect the bit pattern as an integer so the answer does not depend on
    // flush-to-zero behaviour of the floating-point unit.
    union{ double value; uint64_t bits; } pun;
    pun.value = fabs((double)x);
    return pun.bits >= 1U && pun.bits <= 0x000fffffffffffffULL;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return x != 0.0 && fabs(x) < (double) DBL_MIN;
#endif
}
#endif /* CL_UTILS_H */

View File

@@ -0,0 +1,434 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if !defined (_WIN32)
#include <sys/resource.h>
#if !defined(__ANDROID__)
#include <sys/sysctl.h>
#endif
#include <libgen.h>
#include <sys/param.h>
#endif
#include "../../test_common/harness/mingw_compat.h"
#include "../../test_common/harness/parseParameters.h"
#if defined (__MINGW32__)
#include <sys/param.h>
#endif
#include "cl_utils.h"
#include "tests.h"
// Argument list storage; allocated in ParseArgs() with room for argc - 1 entries.
const char ** argList = NULL;
size_t argCount = 0;
// Program name used in messages; ParseArgs() overwrites the default from argv[0] where supported.
char appName[64] = "ctest";
// Display names for the address-space index (0..3) used by the load tests' failure messages.
const char *addressSpaceNames[] = {"global", "private", "local", "constant"};
#pragma mark -
#pragma mark Declarations
static int ParseArgs( int argc, const char **argv );
static void PrintUsage( void );
static void PrintArch(void);
static void PrintDevice(void);
static int DoTest( void);
// Vector widths by index: main() fills in the powers of two (1 << i) first,
// followed by the entries of arrStrangeVecSizes.
int g_arrVecSizes[kVectorSizeCount+kStrangeVectorSizeCount];
// For each width i in 0..kLargestVectorSize, the smallest power of two >= i
// (with entry 0 set to 1); filled in by main().
int g_arrVecAligns[kLargestVectorSize+1];
// The non-power-of-two vector widths appended to g_arrVecSizes.
static int arrStrangeVecSizes[kStrangeVectorSizeCount] = {3};
int main (int argc, const char **argv )
{
int error;
int i;
int alignbound;
for(i = 0; i < kVectorSizeCount; ++i) {
g_arrVecSizes[i] = (1<<i);
}
for(i = 0; i < kStrangeVectorSizeCount; ++i) {
g_arrVecSizes[i+kVectorSizeCount] =
arrStrangeVecSizes[i];
}
for(i = 0, alignbound=1; i <= kLargestVectorSize; ++i) {
while(alignbound < i) {
alignbound = alignbound<<1;
}
g_arrVecAligns[i] = alignbound;
}
test_start();
argc = parseCustomParam(argc, argv);
if (argc == -1)
{
test_finish();
return -1;
}
if( (error = ParseArgs( argc, argv )) )
goto exit;
if( (error = InitCL()) )
goto exit;
if (gIsEmbedded) {
vlog( "\tProfile: Embedded\n" );
}else
{
vlog( "\tProfile: Full\n" );
}
fflush( stdout );
error = DoTest();
exit:
if (gFailCount == 0) {
if (gTestCount > 1)
vlog("PASSED %d of %d tests.\n", gTestCount, gTestCount);
else
vlog("PASSED test.\n");
} else if (gFailCount > 0) {
if (gFailCount+gTestCount > 1)
vlog_error("FAILED %d of %d tests.\n", gFailCount, gTestCount+gFailCount);
else
vlog_error("FAILED test.\n");
}
if (gQueue) {
int flush_error = clFinish(gQueue);
if (flush_error)
vlog_error("clFinish failed: %d\n", flush_error);
}
ReleaseCL();
test_finish();
if (gFailCount)
return gFailCount;
return error;
}
#pragma mark -
#pragma mark setup
/*
 * ParseArgs
 *
 * Reads the test configuration from the environment and the command line.
 *
 *   - Derives appName from argv[0] (on platforms with basename() or
 *     _splitpath_s()).
 *   - CL_DEVICE_TYPE (env) selects gDeviceType; an unrecognized value aborts.
 *   - CL_DEVICE_INDEX (env) selects gDeviceIndex; an out-of-range value is
 *     replaced by 0.
 *   - "-d", "-t", "-w" toggle/set gTestDouble, gReportTimes and gWimpyMode;
 *     "-h" and unknown flags print the usage text.
 *   - A bare CL_DEVICE_TYPE_* argument selects gDeviceType; every other bare
 *     argument is stored in argList as a test name.
 *   - CL_WIMPY_MODE (env) forces wimpy mode.
 *
 * Parameters:
 *   argc, argv - command line as left by parseCustomParam().
 *
 * Returns 0 on success, -1 if the argument list cannot be allocated, if
 * "-h" was given, or if an unknown flag was seen.
 */
static int ParseArgs( int argc, const char **argv )
{
    int i;

    // Only allocate when there can be arguments to store. This avoids both
    // the implementation-defined calloc( 0, ... ) and a wrapped-around size
    // should argc ever be less than 1.
    argList = NULL;
    argCount = 0;
    if( argc > 1 )
    {
        argList = (const char **)calloc( (size_t)( argc - 1 ), sizeof( char*) );
        if( NULL == argList )
            return -1;
    }

#if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__))
    if( argc > 0 && NULL != argv[0] )
    { // Extract the app name
        char baseName[ MAXPATHLEN ];
        char *base;

        // strncpy() does not terminate the destination when the source fills
        // it, so copy one byte less and terminate explicitly before handing
        // the buffer to basename().
        strncpy( baseName, argv[0], MAXPATHLEN - 1 );
        baseName[ MAXPATHLEN - 1 ] = '\0';

        base = basename( baseName );
        if( NULL != base )
        {
            strncpy( appName, base, sizeof( appName ) );
            appName[ sizeof( appName ) -1 ] = '\0';
        }
    }
#elif defined (_WIN32)
    if( argc > 0 && NULL != argv[0] )
    {
        char fname[_MAX_FNAME + _MAX_EXT + 1];
        char ext[_MAX_EXT];
        errno_t err = _splitpath_s( argv[0], NULL, 0, NULL, 0,
                                    fname, _MAX_FNAME, ext, _MAX_EXT );
        if (err == 0) { // no error
            strcat (fname, ext); // just cat them, fname is sized to hold both
            strncpy (appName, fname, sizeof(appName));
            appName[ sizeof( appName ) -1 ] = '\0';
        }
    }
#endif

    /* Check for environment variable to set device type */
    char *env_mode = getenv( "CL_DEVICE_TYPE" );
    if( env_mode != NULL )
    {
        if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
            gDeviceType = CL_DEVICE_TYPE_GPU;
        else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
            gDeviceType = CL_DEVICE_TYPE_CPU;
        else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
            gDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
        else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
            gDeviceType = CL_DEVICE_TYPE_DEFAULT;
        else
        {
            vlog_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
            abort();
        }
    }

    // Count the devices of the type selected so far. num_devices starts at 0
    // and is only trusted when both queries succeed (both return 0 on
    // success), so a failed query can never leave an indeterminate value in
    // the range check below.
    // NOTE(review): a CL_DEVICE_TYPE_* given on the command line is parsed
    // further down, i.e. after this count was taken -- confirm that is intended.
    unsigned int num_devices = 0;
    cl_platform_id platform = NULL;
    if( 0 == clGetPlatformIDs(1, &platform, NULL) )
    {
        if( 0 != clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices) )
            num_devices = 0;
    }

    const char* device_index_env = getenv("CL_DEVICE_INDEX");
    if (device_index_env) {
        gDeviceIndex = atoi(device_index_env);
        if (gDeviceIndex >= num_devices) {
            vlog("Specified CL_DEVICE_INDEX=%d out of range, using index 0.\n",
                 gDeviceIndex);
            gDeviceIndex = 0;
        }
    }

    vlog( "\n%s", appName );
    for( i = 1; i < argc; i++ )
    {
        const char *arg = argv[i];
        if( NULL == arg )
            break;

        vlog( "\t%s", arg );
        if( arg[0] == '-' )
        {
            // Single-letter flags may be combined, e.g. "-dt".
            arg++;
            while( *arg != '\0' )
            {
                switch( *arg )
                {
                    case 'd':
                        gTestDouble ^= 1;
                        break;

                    case 'h':
                        PrintUsage();
                        return -1;

                    case 't':
                        gReportTimes ^= 1;
                        break;

                    case 'w': // Wimpy mode
                        gWimpyMode = true;
                        break;

                    default:
                        // Go through unsigned char so bytes >= 0x80 are not
                        // sign-extended in the hex dump.
                        vlog_error( " <-- unknown flag: %c (0x%2.2x)\n", *arg, (unsigned int)(unsigned char)*arg );
                        PrintUsage();
                        return -1;
                }
                arg++;
            }
        }
        else
        {
            if( 0 == strcmp( arg, "CL_DEVICE_TYPE_CPU" ) )
                gDeviceType = CL_DEVICE_TYPE_CPU;
            else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_GPU" ) )
                gDeviceType = CL_DEVICE_TYPE_GPU;
            else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_ACCELERATOR" ) )
                gDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
            else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_DEFAULT" ) )
                gDeviceType = CL_DEVICE_TYPE_DEFAULT;
            else
            {
                // Anything else is taken to be a test name; DoTest() checks it.
                argList[ argCount ] = arg;
                argCount++;
            }
        }
    }

    if (getenv("CL_WIMPY_MODE")) {
        vlog( "\n" );
        vlog( "*** Detected CL_WIMPY_MODE env ***\n" );
        gWimpyMode = 1;
    }

    vlog( "Test binary built %s %s\n", __DATE__, __TIME__ );
    PrintArch();
    PrintDevice();
    if( gWimpyMode )
    {
        vlog( "\n" );
        vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
        vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
        vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
    }
    return 0;
}
/*
 * Logs the command line synopsis followed by one line per supported flag.
 */
static void PrintUsage( void )
{
    // One entry per vlog() call that follows the synopsis line.
    static const char *const optionLines[] =
    {
        "\t\t-d\tToggle double precision testing (default: on if double supported)\n",
        "\t\t-t\tToggle reporting performance data.\n",
        "\t\t-w\tRun in wimpy mode\n",
        "\t\t-h\tHelp\n",
        "\n"
    };
    size_t k;

    vlog( "%s [-dthw]: <optional: test names>\n", appName );
    for( k = 0; k < sizeof( optionLines ) / sizeof( optionLines[0] ); k++ )
    {
        vlog( "%s", optionLines[k] );
    }
}
/*
 * Logs the pointer size of this build and, on Apple platforms, the compile
 * time architecture plus the CPU type and subtype reported by sysctlbyname().
 */
static void PrintArch( void )
{
    // sizeof yields a size_t, which is not the type %ld expects on every ABI
    // (64-bit Windows has a 32-bit long). Convert explicitly so the argument
    // always matches the conversion specifier.
    vlog( "sizeof( void*) = %ld\n", (long) sizeof( void *) );

#if defined( __APPLE__ )
    #if defined( __ppc__ )
        vlog( "ARCH:\tppc\n" );
    #elif defined( __ppc64__ )
        vlog( "ARCH:\tppc64\n" );
    #elif defined( __i386__ )
        vlog( "ARCH:\ti386\n" );
    #elif defined( __x86_64__ )
        vlog( "ARCH:\tx86_64\n" );
    #elif defined( __arm__ )
        vlog( "ARCH:\tarm\n" );
    #else
        #error unknown arch
    #endif

    // type is pre-set to 0, so 0 is what gets logged if a query fails
    // (the sysctlbyname() return value is not checked).
    int type = 0;
    size_t typeSize = sizeof( type );
    sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 );
    vlog( "cpu type:\t%d\n", type );
    typeSize = sizeof( type );
    sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 );
    vlog( "cpu subtype:\t%d\n", type );
#endif
}
/*
 * Logs which device type (gDeviceType) the tests will run on. Any value other
 * than CPU, GPU or ACCELERATOR is reported as "unknown" through vlog_error().
 */
static void PrintDevice( void)
{
    if( CL_DEVICE_TYPE_CPU == gDeviceType )
    {
        vlog( "DEVICE:\tcpu\n" );
    }
    else if( CL_DEVICE_TYPE_GPU == gDeviceType )
    {
        vlog( "DEVICE:\tgpu\n" );
    }
    else if( CL_DEVICE_TYPE_ACCELERATOR == gDeviceType )
    {
        vlog( "DEVICE:\taccelerator\n" );
    }
    else
    {
        vlog_error( "DEVICE:\tunknown\n" );
    }
}
static int DoTest( void )
{
int error = 0;
if( 0 == argCount )
{ // test all
if( (error = Test_vload_half()) )
return error;
if( (error = Test_vloada_half()) )
return error;
if( (error = Test_vstore_half()) )
return error;
if( (error = Test_vstorea_half()) )
return error;
if( (error = Test_vstore_half_rte()) )
return error;
if( (error = Test_vstorea_half_rte()) )
return error;
if( (error = Test_vstore_half_rtz()) )
return error;
if( (error = Test_vstorea_half_rtz()) )
return error;
if( (error = Test_vstore_half_rtp()) )
return error;
if( (error = Test_vstorea_half_rtp()) )
return error;
if( (error = Test_vstore_half_rtn()) )
return error;
if( (error = Test_vstorea_half_rtn()) )
return error;
if( (error = Test_roundTrip()) )
return error;
}
else
{
typedef struct{ int (*f)(void); const char *name; }TestItem;
#define ENTRY( _x ) { Test_ ## _x, STRINGIFY(_x) }
static const TestItem list[] =
{
ENTRY(vload_half),
ENTRY(vloada_half),
ENTRY(vstore_half),
ENTRY(vstorea_half),
ENTRY(vstore_half_rte),
ENTRY(vstorea_half_rte),
ENTRY(vstore_half_rtz),
ENTRY(vstorea_half_rtz),
ENTRY(vstore_half_rtp),
ENTRY(vstorea_half_rtp),
ENTRY(vstore_half_rtn),
ENTRY(vstorea_half_rtn),
ENTRY(roundTrip)
};
static const size_t list_count = sizeof( list ) / sizeof( list[0] );
size_t i, j;
for( i = 0; i < argCount; i++ )
{
const char *argp = argList[i];
for( j = 0; j < list_count; j++ )
{
if( 0 == strcmp(argp, list[j].name) )
{
if( (error = list[j].f()) )
return error;
break;
}
}
if( j == list_count )
{
vlog_error( "Unknown test name: %s\n. Exiting...\n", argp );
return -5;
}
}
}
return error;
}

View File

@@ -0,0 +1,41 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Compile-time configuration shared by the half tests.
#ifndef TEST_CONFIG_H
#define TEST_CONFIG_H
// NOTE(review): presumably enables the multithreaded code paths -- confirm against the users of this macro.
#define MULTITHREAD 1
// Number of power-of-two vector sizes (1 << 0 .. 1 << (kVectorSizeCount-1)).
#define kVectorSizeCount 5
// Number of additional non-power-of-two vector sizes.
#define kStrangeVectorSizeCount 1
#define kMinVectorSize 0
// Largest power-of-two vector size: 1 << 4 == 16 with the values above.
#define kLargestVectorSize (1 << (kVectorSizeCount-1))
// Total number of entries in g_arrVecSizes (power-of-two plus strange sizes).
#define kLastVectorSizeToTest (kVectorSizeCount + kStrangeVectorSizeCount)
// 2 * 1024 * 1024, as a size_t.
// NOTE(review): presumably a byte count -- confirm against its users.
#define BUFFER_SIZE ((size_t)2 * 1024 * 1024)
// Defined elsewhere; semantics are not visible from this header.
extern size_t getBufferSize(cl_device_id device_id);
extern cl_ulong getBufferCount(cl_device_id device_id, size_t vecSize, size_t typeSize);
// could call
// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
#define kPageSize 4096
// Lookup tables defined and filled in by main.c: the vector sizes under test,
// and for each n in 0..kLargestVectorSize the smallest power of two >= n.
extern int g_arrVecSizes[kVectorSizeCount+kStrangeVectorSizeCount];
extern int g_arrVecAligns[kLargestVectorSize+1];
#endif /* TEST_CONFIG_H */

View File

@@ -0,0 +1,41 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Entry points of the individual half tests.
#ifndef TESTS_H
#define TESTS_H
// Test entry points dispatched by DoTest() in main.c. Each returns an int
// that the caller treats as 0 == success, non-zero == error.
int Test_vload_half( void );
int Test_vloada_half( void );
int Test_vstore_half( void );
int Test_vstorea_half( void );
int Test_vstore_half_rte( void );
int Test_vstorea_half_rte( void );
int Test_vstore_half_rtz( void );
int Test_vstorea_half_rtz( void );
int Test_vstore_half_rtp( void );
int Test_vstorea_half_rtp( void );
int Test_vstore_half_rtn( void );
int Test_vstorea_half_rtn( void );
int Test_roundTrip( void );
// Function pointer types: map a float / double to a cl_ushort.
// NOTE(review): presumably the cl_ushort holds half-precision bits produced by a reference conversion -- confirm.
typedef cl_ushort (*f2h)( float );
typedef cl_ushort (*d2h)( double );
// Shared drivers taking the float and double reference functions plus a rounding mode name.
// NOTE(review): presumably called by the Test_vstore*_half* entry points above -- confirm in Test_vStoreHalf.c.
int Test_vStoreHalf_private( f2h referenceFunc, d2h referenceDoubleFunc, const char *roundName );
int Test_vStoreaHalf_private( f2h referenceFunc, d2h referenceDoubleFunc, const char *roundName );
#endif /* TESTS_H */