mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Initial open source release of OpenCL 2.2 CTS.
This commit is contained in:
20
test_conformance/relationals/CMakeLists.txt
Normal file
20
test_conformance/relationals/CMakeLists.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
# Module name; it also forms the prefix of the source-list variable set below.
set(MODULE_NAME RELATIONALS)
|
||||
|
||||
# Sources for this module: the local test files first, then the shared
# harness sources from test_common/harness.
set(${MODULE_NAME}_SOURCES
|
||||
main.c
|
||||
test_relationals.cpp
|
||||
test_comparisons_float.cpp
|
||||
test_comparisons_double.cpp
|
||||
test_shuffles.cpp
|
||||
../../test_common/harness/errorHelpers.c
|
||||
../../test_common/harness/threadTesting.c
|
||||
../../test_common/harness/testHarness.c
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
../../test_common/harness/mt19937.c
|
||||
../../test_common/harness/conversions.c
|
||||
../../test_common/harness/msvc9.c
|
||||
../../test_common/harness/parseParameters.cpp
|
||||
)
|
||||
|
||||
# NOTE(review): the shared CMakeCommon.txt presumably consumes MODULE_NAME and
# <MODULE_NAME>_SOURCES to declare the actual target - confirm in that file.
include(../CMakeCommon.txt)
|
||||
|
||||
19
test_conformance/relationals/Jamfile
Normal file
19
test_conformance/relationals/Jamfile
Normal file
@@ -0,0 +1,19 @@
|
||||
# Project-wide requirements: compile every source as C++ (main.c included),
# using -xc++ with gcc and /TP with msvc.
project
|
||||
: requirements
|
||||
<toolset>gcc:<cflags>-xc++
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
|
||||
# The test executable, built from the local sources listed here.
exe test_relationals
|
||||
: main.c
|
||||
test_comparisons_double.cpp
|
||||
test_comparisons_float.cpp
|
||||
test_relationals.cpp
|
||||
test_shuffles.cpp
|
||||
;
|
||||
|
||||
# Install the executable under $(DIST), in a per-variant (debug/release) directory.
install dist
|
||||
: test_relationals
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/relationals
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/relationals
|
||||
;
|
||||
46
test_conformance/relationals/Makefile
Normal file
46
test_conformance/relationals/Makefile
Normal file
@@ -0,0 +1,46 @@
|
||||
# Builds the test_relationals binary against the OpenCL framework
# (paths and -framework flags below are macOS specific).

# Optional ATF support, enabled by defining BUILD_WITH_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Local test sources followed by the shared harness sources.
SRCS = main.c \
       test_relationals.cpp \
       test_shuffles.cpp \
       test_comparisons_double.cpp \
       test_comparisons_float.cpp \
       ../../test_common/harness/errorHelpers.c \
       ../../test_common/harness/threadTesting.c \
       ../../test_common/harness/testHarness.c \
       ../../test_common/harness/kernelHelpers.c \
       ../../test_common/harness/mt19937.c \
       ../../test_common/harness/conversions.c

DEFINES =

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_relationals
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
# The C++ driver is used for linking and for the .c sources as well.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c / .cpp source to its object file (objects land next to the sources).
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# 'all' and 'clean' name actions, not files; without this a stray file called
# "all" or "clean" in this directory would stop the rule from running.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
100
test_conformance/relationals/main.c
Normal file
100
test_conformance/relationals/main.c
Normal file
@@ -0,0 +1,100 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Alignment table indexed by vector element count (0..16).
// With DENSE_PACK_VECS set the entry equals the element count itself;
// otherwise it is the element count rounded up to a power of two.
#if DENSE_PACK_VECS
const int g_vector_aligns[] = {
    0,
    1,  2,  3,  4,
    5,  6,  7,  8,
    9,  10, 11, 12,
    13, 14, 15, 16
};
#else
const int g_vector_aligns[] = {
    0,
    1,  2,  4,  4,
    8,  8,  8,  8,
    16, 16, 16, 16,
    16, 16, 16, 16
};
#endif
|
||||
|
||||
|
||||
// Allocation-size table indexed by vector element count (0..16):
// each entry is the element count rounded up to a power of two.
const int g_vector_allocs[] = {
    0,
    1,  2,  4,  4,
    8,  8,  8,  8,
    16, 16, 16, 16,
    16, 16, 16, 16
};
|
||||
|
||||
|
||||
basefn basefn_list[] = {
|
||||
test_relational_any,
|
||||
test_relational_all,
|
||||
test_relational_bitselect,
|
||||
test_relational_select_signed,
|
||||
test_relational_select_unsigned,
|
||||
|
||||
test_relational_isequal,
|
||||
test_relational_isnotequal,
|
||||
test_relational_isgreater,
|
||||
test_relational_isgreaterequal,
|
||||
test_relational_isless,
|
||||
test_relational_islessequal,
|
||||
test_relational_islessgreater,
|
||||
|
||||
test_shuffle_copy,
|
||||
test_shuffle_function_call,
|
||||
test_shuffle_array_cast,
|
||||
test_shuffle_built_in,
|
||||
test_shuffle_built_in_dual_input
|
||||
};
|
||||
|
||||
// Names of the tests, in the same order as the entries of basefn_list.
const char *basefn_names[] = {
    // any / all / select family
    "relational_any",
    "relational_all",
    "relational_bitselect",
    "relational_select_signed",
    "relational_select_unsigned",

    // floating-point comparison built-ins
    "relational_isequal",
    "relational_isnotequal",
    "relational_isgreater",
    "relational_isgreaterequal",
    "relational_isless",
    "relational_islessequal",
    "relational_islessgreater",

    // shuffle tests
    "shuffle_copy",
    "shuffle_function_call",
    "shuffle_array_cast",
    "shuffle_built_in",
    "shuffle_built_in_dual_input",
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
}
|
||||
|
||||
|
||||
56
test_conformance/relationals/procs.h
Normal file
56
test_conformance/relationals/procs.h
Normal file
@@ -0,0 +1,56 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Shared declarations for the relationals tests: harness includes, tuning
// macros, the vector tables defined in main.c and every test entry point.
// Guarded because it is included both directly and through testBase.h.
#ifndef RELATIONALS_PROCS_H
#define RELATIONALS_PROCS_H

#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/threadTesting.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/mt19937.h"

// The number of errors to print out for each test in the shuffle tests
#define MAX_ERRORS_TO_PRINT 1

// Tables indexed by vector element count; defined in main.c.
extern const int g_vector_aligns[];
extern const int g_vector_allocs[];

// Selects the densely packed g_vector_aligns table in main.c and the
// vload3/vstore3 kernel variants for 3-element vectors in the comparison tests.
#define DENSE_PACK_VECS 1

extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);

// any / all / select family
extern int test_relational_any(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_all(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_bitselect(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_select_signed(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_select_unsigned(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

// floating-point comparison built-ins
extern int test_relational_isequal(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_isnotequal(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_isgreater(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_isgreaterequal(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_isless(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_islessequal(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_relational_islessgreater(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

// shuffle tests
extern int test_shuffles(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shuffles_16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shuffles_dual(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shuffle_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shuffle_function_call(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shuffle_array_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shuffle_built_in(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shuffle_built_in_dual_input(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

#endif // RELATIONALS_PROCS_H
|
||||
|
||||
|
||||
31
test_conformance/relationals/testBase.h
Normal file
31
test_conformance/relationals/testBase.h
Normal file
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testBase_h
|
||||
#define _testBase_h
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
#endif // _testBase_h
|
||||
|
||||
|
||||
|
||||
361
test_conformance/relationals/test_comparisons_double.cpp
Normal file
361
test_conformance/relationals/test_comparisons_double.cpp
Normal file
@@ -0,0 +1,361 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
extern "C" { extern cl_uint gRandomSeed; };
|
||||
|
||||
#define TEST_SIZE 512
|
||||
|
||||
// Kernel template for double comparisons on every vector width except the
// dense 3-element case. printf-style arguments, in order: four vector-size
// suffixes, the built-in function name, the equivalent operator.
const char *equivTestKernelPattern_double =
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
    "__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, "
    "__global long%s *destValues, __global long%s *destValuesB)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
    " destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n"
    "\n}\n";
|
||||
|
||||
// Kernel template for islessgreater on doubles: the operator form is spelled
// out as (a < b) | (a > b), so there is no operator placeholder.
// printf-style arguments, in order: four vector-size suffixes, the function name.
const char *equivTestKernelPatternLessGreater_double =
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
    "__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, "
    "__global long%s *destValues, __global long%s *destValuesB)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
    " destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n"
    "\n}\n";
|
||||
|
||||
|
||||
// Kernel template for double3 comparisons: operands and results go through
// vload3/vstore3 on scalar pointers instead of direct vector indexing.
// printf-style arguments, in order: four vector-size suffixes, the built-in
// function name, the equivalent operator.
const char *equivTestKernelPattern_double3 =
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
    "__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, "
    "__global long%s *destValues, __global long%s *destValuesB)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " double3 sampA = vload3(tid, (__global double *)sourceA);\n"
    " double3 sampB = vload3(tid, (__global double *)sourceB);\n"
    " vstore3(%s( sampA, sampB ), tid, (__global long *)destValues);\n"
    " vstore3(( sampA %s sampB ), tid, (__global long *)destValuesB);\n"
    "\n}\n";
|
||||
|
||||
// double3 variant of the islessgreater kernel template (vload3/vstore3 access,
// operator form written as (a < b) | (a > b)).
// printf-style arguments, in order: four vector-size suffixes, the function name.
const char *equivTestKernelPatternLessGreater_double3 =
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
    "__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, "
    "__global long%s *destValues, __global long%s *destValuesB)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " double3 sampA = vload3(tid, (__global double *)sourceA);\n"
    " double3 sampB = vload3(tid, (__global double *)sourceB);\n"
    " vstore3(%s( sampA, sampB ), tid, (__global long *)destValues);\n"
    " vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global long *)destValuesB);\n"
    "\n}\n";
|
||||
|
||||
|
||||
typedef bool (*equivVerifyFn)( double inDataA, double inDataB );
|
||||
|
||||
void verify_equiv_values_double( unsigned int vecSize, double *inDataA, double *inDataB, cl_long *outData, equivVerifyFn verifyFn )
|
||||
{
|
||||
unsigned int i;
|
||||
cl_long trueResult;
|
||||
bool result;
|
||||
|
||||
trueResult = ( vecSize == 1 ) ? 1 : -1;
|
||||
for( i = 0; i < vecSize; i++ )
|
||||
{
|
||||
result = verifyFn( inDataA[ i ], inDataB[ i ] );
|
||||
outData[ i ] = result ? trueResult : 0;
|
||||
}
|
||||
}
|
||||
|
||||
void generate_equiv_test_data_double( double *outData, unsigned int vecSize, bool alpha, MTdata d )
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
generate_random_data( kDouble, vecSize * TEST_SIZE, d, outData );
|
||||
|
||||
// Fill the first few vectors with NAN in each vector element (or the second set if we're alpha, so we can test either case)
|
||||
if( alpha )
|
||||
outData += vecSize * vecSize;
|
||||
for( i = 0; i < vecSize; i++ )
|
||||
{
|
||||
outData[ 0 ] = NAN;
|
||||
outData += vecSize + 1;
|
||||
}
|
||||
// Make sure the third set is filled regardless, to test the case where both have NANs
|
||||
if( !alpha )
|
||||
outData += vecSize * vecSize;
|
||||
for( i = 0; i < vecSize; i++ )
|
||||
{
|
||||
outData[ 0 ] = NAN;
|
||||
outData += vecSize + 1;
|
||||
}
|
||||
}
|
||||
|
||||
int test_equiv_kernel_double(cl_context context, cl_command_queue queue, const char *fnName, const char *opName,
|
||||
unsigned int vecSize, equivVerifyFn verifyFn, MTdata d )
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[4];
|
||||
double inDataA[TEST_SIZE * 16], inDataB[ TEST_SIZE * 16 ];
|
||||
cl_long outData[TEST_SIZE * 16], expected[16];
|
||||
int error, i, j;
|
||||
size_t threads[1], localThreads[1];
|
||||
char kernelSource[10240];
|
||||
char *programPtr;
|
||||
char sizeName[4];
|
||||
|
||||
|
||||
/* Create the source */
|
||||
if( vecSize == 1 )
|
||||
sizeName[ 0 ] = 0;
|
||||
else
|
||||
sprintf( sizeName, "%d", vecSize );
|
||||
|
||||
if(DENSE_PACK_VECS && vecSize == 3) {
|
||||
if (strcmp(fnName, "islessgreater")) {
|
||||
sprintf( kernelSource, equivTestKernelPattern_double3, sizeName, sizeName, sizeName, sizeName, fnName, opName );
|
||||
} else {
|
||||
sprintf( kernelSource, equivTestKernelPatternLessGreater_double3, sizeName, sizeName, sizeName, sizeName, fnName );
|
||||
}
|
||||
} else {
|
||||
if (strcmp(fnName, "islessgreater")) {
|
||||
sprintf( kernelSource, equivTestKernelPattern_double, sizeName, sizeName, sizeName, sizeName, fnName, opName );
|
||||
} else {
|
||||
sprintf( kernelSource, equivTestKernelPatternLessGreater_double, sizeName, sizeName, sizeName, sizeName, fnName );
|
||||
}
|
||||
}
|
||||
|
||||
/* Create kernels */
|
||||
programPtr = kernelSource;
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Generate some streams */
|
||||
generate_equiv_test_data_double( inDataA, vecSize, true, d );
|
||||
generate_equiv_test_data_double( inDataB, vecSize, false, d );
|
||||
|
||||
streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_double ) * vecSize * TEST_SIZE, &inDataA, &error);
|
||||
if( streams[0] == NULL )
|
||||
{
|
||||
print_error( error, "Creating input array A failed!\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_double ) * vecSize * TEST_SIZE, &inDataB, &error);
|
||||
if( streams[1] == NULL )
|
||||
{
|
||||
print_error( error, "Creating input array A failed!\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_long ) * vecSize * TEST_SIZE, NULL, &error);
|
||||
if( streams[2] == NULL )
|
||||
{
|
||||
print_error( error, "Creating output array failed!\n");
|
||||
return -1;
|
||||
}
|
||||
streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_long ) * vecSize * TEST_SIZE, NULL, &error);
|
||||
if( streams[3] == NULL )
|
||||
{
|
||||
print_error( error, "Creating output array failed!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* Assign streams and execute */
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
|
||||
/* Run the kernel */
|
||||
threads[0] = TEST_SIZE;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
|
||||
/* Now get the results */
|
||||
error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( cl_long ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read output array!" );
|
||||
|
||||
/* And verify! */
|
||||
for( i = 0; i < TEST_SIZE; i++ )
|
||||
{
|
||||
verify_equiv_values_double( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
|
||||
|
||||
for( j = 0; j < (int)vecSize; j++ )
|
||||
{
|
||||
if( expected[ j ] != outData[ i * vecSize + j ] )
|
||||
{
|
||||
log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %lld, got %lld, source %f,%f\n",
|
||||
i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Now get the results */
|
||||
error = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof( cl_long ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read output array!" );
|
||||
|
||||
/* And verify! */
|
||||
for( i = 0; i < TEST_SIZE; i++ )
|
||||
{
|
||||
verify_equiv_values_double( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
|
||||
|
||||
for( j = 0; j < (int)vecSize; j++ )
|
||||
{
|
||||
if( expected[ j ] != outData[ i * vecSize + j ] )
|
||||
{
|
||||
log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %lld, got %lld, source %f,%f\n",
|
||||
i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Runs test_equiv_kernel_double for every vector width (1, 2, 3, 4, 8, 16).
// Returns 0 without testing when the device lacks cl_khr_fp64; otherwise
// returns 0 if all widths pass and -1 if any width fails. A failing width
// does not stop the remaining widths from running.
int test_equiv_kernel_set_double(cl_device_id device, cl_context context, cl_command_queue queue, const char *fnName, const char *opName, equivVerifyFn verifyFn, MTdata d )
{
    // Zero-terminated list of widths to exercise
    static const unsigned int kWidths[] = { 1, 2, 3, 4, 8, 16, 0 };

    if( !is_extension_available( device, "cl_khr_fp64" ) )
    {
        log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
        return 0;
    }
    log_info("Testing doubles.\n");

    int status = 0;
    for( const unsigned int *width = kWidths; *width != 0; ++width )
    {
        const int rc = test_equiv_kernel_double( context, queue, fnName, opName, *width, verifyFn, d );
        if( rc == 0 )
            continue;

        log_error( " Vector double%d FAILED\n", *width );
        status = -1;
    }

    return status;
}
|
||||
|
||||
// Host reference for isequal: true only when neither operand is NaN and the
// two values compare equal.
bool isequal_verify_fn_double( double valueA, double valueB )
{
    const bool anyNaN = isnan( valueA ) || isnan( valueB );
    return anyNaN ? false : ( valueA == valueB );
}
|
||||
|
||||
// Double-precision isequal test: checks the built-in against the == operator
// for every vector width, using a generator seeded from gRandomSeed.
int test_relational_isequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_double( device, context, queue, "isequal", "==", isequal_verify_fn_double, rng );
    return status;
}
|
||||
|
||||
// Host reference for isnotequal: true when either operand is NaN or the two
// values differ.
bool isnotequal_verify_fn_double( double valueA, double valueB )
{
    const bool anyNaN = isnan( valueA ) || isnan( valueB );
    return anyNaN ? true : ( valueA != valueB );
}
|
||||
|
||||
// Double-precision isnotequal test: checks the built-in against the != operator
// for every vector width, using a generator seeded from gRandomSeed.
int test_relational_isnotequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_double( device, context, queue, "isnotequal", "!=", isnotequal_verify_fn_double, rng );
    return status;
}
|
||||
|
||||
// Host reference for isgreater: false when either operand is NaN, otherwise
// valueA > valueB.
bool isgreater_verify_fn_double( double valueA, double valueB )
{
    const bool anyNaN = isnan( valueA ) || isnan( valueB );
    return anyNaN ? false : ( valueA > valueB );
}
|
||||
|
||||
// Double-precision isgreater test: checks the built-in against the > operator
// for every vector width, using a generator seeded from gRandomSeed.
int test_relational_isgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_double( device, context, queue, "isgreater", ">", isgreater_verify_fn_double, rng );
    return status;
}
|
||||
|
||||
// Host reference for isgreaterequal: false when either operand is NaN,
// otherwise valueA >= valueB.
bool isgreaterequal_verify_fn_double( double valueA, double valueB )
{
    const bool anyNaN = isnan( valueA ) || isnan( valueB );
    return anyNaN ? false : ( valueA >= valueB );
}
|
||||
|
||||
// Double-precision isgreaterequal test: checks the built-in against the >=
// operator for every vector width, using a generator seeded from gRandomSeed.
int test_relational_isgreaterequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_double( device, context, queue, "isgreaterequal", ">=", isgreaterequal_verify_fn_double, rng );
    return status;
}
|
||||
|
||||
// Host reference for isless: false when either operand is NaN, otherwise
// valueA < valueB.
bool isless_verify_fn_double( double valueA, double valueB )
{
    const bool anyNaN = isnan( valueA ) || isnan( valueB );
    return anyNaN ? false : ( valueA < valueB );
}
|
||||
|
||||
// Double-precision isless test: checks the built-in against the < operator
// for every vector width, using a generator seeded from gRandomSeed.
int test_relational_isless_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_double( device, context, queue, "isless", "<", isless_verify_fn_double, rng );
    return status;
}
|
||||
|
||||
// Host reference for islessequal: false when either operand is NaN, otherwise
// valueA <= valueB.
bool islessequal_verify_fn_double( double valueA, double valueB )
{
    const bool anyNaN = isnan( valueA ) || isnan( valueB );
    return anyNaN ? false : ( valueA <= valueB );
}
|
||||
|
||||
// Double-precision islessequal test: checks the built-in against the <=
// operator for every vector width, using a generator seeded from gRandomSeed.
int test_relational_islessequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_double( device, context, queue, "islessequal", "<=", islessequal_verify_fn_double, rng );
    return status;
}
|
||||
|
||||
// Host reference for islessgreater: false when either operand is NaN,
// otherwise true when valueA is strictly below or strictly above valueB.
bool islessgreater_verify_fn_double( double valueA, double valueB )
{
    const bool anyNaN = isnan( valueA ) || isnan( valueB );
    if( anyNaN )
        return false;

    const bool below = valueA < valueB;
    const bool above = valueA > valueB;
    return below || above;
}
|
||||
|
||||
// Double-precision islessgreater test for every vector width, using a
// generator seeded from gRandomSeed. The "<>" operator name is only a
// placeholder: test_equiv_kernel_double selects the LessGreater kernel
// templates for this function name, and those do not substitute opName.
int test_relational_islessgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_double( device, context, queue, "islessgreater", "<>", islessgreater_verify_fn_double, rng );
    return status;
}
|
||||
|
||||
|
||||
361
test_conformance/relationals/test_comparisons_float.cpp
Normal file
361
test_conformance/relationals/test_comparisons_float.cpp
Normal file
@@ -0,0 +1,361 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
extern "C" { extern cl_uint gRandomSeed;};
|
||||
|
||||
#define TEST_SIZE 512
|
||||
|
||||
// Kernel template for float comparisons on every vector width except the
// dense 3-element case. printf-style arguments, in order: four vector-size
// suffixes, the built-in function name, the equivalent operator.
const char *equivTestKernelPattern_float =
    "__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, "
    "__global int%s *destValues, __global int%s *destValuesB)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
    " destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n"
    "\n}\n";
|
||||
|
||||
// Kernel template for islessgreater on floats: the operator form is spelled
// out as (a < b) | (a > b), so there is no operator placeholder.
// printf-style arguments, in order: four vector-size suffixes, the function name.
const char *equivTestKernelPatternLessGreater_float =
    "__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, "
    "__global int%s *destValues, __global int%s *destValuesB)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
    " destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n"
    "\n}\n";
|
||||
|
||||
|
||||
// Kernel template for float3 comparisons: operands and results go through
// vload3/vstore3 on scalar pointers instead of direct vector indexing.
// printf-style arguments, in order: four vector-size suffixes, the built-in
// function name, the equivalent operator.
const char *equivTestKernelPattern_float3 =
    "__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, "
    "__global int%s *destValues, __global int%s *destValuesB)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " float3 sampA = vload3(tid, (__global float *)sourceA);\n"
    " float3 sampB = vload3(tid, (__global float *)sourceB);\n"
    " vstore3(%s( sampA, sampB ), tid, (__global int *)destValues);\n"
    " vstore3(( sampA %s sampB ), tid, (__global int *)destValuesB);\n"
    "\n}\n";
|
||||
|
||||
// float3 variant of the islessgreater kernel template (vload3/vstore3 access,
// operator form written as (a < b) | (a > b)).
// printf-style arguments, in order: four vector-size suffixes, the function name.
const char *equivTestKernelPatternLessGreater_float3 =
    "__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, "
    "__global int%s *destValues, __global int%s *destValuesB)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " float3 sampA = vload3(tid, (__global float *)sourceA);\n"
    " float3 sampB = vload3(tid, (__global float *)sourceB);\n"
    " vstore3(%s( sampA, sampB ), tid, (__global int *)destValues);\n"
    " vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global int *)destValuesB);\n"
    "\n}\n";
|
||||
|
||||
typedef bool (*equivVerifyFn)( float inDataA, float inDataB );
|
||||
extern "C" { extern int gInfNanSupport; };
|
||||
|
||||
// Returns non-zero when x is positive or negative infinity, 0 otherwise.
int IsFloatInfinity(float x)
{
    const int infinite = isinf( x );
    return infinite;
}
|
||||
|
||||
// Returns non-zero when x is a NaN, 0 otherwise.
int IsFloatNaN(float x)
{
    const int notANumber = isnan( x );
    return notANumber;
}
|
||||
|
||||
void verify_equiv_values_float( unsigned int vecSize, float *inDataA, float *inDataB, int *outData, equivVerifyFn verifyFn )
|
||||
{
|
||||
unsigned int i;
|
||||
int trueResult;
|
||||
bool result;
|
||||
|
||||
trueResult = ( vecSize == 1 ) ? 1 : -1;
|
||||
for( i = 0; i < vecSize; i++ )
|
||||
{
|
||||
result = verifyFn( inDataA[ i ], inDataB[ i ] );
|
||||
outData[ i ] = result ? trueResult : 0;
|
||||
}
|
||||
}
|
||||
|
||||
void generate_equiv_test_data_float( float *outData, unsigned int vecSize, bool alpha, MTdata d )
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
generate_random_data( kFloat, vecSize * TEST_SIZE, d, outData );
|
||||
|
||||
// Fill the first few vectors with NAN in each vector element (or the second set if we're alpha, so we can test either case)
|
||||
if( alpha )
|
||||
outData += vecSize * vecSize;
|
||||
for( i = 0; i < vecSize; i++ )
|
||||
{
|
||||
outData[ 0 ] = NAN;
|
||||
outData += vecSize + 1;
|
||||
}
|
||||
// Make sure the third set is filled regardless, to test the case where both have NANs
|
||||
if( !alpha )
|
||||
outData += vecSize * vecSize;
|
||||
for( i = 0; i < vecSize; i++ )
|
||||
{
|
||||
outData[ 0 ] = NAN;
|
||||
outData += vecSize + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds and runs one float comparison kernel for a single vector width, then checks both of the
// kernel's output buffers against the host reference verifyFn.
//
//   fnName   - name substituted into the kernel source; "islessgreater" selects the kernel pattern
//              that is formatted without an operator
//   opName   - operator text substituted into the kernel source (unused for "islessgreater")
//   vecSize  - vector width under test
//   verifyFn - host reference comparison applied element by element
//   d        - random generator used to build the inputs
//
// Returns 0 on success and -1 on the first failure. test_error is assumed to return from this
// function when a CL call fails.
//
// NOTE(review): the kernel patterns are defined outside this section; presumably streams[2] receives
// the built-in function result and streams[3] the operator result -- confirm against the patterns.
int test_equiv_kernel_float(cl_context context, cl_command_queue queue, const char *fnName, const char *opName,
                            unsigned int vecSize, equivVerifyFn verifyFn, MTdata d )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[4];
    float inDataA[TEST_SIZE * 16], inDataB[ TEST_SIZE * 16 ];
    int outData[TEST_SIZE * 16], expected[16];
    int error, i, j;
    size_t threads[1], localThreads[1];
    char kernelSource[10240];
    char *programPtr;
    char sizeName[4];


    /* Create the source */
    if( vecSize == 1 )
        sizeName[ 0 ] = 0;
    else
        sprintf( sizeName, "%d", vecSize );


    if(DENSE_PACK_VECS && vecSize == 3) {
        if (strcmp(fnName, "islessgreater")) {
            sprintf( kernelSource, equivTestKernelPattern_float3, sizeName, sizeName, sizeName, sizeName, fnName, opName );
        } else {
            sprintf( kernelSource, equivTestKernelPatternLessGreater_float3, sizeName, sizeName, sizeName, sizeName, fnName );
        }
    } else {
        if (strcmp(fnName, "islessgreater")) {
            sprintf( kernelSource, equivTestKernelPattern_float, sizeName, sizeName, sizeName, sizeName, fnName, opName );
        } else {
            sprintf( kernelSource, equivTestKernelPatternLessGreater_float, sizeName, sizeName, sizeName, sizeName, fnName );
        }
    }

    /* Create kernels */
    programPtr = kernelSource;
    if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
    {
        return -1;
    }

    /* Generate some streams */
    generate_equiv_test_data_float( inDataA, vecSize, true, d );
    generate_equiv_test_data_float( inDataB, vecSize, false, d );

    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_float ) * vecSize * TEST_SIZE, &inDataA, &error);
    if( streams[0] == NULL )
    {
        print_error( error, "Creating input array A failed!\n");
        return -1;
    }
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_float ) * vecSize * TEST_SIZE, &inDataB, &error);
    if( streams[1] == NULL )
    {
        // Message previously named array A, which pointed at the wrong buffer
        print_error( error, "Creating input array B failed!\n");
        return -1;
    }
    streams[2] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_int ) * vecSize * TEST_SIZE, NULL, &error);
    if( streams[2] == NULL )
    {
        print_error( error, "Creating output array failed!\n");
        return -1;
    }
    streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_int ) * vecSize * TEST_SIZE, NULL, &error);
    if( streams[3] == NULL )
    {
        print_error( error, "Creating output array failed!\n");
        return -1;
    }


    /* Assign streams and execute */
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
    test_error( error, "Unable to set indexed kernel arguments" );


    /* Run the kernel */
    threads[0] = TEST_SIZE;

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* Read back the first output buffer */
    error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( int ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
    test_error( error, "Unable to read output array!" );

    /* Verify the first output buffer: every mismatch is a failure */
    for( i = 0; i < TEST_SIZE; i++ )
    {
        verify_equiv_values_float( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);

        for( j = 0; j < (int)vecSize; j++ )
        {
            if( expected[ j ] != outData[ i * vecSize + j ] )
            {
                log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %d, got %d, source %f,%f\n",
                          i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
                return -1;
            }
        }
    }

    /* Read back the second output buffer */
    error = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof( int ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
    test_error( error, "Unable to read output array!" );

    /* Verify the second output buffer.
     * A mismatch is tolerated only when INF/NaN support is absent (gInfNanSupport == 0) and one of the
     * two inputs is a NaN. Previously the failure flag was only ever set inside the gInfNanSupport == 0
     * branch, so with INF/NaN support present no mismatch in this buffer was ever reported. */
    for( i = 0; i < TEST_SIZE; i++ )
    {
        verify_equiv_values_float( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);

        for( j = 0; j < (int)vecSize; j++ )
        {
            if( expected[ j ] == outData[ i * vecSize + j ] )
                continue;

            if( gInfNanSupport == 0 &&
                ( IsFloatNaN( inDataA[i*vecSize + j] ) || IsFloatNaN( inDataB[i*vecSize + j] ) ) )
                continue;

            log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %d, got %d, source %f,%f\n",
                      i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
// Runs test_equiv_kernel_float once for each vector width 1, 2, 3, 4, 8 and 16.
// Every width is attempted even after a failure; returns -1 if any width failed, otherwise 0.
int test_equiv_kernel_set_float(cl_context context, cl_command_queue queue, const char *fnName, const char *opName, equivVerifyFn verifyFn, MTdata d )
{
    // Zero-terminated list of widths to exercise
    unsigned int widths[] = { 1, 2, 3, 4, 8, 16, 0 };
    int status = 0;

    for( unsigned int *width = widths; *width != 0; ++width )
    {
        if( test_equiv_kernel_float(context, queue, fnName, opName, *width, verifyFn, d ) == 0 )
            continue;

        log_error( " Vector float%d FAILED\n", *width );
        status = -1;
    }

    return status;
}
|
||||
|
||||
// Host reference for isequal: true exactly when the two values compare equal (false if either is NaN).
bool isequal_verify_fn_float( float valueA, float valueB )
{
    const bool differ = ( valueA != valueB );
    return !differ;
}
|
||||
|
||||
// Test entry point for isequal / "==" on floats. device and numElements are not used by this function.
int test_relational_isequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_float( context, queue, "isequal", "==", isequal_verify_fn_float, rng );
    return status;
}
|
||||
|
||||
// Host reference for isnotequal: true when the values do not compare equal (including when either is NaN).
bool isnotequal_verify_fn_float( float valueA, float valueB )
{
    const bool same = ( valueA == valueB );
    return !same;
}
|
||||
|
||||
// Test entry point for isnotequal / "!=" on floats. device and numElements are not used by this function.
int test_relational_isnotequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_float( context, queue, "isnotequal", "!=", isnotequal_verify_fn_float, rng );
    return status;
}
|
||||
|
||||
// Host reference for isgreater: true when valueA is strictly greater than valueB (false if either is NaN).
bool isgreater_verify_fn_float( float valueA, float valueB )
{
    return valueB < valueA;
}
|
||||
|
||||
// Test entry point for isgreater / ">" on floats. device and numElements are not used by this function.
int test_relational_isgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_float( context, queue, "isgreater", ">", isgreater_verify_fn_float, rng );
    return status;
}
|
||||
|
||||
// Host reference for isgreaterequal: true when valueA is greater than or equal to valueB (false if either is NaN).
bool isgreaterequal_verify_fn_float( float valueA, float valueB )
{
    return valueB <= valueA;
}
|
||||
|
||||
// Test entry point for isgreaterequal / ">=" on floats. device and numElements are not used by this function.
int test_relational_isgreaterequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_float( context, queue, "isgreaterequal", ">=", isgreaterequal_verify_fn_float, rng );
    return status;
}
|
||||
|
||||
// Host reference for isless: true when valueA is strictly less than valueB (false if either is NaN).
bool isless_verify_fn_float( float valueA, float valueB )
{
    return valueB > valueA;
}
|
||||
|
||||
// Test entry point for isless / "<" on floats. device and numElements are not used by this function.
int test_relational_isless_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_float( context, queue, "isless", "<", isless_verify_fn_float, rng );
    return status;
}
|
||||
|
||||
// Host reference for islessequal: true when valueA is less than or equal to valueB (false if either is NaN).
bool islessequal_verify_fn_float( float valueA, float valueB )
{
    return valueB >= valueA;
}
|
||||
|
||||
// Test entry point for islessequal / "<=" on floats. device and numElements are not used by this function.
int test_relational_islessequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_float( context, queue, "islessequal", "<=", islessequal_verify_fn_float, rng );
    return status;
}
|
||||
|
||||
// Host reference for islessgreater: true when valueA is strictly less than or strictly greater than
// valueB; false when they compare equal or either is NaN.
bool islessgreater_verify_fn_float( float valueA, float valueB )
{
    if( valueA < valueB )
        return true;
    return valueA > valueB;
}
|
||||
|
||||
// Test entry point for islessgreater on floats. device and numElements are not used by this function.
// The "<>" operator string is passed through, but test_equiv_kernel_float does not insert it into the
// kernel source when the function name is "islessgreater".
int test_relational_islessgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    RandomSeed rng( gRandomSeed );
    const int status = test_equiv_kernel_set_float( context, queue, "islessgreater", "<>", islessgreater_verify_fn_float, rng );
    return status;
}
|
||||
|
||||
|
||||
785
test_conformance/relationals/test_relationals.cpp
Normal file
785
test_conformance/relationals/test_relationals.cpp
Normal file
@@ -0,0 +1,785 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
// Kernel template for any()/all(). Format arguments, in order:
//   1. optional pragma line (may be empty)
//   2. element type name, 3. vector width suffix (together the type of sourceA)
//   4. name of the function to call
const char *anyAllTestKernelPattern =
    "%s\n"
    "__kernel void sample_test(__global %s%s *sourceA, __global int *destValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " destValues[tid] = %s( sourceA[tid] );\n"
    "\n"
    "}\n";
|
||||
|
||||
// Kernel template for any()/all() that reads its 3-element input with vload3. Format arguments, in order:
//   1. optional pragma line (may be empty)
//   2. element type name, 3. vector width suffix (together the declared type of sourceA)
//   4. name of the function to call
//   5. scalar element type name used for the vload3 pointer cast
const char *anyAllTestKernelPatternVload =
    "%s\n"
    "__kernel void sample_test(__global %s%s *sourceA, __global int *destValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " destValues[tid] = %s(vload3(tid, (__global %s *)sourceA));\n"
    "\n"
    "}\n";
|
||||
|
||||
// Number of work-items launched per kernel run in this file, and the number of vectors held by each
// host-side input/output array.
#define TEST_SIZE 512
|
||||
|
||||
// Defined elsewhere with C linkage; the test entry points below construct their RandomSeed generators from it.
extern "C" {extern cl_uint gRandomSeed;};
|
||||
|
||||
// Host reference for any()/all(): given the element type, the vector width and a pointer to one input
// vector, returns the int result the kernel is expected to write for that vector.
typedef int (*anyAllVerifyFn)( ExplicitType vecType, unsigned int vecSize, void *inData );
|
||||
|
||||
// Builds and runs one any()/all() kernel for a single element type and vector width and compares each
// work-item's int result with verifyFn's answer for the same input vector.
//
//   fnName   - function name inserted into the kernel source
//   vecType  - element type of the input vectors
//   vecSize  - vector width; g_vector_aligns[vecSize] gives the per-vector element stride
//   verifyFn - host reference
//   d        - random generator used for the input data
//
// Returns 0 on success and -1 on the first failure (test_error is assumed to return on a CL error).
int test_any_all_kernel(cl_context context, cl_command_queue queue,
                        const char *fnName, ExplicitType vecType,
                        unsigned int vecSize, anyAllVerifyFn verifyFn,
                        MTdata d )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    cl_long inDataA[TEST_SIZE * 16], clearData[TEST_SIZE * 16];
    int outData[TEST_SIZE];
    int error;
    size_t threads[1], localThreads[1];
    char kernelSource[10240];
    char sizeName[4];

    /* Width suffix for the type name; empty when the stride is a single element */
    if( g_vector_aligns[vecSize] == 1 )
        sizeName[ 0 ] = 0;
    else
        sprintf( sizeName, "%d", vecSize );

    const char *typeName = get_explicit_type_name( vecType );
    const char *pragmaLine = ( vecType == kDouble ) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "";

    log_info("Testing any/all on %s%s\n", typeName, sizeName);

    /* Format the kernel source; densely packed 3-vectors go through the vload3 variant */
    if( DENSE_PACK_VECS && vecSize == 3 )
        sprintf( kernelSource, anyAllTestKernelPatternVload, pragmaLine, typeName, sizeName, fnName, typeName );
    else
        sprintf( kernelSource, anyAllTestKernelPattern, pragmaLine, typeName, sizeName, fnName );

    /* Build the program and fetch the kernel */
    char *programPtr = kernelSource;
    if( create_single_kernel_helper( context, &program, &kernel, 1,
                                     (const char **)&programPtr,
                                     "sample_test" ) )
        return -1;

    /* Random input, zeroed output */
    generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataA );
    memset( clearData, 0, sizeof( clearData ) );

    const size_t vectorBytes = get_explicit_type_size( vecType ) * g_vector_aligns[vecSize];

    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), vectorBytes * TEST_SIZE, &inDataA, &error);
    if( streams[0] == NULL )
    {
        print_error( error, "Creating input array A failed!\n");
        return -1;
    }

    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * g_vector_aligns[vecSize] * TEST_SIZE, clearData, &error );
    if( streams[1] == NULL )
    {
        print_error( error, "Creating output array failed!\n");
        return -1;
    }

    /* Bind both buffers */
    for( cl_uint argIndex = 0; argIndex < 2; argIndex++ )
    {
        error = clSetKernelArg( kernel, argIndex, sizeof( streams[argIndex] ), &streams[argIndex] );
        test_error( error, "Unable to set indexed kernel arguments" );
    }

    /* One work-item per input vector */
    threads[0] = TEST_SIZE;

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* Blocking read of the per-work-item results */
    error = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof( int ) * TEST_SIZE, outData, 0, NULL, NULL );
    test_error( error, "Unable to read output array!" );

    /* Compare against the host reference, stopping at the first mismatch */
    for( int i = 0; i < TEST_SIZE; i++ )
    {
        char *vectorStart = (char *)inDataA + i * vectorBytes;
        int expected = verifyFn( vecType, vecSize, vectorStart );
        if( expected == outData[ i ] )
            continue;

        // The message shows only the first 32 bits of the offending vector
        unsigned int *ptr = (unsigned int *)vectorStart;
        log_error( "ERROR: Data sample %d does not validate! Expected (%d), got (%d), source 0x%08x\n",
                  i, expected, outData[i], *ptr );
        return -1;
    }

    return 0;
}
|
||||
|
||||
// Host reference for any(): returns 1 when at least one of the first vecSize elements of inData has
// its most significant bit set, otherwise 0. Handles kChar, kShort, kInt and kLong; every other type
// yields 0.
int anyVerifyFn( ExplicitType vecType, unsigned int vecSize, void *inData )
{
    unsigned int lane;

    switch( vecType )
    {
        case kChar:
        {
            const char *elems = (const char *)inData;
            for( lane = 0; lane < vecSize; lane++ )
                if( elems[ lane ] & 0x80 )
                    return 1;
            return 0;
        }
        case kShort:
        {
            const short *elems = (const short *)inData;
            for( lane = 0; lane < vecSize; lane++ )
                if( elems[ lane ] & 0x8000 )
                    return 1;
            return 0;
        }
        case kInt:
        {
            const cl_int *elems = (const cl_int *)inData;
            for( lane = 0; lane < vecSize; lane++ )
                if( elems[ lane ] & (cl_int)0x80000000L )
                    return 1;
            return 0;
        }
        case kLong:
        {
            const cl_long *elems = (const cl_long *)inData;
            for( lane = 0; lane < vecSize; lane++ )
                if( elems[ lane ] & 0x8000000000000000LL )
                    return 1;
            return 0;
        }
        default:
            return 0;
    }
}
|
||||
|
||||
// Test entry point for any(): runs test_any_all_kernel for char, short, int and long vectors of widths
// 1, 2, 3, 4, 8 and 16. Long is skipped when gHasLong is false. All combinations are attempted;
// returns -1 if any of them failed, otherwise 0. device and numElements are not used.
int test_relational_any(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    ExplicitType vecType[] = { kChar, kShort, kInt, kLong };
    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };   // zero-terminated
    const unsigned int typeCount = sizeof( vecType ) / sizeof( vecType[ 0 ] );
    int retVal = 0;
    RandomSeed seed(gRandomSeed );

    for( unsigned int t = 0; t < typeCount; t++ )
    {
        const ExplicitType type = vecType[ t ];
        if( type == kLong && !gHasLong )
            continue;

        for( const unsigned int *width = vecSizes; *width != 0; width++ )
        {
            if( test_any_all_kernel(context, queue, "any", type, *width, anyVerifyFn, seed ) == 0 )
                continue;

            log_error( " Vector %s%d FAILED\n", get_explicit_type_name( type ), *width );
            retVal = -1;
        }
    }

    return retVal;
}
|
||||
|
||||
// Host reference for all(): returns 1 when every one of the first vecSize elements of inData has its
// most significant bit set (also when vecSize is 0), otherwise 0. Handles kChar, kShort, kInt and
// kLong; every other type yields 0.
int allVerifyFn( ExplicitType vecType, unsigned int vecSize, void *inData )
{
    unsigned int lane;

    switch( vecType )
    {
        case kChar:
        {
            const char *elems = (const char *)inData;
            for( lane = 0; lane < vecSize; lane++ )
                if( !( elems[ lane ] & 0x80 ) )
                    return 0;
            return 1;
        }
        case kShort:
        {
            const short *elems = (const short *)inData;
            for( lane = 0; lane < vecSize; lane++ )
                if( !( elems[ lane ] & 0x8000 ) )
                    return 0;
            return 1;
        }
        case kInt:
        {
            const cl_int *elems = (const cl_int *)inData;
            for( lane = 0; lane < vecSize; lane++ )
                if( !( elems[ lane ] & (cl_int)0x80000000L ) )
                    return 0;
            return 1;
        }
        case kLong:
        {
            const cl_long *elems = (const cl_long *)inData;
            for( lane = 0; lane < vecSize; lane++ )
                if( !( elems[ lane ] & 0x8000000000000000LL ) )
                    return 0;
            return 1;
        }
        default:
            return 0;
    }
}
|
||||
|
||||
// Test entry point for all(): runs test_any_all_kernel for char, short, int and long vectors of widths
// 1, 2, 3, 4, 8 and 16. Long is skipped when gHasLong is false. All combinations are attempted;
// returns -1 if any of them failed, otherwise 0. device and numElements are not used.
int test_relational_all(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    ExplicitType vecType[] = { kChar, kShort, kInt, kLong };
    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };   // zero-terminated
    const unsigned int typeCount = sizeof( vecType ) / sizeof( vecType[ 0 ] );
    int retVal = 0;
    RandomSeed seed(gRandomSeed );

    for( unsigned int t = 0; t < typeCount; t++ )
    {
        const ExplicitType type = vecType[ t ];
        if( type == kLong && !gHasLong )
            continue;

        for( const unsigned int *width = vecSizes; *width != 0; width++ )
        {
            if( test_any_all_kernel(context, queue, "all", type, *width, allVerifyFn, seed ) == 0 )
                continue;

            log_error( " Vector %s%d FAILED\n", get_explicit_type_name( type ), *width );
            retVal = -1;
        }
    }

    return retVal;
}
|
||||
|
||||
// Kernel template for the three-operand select-style functions. Format arguments, in order:
//   1.    optional pragma line (may be empty)
//   2-3.  type name and width suffix of sourceA
//   4-5.  type name and width suffix of sourceB
//   6-7.  type name and width suffix of sourceC
//   8-9.  type name and width suffix of destValues
//   10.   name of the function to call
const char *selectTestKernelPattern =
    "%s\n"
    "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " destValues[tid] = %s( sourceA[tid], sourceB[tid], sourceC[tid] );\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
// Kernel template for the three-operand select-style functions on 3-element vectors, using
// vload3/vstore3. Format arguments, in order:
//   1.     optional pragma line (may be empty)
//   2-9.   type name and width suffix of sourceA, sourceB, sourceC and destValues
//   10-11. type name and width suffix of the temporary holding the result
//   12.    name of the function to call
//   13-15. scalar type names for the vload3 pointer casts of sourceA, sourceB and sourceC
//   16.    scalar type name for the vstore3 pointer cast of destValues
const char *selectTestKernelPatternVload =
    "%s\n"
    "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " %s%s tmp = %s( vload3(tid, (__global %s *)sourceA), vload3(tid, (__global %s *)sourceB), vload3(tid, (__global %s *)sourceC) );\n"
    " vstore3(tmp, tid, (__global %s *)destValues);\n"
    "\n"
    "}\n";
|
||||
|
||||
// Host reference for the select-style tests. test_select_kernel calls it once per element, passing
// pointers to that element in inputs A, B and the test/mask input; the function writes the expected
// element (get_explicit_type_size( vecType ) bytes) to outData.
typedef void (*selectVerifyFn)( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData );
|
||||
|
||||
int test_select_kernel(cl_context context, cl_command_queue queue, const char *fnName,
|
||||
ExplicitType vecType, unsigned int vecSize, ExplicitType testVecType, selectVerifyFn verifyFn, MTdata d )
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[4];
|
||||
cl_long inDataA[TEST_SIZE * 16], inDataB[ TEST_SIZE * 16 ], inDataC[ TEST_SIZE * 16 ];
|
||||
cl_long outData[TEST_SIZE * 16], expected[16];
|
||||
int error, i;
|
||||
size_t threads[1], localThreads[1];
|
||||
char kernelSource[10240];
|
||||
char *programPtr;
|
||||
char sizeName[4], outSizeName[4];
|
||||
unsigned int outVecSize;
|
||||
|
||||
|
||||
/* Create the source */
|
||||
if( vecSize == 1 )
|
||||
sizeName[ 0 ] = 0;
|
||||
else
|
||||
sprintf( sizeName, "%d", vecSize );
|
||||
|
||||
outVecSize = vecSize;
|
||||
|
||||
if( outVecSize == 1 )
|
||||
outSizeName[ 0 ] = 0;
|
||||
else
|
||||
sprintf( outSizeName, "%d", outVecSize );
|
||||
|
||||
if(DENSE_PACK_VECS && vecSize == 3) {
|
||||
// anyAllTestKernelPatternVload
|
||||
sprintf( kernelSource, selectTestKernelPatternVload,
|
||||
(vecType == kDouble || testVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
||||
get_explicit_type_name( vecType ), sizeName,
|
||||
get_explicit_type_name( vecType ), sizeName,
|
||||
get_explicit_type_name( testVecType ), sizeName,
|
||||
get_explicit_type_name( vecType ), outSizeName,
|
||||
get_explicit_type_name( vecType ), sizeName,
|
||||
fnName,
|
||||
get_explicit_type_name( vecType ),
|
||||
get_explicit_type_name( vecType ),
|
||||
get_explicit_type_name( vecType ),
|
||||
get_explicit_type_name( testVecType ) );
|
||||
} else {
|
||||
sprintf( kernelSource, selectTestKernelPattern,
|
||||
(vecType == kDouble || testVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
||||
get_explicit_type_name( vecType ), sizeName,
|
||||
get_explicit_type_name( vecType ), sizeName,
|
||||
get_explicit_type_name( testVecType ), sizeName,
|
||||
get_explicit_type_name( vecType ), outSizeName,
|
||||
fnName );
|
||||
}
|
||||
|
||||
/* Create kernels */
|
||||
programPtr = kernelSource;
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Generate some streams */
|
||||
generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataA );
|
||||
generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataB );
|
||||
generate_random_data( testVecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataC );
|
||||
|
||||
streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataA, &error);
|
||||
if( streams[0] == NULL )
|
||||
{
|
||||
print_error( error, "Creating input array A failed!\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataB, &error);
|
||||
if( streams[1] == NULL )
|
||||
{
|
||||
print_error( error, "Creating input array A failed!\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( testVecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataC, &error);
|
||||
if( streams[2] == NULL )
|
||||
{
|
||||
print_error( error, "Creating input array A failed!\n");
|
||||
return -1;
|
||||
}
|
||||
streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, get_explicit_type_size( vecType ) * g_vector_aligns[outVecSize] * TEST_SIZE, NULL, &error);
|
||||
if( streams[3] == NULL )
|
||||
{
|
||||
print_error( error, "Creating output array failed!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Assign streams and execute */
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
/* Run the kernel */
|
||||
threads[0] = TEST_SIZE;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
|
||||
/* Now get the results */
|
||||
error = clEnqueueReadBuffer( queue, streams[3], true, 0, get_explicit_type_size( vecType ) * TEST_SIZE * g_vector_aligns[outVecSize], outData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read output array!" );
|
||||
|
||||
/* And verify! */
|
||||
for( i = 0; i < (int)(TEST_SIZE * g_vector_aligns[vecSize]); i++ )
|
||||
{
|
||||
if(i%g_vector_aligns[vecSize] >= (int) vecSize) {
|
||||
continue;
|
||||
}
|
||||
verifyFn( vecType, testVecType, vecSize, (char *)inDataA + i * get_explicit_type_size( vecType ),
|
||||
(char *)inDataB + i * get_explicit_type_size( vecType ),
|
||||
(char *)inDataC + i * get_explicit_type_size( testVecType ),
|
||||
expected);
|
||||
|
||||
char *outPtr = (char *)outData;
|
||||
outPtr += ( i / g_vector_aligns[vecSize] ) * get_explicit_type_size( vecType ) * g_vector_aligns[outVecSize];
|
||||
outPtr += ( i % g_vector_aligns[vecSize] ) * get_explicit_type_size( vecType );
|
||||
if( memcmp( expected, outPtr, get_explicit_type_size( vecType ) ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%08x), got (0x%08x) from (0x%08x) and (0x%08x) with test (0x%08x)\n",
|
||||
i / g_vector_aligns[vecSize],
|
||||
i % g_vector_aligns[vecSize],
|
||||
*( (int *)expected ),
|
||||
*( (int *)( (char *)outData +
|
||||
i * get_explicit_type_size( vecType
|
||||
) ) ),
|
||||
*( (int *)( (char *)inDataA +
|
||||
i * get_explicit_type_size( vecType
|
||||
) ) ),
|
||||
*( (int *)( (char *)inDataB +
|
||||
i * get_explicit_type_size( vecType
|
||||
) ) ),
|
||||
*( (int *)( (char *)inDataC +
|
||||
i*get_explicit_type_size( testVecType
|
||||
) ) ) );
|
||||
int j;
|
||||
log_error( "inA: " );
|
||||
unsigned char *a = (unsigned char *)( (char *)inDataA + i * get_explicit_type_size( vecType ) );
|
||||
unsigned char *b = (unsigned char *)( (char *)inDataB + i * get_explicit_type_size( vecType ) );
|
||||
unsigned char *c = (unsigned char *)( (char *)inDataC + i * get_explicit_type_size( testVecType ) );
|
||||
unsigned char *e = (unsigned char *)( expected );
|
||||
unsigned char *g = (unsigned char *)( (char *)outData + i * get_explicit_type_size( vecType ) );
|
||||
for( j = 0; j < 16; j++ )
|
||||
log_error( "0x%02x ", a[ j ] );
|
||||
log_error( "\ninB: " );
|
||||
for( j = 0; j < 16; j++ )
|
||||
log_error( "0x%02x ", b[ j ] );
|
||||
log_error( "\ninC: " );
|
||||
for( j = 0; j < 16; j++ )
|
||||
log_error( "0x%02x ", c[ j ] );
|
||||
log_error( "\nexp: " );
|
||||
for( j = 0; j < 16; j++ )
|
||||
log_error( "0x%02x ", e[ j ] );
|
||||
log_error( "\ngot: " );
|
||||
for( j = 0; j < 16; j++ )
|
||||
log_error( "0x%02x ", g[ j ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Host reference for bitselect on one element: for every bit, the result takes the bit from inDataB
// where the corresponding bit of inDataTest is set and from inDataA where it is clear. Operates on
// get_explicit_type_size( vecType ) bytes; testVecType and vecSize are not used.
void bitselect_verify_fn( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData )
{
    const char *srcA = (const char *)inDataA;
    const char *srcB = (const char *)inDataB;
    const char *mask = (const char *)inDataTest;
    char *dst = (char *)outData;
    const size_t byteCount = get_explicit_type_size( vecType );

    // Purely bitwise, so the element type only matters for its size
    for( size_t pos = 0; pos < byteCount; ++pos )
    {
        const char m = mask[ pos ];
        dst[ pos ] = ( srcA[ pos ] & ~m ) | ( srcB[ pos ] & m );
    }
}
|
||||
|
||||
// Test entry point for bitselect: runs test_select_kernel for ten element types and widths 1, 2, 3, 4,
// 8 and 16, using the same type for the mask input as for the data. Long/ulong are skipped when
// gHasLong is false and double is skipped when cl_khr_fp64 is unavailable. All combinations are
// attempted; returns -1 if any failed, otherwise 0. numElements is not used.
int test_relational_bitselect(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };   // zero-terminated
    const unsigned int typeCount = sizeof( vecType ) / sizeof( vecType[ 0 ] );
    int retVal = 0;
    RandomSeed seed( gRandomSeed );

    for( unsigned int t = 0; t < typeCount; t++ )
    {
        const ExplicitType type = vecType[ t ];

        const bool is64BitInt = ( type == kLong || type == kULong );
        if( is64BitInt && !gHasLong )
            continue;

        if( type == kDouble )
        {
            if( !is_extension_available(device, "cl_khr_fp64") )
            {
                log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
                continue;
            }
            log_info("Testing doubles.\n");
        }

        for( const unsigned int *width = vecSizes; *width != 0; width++ )
        {
            if( test_select_kernel(context, queue, "bitselect", type, *width, type, bitselect_verify_fn, seed ) == 0 )
                continue;

            log_error( " Vector %s%d FAILED\n", get_explicit_type_name( type ), *width );
            retVal = -1;
        }
    }

    return retVal;
}
|
||||
|
||||
// Host reference for select with a signed test type, for one element.
//
// When vecSize is 1 the element from inDataB is chosen if the test value is non-zero; for every other
// width it is chosen if the test value's most significant bit is set. Otherwise the element from
// inDataA is chosen. The chosen element (get_explicit_type_size( vecType ) bytes) is copied to
// outData. Test types other than kChar, kShort, kInt and kLong leave outData untouched.
void select_signed_verify_fn( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData )
{
    const bool scalar = ( vecSize == 1 );
    bool pickB = false;

    switch( testVecType )
    {
        case kChar:
        {
            const char t = *( (char *)inDataTest );
            pickB = scalar ? ( t != 0 ) : ( ( t & 0x80 ) != 0 );
            break;
        }
        case kShort:
        {
            const short t = *( (short *)inDataTest );
            pickB = scalar ? ( t != 0 ) : ( ( t & 0x8000 ) != 0 );
            break;
        }
        case kInt:
        {
            const int t = *( (int *)inDataTest );
            pickB = scalar ? ( t != 0 ) : ( ( t & 0x80000000L ) != 0 );
            break;
        }
        case kLong:
        {
            const cl_long t = *( (cl_long *)inDataTest );
            pickB = scalar ? ( t != 0 ) : ( ( t & 0x8000000000000000LL ) != 0 );
            break;
        }
        default:
            // Should never get here
            return;
    }

    const void *chosen = pickB ? inDataB : inDataA;
    memcpy( outData, chosen, get_explicit_type_size( vecType ) );
}
|
||||
|
||||
// Test entry point for select with signed test types: for every data type that is also one of the
// signed test types (char, short, int, long), runs test_select_kernel at widths 1, 2, 4, 8 and 16 with
// the test type equal to the data type. Long/ulong are skipped when gHasLong is false and double is
// skipped when cl_khr_fp64 is unavailable. All combinations are attempted; returns -1 if any failed,
// otherwise 0. numElements is not used.
int test_relational_select_signed(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    ExplicitType testVecType[] = { kChar, kShort, kInt, kLong, kNumExplicitTypes };   // sentinel-terminated
    unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };                                  // zero-terminated
    const unsigned int typeCount = sizeof( vecType ) / sizeof( vecType[ 0 ] );
    int retVal = 0;
    RandomSeed seed( gRandomSeed );

    for( unsigned int t = 0; t < typeCount; t++ )
    {
        const ExplicitType type = vecType[ t ];

        const bool is64BitInt = ( type == kLong || type == kULong );
        if( is64BitInt && !gHasLong )
            continue;

        if( type == kDouble )
        {
            if( !is_extension_available(device, "cl_khr_fp64") )
            {
                log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
                continue;
            }
            log_info("Testing doubles.\n");
        }

        for( const ExplicitType *testType = testVecType; *testType != kNumExplicitTypes; testType++ )
        {
            // Only pairs where the test type matches the data type are exercised
            if( *testType != type )
                continue;

            for( const unsigned int *width = vecSizes; *width != 0; width++ )
            {
                if( test_select_kernel(context, queue, "select", type, *width, *testType, select_signed_verify_fn, seed ) == 0 )
                    continue;

                log_error( " Vector %s%d, test vector %s%d FAILED\n", get_explicit_type_name( type ), *width,
                          get_explicit_type_name( *testType ), *width );
                retVal = -1;
            }
        }
    }

    return retVal;
}
|
||||
|
||||
// Reference implementation of select() for one element with an unsigned
// condition operand.
//
// The condition element at inDataTest is read as the unsigned type named by
// testVecType. For scalars (vecSize == 1) the B input is chosen when the
// condition is non-zero; for vectors it is chosen when the condition's most
// significant bit is set. The chosen element (get_explicit_type_size(vecType)
// bytes) is copied to outData. If testVecType is not one of the unsigned
// integer types, nothing is written.
void select_unsigned_verify_fn( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData )
{
    cl_ulong condition = 0;  // condition element widened to 64 bits
    cl_ulong topBit = 0;     // most significant bit of the condition's native width

    switch( testVecType )
    {
        case kUChar:
            condition = *( (unsigned char *)inDataTest );
            topBit = 0x80;
            break;
        case kUShort:
            condition = *( (unsigned short *)inDataTest );
            topBit = 0x8000;
            break;
        case kUInt:
            condition = *( (unsigned int *)inDataTest );
            topBit = 0x80000000UL;
            break;
        case kULong:
            condition = *( (cl_ulong *)inDataTest );
            topBit = 0x8000000000000000ULL;
            break;
        default:
            // Should never get here
            return;
    }

    const bool pickB = ( vecSize == 1 ) ? ( condition != 0 ) : ( ( condition & topBit ) != 0 );
    const void *chosen = pickB ? inDataB : inDataA;
    memcpy( outData, chosen, get_explicit_type_size( vecType ) );
}
|
||||
|
||||
// Exercises the select() built-in with an unsigned integer condition operand.
//
// For every data type in the candidate list, each unsigned condition type
// that is identical to the data type is run at every vector width through
// test_select_kernel, with select_unsigned_verify_fn computing the expected
// result. 64-bit integer data types are skipped when gHasLong is false, and
// double is skipped when cl_khr_fp64 is reported as unavailable.
//
// numElements is not used. Returns 0 when every combination passes and -1
// when at least one combination fails (all combinations are still attempted).
int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    ExplicitType testVecType[] = { kUChar, kUShort, kUInt, kULong, kNumExplicitTypes };
    unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
    const size_t numDataTypes = sizeof( vecType ) / sizeof( vecType[ 0 ] );
    int retVal = 0;
    RandomSeed seed(gRandomSeed);

    for( size_t dataIdx = 0; dataIdx < numDataTypes; ++dataIdx )
    {
        const ExplicitType dataType = vecType[ dataIdx ];

        // 64-bit integer data needs long support
        if( !gHasLong && ( dataType == kLong || dataType == kULong ) )
            continue;

        if( dataType == kDouble )
        {
            if( !is_extension_available( device, "cl_khr_fp64" ) )
            {
                log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
                continue;
            }
            log_info("Testing doubles.\n");
        }

        // The condition list is terminated by kNumExplicitTypes
        for( size_t condIdx = 0; testVecType[ condIdx ] != kNumExplicitTypes; ++condIdx )
        {
            const ExplicitType condType = testVecType[ condIdx ];
            if( condType != dataType )
                continue;

            // The width list is terminated by 0
            for( size_t sizeIdx = 0; vecSizes[ sizeIdx ] != 0; ++sizeIdx )
            {
                const unsigned int width = vecSizes[ sizeIdx ];
                if( test_select_kernel(context, queue, "select", dataType, width, condType, select_unsigned_verify_fn, seed ) == 0 )
                    continue;

                log_error( " Vector %s%d, test vector %s%d FAILED\n", get_explicit_type_name( dataType ), width,
                           get_explicit_type_name( condType ), width );
                retVal = -1;
            }
        }
    }

    return retVal;
}
|
||||
|
||||
|
||||
|
||||
// Per-precision comparison tests; they are declared here and not defined in
// the visible part of this file. The wrappers below call the float and the
// double variant of each one.

// Single-precision variants
int test_relational_isequal_float(        cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_isnotequal_float(     cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_isgreater_float(      cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_isgreaterequal_float( cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_isless_float(         cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_islessequal_float(    cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_islessgreater_float(  cl_device_id device, cl_context context, cl_command_queue queue, int numElements );

// Double-precision variants
int test_relational_isequal_double(        cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_isnotequal_double(     cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_isgreater_double(      cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_isgreaterequal_double( cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_isless_double(         cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_islessequal_double(    cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
int test_relational_islessgreater_double(  cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
|
||||
|
||||
|
||||
// Runs the float and then the double isequal test. The two results are
// OR-ed together, so the return value is 0 only when both returned 0.
int test_relational_isequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const int floatResult = test_relational_isequal_float( device, context, queue, numElements );
    const int doubleResult = test_relational_isequal_double( device, context, queue, numElements );
    return floatResult | doubleResult;
}
|
||||
|
||||
|
||||
// Runs the float and then the double isnotequal test. The two results are
// OR-ed together, so the return value is 0 only when both returned 0.
int test_relational_isnotequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const int floatResult = test_relational_isnotequal_float( device, context, queue, numElements );
    const int doubleResult = test_relational_isnotequal_double( device, context, queue, numElements );
    return floatResult | doubleResult;
}
|
||||
|
||||
|
||||
// Runs the float and then the double isgreater test. The two results are
// OR-ed together, so the return value is 0 only when both returned 0.
int test_relational_isgreater(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const int floatResult = test_relational_isgreater_float( device, context, queue, numElements );
    const int doubleResult = test_relational_isgreater_double( device, context, queue, numElements );
    return floatResult | doubleResult;
}
|
||||
|
||||
|
||||
// Runs the float and then the double isgreaterequal test. The two results
// are OR-ed together, so the return value is 0 only when both returned 0.
int test_relational_isgreaterequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const int floatResult = test_relational_isgreaterequal_float( device, context, queue, numElements );
    const int doubleResult = test_relational_isgreaterequal_double( device, context, queue, numElements );
    return floatResult | doubleResult;
}
|
||||
|
||||
|
||||
// Runs the float and then the double isless test. The two results are
// OR-ed together, so the return value is 0 only when both returned 0.
int test_relational_isless(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const int floatResult = test_relational_isless_float( device, context, queue, numElements );
    const int doubleResult = test_relational_isless_double( device, context, queue, numElements );
    return floatResult | doubleResult;
}
|
||||
|
||||
|
||||
// Runs the float and then the double islessequal test. The two results are
// OR-ed together, so the return value is 0 only when both returned 0.
int test_relational_islessequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const int floatResult = test_relational_islessequal_float( device, context, queue, numElements );
    const int doubleResult = test_relational_islessequal_double( device, context, queue, numElements );
    return floatResult | doubleResult;
}
|
||||
|
||||
|
||||
// Runs the float and then the double islessgreater test. The two results
// are OR-ed together, so the return value is 0 only when both returned 0.
int test_relational_islessgreater(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const int floatResult = test_relational_islessgreater_float( device, context, queue, numElements );
    const int doubleResult = test_relational_islessgreater_double( device, context, queue, numElements );
    return floatResult | doubleResult;
}
|
||||
|
||||
|
||||
932
test_conformance/relationals/test_shuffles.cpp
Normal file
932
test_conformance/relationals/test_shuffles.cpp
Normal file
@@ -0,0 +1,932 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
// #define USE_NEW_SYNTAX 1
|
||||
// The number of shuffles to test per test
#define NUM_TESTS 32

// The number of times to run each combination of shuffles
#define NUM_ITERATIONS_PER_TEST 2

// Capacity, in bytes, of the buffer the generated kernel source is built in.
// The replacement list is parenthesized so the macro keeps its value when it
// is expanded next to operators such as %, / or a cast.
#define MAX_PROGRAM_SIZE (NUM_TESTS*1024)

// Set to non-zero to log the source of every generated shuffle kernel
#define PRINT_SHUFFLE_KERNEL_SOURCE 0

// Set to non-zero to log per-element details while computing dual-input shuffles
#define SPEW_ORDER_DETAILS 0
|
||||
|
||||
// How the generated kernel moves elements from source to destination.
enum ShuffleMode
{
    kNormalMode             = 0,  // component-selector assignment: tmp.<order> = source[i].<order>
    kFunctionCallMode       = 1,  // as above, but the source is first passed through shuffle_fn()
    kArrayAccessMode        = 2,  // source is cast to a 16-wide vector pointer and read one element at a time
    kBuiltInFnMode          = 3,  // shuffle() built-in with a mask vector
    kBuiltInDualInputFnMode = 4   // shuffle2() built-in with a second source buffer
};
|
||||
|
||||
extern "C" { extern cl_uint gRandomSeed;};
|
||||
|
||||
// printf-style templates from which the shuffle test kernels are assembled.

// [0] is the kernel prologue (signature plus the single-work-item guard),
// [1] is the closing brace. The third slot has no initializer.
static const char *shuffleKernelPattern[3] = {
    // --- entry 0 ---
    "__kernel void sample_test( __global %s%s *source, __global %s%s *dest )\n"
    "{\n"
    " if (get_global_id(0) != 0) return;\n"
    // The next line is emitted as a comment in the kernel; it is kept around
    // for testing local storage options.
    " //%s%s src1 %s, src2%s;\n",   // <-- this comma ends entry 0
    // --- entry 1 ---
    "}\n"
};

// Declaration of the temporary that receives each shuffle result
static const char *shuffleTempPattern = " %s%s tmp;\n";

// Resets the temporary to zero before each shuffle
static const char *clearTempPattern = " tmp = (%s%s)((%s)0);\n";

// Component-selector shuffles. The V3 variants use vload3/vstore3 on the
// side(s) whose vector width is 3.
static const char *shuffleSinglePattern =
    " tmp%s%s = source[%d]%s%s;\n"
    " dest[%d] = tmp;\n";

static const char * shuffleSinglePatternV3src =
    " tmp%s%s = vload3(%d, source)%s%s;\n"
    " dest[%d] = tmp;\n";

static const char * shuffleSinglePatternV3dst =
    " tmp%s%s = source[%d]%s%s;\n"
    " vstore3(tmp, %d, dest);\n";

static const char * shuffleSinglePatternV3srcV3dst =
    "tmp%s%s = vload3(%d, source)%s%s;\n"
    "vstore3(tmp, %d, dest);\n";

// Prototype and definition of the pass-through helper used in function-call mode
static const char *shuffleFnLinePattern =
    "%s%s shuffle_fn( %s%s source );\n"
    "%s%s shuffle_fn( %s%s source ) { return source; }\n\n";

// Function-call shuffles, with the same V3 variants as above
static const char *shuffleFnPattern =
    " tmp%s%s = shuffle_fn( source[%d] )%s%s;\n"
    " dest[%d] = tmp;\n";

static const char *shuffleFnPatternV3src =
    " tmp%s%s = shuffle_fn( vload3(%d, source) )%s%s;\n"
    " dest[%d] = tmp;\n";

static const char *shuffleFnPatternV3dst =
    " tmp%s%s = shuffle_fn( source[%d] )%s%s;\n"
    " vstore3(tmp, %d, dest);\n";

static const char *shuffleFnPatternV3srcV3dst =
    " tmp%s%s = shuffle_fn(vload3(%d, source) )%s%s;\n"
    " vstore3(tmp, %d, dest);\n";

// shuffle() built-in function pattern
static const char *shuffleBuiltInPattern =
    " {\n"
    " %s%s src1 = %s;\n"
    " %s%s%s mask = (%s%s%s)( %s );\n"
    " tmp = shuffle( src1, mask );\n"
    " %s;\n"
    " }\n";

// shuffle2() built-in dual-input function pattern
static const char *shuffleBuiltInDualPattern =
    " {\n"
    " %s%s src1 = %s;\n"
    " %s%s src2 = %s;\n"
    " %s%s%s mask = (%s%s%s)( %s );\n"
    " tmp = shuffle2( src1, src2, mask );\n"
    " %s;\n"
    " }\n";
|
||||
|
||||
|
||||
// One element index per output position, for vectors of up to 16 elements
typedef unsigned char ShuffleOrder[ 16 ];

// Advances order to the next combination, treating its first orderSize
// entries as the digits of a little-endian counter in base orderRange:
// entry 0 is incremented, and every entry that reaches orderRange is reset
// to 0 with the carry moving to the next entry. When every digit overflows,
// all orderSize entries end up 0.
void incrementShuffleOrder( ShuffleOrder &order, size_t orderSize, size_t orderRange )
{
    size_t digit = 0;
    while( digit < orderSize )
    {
        unsigned char &slot = order[ digit ];
        slot = (unsigned char)( slot + 1 );
        if( slot < orderRange )
            break;      // no carry: done

        slot = 0;       // wrapped: carry into the next digit
        ++digit;
    }
}
|
||||
|
||||
// Returns true when any element index appears more than once among the first
// orderSize entries of order, false otherwise (including orderSize == 0).
//
// The seen-table has one slot for every possible unsigned char value, so an
// entry of 16 or more (dual-input orders use indices up to 31) can no longer
// index past the end of the table.
bool shuffleOrderContainsDuplicates( ShuffleOrder &order, size_t orderSize )
{
    bool seen[ 256 ] = { false };   // remaining slots are zero-initialized

    for( size_t i = 0; i < orderSize; i++ )
    {
        const unsigned char element = order[ i ];
        if( seen[ element ] )
            return true;
        seen[ element ] = true;
    }
    return false;
}
|
||||
|
||||
static void shuffleVector( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse )
|
||||
{
|
||||
for(size_t i = 0; i < lengthToUse; i++ )
|
||||
{
|
||||
unsigned char *inPtr = inVector + typeSize *order[ i ];
|
||||
memcpy( outVector, inPtr, typeSize );
|
||||
outVector += typeSize;
|
||||
}
|
||||
}
|
||||
|
||||
static void shuffleVector2( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse )
|
||||
{
|
||||
for(size_t i = 0; i < lengthToUse; i++ )
|
||||
{
|
||||
unsigned char *outPtr = outVector + typeSize *order[ i ];
|
||||
memcpy( outPtr, inVector, typeSize );
|
||||
inVector += typeSize;
|
||||
}
|
||||
}
|
||||
|
||||
static void shuffleVectorDual( unsigned char *inVector, unsigned char *inSecondVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse )
|
||||
{
|
||||
// This is tricky: the indices of each shuffle are in a range (0-srcVecSize * 2-1),
|
||||
// where (srcVecSize-srcVecSize*2-1) refers to the second input.
|
||||
size_t uphalfMask = (size_t)vecSize;
|
||||
size_t lowerBits = (size_t)( vecSize - 1 );
|
||||
|
||||
for(size_t i = 0; i < lengthToUse; i++ )
|
||||
{
|
||||
unsigned char *inPtr;
|
||||
#if SPEW_ORDER_DETAILS
|
||||
log_info("order[%d] is %d, or %d of %s\n", (int)i,
|
||||
(int)(order[i]),
|
||||
(int)(order[i] & lowerBits),
|
||||
((order[i]&uphalfMask) == 0)?"lower num":"upper num");
|
||||
#endif
|
||||
if( order[ i ] & uphalfMask )
|
||||
inPtr = inSecondVector + typeSize * ( order[ i ] & lowerBits );
|
||||
else
|
||||
inPtr = inVector + typeSize * ( order[ i ] & lowerBits );
|
||||
memcpy( outVector, inPtr, typeSize );
|
||||
outVector += typeSize;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static ShuffleOrder sNaturalOrder = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
|
||||
|
||||
static int useNumbersFlip = 0;
|
||||
const char *get_order_string( ShuffleOrder &order, size_t vecSize, cl_uint lengthToUse, bool byNumber, MTdata d )
|
||||
{
|
||||
// NOTE: names are only valid for hex characters (up to F) but for debugging, we use
|
||||
// this to print out orders for dual inputs, which actually can be valid up to position 31 (two 16-element vectors)
|
||||
// so we go ahead and fake the rest of the alphabet for those other 16 positions, so we have
|
||||
// some (indirectly) meaningful output
|
||||
char names[] = "0123456789abcdefghijklmnopqrstuv";
|
||||
char namesUpperCase[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
|
||||
char names2[] = "xyzw!!!!!!!!!!!!";
|
||||
|
||||
static char orderString[ 18 ];
|
||||
|
||||
size_t j, idx;
|
||||
|
||||
// Assume we don't have to use numbers
|
||||
byNumber = 0;
|
||||
// Check to see
|
||||
for( j = 0; j < lengthToUse; j++ )
|
||||
{
|
||||
if (order[j] > 3) {
|
||||
// An index is > xyzw so we need to use numbers
|
||||
byNumber = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If we can use numbers, do so half the time.
|
||||
if (!byNumber) {
|
||||
byNumber = (useNumbersFlip++)%2;
|
||||
}
|
||||
// Do not use xyzw for vectors whose length is not 2 or 4 per the spec.
|
||||
if (vecSize != 2 || vecSize != 4 || vecSize != 3)
|
||||
byNumber = 1;
|
||||
|
||||
if( byNumber || vecSize > 4 )
|
||||
{
|
||||
idx = 0;
|
||||
// Randomly chose upper and lower case S
|
||||
orderString[ idx++ ] = random_in_range(0, 1, d) ? 's' : 'S';
|
||||
for( j = 0; j < vecSize && j < lengthToUse; j++ ) {
|
||||
// Randomly choose upper and lower case.
|
||||
orderString[ idx++ ] = random_in_range(0, 1, d) ? names[ (int)order[ j ] ] : namesUpperCase[ (int)order[ j ] ];
|
||||
}
|
||||
orderString[ idx++ ] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
for( j = 0; j < vecSize && j < lengthToUse; j++ ) {
|
||||
// Randomly choose upper and lower case.
|
||||
orderString[ j ] = names2[ (int)order[ j ] ];
|
||||
}
|
||||
orderString[ j ] = 0;
|
||||
}
|
||||
|
||||
return orderString;
|
||||
}
|
||||
|
||||
// Produces a human-readable description of one shuffle, of the form
// "order <type><inSize>.<inSelector> -> <type><outSize>.<outSelector>".
// The size/selector part is left empty on a side whose vector size is 1.
//
// Both selector strings are generated with outVecSize as the width limit,
// the input one first. The result lives in a static buffer that is
// overwritten by the next call.
char * get_order_name( ExplicitType vecType, size_t inVecSize, size_t outVecSize, ShuffleOrder &inOrder, ShuffleOrder &outOrder, cl_uint lengthToUse, MTdata d, bool inUseNumerics, bool outUseNumerics )
{
    static char orderName[ 512 ] = "";
    char inOrderStr[ 512 ], outOrderStr[ 512 ];

    inOrderStr[ 0 ] = 0;
    if( inVecSize != 1 )
    {
        const char *inSelector = get_order_string( inOrder, outVecSize, lengthToUse, inUseNumerics, d );
        sprintf( inOrderStr, "%d.%s", (int)inVecSize, inSelector );
    }

    outOrderStr[ 0 ] = 0;
    if( outVecSize != 1 )
    {
        const char *outSelector = get_order_string( outOrder, outVecSize, lengthToUse, outUseNumerics, d );
        sprintf( outOrderStr, "%d.%s", (int)outVecSize, outSelector );
    }

    const char *typeName = get_explicit_type_name( vecType );
    sprintf( orderName, "order %s%s -> %s%s", typeName, inOrderStr, typeName, outOrderStr );
    return orderName;
}
|
||||
|
||||
void print_hex_mem_dump( const unsigned char *inDataPtr, const unsigned char * inDataPtr2, const unsigned char *expected, const unsigned char *outDataPtr, size_t inVecSize, size_t outVecSize, size_t typeSize )
|
||||
{
|
||||
char error [4096] = "";
|
||||
strcat(error, " Source: ");
|
||||
for( unsigned int j = 0; j < inVecSize * typeSize; j++ )
|
||||
{
|
||||
sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr[ j ] );
|
||||
}
|
||||
if( inDataPtr2 != NULL )
|
||||
{
|
||||
strcat(error, "\n Source 2: ");
|
||||
for( unsigned int j = 0; j < inVecSize * typeSize; j++ )
|
||||
{
|
||||
sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr2[ j ] );
|
||||
}
|
||||
}
|
||||
strcat(error, "\n Expected: " );
|
||||
for( unsigned int j = 0; j < outVecSize * typeSize; j++ )
|
||||
{
|
||||
sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)expected[ j ] );
|
||||
}
|
||||
strcat(error, "\n Actual: " );
|
||||
for( unsigned int j = 0; j < outVecSize * typeSize; j++ )
|
||||
{
|
||||
sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)outDataPtr[ j ] );
|
||||
}
|
||||
log_info("%s\n", error);
|
||||
}
|
||||
|
||||
void generate_shuffle_mask( char *outMaskString, size_t maskSize, const ShuffleOrder *order )
|
||||
{
|
||||
outMaskString[ 0 ] = 0;
|
||||
if( order != NULL )
|
||||
{
|
||||
for( size_t jj = 0; jj < maskSize; jj++ )
|
||||
{
|
||||
char thisMask[ 16 ];
|
||||
sprintf( thisMask, "%s%d", ( jj == 0 ) ? "" : ", ", (*order)[ jj ] );
|
||||
strcat( outMaskString, thisMask );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( size_t jj = 0; jj < maskSize; jj++ )
|
||||
{
|
||||
char thisMask[ 16 ];
|
||||
sprintf( thisMask, "%s%ld", ( jj == 0 ) ? "" : ", ", jj );
|
||||
strcat( outMaskString, thisMask );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int create_shuffle_kernel( cl_context context, cl_program *outProgram, cl_kernel *outKernel,
|
||||
size_t *outRealVecSize,
|
||||
ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, bool inUseNumerics, bool outUseNumerics,
|
||||
size_t numOrders, ShuffleOrder *inOrders, ShuffleOrder *outOrders,
|
||||
MTdata d, ShuffleMode shuffleMode = kNormalMode )
|
||||
{
|
||||
char inOrder[18], shuffledOrder[18];
|
||||
size_t typeSize;
|
||||
char kernelSource[MAX_PROGRAM_SIZE], progLine[ 10240 ];
|
||||
char *programPtr;
|
||||
char inSizeName[4], outSizeName[4], outRealSizeName[4], inSizeArgName[4];
|
||||
char outSizeNameTmpVar[4];
|
||||
|
||||
|
||||
/* Create the source; note vec size is the vector length we are testing */
|
||||
if( inVecSize == 1 ) //|| (inVecSize == 3)) // just have arrays if we go with size 3
|
||||
inSizeName[ 0 ] = 0;
|
||||
else
|
||||
sprintf( inSizeName, "%ld", inVecSize );
|
||||
if( inVecSize == 3 )
|
||||
inSizeArgName[ 0 ] = 0;
|
||||
else
|
||||
strcpy( inSizeArgName, inSizeName );
|
||||
|
||||
|
||||
typeSize = get_explicit_type_size( vecType );
|
||||
|
||||
*outRealVecSize = outVecSize;
|
||||
|
||||
if( outVecSize == 1 || (outVecSize == 3))
|
||||
outSizeName[ 0 ] = 0;
|
||||
else
|
||||
sprintf( outSizeName, "%d", (int)outVecSize );
|
||||
|
||||
if(outVecSize == 1) {
|
||||
outSizeNameTmpVar[0] = 0;
|
||||
} else {
|
||||
sprintf(outSizeNameTmpVar, "%d", (int)outVecSize);
|
||||
}
|
||||
|
||||
if( *outRealVecSize == 1 || ( *outRealVecSize == 3))
|
||||
outRealSizeName[ 0 ] = 0;
|
||||
else
|
||||
sprintf( outRealSizeName, "%d", (int)*outRealVecSize );
|
||||
|
||||
|
||||
// Loop through and create the source for all order strings
|
||||
kernelSource[ 0 ] = 0;
|
||||
if (vecType == kDouble) {
|
||||
strcat(kernelSource, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n");
|
||||
}
|
||||
|
||||
if( shuffleMode == kFunctionCallMode )
|
||||
{
|
||||
sprintf( progLine, shuffleFnLinePattern, get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName,
|
||||
get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName );
|
||||
strcat(kernelSource, progLine);
|
||||
}
|
||||
|
||||
// We're going to play a REALLY NASTY trick here. We're going to use the inSize insert point
|
||||
// to put in an entire third parameter if we need it
|
||||
char inParamSizeString[ 1024 ];
|
||||
if( shuffleMode == kBuiltInDualInputFnMode )
|
||||
sprintf( inParamSizeString, "%s *secondSource, __global %s%s", inSizeArgName, get_explicit_type_name( vecType ), inSizeArgName );
|
||||
else
|
||||
strcpy( inParamSizeString, inSizeArgName );
|
||||
|
||||
// These two take care of unused variable warnings
|
||||
const char * src2EnableA = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "/*";
|
||||
const char * src2EnableB = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "*/";
|
||||
|
||||
sprintf( progLine, shuffleKernelPattern[ 0 ], get_explicit_type_name( vecType ), inParamSizeString,
|
||||
get_explicit_type_name( vecType ), outRealSizeName, get_explicit_type_name( vecType ), inSizeName,
|
||||
src2EnableA, src2EnableB );
|
||||
strcat(kernelSource, progLine);
|
||||
if( inOrders == NULL )
|
||||
strcpy( inOrder, get_order_string( sNaturalOrder, outVecSize, (cl_uint)outVecSize, inUseNumerics, d ) );
|
||||
|
||||
sprintf( progLine, shuffleTempPattern, get_explicit_type_name( vecType ), outSizeNameTmpVar);
|
||||
strcat(kernelSource, progLine);
|
||||
|
||||
for( unsigned int i = 0; i < numOrders; i++ )
|
||||
{
|
||||
if( inOrders != NULL )
|
||||
strcpy( inOrder, get_order_string( inOrders[ i ], outVecSize, lengthToUse[i], inUseNumerics, d ) );
|
||||
strcpy( shuffledOrder, get_order_string( outOrders[ i ], outVecSize, lengthToUse[i], outUseNumerics, d ) );
|
||||
|
||||
|
||||
sprintf( progLine, clearTempPattern, get_explicit_type_name( vecType ), outSizeName,get_explicit_type_name( vecType ));
|
||||
strcat(kernelSource, progLine);
|
||||
|
||||
|
||||
if( shuffleMode == kNormalMode )
|
||||
{
|
||||
if(outVecSize == 3 && inVecSize == 3) {
|
||||
// shuffleSinglePatternV3srcV3dst
|
||||
sprintf( progLine, shuffleSinglePatternV3srcV3dst,
|
||||
outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
|
||||
inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
|
||||
} else if(inVecSize == 3) {
|
||||
// shuffleSinglePatternV3src
|
||||
sprintf( progLine, shuffleSinglePatternV3src,
|
||||
outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
|
||||
inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
|
||||
} else if(outVecSize == 3) {
|
||||
// shuffleSinglePatternV3dst
|
||||
sprintf( progLine, shuffleSinglePatternV3dst,
|
||||
outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
|
||||
inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "",
|
||||
(int)i );
|
||||
} else {
|
||||
sprintf( progLine, shuffleSinglePattern,
|
||||
outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
|
||||
inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
|
||||
}
|
||||
}
|
||||
else if( shuffleMode == kFunctionCallMode )
|
||||
{
|
||||
// log_info("About to make a shuffle line\n");
|
||||
// fflush(stdout);
|
||||
if(inVecSize == 3 && outVecSize == 3) { // swap last two
|
||||
sprintf( progLine, shuffleFnPatternV3srcV3dst,
|
||||
outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
|
||||
inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "",
|
||||
(int)i );
|
||||
} else if(outVecSize == 3) { // swap last two
|
||||
// log_info("Here\n\n");
|
||||
// fflush(stdout);
|
||||
sprintf( progLine, shuffleFnPatternV3dst,
|
||||
outVecSize > 1 ? "." : "",
|
||||
outVecSize > 1 ? shuffledOrder : "",
|
||||
(int)i,
|
||||
inVecSize > 1 ? "." : "",
|
||||
inVecSize > 1 ? inOrder : "",
|
||||
(int)i );
|
||||
// log_info("\n%s\n", progLine);
|
||||
// fflush(stdout);
|
||||
} else if(inVecSize == 3) {
|
||||
sprintf( progLine, shuffleFnPatternV3src,
|
||||
outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
|
||||
inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
|
||||
} else {
|
||||
sprintf( progLine, shuffleFnPattern,
|
||||
outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
|
||||
inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
|
||||
}
|
||||
}
|
||||
else if( shuffleMode == kArrayAccessMode )
|
||||
{ // now we want to replace inSizeName with inSizeNameShuffleFn
|
||||
int vectorSizeToCastTo = 16;
|
||||
cl_uint item;
|
||||
for (item =0; item<lengthToUse[i]; item++) {
|
||||
int absoluteIndex = i*(int)inVecSize+(int)inOrders[i][item];
|
||||
int castVectorIndex = absoluteIndex/vectorSizeToCastTo;
|
||||
size_t castElementIndex = absoluteIndex % vectorSizeToCastTo;
|
||||
ShuffleOrder myOutOrders, myInOrders;
|
||||
myOutOrders[0] = outOrders[i][item];
|
||||
myInOrders[0] = castElementIndex;
|
||||
|
||||
strcpy( inOrder, get_order_string( myInOrders, 1, 1, 0, d ) );
|
||||
strcpy( shuffledOrder, get_order_string( myOutOrders, 1, 1, 0, d ) );
|
||||
|
||||
sprintf(progLine, " tmp%s%s = ((__global %s%d *)source)[%d]%s%s;\n",
|
||||
outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "",
|
||||
get_explicit_type_name( vecType ), vectorSizeToCastTo,
|
||||
castVectorIndex,
|
||||
vectorSizeToCastTo > 1 ? "." : "", vectorSizeToCastTo > 1 ? inOrder : "");
|
||||
strcat(kernelSource, progLine);
|
||||
}
|
||||
if(outVecSize == 3) {
|
||||
sprintf(progLine," vstore3(tmp, %d, (__global %s *)dest);\n",
|
||||
i, get_explicit_type_name( vecType ));
|
||||
// probably don't need that last
|
||||
// cast to (__global %s *) where %s is get_explicit_type_name( vecType)
|
||||
} else {
|
||||
sprintf(progLine," dest[%d] = tmp;\n", i );
|
||||
}
|
||||
}
|
||||
else // shuffleMode == kBuiltInFnMode or kBuiltInDualInputFnMode
|
||||
{
|
||||
if(inVecSize == 3 || outVecSize == 3 ||
|
||||
inVecSize == 1 || outVecSize == 1) {
|
||||
// log_info("Skipping test for size 3\n");
|
||||
continue;
|
||||
}
|
||||
ExplicitType maskType = vecType;
|
||||
if( maskType == kFloat )
|
||||
maskType = kUInt;
|
||||
if( maskType == kDouble) {
|
||||
maskType = kULong;
|
||||
}
|
||||
|
||||
char maskString[ 1024 ] = "";
|
||||
size_t maskSize = outVecSize;// ( shuffleMode == kBuiltInDualInputFnMode ) ? ( outVecSize << 1 ) : outVecSize;
|
||||
generate_shuffle_mask( maskString, maskSize, ( outOrders != NULL ) ? &outOrders[ i ] : NULL );
|
||||
|
||||
// Set up a quick prefix, so mask gets unsigned type regardless of the input/output type
|
||||
char maskPrefix[ 2 ] = "u";
|
||||
if( get_explicit_type_name( maskType )[ 0 ] == 'u' )
|
||||
maskPrefix[ 0 ] = 0;
|
||||
|
||||
char progLine2[ 10240 ];
|
||||
if( shuffleMode == kBuiltInDualInputFnMode )
|
||||
{
|
||||
sprintf( progLine2, shuffleBuiltInDualPattern, get_explicit_type_name( vecType ), inSizeName,
|
||||
( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]",
|
||||
get_explicit_type_name( vecType ), inSizeName,
|
||||
( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)secondSource )" : "secondSource[ %ld ]",
|
||||
maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName,
|
||||
maskString,
|
||||
( outVecSize == 3 ) ? "vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" );
|
||||
|
||||
if( outVecSize == 3 )
|
||||
{
|
||||
if( inVecSize == 3 )
|
||||
sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) );
|
||||
else
|
||||
sprintf( progLine, progLine2, i, i, i, get_explicit_type_name( vecType ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( inVecSize == 3 )
|
||||
sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i );
|
||||
else
|
||||
sprintf( progLine, progLine2, i, i, i );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf( progLine2, shuffleBuiltInPattern, get_explicit_type_name( vecType ), inSizeName,
|
||||
( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]",
|
||||
maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName,
|
||||
maskString,
|
||||
( outVecSize == 3 ) ? "vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" );
|
||||
|
||||
if( outVecSize == 3 )
|
||||
{
|
||||
if( inVecSize == 3 )
|
||||
sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) );
|
||||
else
|
||||
sprintf( progLine, progLine2, i, i, get_explicit_type_name( vecType ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( inVecSize == 3 )
|
||||
sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i );
|
||||
else
|
||||
sprintf( progLine, progLine2, i, i );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
strcat( kernelSource, progLine );
|
||||
if (strlen(kernelSource) > 0.9*MAX_PROGRAM_SIZE)
|
||||
log_info("WARNING: Program has grown to 90%% (%d) of the defined max program size of %d\n", (int)strlen(kernelSource), (int)MAX_PROGRAM_SIZE);
|
||||
}
|
||||
strcat( kernelSource, shuffleKernelPattern[ 1 ] );
|
||||
|
||||
// Print the kernel source
|
||||
if (PRINT_SHUFFLE_KERNEL_SOURCE)
|
||||
log_info( "Kernel:%s\n", kernelSource );
|
||||
|
||||
/* Create kernel */
|
||||
programPtr = kernelSource;
|
||||
if( create_single_kernel_helper( context, outProgram, outKernel, 1, (const char **)&programPtr, "sample_test" ) )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Builds one shuffle kernel covering numOrders orderings, runs it with one
 * work-item per ordering, and checks every result vector against a reference
 * computed on the host.
 *
 * context, queue   - OpenCL context and command queue used for all calls.
 * vecType          - element type of the vectors being shuffled.
 * inVecSize        - number of elements in each source vector.
 * outVecSize       - number of elements in each destination vector.
 * lengthToUse      - per-ordering count of elements to shuffle.
 * numOrders        - number of orderings (and work-items) in this batch.
 * inOrderIdx       - per-ordering source index lists.
 * outOrderIdx      - per-ordering destination index lists / shuffle masks.
 * inUseNumerics,
 * outUseNumerics   - forwarded to create_shuffle_kernel and get_order_name.
 * d                - random number generator state.
 * shuffleMode      - which shuffle flavour to generate and verify.
 *
 * Returns 0 when every ordering matched, otherwise the number of mismatching
 * orderings found (checking stops once more than MAX_ERRORS_TO_PRINT have been
 * reported). A failure while building or running the kernel returns a non-zero
 * status instead.
 */
int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue,
                             ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, size_t numOrders,
                             ShuffleOrder *inOrderIdx, ShuffleOrder *outOrderIdx, bool inUseNumerics, bool outUseNumerics, MTdata d,
                             ShuffleMode shuffleMode = kNormalMode )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error;
    size_t threads[1], localThreads[1];
    size_t typeSize, outRealVecSize;
    clMemWrapper streams[ 3 ];

    /* Create the source */
    error = create_shuffle_kernel( context, &program, &kernel, &outRealVecSize, vecType,
                                   inVecSize, outVecSize, lengthToUse, inUseNumerics, outUseNumerics, numOrders, inOrderIdx, outOrderIdx,
                                   d, shuffleMode );
    if( error != 0 )
        return error;

    typeSize = get_explicit_type_size( vecType );

    // Host buffers are sized in cl_long units so that they are large enough for
    // every element type handled here.
#if !(defined(_WIN32) && defined (_MSC_VER))
    cl_long inData[ inVecSize * numOrders ];
    cl_long inSecondData[ inVecSize * numOrders ];
    cl_long outData[ outRealVecSize * numOrders ];
#else
    // _malloca may satisfy a request from the heap, so every block has to be
    // handed back with _freea. This function has several exits (including the
    // error-check macros), so a scope guard releases the blocks on all of them.
    struct MallocaReleaser
    {
        explicit MallocaReleaser( void *p ) : block( p ) {}
        ~MallocaReleaser() { _freea( block ); }
        void *block;
    };

    cl_long* inData       = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
    MallocaReleaser inDataReleaser( inData );
    cl_long* inSecondData = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
    MallocaReleaser inSecondDataReleaser( inSecondData );
    cl_long* outData      = (cl_long*)_malloca(outRealVecSize * numOrders * sizeof(cl_long));
    MallocaReleaser outDataReleaser( outData );

    if( inData == NULL || inSecondData == NULL || outData == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers for shuffle test\n" );
        return -1;
    }
#endif
    memset(outData, 0, outRealVecSize * numOrders * sizeof(cl_long) );

    generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inData );
    if( shuffleMode == kBuiltInDualInputFnMode )
        generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inSecondData );

    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * inVecSize * numOrders, inData, &error);
    test_error( error, "Unable to create input stream" );

    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * outRealVecSize * numOrders, outData, &error);
    test_error( error, "Unable to create output stream" );

    // In dual-input mode the second source buffer is bound first (argument 0),
    // which shifts the primary input and the output to arguments 1 and 2.
    int argIndex = 0;
    if( shuffleMode == kBuiltInDualInputFnMode )
    {
        streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * inVecSize * numOrders, inSecondData, &error);
        test_error( error, "Unable to create second input stream" );

        error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 2 ] ), &streams[ 2 ] );
        test_error( error, "Unable to set kernel argument" );
    }

    // Set kernel arguments
    error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );


    /* Run the kernel */
    threads[0] = numOrders;

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );


    // Read the results back
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, typeSize * numOrders * outRealVecSize, outData, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Byte cursors that walk the host buffers one vector per ordering.
    unsigned char *inDataPtr = (unsigned char *)inData;
    unsigned char *inSecondDataPtr = (unsigned char *)inSecondData;
    unsigned char *outDataPtr = (unsigned char *)outData;
    int ret = 0;
    int errors_printed = 0;
    for( size_t i = 0; i < numOrders; i++ )
    {
        unsigned char expected[ 1024 ];
        unsigned char temp[ 1024 ];
        memset(expected, 0, sizeof(expected));
        memset(temp, 0, sizeof(temp));

        // Compute the reference result for this ordering on the host.
        if( shuffleMode == kBuiltInFnMode )
            shuffleVector( inDataPtr, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] );
        else if( shuffleMode == kBuiltInDualInputFnMode )
            shuffleVectorDual( inDataPtr, inSecondDataPtr, expected, outOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] );
        else
        {
            shuffleVector( inDataPtr, temp, inOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] );
            shuffleVector2( temp, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] );
        }

        if( memcmp( expected, outDataPtr, outVecSize * typeSize ) != 0 )
        {
            log_error( " ERROR: Shuffle test %d FAILED for %s (memory hex dump follows)\n", (int)i,
                       get_order_name( vecType, inVecSize, outVecSize, inOrderIdx[ i ], outOrderIdx[ i ], lengthToUse[i], d, inUseNumerics, outUseNumerics ) );

            print_hex_mem_dump( inDataPtr, ( shuffleMode == kBuiltInDualInputFnMode ) ? inSecondDataPtr : NULL, expected, outDataPtr, inVecSize, outVecSize, typeSize );

            if( ( shuffleMode == kBuiltInFnMode ) || ( shuffleMode == kBuiltInDualInputFnMode ) )
            {
                // Mask would've been different for every shuffle done, so we have to regen it to print it
                char maskString[ 1024 ];
                generate_shuffle_mask( maskString, outVecSize, ( outOrderIdx != NULL ) ? &outOrderIdx[ i ] : NULL );
                log_error( " Mask: %s\n", maskString );
            }

            ret++;
            errors_printed++;
            if (errors_printed > MAX_ERRORS_TO_PRINT)
            {
                log_info("Further errors suppressed.\n");
                return ret;
            }
        }

        // The output advances by the real (stored) vector size, which
        // create_shuffle_kernel reports separately from outVecSize.
        inDataPtr += inVecSize * typeSize;
        inSecondDataPtr += inVecSize * typeSize;
        outDataPtr += outRealVecSize * typeSize;
    }

    return ret;
}
|
||||
|
||||
/*
 * Fills the first `length` entries of outIndices with random selectors drawn
 * from [0, selectLength - 1].
 *
 * When allowRepeats is false, every selector is used at most once; a draw that
 * was already used is simply retried until an unused one comes up.
 */
void build_random_shuffle_order( ShuffleOrder &outIndices, unsigned int length, unsigned int selectLength, bool allowRepeats, MTdata d )
{
    // One "already used" marker per possible selector value.
    char taken[ 16 ];
    memset( taken, 0, sizeof( taken ) );

    for( unsigned int slot = 0; slot < length; ++slot )
    {
        char pick = (char)random_in_range( 0, selectLength - 1, d );

        if( allowRepeats )
        {
            outIndices[ slot ] = pick;
            continue;
        }

        // Redraw until we land on a selector that has not been handed out yet.
        while( taken[ (int)pick ] )
        {
            pick = (char)random_in_range( 0, selectLength - 1, d );
        }
        taken[ (int)pick ] = true;
        outIndices[ slot ] = pick;
    }
}
|
||||
|
||||
class shuffleBuffer
|
||||
{
|
||||
public:
|
||||
|
||||
shuffleBuffer( cl_context ctx, cl_command_queue queue, ExplicitType type, size_t inSize, size_t outSize, ShuffleMode mode )
|
||||
{
|
||||
mContext = ctx;
|
||||
mQueue = queue;
|
||||
mVecType = type;
|
||||
mInVecSize = inSize;
|
||||
mOutVecSize = outSize;
|
||||
mShuffleMode = mode;
|
||||
|
||||
mCount = 0;
|
||||
|
||||
// Here's the deal with mLengthToUse[i].
|
||||
// if you have, for instance
|
||||
// uchar4 dst;
|
||||
// uchar8 src;
|
||||
// you can do
|
||||
// src.s0213 = dst.s1045;
|
||||
// but you can also do
|
||||
// src.s02 = dst.s10;
|
||||
// which has a different effect
|
||||
// The intent with these "sub lengths" is to test all such
|
||||
// possibilities
|
||||
// Calculate a range of sub-lengths within the vector to copy.
|
||||
int i;
|
||||
size_t maxSize = (mInVecSize < mOutVecSize) ? mInVecSize : mOutVecSize;
|
||||
for(i=0; i<NUM_TESTS; i++)
|
||||
{
|
||||
// Built-in fns can't select sub-lengths (the mask must be the length of the dest vector).
|
||||
// Well, at least for these tests...
|
||||
if( ( mode == kBuiltInFnMode ) || ( mode == kBuiltInDualInputFnMode ) )
|
||||
mLengthToUse[i] = (cl_int)mOutVecSize;
|
||||
else
|
||||
{
|
||||
mLengthToUse[i] = (cl_uint)(((double)i/NUM_TESTS)*maxSize) + 1;
|
||||
// Force the length to be a valid vector length.
|
||||
if( ( mLengthToUse[i] == 1 ) && ( mode != kBuiltInFnMode ) )
|
||||
mLengthToUse[i] = 1;
|
||||
else if (mLengthToUse[i] < 4)
|
||||
mLengthToUse[i] = 2;
|
||||
else if (mLengthToUse[i] < 8)
|
||||
mLengthToUse[i] = 4;
|
||||
else if (mLengthToUse[i] < 16)
|
||||
mLengthToUse[i] = 8;
|
||||
else
|
||||
mLengthToUse[i] = 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int AddRun( ShuffleOrder &inOrder, ShuffleOrder &outOrder, MTdata d )
|
||||
{
|
||||
memcpy( &mInOrders[ mCount ], &inOrder, sizeof( inOrder ) );
|
||||
memcpy( &mOutOrders[ mCount ], &outOrder, sizeof( outOrder ) );
|
||||
mCount++;
|
||||
|
||||
if( mCount == NUM_TESTS )
|
||||
return Flush(d);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
int Flush( MTdata d )
|
||||
{
|
||||
int err = CL_SUCCESS;
|
||||
if( mCount > 0 )
|
||||
{
|
||||
err = test_shuffle_dual_kernel( mContext, mQueue, mVecType, mInVecSize, mOutVecSize, mLengthToUse,
|
||||
mCount, mInOrders, mOutOrders, true, true, d, mShuffleMode );
|
||||
mCount = 0;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
protected:
|
||||
cl_context mContext;
|
||||
cl_command_queue mQueue;
|
||||
ExplicitType mVecType;
|
||||
size_t mInVecSize, mOutVecSize, mCount;
|
||||
ShuffleMode mShuffleMode;
|
||||
cl_uint mLengthToUse[ NUM_TESTS ];
|
||||
|
||||
ShuffleOrder mInOrders[ NUM_TESTS ], mOutOrders[ NUM_TESTS ];
|
||||
};
|
||||
|
||||
|
||||
/*
 * Runs randomly generated shuffles for every supported element type and every
 * (source size, destination size) pair in the given shuffle mode.
 *
 * device      - queried for cl_khr_fp64 before the double tests are attempted.
 * context,
 * queue       - forwarded to the shuffleBuffer that runs the kernels.
 * shuffleMode - shuffle flavour under test.
 * d           - random number generator state used to build the shuffle orders.
 *
 * Double tests are skipped without cl_khr_fp64, and long/ulong tests are
 * skipped when gHasLong is false. For the built-in modes, odd vector sizes
 * (1 and 3) are skipped.
 *
 * Returns the number of batches that reported a failure (0 means all passed).
 */
int test_shuffle_random(cl_device_id device, cl_context context, cl_command_queue queue, ShuffleMode shuffleMode, MTdata d )
{
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };   // 0 terminates the list
    const unsigned int numTypes = (unsigned int)( sizeof( vecType ) / sizeof( vecType[ 0 ] ) );
    unsigned int srcIdx, dstIdx, typeIndex;
    int error = 0, totalError = 0, prevTotalError = 0;
    // Separate generator handed to the shuffleBuffer; `d` only drives the
    // generation of the shuffle orders below.
    RandomSeed seed(gRandomSeed);

    for( typeIndex = 0; typeIndex < numTypes; typeIndex++ )
    {
        if (vecType[typeIndex] == kDouble) {
            if (!is_extension_available(device, "cl_khr_fp64")) {
                log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
                continue;
            }
            log_info("Testing doubles.\n");
        }

        if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong )
        {
            log_info("Long types are unsupported, skipping.\n");
            continue;
        }

        error = 0;
        for( srcIdx = 0; vecSizes[ srcIdx ] != 0; srcIdx++ )
        {
            for( dstIdx = 0; vecSizes[ dstIdx ] != 0; dstIdx++ )
            {
                if( ( ( shuffleMode == kBuiltInDualInputFnMode ) || ( shuffleMode == kBuiltInFnMode ) ) &&
                    ( ( vecSizes[ dstIdx ] & 1 ) || ( vecSizes[ srcIdx ] & 1 ) ) )
                {
                    // Built-in shuffle functions don't work on size 1 (scalars) or size 3 (vec3s)
                    continue;
                }

                log_info("Testing [%s%d to %s%d]... ", get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[srcIdx], get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[dstIdx]);
                shuffleBuffer buffer( context, queue, vecType[ typeIndex ], vecSizes[ srcIdx ], vecSizes[ dstIdx ], shuffleMode );

                int numTests = NUM_TESTS*NUM_ITERATIONS_PER_TEST;
                for( int i = 0; i < numTests; i++ )
                {
                    // AddRun copies both orders in full, and the built-in modes
                    // never fill `src`, so start from a known all-zero state
                    // instead of copying indeterminate bytes.
                    ShuffleOrder src, dst;
                    memset( &src, 0, sizeof( src ) );
                    memset( &dst, 0, sizeof( dst ) );

                    if( shuffleMode == kBuiltInFnMode )
                    {
                        build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d );
                    }
                    else if(shuffleMode == kBuiltInDualInputFnMode)
                    {
                        // Two source vectors, so the mask may select from twice as many elements.
                        build_random_shuffle_order(dst, vecSizes[dstIdx], 2*vecSizes[srcIdx], true, d);
                    }
                    else
                    {
                        build_random_shuffle_order( src, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d );
                        build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ dstIdx ], false, d );
                    }

                    error = buffer.AddRun( src, dst, seed );
                    if (error)
                        totalError++;
                }

                // Run whatever is still queued after the last full batch.
                int flushError = buffer.Flush(seed);
                if (flushError)
                    totalError++;

                if (totalError == prevTotalError)
                    log_info("\tPassed.\n");
                else
                {
                    log_error("\tFAILED.\n");
                    prevTotalError = totalError;
                }
            }
        }
    }
    return totalError;
}
|
||||
|
||||
// Harness entry point: runs the random shuffle test in kNormalMode with a
// generator seeded from gRandomSeed. n_elems is not used.
int test_shuffle_copy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    const ShuffleMode mode = kNormalMode;
    RandomSeed rng( gRandomSeed );

    return test_shuffle_random( device, context, queue, mode, rng );
}
|
||||
|
||||
// Harness entry point: runs the random shuffle test in kFunctionCallMode with a
// generator seeded from gRandomSeed. n_elems is not used.
int test_shuffle_function_call(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    const ShuffleMode mode = kFunctionCallMode;
    RandomSeed rng( gRandomSeed );

    return test_shuffle_random( device, context, queue, mode, rng );
}
|
||||
|
||||
// Harness entry point: runs the random shuffle test in kArrayAccessMode with a
// generator seeded from gRandomSeed. n_elems is not used.
int test_shuffle_array_cast(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    const ShuffleMode mode = kArrayAccessMode;
    RandomSeed rng( gRandomSeed );

    return test_shuffle_random( device, context, queue, mode, rng );
}
|
||||
|
||||
// Harness entry point: runs the random shuffle test in kBuiltInFnMode with a
// generator seeded from gRandomSeed. n_elems is not used.
int test_shuffle_built_in(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    const ShuffleMode mode = kBuiltInFnMode;
    RandomSeed rng( gRandomSeed );

    return test_shuffle_random( device, context, queue, mode, rng );
}
|
||||
|
||||
// Harness entry point: runs the random shuffle test in kBuiltInDualInputFnMode
// with a generator seeded from gRandomSeed. n_elems is not used.
int test_shuffle_built_in_dual_input(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    const ShuffleMode mode = kBuiltInDualInputFnMode;
    RandomSeed rng( gRandomSeed );

    return test_shuffle_random( device, context, queue, mode, rng );
}
|
||||
|
||||
Reference in New Issue
Block a user