Remove exact duplicates from the compatibility suites (#495)

Contributes to #494.

Signed-off-by: Kévin Petit <kpet@free.fr>
This commit is contained in:
Kévin Petit
2019-11-20 17:38:49 +00:00
committed by GitHub
parent 4a8cae4ae2
commit 2dca46bc00
22 changed files with 0 additions and 3504 deletions

View File

@@ -2,21 +2,14 @@ set(MODULE_NAME COMPATIBILITY_API)
set(${MODULE_NAME}_SOURCES
main.c
test_bool.c
test_retain.cpp
test_retain_program.c
test_queries.cpp
test_create_kernels.c
test_kernels.c
test_api_min_max.c
test_kernel_arg_changes.cpp
test_kernel_arg_multi_setup.cpp
test_binary.cpp
test_native_kernel.cpp
test_mem_objects.cpp
test_create_context_from_type.cpp
test_device_min_data_type_align_size_alignment.cpp
test_platform.cpp
test_mem_object_info.cpp
test_null_buffer_arg.c
test_kernel_arg_info.c

View File

@@ -86,26 +86,15 @@ test_definition test_list[] = {
ADD_TEST( min_max_device_version ),
ADD_TEST( min_max_language_version ),
ADD_TEST( kernel_arg_changes ),
ADD_TEST( kernel_arg_multi_setup_random ),
ADD_TEST( native_kernel ),
ADD_TEST( create_context_from_type ),
ADD_TEST( platform_extensions ),
ADD_TEST( get_platform_ids ),
ADD_TEST( bool_type ),
ADD_TEST( repeated_setup_cleanup ),
ADD_TEST( retain_queue_single ),
ADD_TEST( retain_queue_multiple ),
ADD_TEST( retain_mem_object_single ),
ADD_TEST( retain_mem_object_multiple ),
ADD_TEST( min_data_type_align_size_alignment ),
ADD_TEST( mem_object_destructor_callback ),
ADD_TEST( null_buffer_arg ),
ADD_TEST( get_buffer_info ),
ADD_TEST( get_image2d_info ),

View File

@@ -1,51 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/testHarness.h"
const char *kernel_with_bool[] = {
"__kernel void kernel_with_bool(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" bool myBool = (src[tid] < 0.5f) && (src[tid] > -0.5f);\n"
" if(myBool)\n"
" {\n"
" dst[tid] = (int)src[tid];\n"
" }\n"
" else\n"
" {\n"
" dst[tid] = 0;\n"
" }\n"
"\n"
"}\n"
};
int test_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
cl_program program;
cl_kernel kernel;
int err = create_single_kernel_helper(context,
&program,
&kernel,
1, kernel_with_bool,
"kernel_with_bool" );
return err;
}

View File

@@ -1,60 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/testHarness.h"
#ifndef _WIN32
#include <unistd.h>
#endif
int IsAPowerOfTwo( unsigned long x )
{
return 0 == (x & (x-1));
}
int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
cl_uint min_alignment;
if (gHasLong)
min_alignment = sizeof(cl_long)*16;
else
min_alignment = sizeof(cl_int)*16;
int error = 0;
cl_uint alignment;
error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignment), &alignment, NULL);
test_error(error, "clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN failed");
log_info("Device reported CL_DEVICE_MEM_BASE_ADDR_ALIGN = %lu bits.\n", (unsigned long)alignment);
// Verify the size is large enough
if (alignment < min_alignment*8) {
log_error("ERROR: alignment too small. Minimum alignment for %s16 is %lu bits, device reported %lu bits.",
(gHasLong) ? "long" : "int",
(unsigned long)(min_alignment*8), (unsigned long)alignment);
return -1;
}
// Verify the size is a power of two
if (!IsAPowerOfTwo((unsigned long)alignment)) {
log_error("ERROR: alignment is not a power of two.\n");
return -1;
}
return 0;
}

View File

@@ -1,141 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
extern "C" { extern cl_uint gRandomSeed;}
// This test is designed to stress changing kernel arguments between execute calls (that are asynchronous and thus
// potentially overlapping) to make sure each kernel gets the right arguments
// Note: put a delay loop in the kernel to make sure we have time to queue the next kernel before this one finishes
const char *inspect_image_kernel_source[] = {
"__kernel void sample_test(read_only image2d_t src, __global int *outDimensions )\n"
"{\n"
" int tid = get_global_id(0), i;\n"
" for( i = 0; i < 100000; i++ ); \n"
" outDimensions[tid * 2] = get_image_width(src) * tid;\n"
" outDimensions[tid * 2 + 1] = get_image_height(src) * tid;\n"
"\n"
"}\n" };
#define NUM_TRIES 100
#define NUM_THREADS 2048
int test_kernel_arg_changes(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
clProgramWrapper program;
clKernelWrapper kernel;
int error, i;
clMemWrapper images[ NUM_TRIES ];
size_t sizes[ NUM_TRIES ][ 2 ];
clMemWrapper results[ NUM_TRIES ];
cl_image_format imageFormat;
size_t maxWidth, maxHeight;
size_t threads[1], localThreads[1];
cl_int resultArray[ NUM_THREADS * 2 ];
char errStr[ 128 ];
RandomSeed seed( gRandomSeed );
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
// Just get any ol format to test with
error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &imageFormat );
test_error( error, "Unable to obtain suitable image format to test with!" );
// Create our testing kernel
error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" );
test_error( error, "Unable to create testing kernel" );
// Get max dimensions for each of our images
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
test_error( error, "Unable to get max image dimensions for device" );
// Get the number of threads we'll be able to run
threads[0] = NUM_THREADS;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size for kernel" );
// Create a variety of images and output arrays
for( i = 0; i < NUM_TRIES; i++ )
{
sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1;
sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1;
images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
&imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error );
if( images[i] == NULL )
{
log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error ));
return -1;
}
results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * threads[0] * 2, NULL, &error );
if( results[i] == NULL)
{
log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2);
return -1;
}
}
// Start setting arguments and executing kernels
for( i = 0; i < NUM_TRIES; i++ )
{
// Set the arguments for this try
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &images[ i ] );
sprintf( errStr, "Unable to set argument 0 for kernel try %d", i );
test_error( error, errStr );
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &results[ i ] );
sprintf( errStr, "Unable to set argument 1 for kernel try %d", i );
test_error( error, errStr );
// Queue up execution
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
sprintf( errStr, "Unable to execute kernel try %d", i );
test_error( error, errStr );
}
// Read the results back out, one at a time, and verify
for( i = 0; i < NUM_TRIES; i++ )
{
error = clEnqueueReadBuffer( queue, results[ i ], CL_TRUE, 0, sizeof( cl_int ) * threads[0] * 2, resultArray, 0, NULL, NULL );
sprintf( errStr, "Unable to read results for kernel try %d", i );
test_error( error, errStr );
// Verify. Each entry should be n * the (width/height) of image i
for( int j = 0; j < NUM_THREADS; j++ )
{
if( resultArray[ j * 2 + 0 ] != (int)sizes[ i ][ 0 ] * j )
{
log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n",
i, j, (int)sizes[ i ][ 0 ] * j, resultArray[ j * 2 + 0 ] );
return -1;
}
if( resultArray[ j * 2 + 1 ] != (int)sizes[ i ][ 1 ] * j )
{
log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n",
i, j, (int)sizes[ i ][ 1 ] * j, resultArray[ j * 2 + 1 ] );
return -1;
}
}
}
// If we got here, everything verified successfully
return 0;
}

View File

@@ -1,277 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/conversions.h"
// This test is designed to stress passing multiple vector parameters to kernels and verifying access between them all
const char *multi_arg_kernel_source_pattern =
"__kernel void sample_test(__global %s *src1, __global %s *src2, __global %s *src3, __global %s *dst1, __global %s *dst2, __global %s *dst3 )\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst1[tid] = src1[tid];\n"
" dst2[tid] = src2[tid];\n"
" dst3[tid] = src3[tid];\n"
"}\n";
extern cl_uint gRandomSeed;
#define MAX_ERROR_TOLERANCE 0.0005f
int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue,
ExplicitType vec1Type, int vec1Size,
ExplicitType vec2Type, int vec2Size,
ExplicitType vec3Type, int vec3Size, MTdata d)
{
clProgramWrapper program;
clKernelWrapper kernel;
int error, i, j;
clMemWrapper streams[ 6 ];
size_t threads[1], localThreads[1];
char programSrc[ 10248 ], vec1Name[ 64 ], vec2Name[ 64 ], vec3Name[ 64 ];
char sizeNames[][ 4 ] = { "", "2", "3", "4", "", "", "", "8" };
const char *ptr;
void *initData[3], *resultData[3];
// Create the program source
sprintf( vec1Name, "%s%s", get_explicit_type_name( vec1Type ), sizeNames[ vec1Size - 1 ] );
sprintf( vec2Name, "%s%s", get_explicit_type_name( vec2Type ), sizeNames[ vec2Size - 1 ] );
sprintf( vec3Name, "%s%s", get_explicit_type_name( vec3Type ), sizeNames[ vec3Size - 1 ] );
sprintf( programSrc, multi_arg_kernel_source_pattern,
vec1Name, vec2Name, vec3Name, vec1Name, vec2Name, vec3Name,
vec1Size, vec1Size, vec2Size, vec2Size, vec3Size, vec3Size );
ptr = programSrc;
// Create our testing kernel
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" );
test_error( error, "Unable to create testing kernel" );
// Get thread dimensions
threads[0] = 1024;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size for kernel" );
// Create input streams
initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size );
streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error );
test_error( error, "Unable to create testing stream" );
initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size );
streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error );
test_error( error, "Unable to create testing stream" );
initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size );
streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error );
test_error( error, "Unable to create testing stream" );
streams[ 3 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
// Set the arguments
error = 0;
for( i = 0; i < 6; i++ )
error |= clSetKernelArg( kernel, i, sizeof( cl_mem ), &streams[ i ] );
test_error( error, "Unable to set arguments for kernel" );
// Execute!
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to execute kernel" );
// Read results
resultData[0] = malloc( get_explicit_type_size( vec1Type ) * vec1Size * threads[0] );
resultData[1] = malloc( get_explicit_type_size( vec2Type ) * vec2Size * threads[0] );
resultData[2] = malloc( get_explicit_type_size( vec3Type ) * vec3Size * threads[0] );
error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, get_explicit_type_size( vec1Type ) * vec1Size * threads[ 0 ], resultData[0], 0, NULL, NULL );
error |= clEnqueueReadBuffer( queue, streams[ 4 ], CL_TRUE, 0, get_explicit_type_size( vec2Type ) * vec2Size * threads[ 0 ], resultData[1], 0, NULL, NULL );
error |= clEnqueueReadBuffer( queue, streams[ 5 ], CL_TRUE, 0, get_explicit_type_size( vec3Type ) * vec3Size * threads[ 0 ], resultData[2], 0, NULL, NULL );
test_error( error, "Unable to read result stream" );
// Verify
char *ptr1 = (char *)initData[ 0 ], *ptr2 = (char *)resultData[ 0 ];
size_t span = get_explicit_type_size( vec1Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec1Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 0!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec1Size;
ptr2 += span * vec1Size;
}
ptr1 = (char *)initData[ 1 ];
ptr2 = (char *)resultData[ 1 ];
span = get_explicit_type_size( vec2Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec2Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 1!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec2Size;
ptr2 += span * vec2Size;
}
ptr1 = (char *)initData[ 2 ];
ptr2 = (char *)resultData[ 2 ];
span = get_explicit_type_size( vec3Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec3Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 2!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec3Size;
ptr2 += span * vec3Size;
}
// If we got here, everything verified successfully
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return 0;
}
int test_kernel_arg_multi_setup_exhaustive(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
// Loop through every combination of input and output types
ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes };
int type1, type2, type3;
int size1, size2, size3;
RandomSeed seed( gRandomSeed );
log_info( "\n" ); // for formatting
for( type1 = 0; types[ type1 ] != kNumExplicitTypes; type1++ )
{
for( type2 = 0; types[ type2 ] != kNumExplicitTypes; type2++ )
{
for( type3 = 0; types[ type3 ] != kNumExplicitTypes; type3++ )
{
log_info( "\n\ttesting %s, %s, %s...", get_explicit_type_name( types[ type1 ] ), get_explicit_type_name( types[ type2 ] ), get_explicit_type_name( types[ type3 ] ) );
// Loop through every combination of vector size
for( size1 = 2; size1 <= 8; size1 <<= 1 )
{
for( size2 = 2; size2 <= 8; size2 <<= 1 )
{
for( size3 = 2; size3 <= 8; size3 <<= 1 )
{
log_info(".");
fflush( stdout);
if( test_multi_arg_set( device, context, queue,
types[ type1 ], size1,
types[ type2 ], size2,
types[ type3 ], size3, seed ) )
return -1;
}
}
}
}
}
}
log_info( "\n" );
return 0;
}
int test_kernel_arg_multi_setup_random(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
// Loop through a selection of combinations
ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes };
int type1, type2, type3;
int size1, size2, size3;
RandomSeed seed( gRandomSeed );
num_elements = 3*3*3*4;
log_info( "Testing %d random configurations\n", num_elements );
// Loop through every combination of vector size
for( size1 = 2; size1 <= 8; size1 <<= 1 )
{
for( size2 = 2; size2 <= 8; size2 <<= 1 )
{
for( size3 = 2; size3 <= 8; size3 <<= 1 )
{
// Loop through 4 type combinations for each size combination
int n;
for (n=0; n<4; n++) {
type1 = (int)get_random_float(0,4, seed);
type2 = (int)get_random_float(0,4, seed);
type3 = (int)get_random_float(0,4, seed);
log_info( "\ttesting %s%d, %s%d, %s%d...\n",
get_explicit_type_name( types[ type1 ] ), size1,
get_explicit_type_name( types[ type2 ] ), size2,
get_explicit_type_name( types[ type3 ] ), size3 );
if( test_multi_arg_set( device, context, queue,
types[ type1 ], size1,
types[ type2 ], size2,
types[ type3 ], size3, seed ) )
return -1;
}
}
}
}
return 0;
}

View File

@@ -1,108 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
static volatile cl_int sDestructorIndex;
void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
{
int * userPtr = (int *)userData;
// ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here
*userPtr = ++sDestructorIndex;
}
#ifndef ABS
#define ABS( x ) ( ( x < 0 ) ? -x : x )
#endif
int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
{
cl_int error;
int i;
// Set up some variables to catch the order in which callbacks are called
volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
sDestructorIndex = 0;
// Set up the callbacks
error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 0 ] );
test_error( error, "Unable to set destructor callback" );
error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 1 ] );
test_error( error, "Unable to set destructor callback" );
error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 2 ] );
test_error( error, "Unable to set destructor callback" );
// Now release the buffer, which SHOULD call the callbacks
error = clReleaseMemObject( memObject );
test_error( error, "Unable to release test buffer" );
// Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
memObject = NULL;
// At this point, all three callbacks should have already been called
int numErrors = 0;
for( i = 0; i < 3; i++ )
{
// Spin waiting for the release to finish. If you don't call the mem_destructor_callback, you will not
// pass the test. bugzilla 6316
while( 0 == callbackOrders[i] )
{}
if( ABS( callbackOrders[ i ] ) != 3-i )
{
log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n",
i+1, ABS( callbackOrders[ i ] ), i );
numErrors++;
}
}
return ( numErrors > 0 ) ? -1 : 0;
}
int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
clMemWrapper testBuffer, testImage;
cl_int error;
// Create a buffer and an image to test callbacks against
testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
test_error( error, "Unable to create testing buffer" );
if( test_mem_object_destructor_callback_single( testBuffer ) != 0 )
{
log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
return -1;
}
if( checkForImageSupport( deviceID ) == 0 )
{
cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 };
testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error );
test_error( error, "Unable to create testing image" );
if( test_mem_object_destructor_callback_single( testImage ) != 0 )
{
log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
return -1;
}
}
return 0;
}

View File

@@ -1,121 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include "harness/conversions.h"
extern cl_uint gRandomSeed;
static void CL_CALLBACK test_native_kernel_fn( void *userData )
{
struct arg_struct {
cl_int * source;
cl_int * dest;
cl_int count;
} *args = (arg_struct *)userData;
for( cl_int i = 0; i < args->count; i++ )
args->dest[ i ] = args->source[ i ];
}
int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
int error;
RandomSeed seed( gRandomSeed );
// Check if we support native kernels
cl_device_exec_capabilities capabilities;
error = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(capabilities), &capabilities, NULL);
if (!(capabilities & CL_EXEC_NATIVE_KERNEL)) {
log_info("Device does not support CL_EXEC_NATIVE_KERNEL.\n");
return 0;
}
clMemWrapper streams[ 2 ];
#if !(defined (_WIN32) && defined (_MSC_VER))
cl_int inBuffer[ n_elems ], outBuffer[ n_elems ];
#else
cl_int* inBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
#endif
clEventWrapper finishEvent;
struct arg_struct
{
cl_mem inputStream;
cl_mem outputStream;
cl_int count;
} args;
// Create some input values
generate_random_data( kInt, n_elems, seed, inBuffer );
// Create I/O streams
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
test_error( error, "Unable to create I/O stream" );
streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
test_error( error, "Unable to create I/O stream" );
// Set up the arrays to call with
args.inputStream = streams[ 0 ];
args.outputStream = streams[ 1 ];
args.count = n_elems;
void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream };
// Run the kernel
error = clEnqueueNativeKernel( queue, test_native_kernel_fn,
&args, sizeof( args ),
2, &streams[ 0 ],
(const void **)memLocs,
0, NULL, &finishEvent );
test_error( error, "Unable to queue native kernel" );
// Finish and wait for the kernel to complete
error = clFinish( queue );
test_error(error, "clFinish failed");
error = clWaitForEvents( 1, &finishEvent );
test_error(error, "clWaitForEvents failed");
// Now read the results and verify
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
test_error( error, "Unable to read results" );
for( int i = 0; i < n_elems; i++ )
{
if( inBuffer[ i ] != outBuffer[ i ] )
{
log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
return 1;
}
}
return 0;
}

View File

@@ -1,289 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include <string.h>
#define EXTENSION_NAME_BUF_SIZE 4096
#define PRINT_EXTENSION_INFO 0
int test_platform_extensions(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
const char * extensions[] = {
"cl_khr_byte_addressable_store",
// "cl_APPLE_SetMemObjectDestructor",
"cl_khr_global_int32_base_atomics",
"cl_khr_global_int32_extended_atomics",
"cl_khr_local_int32_base_atomics",
"cl_khr_local_int32_extended_atomics",
"cl_khr_int64_base_atomics",
"cl_khr_int64_extended_atomics",
// need to put in entires for various atomics
"cl_khr_3d_image_writes",
"cl_khr_fp16",
"cl_khr_fp64",
NULL
};
bool extensionsSupported[] = {
false, //"cl_khr_byte_addressable_store",
false, // need to put in entires for various atomics
false, // "cl_khr_global_int32_base_atomics",
false, // "cl_khr_global_int32_extended_atomics",
false, // "cl_khr_local_int32_base_atomics",
false, // "cl_khr_local_int32_extended_atomics",
false, // "cl_khr_int64_base_atomics",
false, // "cl_khr_int64_extended_atomics",
false, //"cl_khr_3d_image_writes",
false, //"cl_khr_fp16",
false, //"cl_khr_fp64",
false //NULL
};
int extensionIndex;
cl_platform_id platformID;
cl_int err;
char platform_extensions[EXTENSION_NAME_BUF_SIZE];
char device_extensions[EXTENSION_NAME_BUF_SIZE];
// Okay, so what we're going to do is just check the device indicated by
// deviceID against the platform that includes this device
// pass CL_DEVICE_PLATFORM to clGetDeviceInfo
// to get a result of type cl_platform_id
err = clGetDeviceInfo(deviceID,
CL_DEVICE_PLATFORM,
sizeof(cl_platform_id),
(void *)(&platformID),
NULL);
if(err != CL_SUCCESS)
{
vlog_error("test_platform_extensions : could not get platformID from device\n");
return -1;
}
// now we grab the set of extensions specified by the platform
err = clGetPlatformInfo(platformID,
CL_PLATFORM_EXTENSIONS,
sizeof(platform_extensions),
(void *)(&platform_extensions[0]),
NULL);
if(err != CL_SUCCESS)
{
vlog_error("test_platform_extensions : could not get extension string from platform\n");
return -1;
}
#if PRINT_EXTENSION_INFO
log_info("Platform extensions include \"%s\"\n\n", platform_extensions);
#endif
// here we parse the platform extensions, to look for the "important" ones
for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
{
if(strstr(platform_extensions, extensions[extensionIndex]) != NULL)
{
// we found it
#if PRINT_EXTENSION_INFO
log_info("Found \"%s\" in platform extensions\n",
extensions[extensionIndex]);
#endif
extensionsSupported[extensionIndex] = true;
}
}
// and then we grab the set of extensions specified by the device
// (this can be turned into a "loop over all devices in this platform")
err = clGetDeviceInfo(deviceID,
CL_DEVICE_EXTENSIONS,
sizeof(device_extensions),
(void *)(&device_extensions[0]),
NULL);
if(err != CL_SUCCESS)
{
vlog_error("test_platform_extensions : could not get extension string from device\n");
return -1;
}
#if PRINT_EXTENSION_INFO
log_info("Device extensions include \"%s\"\n\n", device_extensions);
#endif
for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
{
if(extensionsSupported[extensionIndex] == false)
{
continue; // skip this one
}
if(strstr(device_extensions, extensions[extensionIndex]) == NULL)
{
// device does not support it
vlog_error("Platform supports extension \"%s\" but device does not\n",
extensions[extensionIndex]);
return -1;
}
}
return 0;
}
int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
cl_platform_id platforms[16];
cl_uint num_platforms;
char *string_returned;
string_returned = (char*)malloc(8192);
int total_errors = 0;
int err = CL_SUCCESS;
err = clGetPlatformIDs(16, platforms, &num_platforms);
test_error(err, "clGetPlatformIDs failed");
if (num_platforms <= 16) {
// Try with NULL
err = clGetPlatformIDs(num_platforms, platforms, NULL);
test_error(err, "clGetPlatformIDs failed with NULL for return size");
}
if (num_platforms < 1) {
log_error("Found 0 platforms.\n");
return -1;
}
log_info("Found %d platforms.\n", num_platforms);
for (int p=0; p<(int)num_platforms; p++) {
cl_device_id *devices;
cl_uint num_devices;
size_t size;
log_info("Platform %d (%p):\n", p, platforms[p]);
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_PROFILE, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_PROFILE failed");
log_info("\tCL_PLATFORM_PROFILE: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VERSION, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_VERSION failed");
log_info("\tCL_PLATFORM_VERSION: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_NAME failed");
log_info("\tCL_PLATFORM_NAME: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VENDOR, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_VENDOR failed");
log_info("\tCL_PLATFORM_VENDOR: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_EXTENSIONS, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed");
log_info("\tCL_PLATFORM_EXTENSIONS: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
test_error(err, "clGetDeviceIDs size failed.\n");
devices = (cl_device_id *)malloc(num_devices*sizeof(cl_device_id));
memset(devices, 0, sizeof(cl_device_id)*num_devices);
err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
test_error(err, "clGetDeviceIDs failed.\n");
log_info("\tPlatform has %d devices.\n", (int)num_devices);
for (int d=0; d<(int)num_devices; d++) {
size_t returned_size;
cl_platform_id returned_platform;
cl_context context;
cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[p], 0 };
err = clGetDeviceInfo(devices[d], CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &returned_platform, &returned_size);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM\n");
if (returned_size != sizeof(cl_platform_id)) {
log_error("Reported return size (%ld) does not match expected size (%ld).\n", returned_size, sizeof(cl_platform_id));
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 8192, string_returned, NULL);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_NAME\n");
log_info("\t\tPlatform for device %d (%s) is %p.\n", d, string_returned, returned_platform);
log_info("\t\t\tTesting clCreateContext for the platform/device...\n");
// Try creating a context for the platform
context = clCreateContext(properties, 1, &devices[d], NULL, NULL, &err);
test_error(err, "\t\tclCreateContext failed for device with platform properties\n");
memset(properties, 0, sizeof(cl_context_properties)*3);
err = clGetContextInfo(context, CL_CONTEXT_PROPERTIES, sizeof(cl_context_properties)*3, properties, &returned_size);
test_error(err, "clGetContextInfo for CL_CONTEXT_PROPERTIES failed");
if (returned_size != sizeof(cl_context_properties)*3) {
log_error("Invalid size returned from clGetContextInfo for CL_CONTEXT_PROPERTIES. Got %ld, expected %ld.\n",
returned_size, sizeof(cl_context_properties)*3);
total_errors++;
}
if (properties[0] != (cl_context_properties)CL_CONTEXT_PLATFORM || properties[1] != (cl_context_properties)platforms[p]) {
log_error("Wrong properties returned. Expected: [%p %p], got [%p %p]\n",
(void*)CL_CONTEXT_PLATFORM, platforms[p], (void*)properties[0], (void*)properties[1]);
total_errors++;
}
err = clReleaseContext(context);
test_error(err, "clReleaseContext failed");
}
free(devices);
}
free(string_returned);
return total_errors;
}

View File

@@ -26,8 +26,6 @@ set(${MODULE_NAME}_SOURCES
test_barrier.c
test_basic_parameter_types.c
test_arrayreadwrite.c
test_arraycopy.c
test_imagearraycopy.c
test_imagearraycopy3d.c
test_imagecopy.c
test_imagerandomcopy.c
@@ -39,16 +37,11 @@ set(${MODULE_NAME}_SOURCES
test_astype.cpp
test_async_copy.cpp
test_sizeof.c
test_vector_creation.cpp
test_vec_type_hint.c
test_numeric_constants.cpp
test_constant_source.cpp
test_bufferreadwriterect.c
test_async_strided_copy.cpp
test_preprocessors.cpp
test_kernel_memory_alignment.cpp
test_global_work_offsets.cpp
test_kernel_call_kernel_function.cpp
test_local_kernel_scope.cpp
)

View File

@@ -71,8 +71,6 @@ test_definition test_list[] = {
ADD_TEST( readimage3d_fp32 ),
ADD_TEST( bufferreadwriterect ),
ADD_TEST( arrayreadwrite ),
ADD_TEST( arraycopy ),
ADD_TEST( imagearraycopy ),
ADD_TEST( imagearraycopy3d ),
ADD_TEST( imagecopy ),
ADD_TEST( imagecopy3d ),
@@ -120,22 +118,12 @@ test_definition test_list[] = {
ADD_TEST( async_strided_copy_local_to_global ),
ADD_TEST( prefetch ),
ADD_TEST( kernel_call_kernel_function ),
ADD_TEST( host_numeric_constants ),
ADD_TEST( kernel_numeric_constants ),
ADD_TEST( kernel_limit_constants ),
ADD_TEST( kernel_preprocessor_macros ),
ADD_TEST( parameter_types ),
ADD_TEST( vector_creation ),
ADD_TEST( vec_type_hint ),
ADD_TEST( kernel_memory_alignment_local ),
ADD_TEST( kernel_memory_alignment_global ),
ADD_TEST( kernel_memory_alignment_constant ),
ADD_TEST( kernel_memory_alignment_private ),
ADD_TEST( global_work_offsets ),
ADD_TEST( get_global_offset ),
};
const int test_num = ARRAY_SIZE( test_list );

View File

@@ -1,3 +0,0 @@
#!/bin/sh
cd `dirname $0`
./test_basic arrayreadwrite arraycopy bufferreadwriterect $@

View File

@@ -1,3 +0,0 @@
#!/bin/sh
cd `dirname $0`
./test_basic arrayimagecopy arrayimagecopy3d imagearraycopy

View File

@@ -1,17 +0,0 @@
#!/bin/sh
cd `dirname $0`
./test_basic \
imagecopy imagerandomcopy \
imagearraycopy imagearraycopy3d \
image_r8 \
readimage readimage_int16 readimage_fp32 \
writeimage writeimage_int16 writeimage_fp32 \
imagenpot \
image_param \
image_multipass_integer_coord \
readimage3d \
readimage3d_int16 \
readimage3d_fp32 \
imagereadwrite3d \
imagereadwrite \
$@

View File

@@ -1,4 +0,0 @@
#!/bin/sh
cd `dirname $0`
./test_basic mri_one mri_multiple

View File

@@ -1,201 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
const char *copy_kernel_code =
"__kernel void test_copy(__global unsigned int *src, __global unsigned int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid];\n"
"}\n";
int
test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
cl_uint *input_ptr, *output_ptr;
cl_mem streams[4], results;
cl_program program;
cl_kernel kernel;
unsigned num_elements = 128 * 1024;
cl_uint num_copies = 1;
size_t delta_offset;
unsigned i;
cl_int err;
MTdata d;
int error_count = 0;
input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
// results
results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
test_error(err, "clCreateBuffer failed");
/*****************************************************************************************************************************************/
#pragma mark client backing
log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n");
// randomize data
d = init_genrand( gRandomSeed );
for (i=0; i<num_elements; i++)
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
// client backing
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
test_error(err, "clCreateBuffer failed");
delta_offset = num_elements * sizeof(cl_uint) / num_copies;
for (i=0; i<num_copies; i++)
{
size_t offset = i * delta_offset;
err = clEnqueueCopyBuffer(queue, streams[0], results, offset, offset, delta_offset, 0, NULL, NULL);
test_error(err, "clEnqueueCopyBuffer failed");
}
// Try upload from client backing
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
for (i=0; i<num_elements; i++)
{
if (input_ptr[i] != output_ptr[i])
{
err = -1;
error_count++;
}
}
if (err)
log_error("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer FAILED\n");
else
log_info("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer passed\n");
#pragma mark framework backing (no client data)
log_info("Testing with clEnqueueWriteBuffer and clEnqueueCopyBuffer\n");
// randomize data
for (i=0; i<num_elements; i++)
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
// no backing
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
test_error(err, "clCreateBuffer failed");
for (i=0; i<num_copies; i++)
{
size_t offset = i * delta_offset;
// Copy the array up from host ptr
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_uint)*num_elements, input_ptr, 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueCopyBuffer(queue, streams[2], results, offset, offset, delta_offset, 0, NULL, NULL);
test_error(err, "clEnqueueCopyBuffer failed");
}
err = clEnqueueReadBuffer( queue, results, true, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
for (i=0; i<num_elements; i++)
{
if (input_ptr[i] != output_ptr[i])
{
err = -1;
error_count++;
break;
}
}
if (err)
log_error("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer FAILED\n");
else
log_info("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer passed\n");
/*****************************************************************************************************************************************/
#pragma mark kernel copy test
log_info("Testing CL_MEM_USE_HOST_PTR buffer with kernel copy\n");
// randomize data
for (i=0; i<num_elements; i++)
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
free_mtdata(d); d= NULL;
// client backing
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
test_error(err, "clCreateBuffer failed");
err = create_single_kernel_helper(context, &program, &kernel, 1, &copy_kernel_code, "test_copy" );
test_error(err, "create_single_kernel_helper failed");
err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
err |= clSetKernelArg(kernel, 1, sizeof results, &results);
test_error(err, "clSetKernelArg failed");
size_t threads[3] = {num_elements, 0, 0};
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error(err, "clEnqueueNDRangeKernel failed");
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
for (i=0; i<num_elements; i++)
{
if (input_ptr[i] != output_ptr[i])
{
err = -1;
error_count++;
break;
}
}
// Keep track of multiple errors.
if (error_count != 0)
err = error_count;
if (err)
log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
else
log_info("\tCL_MEM_USE_HOST_PTR buffer with kernel copy passed\n");
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseMemObject(results);
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
free(input_ptr);
free(output_ptr);
return err;
}

View File

@@ -1,284 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#include <ctype.h>
const char *work_offset_test[] = {
"__kernel void test( __global int * outputID_A, \n"
" __global int * outputID_B, __global int * outputID_C )\n"
"{\n"
" size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n"
" size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n"
" size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n"
" size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n"
"\n"
" outputID_A[ id ] = get_global_id( 0 );\n"
" outputID_B[ id ] = get_global_id( 1 );\n"
" outputID_C[ id ] = get_global_id( 2 );\n"
"}\n"
};
#define MAX_TEST_ITEMS 16 * 16 * 16
#define NUM_TESTS 16
#define MAX_OFFSET 256
#define CHECK_RANGE( v, m, c ) \
if( ( v >= (cl_int)m ) || ( v < 0 ) ) \
{ \
log_error( "ERROR: ouputID_%c[%lu]: %d is < 0 or >= %lu\n", c, i, v, m ); \
return -1; \
}
int check_results( size_t threads[], size_t offsets[], cl_int outputA[], cl_int outputB[], cl_int outputC[] )
{
size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] };
size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ];
static char counts[ MAX_OFFSET + 32 ][ MAX_OFFSET + 16 ][ MAX_OFFSET + 16 ];
memset( counts, 0, sizeof( counts ) );
for( size_t i = 0; i < limit; i++ )
{
// Check ranges first
CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' )
CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' )
CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' )
// Now set the value in the map
counts[ outputA[ i ] ][ outputB[ i ] ][ outputC[ i ] ]++;
}
// Now check the map
int missed = 0, multiple = 0, errored = 0, corrected = 0;
for( size_t x = 0; x < offsettedSizes[ 0 ]; x++ )
{
for( size_t y = 0; y < offsettedSizes[ 1 ]; y++ )
{
for( size_t z = 0; z < offsettedSizes[ 2 ]; z++ )
{
const char * limitMsg = " (further errors of this type suppressed)";
if( ( x >= offsets[ 0 ] ) && ( y >= offsets[ 1 ] ) && ( z >= offsets[ 2 ] ) )
{
if( counts[ x ][ y ][ z ] < 1 )
{
if( missed < 3 )
log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", x, y, z, ( missed == 2 ) ? limitMsg : "" );
missed++;
}
else if( counts[ x ][ y ][ z ] > 1 )
{
if( multiple < 3 )
log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", x, y, z, ( multiple == 2 ) ? limitMsg : "" );
multiple++;
}
}
else
{
if( counts[ x ][ y ][ z ] > 0 )
{
if( errored < 3 )
log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", x, y, z, ( errored == 2 ) ? limitMsg : "" );
errored++;
}
}
}
}
}
if( missed || multiple || errored )
{
size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 0 : threads[ 0 ] - offsets[ 0 ] ),
( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ),
( offsets[ 2 ] > threads[ 2 ] ? 0 : threads[ 2 ] - offsets[ 2 ] ) };
int diff = (int)( ( threads[ 0 ] - diffs[ 0 ] ) * ( threads[ 1 ] - diffs[ 1 ] ) * ( threads[ 2 ] - diffs[ 2 ] ) );
if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) )
log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" );
else
log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n",
missed, multiple, errored );
}
return ( missed | multiple | errored | corrected );
}
int test_global_work_offsets(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 7 ];
int error;
size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ];
// Create the kernel
if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 )
{
return -1;
}
//// Create some output streams
// Use just one output array to init them all (no need to init every single stack storage here)
memset( outputA, 0xff, sizeof( outputA ) );
for( int i = 0; i < 3; i++ )
{
streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error );
test_error( error, "Unable to create output array" );
}
// Run a few different times
MTdata seed = init_genrand( gRandomSeed );
for( int test = 0; test < NUM_TESTS; test++ )
{
// Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
threads[ 0 ] = random_in_range( 1, 32, seed );
threads[ 1 ] = random_in_range( 1, 16, seed );
threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
// Make sure we get the local thread count right
error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
test_error( error, "Unable to determine local work group sizes" );
// Randomize some offsets
for( int j = 0; j < 3; j++ )
offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
// Now set up and run
for( int i = 0; i < 3; i++ )
{
error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
test_error( error, "Unable to set indexed kernel arguments" );
}
error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
// Read our results back now
cl_int * resultBuffers[] = { outputA, outputB, outputC };
for( int i = 0; i < 3; i++ )
{
error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL );
test_error( error, "Unable to get result data" );
}
// Now we need to check the results. The outputs should have one entry for each possible ID,
// but they won't be in order, so we need to construct a count map to determine what we got
if( check_results( threads, offsets, outputA, outputB, outputC ) )
{
log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n",
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
return -1;
}
}
free_mtdata(seed);
// All done!
return 0;
}
const char *get_offset_test[] = {
"__kernel void test( __global int * outOffsets )\n"
"{\n"
" // We use local ID here so we don't have to worry about offsets\n"
" // Also note that these should be the same for ALL threads, so we won't worry about contention\n"
" outOffsets[ 0 ] = (int)get_global_offset( 0 );\n"
" outOffsets[ 1 ] = (int)get_global_offset( 1 );\n"
" outOffsets[ 2 ] = (int)get_global_offset( 2 );\n"
"}\n"
};
int test_get_global_offset(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 1 ];
int error;
size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
cl_int outOffsets[ 3 ];
// Create the kernel
if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 )
{
return -1;
}
// Create some output streams, and storage for a single control ID
memset( outOffsets, 0xff, sizeof( outOffsets ) );
streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error );
test_error( error, "Unable to create control ID buffer" );
// Run a few different times
MTdata seed = init_genrand( gRandomSeed );
for( int test = 0; test < NUM_TESTS; test++ )
{
// Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
threads[ 0 ] = random_in_range( 1, 32, seed );
threads[ 1 ] = random_in_range( 1, 16, seed );
threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
// Make sure we get the local thread count right
error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
test_error( error, "Unable to determine local work group sizes" );
// Randomize some offsets
for( int j = 0; j < 3; j++ )
offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
// Now set up and run
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
test_error( error, "Unable to set indexed kernel arguments" );
error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
// Read our results back now
error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
// And check!
int errors = 0;
for( int j = 0; j < 3; j++ )
{
if( outOffsets[ j ] != (cl_int)offsets[ j ] )
{
log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] );
errors++;
}
}
if( errors > 0 )
return errors;
}
free_mtdata(seed);
// All done!
return 0;
}

View File

@@ -1,146 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int test_imagearraycopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
cl_uchar *imgptr, *bufptr;
clMemWrapper image, buffer;
int img_width = 512;
int img_height = 512;
size_t elem_size;
size_t buffer_size;
int i;
cl_int err;
MTdata d;
cl_event copyevent;
log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
test_error(err, "clGetImageInfo failed");
buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
test_error(err, "clCreateBuffer failed");
d = init_genrand( gRandomSeed );
imgptr = (cl_uchar*)malloc(buffer_size);
for (i=0; i<(int)buffer_size; i++) {
imgptr[i] = (cl_uchar)genrand_int32(d);
}
free_mtdata(d); d = NULL;
size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1};
err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL );
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, &copyevent );
test_error(err, "clEnqueueCopyImageToBuffer failed");
bufptr = (cl_uchar*)malloc(buffer_size);
err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 1, &copyevent, NULL);
test_error(err, "clEnqueueReadBuffer failed");
err = clReleaseEvent(copyevent);
test_error(err, "clReleaseEvent failed");
if (memcmp(imgptr, bufptr, buffer_size) != 0) {
log_error( "ERROR: Results did not validate!\n" );
unsigned char * inchar = (unsigned char*)imgptr;
unsigned char * outchar = (unsigned char*)bufptr;
int failuresPrinted = 0;
int i;
for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
int failed = 0;
int j;
for (j=0; j<(int)elem_size; j++)
if (inchar[i+j] != outchar[i+j])
failed = 1;
char values[4096];
values[0] = 0;
if (failed) {
sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
int j;
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
sprintf(values + strlen(values), "] != expected [");
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
failuresPrinted++;
}
if (failuresPrinted > 5) {
log_error("Not printing further failures...\n");
break;
}
}
err = -1;
}
free(imgptr);
free(bufptr);
if (err)
log_error("IMAGE to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
(unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
return err;
}
int test_imagearraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_int err;
cl_image_format *formats;
cl_uint num_formats;
cl_uint i;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
test_error(err, "clGetSupportedImageFormats failed");
formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
test_error(err, "clGetSupportedImageFormats failed");
for (i = 0; i < num_formats; i++) {
err |= test_imagearraycopy_single_format(device, context, queue, &formats[i]);
}
free(formats);
if (err)
log_error("IMAGE to ARRAY copy test failed\n");
else
log_info("IMAGE to ARRAY copy test passed\n");
return err;
}

View File

@@ -1,253 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
const char *kernel_call_kernel_code[] = {
"void test_function_to_call(__global int *output, __global int *input, int where);\n"
"\n"
"__kernel void test_kernel_to_call(__global int *output, __global int *input, int where) \n"
"{\n"
" int b;\n"
" if (where == 0) {\n"
" output[get_global_id(0)] = 0;\n"
" }\n"
" for (b=0; b<where; b++)\n"
" output[get_global_id(0)] += input[b]; \n"
"}\n"
"\n"
"__kernel void test_call_kernel(__global int *src, __global int *dst, int times) \n"
"{\n"
" int tid = get_global_id(0);\n"
" int a;\n"
" dst[tid] = 1;\n"
" for (a=0; a<times; a++)\n"
" test_kernel_to_call(dst, src, tid);\n"
"}\n"
"void test_function_to_call(__global int *output, __global int *input, int where) \n"
"{\n"
" int b;\n"
" if (where == 0) {\n"
" output[get_global_id(0)] = 0;\n"
" }\n"
" for (b=0; b<where; b++)\n"
" output[get_global_id(0)] += input[b]; \n"
"}\n"
"\n"
"__kernel void test_call_function(__global int *src, __global int *dst, int times) \n"
"{\n"
" int tid = get_global_id(0);\n"
" int a;\n"
" dst[tid] = 1;\n"
" for (a=0; a<times; a++)\n"
" test_function_to_call(dst, src, tid);\n"
"}\n"
};
int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
num_elements = 256;
int error, errors = 0;
clProgramWrapper program;
clKernelWrapper kernel1, kernel2, kernel_to_call;
clMemWrapper streams[2];
size_t threads[] = {num_elements,1,1};
cl_int *input, *output, *expected;
cl_int times = 4;
int pass = 0;
input = (cl_int*)malloc(sizeof(cl_int)*num_elements);
output = (cl_int*)malloc(sizeof(cl_int)*num_elements);
expected = (cl_int*)malloc(sizeof(cl_int)*num_elements);
for (int i=0; i<num_elements; i++) {
input[i] = i;
output[i] = i;
expected[i] = output[i];
}
// Calculate the expected results
for (int tid=0; tid<num_elements; tid++) {
expected[tid] = 1;
for (int a=0; a<times; a++) {
int where = tid;
if (where == 0)
expected[tid] = 0;
for (int b=0; b<where; b++) {
expected[tid] += input[b];
}
}
}
// Test kernel calling a kernel
log_info("Testing kernel calling kernel...\n");
// Create the kernel
if( create_single_kernel_helper( context, &program, &kernel1, 1, kernel_call_kernel_code, "test_call_kernel" ) != 0 )
{
return -1;
}
kernel_to_call = clCreateKernel(program, "test_kernel_to_call", &error);
test_error(error, "clCreateKernel failed");
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*num_elements, input, &error);
test_error( error, "clCreateBuffer failed" );
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*num_elements, output, &error);
test_error( error, "clCreateBuffer failed" );
error = clSetKernelArg(kernel1, 0, sizeof( streams[0] ), &streams[0]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel1, 1, sizeof( streams[1] ), &streams[1]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel1, 2, sizeof( times ), &times);
test_error( error, "clSetKernelArg failed" );
error = clEnqueueNDRangeKernel( queue, kernel1, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "clEnqueueNDRangeKernel failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL );
test_error( error, "clEnqueueReadBuffer failed" );
// Compare the results
pass = 1;
for (int i=0; i<num_elements; i++) {
if (output[i] != expected[i]) {
if (errors > 10)
continue;
if (errors == 10) {
log_error("Suppressing further results...\n");
continue;
}
log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]);
errors++;
pass = 0;
}
}
if (pass) log_info("Passed kernel calling kernel...\n");
// Test kernel calling a function
log_info("Testing kernel calling function...\n");
// Reset the inputs
for (int i=0; i<num_elements; i++) {
input[i] = i;
output[i] = i;
}
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, input, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
kernel2 = clCreateKernel(program, "test_call_function", &error);
test_error(error, "clCreateKernel failed");
error = clSetKernelArg(kernel2, 0, sizeof( streams[0] ), &streams[0]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel2, 1, sizeof( streams[1] ), &streams[1]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel2, 2, sizeof( times ), &times);
test_error( error, "clSetKernelArg failed" );
error = clEnqueueNDRangeKernel( queue, kernel2, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "clEnqueueNDRangeKernel failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL );
test_error( error, "clEnqueueReadBuffer failed" );
// Compare the results
pass = 1;
for (int i=0; i<num_elements; i++) {
if (output[i] != expected[i]) {
if (errors > 10)
continue;
if (errors > 10) {
log_error("Suppressing further results...\n");
continue;
}
log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]);
errors++;
pass = 0;
}
}
if (pass) log_info("Passed kernel calling function...\n");
// Test calling the kernel we called from another kernel
log_info("Testing calling the kernel we called from another kernel before...\n");
// Reset the inputs
for (int i=0; i<num_elements; i++) {
input[i] = i;
output[i] = i;
expected[i] = output[i];
}
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, input, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
// Calculate the expected results
int where = times;
for (int tid=0; tid<num_elements; tid++) {
if (where == 0)
expected[tid] = 0;
for (int b=0; b<where; b++) {
expected[tid] += input[b];
}
}
error = clSetKernelArg(kernel_to_call, 0, sizeof( streams[1] ), &streams[1]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel_to_call, 1, sizeof( streams[0] ), &streams[0]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel_to_call, 2, sizeof( times ), &times);
test_error( error, "clSetKernelArg failed" );
error = clEnqueueNDRangeKernel( queue, kernel_to_call, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "clEnqueueNDRangeKernel failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL );
test_error( error, "clEnqueueReadBuffer failed" );
// Compare the results
pass = 1;
for (int i=0; i<num_elements; i++) {
if (output[i] != expected[i]) {
if (errors > 10)
continue;
if (errors > 10) {
log_error("Suppressing further results...\n");
continue;
}
log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]);
errors++;
pass = 0;
}
}
if (pass) log_info("Passed calling the kernel we called from another kernel before...\n");
free( input );
free( output );
free( expected );
return errors;
}

View File

@@ -1,710 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#define TEST_VALUE_POSITIVE( string_name, name, value ) \
{ \
if (name < value) { \
log_error("FAILED: " string_name ": " #name " < " #value "\n"); \
errors++;\
} else { \
log_info("\t" string_name ": " #name " >= " #value "\n"); \
} \
}
#define TEST_VALUE_NEGATIVE( string_name, name, value ) \
{ \
if (name > value) { \
log_error("FAILED: " string_name ": " #name " > " #value "\n"); \
errors++;\
} else { \
log_info("\t" string_name ": " #name " <= " #value "\n"); \
} \
}
#define TEST_VALUE_EQUAL_LITERAL( string_name, name, value ) \
{ \
if (name != value) { \
log_error("FAILED: " string_name ": " #name " != " #value "\n"); \
errors++;\
} else { \
log_info("\t" string_name ": " #name " = " #value "\n"); \
} \
}
#define TEST_VALUE_EQUAL( string_name, name, value ) \
{ \
if (name != value) { \
log_error("FAILED: " string_name ": " #name " != %a (%17.21g)\n", value, value); \
errors++;\
} else { \
log_info("\t" string_name ": " #name " = %a (%17.21g)\n", value, value); \
} \
}
int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int errors = 0;
TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_BIT", CL_CHAR_BIT, 8)
TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MAX", CL_SCHAR_MAX, 127)
TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MIN", CL_SCHAR_MIN, (-127-1))
TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MAX", CL_CHAR_MAX, CL_SCHAR_MAX)
TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MIN", CL_CHAR_MIN, CL_SCHAR_MIN)
TEST_VALUE_EQUAL_LITERAL( "CL_UCHAR_MAX", CL_UCHAR_MAX, 255)
TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MAX", CL_SHRT_MAX, 32767)
TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MIN", CL_SHRT_MIN, (-32767-1))
TEST_VALUE_EQUAL_LITERAL( "CL_USHRT_MAX", CL_USHRT_MAX, 65535)
TEST_VALUE_EQUAL_LITERAL( "CL_INT_MAX", CL_INT_MAX, 2147483647)
TEST_VALUE_EQUAL_LITERAL( "CL_INT_MIN", CL_INT_MIN, (-2147483647-1))
TEST_VALUE_EQUAL_LITERAL( "CL_UINT_MAX", CL_UINT_MAX, 0xffffffffU)
TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MAX", CL_LONG_MAX, ((cl_long) 0x7FFFFFFFFFFFFFFFLL))
TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MIN", CL_LONG_MIN, ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL))
TEST_VALUE_EQUAL_LITERAL( "CL_ULONG_MAX", CL_ULONG_MAX, ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL))
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_DIG", CL_FLT_DIG, 6)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MANT_DIG", CL_FLT_MANT_DIG, 24)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_10_EXP", CL_FLT_MAX_10_EXP, +38)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_EXP", CL_FLT_MAX_EXP, +128)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_10_EXP", CL_FLT_MIN_10_EXP, -37)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_EXP", CL_FLT_MIN_EXP, -125)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_RADIX", CL_FLT_RADIX, 2)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX", CL_FLT_MAX, MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103))
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN", CL_FLT_MIN, MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126))
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_EPSILON", CL_FLT_EPSILON, MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23))
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_DIG", CL_DBL_DIG, 15)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MANT_DIG", CL_DBL_MANT_DIG, 53)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_10_EXP", CL_DBL_MAX_10_EXP, +308)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_EXP", CL_DBL_MAX_EXP, +1024)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_10_EXP", CL_DBL_MIN_10_EXP, -307)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_EXP", CL_DBL_MIN_EXP, -1021)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_RADIX", CL_DBL_RADIX, 2)
TEST_VALUE_EQUAL( "CL_DBL_MAX", CL_DBL_MAX, MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971))
TEST_VALUE_EQUAL( "CL_DBL_MIN", CL_DBL_MIN, MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022))
TEST_VALUE_EQUAL( "CL_DBL_EPSILON", CL_DBL_EPSILON, MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52))
TEST_VALUE_EQUAL( "CL_M_E", CL_M_E, MAKE_HEX_DOUBLE(0x1.5bf0a8b145769p+1, 0x15bf0a8b145769LL, -51) );
TEST_VALUE_EQUAL( "CL_M_LOG2E", CL_M_LOG2E, MAKE_HEX_DOUBLE(0x1.71547652b82fep+0, 0x171547652b82feLL, -52) );
TEST_VALUE_EQUAL( "CL_M_LOG10E", CL_M_LOG10E, MAKE_HEX_DOUBLE(0x1.bcb7b1526e50ep-2, 0x1bcb7b1526e50eLL, -54) );
TEST_VALUE_EQUAL( "CL_M_LN2", CL_M_LN2, MAKE_HEX_DOUBLE(0x1.62e42fefa39efp-1, 0x162e42fefa39efLL, -53) );
TEST_VALUE_EQUAL( "CL_M_LN10", CL_M_LN10, MAKE_HEX_DOUBLE(0x1.26bb1bbb55516p+1, 0x126bb1bbb55516LL, -51) );
TEST_VALUE_EQUAL( "CL_M_PI", CL_M_PI, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+1, 0x1921fb54442d18LL, -51) );
TEST_VALUE_EQUAL( "CL_M_PI_2", CL_M_PI_2, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+0, 0x1921fb54442d18LL, -52) );
TEST_VALUE_EQUAL( "CL_M_PI_4", CL_M_PI_4, MAKE_HEX_DOUBLE(0x1.921fb54442d18p-1, 0x1921fb54442d18LL, -53) );
TEST_VALUE_EQUAL( "CL_M_1_PI", CL_M_1_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-2, 0x145f306dc9c883LL, -54) );
TEST_VALUE_EQUAL( "CL_M_2_PI", CL_M_2_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-1, 0x145f306dc9c883LL, -53) );
TEST_VALUE_EQUAL( "CL_M_2_SQRTPI", CL_M_2_SQRTPI, MAKE_HEX_DOUBLE(0x1.20dd750429b6dp+0, 0x120dd750429b6dLL, -52) );
TEST_VALUE_EQUAL( "CL_M_SQRT2", CL_M_SQRT2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp+0, 0x16a09e667f3bcdLL, -52) );
TEST_VALUE_EQUAL( "CL_M_SQRT1_2", CL_M_SQRT1_2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp-1, 0x16a09e667f3bcdLL, -53) );
TEST_VALUE_EQUAL( "CL_M_E_F", CL_M_E_F, MAKE_HEX_FLOAT(0x1.5bf0a8p+1f, 0x15bf0a8L, -23));
TEST_VALUE_EQUAL( "CL_M_LOG2E_F", CL_M_LOG2E_F, MAKE_HEX_FLOAT(0x1.715476p+0f, 0x1715476L, -24));
TEST_VALUE_EQUAL( "CL_M_LOG10E_F", CL_M_LOG10E_F, MAKE_HEX_FLOAT(0x1.bcb7b2p-2f, 0x1bcb7b2L, -26));
TEST_VALUE_EQUAL( "CL_M_LN2_F", CL_M_LN2_F, MAKE_HEX_FLOAT(0x1.62e43p-1f, 0x162e43L, -21) );
TEST_VALUE_EQUAL( "CL_M_LN10_F", CL_M_LN10_F, MAKE_HEX_FLOAT(0x1.26bb1cp+1f, 0x126bb1cL, -23));
TEST_VALUE_EQUAL( "CL_M_PI_F", CL_M_PI_F, MAKE_HEX_FLOAT(0x1.921fb6p+1f, 0x1921fb6L, -23));
TEST_VALUE_EQUAL( "CL_M_PI_2_F", CL_M_PI_2_F, MAKE_HEX_FLOAT(0x1.921fb6p+0f, 0x1921fb6L, -24));
TEST_VALUE_EQUAL( "CL_M_PI_4_F", CL_M_PI_4_F, MAKE_HEX_FLOAT(0x1.921fb6p-1f, 0x1921fb6L, -25));
TEST_VALUE_EQUAL( "CL_M_1_PI_F", CL_M_1_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-2f, 0x145f306L, -26));
TEST_VALUE_EQUAL( "CL_M_2_PI_F", CL_M_2_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-1f, 0x145f306L, -25));
TEST_VALUE_EQUAL( "CL_M_2_SQRTPI_F", CL_M_2_SQRTPI_F,MAKE_HEX_FLOAT(0x1.20dd76p+0f, 0x120dd76L, -24));
TEST_VALUE_EQUAL( "CL_M_SQRT2_F", CL_M_SQRT2_F, MAKE_HEX_FLOAT(0x1.6a09e6p+0f, 0x16a09e6L, -24));
TEST_VALUE_EQUAL( "CL_M_SQRT1_2_F", CL_M_SQRT1_2_F, MAKE_HEX_FLOAT(0x1.6a09e6p-1f, 0x16a09e6L, -25));
return errors;
}
const char *kernel_int_float[] = {
"__kernel void test( __global float *float_out, __global int *int_out, __global uint *uint_out) \n"
"{\n"
" int_out[0] = CHAR_BIT;\n"
" int_out[1] = SCHAR_MAX;\n"
" int_out[2] = SCHAR_MIN;\n"
" int_out[3] = CHAR_MAX;\n"
" int_out[4] = CHAR_MIN;\n"
" int_out[5] = UCHAR_MAX;\n"
" int_out[6] = SHRT_MAX;\n"
" int_out[7] = SHRT_MIN;\n"
" int_out[8] = USHRT_MAX;\n"
" int_out[9] = INT_MAX;\n"
" int_out[10] = INT_MIN;\n"
" uint_out[0] = UINT_MAX;\n"
" int_out[11] = FLT_DIG;\n"
" int_out[12] = FLT_MANT_DIG;\n"
" int_out[13] = FLT_MAX_10_EXP;\n"
" int_out[14] = FLT_MAX_EXP;\n"
" int_out[15] = FLT_MIN_10_EXP;\n"
" int_out[16] = FLT_MIN_EXP;\n"
" int_out[17] = FLT_RADIX;\n"
"#ifdef __IMAGE_SUPPORT__\n"
" int_out[18] = __IMAGE_SUPPORT__;\n"
"#else\n"
" int_out[18] = 0xf00baa;\n"
"#endif\n"
" float_out[0] = FLT_MAX;\n"
" float_out[1] = FLT_MIN;\n"
" float_out[2] = FLT_EPSILON;\n"
" float_out[3] = M_E_F;\n"
" float_out[4] = M_LOG2E_F;\n"
" float_out[5] = M_LOG10E_F;\n"
" float_out[6] = M_LN2_F;\n"
" float_out[7] = M_LN10_F;\n"
" float_out[8] = M_PI_F;\n"
" float_out[9] = M_PI_2_F;\n"
" float_out[10] = M_PI_4_F;\n"
" float_out[11] = M_1_PI_F;\n"
" float_out[12] = M_2_PI_F;\n"
" float_out[13] = M_2_SQRTPI_F;\n"
" float_out[14] = M_SQRT2_F;\n"
" float_out[15] = M_SQRT1_2_F;\n"
"}\n"
};
const char *kernel_long[] = {
"__kernel void test(__global long *long_out, __global ulong *ulong_out) \n"
"{\n"
" long_out[0] = LONG_MAX;\n"
" long_out[1] = LONG_MIN;\n"
" ulong_out[0] = ULONG_MAX;\n"
"}\n"
};
const char *kernel_double[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"__kernel void test( __global double *double_out, __global long *long_out ) \n "
"{\n"
" long_out[0] = DBL_DIG;\n"
" long_out[1] = DBL_MANT_DIG;\n"
" long_out[2] = DBL_MAX_10_EXP;\n"
" long_out[3] = DBL_MAX_EXP;\n"
" long_out[4] = DBL_MIN_10_EXP;\n"
" long_out[5] = DBL_MIN_EXP;\n"
" long_out[6] = DBL_RADIX;\n"
" double_out[0] = DBL_MAX;\n"
" double_out[1] = DBL_MIN;\n"
" double_out[2] = DBL_EPSILON;\n"
" double_out[3] = M_E;\n"
" double_out[4] = M_LOG2E;\n"
" double_out[5] = M_LOG10E;\n"
" double_out[6] = M_LN2;\n"
" double_out[7] = M_LN10;\n"
" double_out[8] = M_PI;\n"
" double_out[9] = M_PI_2;\n"
" double_out[10] = M_PI_4;\n"
" double_out[11] = M_1_PI;\n"
" double_out[12] = M_2_PI;\n"
" double_out[13] = M_2_SQRTPI;\n"
" double_out[14] = M_SQRT2;\n"
" double_out[15] = M_SQRT1_2;\n"
"}\n"
};
int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error, errors = 0;
// clProgramWrapper program;
// clKernelWrapper kernel;
// clMemWrapper streams[3];
cl_program program;
cl_kernel kernel;
cl_mem streams[3];
size_t threads[] = {1,1,1};
cl_float float_out[16];
cl_int int_out[19];
cl_uint uint_out[1];
cl_long long_out[7];
cl_ulong ulong_out[1];
cl_double double_out[16];
/** INTs and FLOATs **/
// Create the kernel
if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_int_float, "test" ) != 0 )
{
return -1;
}
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(float_out), NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(int_out), NULL, &error);
test_error( error, "Creating test array failed" );
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(uint_out), NULL, &error);
test_error( error, "Creating test array failed" );
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]);
test_error( error, "Unable to set indexed kernel arguments" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(float_out), (void*)float_out, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(int_out), (void*)int_out, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(uint_out), (void*)uint_out, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
TEST_VALUE_EQUAL_LITERAL( "CHAR_BIT", int_out[0], 8)
TEST_VALUE_EQUAL_LITERAL( "SCHAR_MAX", int_out[1], 127)
TEST_VALUE_EQUAL_LITERAL( "SCHAR_MIN", int_out[2], (-127-1))
TEST_VALUE_EQUAL_LITERAL( "CHAR_MAX", int_out[3], CL_SCHAR_MAX)
TEST_VALUE_EQUAL_LITERAL( "CHAR_MIN", int_out[4], CL_SCHAR_MIN)
TEST_VALUE_EQUAL_LITERAL( "UCHAR_MAX", int_out[5], 255)
TEST_VALUE_EQUAL_LITERAL( "SHRT_MAX", int_out[6], 32767)
TEST_VALUE_EQUAL_LITERAL( "SHRT_MIN",int_out[7], (-32767-1))
TEST_VALUE_EQUAL_LITERAL( "USHRT_MAX", int_out[8], 65535)
TEST_VALUE_EQUAL_LITERAL( "INT_MAX", int_out[9], 2147483647)
TEST_VALUE_EQUAL_LITERAL( "INT_MIN", int_out[10], (-2147483647-1))
TEST_VALUE_EQUAL_LITERAL( "UINT_MAX", uint_out[0], 0xffffffffU)
TEST_VALUE_EQUAL_LITERAL( "FLT_DIG", int_out[11], 6)
TEST_VALUE_EQUAL_LITERAL( "FLT_MANT_DIG", int_out[12], 24)
TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_10_EXP", int_out[13], +38)
TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_EXP", int_out[14], +128)
TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_10_EXP", int_out[15], -37)
TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_EXP", int_out[16], -125)
TEST_VALUE_EQUAL_LITERAL( "FLT_RADIX", int_out[17], 2)
TEST_VALUE_EQUAL( "FLT_MAX", float_out[0], MAKE_HEX_FLOAT(0x1.fffffep127f, 0x1fffffeL, 103))
TEST_VALUE_EQUAL( "FLT_MIN", float_out[1], MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126))
TEST_VALUE_EQUAL( "FLT_EPSILON", float_out[2], MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23))
TEST_VALUE_EQUAL( "M_E_F", float_out[3], CL_M_E_F )
TEST_VALUE_EQUAL( "M_LOG2E_F", float_out[4], CL_M_LOG2E_F )
TEST_VALUE_EQUAL( "M_LOG10E_F", float_out[5], CL_M_LOG10E_F )
TEST_VALUE_EQUAL( "M_LN2_F", float_out[6], CL_M_LN2_F )
TEST_VALUE_EQUAL( "M_LN10_F", float_out[7], CL_M_LN10_F )
TEST_VALUE_EQUAL( "M_PI_F", float_out[8], CL_M_PI_F )
TEST_VALUE_EQUAL( "M_PI_2_F", float_out[9], CL_M_PI_2_F )
TEST_VALUE_EQUAL( "M_PI_4_F", float_out[10], CL_M_PI_4_F )
TEST_VALUE_EQUAL( "M_1_PI_F", float_out[11], CL_M_1_PI_F )
TEST_VALUE_EQUAL( "M_2_PI_F", float_out[12], CL_M_2_PI_F )
TEST_VALUE_EQUAL( "M_2_SQRTPI_F", float_out[13], CL_M_2_SQRTPI_F )
TEST_VALUE_EQUAL( "M_SQRT2_F", float_out[14], CL_M_SQRT2_F )
TEST_VALUE_EQUAL( "M_SQRT1_2_F", float_out[15], CL_M_SQRT1_2_F )
// We need to check these values against what we know is supported on the device
if( checkForImageSupport( deviceID ) == 0 )
{ // has images
// If images are supported, the constant should have been defined to the value 1
if( int_out[18] == 0xf00baa )
{
log_error( "FAILURE: __IMAGE_SUPPORT__ undefined even though images are supported\n" );
return -1;
}
else if( int_out[18] != 1 )
{
log_error( "FAILURE: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", int_out[18] );
return -1;
}
}
else
{ // no images
// If images aren't supported, the constant should be undefined
if( int_out[18] != 0xf00baa )
{
log_error( "FAILURE: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported", int_out[18] );
return -1;
}
}
log_info( "\t__IMAGE_SUPPORT__: %d\n", int_out[18]);
clReleaseMemObject(streams[0]); streams[0] = NULL;
clReleaseMemObject(streams[1]); streams[1] = NULL;
clReleaseMemObject(streams[2]); streams[2] = NULL;
clReleaseKernel(kernel); kernel = NULL;
clReleaseProgram(program); program = NULL;
/** LONGs **/
if(!gHasLong) {
log_info("Longs not supported; skipping long tests.\n");
}
else
{
// Create the kernel
if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_long, "test" ) != 0 )
{
return -1;
}
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(ulong_out), NULL, &error);
test_error( error, "Creating test array failed" );
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
test_error( error, "Unable to set indexed kernel arguments" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(long_out), &long_out, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(ulong_out), &ulong_out, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
TEST_VALUE_EQUAL_LITERAL( "LONG_MAX", long_out[0], ((cl_long) 0x7FFFFFFFFFFFFFFFLL))
TEST_VALUE_EQUAL_LITERAL( "LONG_MIN", long_out[1], ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL))
TEST_VALUE_EQUAL_LITERAL( "ULONG_MAX", ulong_out[0], ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL))
clReleaseMemObject(streams[0]); streams[0] = NULL;
clReleaseMemObject(streams[1]); streams[1] = NULL;
clReleaseKernel(kernel); kernel = NULL;
clReleaseProgram(program); program = NULL;
}
/** DOUBLEs **/
if(!is_extension_available(deviceID, "cl_khr_fp64")) {
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
}
else
{
// Create the kernel
if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_double, "test" ) != 0 )
{
return -1;
}
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(double_out), NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error);
test_error( error, "Creating test array failed" );
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
test_error( error, "Unable to set indexed kernel arguments" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(double_out), &double_out, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(long_out), &long_out, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
TEST_VALUE_EQUAL_LITERAL( "DBL_DIG", long_out[0], 15)
TEST_VALUE_EQUAL_LITERAL( "DBL_MANT_DIG", long_out[1], 53)
TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_10_EXP", long_out[2], +308)
TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_EXP", long_out[3], +1024)
TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_10_EXP", long_out[4], -307)
TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_EXP", long_out[5], -1021)
TEST_VALUE_EQUAL_LITERAL( "DBL_RADIX", long_out[6], 2)
TEST_VALUE_EQUAL( "DBL_MAX", double_out[0], MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971))
TEST_VALUE_EQUAL( "DBL_MIN", double_out[1], MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022))
TEST_VALUE_EQUAL( "DBL_EPSILON", double_out[2], MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52))
//TEST_VALUE_EQUAL( "M_E", double_out[3], CL_M_E )
TEST_VALUE_EQUAL( "M_LOG2E", double_out[4], CL_M_LOG2E )
TEST_VALUE_EQUAL( "M_LOG10E", double_out[5], CL_M_LOG10E )
TEST_VALUE_EQUAL( "M_LN2", double_out[6], CL_M_LN2 )
TEST_VALUE_EQUAL( "M_LN10", double_out[7], CL_M_LN10 )
TEST_VALUE_EQUAL( "M_PI", double_out[8], CL_M_PI )
TEST_VALUE_EQUAL( "M_PI_2", double_out[9], CL_M_PI_2 )
TEST_VALUE_EQUAL( "M_PI_4", double_out[10], CL_M_PI_4 )
TEST_VALUE_EQUAL( "M_1_PI", double_out[11], CL_M_1_PI )
TEST_VALUE_EQUAL( "M_2_PI", double_out[12], CL_M_2_PI )
TEST_VALUE_EQUAL( "M_2_SQRTPI", double_out[13], CL_M_2_SQRTPI )
TEST_VALUE_EQUAL( "M_SQRT2", double_out[14], CL_M_SQRT2 )
TEST_VALUE_EQUAL( "M_SQRT1_2", double_out[15], CL_M_SQRT1_2 )
clReleaseMemObject(streams[0]); streams[0] = NULL;
clReleaseMemObject(streams[1]); streams[1] = NULL;
clReleaseKernel(kernel); kernel = NULL;
clReleaseProgram(program); program = NULL;
}
error = clFinish(queue);
test_error(error, "clFinish failed");
return errors;
}
const char *kernel_constant_limits[] = {
"__kernel void test( __global int *intOut, __global float *floatOut ) \n"
"{\n"
" intOut[0] = isinf( MAXFLOAT ) ? 1 : 0;\n"
" intOut[1] = isnormal( MAXFLOAT ) ? 1 : 0;\n"
" intOut[2] = isnan( MAXFLOAT ) ? 1 : 0;\n"
" intOut[3] = sizeof( MAXFLOAT );\n"
" intOut[4] = ( MAXFLOAT == FLT_MAX ) ? 1 : 0;\n"
// " intOut[5] = ( MAXFLOAT == CL_FLT_MAX ) ? 1 : 0;\n"
" intOut[6] = ( MAXFLOAT == MAXFLOAT ) ? 1 : 0;\n"
" intOut[7] = ( MAXFLOAT == 0x1.fffffep127f ) ? 1 : 0;\n"
" floatOut[0] = MAXFLOAT;\n"
"}\n"
};
const char *kernel_constant_extended_limits[] = {
"__kernel void test( __global int *intOut, __global float *floatOut ) \n"
"{\n"
" intOut[0] = ( INFINITY == HUGE_VALF ) ? 1 : 0;\n"
" intOut[1] = sizeof( INFINITY );\n"
" intOut[2] = isinf( INFINITY ) ? 1 : 0;\n"
" intOut[3] = isnormal( INFINITY ) ? 1 : 0;\n"
" intOut[4] = isnan( INFINITY ) ? 1 : 0;\n"
" intOut[5] = ( INFINITY > MAXFLOAT ) ? 1 : 0;\n"
" intOut[6] = ( -INFINITY < -MAXFLOAT ) ? 1 : 0;\n"
" intOut[7] = ( ( MAXFLOAT + MAXFLOAT ) == INFINITY ) ? 1 : 0;\n"
" intOut[8] = ( nextafter( MAXFLOAT, INFINITY ) == INFINITY ) ? 1 : 0;\n"
" intOut[9] = ( nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY ) ? 1 : 0;\n"
" intOut[10] = ( INFINITY == INFINITY ) ? 1 : 0;\n"
" intOut[11] = ( as_uint( INFINITY ) == 0x7f800000 ) ? 1 : 0;\n"
" floatOut[0] = INFINITY;\n"
"\n"
" intOut[12] = sizeof( HUGE_VALF );\n"
" intOut[13] = ( HUGE_VALF == INFINITY ) ? 1 : 0;\n"
" floatOut[1] = HUGE_VALF;\n"
"\n"
" intOut[14] = ( NAN == NAN ) ? 1 : 0;\n"
" intOut[15] = ( NAN != NAN ) ? 1 : 0;\n"
" intOut[16] = isnan( NAN ) ? 1 : 0;\n"
" intOut[17] = isinf( NAN ) ? 1 : 0;\n"
" intOut[18] = isnormal( NAN ) ? 1 : 0;\n"
" intOut[19] = ( ( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000 ) ? 1 : 0;\n"
" intOut[20] = sizeof( NAN );\n"
" floatOut[2] = NAN;\n"
"\n"
" intOut[21] = isnan( INFINITY / INFINITY ) ? 1 : 0;\n"
" intOut[22] = isnan( INFINITY - INFINITY ) ? 1 : 0;\n"
" intOut[23] = isnan( 0.f / 0.f ) ? 1 : 0;\n"
" intOut[24] = isnan( INFINITY * 0.f ) ? 1 : 0;\n"
" intOut[25] = ( INFINITY == NAN ); \n"
" intOut[26] = ( -INFINITY == NAN ); \n"
" intOut[27] = ( INFINITY > NAN ); \n"
" intOut[28] = ( -INFINITY < NAN ); \n"
" intOut[29] = ( INFINITY != NAN ); \n"
" intOut[30] = ( NAN > INFINITY ); \n"
" intOut[31] = ( NAN < -INFINITY ); \n"
"}\n"
};
const char *kernel_constant_double_limits[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"__kernel void test( __global int *intOut, __global double *doubleOut ) \n"
"{\n"
" intOut[0] = sizeof( HUGE_VAL );\n"
" intOut[1] = ( HUGE_VAL == INFINITY ) ? 1 : 0;\n"
" intOut[2] = isinf( HUGE_VAL ) ? 1 : 0;\n"
" intOut[3] = isnormal( HUGE_VAL ) ? 1 : 0;\n"
" intOut[4] = isnan( HUGE_VAL ) ? 1 : 0;\n"
" intOut[5] = ( HUGE_VAL == HUGE_VALF ) ? 1 : 0;\n"
" intOut[6] = ( as_ulong( HUGE_VAL ) == 0x7ff0000000000000UL ) ? 1 : 0;\n"
" doubleOut[0] = HUGE_VAL;\n"
"}\n"
};
#define TEST_FLOAT_ASSERTION( a, msg, f ) if( !( a ) ) { log_error( "ERROR: Float constant failed requirement: %s (bitwise value is 0x%8.8x)\n", msg, *( (uint32_t *)&f ) ); return -1; }
#define TEST_DOUBLE_ASSERTION( a, msg, f ) if( !( a ) ) { log_error( "ERROR: Double constant failed requirement: %s (bitwise value is 0x%16.16llx)\n", msg, *( (uint64_t *)&f ) ); return -1; }
int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
size_t threads[] = {1,1,1};
clMemWrapper intStream, floatStream, doubleStream;
cl_int intOut[ 32 ];
cl_float floatOut[ 3 ];
cl_double doubleOut[ 1 ];
/* Create some I/O streams */
intStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(intOut), NULL, &error );
test_error( error, "Creating test array failed" );
floatStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(floatOut), NULL, &error );
test_error( error, "Creating test array failed" );
// Stage 1: basic limits on MAXFLOAT
{
clProgramWrapper program;
clKernelWrapper kernel;
if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_limits, "test" ) != 0 )
{
return -1;
}
error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream );
test_error( error, "Unable to set indexed kernel arguments" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
// Test MAXFLOAT properties
TEST_FLOAT_ASSERTION( intOut[0] == 0, "isinf( MAXFLOAT ) = false", floatOut[0] )
TEST_FLOAT_ASSERTION( intOut[1] == 1, "isnormal( MAXFLOAT ) = true", floatOut[0] )
TEST_FLOAT_ASSERTION( intOut[2] == 0, "isnan( MAXFLOAT ) = false", floatOut[0] )
TEST_FLOAT_ASSERTION( intOut[3] == 4, "sizeof( MAXFLOAT ) = 4", floatOut[0] )
TEST_FLOAT_ASSERTION( intOut[4] == 1, "MAXFLOAT = FLT_MAX", floatOut[0] )
TEST_FLOAT_ASSERTION( floatOut[0] == CL_FLT_MAX, "MAXFLOAT = CL_FLT_MAX", floatOut[0] )
TEST_FLOAT_ASSERTION( intOut[6] == 1, "MAXFLOAT = MAXFLOAT", floatOut[0] )
TEST_FLOAT_ASSERTION( floatOut[0] == MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103), "MAXFLOAT = 0x1.fffffep127f", floatOut[0] )
}
// Stage 2: INFINITY and NAN
char profileStr[128] = "";
error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL );
test_error( error, "Unable to run INFINITY/NAN tests (unable to get CL_DEVICE_PROFILE" );
bool testInfNan = true;
if( strcmp( profileStr, "EMBEDDED_PROFILE" ) == 0 )
{
// We test if we're not an embedded profile, OR if the inf/nan flag in the config is set
cl_device_fp_config single = 0;
error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
test_error( error, "Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)" );
if( ( single & CL_FP_INF_NAN ) == 0 )
{
log_info( "Skipping INFINITY and NAN tests on embedded device (INF/NAN not supported on this device)" );
testInfNan = false;
}
}
if( testInfNan )
{
clProgramWrapper program;
clKernelWrapper kernel;
if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_extended_limits, "test" ) != 0 )
{
return -1;
}
error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream );
test_error( error, "Unable to set indexed kernel arguments" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
TEST_FLOAT_ASSERTION( intOut[0] == 1, "INFINITY == HUGE_VALF", intOut[0] )
TEST_FLOAT_ASSERTION( intOut[1] == 4, "sizeof( INFINITY ) == 4", intOut[1] )
TEST_FLOAT_ASSERTION( intOut[2] == 1, "isinf( INFINITY ) == true", intOut[2] )
TEST_FLOAT_ASSERTION( intOut[3] == 0, "isnormal( INFINITY ) == false", intOut[3] )
TEST_FLOAT_ASSERTION( intOut[4] == 0, "isnan( INFINITY ) == false", intOut[4] )
TEST_FLOAT_ASSERTION( intOut[5] == 1, "INFINITY > MAXFLOAT", intOut[5] )
TEST_FLOAT_ASSERTION( intOut[6] == 1, "-INFINITY < -MAXFLOAT", intOut[6] )
TEST_FLOAT_ASSERTION( intOut[7] == 1, "( MAXFLOAT + MAXFLOAT ) == INFINITY", intOut[7] )
TEST_FLOAT_ASSERTION( intOut[8] == 1, "nextafter( MAXFLOAT, INFINITY ) == INFINITY", intOut[8] )
TEST_FLOAT_ASSERTION( intOut[9] == 1, "nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY", intOut[9] )
TEST_FLOAT_ASSERTION( intOut[10] == 1, "INFINITY = INFINITY", intOut[10] )
TEST_FLOAT_ASSERTION( intOut[11] == 1, "asuint( INFINITY ) == 0x7f800000", intOut[11] )
TEST_FLOAT_ASSERTION( *( (uint32_t *)&floatOut[0] ) == 0x7f800000, "asuint( INFINITY ) == 0x7f800000", floatOut[0] )
TEST_FLOAT_ASSERTION( floatOut[1] == INFINITY, "INFINITY == INFINITY", floatOut[1] )
TEST_FLOAT_ASSERTION( intOut[12] == 4, "sizeof( HUGE_VALF ) == 4", intOut[12] )
TEST_FLOAT_ASSERTION( intOut[13] == 1, "HUGE_VALF == INFINITY", intOut[13] )
TEST_FLOAT_ASSERTION( floatOut[1] == HUGE_VALF, "HUGE_VALF == HUGE_VALF", floatOut[1] )
TEST_FLOAT_ASSERTION( intOut[14] == 0, "(NAN == NAN) = false", intOut[14] )
TEST_FLOAT_ASSERTION( intOut[15] == 1, "(NAN != NAN) = true", intOut[15] )
TEST_FLOAT_ASSERTION( intOut[16] == 1, "isnan( NAN ) = true", intOut[16] )
TEST_FLOAT_ASSERTION( intOut[17] == 0, "isinf( NAN ) = false", intOut[17] )
TEST_FLOAT_ASSERTION( intOut[18] == 0, "isnormal( NAN ) = false", intOut[18] )
TEST_FLOAT_ASSERTION( intOut[19] == 1, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", intOut[19] )
TEST_FLOAT_ASSERTION( intOut[20] == 4, "sizeof( NAN ) = 4", intOut[20] )
TEST_FLOAT_ASSERTION( ( *( (uint32_t *)&floatOut[2] ) & 0x7fffffff ) > 0x7f800000, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", floatOut[2] )
TEST_FLOAT_ASSERTION( intOut[ 21 ] == 1, "isnan( INFINITY / INFINITY ) = true", intOut[ 21 ] )
TEST_FLOAT_ASSERTION( intOut[ 22 ] == 1, "isnan( INFINITY - INFINITY ) = true", intOut[ 22 ] )
TEST_FLOAT_ASSERTION( intOut[ 23 ] == 1, "isnan( 0.f / 0.f ) = true", intOut[ 23 ] )
TEST_FLOAT_ASSERTION( intOut[ 24 ] == 1, "isnan( INFINITY * 0.f ) = true", intOut[ 24 ] )
TEST_FLOAT_ASSERTION( intOut[ 25 ] == 0, "( INFINITY == NAN ) = false", intOut[ 25 ] )
TEST_FLOAT_ASSERTION( intOut[ 26 ] == 0, "(-INFINITY == NAN ) = false", intOut[ 26 ] )
TEST_FLOAT_ASSERTION( intOut[ 27 ] == 0, "( INFINITY > NAN ) = false", intOut[ 27 ] )
TEST_FLOAT_ASSERTION( intOut[ 28 ] == 0, "(-INFINITY < NAN ) = false", intOut[ 28 ] )
TEST_FLOAT_ASSERTION( intOut[ 29 ] == 1, "( INFINITY != NAN ) = true", intOut[ 29 ] )
TEST_FLOAT_ASSERTION( intOut[ 30 ] == 0, "( NAN < INFINITY ) = false", intOut[ 30 ] )
TEST_FLOAT_ASSERTION( intOut[ 31 ] == 0, "( NAN > -INFINITY ) = false", intOut[ 31 ] )
}
// Stage 3: limits on HUGE_VAL (double)
if( !is_extension_available( deviceID, "cl_khr_fp64" ) )
log_info( "Note: Skipping double HUGE_VAL tests (doubles unsupported on device)\n" );
else
{
cl_device_fp_config config = 0;
error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( config ), &config, NULL );
test_error( error, "Unable to run INFINITY/NAN tests (unable to get double FP_CONFIG bits)" );
if( ( config & CL_FP_INF_NAN ) == 0 )
log_info( "Skipping HUGE_VAL tests (INF/NAN not supported on this device)" );
else
{
clProgramWrapper program;
clKernelWrapper kernel;
if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_double_limits, "test" ) != 0 )
{
return -1;
}
doubleStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(doubleOut), NULL, &error );
test_error( error, "Creating test array failed" );
error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg( kernel, 1, sizeof( doubleStream ), &doubleStream );
test_error( error, "Unable to set indexed kernel arguments" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, doubleStream, CL_TRUE, 0, sizeof(doubleOut), doubleOut, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
TEST_DOUBLE_ASSERTION( intOut[0] == 8, "sizeof( HUGE_VAL ) = 8", intOut[0] )
TEST_DOUBLE_ASSERTION( intOut[1] == 1, "HUGE_VAL = INFINITY", intOut[1] )
TEST_DOUBLE_ASSERTION( intOut[2] == 1, "isinf( HUGE_VAL ) = true", intOut[2] )
TEST_DOUBLE_ASSERTION( intOut[3] == 0, "isnormal( HUGE_VAL ) = false", intOut[3] )
TEST_DOUBLE_ASSERTION( intOut[4] == 0, "isnan( HUGE_VAL ) = false", intOut[4] )
TEST_DOUBLE_ASSERTION( intOut[5] == 1, "HUGE_VAL = HUGE_VAL", intOut[5] )
TEST_DOUBLE_ASSERTION( intOut[6] == 1, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", intOut[6] )
TEST_DOUBLE_ASSERTION( *( (uint64_t *)&doubleOut[0] ) == 0x7ff0000000000000ULL, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", doubleOut[0] )
}
}
return 0;
}

View File

@@ -1,393 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#include <ctype.h>
// Test __FILE__, __LINE__, __OPENCL_VERSION__, __OPENCL_C_VERSION__, __ENDIAN_LITTLE__, __ROUNDING_MODE__, __IMAGE_SUPPORT__, __FAST_RELAXED_MATH__
// __kernel_exec
const char *preprocessor_test = {
"#line 2 \"%s\"\n"
"__kernel void test( __global int *results, __global char *outFileString, __global char *outRoundingString )\n"
"{\n"
// Integer preprocessor macros
"#ifdef __IMAGE_SUPPORT__\n"
" results[0] = __IMAGE_SUPPORT__;\n"
"#else\n"
" results[0] = 0xf00baa;\n"
"#endif\n"
"#ifdef __ENDIAN_LITTLE__\n"
" results[1] = __ENDIAN_LITTLE__;\n"
"#else\n"
" results[1] = 0xf00baa;\n"
"#endif\n"
"#ifdef __OPENCL_VERSION__\n"
" results[2] = __OPENCL_VERSION__;\n"
"#else\n"
" results[2] = 0xf00baa;\n"
"#endif\n"
"#ifdef __OPENCL_C_VERSION__\n"
" results[3] = __OPENCL_C_VERSION__;\n"
"#else\n"
" results[3] = 0xf00baa;\n"
"#endif\n"
"#ifdef __LINE__\n"
" results[4] = __LINE__;\n"
"#else\n"
" results[4] = 0xf00baa;\n"
"#endif\n"
#if 0 // Removed by Affie's request 2/24
"#ifdef __FAST_RELAXED_MATH__\n"
" results[5] = __FAST_RELAXED_MATH__;\n"
"#else\n"
" results[5] = 0xf00baa;\n"
"#endif\n"
#endif
"#ifdef __kernel_exec\n"
" results[6] = 1;\n" // By spec, we can only really evaluate that it is defined, not what it expands to
"#else\n"
" results[6] = 0xf00baa;\n"
"#endif\n"
// String preprocessor macros. Technically, there are strings in OpenCL, but not really.
"#ifdef __FILE__\n"
" int i;\n"
" constant char *f = \"\" __FILE__;\n"
" for( i = 0; f[ i ] != 0 && i < 512; i++ )\n"
" outFileString[ i ] = f[ i ];\n"
" outFileString[ i ] = 0;\n"
"#else\n"
" outFileString[ 0 ] = 0;\n"
"#endif\n"
"}\n"
};
int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 3 ];
int error;
size_t threads[] = {1,1,1};
cl_int results[ 7 ];
cl_char fileString[ 512 ] = "", roundingString[ 128 ] = "";
char programSource[4096];
char curFileName[512];
char *programPtr = programSource;
int i = 0;
snprintf(curFileName, 512, "%s", __FILE__);
#ifdef _WIN32
// Replace "\" with "\\"
while(curFileName[i] != '\0') {
if (curFileName[i] == '\\') {
int j = i + 1;
char prev = '\\';
while (curFileName[j - 1] != '\0') {
char tmp = curFileName[j];
curFileName[j] = prev;
prev = tmp;
j++;
}
i++;
}
i++;
}
#endif
sprintf(programSource,preprocessor_test,curFileName);
// Create the kernel
if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test" ) != 0 )
{
return -1;
}
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(results), NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(fileString), NULL, &error);
test_error( error, "Creating test array failed" );
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(roundingString), NULL, &error);
test_error( error, "Creating test array failed" );
// Set up and run
for( int i = 0; i < 3; i++ )
{
error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
test_error( error, "Unable to set indexed kernel arguments" );
}
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(results), results, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(fileString), fileString, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(roundingString), roundingString, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
/////// Check the integer results
// We need to check these values against what we know is supported on the device
if( checkForImageSupport( deviceID ) == 0 )
{
// If images are supported, the constant should have been defined to the value 1
if( results[ 0 ] == 0xf00baa )
{
log_error( "ERROR: __IMAGE_SUPPORT__ undefined even though images are supported\n" );
return -1;
}
else if( results[ 0 ] != 1 )
{
log_error( "ERROR: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 0 ] );
return -1;
}
}
else
{
// If images aren't supported, the constant should be undefined
if( results[ 0 ] != 0xf00baa )
{
log_error( "ERROR: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported", (int)results[ 0 ] );
return -1;
}
}
// __ENDIAN_LITTLE__ is similar to __IMAGE_SUPPORT__: 1 if it's true, undefined if it isn't
cl_bool deviceIsLittleEndian;
error = clGetDeviceInfo( deviceID, CL_DEVICE_ENDIAN_LITTLE, sizeof( deviceIsLittleEndian ), &deviceIsLittleEndian, NULL );
test_error( error, "Unable to get endian property of device to validate against" );
if( deviceIsLittleEndian )
{
if( results[ 1 ] == 0xf00baa )
{
log_error( "ERROR: __ENDIAN_LITTLE__ undefined even though the device is little endian\n" );
return -1;
}
else if( results[ 1 ] != 1 )
{
log_error( "ERROR: __ENDIAN_LITTLE__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 1 ] );
return -1;
}
}
else
{
if( results[ 1 ] != 0xf00baa )
{
log_error( "ERROR: __ENDIAN_LITTLE__ defined to value %d even though the device is not little endian (should be undefined per spec)", (int)results[ 1 ] );
return -1;
}
}
// __OPENCL_VERSION__
if( results[ 2 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ undefined!" );
return -1;
}
// The OpenCL version reported by the macro reports the feature level supported by the compiler. Since
// this doesn't directly match any property we can query, we just check to see if it's a sane value
char versionBuffer[ 128 ];
error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( versionBuffer ), versionBuffer, NULL );
test_error( error, "Unable to get device's version to validate against" );
// We need to parse to get the version number to compare against
char *p1, *p2, *p3;
for( p1 = versionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ )
;
for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ )
;
for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ )
;
if( p2 == p3 )
{
log_error( "ERROR: Unable to verify OpenCL version string (platform string is incorrect format)\n" );
return -1;
}
*p2 = 0;
*p3 = 0;
int major = atoi( p1 );
int minor = atoi( p2 + 1 );
int realVersion = ( major * 100 ) + ( minor * 10 );
if( ( results[ 2 ] < 100 ) || ( results[ 2 ] > realVersion ) )
{
log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ does not make sense w.r.t. device's version string! "
"(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor );
return -1;
}
// __OPENCL_C_VERSION__
if( results[ 3 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ undefined!\n" );
return -1;
}
// The OpenCL C version reported by the macro reports the OpenCL C supported by the compiler for this OpenCL device.
char cVersionBuffer[ 128 ];
error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( cVersionBuffer ), cVersionBuffer, NULL );
test_error( error, "Unable to get device's OpenCL C version to validate against" );
// We need to parse to get the version number to compare against
for( p1 = cVersionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ )
;
for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ )
;
for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ )
;
if( p2 == p3 )
{
log_error( "ERROR: Unable to verify OpenCL C version string (platform string is incorrect format)\n" );
return -1;
}
*p2 = 0;
*p3 = 0;
major = atoi( p1 );
minor = atoi( p2 + 1 );
realVersion = ( major * 100 ) + ( minor * 10 );
if( ( results[ 3 ] < 100 ) || ( results[ 3 ] > realVersion ) )
{
log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ does not make sense w.r.t. device's version string! "
"(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor );
return -1;
}
// __LINE__
if( results[ 4 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __LINE__ undefined!" );
return -1;
}
// This is fun--we get to search for where __LINE__ actually is so we know what line it should define to!
// Note: it shows up twice, once for the #ifdef, and the other for the actual result output
const char *linePtr = strstr( preprocessor_test, "__LINE__" );
if( linePtr == NULL )
{
log_error( "ERROR: Nonsensical NULL pointer encountered!" );
return -2;
}
linePtr = strstr( linePtr + strlen( "__LINE__" ), "__LINE__" );
if( linePtr == NULL )
{
log_error( "ERROR: Nonsensical NULL pointer encountered!" );
return -2;
}
// Now count how many carriage returns are before the string
const char *retPtr = strchr( preprocessor_test, '\n' );
int retCount = 1;
for( ; ( retPtr < linePtr ) && ( retPtr != NULL ); retPtr = strchr( retPtr + 1, '\n' ) )
retCount++;
if( retCount != results[ 4 ] )
{
log_error( "ERROR: Kernel preprocessor __LINE__ does not expand to the actual line number! (expanded to %d, but was on line %d)\n",
results[ 4 ], retCount );
return -1;
}
#if 0 // Removed by Affie's request 2/24
// __FAST_RELAXED_MATH__
// Since create_single_kernel_helper does NOT define -cl-fast-relaxed-math, this should be undefined
if( results[ 5 ] != 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ defined even though build option was not used (should be undefined)\n" );
return -1;
}
#endif
// __kernel_exec
// We can ONLY check to verify that it is defined
if( results[ 6 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __kernel_exec must be defined\n" );
return -1;
}
//// String preprocessors
// Since we provided the program directly, __FILE__ should compile to "<program source>".
if( fileString[ 0 ] == 0 )
{
log_error( "ERROR: Kernel preprocessor __FILE__ undefined!\n" );
return -1;
}
else if( strncmp( (char *)fileString, __FILE__, 512 ) != 0 )
{
log_info( "WARNING: __FILE__ defined, but to an unexpected value (%s)\n\tShould be: \"%s\"", fileString, __FILE__ );
return -1;
}
#if 0 // Removed by Affie's request 2/24
// One more try through: try with -cl-fast-relaxed-math to make sure the appropriate preprocessor gets defined
clProgramWrapper programB = clCreateProgramWithSource( context, 1, preprocessor_test, NULL, &error );
test_error( error, "Unable to create test program" );
// Try compiling
error = clBuildProgram( programB, 1, &deviceID, "-cl-fast-relaxed-math", NULL, NULL );
test_error( error, "Unable to build program" );
// Create a kernel again to run against
clKernelWrapper kernelB = clCreateKernel( programB, "test", &error );
test_error( error, "Unable to create testing kernel" );
// Set up and run
for( int i = 0; i < 3; i++ )
{
error = clSetKernelArg( kernelB, i, sizeof( streams[i] ), &streams[i] );
test_error( error, "Unable to set indexed kernel arguments" );
}
error = clEnqueueNDRangeKernel( queue, kernelB, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
// Only need the one read
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(results), results, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
// We only need to check the one result this time
if( results[ 5 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ not defined!\n" );
return -1;
}
else if( results[ 5 ] != 1 )
{
log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ not defined to 1 (was %d)\n", results[ 5 ] );
return -1;
}
#endif
return 0;
}

View File

@@ -1,406 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
#include "harness/errorHelpers.h"
#define DEBUG 0
#define DEPTH 16
// Limit the maximum code size for any given kernel.
#define MAX_CODE_SIZE (1024*32)
const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1};
const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"};
// Creates a kernel by enumerating all possible ways of building the vector out of vloads
// skip_to_results will skip results up to a given number. If the amount of code generated
// is greater than MAX_CODE_SIZE, this function will return the number of results used,
// which can then be used as the skip_to_result value to continue where it left off.
int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) {
int number_of_sizes;
switch (output_size) {
case 1:
number_of_sizes = 1;
break;
case 2:
number_of_sizes = 2;
break;
case 3:
number_of_sizes = 3;
break;
case 4:
number_of_sizes = 4;
break;
case 8:
number_of_sizes = 5;
break;
case 16:
number_of_sizes = 6;
break;
default:
log_error("Invalid size: %d\n", output_size);
return -1;
}
int total_results = 0;
int current_result = 0;
int total_vloads = 0;
int total_program_length = 0;
int aborted_due_to_size = 0;
if (skip_to_result < 0)
skip_to_result = 0;
// The line of code for the vector creation
char line[1024];
// Keep track of what size vector we are using in each position so we can iterate through all fo them
int pos[DEPTH];
int max_size = output_size;
if (DEBUG > 1) log_info("max_size: %d\n", max_size);
program[0] = '\0';
sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n",
type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]);
total_program_length += (int)strlen(program);
char storePrefix[ 128 ], storeSuffix[ 128 ];
// Start out trying sizes 1,1,1,1,1...
for (int i=0; i<DEPTH; i++)
pos[i] = 0;
int done = 0;
while (!done) {
if (DEBUG > 1) {
log_info("pos size[] = [");
for (int k=0; k<DEPTH; k++)
log_info(" %d ", pos[k]);
log_info("]\n");
}
// Go through the selected vector sizes and see if the first n of them fit the
// required size exactly.
int size_so_far = 0;
int vloads;
for ( vloads=0; vloads<DEPTH; vloads++) {
if (size_so_far + sizes[pos[vloads]] <= max_size) {
size_so_far += sizes[pos[vloads]];
} else {
break;
}
}
if (DEBUG > 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far);
// If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations
// of the sizes to the right. Prune them from the search.
if (size_so_far != max_size) {
// Zero all the sizes to the right
for (int k=vloads+1; k<DEPTH; k++) {
pos[k] = 0;
}
// Increment this current size and propagate the values up if needed
for (int d=vloads; d>=0; d--) {
pos[d]++;
if (pos[d] >= number_of_sizes) {
pos[d] = 0;
if (d == 0) {
// If we rolled over then we are done
done = 1;
break;
}
} else {
break;
}
}
// Go on to the next size since this one (and all others "under" it) didn't fit
continue;
}
// Generate the actual load line if we are building this part
line[0]= '\0';
if (skip_to_result == 0 || total_results >= skip_to_result) {
if( number_of_sizes == 3 )
{
sprintf( storePrefix, "vstore3( " );
sprintf( storeSuffix, ", %d, result )", current_result );
}
else
{
sprintf( storePrefix, "result[%d] = ", current_result );
storeSuffix[ 0 ] = 0;
}
sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size);
current_result++;
int offset = 0;
for (int i=0; i<vloads; i++) {
if (pos[i] == 0)
sprintf(line + strlen(line), "src[%d]", offset);
else
sprintf(line + strlen(line), "vload%s(0,src+%d)", size_names[pos[i]], offset);
offset += sizes[pos[i]];
if (i<(vloads-1))
sprintf(line + strlen(line), ",");
}
sprintf(line + strlen(line), ")%s;\n", storeSuffix);
strcat(program, line);
total_vloads += vloads;
}
total_results++;
total_program_length += (int)strlen(line);
if (total_program_length > MAX_CODE_SIZE) {
aborted_due_to_size = 1;
done = 1;
}
if (DEBUG) log_info("line is: %s", line);
// If we did not use all of them, then we ignore any changes further to the right.
// We do this by causing those loops to skip on the next iteration.
if (vloads < DEPTH) {
if (DEBUG > 1) log_info("done with this depth\n");
for (int k=vloads; k<DEPTH; k++)
pos[k] = number_of_sizes;
}
// Increment the far right size by 1, rolling over as needed
for (int d=DEPTH-1; d>=0; d--) {
pos[d]++;
if (pos[d] >= number_of_sizes) {
pos[d] = 0;
if (d == 0) {
// If we rolled over at the far-left then we are done
done = 1;
break;
}
} else {
break;
}
}
if (done)
break;
// Continue until we are done.
}
strcat(program, "}\n\n"); //log_info("%s\n", program);
total_program_length += 3;
if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n",
get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads);
*number_of_results = current_result;
if (aborted_due_to_size)
return total_results;
return 0;
}
int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16};
char *program_source;
int error;
int total_errors = 0;
cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
void *input_data_converted;
void *output_data;
int number_of_results;;
input_data_converted = malloc(sizeof(cl_double)*16);
program_source = (char*)malloc(sizeof(char)*1024*1024*4);
// Iterate over all the types
for (int type_index=0; type_index<10; type_index++) {
if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong)))
{
log_info("Long/ULong data type not supported on this device\n");
continue;
}
clMemWrapper input;
if (vecType[type_index] == kDouble) {
if (!is_extension_available(deviceID, "cl_khr_fp64")) {
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
continue;
}
log_info("Testing doubles.\n");
}
// Convert the data to the right format for the test.
memset(input_data_converted, 0xff, sizeof(cl_double)*16);
if (vecType[type_index] != kDouble) {
for (int j=0; j<16; j++) {
convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j,
kInt, 0, kRoundToEven, vecType[type_index]);
}
} else {
memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16);
}
input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16,
(vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error);
if (error) {
print_error(error, "clCreateBuffer failed");
total_errors++;
continue;
}
// Iterate over all the vector sizes.
for (int size_index=1; size_index< 5; size_index++) {
size_t global[] = {1,1,1};
int number_generated = -1;
int previous_number_generated = 0;
log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]);
while (number_generated != 0) {
clMemWrapper output;
clKernelWrapper kernel;
clProgramWrapper program;
number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated);
if (number_generated != 0) {
if (previous_number_generated == 0)
log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0);
log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1);
}
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation");
if (error) {
log_error("create_single_kernel_helper failed.\n");
total_errors++;
break;
}
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index],
NULL, &error);
if (error) {
print_error(error, "clCreateBuffer failed");
total_errors++;
break;
}
error = clSetKernelArg(kernel, 0, sizeof(input), &input);
error |= clSetKernelArg(kernel, 1, sizeof(output), &output);
if (error) {
print_error(error, "clSetKernelArg failed");
total_errors++;
break;
}
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
if (error) {
print_error(error, "clEnqueueNDRangeKernel failed");
total_errors++;
break;
}
error = clFinish(queue);
if (error) {
print_error(error, "clFinish failed");
total_errors++;
break;
}
output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]);
if (output_data == NULL) {
log_error("Failed to allocate memory for output data.\n");
total_errors++;
break;
}
memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]);
error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0,
number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index],
output_data, 0, NULL, NULL);
if (error) {
print_error(error, "clEnqueueReadBuffer failed");
total_errors++;
free(output_data);
break;
}
// Check the results
char *res = (char *)output_data;
char *exp = (char *)input_data_converted;
for (int i=0; i<number_of_results; i++) {
// If they do not match, then print out why
if (memcmp(input_data_converted,
res + i*(get_explicit_type_size(vecType[type_index])*vecSizes[size_index]),
get_explicit_type_size(vecType[type_index])*vecSizes[size_index])
) {
log_error("Data failed to validate for result %d\n", i);
// Find the line in the program that failed. This is ugly.
char search[32];
char found_line[1024];
found_line[0]='\0';
search[0]='\0';
sprintf(search, "result[%d] = (", i);
char *start_loc = strstr(program_source, search);
if (start_loc == NULL)
log_error("Failed to find program source for failure for %s in \n%s", search, program_source);
else {
char *end_loc = strstr(start_loc, "\n");
memcpy(&found_line, start_loc, (end_loc-start_loc));
found_line[end_loc-start_loc]='\0';
log_error("Failed vector line: %s\n", found_line);
}
for (int j=0; j<(int)vecSizes[size_index]; j++) {
char expected_value[64];
char returned_value[64];
expected_value[0]='\0';
returned_value[0]='\0';
print_type_to_string(vecType[type_index], (void*)(res+get_explicit_type_size(vecType[type_index])*(i*vecSizes[size_index]+j)), returned_value);
print_type_to_string(vecType[type_index], (void*)(exp+get_explicit_type_size(vecType[type_index])*j), expected_value);
log_error("index [%d, component %d]: got: %s expected: %s\n", i, j,
returned_value, expected_value);
}
total_errors++;
}
}
free(output_data);
previous_number_generated = number_generated;
} // number_generated != 0
} // vector sizes
} // vector types
free(input_data_converted);
free(program_source);
return total_errors;
}