Remove all obvious near-duplicate tests from the compatibility suite (#496)

- whitespace diffs
- trivial differences (mostly bugfixes merged to master)
- use of the kernel helpers to create programs

Signed-off-by: Kévin Petit <kpet@free.fr>
This commit is contained in:
Kévin Petit
2019-11-24 12:47:26 +00:00
committed by GitHub
parent 2dca46bc00
commit a018d6a3be
51 changed files with 1 additions and 13558 deletions

View File

@@ -2,16 +2,9 @@ set(MODULE_NAME COMPATIBILITY_API)
set(${MODULE_NAME}_SOURCES
main.c
test_retain.cpp
test_retain_program.c
test_queries.cpp
test_create_kernels.c
test_kernels.c
test_api_min_max.c
test_binary.cpp
test_create_context_from_type.cpp
test_mem_object_info.cpp
test_null_buffer_arg.c
test_kernel_arg_info.c
test_queue_properties.cpp
)

View File

@@ -36,27 +36,9 @@ test_definition test_list[] = {
ADD_TEST( get_command_queue_info ),
ADD_TEST( get_context_info ),
ADD_TEST( get_device_info ),
ADD_TEST( enqueue_task ),
ADD_TEST( binary_get ),
ADD_TEST( binary_create ),
ADD_TEST( kernel_required_group_size ),
ADD_TEST( release_kernel_order ),
ADD_TEST( release_during_execute ),
ADD_TEST( load_single_kernel ),
ADD_TEST( load_two_kernels ),
ADD_TEST( load_two_kernels_in_one ),
ADD_TEST( load_two_kernels_manually ),
ADD_TEST( get_program_info_kernel_names ),
ADD_TEST( get_kernel_arg_info ),
ADD_TEST( create_kernels_in_program ),
ADD_TEST( get_kernel_info ),
ADD_TEST( execute_kernel_local_sizes ),
ADD_TEST( set_kernel_arg_by_index ),
ADD_TEST( set_kernel_arg_constant ),
ADD_TEST( set_kernel_arg_struct_array ),
ADD_TEST( kernel_global_constant ),
ADD_TEST( min_max_thread_dimensions ),
ADD_TEST( min_max_work_items_sizes ),
@@ -86,16 +68,6 @@ test_definition test_list[] = {
ADD_TEST( min_max_device_version ),
ADD_TEST( min_max_language_version ),
ADD_TEST( create_context_from_type ),
ADD_TEST( repeated_setup_cleanup ),
ADD_TEST( retain_queue_single ),
ADD_TEST( retain_queue_multiple ),
ADD_TEST( retain_mem_object_single ),
ADD_TEST( retain_mem_object_multiple ),
ADD_TEST( null_buffer_arg ),
ADD_TEST( get_buffer_info ),
ADD_TEST( get_image2d_info ),
ADD_TEST( get_image3d_info ),

View File

@@ -1,226 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
// OpenCL C source used by the binary tests in this file: a single kernel,
// "sample_test", in which each work-item converts src[tid] to int, adds one
// and stores the result in dst[tid].
static const char *sample_binary_kernel_source[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid] + 1;\n"
"\n"
"}\n" };
/*
 * Builds the sample program from source for deviceID and checks that its
 * device binary can be queried through clGetProgramInfo:
 *   - CL_PROGRAM_BINARY_SIZES must report a non-zero size,
 *   - a size-only CL_PROGRAM_BINARIES query must report the size of a
 *     one-entry pointer array,
 *   - the binary itself must be retrievable.
 *
 * The contents of the binary are not inspected.  queue and num_elements are
 * unused.  Returns 0 on success and a non-zero value on failure.
 */
int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    size_t binarySize;

    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
    test_error( error, "Unable to create program from source" );

    // Build so we have a binary to get
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // One destination pointer per device; the slot is filled in below, after
    // the size-only query, so nothing is allocated while that query can fail.
    unsigned char *buffers[ 1 ] = { NULL };

    // Do another sanity check here first
    size_t size;
    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
    test_error( error, "Unable to get expected size of binaries array" );
    if( size != sizeof( buffers ) )
    {
        log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
        return -1;
    }

    // Create a buffer and get the actual binary
    unsigned char *binary = (unsigned char*)malloc( sizeof(unsigned char) * binarySize );
    if( binary == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the program binary\n", (int)binarySize );
        return -1;
    }
    buffers[ 0 ] = binary;

    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), buffers, NULL );

    // No way to verify the binary is correct, so the buffer is released before
    // the status is examined; this way the failure path cannot leak it.
    free( binary );
    test_error( error, "Unable to get program binary" );

    return 0;
}
int test_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
/* To test this in a self-contained fashion, we have to create a program with
source, then get the binary, then use that binary to reload the program, and then verify */
int error;
clProgramWrapper program, program_from_binary;
size_t binarySize;
program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
test_error( error, "Unable to create program from source" );
// Build so we have a binary to get
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
test_error( error, "Unable to build test program" );
// Get the size of the resulting binary (only one device)
error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
test_error( error, "Unable to get binary size" );
// Sanity check
if( binarySize == 0 )
{
log_error( "ERROR: Binary size of program is zero\n" );
return -1;
}
// Create a buffer and get the actual binary
unsigned char *binary = (unsigned char*)malloc(binarySize);
const unsigned char *buffers[ 1 ] = { binary };
error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
test_error( error, "Unable to get program binary" );
cl_int loadErrors[ 1 ];
program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
test_error( error, "Unable to load valid program binary" );
test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );
error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL );
test_error( error, "Unable to build binary program" );
// Get the size of the binary built from the first binary
size_t binary2Size;
error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARY_SIZES, sizeof( binary2Size ), &binary2Size, NULL );
test_error( error, "Unable to get size for the binary program" );
// Now get the binary one more time and verify it loaded the right binary
unsigned char *binary2 = (unsigned char*)malloc(binary2Size);
buffers[ 0 ] = binary2;
error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
test_error( error, "Unable to get program binary second time" );
// Try again, this time without passing the status ptr in, to make sure we still
// get a valid binary
clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binary2Size, buffers, NULL, &error );
test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );
error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
test_error( error, "Unable to build binary program created without binary_status" );
// Get the size of the binary created without passing binary_status
size_t binary3Size;
error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARY_SIZES, sizeof( binary3Size ), &binary3Size, NULL );
test_error( error, "Unable to get size for the binary program created without binary_status" );
// Now get the binary one more time
unsigned char *binary3 = (unsigned char*)malloc(binary3Size);
buffers[ 0 ] = binary3;
error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
test_error( error, "Unable to get program binary from the program created without binary_status" );
// We no longer need these intermediate binaries
free(binary);
free(binary2);
free(binary3);
// Now execute them both to see that they both do the same thing.
clMemWrapper in, out, out_binary;
clKernelWrapper kernel, kernel_binary;
cl_int *out_data, *out_data_binary;
cl_float *in_data;
size_t size_to_run = 1000;
// Allocate some data
in_data = (cl_float*)malloc(sizeof(cl_float)*size_to_run);
out_data = (cl_int*)malloc(sizeof(cl_int)*size_to_run);
out_data_binary = (cl_int*)malloc(sizeof(cl_int)*size_to_run);
memset(out_data, 0, sizeof(cl_int)*size_to_run);
memset(out_data_binary, 0, sizeof(cl_int)*size_to_run);
for (size_t i=0; i<size_to_run; i++)
in_data[i] = (cl_float)i;
// Create the buffers
in = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*size_to_run, in_data, &error);
test_error( error, "clCreateBuffer failed");
out = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data, &error);
test_error( error, "clCreateBuffer failed");
out_binary = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data_binary, &error);
test_error( error, "clCreateBuffer failed");
// Create the kernels
kernel = clCreateKernel(program, "sample_test", &error);
test_error( error, "clCreateKernel failed");
kernel_binary = clCreateKernel(program_from_binary, "sample_test", &error);
test_error( error, "clCreateKernel from binary failed");
// Set the arguments
error = clSetKernelArg(kernel, 0, sizeof(in), &in);
test_error( error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 1, sizeof(out), &out);
test_error( error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_binary, 0, sizeof(in), &in);
test_error( error, "clSetKernelArg failed");
error = clSetKernelArg(kernel_binary, 1, sizeof(out_binary), &out_binary);
test_error( error, "clSetKernelArg failed");
// Execute the kernels
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
test_error( error, "clEnqueueNDRangeKernel failed");
error = clEnqueueNDRangeKernel(queue, kernel_binary, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
test_error( error, "clEnqueueNDRangeKernel for binary kernel failed");
// Finish up
error = clFinish(queue);
test_error( error, "clFinish failed");
// Get the results back
error = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data, 0, NULL, NULL);
test_error( error, "clEnqueueReadBuffer failed");
error = clEnqueueReadBuffer(queue, out_binary, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data_binary, 0, NULL, NULL);
test_error( error, "clEnqueueReadBuffer failed");
// Compare the results
if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
{
log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
return -1;
}
// All done!
free(in_data);
free(out_data);
free(out_data_binary);
return 0;
}

View File

@@ -1,130 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include "harness/testHarness.h"
#include "harness/conversions.h"
extern cl_uint gRandomSeed;
/*
 * Creates a context with clCreateContextFromType, using the device type and
 * platform reported for deviceID, then checks that the context is usable:
 * a command queue is created on it for deviceID, a float->int conversion
 * kernel is built and run over 10 random values, and the device results are
 * compared with the same conversion done on the host.
 *
 * The context, queue and num_elements arguments are unused; the test works
 * entirely on the context and queue it creates itself.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    clContextWrapper context_to_test;
    clCommandQueueWrapper queue_to_test;
    size_t threads[1], localThreads[1];
    cl_float inputData[10];
    cl_int outputData[10];
    int i;
    RandomSeed seed( gRandomSeed );

    const char *sample_single_test_kernel[] = {
        "__kernel void sample_test(__global float *src, __global int *dst)\n"
        "{\n"
        " int tid = get_global_id(0);\n"
        "\n"
        " dst[tid] = (int)src[tid];\n"
        "\n"
        "}\n" };

    cl_device_type type;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n");

    cl_platform_id platform;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n");

    // The property list is an array of integers terminated by 0; NULL is a
    // pointer constant and is not a portable initializer for an integer.
    cl_context_properties properties[3] = {
        (cl_context_properties)CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };

    context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error);
    test_error(error, "clCreateContextFromType failed");
    if (context_to_test == NULL) {
        log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    // The third argument is a bitfield of queue properties, not a pointer:
    // pass 0 for "no properties".
    queue_to_test = clCreateCommandQueue(context_to_test, deviceID, 0, &error);
    test_error(error, "clCreateCommandQueue failed");
    if (queue_to_test == NULL) {
        log_error("clCreateCommandQueue returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Write some test data */
    memset( outputData, 0, sizeof( outputData ) );

    for (i=0; i<10; i++)
        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);

    error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
    test_error( error, "Unable to set testing kernel data" );

    /* Test setting the arguments by index manually */
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;

    error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        if (outputData[i] != (int)inputData[i])
        {
            log_error( "ERROR: Data did not verify on first pass!\n" );
            return -1;
        }
    }

    return 0;
}

View File

@@ -1,643 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/testHarness.h"
// Program source containing one kernel, "sample_test", which stores
// (int)src[tid] in dst[tid] for each work-item.
const char *sample_single_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
// Length of the source string above; filled in by test_load_single_kernel
// before it is passed to clCreateProgramWithSource.
size_t sample_single_kernel_lengths[1];
// Program source split into two strings, one kernel per string:
// "sample_test" (float -> int) and "sample_test2" (int -> float).
const char *sample_two_kernels[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n",
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
// Lengths of the two source strings above; filled in by test_load_two_kernels.
size_t sample_two_kernel_lengths[2];
// The same two kernels as sample_two_kernels, but in ONE source string: there
// is no comma between the two groups of literals, so they concatenate.
const char *sample_two_kernels_in_1[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
// Length of the single source string above; filled in by
// test_load_two_kernels_in_one.
size_t sample_two_kernels_in_1_lengths[1];
// Source of "test_kernel", which writes src[gid] + 1 to dst[gid]; used by
// test_repeated_setup_cleanup.
const char *repeate_test_kernel =
"__kernel void test_kernel(__global int *src, __global int *dst)\n"
"{\n"
" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n"
"}\n";
/*
 * Builds a program containing a single kernel, obtains that kernel through
 * clCreateKernelsInProgram, and validates what clGetKernelInfo reports for it:
 * owning program, context, argument count (2) and function name
 * ("sample_test"), including the sizes returned for each query.
 *
 * queue and num_elements are unused.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    cl_program testProgram;
    clKernelWrapper kernel;
    cl_context testContext;
    unsigned int numKernels = 0;
    cl_char testName[512];
    cl_uint testArgCount;
    size_t realSize;

    /* Preprocess: calc the length of each source file line */
    sample_single_kernel_lengths[ 0 ] = strlen( sample_single_kernel[ 0 ] );

    /* Create a program */
    program = clCreateProgramWithSource( context, 1, sample_single_kernel, sample_single_kernel_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create single kernel program" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build single kernel program" );

    error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels);
    test_error( error, "Unable to create single kernel program" );

    /* The program defines exactly one kernel; the two-kernel tests verify the
       reported count in the same way. */
    if( numKernels != 1 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check program and context pointers */
    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize );
    test_error( error, "Unable to get kernel's program" );
    if( (cl_program)testProgram != (cl_program)program )
    {
        log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program );
        return -1;
    }
    if( realSize != sizeof( cl_program ) )
    {
        log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize );
    test_error( error, "Unable to get kernel's context" );
    if( (cl_context)testContext != (cl_context)context )
    {
        log_error( "ERROR: Returned kernel's context does not match program used to create it! (Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context );
        return -1;
    }
    if( realSize != sizeof( cl_context ) )
    {
        log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize );
        return -1;
    }

    /* Test arg count: first the size-only query, then the value itself */
    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize );
    test_error( error, "Unable to get size of arg count info from kernel" );
    if( realSize != sizeof( testArgCount ) )
    {
        log_error( "ERROR: size of arg count not valid! %d\n", (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );
    if( testArgCount != 2 )
    {
        log_error( "ERROR: Kernel arg count does not match!\n" );
        return -1;
    }

    /* Test function name */
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize );
    test_error( error, "Unable to get name from kernel" );
    if( strcmp( (char *)testName, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel names do not match!\n" );
        return -1;
    }
    /* The reported size must cover the name plus its terminating NUL */
    if( realSize != strlen( (char *)testName ) + 1 )
    {
        log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize );
        return -1;
    }

    /* All done */
    return 0;
}
/*
 * Builds a program from two source strings (one kernel each) and checks that
 * clCreateKernelsInProgram returns exactly two kernels, named "sample_test"
 * and "sample_test2" in either order, each returned once.  Also checks that
 * the first returned kernel reports two arguments.
 *
 * queue and num_elements are unused.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[ 512 ];
    cl_uint testArgCount;

    /* Preprocess: calc the length of each source file line */
    sample_two_kernel_lengths[ 0 ] = strlen( sample_two_kernels[ 0 ] );
    sample_two_kernel_lengths[ 1 ] = strlen( sample_two_kernels[ 1 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 2, sample_two_kernels, sample_two_kernel_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program!" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );

    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    /* The order in which the kernels come back is not assumed, so track
       which of the two expected names has been seen. */
    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    /* Check second kernel */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from second kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );
    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* All done */
    return 0;
}
/*
 * Builds a program from ONE source string that defines two kernels and checks
 * that clCreateKernelsInProgram returns exactly two kernels, named
 * "sample_test" and "sample_test2" in either order, each returned once.
 * Also checks that the first returned kernel reports two arguments.
 *
 * queue and num_elements are unused.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;

    /* Preprocess: calc the length of each source file line */
    sample_two_kernels_in_1_lengths[ 0 ] = strlen( sample_two_kernels_in_1[ 0 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, sample_two_kernels_in_1_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );

    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    /* The order in which the kernels come back is not assumed, so track
       which of the two expected names has been seen. */
    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );
    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* Check second kernel */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    /* All done */
    return 0;
}
/*
 * Builds the two-kernels-in-one-string program and creates both kernels by
 * name with clCreateKernel, requesting them in the reverse of their order in
 * the source.
 *
 * queue and num_elements are unused.
 * Returns 0 on success and -1 if the program or either kernel cannot be
 * created; a failed build is reported through test_error.
 */
int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Try manually creating kernels (backwards just in case) */
    kernel1 = clCreateKernel( program, "sample_test2", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    /* Check the returned status as well as the handle, as for kernel1 */
    kernel2 = clCreateKernel( program, "sample_test", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
/*
 * Builds the two-kernels-in-one-string program and validates the program
 * queries that describe its kernels:
 *   - CL_PROGRAM_NUM_KERNELS must be 2,
 *   - CL_PROGRAM_KERNEL_NAMES must be NUL-terminated, have the expected
 *     length, and equal "sample_test;sample_test2" or the same two names in
 *     the opposite order.
 * Finally both kernels are created by name.
 *
 * queue and num_elements are unused.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;
    size_t i;

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Lookup the number of kernels in the program. */
    size_t total_kernels = 0;
    error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t),&total_kernels,NULL);
    test_error( error, "Unable to get program info num kernels");

    if (total_kernels != 2)
    {
        print_error( error, "Program did not contain two kernels" );
        return -1;
    }

    /* Lookup the kernel names.  Either ordering of the two names is accepted;
       both candidates have the same length. */
    const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ;
    const size_t num_names = sizeof( actual_names ) / sizeof( actual_names[0] );

    size_t kernel_names_len = 0;
    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,0,NULL,&kernel_names_len);
    test_error( error, "Unable to get length of kernel names list." );

    if (kernel_names_len != (strlen(actual_names[0])+1))
    {
        print_error( error, "Kernel names length did not match");
        return -1;
    }

    const size_t len = (kernel_names_len+1)*sizeof(char);
    char* kernel_names = (char*)malloc(len);
    if( kernel_names == NULL )
    {
        log_error( "ERROR: Unable to allocate memory for the kernel names list\n" );
        return -1;
    }

    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,len,kernel_names,&kernel_names_len);
    /* test_error is relied on throughout this file to leave the function on
       failure, so the buffer has to be released before it runs. */
    if( error != CL_SUCCESS )
    {
        free(kernel_names);
        kernel_names = NULL;
    }
    test_error( error, "Unable to get kernel names list." );

    /* Check to see if the kernel name array is null terminated. */
    if (kernel_names[kernel_names_len-1] != '\0')
    {
        free(kernel_names);
        print_error( error, "Kernel name list was not null terminated");
        return -1;
    }

    /* Check to see if the correct kernel name string was returned. */
    for( i = 0; i < num_names; i++ )
        if( 0 == strcmp(actual_names[i],kernel_names) )
            break;

    if (i == num_names)
    {
        /* Log before freeing: the message reads kernel_names. */
        log_error( "Kernel names \"%s\" did not match:\n", kernel_names );
        for( i = 0; i < num_names; i++ )
            log_error( "\t\t\"%s\"\n", actual_names[i] );
        free(kernel_names);
        return -1;
    }
    free(kernel_names);

    /* Try manually creating both kernels by name */
    kernel1 = clCreateKernel( program, "sample_test", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    kernel2 = clCreateKernel( program, "sample_test2", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
// Kernel used by test_enqueue_task: every work-item that runs it writes
// tid + i to dst[i] for each i in [0, count).
static const char *single_task_kernel[] = {
"__kernel void sample_test(__global int *dst, int count)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" for( int i = 0; i < count; i++ )\n"
" dst[i] = tid + i;\n"
"\n"
"}\n" };
/*
 * Runs the "sample_test" kernel from single_task_kernel with clEnqueueTask
 * and checks that the 100-element output buffer holds 0, 1, 2, ... 99.
 *
 * deviceID and num_elements are unused.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper output;
    cl_int count;

    if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) )
        return -1;

    // Create args
    count = 100;
    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count );
    test_error( error, "Unable to set kernel argument" );

    // Run task
    error = clEnqueueTask( queue, kernel, 0, NULL, NULL );
    test_error( error, "Unable to run task" );

    // Read results
    cl_int *results = (cl_int*)malloc( sizeof( cl_int ) * count );
    if( results == NULL )
    {
        log_error( "ERROR: Unable to allocate memory for the task results\n" );
        return -1;
    }

    error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL );
    // test_error is relied on throughout this file to leave the function on
    // failure, so the host buffer has to be released before it runs.
    if( error != CL_SUCCESS )
    {
        free( results );
        results = NULL;
    }
    test_error( error, "Unable to read results" );

    // Validate
    for( cl_int i = 0; i < count; i++ )
    {
        if( results[ i ] != i )
        {
            log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] );
            free( results );
            return -1;
        }
    }

    /* All done */
    free( results );
    return 0;
}
/* Number of cl_int elements processed per iteration of the setup/cleanup test. */
#define TEST_SIZE 1000

/*
 * Repeatedly builds and tears down a complete OpenCL environment.
 *
 * Each of the 100 iterations creates its own context, command queue, program,
 * kernel and two buffers on deviceID, runs "test_kernel" over TEST_SIZE
 * elements, waits for the kernel event, checks its execution status, reads the
 * output back, releases every object and finally verifies that each output
 * element equals the matching input element plus one.
 *
 * The context, queue and num_elements parameters are not used; the test only
 * works with the objects it creates itself.
 *
 * Returns 0 on success and -1 on a status or data mismatch. OpenCL failures
 * are reported through the harness test_error macro, which leaves the function
 * early.
 */
int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_context local_context;
    cl_command_queue local_queue;
    cl_program local_program;
    cl_kernel local_kernel;
    cl_mem local_mem_in, local_mem_out;
    cl_event local_event;
    size_t global_dim[3];
    int i, j, error;
    cl_int status;

    /* Host staging buffers live on the stack (2 * TEST_SIZE cl_ints). The
     * original heap buffers were never NULL-checked and leaked on every
     * test_error early return. */
    cl_int inData[TEST_SIZE];
    cl_int outData[TEST_SIZE];

    global_dim[0] = TEST_SIZE;
    global_dim[1] = 1;
    global_dim[2] = 1;

    for (i=0; i<TEST_SIZE; i++) {
        inData[i] = i;
    }

    for (i=0; i<100; i++) {
        /* Start every iteration from a zeroed output so stale data from the
         * previous round cannot satisfy the check below. */
        memset(outData, 0, sizeof(outData));

        local_context = clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &error);
        test_error( error, "clCreateContext failed");

        local_queue = clCreateCommandQueue(local_context, deviceID, 0, &error);
        test_error( error, "clCreateCommandQueue failed");

        local_program = clCreateProgramWithSource(local_context, 1, &repeate_test_kernel, NULL, &error);
        test_error( error, "clCreateProgramWithSource failed");

        error = clBuildProgram(local_program, 0, NULL, NULL, NULL, NULL);
        test_error( error, "clBuildProgram failed");

        local_kernel = clCreateKernel(local_program, "test_kernel", &error);
        test_error( error, "clCreateKernel failed");

        local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
        test_error( error, "clCreateBuffer failed");

        local_mem_out = clCreateBuffer(local_context, CL_MEM_WRITE_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
        test_error( error, "clCreateBuffer failed");

        error = clEnqueueWriteBuffer(local_queue, local_mem_in, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), inData, 0, NULL, NULL);
        test_error( error, "clEnqueueWriteBuffer failed");

        error = clEnqueueWriteBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
        test_error( error, "clEnqueueWriteBuffer failed");

        error = clSetKernelArg(local_kernel, 0, sizeof(local_mem_in), &local_mem_in);
        test_error( error, "clSetKernelArg failed");

        error = clSetKernelArg(local_kernel, 1, sizeof(local_mem_out), &local_mem_out);
        test_error( error, "clSetKernelArg failed");

        error = clEnqueueNDRangeKernel(local_queue, local_kernel, 1, NULL, global_dim, NULL, 0, NULL, &local_event);
        test_error( error, "clEnqueueNDRangeKernel failed");

        error = clWaitForEvents(1, &local_event);
        test_error( error, "clWaitForEvents failed");

        error = clGetEventInfo(local_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL);
        test_error( error, "clGetEventInfo failed");

        if (status != CL_COMPLETE) {
            log_error( "Kernel execution not complete: status %d.\n", status);
            return -1;
        }

        error = clEnqueueReadBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
        test_error( error, "clEnqueueReadBuffer failed");

        /* Tear the whole environment down before looking at the data; the
         * teardown itself is what this test exercises. */
        clReleaseEvent(local_event);
        clReleaseMemObject(local_mem_in);
        clReleaseMemObject(local_mem_out);
        clReleaseKernel(local_kernel);
        clReleaseProgram(local_program);
        clReleaseCommandQueue(local_queue);
        clReleaseContext(local_context);

        for (j=0; j<TEST_SIZE; j++) {
            if (outData[j] != inData[j] + 1) {
                log_error("Results failed to validate at iteration %d. %d != %d.\n", i, outData[j], inData[j] + 1);
                return -1;
            }
        }
    }

    return 0;
}

View File

@@ -1,704 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/typeWrappers.h"
#include "harness/conversions.h"
extern cl_uint gRandomSeed;
/* Single-kernel program: each work-item converts src[tid] from float to int
 * and stores it in dst[tid]. */
const char *sample_single_test_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
/* Program whose first argument points at a struct holding two __global int
 * pointers (A and B); each work-item stores src->A[tid] + src->B[tid]. */
const char *sample_struct_test_kernel[] = {
"typedef struct {\n"
"__global int *A;\n"
"__global int *B;\n"
"} input_pair_t;\n"
"\n"
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src->A[tid] + src->B[tid];\n"
"\n"
"}\n" };
/* Program whose first argument is an array of {int A; int B;} structs; each
 * work-item stores src[tid].A + src[tid].B. */
const char *sample_struct_array_test_kernel[] = {
"typedef struct {\n"
"int A;\n"
"int B;\n"
"} input_pair_t;\n"
"\n"
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid].A + src[tid].B;\n"
"\n"
"}\n" };
/* Program taking two __constant int inputs; each work-item stores
 * src1[tid] + src2[tid] in the __global output. */
const char *sample_const_test_kernel[] = {
"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src1[tid] + src2[tid];\n"
"\n"
"}\n" };
/* Program with a program-scope __constant (addFactor = 1024); each work-item
 * stores src1[tid] + addFactor. */
const char *sample_const_global_test_kernel[] = {
"__constant int addFactor = 1024;\n"
"__kernel void sample_test(__global int *src1, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src1[tid] + addFactor;\n"
"\n"
"}\n" };
/* Two source strings forming one program with two kernels: sample_test
 * (float -> int conversion) and sample_test2 (int -> float conversion). */
const char *sample_two_kernel_program[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n",
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
/*
 * Exercises clGetKernelInfo on a freshly built "sample_test" kernel.
 *
 * Checks, in order:
 *   - CL_KERNEL_FUNCTION_NAME: reported size and returned string
 *   - CL_KERNEL_NUM_ARGS:      reported size and value (2)
 *   - CL_KERNEL_REFERENCE_COUNT: reported size; the value is queried but not
 *                                validated
 *   - CL_KERNEL_PROGRAM:       reported size and that it is the program the
 *                              kernel was created from
 *   - CL_KERNEL_CONTEXT:       that it is the context passed to the test
 *
 * deviceID, queue and num_elements are not used.
 *
 * Returns 0 on success and -1 on any mismatch. OpenCL failures are reported
 * through the harness test_error macro, which leaves the function early.
 */
int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    /* Wrappers own the program and kernel so they are released on every
     * return path, including the early failure returns below. */
    clProgramWrapper program;
    clKernelWrapper kernel;
    cl_program expectedProgram, testProgram;
    cl_context testContext;
    cl_char name[ 512 ];
    cl_uint numArgs, numInstances;
    size_t paramSize;

    /* Create reference */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }
    expectedProgram = program;

    /* Size-only queries pass param_value_size = 0 and param_value = NULL
     * (in that order). */
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel function name param size" );
    if( paramSize != strlen( "sample_test" ) + 1 )
    {
        log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL );
    test_error( error, "Unable to get kernel function name" );
    if( strcmp( (char *)name, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel arg count param size" );
    if( paramSize != sizeof( numArgs ) )
    {
        log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
    test_error( error, "Unable to get kernel arg count" );
    if( numArgs != 2 )
    {
        log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel reference count param size" );
    if( paramSize != sizeof( numInstances ) )
    {
        log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL );
    test_error( error, "Unable to get kernel reference count" );

    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel program param size" );
    if( paramSize != sizeof( testProgram ) )
    {
        log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL );
    test_error( error, "Unable to get kernel program" );
    if( testProgram != expectedProgram )
    {
        log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", expectedProgram, testProgram );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL );
    test_error( error, "Unable to get kernel context" );
    if( testContext != context )
    {
        log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext );
        return -1;
    }

    /* Kernel and program are released by their wrappers. */
    return 0;
}
int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[2];
size_t threads[1], localThreads[1];
cl_float inputData[100];
cl_int outputData[100];
RandomSeed seed( gRandomSeed );
int i;
/* Create a kernel to test with */
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
{
return -1;
}
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error);
test_error( error, "Creating test array failed" );
/* Write some test data */
memset( outputData, 0, sizeof( outputData ) );
for (i=0; i<100; i++)
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL);
test_error( error, "Unable to set testing kernel data" );
/* Set the arguments */
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
test_error( error, "Unable to set kernel arguments" );
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
test_error( error, "Unable to set kernel arguments" );
/* Test running the kernel and verifying it */
threads[0] = (size_t)100;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size to use" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* Try again */
if( localThreads[0] > 1 )
localThreads[0] /= 2;
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
localThreads[0]--;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* And again */
if( localThreads[0] > 1 )
localThreads[0] /= 2;
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
localThreads[0]--;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* One more time */
localThreads[0] = (unsigned int)1;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
return 0;
}
/*
 * Sets the kernel arguments out of index order (argument 1 first, then
 * argument 0), runs the float-to-int kernel over 10 elements and checks the
 * converted output.
 *
 * deviceID and num_elements are not used.
 *
 * Returns 0 on success, -1 on kernel-creation failure or a data mismatch.
 * OpenCL failures are reported through the harness test_error macro.
 */
int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffers[2];
    size_t globalSize[1], localSize[1];
    cl_float hostInput[10];
    cl_int hostOutput[10];
    RandomSeed seed( gRandomSeed );
    int status;
    int idx;

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) )
        return -1;

    /* Device-side input and output buffers */
    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( hostInput ), NULL, &status );
    test_error( status, "Creating test array failed" );
    buffers[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( hostOutput ), NULL, &status );
    test_error( status, "Creating test array failed" );

    /* Random input, zeroed output */
    memset( hostOutput, 0, sizeof( hostOutput ) );
    idx = 0;
    while( idx < 10 )
    {
        hostInput[idx] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
        idx++;
    }

    status = clEnqueueWriteBuffer( queue, buffers[0], CL_TRUE, 0, sizeof( hostInput ), (void *)hostInput, 0, NULL, NULL );
    test_error( status, "Unable to set testing kernel data" );

    /* Deliberately bind argument 1 before argument 0 */
    status = clSetKernelArg( kernel, 1, sizeof( buffers[1] ), &buffers[1] );
    test_error( status, "Unable to set indexed kernel arguments" );
    status = clSetKernelArg( kernel, 0, sizeof( buffers[0] ), &buffers[0] );
    test_error( status, "Unable to set indexed kernel arguments" );

    /* Launch and read back */
    globalSize[0] = (size_t)10;
    status = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( status, "Unable to get work group size to use" );

    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
    test_error( status, "Kernel execution failed" );

    status = clEnqueueReadBuffer( queue, buffers[1], CL_TRUE, 0, sizeof( hostOutput ), (void *)hostOutput, 0, NULL, NULL );
    test_error( status, "Unable to get result data" );

    /* Every output must be the truncated input */
    for( idx = 0; idx < 10; ++idx )
    {
        if( hostOutput[idx] == (int)hostInput[idx] )
            continue;

        log_error( "ERROR: Data did not verify on first pass!\n" );
        return -1;
    }

    return 0;
}
/*
 * Passes a host struct holding two buffer handles as kernel argument 0 and
 * verifies that the kernel writes A[i] + B[i] for 10 random element pairs.
 *
 * NOTE(review): argument 0 of the kernel is a __global pointer, yet the host
 * passes a struct of two cl_mem handles by value with sizeof(struct). Confirm
 * against the clSetKernelArg specification whether an implementation is
 * required to accept this before relying on the test.
 *
 * deviceID and num_elements are not used.
 *
 * Returns 0 on success, -1 on kernel-creation failure or a data mismatch.
 * OpenCL failures are reported through the harness test_error macro.
 */
int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    /* Wrappers own every OpenCL object so nothing leaks when a check below
     * returns early. */
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streamA, streamB, outStream;
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    cl_int randomTestDataA[10], randomTestDataB[10];
    MTdata d;

    /* Host-side mirror of the kernel's input_pair_t, filled with raw handles. */
    struct img_pair_t
    {
        cl_mem streamA;
        cl_mem streamB;
    } image_pair;

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        randomTestDataA[i] = (cl_int)genrand_int32(d);
        randomTestDataB[i] = (cl_int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
    test_error( error, "Creating test array failed" );
    streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
    test_error( error, "Creating test array failed" );
    outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    image_pair.streamA = streamA;
    image_pair.streamB = streamB;

    error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &outStream);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        if (outputData[i] != randomTestDataA[i] + randomTestDataB[i])
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }

    /* Buffers, kernel and program are released by their wrappers. */
    return 0;
}
/*
 * Feeds two __constant input buffers to a kernel and checks that the output
 * holds their element-wise sum for 10 elements.
 *
 * The test first confirms that CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE is large
 * enough for a 10-element cl_int buffer. num_elements is not used.
 *
 * Returns 0 on success, -1 when the constant buffer limit is too small, the
 * kernel cannot be created or a result mismatches. OpenCL failures are
 * reported through the harness test_error macro.
 */
int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffers[3];
    size_t globalSize[1], localSize[1];
    cl_int operandA[10], operandB[10];
    cl_int sums[10];
    cl_ulong constantLimit;
    MTdata rng;
    int status;
    int idx;

    /* The inputs must fit in the device's constant memory */
    status = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( constantLimit ), &constantLimit, 0 );
    test_error( status, "Unable to get max constant buffer size" );
    if( constantLimit < sizeof( cl_int ) * 10 )
    {
        log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)constantLimit );
        return -1;
    }

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) )
        return -1;

    /* Random operands, masked to 24 bits so they are non-negative and the
     * host-side sum used for verification cannot overflow */
    rng = init_genrand( gRandomSeed );
    for( idx = 0; idx < 10; ++idx )
    {
        operandA[idx] = (cl_int)genrand_int32(rng) & 0xffffff;
        operandB[idx] = (cl_int)genrand_int32(rng) & 0xffffff;
    }
    free_mtdata(rng);
    rng = NULL;

    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( operandA ), operandA, &status );
    test_error( status, "Creating test array failed" );
    buffers[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( operandB ), operandB, &status );
    test_error( status, "Creating test array failed" );
    buffers[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( sums ), NULL, &status );
    test_error( status, "Creating test array failed" );

    /* Bind the three buffers in argument order */
    status = clSetKernelArg( kernel, 0, sizeof( buffers[0] ), &buffers[0] );
    test_error( status, "Unable to set indexed kernel arguments" );
    status = clSetKernelArg( kernel, 1, sizeof( buffers[1] ), &buffers[1] );
    test_error( status, "Unable to set indexed kernel arguments" );
    status = clSetKernelArg( kernel, 2, sizeof( buffers[2] ), &buffers[2] );
    test_error( status, "Unable to set indexed kernel arguments" );

    /* Launch and read back */
    globalSize[0] = (size_t)10;
    status = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( status, "Unable to get work group size to use" );

    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
    test_error( status, "Kernel execution failed" );

    status = clEnqueueReadBuffer( queue, buffers[2], CL_TRUE, 0, sizeof( sums ), (void *)sums, 0, NULL, NULL );
    test_error( status, "Unable to get result data" );

    for( idx = 0; idx < 10; ++idx )
    {
        if( sums[idx] == operandA[idx] + operandB[idx] )
            continue;

        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", idx, sums[idx], operandA[idx], operandB[idx], ( operandA[idx] + operandB[idx] ) );
        return -1;
    }

    return 0;
}
/*
 * Uploads an array of 10 {A, B} integer pairs and verifies that the kernel
 * writes A + B for each element.
 *
 * deviceID and num_elements are not used.
 *
 * Returns 0 on success, -1 on kernel-creation failure or a data mismatch.
 * OpenCL failures are reported through the harness test_error macro.
 */
int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    MTdata d;

    /* Host-side mirror of the kernel's input_pair_t. */
    typedef struct img_pair_type
    {
        int A;
        int B;
    } image_pair_t;
    image_pair_t image_pair[ 10 ];

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        /* Both members must be initialised: B is uploaded to the device and
         * read again during verification. */
        image_pair[i].A = (cl_int)genrand_int32(d);
        image_pair[i].B = (cl_int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        if (outputData[i] != image_pair[i].A + image_pair[i].B)
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }

    return 0;
}
/*
 * Builds a two-kernel program and exercises clCreateKernelsInProgram.
 *
 * The function is called twice: once with no output array to obtain the
 * kernel count (expected to be 2), and once to actually create the kernels.
 *
 * queue and num_elements are not used.
 *
 * Returns 0 on success, -1 when the program cannot be created or the kernel
 * count is wrong. OpenCL failures are reported through the harness test_error
 * macro.
 */
int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    cl_program rawProgram;
    /* Owns the program so it is released on every return path. */
    clProgramWrapper program;
    cl_kernel kernel[2];
    cl_uint kernelCount;

    /* Create a test program */
    rawProgram = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error);
    if( rawProgram == NULL || error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to create test program!\n" );
        return -1;
    }
    program = rawProgram;

    /* Build */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    /* Try getting the kernel count */
    error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount );
    test_error( error, "Unable to get kernel count for built program" );
    if( kernelCount != 2 )
    {
        log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %d, expected 2)\n", (int)kernelCount );
        return -1;
    }

    /* Try actually getting the kernels */
    error = clCreateKernelsInProgram( program, 2, kernel, NULL );
    test_error( error, "Unable to get kernels for built program" );

    clReleaseKernel( kernel[0] );
    clReleaseKernel( kernel[1] );
    /* The program itself is released by its wrapper. */
    return 0;
}
/*
 * Runs a kernel that adds a program-scope __constant (1024) to every input
 * element and verifies the 10 results on the host.
 *
 * deviceID and num_elements are not used.
 *
 * Returns 0 on success, -1 on kernel-creation failure or a data mismatch.
 * OpenCL failures are reported through the harness test_error macro.
 */
int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffers[2];
    size_t globalSize[1], localSize[1];
    cl_int hostInput[10];
    cl_int hostOutput[10];
    MTdata rng;
    int status;
    int idx;

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) )
        return -1;

    /* Random inputs masked to 16 bits: small and non-negative, so adding
     * 1024 during verification cannot overflow */
    rng = init_genrand( gRandomSeed );
    idx = 0;
    while( idx < 10 )
    {
        hostInput[idx] = (cl_int)genrand_int32(rng) & 0xffff;
        idx++;
    }
    free_mtdata(rng);
    rng = NULL;

    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( hostInput ), hostInput, &status );
    test_error( status, "Creating test array failed" );
    buffers[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( hostOutput ), NULL, &status );
    test_error( status, "Creating test array failed" );

    /* Bind input and output */
    status = clSetKernelArg( kernel, 0, sizeof( buffers[0] ), &buffers[0] );
    test_error( status, "Unable to set indexed kernel arguments" );
    status = clSetKernelArg( kernel, 1, sizeof( buffers[1] ), &buffers[1] );
    test_error( status, "Unable to set indexed kernel arguments" );

    /* Launch and read back */
    globalSize[0] = (size_t)10;
    status = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( status, "Unable to get work group size to use" );

    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
    test_error( status, "Kernel execution failed" );

    status = clEnqueueReadBuffer( queue, buffers[1], CL_TRUE, 0, sizeof( hostOutput ), (void *)hostOutput, 0, NULL, NULL );
    test_error( status, "Unable to get result data" );

    for( idx = 0; idx < 10; ++idx )
    {
        if( hostOutput[idx] == hostInput[idx] + 1024 )
            continue;

        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", idx, hostOutput[idx], hostInput[idx], ( hostInput[idx] + 1024 ) );
        return -1;
    }

    return 0;
}

View File

@@ -1,162 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#if defined(__APPLE__)
#include <OpenCL/opencl.h>
#include <OpenCL/cl_platform.h>
#else
#include <CL/opencl.h>
#include <CL/cl_platform.h>
#endif
#include "procs.h"
/* Result codes returned by the helpers in this file (SUCCESS == 0). */
enum { SUCCESS, FAILURE };
/* Selects how the source buffer argument is handed to clSetKernelArg:
 *   NON_NULL_PATH    - pointer to a real buffer handle
 *   ADDROF_NULL_PATH - pointer to a handle variable that holds NULL
 *   NULL_PATH        - a NULL argument-value pointer */
typedef enum { NON_NULL_PATH, ADDROF_NULL_PATH, NULL_PATH } test_type;
/* Number of work-items launched and of cl_long results read back. */
#define NITEMS 4096
/* places the casted long value of the src ptr into each element of the output
 * array, to allow testing that the kernel actually _gets_ the NULL value */
const char *kernel_string =
"kernel void test_kernel(global float *src, global long *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
" dst[tid] = (long)src;\n"
"}\n";
/*
 * The guts of the test:
 * call clSetKernelArg for argument 0 with a regular buffer, &NULL, or NULL
 * depending on the value of 'type', run the kernel over NITEMS work-items and
 * inspect the pointer values it wrote to result_buf.
 *
 *   queue      - command queue used for the launch and the read-back
 *   kernel     - kernel whose argument 1 is already bound to result_buf
 *   test_buf   - source buffer, used only for NON_NULL_PATH
 *   result_buf - buffer of NITEMS cl_long values written by the kernel
 *   type       - which way of passing argument 0 to exercise
 *
 * For NON_NULL_PATH every result must be non-zero; for the two NULL paths
 * every result must be zero.
 *
 * Returns SUCCESS or FAILURE. OpenCL failures after argument setup are
 * reported through the harness test_error macro.
 */
static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
                                      cl_mem test_buf, cl_mem result_buf, test_type type)
{
    int test_success = SUCCESS;
    unsigned int i;
    cl_int status;
    const char *typestr;
    size_t global = NITEMS;
    cl_long *host_result;

    if (type == NON_NULL_PATH) {
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "non-NULL";
    } else if (type == ADDROF_NULL_PATH) {
        test_buf = NULL;
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "&NULL";
    } else if (type == NULL_PATH) {
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
        typestr = "NULL";
    } else {
        /* Without this branch status and typestr would be read uninitialised. */
        log_error("Unknown test type %d.\n", (int)type);
        return FAILURE;
    }

    log_info("Testing setKernelArgs with %s buffer.\n", typestr);

    if (status != CL_SUCCESS) {
        log_error("clSetKernelArg failed with status: %d\n", status);
        return FAILURE; // no point in continuing *this* test
    }

    status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global,
                                    NULL, 0, NULL, NULL);
    test_error(status, "NDRangeKernel failed.");

    host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long));
    if (host_result == NULL) {
        log_error("Unable to allocate host memory for the result buffer.\n");
        return FAILURE;
    }

    status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0,
                                 sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL);
    if (status != CL_SUCCESS) {
        /* test_error below leaves the function; release the buffer first. */
        free(host_result);
    }
    test_error(status, "ReadBuffer failed.");

    // in the non-null case, we expect NONZERO values:
    if (type == NON_NULL_PATH) {
        for (i=0; i<NITEMS; i++) {
            if (host_result[i] == 0) {
                log_error("failure: item %d in the result buffer was unexpectedly NULL.\n", i);
                test_success = FAILURE; break;
            }
        }
    } else {
        // ADDROF_NULL_PATH or NULL_PATH: every value must be zero
        for (i=0; i<NITEMS; i++) {
            if (host_result[i] != 0) {
                log_error("failure: item %d in the result buffer was unexpectedly non-NULL.\n", i);
                test_success = FAILURE; break;
            }
        }
    }

    free(host_result);

    if (test_success == SUCCESS) {
        log_info("\t%s ok.\n", typestr);
    }

    return test_success;
}
/*
 * Verifies that a kernel receives a NULL pointer when its buffer argument is
 * set to NULL.
 *
 * Builds test_kernel from kernel_string, creates a source buffer of NITEMS
 * floats and a destination buffer of NITEMS cl_longs, binds the destination
 * as argument 1 and then runs test_setargs_and_execution for each supported
 * way of passing argument 0.
 *
 * device and num_elements are not used.
 *
 * Returns 0 (SUCCESS) when every case passes and non-zero otherwise. OpenCL
 * setup failures are reported through the harness test_error macro.
 */
int test_null_buffer_arg(cl_device_id device, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    unsigned int test_success = 0;
    cl_int status;
    cl_program program;
    cl_kernel kernel;
    cl_mem dev_src;
    cl_mem dev_dst;

    // prep kernel:
    program = clCreateProgramWithSource(context, 1, &kernel_string, NULL, &status);
    test_error(status, "CreateProgramWithSource failed.");

    status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    test_error(status, "BuildProgram failed.");

    kernel = clCreateKernel(program, "test_kernel", &status);
    test_error(status, "CreateKernel failed.");

    // Buffer creation is checked so a failed allocation is reported here
    // rather than surfacing later as a confusing NULL/non-NULL mismatch.
    dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
                             NULL, &status);
    test_error(status, "CreateBuffer (source) failed.");

    dev_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NITEMS*sizeof(cl_long),
                             NULL, &status);
    test_error(status, "CreateBuffer (destination) failed.");

    // set the destination buffer normally:
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_dst);
    test_error(status, "SetKernelArg failed.");

    //
    // we test three cases:
    //
    // - typical case, used everyday: non-null buffer
    // - the case of src as &NULL (the spec-compliance test)
    // - the case of src as NULL (the backwards-compatibility test, Apple only)
    //
    test_success = test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NON_NULL_PATH);
    test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, ADDROF_NULL_PATH);
#ifdef __APPLE__
    test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NULL_PATH);
#endif

    // clean up:
    clReleaseMemObject(dev_src);
    clReleaseMemObject(dev_dst);
    clReleaseKernel(kernel);
    clReleaseProgram(program);

    return test_success;
}

View File

@@ -1,234 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif // !_WIN32
// Note: According to spec, the various functions to get instance counts should return an error when passed in an object
// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable
// this define.
//#define VERIFY_AFTER_RELEASE 1
// Queries the reference count of command queue p. Relies on the caller having locals named `numInstances` and `err`:
// `err` receives the query status and `numInstances` the count (0 when the query fails).
#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo(p, CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
// Same as GET_QUEUE_INSTANCE_COUNT, but for memory object p (CL_MEM_REFERENCE_COUNT).
#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo(p, CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
// Logs an error and returns -1 from the *calling function* when c differs from rightValue.
// Expands to a bare `if` statement and does not parenthesize its arguments, so pass simple expressions only.
#define VERIFY_INSTANCE_COUNT(c,rightValue) if( c != rightValue ) { \
log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", rightValue, c ); \
return -1; }
/* Checks that a freshly created command queue reports a reference count of 1
   and can then be released.  The harness-supplied queue (queueNotUsed) and
   num_elements are not used; the test creates its own queue.
   Returns 0 on success, non-zero on failure. */
int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Test the instance count.  If either check below is about to bail out,
       drop our reference first so the queue is not leaked.
       NOTE(review): relies on test_error returning from this function on
       failure, exactly as the surrounding code already does. */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_SUCCESS || numInstances != 1 )
        clReleaseCommandQueue( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the queue; a failing release is itself a test failure */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif
    return 0;
}
/* Applies `delta` retains (delta > 0) or -delta releases (delta < 0) to
   `queue`, keeping *held equal to the number of references this test still
   owns, then checks that the reported reference count equals `expected`.
   Returns 0 on success, non-zero on failure. */
static int step_queue_refcount( cl_command_queue queue, int delta, int expected, int *held )
{
    cl_uint numInstances;
    int err;

    for( ; delta > 0; delta-- )
    {
        err = clRetainCommandQueue( queue );
        test_error( err, "Unable to retain command queue" );
        (*held)++;
    }
    for( ; delta < 0; delta++ )
    {
        err = clReleaseCommandQueue( queue );
        test_error( err, "Unable to release command queue" );
        (*held)--;
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, expected );
    return 0;
}

/* Walks a command queue's reference count through 10 -> 5 -> 8 -> 1 using
   clRetainCommandQueue / clReleaseCommandQueue and verifies the reported
   count after every phase, then releases the last reference.
   The harness-supplied queue (queueNotUsed) and num_elements are not used.
   Returns 0 on success, non-zero on failure. */
int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    /* { retains (+) / releases (-) to apply, count expected afterwards } */
    static const int steps[][ 2 ] = {
        {  9, 10 },   /* Increment 9 times, which should bring the count to 10 */
        { -5,  5 },   /* Now release 5 times, which should take us to 5 */
        {  3,  8 },   /* Retain again three times, which should take us to 8 */
        { -7,  1 },   /* Release 7 times, which should take it to 1 */
    };
    cl_command_queue queue;
    unsigned int i;
    int held = 1;   /* the reference handed out by clCreateCommandQueue */
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    for( i = 0; i < sizeof( steps ) / sizeof( steps[ 0 ] ); i++ )
    {
        err = step_queue_refcount( queue, steps[ i ][ 0 ], steps[ i ][ 1 ], &held );
        if( err != 0 )
        {
            /* Drop every reference we still own so a failure does not leak the queue */
            while( held-- > 0 )
                clReleaseCommandQueue( queue );
            return err;
        }
    }

    /* And one last one */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );

#ifdef VERIFY_AFTER_RELEASE
    {
        cl_uint numInstances;
        /* We're not allowed to get the instance count after the object has been completely released. But that's
           exactly how we can tell the release worked--by making sure getting the instance count fails! */
        GET_QUEUE_INSTANCE_COUNT( queue );
        if( err != CL_INVALID_COMMAND_QUEUE )
        {
            print_error( err, "Command queue was not properly released" );
            return -1;
        }
    }
#endif
    return 0;
}
/* Checks that a freshly created 32-byte buffer reports a reference count of 1
   and can then be released.  deviceID, queue and num_elements are not used.
   Returns 0 on success, non-zero on failure. */
int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    int err;

    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Test the instance count.  If either check below is about to bail out,
       drop our reference first so the buffer is not leaked.
       NOTE(review): relies on test_error returning from this function on
       failure, exactly as the surrounding code already does. */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_SUCCESS || numInstances != 1 )
        clReleaseMemObject( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the buffer; a failing release is itself a test failure */
    err = clReleaseMemObject( object );
    test_error( err, "Unable to release mem object" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif
    return 0;
}
int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem object;
unsigned int numInstances, i;
int err;
/* Create a test object */
object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
test_error( err, "Unable to create buffer to test with" );
/* Increment 9 times, which should bring the count to 10 */
for( i = 0; i < 9; i++ )
{
clRetainMemObject( object );
}
/* Test the instance count */
GET_MEM_INSTANCE_COUNT( object );
test_error( err, "Unable to get mem object count" );
VERIFY_INSTANCE_COUNT( numInstances, 10 );
/* Now release 5 times, which should take us to 5 */
for( i = 0; i < 5; i++ )
{
clReleaseMemObject( object );
}
GET_MEM_INSTANCE_COUNT( object );
test_error( err, "Unable to get mem object count" );
VERIFY_INSTANCE_COUNT( numInstances, 5 );
/* Retain again three times, which should take us to 8 */
for( i = 0; i < 3; i++ )
{
clRetainMemObject( object );
}
GET_MEM_INSTANCE_COUNT( object );
test_error( err, "Unable to get mem object count" );
VERIFY_INSTANCE_COUNT( numInstances, 8 );
/* Release 7 times, which should take it to 1 */
for( i = 0; i < 7; i++ )
{
clReleaseMemObject( object );
}
GET_MEM_INSTANCE_COUNT( object );
test_error( err, "Unable to get mem object count" );
VERIFY_INSTANCE_COUNT( numInstances, 1 );
/* And one last one */
clReleaseMemObject( object );
#ifdef VERIFY_AFTER_RELEASE
/* We're not allowed to get the instance count after the object has been completely released. But that's
exactly how we can tell the release worked--by making sure getting the instance count fails! */
GET_MEM_INSTANCE_COUNT( object );
if( err != CL_INVALID_MEM_OBJECT )
{
print_error( err, "Mem object was not properly released" );
return -1;
}
#endif
return 0;
}

View File

@@ -1,109 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include "harness/compat.h"
/* Builds a trivial program, creates a kernel from it, then releases the
   program BEFORE the kernel.  Both releases must succeed if the
   implementation's internal reference counting is right.
   context and deviceID are used; queue and num_elements are not.
   Returns 0 on success, non-zero on failure. */
int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_program program;
    cl_kernel kernel;
    int error;
    const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };

    /* Create a test program */
    program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error);
    test_error( error, "Unable to create program to test with" );

    /* Compile the program (release it first if we are about to bail out) */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    if( error != CL_SUCCESS )
        clReleaseProgram( program );
    test_error( error, "Unable to build sample program to test with" );

    /* And create a kernel from it */
    kernel = clCreateKernel( program, "sample_test", &error );
    if( error != CL_SUCCESS )
        clReleaseProgram( program );
    test_error( error, "Unable to create kernel" );

    /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine.
       The release calls report a status, so check it instead of only hoping we do not crash. */
    error = clReleaseProgram( program );
    if( error != CL_SUCCESS )
        clReleaseKernel( kernel );
    test_error( error, "Unable to release program before its kernel" );

    error = clReleaseKernel( kernel );
    test_error( error, "Unable to release kernel after its program" );

    return 0;
}
/* OpenCL C source (a single string) for the kernel "sample_test": each
   work-item spins through an empty one-million-iteration loop and then
   writes (int)src[tid] to dst[tid].
   NOTE(review): the loop is presumably meant to keep the kernel running long
   enough for the host to release objects mid-execution; a device compiler
   may well optimize the empty loop away -- confirm if the delay matters. */
const char *sample_delay_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" for( int i = 0; i < 1000000; i++ ); \n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
cl_kernel kernel;
cl_mem streams[2];
size_t threads[1] = { 10 }, localThreadSize;
/* We now need an event to test. So we'll execute a kernel to get one */
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
{
return -1;
}
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
test_error( error, "Creating test array failed" );
/* Set the arguments */
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
test_error( error, "Unable to set indexed kernel arguments" );
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
test_error( error, "Unable to calc local thread size" );
/* Execute the kernel */
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL );
test_error( error, "Unable to execute test kernel" );
/* The kernel should still be executing, but we should still be able to release it. It's not terribly
useful, but we should be able to do it, if the internal refcounting is indeed correct. */
clReleaseMemObject( streams[ 1 ] );
clReleaseMemObject( streams[ 0 ] );
clReleaseKernel( kernel );
clReleaseProgram( program );
/* Now make sure we're really finished before we go on. */
error = clFinish(queue);
test_error( error, "Unable to finish context.");
return 0;
}

View File

@@ -2,47 +2,20 @@ set(MODULE_NAME COMPATIBILITY_BASIC)
# Source files listed for the test module named by MODULE_NAME.
# NOTE(review): how ${MODULE_NAME}_SOURCES is consumed is not visible here --
# presumably by a shared CMake include that builds the test executable.
set(${MODULE_NAME}_SOURCES
main.c
test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
test_hiloeo.c test_local.c test_pointercast.c
test_if.c test_loop.c
test_readimage.c test_readimage_int16.c test_readimage_fp32.c
test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
test_writeimage.c
test_multireadimageonefmt.c test_multireadimagemultifmt.c
test_imagedim.c
test_vloadstore.c
test_int2float.c test_float2int.c
test_createkernelsinprogram.c
test_hostptr.c
test_explicit_s2v.cpp
test_constant.c
test_image_multipass.c
test_imagereadwrite.c test_imagereadwrite3d.c
test_image_param.c
test_imagenpot.c
test_image_r8.c
test_barrier.c
test_basic_parameter_types.c
test_arrayreadwrite.c
test_imagearraycopy3d.c
test_imagecopy.c
test_imagerandomcopy.c
test_arrayimagecopy.c
test_arrayimagecopy3d.c
test_imagecopy3d.c
test_enqueue_map.cpp
test_work_item_functions.cpp
test_astype.cpp
test_async_copy.cpp
test_sizeof.c
test_vec_type_hint.c
test_constant_source.cpp
test_bufferreadwriterect.c
test_async_strided_copy.cpp
test_kernel_memory_alignment.cpp
test_local_kernel_scope.cpp
)
# Libraries listed for the module; here only the compatibility harness.
set(${MODULE_NAME}_LIBS harness-compat)

View File

@@ -31,99 +31,33 @@ cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
bool gTestRounding = false;
/* Table of the tests this executable registers, one ADD_TEST entry per test.
   NOTE(review): ADD_TEST( name ) presumably binds the entry to a function
   named test_<name> and the harness runs entries in table order -- confirm
   against the harness headers before reordering. */
test_definition test_list[] = {
ADD_TEST( hostptr ),
ADD_TEST( fpmath_float ),
ADD_TEST( fpmath_float2 ),
ADD_TEST( fpmath_float4 ),
ADD_TEST( intmath_int ),
ADD_TEST( intmath_int2 ),
ADD_TEST( intmath_int4 ),
ADD_TEST( intmath_long ),
ADD_TEST( intmath_long2 ),
ADD_TEST( intmath_long4 ),
ADD_TEST( hiloeo ),
ADD_TEST( if ),
ADD_TEST( sizeof ),
ADD_TEST( loop ),
ADD_TEST( pointer_cast ),
ADD_TEST( local_arg_def ),
ADD_TEST( local_kernel_def ),
ADD_TEST( local_kernel_scope ),
ADD_TEST( constant ),
ADD_TEST( constant_source ),
ADD_TEST( readimage ),
ADD_TEST( readimage_int16 ),
ADD_TEST( readimage_fp32 ),
ADD_TEST( writeimage ),
ADD_TEST( writeimage_int16 ),
ADD_TEST( writeimage_fp32 ),
ADD_TEST( mri_one ),
ADD_TEST( mri_multiple ),
ADD_TEST( image_r8 ),
ADD_TEST( barrier ),
ADD_TEST( int2float ),
ADD_TEST( float2int ),
ADD_TEST( imagereadwrite ),
ADD_TEST( imagereadwrite3d ),
ADD_TEST( readimage3d ),
ADD_TEST( readimage3d_int16 ),
ADD_TEST( readimage3d_fp32 ),
ADD_TEST( bufferreadwriterect ),
ADD_TEST( arrayreadwrite ),
ADD_TEST( imagearraycopy3d ),
ADD_TEST( imagecopy ),
ADD_TEST( imagecopy3d ),
ADD_TEST( imagerandomcopy ),
ADD_TEST( arrayimagecopy ),
ADD_TEST( arrayimagecopy3d ),
ADD_TEST( imagenpot ),
ADD_TEST( vload_global ),
ADD_TEST( vload_local ),
ADD_TEST( vload_constant ),
ADD_TEST( vload_private ),
ADD_TEST( vstore_global ),
ADD_TEST( vstore_local ),
ADD_TEST( vstore_private ),
ADD_TEST( createkernelsinprogram ),
ADD_TEST( imagedim_pow2 ),
ADD_TEST( imagedim_non_pow2 ),
ADD_TEST( image_param ),
ADD_TEST( image_multipass_integer_coord ),
ADD_TEST( image_multipass_float_coord ),
ADD_TEST( explicit_s2v_bool ),
ADD_TEST( explicit_s2v_char ),
ADD_TEST( explicit_s2v_uchar ),
ADD_TEST( explicit_s2v_short ),
ADD_TEST( explicit_s2v_ushort ),
ADD_TEST( explicit_s2v_int ),
ADD_TEST( explicit_s2v_uint ),
ADD_TEST( explicit_s2v_long ),
ADD_TEST( explicit_s2v_ulong ),
ADD_TEST( explicit_s2v_float ),
ADD_TEST( explicit_s2v_double ),
ADD_TEST( enqueue_map_buffer ),
ADD_TEST( enqueue_map_image ),
ADD_TEST( work_item_functions ),
ADD_TEST( astype ),
ADD_TEST( async_copy_global_to_local ),
ADD_TEST( async_copy_local_to_global ),
ADD_TEST( async_strided_copy_global_to_local ),
ADD_TEST( async_strided_copy_local_to_global ),
ADD_TEST( prefetch ),
ADD_TEST( parameter_types ),
ADD_TEST( vec_type_hint ),
ADD_TEST( kernel_memory_alignment_local ),
ADD_TEST( kernel_memory_alignment_global ),
ADD_TEST( kernel_memory_alignment_constant ),
ADD_TEST( kernel_memory_alignment_private ),
};
/* Number of entries in test_list. */
const int test_num = ARRAY_SIZE( test_list );

View File

@@ -1,143 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
cl_uchar *bufptr, *imgptr;
clMemWrapper buffer, image;
int img_width = 512;
int img_height = 512;
size_t elem_size;
size_t buffer_size;
int i;
cl_int err;
MTdata d;
cl_event copyevent;
log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
test_error(err, "clGetImageInfo failed");
buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
test_error(err, "clCreateBuffer failed");
bufptr = (cl_uchar*)malloc(buffer_size);
d = init_genrand( gRandomSeed );
bufptr = (cl_uchar*)malloc(buffer_size);
for (i=0; i<(int)buffer_size; i++) {
bufptr[i] = (cl_uchar)genrand_int32(d);
}
free_mtdata(d); d = NULL;
size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1};
err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
test_error(err, "clEnqueueCopyImageToBuffer failed");
imgptr = (cl_uchar*)malloc(buffer_size);
err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
test_error(err, "clEnqueueReadBuffer failed");
if (memcmp(bufptr, imgptr, buffer_size) != 0) {
log_error( "ERROR: Results did not validate!\n" );
unsigned char * inchar = (unsigned char*)bufptr;
unsigned char * outchar = (unsigned char*)imgptr;
int failuresPrinted = 0;
int i;
for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
int failed = 0;
int j;
for (j=0; j<(int)elem_size; j++)
if (inchar[i+j] != outchar[i+j])
failed = 1;
char values[4096];
values[0] = 0;
if (failed) {
sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
int j;
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
sprintf(values + strlen(values), "] != expected [");
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
failuresPrinted++;
}
if (failuresPrinted > 5) {
log_error("Not printing further failures...\n");
break;
}
}
err = -1;
}
free(bufptr);
free(imgptr);
if (err)
log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
(unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
return err;
}
/* Runs the buffer -> 2D image copy test once for every read/write 2D image
   format the context reports.  num_elements is not used.
   Returns 0 when every format passes, non-zero otherwise. */
int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    /* First call: how many formats are there? */
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if( formats == NULL && num_formats != 0 )
    {
        log_error( "ERROR: Unable to allocate the image format list\n" );
        return -1;
    }

    /* Second call: fetch the list (free it first if we are about to bail out) */
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
    if( err != CL_SUCCESS )
        free(formats);
    test_error(err, "clGetSupportedImageFormats failed");

    /* Keep going after a failing format so every format gets reported */
    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]);
    }

    free(formats);

    if (err)
        log_error("ARRAY to IMAGE copy test failed\n");
    else
        log_info("ARRAY to IMAGE copy test passed\n");

    return err;
}

View File

@@ -1,144 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
cl_uchar *bufptr, *imgptr;
clMemWrapper buffer, image;
int img_width = 128;
int img_height = 128;
int img_depth = 32;
size_t elem_size;
size_t buffer_size;
int i;
cl_int err;
MTdata d;
cl_event copyevent;
log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
test_error(err, "clGetImageInfo failed");
buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
test_error(err, "clCreateBuffer failed");
bufptr = (cl_uchar*)malloc(buffer_size);
d = init_genrand( gRandomSeed );
bufptr = (cl_uchar*)malloc(buffer_size);
for (i=0; i<(int)buffer_size; i++) {
bufptr[i] = (cl_uchar)genrand_int32(d);
}
free_mtdata(d); d = NULL;
size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth};
err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
test_error(err, "clEnqueueCopyImageToBuffer failed");
imgptr = (cl_uchar*)malloc(buffer_size);
err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
test_error(err, "clEnqueueReadBuffer failed");
if (memcmp(bufptr, imgptr, buffer_size) != 0) {
log_error( "ERROR: Results did not validate!\n" );
unsigned char * inchar = (unsigned char*)bufptr;
unsigned char * outchar = (unsigned char*)imgptr;
int failuresPrinted = 0;
int i;
for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
int failed = 0;
int j;
for (j=0; j<(int)elem_size; j++)
if (inchar[i+j] != outchar[i+j])
failed = 1;
char values[4096];
values[0] = 0;
if (failed) {
sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
int j;
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
sprintf(values + strlen(values), "] != expected [");
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
failuresPrinted++;
}
if (failuresPrinted > 5) {
log_error("Not printing further failures...\n");
break;
}
}
err = -1;
}
free(bufptr);
free(imgptr);
if (err)
log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
(unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
return err;
}
/* Runs the buffer -> 3D image copy test once for every read/write 3D image
   format the context reports.  num_elements is not used.
   Returns 0 when every format passes, non-zero otherwise. */
int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    /* First call: how many formats are there? */
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if( formats == NULL && num_formats != 0 )
    {
        log_error( "ERROR: Unable to allocate the image format list\n" );
        return -1;
    }

    /* Second call: fetch the list (free it first if we are about to bail out) */
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
    if( err != CL_SUCCESS )
        free(formats);
    test_error(err, "clGetSupportedImageFormats failed");

    /* Keep going after a failing format so every format gets reported */
    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]);
    }

    free(formats);

    if (err)
        log_error("ARRAY to IMAGE3D copy test failed\n");
    else
        log_info("ARRAY to IMAGE3D copy test passed\n");

    return err;
}

View File

@@ -1,94 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int
test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_uint *inptr, *outptr;
cl_mem streams[1];
int num_tries = 400;
num_elements = 1024 * 1024 * 4;
int i, j, err;
MTdata d;
inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
// randomize data
d = init_genrand( gRandomSeed );
for (i=0; i<num_elements; i++)
inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
test_error(err, "clCreateBuffer failed");
for (i=0; i<num_tries; i++)
{
int offset;
int cb;
do {
offset = (int)(genrand_int32(d) & 0x7FFFFFFF);
if (offset > 0 && offset < num_elements)
break;
} while (1);
cb = (int)(genrand_int32(d) & 0x7FFFFFFF);
if (cb > (num_elements - offset))
cb = num_elements - offset;
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
for (j=offset; j<offset+cb; j++)
{
if (inptr[j] != outptr[j])
{
log_error("ARRAY read, write test failed\n");
err = -1;
break;
}
}
if (err)
break;
}
free_mtdata(d);
clReleaseMemObject(streams[0]);
free(inptr);
free(outptr);
if (!err)
log_info("ARRAY read, write test passed\n");
return err;
}

View File

@@ -1,289 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
// printf-style templates for the as_type test kernel.  In every template the
// first %s is an optional #pragma line; the remaining %s slots take type
// names and vector-size suffixes.  3-element vectors get dedicated templates
// because they are accessed with vload3/vstore3 through a pointer to the
// scalar type instead of through a vector pointer.

// General case: both src and dst are accessed through vector-typed pointers.
// Arguments: pragma, in type, in size, out type, out size, out type, out size,
// out type, out size.
static const char *astype_kernel_pattern =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( src[ tid ] );\n"
" dst[ tid ] = tmp;\n"
"}\n";
// 3-element source AND 3-element destination.  Relative to the printf for
// astype_kernel_pattern, drop the third and fifth arguments (the size
// suffixes on the two pointer types), each of which would be a "3".
static const char *astype_kernel_pattern_V3srcV3dst =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// 3-element destination only.  The "3" suffixes are hard-coded in the
// template, so the printf passes: pragma, in type, in size, then the out
// type three times with no size suffix.
static const char *astype_kernel_pattern_V3dst =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s3 tmp = as_%s3( src[ tid ] );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// 3-element source only.  Relative to the printf for astype_kernel_pattern,
// drop the third argument (the size suffix on the src pointer type), which
// would be a "3".
static const char *astype_kernel_pattern_V3src =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" dst[ tid ] = tmp;\n"
"}\n";
int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
unsigned int vecSize, unsigned int outVecSize,
int numElements )
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
char programSrc[ 10240 ];
size_t threads[ 1 ], localThreads[ 1 ];
size_t typeSize = get_explicit_type_size( inVecType );
size_t outTypeSize = get_explicit_type_size(outVecType);
char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
MTdata d;
// Create program
if(outVecSize == 3 && vecSize == 3) {
// astype_kernel_pattern_V3srcV3dst
sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), // sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
} else if(outVecSize == 3) {
// astype_kernel_pattern_V3dst
sprintf( programSrc, astype_kernel_pattern_V3dst,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
get_explicit_type_name( outVecType ),
get_explicit_type_name( outVecType ),
get_explicit_type_name( outVecType ));
} else if(vecSize == 3) {
// astype_kernel_pattern_V3src
sprintf( programSrc, astype_kernel_pattern_V3src,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ),// sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
} else {
sprintf( programSrc, astype_kernel_pattern,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
}
const char *ptr = programSrc;
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
test_error( error, "Unable to create testing kernel" );
// Create some input values
size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
char *inBuffer = (char*)malloc( inBufferSize );
size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
char *outBuffer = (char*)malloc( outBufferSize );
d = init_genrand( gRandomSeed );
generate_random_data( inVecType, numElements * vecSize,
d, inBuffer );
free_mtdata(d); d = NULL;
// Create I/O streams and set arguments
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
test_error( error, "Unable to create I/O stream" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
test_error( error, "Unable to create I/O stream" );
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
test_error( error, "Unable to set kernel argument" );
// Run the kernel
threads[ 0 ] = numElements;
error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
test_error( error, "Unable to get group size to run with" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to run kernel" );
// Get the results and compare
// The beauty is that astype is supposed to return the bit pattern as a different type, which means
// the output should have the exact same bit pattern as the input. No interpretation necessary!
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
test_error( error, "Unable to read results" );
char *expected = inBuffer;
char *actual = outBuffer;
size_t compSize = typeSize*vecSize;
if(outTypeSize*outVecSize < compSize) {
compSize = outTypeSize*outVecSize;
}
if(outVecSize == 4 && vecSize == 3)
{
// as_type4(vec3) should compile but produce undefined results??
free(inBuffer);
free(outBuffer);
return 0;
}
if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
{
// as_typen(vecm) should compile and run but produce
// implementation-defined results for m != n
// and n*sizeof(type) = sizeof(vecm)
free(inBuffer);
free(outBuffer);
return 0;
}
for( int i = 0; i < numElements; i++ )
{
if( memcmp( expected, actual, compSize ) != 0 )
{
char expectedString[ 1024 ], actualString[ 1024 ];
log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
(int)i, (int)numElements, get_explicit_type_name( outVecType ), vecSize, get_explicit_type_name( inVecType ), vecSize,
GetDataVectorString( expected, typeSize, vecSize, expectedString ),
GetDataVectorString( actual, typeSize, vecSize, actualString ) );
log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
programSrc, (int)threads[0],(int) localThreads[0]);
free(inBuffer);
free(outBuffer);
return 1;
}
expected += typeSize * vecSize;
actual += outTypeSize * outVecSize;
}
free(inBuffer);
free(outBuffer);
return 0;
}
// Drives test_astype_set() over every ordered pair of distinct element types
// and every pair of vector widths whose total byte sizes match, and returns
// the accumulated failure count. Double types are skipped when cl_khr_fp64
// is not reported; long/ulong types are skipped when gHasLong is false.
int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // Reinterpreting between element sizes (e.g. short2 -> char4) is legal in
    // OpenCL 1.0 but the resulting layout is device dependent, so only
    // combinations whose overall size matches are exercised here.
    ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
    int failures = 0;

    for( unsigned int src = 0; vecTypes[ src ] != kNumExplicitTypes; ++src )
    {
        const ExplicitType srcType = vecTypes[ src ];
        const size_t srcElemSize = get_explicit_type_size( srcType );

        if( srcType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
            continue;
        if( ( srcType == kLong || srcType == kULong ) && !gHasLong )
            continue;

        for( unsigned int dst = 0; vecTypes[ dst ] != kNumExplicitTypes; ++dst )
        {
            const ExplicitType dstType = vecTypes[ dst ];
            const size_t dstElemSize = get_explicit_type_size( dstType );

            if( dstType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
                continue;
            if( ( dstType == kLong || dstType == kULong ) && !gHasLong )
                continue;
            // Identical source and destination types are not tested.
            if( src == dst )
                continue;

            log_info( " (%s->%s)\n", get_explicit_type_name( srcType ), get_explicit_type_name( dstType ) );
            fflush( stdout );

            // Every (input width, output width) pair with equal byte size.
            for( unsigned int s = 0; vecSizes[ s ] != 0; ++s )
            {
                for( unsigned int o = 0; vecSizes[ o ] != 0; ++o )
                {
                    if( vecSizes[ s ] * srcElemSize == vecSizes[ o ] * dstElemSize )
                    {
                        failures += test_astype_set( device, context, queue, srcType, dstType,
                                                     vecSizes[ s ], vecSizes[ o ], n_elems );
                    }
                }
            }

            // Same-sized elements additionally get the 3 <-> 4 width cases,
            // which the byte-size filter above never selects.
            if( srcElemSize == dstElemSize )
            {
                failures += test_astype_set( device, context, queue, srcType, dstType, 3, 4, n_elems );
                failures += test_astype_set( device, context, queue, srcType, dstType, 4, 3, n_elems );
            }
        }
    }

    return failures;
}

View File

@@ -1,158 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the "compute_sum" kernel used by test_barrier().
// Each work-item first accumulates a strided partial sum of a[] into
// tmp_sum[local id]. The partial sums are then folded together in a loop
// that shrinks the active range (lsize) each pass, with a barrier before
// every fold step. The work-item with local id 0 stores tmp_sum[0] to *sum.
// Indexing uses get_local_id/get_local_size only.
const char *barrier_kernel_code =
"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
"{\n"
" int tid = get_local_id(0);\n"
" int lsize = get_local_size(0);\n"
" int i;\n"
"\n"
" tmp_sum[tid] = 0;\n"
" for (i=tid; i<n; i+=lsize)\n"
" tmp_sum[tid] += a[i];\n"
" \n"
" // updated to work for any workgroup size \n"
" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
" {\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" if (tid + i < lsize)\n"
" tmp_sum[tid] += tmp_sum[tid + i];\n"
" lsize = i; \n"
" }\n"
"\n"
" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
" if (tid == 0)\n"
" *sum = tmp_sum[0];\n"
"}\n";
// Recomputes the sum of the n input integers on the host and compares it
// with the single value the kernel produced in outptr[0].
// Returns 0 on a match and -1 otherwise. tmpptr is accepted but never read.
static int
verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
{
    const int *src = inptr;
    int total = 0;
    int remaining;

    (void)tmpptr;

    // Accumulate in the same element order as a plain forward loop.
    for (remaining = n; remaining > 0; --remaining)
        total += *src++;

    if (total == outptr[0])
    {
        log_info("BARRIER test passed\n");
        return 0;
    }

    log_error("BARRIER test failed\n");
    return -1;
}
// Logs a failure and jumps to the shared cleanup code of test_barrier(), so
// that no OpenCL object or host allocation is leaked on an error path.
#define BARRIER_BAIL_ON_ERROR(code, msg)                                  \
    do {                                                                  \
        if ((code) != CL_SUCCESS) {                                       \
            log_error("ERROR: %s (error %d)\n", (msg), (int)(code));      \
            goto cleanup;                                                 \
        }                                                                 \
    } while (0)

// Runs the compute_sum kernel in a single work-group and checks the reduced
// value against a host-side sum of the same random input.
//
// device/context/queue: OpenCL objects supplied by the test harness.
// num_elements:         number of input integers to sum.
//
// Returns 0 on success, or a non-zero value (an OpenCL error code or -1) on
// failure. Everything created here is released on every exit path.
int
test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3] = { NULL, NULL, NULL };
    cl_int *input_ptr = NULL, *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t global_threads[3];
    size_t local_threads[3];
    int err;
    int i;
    size_t max_local_workgroup_size[3];
    size_t max_threadgroup_size = 0;
    MTdata d;

    err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" );
    BARRIER_BAIL_ON_ERROR(err, "Failed to build kernel/program.");

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
                                   sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
    BARRIER_BAIL_ON_ERROR(err, "clGetKernelWorkgroupInfo failed.");

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    BARRIER_BAIL_ON_ERROR(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    // Pick the minimum of the device and the kernel
    if (max_threadgroup_size > max_local_workgroup_size[0])
        max_threadgroup_size = max_local_workgroup_size[0];

    // A zero size would make the modulo below divide by zero.
    if (max_threadgroup_size == 0)
    {
        log_error("ERROR: reported work-group size is zero\n");
        err = -1;
        goto cleanup;
    }

    // work group size must divide evenly into the global size
    while( num_elements % max_threadgroup_size )
        max_threadgroup_size--;

    input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    output_ptr = (cl_int*)malloc(sizeof(cl_int));
    if (input_ptr == NULL || output_ptr == NULL)
    {
        log_error("ERROR: unable to allocate host buffers\n");
        err = -1;
        goto cleanup;
    }

    // streams[0]: input values, streams[1]: final sum, streams[2]: per
    // work-item partial sums (one slot per work-item in the group).
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
    BARRIER_BAIL_ON_ERROR(err, "clCreateBuffer failed.");
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err);
    BARRIER_BAIL_ON_ERROR(err, "clCreateBuffer failed.");
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err);
    BARRIER_BAIL_ON_ERROR(err, "clCreateBuffer failed.");

    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
    BARRIER_BAIL_ON_ERROR(err, "clEnqueueWriteBuffer failed.");

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
    BARRIER_BAIL_ON_ERROR(err, "clSetKernelArg failed.");

    // One work-group only: global size equals local size.
    global_threads[0] = max_threadgroup_size;
    local_threads[0] = max_threadgroup_size;

    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
    BARRIER_BAIL_ON_ERROR(err, "clEnqueueNDRangeKernel failed.");

    // Blocking read, so output_ptr is valid as soon as the call returns.
    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
    BARRIER_BAIL_ON_ERROR(err, "clEnqueueReadBuffer failed.");

    err = verify_sum(input_ptr, NULL, output_ptr, num_elements);

cleanup:
    for (i = 0; i < 3; i++)
    {
        if (streams[i] != NULL)
            clReleaseMemObject(streams[i]);
    }
    if (kernel != NULL)
        clReleaseKernel(kernel);
    if (program != NULL)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return err;
}

#undef BARRIER_BAIL_ON_ERROR

View File

@@ -1,300 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// printf-style template for the "test_kernel" kernel source.
// The first eight %s slots take a vector-width suffix (one per parameter,
// including the result pointer); the remaining six take the conversion
// function name applied to each integer argument before it is stored.
// test_parameter_types() fills the template with sprintf.
const char *kernel_code =
"__kernel void test_kernel(\n"
"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n"
"__global float%s *result)\n"
"{\n"
" result[0] = %s(c);\n"
" result[1] = %s(uc);\n"
" result[2] = %s(s);\n"
" result[3] = %s(us);\n"
" result[4] = %s(i);\n"
" result[5] = %s(ui);\n"
" result[6] = f;\n"
"}\n";
// printf-style template for the "test_kernel_long" kernel source.
// The first three %s slots take a vector-width suffix (long, ulong and the
// float result pointer); the last two take the conversion function name.
// test_parameter_types_long() fills the template with sprintf.
const char *kernel_code_long =
"__kernel void test_kernel_long(\n"
"long%s l, ulong%s ul,\n"
"__global float%s *result)\n"
"{\n"
" result[0] = %s(l);\n"
" result[1] = %s(ul);\n"
"}\n";
// Passes long and ulong values (scalar and vector widths 2, 4, 8, 16) to a
// generated kernel by value, has the kernel convert them to float, and
// compares the floats read back with the host-side conversion.
// Returns the number of mismatching elements; OpenCL failures return early
// through test_error.
int test_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kArgCount = 2, kMaxWidth = 16 };

    clMemWrapper results;
    const char* types[] = { "long", "ulong" };
    int sizes[] = {1, 2, 4, 8, 16};
    const char* size_strings[] = {"", "2", "4", "8", "16"};
    const int width_count = (int)(sizeof(sizes) / sizeof(sizes[0]));

    char kernel_string[8192];
    char convert_string[1024];
    float results_back[kArgCount*kMaxWidth];
    size_t global[3] = {1, 1, 1};
    size_t max_parameter_size;
    int total_errors = 0;
    int error;

    // The actual values are arbitrary; only the parameter passing is tested.
    cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};

    // Per-argument host data and element size, indexed by argument number.
    const void *arg_data[kArgCount] = { l, ul };
    const size_t arg_elem_size[kArgCount] = { sizeof(cl_long), sizeof(cl_ulong) };

    // Bytes consumed by one scalar of every by-value parameter.
    size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);

    kernel_string[0] = '\0';
    convert_string[0] = '\0';

    // Device limit on total kernel parameter bytes.
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
    test_error( error, "Unable to get max parameter size from device" );

    // Output buffer large enough for the widest configuration.
    results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*kArgCount*kMaxWidth, NULL, &error);
    test_error(error, "clCreateBuffer failed");

    for (int w = 0; w < width_count; ++w) {
        clProgramWrapper program;
        clKernelWrapper kernel;
        const int width = sizes[w];
        const char *suffix = size_strings[w];
        const char *source = kernel_string;

        size_t total_parameter_size = parameter_size*width + sizeof(cl_mem);
        if (total_parameter_size > max_parameter_size) {
            log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
                     (int)width, (int)total_parameter_size, (int)max_parameter_size);
            continue;
        }

        log_info("Testing vector size %d\n", width);

        // Vector arguments need an explicit convert_floatN(); scalars do not.
        if (width <= 1) {
            sprintf(convert_string, " ");
        } else {
            sprintf(convert_string, "convert_float%s", suffix);
        }

        // Instantiate the kernel template for this width and build it.
        sprintf(kernel_string, kernel_code_long,
                suffix, suffix, suffix,
                convert_string, convert_string);

        error = create_single_kernel_helper(context, &program, &kernel, 1, &source, "test_kernel_long");
        test_error(error, "create single kernel failed");

        // By-value arguments first, then the result buffer.
        for (int arg = 0; arg < kArgCount; ++arg) {
            error = clSetKernelArg(kernel, arg, arg_elem_size[arg]*width, arg_data[arg]);
            if (error)
                log_error("Setting kernel arg %d %s%s: ", arg, types[arg], suffix);
            test_error(error, "clSetKernelArgs failed");
        }
        error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
        test_error(error, "clSetKernelArgs failed");

        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");

        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*kArgCount*kMaxWidth, results_back, 0, NULL, NULL);
        test_error(error, "clEnqueueReadBuffer failed");

        // Argument `arg` occupies elements [arg*width, (arg+1)*width).
        for (int arg = 0; arg < kArgCount; ++arg) {
            for (int lane = 0; lane < width; ++lane) {
                const float expected = (arg == 0) ? (float)l[lane] : (float)ul[lane];
                const float got = results_back[arg*width + lane];
                if (got != expected) {
                    total_errors++;
                    log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[arg], suffix,
                              lane, got, expected);
                }
            }
        }
    }

    return total_errors;
}
// Passes char, uchar, short, ushort, int, uint and float values (scalar and
// vector widths 2, 4, 8, 16) to a generated kernel by value, has the kernel
// store them as floats, and compares the result with the host conversion.
// When gHasLong is set, the long/ulong variant is run afterwards as well.
// Returns the total number of mismatching elements; OpenCL failures return
// early through test_error.
int test_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kArgCount = 7, kMaxWidth = 16 };

    clMemWrapper results;
    const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"};
    int sizes[] = {1, 2, 4, 8, 16};
    const char* size_strings[] = {"", "2", "4", "8", "16"};
    const int width_count = (int)(sizeof(sizes) / sizeof(sizes[0]));

    char kernel_string[8192];
    char convert_string[1024];
    float results_back[kArgCount*kMaxWidth];
    float expected_values[kArgCount][kMaxWidth];
    size_t global[3] = {1, 1, 1};
    size_t max_parameter_size;
    int total_errors = 0;
    int error;

    // The actual values are arbitrary; only the parameter passing is tested.
    cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_int iv[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};

    // Per-argument host data and element size, indexed by argument number.
    const void *arg_data[kArgCount] = { c, uc, s, us, iv, ui, f };
    const size_t arg_elem_size[kArgCount] = {
        sizeof(cl_char), sizeof(cl_uchar), sizeof(cl_short), sizeof(cl_ushort),
        sizeof(cl_int), sizeof(cl_uint), sizeof(cl_float)
    };

    // Bytes consumed by one scalar of every by-value parameter.
    size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) +
                            sizeof(cl_short) + sizeof(cl_ushort) +
                            sizeof(cl_int) + sizeof(cl_uint) +
                            sizeof(cl_float);

    // Host-side float conversion of every input element, by argument.
    for (int lane = 0; lane < kMaxWidth; ++lane) {
        expected_values[0][lane] = (float)c[lane];
        expected_values[1][lane] = (float)uc[lane];
        expected_values[2][lane] = (float)s[lane];
        expected_values[3][lane] = (float)us[lane];
        expected_values[4][lane] = (float)iv[lane];
        expected_values[5][lane] = (float)ui[lane];
        expected_values[6][lane] = (float)f[lane];
    }

    kernel_string[0] = '\0';
    convert_string[0] = '\0';

    // Device limit on total kernel parameter bytes.
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
    test_error( error, "Unable to get max parameter size from device" );

    // Output buffer large enough for the widest configuration.
    results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*kArgCount*kMaxWidth, NULL, &error);
    test_error(error, "clCreateBuffer failed");

    for (int w = 0; w < width_count; ++w) {
        clProgramWrapper program;
        clKernelWrapper kernel;
        const int width = sizes[w];
        const char *suffix = size_strings[w];
        const char *source = kernel_string;

        size_t total_parameter_size = parameter_size*width + sizeof(cl_mem);
        if (total_parameter_size > max_parameter_size) {
            log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
                     (int)width, (int)total_parameter_size, (int)max_parameter_size);
            continue;
        }

        log_info("Testing vector size %d\n", width);

        // Vector arguments need an explicit convert_floatN(); scalars do not.
        if (width <= 1) {
            sprintf(convert_string, " ");
        } else {
            sprintf(convert_string, "convert_float%s", suffix);
        }

        // Instantiate the kernel template for this width and build it.
        sprintf(kernel_string, kernel_code,
                suffix, suffix, suffix, suffix,
                suffix, suffix, suffix, suffix,
                convert_string, convert_string, convert_string,
                convert_string, convert_string, convert_string);

        error = create_single_kernel_helper(context, &program, &kernel, 1, &source, "test_kernel");
        test_error(error, "create single kernel failed");

        // By-value arguments first, then the result buffer.
        for (int arg = 0; arg < kArgCount; ++arg) {
            error = clSetKernelArg(kernel, arg, arg_elem_size[arg]*width, arg_data[arg]);
            if (error)
                log_error("Setting kernel arg %d %s%s: ", arg, types[arg], suffix);
            test_error(error, "clSetKernelArgs failed");
        }
        error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
        test_error(error, "clSetKernelArgs failed");

        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");

        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*kArgCount*kMaxWidth, results_back, 0, NULL, NULL);
        test_error(error, "clEnqueueReadBuffer failed");

        // Argument `arg` occupies elements [arg*width, (arg+1)*width).
        for (int arg = 0; arg < kArgCount; ++arg) {
            for (int lane = 0; lane < width; ++lane) {
                const float expected = expected_values[arg][lane];
                const float got = results_back[arg*width + lane];
                if (got != expected) {
                    total_errors++;
                    log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[arg], suffix,
                              lane, got, expected);
                }
            }
        }
    }

    if (!gHasLong) {
        log_info("Longs unsupported, skipping.");
    } else {
        log_info("Testing long types...\n");
        total_errors += test_parameter_types_long( device, context, queue, num_elements );
    }

    return total_errors;
}

View File

@@ -1,275 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for "constant_kernel": each work-item reads one float and
// one int from two __constant buffers at its global id, converts the int to
// float, and writes their product to out[] at the same index.
const char *constant_kernel_code =
"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" float ftmp = tmpF[tid]; \n"
" float Itmp = tmpI[tid]; \n"
" out[tid] = ftmp * Itmp; \n"
"}\n";
// OpenCL C source for "loop_constant_kernel": every work-item sums the
// constant-buffer elements i_pos[0], i_pos[3], ... i_pos[(num-1)*3] in a
// loop and writes that sum to out[] at its global id.
const char *loop_constant_kernel_code =
"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
"{\n"
" int tid = get_global_id(0);\n"
" float sum = 0;\n"
" for (int i = 0; i < num; i++) {\n"
" float pos = i_pos[i*3];\n"
" sum += pos;\n"
" }\n"
" out[tid] = sum;\n"
"}\n";
// Checks that out[k] equals tmpF[k] * tmpI[k] (computed in float on the
// host) for the first n elements. Returns 0 when every element matches and
// -1 at the first mismatch.
static int
verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Stored in a float first so the comparison uses float precision.
        float product = tmpF[idx] * tmpI[idx];
        if( out[idx] != product )
        {
            log_error("CONSTANT test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("CONSTANT test passed\n");
    return 0;
}
// Checks that each of the first n output values equals the sum of
// tmp[0], tmp[3], ... tmp[(l-1)*3], accumulated in float on the host.
// Returns 0 when every element matches and -1 at the first mismatch.
static int
verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
{
    int elem;

    for (elem = 0; elem < n; ++elem)
    {
        float expected = 0;
        cl_int step = 0;

        // Same stride-3 walk the kernel performs over its constant buffer.
        while (step < l)
        {
            expected += tmp[step*3];
            ++step;
        }

        if( out[elem] != expected )
        {
            log_error("loop CONSTANT test failed\n");
            return -1;
        }
    }

    log_info("loop CONSTANT test passed\n");
    return 0;
}
// Exercises __constant kernel arguments with two kernels:
//   1. constant_kernel: element-wise float * int product of two constant
//      buffers, checked by verify().
//   2. loop_constant_kernel: a strided loop over a constant buffer, checked
//      by verify_loop_constant().
// The buffer size is derived from CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, capped
// by a quarter of global memory and by the max allocation size, then
// divided by four.
//
// Returns 0 only if both kernels verify; returns -1 (or the OpenCL error
// from the initial device queries) otherwise. All host memory and OpenCL
// objects created here are released on every exit path after allocation.
int
test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3] = { NULL, NULL, NULL };
    cl_int *tmpI = NULL;
    cl_float *tmpF = NULL, *out = NULL;
    cl_program program = NULL, loop_program = NULL;
    cl_kernel kernel = NULL, loop_kernel = NULL;
    cl_int limit = 2;
    size_t global_threads[3];
    int err;
    int verify_err = -1, loop_verify_err = -1;
    int result = -1;
    size_t i;
    cl_ulong maxSize, maxGlobalSize, maxAllocSize;
    size_t num_floats, num_ints, constant_values;
    MTdata d;

    /* Verify our test buffer won't be bigger than allowed */
    /* Nothing is allocated yet, so test_error's early return is safe here. */
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
    test_error( err, "Unable to get max constant buffer size" );

    log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", (unsigned long long)maxSize);

    // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
    test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");

    if (maxSize > maxGlobalSize / 4)
        maxSize = maxGlobalSize / 4;

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
    test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");

    if (maxSize > maxAllocSize)
        maxSize = maxAllocSize;

    maxSize/=4;
    num_ints = (size_t)maxSize/sizeof(cl_int);
    num_floats = (size_t)maxSize/sizeof(cl_float);
    constant_values = (num_ints >= num_floats) ? num_floats : num_ints;

    // size_t is cast explicitly because %lu is not its format on every ABI.
    log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
             (unsigned long)(constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float)),
             (unsigned long)(constant_values*sizeof(cl_int)),
             (unsigned long)(constant_values*sizeof(cl_float)));

    tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
    tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
    out = (cl_float*)malloc(sizeof(cl_float) * constant_values);
    if (tmpI == NULL || tmpF == NULL || out == NULL)
    {
        log_error("Unable to allocate host buffers\n");
        goto cleanup;
    }

    // streams[0]: output, streams[1]: float constants, streams[2]: int constants
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<constant_values; i++) {
        tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
        tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
    }
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }
    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
    if (err) {
        log_error("Failed to create kernel and program: %d\n", err);
        goto cleanup;
    }

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = constant_values;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
        goto cleanup;
    }
    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    // If the device only supports round-to-zero (embedded profile), the host
    // reference must be computed with the same rounding mode.
    if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
    {
        RoundingMode oldRoundMode = set_round(kRoundTowardZero, kfloat);
        verify_err = verify(tmpF, tmpI, out, (int)constant_values);
        (void)set_round(oldRoundMode, kfloat);
    }
    else
    {
        verify_err = verify(tmpF, tmpI, out, (int)constant_values);
    }

    // Loop constant buffer test. The first result is kept in verify_err so a
    // failure there is still reported after this second kernel runs.
    memset(out, 0, sizeof(cl_float) * constant_values);
    err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
                                      &loop_constant_kernel_code, "loop_constant_kernel" );
    if (err) {
        log_error("Failed to create loop kernel and program: %d\n", err);
        goto cleanup;
    }

    err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
    if (err != CL_SUCCESS) {
        log_error("clSetKernelArgs for loop kernel failed\n");
        goto cleanup;
    }

    err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS) {
        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
        goto cleanup;
    }
    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
    if (err != CL_SUCCESS) {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }
    loop_verify_err = verify_loop_constant(tmpF, out, limit, (int)constant_values);

    // Pass only when both kernels produced the expected output.
    result = (verify_err == 0 && loop_verify_err == 0) ? 0 : -1;

cleanup:
    for (i = 0; i < 3; i++)
    {
        if (streams[i] != NULL)
            clReleaseMemObject(streams[i]);
    }
    if (kernel != NULL)
        clReleaseKernel(kernel);
    if (program != NULL)
        clReleaseProgram(program);
    if (loop_kernel != NULL)
        clReleaseKernel(loop_kernel);
    if (loop_program != NULL)
        clReleaseProgram(loop_program);
    free(tmpI);
    free(tmpF);
    free(out);

    return result;
}

View File

@@ -1,100 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source (a single string) declaring three program-scope
// __constant objects and the "constant_kernel" kernel that reads them.
// Work-item 0 writes outVal to out[0] and outValues[outIndex] to out[1];
// every other work-item writes outValues[tid] to out[tid + 1].
const char *constant_source_kernel_code[] = {
"__constant int outVal = 42;\n"
"__constant int outIndex = 7;\n"
"__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n"
"\n"
"__kernel void constant_kernel( __global int *out )\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" if( tid == 0 )\n"
" {\n"
" out[ 0 ] = outVal;\n"
" out[ 1 ] = outValues[ outIndex ];\n"
" }\n"
" else\n"
" {\n"
" out[ tid + 1 ] = outValues[ tid ];\n"
" }\n"
"}\n" };
// Builds the constant_kernel program, runs it with 16 work-items, and checks
// the 17 integers written to the output buffer against the values declared
// as __constant data in the kernel source. Returns 0 on success, -1 on the
// first mismatch; OpenCL failures return early through test_error.
int test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper outStream;
    cl_int outValues[ 17 ];
    cl_int expectedValues[ 17 ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };
    const int valueCount = (int)( sizeof( expectedValues ) / sizeof( expectedValues[ 0 ] ) );
    size_t threads[ 1 ] = { 16 };
    cl_int error;
    int idx;

    // Program and kernel under test.
    error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" );
    test_error( error, "Unable to create testing kernel" );

    // Device buffer that receives all 17 results.
    outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( outValues ), NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
    test_error( error, "Unable to set kernel argument" );

    // Launch, then fetch the results with a blocking read.
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to enqueue kernel" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( outValues ), outValues, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Slot 0 is the scalar constant, slot 1 the constant-indexed array read,
    // and the remaining slots are the array reads indexed by work-item id.
    for( idx = 0; idx < valueCount; ++idx )
    {
        if( expectedValues[ idx ] == outValues[ idx ] )
            continue;

        switch( idx )
        {
            case 0:
                log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", idx, expectedValues[ idx ], outValues[ idx ] );
                break;
            case 1:
                log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", idx, expectedValues[ idx ], outValues[ idx ] );
                break;
            default:
                log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! (Expected %d, got %d)\n", idx, expectedValues[ idx ], outValues[ idx ] );
                break;
        }
        return -1;
    }

    return 0;
}

View File

@@ -1,121 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source containing exactly one kernel, "sample_test", which casts
// each float in src to int and stores it in dst at the same global id.
const char *sample_single_kernel = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"};
// OpenCL C source containing two kernels with identical bodies,
// "sample_test" and "sample_test2"; each casts src[tid] to int into dst[tid].
const char *sample_double_kernel = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"
"__kernel void sample_test2(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"};
int
test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_program program;
cl_kernel kernel[2];
unsigned int num_kernels;
size_t lengths[2];
int err;
lengths[0] = strlen(sample_single_kernel);
program = clCreateProgramWithSource(context, 1, &sample_single_kernel, lengths, NULL);
if (!program)
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgramExecutable failed\n");
return -1;
}
err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels);
if ( (err != CL_SUCCESS) || (num_kernels != 1) )
{
log_error("clCreateKernelsInProgram test failed for a single kernel\n");
return -1;
}
clReleaseKernel(kernel[0]);
clReleaseProgram(program);
lengths[0] = strlen(sample_double_kernel);
program = clCreateProgramWithSource(context, 1, &sample_double_kernel, lengths, NULL);
if (!program)
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgramExecutable failed\n");
return -1;
}
err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels);
if ( (err != CL_SUCCESS) || (num_kernels != 2) )
{
log_error("clCreateKernelsInProgram test failed for two kernels\n");
return -1;
}
log_info("clCreateKernelsInProgram test passed\n");
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
return err;
}

View File

@@ -1,253 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
// Host-pointer related cl_mem_flags combinations that the map tests iterate
// over (five entries; the last one, 0, passes no host-pointer flag at all).
// Entries are index-aligned with flag_set_names.
const cl_mem_flags flag_set[] = {
CL_MEM_ALLOC_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_USE_HOST_PTR,
CL_MEM_COPY_HOST_PTR,
0
};
// Printable spelling of each flag_set entry, used only for log output.
// Must stay index-aligned with flag_set.
const char* flag_set_names[] = {
"CL_MEM_ALLOC_HOST_PTR",
"CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
"CL_MEM_USE_HOST_PTR",
"CL_MEM_COPY_HOST_PTR",
"0"
};
int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
const size_t bufferSize = 256*256;
int src_flag_id;
MTdata d = init_genrand( gRandomSeed );
cl_char *initialData = (cl_char*)malloc(bufferSize);
cl_char *finalData = (cl_char*)malloc(bufferSize);
for (src_flag_id=0; src_flag_id < 5; src_flag_id++)
{
clMemWrapper memObject;
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
generate_random_data( kChar, (unsigned int)bufferSize, d, initialData );
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), initialData, &error);
else
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), NULL, &error);
test_error( error, "Unable to create testing buffer" );
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
{
error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), initialData, 0, NULL, NULL);
test_error( error, "clEnqueueWriteBuffer failed");
}
for( int i = 0; i < 128; i++ )
{
size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d );
size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d );
cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, length, 0, NULL, NULL, &error );
if( error != CL_SUCCESS )
{
print_error( error, "clEnqueueMapBuffer call failed" );
log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length );
free( initialData );
free( finalData );
free_mtdata(d);
return -1;
}
// Write into the region
for( size_t j = 0; j < length; j++ )
{
cl_char spin = (cl_char)genrand_int32( d );
// Test read AND write in one swipe
cl_char value = mappedRegion[ j ];
value = spin - value;
mappedRegion[ j ] = value;
// Also update the initial data array
value = initialData[ offset + j ];
value = spin - value;
initialData[ offset + j ] = value;
}
// Unmap
error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
test_error( error, "Unable to unmap buffer" );
}
// Final validation: read actual values of buffer and compare against our reference
error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL );
test_error( error, "Unable to read results" );
for( size_t q = 0; q < bufferSize; q++ )
{
if( initialData[ q ] != finalData[ q ] )
{
log_error( "ERROR: Sample %d did not validate! Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] );
free( initialData );
free( finalData );
free_mtdata(d);
return -1;
}
}
} // cl_mem flags
free( initialData );
free( finalData );
free_mtdata(d);
return 0;
}
int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
const size_t imageSize = 256;
int src_flag_id;
cl_uint *initialData;
cl_uint *finalData;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
initialData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
finalData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) )
{
log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" );
free(initialData);
free(finalData);
return -1;
}
d = init_genrand( gRandomSeed );
for (src_flag_id=0; src_flag_id < 5; src_flag_id++) {
clMemWrapper memObject;
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
generate_random_data( kUInt, (unsigned int)( imageSize * imageSize ), d, initialData );
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
imageSize, imageSize, 0, initialData, &error );
else
memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
imageSize, imageSize, 0, NULL, &error );
test_error( error, "Unable to create testing buffer" );
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1};
error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, NULL, NULL, initialData, 0, NULL, NULL);
test_error( error, "Unable to write to testing buffer" );
}
for( int i = 0; i < 128; i++ )
{
size_t offset[3], region[3];
size_t rowPitch;
offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d );
offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d );
offset[ 2 ] = 0;
region[ 2 ] = 1;
cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, region, &rowPitch, NULL, 0, NULL, NULL, &error );
if( error != CL_SUCCESS )
{
print_error( error, "clEnqueueMapImage call failed" );
log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] );
free(initialData);
free(finalData);
free_mtdata(d);
return -1;
}
// Write into the region
cl_uint *mappedPtr = mappedRegion;
for( size_t y = 0; y < region[ 1 ]; y++ )
{
for( size_t x = 0; x < region[ 0 ] * 4; x++ )
{
cl_int spin = (cl_int)random_in_range( 16, 1024, d );
cl_int value;
// Test read AND write in one swipe
value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ];
value = spin - value;
mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value;
// Also update the initial data array
value = initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ];
value = spin - value;
initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ] = value;
}
}
// Unmap
error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
test_error( error, "Unable to unmap buffer" );
}
// Final validation: read actual values of buffer and compare against our reference
size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 };
error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL );
test_error( error, "Unable to read results" );
for( size_t q = 0; q < imageSize * imageSize * 4; q++ )
{
if( initialData[ q ] != finalData[ q ] )
{
log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ),
(int)finalData[ q ], (int)initialData[ q ] );
free(initialData);
free(finalData);
free_mtdata(d);
return -1;
}
}
} // cl_mem_flags
free(initialData);
free(finalData);
free_mtdata(d);
return 0;
}

View File

@@ -1,384 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
// Expands to the OpenCL C source (a run of adjacent string literals) of a
// kernel named test_conversion. Each work-item reads one `srctype` scalar
// from sourceValues and stores it, explicitly cast, into a destValues element
// whose type name is `dsttype` immediately followed by `size`.
// `srctype` and `dsttype` must already be string literals; `size` is
// stringified by the macro.
#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) \
"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n" \
"{\n" \
" int tid = get_global_id(0);\n" \
" " srctype " src = sourceValues[tid];\n" \
"\n" \
" destValues[tid] = (" dsttype #size ")src;\n" \
"\n" \
"}\n"
// Brace initializer holding the test_conversion kernel sources for vector
// sizes 2, 4, 8 and 16, in that order. `srctype` must already be a string
// literal; `dsttype` is a bare token sequence that is stringified here.
// Only four entries are provided, so when used for a five-element row the
// last element is left zero-initialized.
#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) \
{ \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \
}
// Placeholder row of five NULL kernel sources, for type pairs that have no kernel.
#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL }
/* Note: the next four arrays all must match in order and size to the ExplicitTypes enum in conversions.h!!! */
// Brace initializer with one kernel row per destination type for the source
// type `srctype` (a bare token sequence, stringified here): fourteen typed
// rows followed by one empty row.
// Rows whose type name contains a space (e.g. "unsigned char") yield sources
// with vector type names such as "unsigned char2"; test_explicit_s2v_function_set
// skips every type whose name contains a space, so those rows are never built.
// NOTE(review): the trailing empty row presumably corresponds to kDouble -
// confirm against the ExplicitType enum in conversions.h.
#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) \
{ \
DECLARE_S2V_IDENT_KERNELS(#srctype,bool), \
DECLARE_S2V_IDENT_KERNELS(#srctype,char), \
DECLARE_S2V_IDENT_KERNELS(#srctype,uchar), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char), \
DECLARE_S2V_IDENT_KERNELS(#srctype,short), \
DECLARE_S2V_IDENT_KERNELS(#srctype,ushort), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short), \
DECLARE_S2V_IDENT_KERNELS(#srctype,int), \
DECLARE_S2V_IDENT_KERNELS(#srctype,uint), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int), \
DECLARE_S2V_IDENT_KERNELS(#srctype,long), \
DECLARE_S2V_IDENT_KERNELS(#srctype,ulong), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long), \
DECLARE_S2V_IDENT_KERNELS(#srctype,float), \
DECLARE_EMPTY \
}
// Brace initializer of fifteen empty rows: a source type for which no
// kernels are provided at all.
#define DECLARE_EMPTY_SET \
{ \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY \
}
/* The overall array of kernel sources, indexed as
   [source type][destination type][vector size index], where the size index
   runs over 2, 4, 8, 16. A NULL entry means no kernel exists for that
   combination. Fourteen typed source sets are followed by one empty set.
   NOTE(review): the initializer supplies 15 sets of 15 rows; this presumably
   equals kNumExplicitTypes - confirm against conversions.h. */
const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = {
DECLARE_S2V_IDENT_KERNELS_SET(bool),
DECLARE_S2V_IDENT_KERNELS_SET(char),
DECLARE_S2V_IDENT_KERNELS_SET(uchar),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned char),
DECLARE_S2V_IDENT_KERNELS_SET(short),
DECLARE_S2V_IDENT_KERNELS_SET(ushort),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned short),
DECLARE_S2V_IDENT_KERNELS_SET(int),
DECLARE_S2V_IDENT_KERNELS_SET(uint),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned int),
DECLARE_S2V_IDENT_KERNELS_SET(long),
DECLARE_S2V_IDENT_KERNELS_SET(ulong),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned long),
DECLARE_S2V_IDENT_KERNELS_SET(float),
DECLARE_EMPTY_SET
};
// Writes `len` bytes starting at `bytes` into `out` as two lowercase hex
// digits per byte, in memory order, always NUL-terminated. Output is
// truncated to what fits in `outSize` (which must be at least 1).
static void explicit_s2v_format_hex( const unsigned char *bytes, size_t len, char *out, size_t outSize )
{
    size_t pos = 0;

    out[ 0 ] = '\0';
    for( size_t b = 0; b < len && pos + 3 <= outSize; b++ )
    {
        snprintf( out + pos, outSize - pos, "%02x", (unsigned int)bytes[ b ] );
        pos += 2;
    }
}

// Builds and runs the test_conversion kernel and validates its output.
//
// `outData` is a caller-owned scratch buffer of
// get_explicit_type_size( destType ) * vecSize * count bytes; keeping its
// ownership in the caller means the early returns taken here (including
// those hidden inside test_error) cannot leak it.
// Returns 0 on success and a non-zero value on failure.
static int explicit_s2v_run( cl_context context, cl_command_queue queue, const char *programSrc,
                             ExplicitType srcType, unsigned int count, ExplicitType destType,
                             unsigned int vecSize, void *inputData, void *outData )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error;
    clMemWrapper streams[2];
    unsigned char convertedData[ 8 ]; /* Max type size is 8 bytes */
    size_t threadSize[3], groupSize[3];
    unsigned int i, s;
    unsigned char *inPtr, *outPtr;

    const char* finalProgramSrc[2] = {
        "", // optional pragma
        programSrc
    };

    if (srcType == kDouble || destType == kDouble) {
        finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
    }

    const size_t paramSize = get_explicit_type_size( srcType );
    const size_t destTypeSize = get_explicit_type_size( destType );
    const size_t destStride = destTypeSize * vecSize;

    if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) )
    {
        log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] );
        return -1;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
    test_error( error, "clCreateBuffer failed");
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error);
    test_error( error, "clCreateBuffer failed");

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Run the kernel */
    threadSize[0] = count;

    error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* Now verify the results. Each input value should have been converted and replicated
       into all vecSize lanes of its output vector, so a memcmp per lane is enough and we
       do not need to rely on the actual type of data */
    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL );
    test_error( error, "Unable to read output values!" );

    inPtr = (unsigned char *)inputData;
    outPtr = (unsigned char *)outData;
    for( i = 0; i < count; i++ )
    {
        /* Convert the input data element to our output data type to compare against */
        convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );

        /* Now compare every element of the vector */
        for( s = 0; s < vecSize; s++ )
        {
            if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
            {
                /* Dump exactly the bytes that belong to this element (memory order),
                   never reading past the input scalar or the output vector. */
                char inHex[ 2 * 8 + 1 ];
                char outHex[ 2 * 8 * 16 + 1 ];
                explicit_s2v_format_hex( inPtr, paramSize, inHex, sizeof( inHex ) );
                explicit_s2v_format_hex( outPtr, destStride, outHex, sizeof( outHex ) );

                log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
                log_error( "       Input bytes:  0x%s\n", inHex );
                log_error( "       Actual bytes: 0x%s\n", outHex );
                return -1;
            }
        }
        inPtr += paramSize;
        outPtr += destStride;
    }

    return 0;
}

// Tests the explicit cast of `count` scalars of type srcType (at inputData)
// to vectors of destType with vecSize lanes, using the kernel source
// programSrc.
//
// A NULL programSrc means "no kernel for this combination" and is treated as
// success. deviceID is unused. Returns 0 on success, non-zero on failure;
// the host result buffer is released on every path.
int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc,
ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData )
{
    if( programSrc == NULL )
        return 0;

    const size_t destStride = get_explicit_type_size( destType ) * vecSize;

    void *outData = malloc( destStride * count );
    if( outData == NULL )
    {
        log_error( "ERROR: Unable to allocate host result buffer\n" );
        return -1;
    }

    const int result = explicit_s2v_run( context, queue, programSrc, srcType, count, destType, vecSize, inputData, outData );

    free( outData );
    return result;
}
// Runs test_explicit_s2v_function for every destination type and every
// vector width (2, 4, 8, 16) that applies to srcType.
//
// Destination types are skipped when they need an unavailable feature
// (kDouble without cl_khr_fp64, long types without gHasLong), when they
// differ from srcType, or when either type name contains a space.
// A failure stops the remaining widths of that destination type only.
// Returns 0 if nothing failed, -1 otherwise.
int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType,
unsigned int count, void *inputData )
{
    unsigned int vectorWidths[] = { 2, 4, 8, 16, 0 }; // zero-terminated
    int status = 0;

    for( int dst = kBool; dst < kNumExplicitTypes; dst++ )
    {
        if( dst == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;

        const bool isLongType = ( dst == kLong || dst == kULong );
        if( isLongType && !gHasLong )
            continue;

        for( int w = 0; vectorWidths[ w ] != 0; w++ )
        {
            // Only identity conversions between single-word type names are exercised.
            if( dst != srcType
                || strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL
                || strchr( get_explicit_type_name( (ExplicitType)dst ), ' ' ) != NULL )
            {
                continue;
            }

            const int outcome = test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dst ][ w ],
                                                            srcType, count, (ExplicitType)dst, vectorWidths[ w ], inputData );
            if( outcome == 0 )
                continue;

            log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n",
                       get_explicit_type_name(srcType), get_explicit_type_name((ExplicitType)dst), vectorWidths[ w ], get_explicit_type_name((ExplicitType)dst) );
            status = -1;
            break;
        }
    }

    return status;
}
// Placeholder for the bool scalar-to-vector test: it only logs that the test
// is skipped and reports success. The disabled code below is kept for
// reference and is never compiled.
int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int skippedResult = 0;

    log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" );
    return skippedResult;

#if 0
    bool data[128];

    generate_random_data( kBool, 128, data );

    return test_explicit_s2v_function_set( deviceID, context, queue, kBool, 128, data );
#endif
}
// Runs the scalar-to-vector cast tests on 128 random char inputs generated
// from gRandomSeed. Returns the result of test_explicit_s2v_function_set.
int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    char samples[128];
    RandomSeed rng(gRandomSeed);

    generate_random_data( kChar, 128, rng, samples );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kChar, 128, samples );
    return status;
}
// Runs the scalar-to-vector cast tests on 128 random unsigned char inputs,
// once as kUChar and once as kUnsignedChar.
// Returns 0 when both pass, -1 as soon as one fails.
int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const ExplicitType spellings[2] = { kUChar, kUnsignedChar };
    unsigned char samples[128];
    RandomSeed rng(gRandomSeed);

    generate_random_data( kUChar, 128, rng, samples );

    for( unsigned int s = 0; s < 2; s++ )
    {
        if( test_explicit_s2v_function_set( deviceID, context, queue, spellings[ s ], 128, samples ) != 0 )
            return -1;
    }
    return 0;
}
// Runs the scalar-to-vector cast tests on 128 random short inputs generated
// from gRandomSeed. Returns 0 on success, -1 on failure.
int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    short samples[128];
    RandomSeed rng(gRandomSeed);

    generate_random_data( kShort, 128, rng, samples );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kShort, 128, samples );
    return ( status != 0 ) ? -1 : 0;
}
// Runs the scalar-to-vector cast tests on 128 random unsigned short inputs,
// once as kUShort and once as kUnsignedShort.
// Returns 0 when both pass, -1 as soon as one fails.
int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const ExplicitType spellings[2] = { kUShort, kUnsignedShort };
    unsigned short samples[128];
    RandomSeed rng(gRandomSeed);

    generate_random_data( kUShort, 128, rng, samples );

    for( unsigned int s = 0; s < 2; s++ )
    {
        if( test_explicit_s2v_function_set( deviceID, context, queue, spellings[ s ], 128, samples ) != 0 )
            return -1;
    }
    return 0;
}
// Runs the scalar-to-vector cast tests on 128 random int inputs generated
// from gRandomSeed. Returns 0 on success, -1 on failure.
int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int samples[128];
    RandomSeed rng(gRandomSeed);

    generate_random_data( kInt, 128, rng, samples );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kInt, 128, samples );
    return ( status != 0 ) ? -1 : 0;
}
// Runs the scalar-to-vector cast tests on 128 random unsigned int inputs,
// once as kUInt and once as kUnsignedInt.
// Returns 0 when both pass, -1 as soon as one fails.
int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const ExplicitType spellings[2] = { kUInt, kUnsignedInt };
    unsigned int samples[128];
    RandomSeed rng(gRandomSeed);

    generate_random_data( kUInt, 128, rng, samples );

    for( unsigned int s = 0; s < 2; s++ )
    {
        if( test_explicit_s2v_function_set( deviceID, context, queue, spellings[ s ], 128, samples ) != 0 )
            return -1;
    }
    return 0;
}
// Runs the scalar-to-vector cast tests on 128 random cl_long inputs
// generated from gRandomSeed. Returns 0 on success, -1 on failure.
int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_long samples[128];
    RandomSeed rng(gRandomSeed);

    generate_random_data( kLong, 128, rng, samples );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kLong, 128, samples );
    return ( status != 0 ) ? -1 : 0;
}
// Runs the scalar-to-vector cast tests on 128 random cl_ulong inputs,
// once as kULong and once as kUnsignedLong.
// Returns 0 when both pass, -1 as soon as one fails.
int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const ExplicitType spellings[2] = { kULong, kUnsignedLong };
    cl_ulong samples[128];
    RandomSeed rng(gRandomSeed);

    generate_random_data( kULong, 128, rng, samples );

    for( unsigned int s = 0; s < 2; s++ )
    {
        if( test_explicit_s2v_function_set( deviceID, context, queue, spellings[ s ], 128, samples ) != 0 )
            return -1;
    }
    return 0;
}
// Runs the scalar-to-vector cast tests on 128 random float inputs generated
// from gRandomSeed. Returns 0 on success, -1 on failure.
int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    float samples[128];
    RandomSeed rng(gRandomSeed);

    generate_random_data( kFloat, 128, rng, samples );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kFloat, 128, samples );
    return ( status != 0 ) ? -1 : 0;
}
// Runs the scalar-to-vector cast tests on 128 random double inputs.
// When the device does not report cl_khr_fp64 the test is skipped and
// reports success. Returns 0 on success or skip, -1 on failure.
int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    double samples[128];
    RandomSeed rng(gRandomSeed);

    const bool hasFp64 = is_extension_available( deviceID, "cl_khr_fp64" );
    if( !hasFp64 )
    {
        log_info("Extension cl_khr_fp64 not supported. Skipping test.\n");
        return 0;
    }

    generate_random_data( kDouble, 128, rng, samples );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kDouble, 128, samples );
    return ( status != 0 ) ? -1 : 0;
}

View File

@@ -1,160 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/* OpenCL C source of the test_float2int kernel: each work-item casts
   src[tid] to int and stores the result in dst[tid]. */
const char *float2int_kernel_code =
"__kernel void test_float2int(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n";
/*
 * Checks that every one of the first n device results in outptr equals the
 * host-side (int) cast of the matching input in inptr.
 * Logs the outcome; returns 0 when all values match, -1 on the first mismatch.
 */
int
verify_float2int(cl_float *inptr, cl_int *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        const int expected = (int)inptr[idx];

        if (outptr[idx] != expected)
        {
            log_error("FLOAT2INT test failed\n");
            return -1;
        }
        idx++;
    }

    log_info("FLOAT2INT test passed\n");
    return 0;
}
/*
 * Tests float-to-int conversion on the device: num_elements random floats
 * are uploaded, converted by the test_float2int kernel, read back and
 * compared with the host-side (int) cast.
 *
 * `device` is unused. Returns 0 on success and -1 on any failure. Every
 * host buffer and OpenCL object created here is released on all paths.
 */
int
test_float2int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_float *input_ptr = NULL;
    cl_int *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t lengths[1];
    size_t threads[1];
    int err;
    int result = -1;
    int i;
    MTdata d;

    input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
    output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    /* The generator is only needed to fill the input, so release it right away. */
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    lengths[0] = strlen(float2int_kernel_code);
    program = clCreateProgramWithSource(context, 1, &float2int_kernel_code, lengths, NULL);
    if (!program)
    {
        log_error("clCreateProgramWithSource failed\n");
        goto cleanup;
    }

    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clBuildProgram failed\n");
        goto cleanup;
    }

    kernel = clCreateKernel(program, "test_float2int", NULL);
    if (!kernel)
    {
        log_error("clCreateKernel failed\n");
        goto cleanup;
    }

    /* Accumulate both results so a failure of the first call is not masked by the second. */
    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (size_t)num_elements;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_float2int(input_ptr, output_ptr, num_elements);

cleanup:
    /* Objects that were never created are still NULL and are skipped. */
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -1,270 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "harness/rounding_mode.h"
#include "procs.h"
/* OpenCL C source of test_fpadd: dst[tid] = srcA[tid] + srcB[tid] on floats. */
const char *fpadd_kernel_code =
"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
/* OpenCL C source of test_fpsub: dst[tid] = srcA[tid] - srcB[tid] on floats. */
const char *fpsub_kernel_code =
"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
/* OpenCL C source of test_fpmul: dst[tid] = srcA[tid] * srcB[tid] on floats. */
const char *fpmul_kernel_code =
"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
/* Error tolerance constant. NOTE(review): the verify_fp* routines in this
   file compare results exactly and do not reference it - it looks unused. */
static const float MAX_ERR = 1e-5f;
/*
 * Checks that each of the first n values in outptr is exactly equal to the
 * host-computed float sum of the matching inptrA and inptrB elements.
 * Logs the outcome; returns 0 when all values match, -1 on the first mismatch.
 */
int
verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        /* Stored in a float so the reference is rounded to single precision. */
        float expected = inptrA[idx] + inptrB[idx];

        if (expected != outptr[idx])
        {
            log_error("FP_ADD float test failed\n");
            return -1;
        }
        idx++;
    }

    log_info("FP_ADD float test passed\n");
    return 0;
}
/*
 * Checks that each of the first n values in outptr is exactly equal to the
 * host-computed float difference inptrA[i] - inptrB[i].
 * Logs the outcome; returns 0 when all values match, -1 on the first mismatch.
 */
int
verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        /* Stored in a float so the reference is rounded to single precision. */
        float expected = inptrA[idx] - inptrB[idx];

        if (expected != outptr[idx])
        {
            log_error("FP_SUB float test failed\n");
            return -1;
        }
        idx++;
    }

    log_info("FP_SUB float test passed\n");
    return 0;
}
/*
 * Checks that each of the first n values in outptr is exactly equal to the
 * host-computed float product of the matching inptrA and inptrB elements.
 * Logs the outcome; returns 0 when all values match, -1 on the first mismatch.
 */
int
verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        /* Stored in a float so the reference is rounded to single precision. */
        float expected = inptrA[idx] * inptrB[idx];

        if (expected != outptr[idx])
        {
            log_error("FP_MUL float test failed\n");
            return -1;
        }
        idx++;
    }

    log_info("FP_MUL float test passed\n");
    return 0;
}
/*
 * Tests float add, subtract and multiply on the device against the host.
 *
 * Three kernels (test_fpadd, test_fpsub, test_fpmul) are each run over
 * num_elements random inputs and their output is compared exactly with the
 * host result. If the device reports round-to-zero without round-to-nearest
 * it must be an EMBEDDED_PROFILE device, and the host is switched to
 * round-toward-zero while the reference values are computed.
 *
 * Returns 0 on success and a non-zero value on failure. Every host buffer,
 * the random generator and all OpenCL objects are released on all paths.
 */
int
test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[4] = { NULL, NULL, NULL, NULL };
    cl_program program[3] = { NULL, NULL, NULL };
    cl_kernel kernel[3] = { NULL, NULL, NULL };
    float *input_ptr[3] = { NULL, NULL, NULL };
    float *output_ptr = NULL;
    const char **kernel_sources[3] = { &fpadd_kernel_code, &fpsub_kernel_code, &fpmul_kernel_code };
    const char *kernel_names[3] = { "test_fpadd", "test_fpsub", "test_fpmul" };
    size_t threads[1];
    int err, i, k;
    MTdata d = NULL;
    size_t length = sizeof(cl_float) * num_elements;
    int isRTZ = 0;
    RoundingMode oldMode = kDefaultRoundingMode;
    cl_device_fp_config single_config = 0;

    /* Check for floating point capabilities. Nothing has been allocated yet,
       so these failure paths can return directly. */
    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
    if (err) {
        log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d\n", err);
        test_finish();
        return -1;
    }

    /* If we only support rtz mode */
    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
    {
        /* Check to make sure we are an embedded device */
        char profile[32];
        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
        if( err )
        {
            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d\n", err);
            test_finish();
            return -1;
        }
        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
        {
            log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
            test_finish();
            return -1;
        }

        isRTZ = 1;
        oldMode = get_round();
    }

    for (k=0; k<3; k++)
        input_ptr[k] = (cl_float*)malloc(length);
    output_ptr = (cl_float*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        err = -1;
        goto cleanup;
    }

    for (k=0; k<4; k++)
    {
        streams[k] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
        if (err)
        {
            log_error("clCreateBuffer failed. (%d)\n", err);
            goto cleanup;
        }
    }

    /* Fill the three input arrays one after the other from a single generator. */
    d = init_genrand( gRandomSeed );
    for (k=0; k<3; k++)
    {
        float *p = input_ptr[k];
        for (i=0; i<num_elements; i++)
            p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
    }

    for (k=0; k<3; k++)
    {
        err = clEnqueueWriteBuffer(queue, streams[k], CL_TRUE, 0, length, input_ptr[k], 0, NULL, NULL);
        if (err)
        {
            log_error("clEnqueueWriteBuffer failed. (%d)\n", err);
            goto cleanup;
        }
    }

    for (k=0; k<3; k++)
    {
        err = create_single_kernel_helper(context, &program[k], &kernel[k], 1, kernel_sources[k], kernel_names[k]);
        if (err)
        {
            log_error("create_single_kernel_helper failed (%d)\n", err);
            goto cleanup;
        }
    }

    /* Every kernel reads streams[0] and streams[1] and writes streams[3]. */
    for (k=0; k<3; k++)
    {
        err = clSetKernelArg(kernel[k], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[k], 1, sizeof streams[1], &streams[1]);
        err |= clSetKernelArg(kernel[k], 2, sizeof streams[3], &streams[3]);
        if (err)
        {
            log_error("clSetKernelArgs failed. (%d)\n", err);
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)num_elements;
    for (i=0; i<3; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err)
        {
            log_error("clEnqueueNDRangeKernel failed. (%d)\n", err);
            goto cleanup;
        }

        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err)
        {
            log_error("clEnqueueReadBuffer failed. (%d)\n", err);
            goto cleanup;
        }

        /* Compute the host reference in the same rounding mode as the device. */
        if( isRTZ )
            set_round( kRoundTowardZero, kfloat );

        switch (i)
        {
            case 0:
                err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 1:
                err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 2:
                err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
        }

        if( isRTZ )
            set_round( oldMode, kfloat );

        if (err)
            break;
    }

cleanup:
    /* Objects that were never created are still NULL and are skipped. */
    for (k=0; k<4; k++)
    {
        if (streams[k])
            clReleaseMemObject(streams[k]);
    }
    for (k=0; k<3; k++)
    {
        if (kernel[k])
            clReleaseKernel(kernel[k]);
        if (program[k])
            clReleaseProgram(program[k]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    if (d)
        free_mtdata( d );

    return err;
}

View File

@@ -1,268 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "harness/rounding_mode.h"
#include "procs.h"
// OpenCL C sources for the three float2 kernels built by test_fpmath_float2.
// Each work-item reads one float2 from srcA and one from srcB at its global
// id and stores the component-wise result at the same index in dst.

// Component-wise addition: dst[tid] = srcA[tid] + srcB[tid].
const char *fpadd2_kernel_code =
"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// Component-wise subtraction: dst[tid] = srcA[tid] - srcB[tid].
const char *fpsub2_kernel_code =
"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// Component-wise multiplication: dst[tid] = srcA[tid] * srcB[tid].
const char *fpmul2_kernel_code =
"__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// Host-side check for the float2 add kernel.
//
// Walks the first n floats of each array and requires outptr[k] to equal
// inptrA[k] + inptrB[k] exactly. The caller passes the flattened component
// count (num_elements * 2) as n.
//
// Returns 0 and logs a pass message when every value matches; logs a failure
// and returns -1 at the first mismatch.
int
verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n)
{
    float expected;
    int idx = 0;

    while (idx < n)
    {
        expected = inptrA[idx] + inptrB[idx];
        if (expected != outptr[idx])
        {
            log_error("FP_ADD float2 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_ADD float2 test passed\n");
    return 0;
}
// Host-side check for the float2 subtract kernel.
//
// Requires outptr[k] == inptrA[k] - inptrB[k] (exact comparison) for the
// first n floats; the caller passes num_elements * 2 as n.
//
// Returns 0 on success, -1 as soon as one value differs.
int
verify_fpsub2(float *inptrA, float *inptrB, float *outptr, int n)
{
    float expected;
    int idx;

    // Scan forward until the first mismatch (or the end of the data).
    for (idx = 0; idx < n; ++idx)
    {
        expected = inptrA[idx] - inptrB[idx];
        if (expected != outptr[idx])
            break;
    }

    if (idx < n)
    {
        log_error("FP_SUB float2 test failed\n");
        return -1;
    }

    log_info("FP_SUB float2 test passed\n");
    return 0;
}
// Host-side check for the float2 multiply kernel.
//
// Requires outptr[k] == inptrA[k] * inptrB[k] (exact comparison) for the
// first n floats; the caller passes num_elements * 2 as n.
//
// Returns 0 on success, -1 at the first differing value.
int
verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int remaining;
    float product;
    const float *lhs = inptrA;
    const float *rhs = inptrB;
    const float *got = outptr;

    for (remaining = n; remaining > 0; --remaining, ++lhs, ++rhs, ++got)
    {
        product = *lhs * *rhs;
        if (product != *got)
        {
            log_error("FP_MUL float2 test failed\n");
            return -1;
        }
    }

    log_info("FP_MUL float2 test passed\n");
    return 0;
}
// Runs the float2 add, subtract and multiply kernels on random inputs and
// compares every component with the host's result for the same operation.
//
// device, context, queue: OpenCL objects the test runs on.
// num_elements: number of float2 elements per buffer (one work-item each).
//
// Returns 0 when all three kernels verify. Returns -1 when a device query
// fails, when the device only rounds toward zero but is not EMBEDDED_PROFILE,
// when a host allocation fails, or when a verification fails.
// NOTE(review): test_error presumably returns from this function on a non-zero
// error code; those paths still skip the cleanup at the bottom - confirm
// against the harness macro.
int
test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[4];
    cl_program program[3];
    cl_kernel kernel[3];
    cl_float *input_ptr[3], *output_ptr, *p;
    size_t threads[1];
    int err, i, b;
    MTdata d = init_genrand( gRandomSeed );
    size_t length = sizeof(cl_float) * 2 * num_elements;
    int isRTZ = 0;
    RoundingMode oldMode = kDefaultRoundingMode;

    // check for floating point capabilities
    cl_device_fp_config single_config = 0;
    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
    if (err) {
        log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
        test_finish();
        free_mtdata(d); // do not leak the generator on the early exit
        return -1;
    }

    //If we only support rtz mode
    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
    {
        //Check to make sure we are an embedded device
        char profile[32];
        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
        if( err )
        {
            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
            test_finish();
            free_mtdata(d);
            return -1;
        }
        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
        {
            log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
            test_finish();
            free_mtdata(d);
            return -1;
        }
        isRTZ = 1;
        oldMode = get_round();
    }

    // Host-side copies of the three inputs and of the kernel output.
    input_ptr[0] = (cl_float*)malloc(length);
    input_ptr[1] = (cl_float*)malloc(length);
    input_ptr[2] = (cl_float*)malloc(length);
    output_ptr = (cl_float*)malloc(length);
    if( !input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr )
    {
        // The buffers are written to below, so a failed allocation must
        // stop the test here instead of dereferencing NULL.
        log_error("Failed to allocate host buffers\n");
        free(input_ptr[0]);
        free(input_ptr[1]);
        free(input_ptr[2]);
        free(output_ptr);
        free_mtdata(d);
        return -1;
    }

    for (b=0; b<4; b++)
    {
        streams[b] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
        test_error( err, "clCreateBuffer failed.");
    }

    // Fill the inputs one buffer at a time so the random sequence is
    // consumed in a fixed order.
    for (b=0; b<3; b++)
    {
        p = input_ptr[b];
        for (i=0; i<num_elements*2; i++)
            p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
    }

    for (b=0; b<3; b++)
    {
        err = clEnqueueWriteBuffer(queue, streams[b], CL_TRUE, 0, length, input_ptr[b], 0, NULL, NULL);
        test_error(err, "clEnqueueWriteBuffer failed");
    }

    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub2_kernel_code, "test_fpsub2");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul2_kernel_code, "test_fpmul2");
    test_error( err, "create_single_kernel_helper failed");

    // Every kernel reads streams[0] and streams[1] and writes streams[3].
    for (i=0; i<3; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        err |= clSetKernelArg(kernel[i], 2, sizeof streams[3], &streams[3]);
        test_error( err, "clSetKernelArgs failed.");
    }

    free_mtdata(d);
    d = NULL;

    threads[0] = (unsigned int)num_elements;
    for (i=0; i<3; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        test_error( err, "clEnqueueNDRangeKernel failed.");
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        test_error( err, "clEnqueueReadBuffer failed.");

        // On RTZ-only devices compute the host reference with the same
        // rounding mode, then restore the previous mode.
        if( isRTZ )
            set_round( kRoundTowardZero, kfloat );
        switch (i)
        {
            case 0:
                err = verify_fpadd2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
                break;
            case 1:
                err = verify_fpsub2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
                break;
            case 2:
                err = verify_fpmul2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
                break;
        }
        if( isRTZ )
            set_round( oldMode, kfloat );
        if (err)
            break;
    }

    // cleanup
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[1]);
    clReleaseMemObject(streams[2]);
    clReleaseMemObject(streams[3]);
    for (i=0; i<3; i++)
    {
        clReleaseKernel(kernel[i]);
        clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    return err;
}

View File

@@ -1,269 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "harness/rounding_mode.h"
// OpenCL C sources for the three float4 kernels built by test_fpmath_float4.
// Each work-item reads one float4 from srcA and one from srcB at its global
// id and stores the component-wise result at the same index in dst.

// Component-wise addition: dst[tid] = srcA[tid] + srcB[tid].
const char *fpadd4_kernel_code =
"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// Component-wise subtraction: dst[tid] = srcA[tid] - srcB[tid].
const char *fpsub4_kernel_code =
"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// Component-wise multiplication: dst[tid] = srcA[tid] * srcB[tid].
const char *fpmul4_kernel_code =
"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// Host-side check for the float4 add kernel.
//
// Requires outptr[k] == inptrA[k] + inptrB[k] (exact comparison) for the
// first n floats; the caller passes the flattened component count
// (num_elements * 4) as n.
//
// Returns 0 on success, -1 at the first mismatch.
int
verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n)
{
    float expected;
    int idx = 0;

    while (idx < n)
    {
        expected = inptrA[idx] + inptrB[idx];
        if (expected != outptr[idx])
        {
            log_error("FP_ADD float4 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_ADD float4 test passed\n");
    return 0;
}
// Host-side check for the float4 subtract kernel.
//
// Requires outptr[k] == inptrA[k] - inptrB[k] (exact comparison) for the
// first n floats; the caller passes num_elements * 4 as n.
//
// Returns 0 on success, -1 as soon as one value differs.
int
verify_fpsub4(float *inptrA, float *inptrB, float *outptr, int n)
{
    float expected;
    int idx;

    // Scan forward until the first mismatch (or the end of the data).
    for (idx = 0; idx < n; ++idx)
    {
        expected = inptrA[idx] - inptrB[idx];
        if (expected != outptr[idx])
            break;
    }

    if (idx < n)
    {
        log_error("FP_SUB float4 test failed\n");
        return -1;
    }

    log_info("FP_SUB float4 test passed\n");
    return 0;
}
// Host-side check for the float4 multiply kernel.
//
// Requires outptr[k] == inptrA[k] * inptrB[k] (exact comparison) for the
// first n floats; the caller passes num_elements * 4 as n.
//
// Returns 0 on success, -1 at the first differing value.
int
verify_fpmul4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int remaining;
    float product;
    const float *lhs = inptrA;
    const float *rhs = inptrB;
    const float *got = outptr;

    for (remaining = n; remaining > 0; --remaining, ++lhs, ++rhs, ++got)
    {
        product = *lhs * *rhs;
        if (product != *got)
        {
            log_error("FP_MUL float4 test failed\n");
            return -1;
        }
    }

    log_info("FP_MUL float4 test passed\n");
    return 0;
}
// Runs the float4 add, subtract and multiply kernels on random inputs and
// compares every component with the host's result for the same operation.
//
// device, context, queue: OpenCL objects the test runs on.
// num_elements: number of float4 elements per buffer (one work-item each).
//
// Returns 0 when all three kernels verify. Returns -1 when a device query
// fails, when the device only rounds toward zero but is not EMBEDDED_PROFILE,
// when a host allocation fails, or when a verification fails.
// NOTE(review): test_error presumably returns from this function on a non-zero
// error code; those paths still skip the cleanup at the bottom - confirm
// against the harness macro.
int
test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[4];
    cl_program program[3];
    cl_kernel kernel[3];
    cl_float *input_ptr[3], *output_ptr, *p;
    size_t threads[1];
    int err, i, b;
    MTdata d = init_genrand( gRandomSeed );
    size_t length = sizeof(cl_float) * 4 * num_elements;
    int isRTZ = 0;
    RoundingMode oldMode = kDefaultRoundingMode;

    // check for floating point capabilities
    cl_device_fp_config single_config = 0;
    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
    if (err) {
        log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
        test_finish();
        free_mtdata(d); // do not leak the generator on the early exit
        return -1;
    }

    //If we only support rtz mode
    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
    {
        //Check to make sure we are an embedded device
        char profile[32];
        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
        if( err )
        {
            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
            test_finish();
            free_mtdata(d);
            return -1;
        }
        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
        {
            log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
            test_finish();
            free_mtdata(d);
            return -1;
        }
        isRTZ = 1;
        oldMode = get_round();
    }

    // Host-side copies of the three inputs and of the kernel output.
    input_ptr[0] = (cl_float*)malloc(length);
    input_ptr[1] = (cl_float*)malloc(length);
    input_ptr[2] = (cl_float*)malloc(length);
    output_ptr = (cl_float*)malloc(length);
    if( !input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr )
    {
        // The buffers are written to below, so a failed allocation must
        // stop the test here instead of dereferencing NULL.
        log_error("Failed to allocate host buffers\n");
        free(input_ptr[0]);
        free(input_ptr[1]);
        free(input_ptr[2]);
        free(output_ptr);
        free_mtdata(d);
        return -1;
    }

    for (b=0; b<4; b++)
    {
        streams[b] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
        test_error( err, "clCreateBuffer failed.");
    }

    // Fill the inputs one buffer at a time so the random sequence is
    // consumed in a fixed order.
    for (b=0; b<3; b++)
    {
        p = input_ptr[b];
        for (i=0; i<num_elements*4; i++)
            p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
    }
    free_mtdata(d);

    for (b=0; b<3; b++)
    {
        err = clEnqueueWriteBuffer(queue, streams[b], CL_TRUE, 0, length, input_ptr[b], 0, NULL, NULL);
        test_error(err, "clEnqueueWriteBuffer failed");
    }

    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd4_kernel_code, "test_fpadd4");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub4_kernel_code, "test_fpsub4");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul4_kernel_code, "test_fpmul4");
    test_error( err, "create_single_kernel_helper failed");

    // Every kernel reads streams[0] and streams[1] and writes streams[3].
    for (i=0; i<3; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        err |= clSetKernelArg(kernel[i], 2, sizeof streams[3], &streams[3]);
        test_error( err, "clSetKernelArgs failed.");
    }

    threads[0] = (unsigned int)num_elements;
    for (i=0; i<3; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        test_error( err, "clEnqueueNDRangeKernel failed.");
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        test_error( err, "clEnqueueReadBuffer failed.");

        // On RTZ-only devices compute the host reference with the same
        // rounding mode, then restore the previous mode.
        if( isRTZ )
            set_round( kRoundTowardZero, kfloat );
        switch (i)
        {
            case 0:
                err = verify_fpadd4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
                break;
            case 1:
                err = verify_fpsub4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
                break;
            case 2:
                err = verify_fpmul4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
                break;
        }
        if( isRTZ )
            set_round( oldMode, kfloat );
        if (err)
            break;
    }

    // cleanup
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[1]);
    clReleaseMemObject(streams[2]);
    clReleaseMemObject(streams[3]);
    for (i=0; i<3; i++)
    {
        clReleaseKernel(kernel[i]);
        clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    return err;
}

View File

@@ -1,421 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Source index of output component `index` for the .hi selector: the upper
// half of a vector with vectorSize components starts at vectorSize / 2.
int hi_offset( int index, int vectorSize)
{
    const int upperHalfStart = vectorSize / 2;
    return upperHalfStart + index;
}
// Source index of output component `index` for the .lo selector: the lower
// half maps one-to-one, so vectorSize is not needed.
int lo_offset( int index, int vectorSize)
{
    (void)vectorSize;
    return index;
}
// Source index of output component `index` for the .even selector
// (components 0, 2, 4, ...); vectorSize is not needed.
int even_offset( int index, int vectorSize )
{
    (void)vectorSize;
    return 2 * index;
}
// Source index of output component `index` for the .odd selector
// (components 1, 3, 5, ...); vectorSize is not needed.
int odd_offset( int index, int vectorSize )
{
    (void)vectorSize;
    return 2 * index + 1;
}
// Maps an output component index to the input component it is taken from.
typedef int (*OffsetFunc)( int index, int vectorSize );
// Indexed by operatorToUse; the order matches operatorToUse_names below.
static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset };
// NOTE(review): not referenced by test_hiloeo or CheckResults in this file.
typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );
// Selector suffix spliced into the generated kernel source and log output.
static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" };
// Scalar type names under test; kSizes below is the parallel table of
// element sizes.
static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" };
// The next four tables share one index (0..5) that identifies a vector width.
// Number of components for each width index.
static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16};
// Element stride used when stepping between consecutive vectors in a
// buffer; a 3-component vector steps by 4 elements.
static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16};
// Width index of the selector result for each input width index.
static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4};
// if input is size vector_sizes[i], output is size
// vector_sizes[out_vector_idx[i]]
// input type name is strcat(gentype, vector_size_names[i]);
// and output type name is
// strcat(gentype, vector_size_names[out_vector_idx[i]]);
// Reverse lookup from a component count (0..16) to its width index; -1 marks
// counts that are not valid vector widths.
// NOTE(review): not referenced by test_hiloeo or CheckResults in this file.
static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4,
-1,-1,-1,-1,-1,-1,-1,5};
// Type-name suffix for each width index (empty for scalars).
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
// Size in bytes of each entry of test_str_names, in the same order.
static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
// Defined after test_hiloeo; compares kernel output with the host-side
// selection of the same input components.
static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
// Tests the .hi, .lo, .even and .odd vector component selectors.
//
// For every scalar type in test_str_names, every selector and every vector
// width from 2 to 16, a kernel is generated that applies the selector either
// directly to srcA[tid] or to a vector rebuilt component by component from
// srcA[tid]. The kernel output is then compared with CheckResults.
//
// device, context, queue: OpenCL objects the test runs on.
// n_elems: sizes the host and device buffers (sizeof(cl_int) * 4 * n_elems
//          bytes each).
//
// Returns 0 on success and -1 on the first failure. "double" is skipped
// without cl_khr_fp64; "long" and "ulong" are skipped when gHasLong is false.
// NOTE(review): the error paths inside the loops return immediately without
// releasing the buffers, kernel, program or host allocations.
int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_int *input_ptr, *output_ptr, *p;
    int err = 0;
    cl_uint i;
    int hasDouble = is_extension_available( device, "cl_khr_fp64" );
    cl_uint vectorSize, operatorToUse;
    cl_uint type;
    MTdata d;
    int expressionMode;
    int numExpressionModes = 2;
    size_t length = sizeof(cl_int) * 4 * n_elems;

    input_ptr = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if( !input_ptr || !output_ptr )
    {
        // Both buffers are written to below; stop instead of dereferencing NULL.
        log_error("malloc failed\n");
        free(input_ptr);
        free(output_ptr);
        return -1;
    }

    p = input_ptr;
    d = init_genrand( gRandomSeed );
    for (i=0; i<4 * (cl_uint) n_elems; i++)
        p[i] = genrand_int32(d);
    free_mtdata(d); d = NULL;

    for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
    {
        // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
        size_t elementCount = length / kSizes[type];
        cl_mem streams[2];

        // skip double if unavailable
        if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
            continue;

        // skip the 64-bit integer types if unavailable; a name can only match
        // one of the two strings, so the tests must be OR-ed
        if( !gHasLong &&
            (( 0 == strcmp( test_str_names[type], "long" )) ||
             ( 0 == strcmp( test_str_names[type], "ulong" ))))
            continue;

        log_info( "%s", test_str_names[type] );
        fflush( stdout );

        // Set up data streams for the type
        streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[0])
        {
            log_error("clCreateBuffer failed\n");
            return -1;
        }
        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[1])
        {
            log_error("clCreateBuffer failed\n");
            return -1;
        }

        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            return -1;
        }

        for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
        {
            log_info( " %s", operatorToUse_names[ operatorToUse ] );
            fflush( stdout );
            // Width index 0 (scalar) is skipped: selectors need a vector.
            for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
                for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {

                    cl_program program = NULL;
                    cl_kernel kernel = NULL;
                    cl_uint outVectorSize = out_vector_idx[vectorSize];
                    char expression[1024];

                    // Kernel source fragments; `expression` is filled in below
                    // before the program is built.
                    const char *source[] = {
                        "", // optional pragma string
                        "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize],
                        "(__global ", test_str_names[type], vector_size_names[vectorSize],
                        " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize],
                        " *dst)\n"
                        "{\n"
                        " int tid = get_global_id(0);\n"
                        "\n"
                        " ", test_str_names[type],
                        vector_size_names[out_vector_idx[vectorSize]],
                        " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n"
                        " dst[tid] = tmp;\n"
                        "}\n"
                    };

                    // Mode 0 applies the selector to the loaded vector; mode 1
                    // applies it to a vector literal rebuilt from components.
                    if(expressionMode == 0) {
                        snprintf(expression, sizeof( expression ), "srcA[tid]");
                    } else if(expressionMode == 1) {
                        switch(vector_sizes[vectorSize]) {
                            case 16:
                                snprintf(expression, sizeof( expression ),
                                         "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
                                         test_str_names[type]
                                         );
                                break;
                            case 8:
                                snprintf(expression, sizeof( expression ),
                                         "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
                                         test_str_names[type]
                                         );
                                break;
                            case 4:
                                snprintf(expression, sizeof( expression ),
                                         "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
                                         test_str_names[type]
                                         );
                                break;
                            case 3:
                                snprintf(expression, sizeof( expression ),
                                         "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
                                         test_str_names[type]
                                         );
                                break;
                            case 2:
                                snprintf(expression, sizeof( expression ),
                                         "((%s2)(srcA[tid].s0, srcA[tid].s1))",
                                         test_str_names[type]
                                         );
                                break;
                            default :
                                snprintf(expression, sizeof( expression ), "srcA[tid]");
                                log_info("Default\n");
                        }
                    } else {
                        snprintf(expression, sizeof( expression ), "srcA[tid]");
                    }

                    if (0 == strcmp( test_str_names[type], "double" ))
                        source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";

                    char kernelName[128];
                    snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
                    err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
                    if (err)
                        return -1;

                    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
                    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clSetKernelArgs failed\n");
                        return -1;
                    }

                    //Wipe the output buffer clean
                    uint32_t pattern = 0xdeadbeef;
                    memset_pattern4( output_ptr, &pattern, length );
                    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clEnqueueWriteBuffer failed\n");
                        return -1;
                    }

                    // One work-item per input vector that fits in the buffer.
                    size_t size = elementCount / (vector_aligns[vectorSize]);
                    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clEnqueueNDRangeKernel failed\n");
                        return -1;
                    }

                    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clEnqueueReadBuffer failed\n");
                        return -1;
                    }

                    char *inP = (char *)input_ptr;
                    char *outP = (char *)output_ptr;
                    outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
                                             ( vector_sizes[ out_vector_idx[vectorSize] ] ) );
                    // was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
                    for( size_t e = 0; e < size; e++ )
                    {
                        if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) {
                            log_info("e is %d\n", (int)e);
                            fflush(stdout);
                            // break;
                            return -1;
                        }
                        // Step both cursors by one vector's stride.
                        inP += kSizes[type] * ( vector_aligns[vectorSize] );
                        outP += kSizes[type] * ( vector_aligns[outVectorSize] );
                    }

                    clReleaseKernel( kernel );
                    clReleaseProgram( program );
                    log_info( "." );
                    fflush( stdout );
                }
            }
        }

        clReleaseMemObject( streams[0] );
        clReleaseMemObject( streams[1] );
        log_info( "done\n" );
    }

    log_info("HiLoEO test passed\n");
    free(input_ptr);
    free(output_ptr);
    return err;
}
static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
{
cl_ulong array[8];
void *p = array;
size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]];
size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]];
// was 1 << (vectorSize-1);
OffsetFunc f = offsetFuncs[ operatorToUse ];
size_t elementSize = kSizes[type];
if(vector_size_names[vectorSize][0] == '3') {
if(operatorToUse_names[operatorToUse][0] == 'h' ||
operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
{
cmpVectorSize = 1; // special case for vec3 ignored values
}
}
switch( elementSize )
{
case 1:
{
char *i = (char*)in;
char *o = (char*)out;
size_t j;
cl_uint k;
OffsetFunc f = offsetFuncs[ operatorToUse ];
for( k = 0; k < elementCount; k++ )
{
char *o2 = (char*)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
{
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", %d", i[j] );
log_info( " } --> { %d", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", %d", o[j] );
log_info( " }\n" );
return -1;
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
case 2:
{
short *i = (short*)in;
short *o = (short*)out;
size_t j;
cl_uint k;
for( k = 0; k < elementCount; k++ )
{
short *o2 = (short*)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
{
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", %d", i[j] );
log_info( " } --> { %d", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", %d", o[j] );
log_info( " }\n" );
return -1;
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
case 4:
{
int *i = (int*)in;
int *o = (int*)out;
size_t j;
cl_uint k;
for( k = 0; k < elementCount; k++ )
{
int *o2 = (int *)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
for( j = 0; j < cmpVectorSize; j++ )
{
/* Allow float nans to be binary different */
if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
{
log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", 0x%8.8x", i[j] );
log_info( " } --> { 0x%8.8x", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", 0x%8.8x", o[j] );
log_info( " }\n" );
return -1;
}
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
case 8:
{
cl_ulong *i = (cl_ulong*)in;
cl_ulong *o = (cl_ulong*)out;
size_t j;
cl_uint k;
for( k = 0; k < elementCount; k++ )
{
cl_ulong *o2 = (cl_ulong*)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
{
log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", 0x%16.16llx", i[j] );
log_info( " } --> { 0x%16.16llx", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", 0x%16.16llx", o[j] );
log_info( " }\n" );
return -1;
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
default:
log_info( "Internal error. Unknown data type\n" );
return -2;
}
return 0;
}

View File

@@ -1,276 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the kernel run by test_hostptr: each work-item adds
// one float from srcA to one float from srcB and stores the sum in dst.
const char *hostptr_kernel_code =
"__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// NOTE(review): not referenced by the code visible in this part of the file;
// verify_hostptr compares results exactly rather than against a tolerance.
static const float MAX_ERR = 1e-5f;
// Checks that outptr[k] == inptrA[k] + inptrB[k] (exact comparison) for the
// first n values.
//
// Returns 0 when all values match and -1 at the first mismatch; nothing is
// logged here, reporting is left to the caller.
static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
{
    cl_float expected;
    int idx = 0;

    while (idx < n)
    {
        expected = inptrA[idx] + inptrB[idx];
        if (expected != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
// Fills ptr[0 .. count-1] with values drawn from get_random_float using the
// generator d, with the bounds -2^32 and 2^32 passed as the range.
static void make_random_data(unsigned count, float *ptr, MTdata d)
{
    cl_uint slot = 0;

    while (slot < count)
    {
        ptr[slot] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), d);
        ++slot;
    }
}
// Allocates a w x h image with 4 bytes per pixel and fills every byte with
// the low 8 bits of successive genrand_int32(d) values.
//
// Returns the malloc'ed buffer, which the caller owns and must free, or NULL
// (after logging) when the allocation fails.
static unsigned char *
generate_rgba8_image(int w, int h, MTdata d)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
    int i;

    // The buffer is written to immediately, so bail out on allocation failure
    // instead of dereferencing NULL.
    if (ptr == NULL)
    {
        log_error("generate_rgba8_image: failed to allocate %d x %d image\n", w, h);
        return NULL;
    }

    for (i=0; i<w*h*4; i++)
        ptr[i] = (unsigned char)genrand_int32(d);

    return ptr;
}
// Overwrites an existing w x h image (4 bytes per pixel) with fresh random
// bytes taken from genrand_int32(d).
//
// Returns ptr unchanged so the call can be used inline.
static unsigned char *
randomize_rgba8_image(unsigned char *ptr, int w, int h, MTdata d)
{
    int byteIdx = 0;

    while (byteIdx < w*h*4)
    {
        ptr[byteIdx] = (unsigned char)genrand_int32(d);
        ++byteIdx;
    }

    return ptr;
}
// Byte-wise comparison of two w x h images with 4 bytes per pixel.
//
// Returns 0 when the first w*h*4 bytes of image and outptr are identical and
// -1 at the first differing byte. A non-positive byte count compares nothing
// and yields 0.
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int byteCount = w * h * 4;
    int pos = 0;

    while (pos < byteCount)
    {
        if (image[pos] != outptr[pos])
            return -1;
        ++pos;
    }

    return 0;
}
int
test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_float *input_ptr[2], *output_ptr;
cl_program program;
cl_kernel kernel;
size_t threads[3]={0,0,0};
cl_image_format img_format;
cl_uchar *rgba8_inptr, *rgba8_outptr;
void *lock_buffer;
int img_width = 512;
int img_height = 512;
cl_int err;
MTdata d;
RoundingMode oldRoundMode;
int isRTZ = 0;
// Block to mark deletion of streams before deletion of host_ptr
{
clMemWrapper streams[7];
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
// Alloc buffers
input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
d = init_genrand( gRandomSeed );
rgba8_inptr = (cl_uchar *)generate_rgba8_image(img_width, img_height, d);
rgba8_outptr = (cl_uchar *)malloc(sizeof(cl_uchar) * 4 * img_width * img_height);
// Random data
make_random_data(num_elements, input_ptr[0], d);
make_random_data(num_elements, input_ptr[1], d);
// Create host-side input
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[0], &err);
test_error(err, "clCreateBuffer 0 failed");
// Create a copied input
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[1], &err);
test_error(err, "clCreateBuffer 1 failed");
// Create a host-side output
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, output_ptr, &err);
test_error(err, "clCreateBuffer 2 failed");
// Create a host-side input
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
test_error(err, "create_image_2d 3 failed");
// Create a copied input
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
test_error(err, "create_image_2d 4 failed");
// Create a host-side output
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
test_error(err, "create_image_2d 5 failed");
// Create a copied output
img_format.image_channel_data_type = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[6] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
test_error(err, "create_image_2d 6 failed");
err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );
test_error(err, "create_single_kernel_helper failed");
// Execute kernel
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
test_error(err, "clSetKernelArg failed");
threads[0] = (size_t)num_elements;
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error(err, "clEnqueueNDRangeKernel failed");
cl_float *data = (cl_float*) clEnqueueMapBuffer( queue, streams[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float) * num_elements, 0, NULL, NULL, &err );
test_error( err, "clEnqueueMapBuffer failed" );
//If we only support rtz mode
if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
{
oldRoundMode = set_round(kRoundTowardZero, kfloat);
isRTZ = 1;
}
if (isRTZ)
oldRoundMode = set_round(kRoundTowardZero, kfloat);
// Verify that we got the expected results back on the host side
err = verify_hostptr(input_ptr[0], input_ptr[1], data, num_elements);
if (err)
{
log_error("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
"and a CL_MEM_USE_HOST_PTR output did not return the expected results.\n");
} else {
log_info("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
"and a CL_MEM_USE_HOST_PTR output returned the expected results.\n");
}
if (isRTZ)
set_round(oldRoundMode, kfloat);
err = clEnqueueUnmapMemObject( queue, streams[2], data, 0, NULL, NULL );
test_error( err, "clEnqueueUnmapMemObject failed" );
size_t origin[3]={0,0,0}, region[3]={img_width, img_height, 1};
randomize_rgba8_image(rgba8_outptr, img_width, img_height, d);
free_mtdata(d); d = NULL;
// Copy from host-side to host-side
log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR...\n");
err = clEnqueueCopyImage(queue, streams[3], streams[5],
origin, origin, region, 0, NULL, NULL);
test_error(err, "clEnqueueCopyImage failed");
log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR image passed.\n");
// test the lock buffer interface
log_info("Mapping the CL_MEM_USE_HOST_PTR image with clEnqueueMapImage...\n");
size_t row_pitch;
lock_buffer = clEnqueueMapImage(queue, streams[5], CL_TRUE,
CL_MAP_READ, origin, region,
&row_pitch, NULL,
0, NULL, NULL, &err);
test_error(err, "clEnqueueMapImage failed");
err = verify_rgba8_image(rgba8_inptr, (unsigned char*)lock_buffer, img_width, img_height);
if (err != CL_SUCCESS)
{
log_error("verify_rgba8_image FAILED after clEnqueueMapImage\n");
return -1;
}
log_info("verify_rgba8_image passed after clEnqueueMapImage\n");
err = clEnqueueUnmapMemObject(queue, streams[5], lock_buffer, 0, NULL, NULL);
test_error(err, "clEnqueueUnmapMemObject failed");
// Copy host-side to device-side and read back
log_info("clEnqueueCopyImage CL_MEM_USE_HOST_PTR to CL_MEM_COPY_HOST_PTR...\n");
err = clEnqueueCopyImage(queue, streams[3], streams[5],
origin, origin, region,
0, NULL, NULL);
test_error(err, "clEnqueueCopyImage failed");
err = clEnqueueReadImage(queue, streams[5], CL_TRUE, origin, region, 4*img_width, 0, rgba8_outptr, 0, NULL, NULL);
test_error(err, "clEnqueueReadImage failed");
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
if (err != CL_SUCCESS)
{
log_error("verify_rgba8_image FAILED after clEnqueueCopyImage, clEnqueueReadImage\n");
return -1;
}
log_info("verify_rgba8_image passed after clEnqueueCopyImage, clEnqueueReadImage\n");
}
// cleanup
clReleaseKernel(kernel);
clReleaseProgram(program);
free(input_ptr[0]);
free(input_ptr[1]);
free(output_ptr);
free(rgba8_inptr);
free(rgba8_outptr);
return err;
}

View File

@@ -1,165 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the "test_if" kernel.
// Each work-item reads src[tid] and walks an if / else-if chain: inputs 0..7
// each select a distinct 32-bit constant, and any other input falls through
// to the final else and produces 0x7FFFFFFF. The host-side check for this
// kernel must use the same mapping.
const char *conditional_kernel_code =
"__kernel void test_if(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" if (src[tid] == 0)\n"
" dst[tid] = 0x12345678;\n"
" else if (src[tid] == 1)\n"
" dst[tid] = 0x23456781;\n"
" else if (src[tid] == 2)\n"
" dst[tid] = 0x34567812;\n"
" else if (src[tid] == 3)\n"
" dst[tid] = 0x45678123;\n"
" else if (src[tid] == 4)\n"
" dst[tid] = 0x56781234;\n"
" else if (src[tid] == 5)\n"
" dst[tid] = 0x67812345;\n"
" else if (src[tid] == 6)\n"
" dst[tid] = 0x78123456;\n"
" else if (src[tid] == 7)\n"
" dst[tid] = 0x81234567;\n"
" else\n"
" dst[tid] = 0x7FFFFFFF;\n"
"\n"
"}\n";
// Host-side table of expected outputs, indexed by input value 0..7.
// The entries repeat, in order, the constants written by the branches of the
// test_if kernel source.
// NOTE(review): the last entry (0x81234567) does not fit in a 32-bit signed
// int, so its conversion to int is implementation-defined; presumably it ends
// up with the same bit pattern the device writes -- confirm on new platforms.
const int results[] = {
0x12345678,
0x23456781,
0x34567812,
0x45678123,
0x56781234,
0x67812345,
0x78123456,
0x81234567,
};
// Compare the output of the test_if kernel with the host-side expectation.
//
// inptr  - the n input values that were given to the kernel
// outptr - the n values the kernel produced
// n      - number of elements in both arrays
//
// Returns 0 (and logs "IF test passed") when every element matches, or -1
// (and logs "IF test failed") at the first mismatch.
int
verify_if(int *inptr, int *outptr, int n)
{
    int r, i;

    for (i=0; i<n; i++)
    {
        // Only inputs 0..7 select a table entry. Everything else takes the
        // kernel's final else branch; that includes negative values, which
        // must not be used as an index into results[].
        if (inptr[i] >= 0 && inptr[i] <= 7)
            r = results[inptr[i]];
        else
            r = 0x7FFFFFFF;

        if (r != outptr[i])
        {
            log_error("IF test failed\n");
            return -1;
        }
    }

    log_info("IF test passed\n");
    return 0;
}
// Run the "test_if" kernel over num_elements random inputs and verify the
// result on the host.
//
// device       - unused by this test
// context      - context the buffers and the program are created in
// queue        - command queue used for the transfers and the kernel launch
// num_elements - number of cl_int elements to process
//
// Returns the result of verify_if() (0 on success, -1 on mismatch), or -1 if
// any allocation or OpenCL call fails. Every exit path goes through the
// cleanup label so that host buffers, the random generator state and all
// OpenCL objects created so far are released.
int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_int *input_ptr = NULL, *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t threads[1];
    int err, i;
    int result = -1;
    MTdata d = init_genrand( gRandomSeed );
    size_t length = sizeof(cl_int) * num_elements;

    input_ptr = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    // Inputs are drawn from 0..32 so that both the table-driven branches
    // (0..7) and the kernel's final else branch are exercised.
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (int)get_random_float(0, 32, d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &conditional_kernel_code, "test_if" );
    if (err)
        goto cleanup;

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    // Blocking read: the data is on the host when the call returns.
    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clReadArray failed\n");
        goto cleanup;
    }

    result = verify_if(input_ptr, output_ptr, num_elements);

cleanup:
    // Only release what was actually created; everything starts out NULL.
    if (d)
        free_mtdata(d);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -1,234 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Allocate a w x h image with four unsigned char channels per pixel and fill
// every channel with a genrand_int32(d) value truncated to unsigned char.
//
// Returns the malloc'ed buffer (w * h * 4 bytes), which the caller releases
// with free(), or NULL if the allocation fails.
static unsigned char *
generate_rgba8_image(int w, int h, MTdata d)
{
    const int count = w * h * 4;
    unsigned char *ptr = (unsigned char*)malloc(count);
    int i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = (unsigned char)genrand_int32(d);

    return ptr;
}
// Compare two w x h images with four unsigned char channels per pixel.
// Returns 0 when all w * h * 4 values are equal, -1 at the first difference.
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int total = w * h * 4;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
// Allocate a w x h image with four unsigned short channels per pixel and fill
// every channel with a genrand_int32(d) value truncated to unsigned short.
//
// Returns the malloc'ed buffer, which the caller releases with free(), or
// NULL if the allocation fails.
static unsigned short *
generate_rgba16_image(int w, int h, MTdata d)
{
    const int count = w * h * 4;
    unsigned short *ptr = (unsigned short *)malloc(w * h * 4 * sizeof(unsigned short));
    int i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = (unsigned short)genrand_int32(d);

    return ptr;
}
// Compare two w x h images with four unsigned short channels per pixel.
// Returns 0 when all w * h * 4 values are equal, -1 at the first difference.
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h)
{
    const int total = w * h * 4;
    int idx;
    int status = 0;

    for (idx = 0; idx < total && status == 0; ++idx)
    {
        if (image[idx] != outptr[idx])
            status = -1;
    }

    return status;
}
// Allocate a w x h image with four float channels per pixel and fill every
// channel with get_random_float(-0x40000000, 0x40000000, d).
//
// Returns the malloc'ed buffer, which the caller releases with free(), or
// NULL if the allocation fails.
static float *
generate_rgbafp_image(int w, int h, MTdata d)
{
    const int count = w * h * 4;
    float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
    int i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);

    return ptr;
}
// Compare two w x h images with four float channels per pixel using exact
// floating-point (in)equality on every channel.
// Returns 0 when no channel differs, -1 at the first difference.
static int
verify_rgbafp_image(float *image, float *outptr, int w, int h)
{
    const int total = w * h * 4;
    int idx = 0;

    while (idx < total)
    {
        // Keep the != comparison: it is a value comparison, not a bitwise one.
        if (outptr[idx] != image[idx])
            return -1;
        ++idx;
    }

    return 0;
}
int
test_imagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_image_format img_format;
unsigned char *rgba8_inptr, *rgba8_outptr;
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 512;
int img_height = 512;
int i, err;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
d = init_genrand( gRandomSeed );
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
free_mtdata(d); d = NULL;
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_FLOAT;
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
for (i=0; i<3; i++)
{
void *p, *outp;
int x, y, delta_w = img_width/8, delta_h = img_height/16;
switch (i)
{
case 0:
p = (void *)rgba8_inptr;
outp = (void *)rgba8_outptr;
log_info("Testing CL_RGBA CL_UNORM_INT8\n");
break;
case 1:
p = (void *)rgba16_inptr;
outp = (void *)rgba16_outptr;
log_info("Testing CL_RGBA CL_UNORM_INT16\n");
break;
case 2:
p = (void *)rgbafp_inptr;
outp = (void *)rgbafp_outptr;
log_info("Testing CL_RGBA CL_FLOAT\n");
break;
}
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
test_error(err, "create_image_2d failed");
int copy_number = 0;
for (y=0; y<img_height; y+=delta_h)
{
for (x=0; x<img_width; x+=delta_w)
{
copy_number++;
size_t copy_origin[3] = {x,y,0}, copy_region[3]={delta_w, delta_h, 1};
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1],
copy_origin, copy_origin, copy_region,
0, NULL, NULL);
if (err) {
log_error("Copy %d (origin [%d, %d], size [%d, %d], image size [%d x %d]) Failed\n", copy_number, x, y, delta_w, delta_h, img_width, img_height);
}
test_error(err, "clEnqueueCopyImage failed");
}
}
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
test_error(err, "clEnqueueReadImage failed");
switch (i)
{
case 0:
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
break;
case 1:
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
break;
case 2:
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
break;
}
if (err)
break;
}
free(rgba8_inptr);
free(rgba16_inptr);
free(rgbafp_inptr);
free(rgba8_outptr);
free(rgba16_outptr);
free(rgbafp_outptr);
if (err)
log_error("IMAGE copy test failed\n");
else
log_info("IMAGE copy test passed\n");
return err;
}

View File

@@ -1,237 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Allocate num_elements unsigned chars and fill each with a genrand_int32(d)
// value truncated to unsigned char.
//
// Returns the malloc'ed buffer, which the caller releases with free(), or
// NULL if the allocation fails.
static unsigned char *
generate_uint8_image(unsigned num_elements, MTdata d)
{
    unsigned char *ptr = (unsigned char*)malloc(num_elements);
    unsigned i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<num_elements; i++)
        ptr[i] = (unsigned char)genrand_int32(d);

    return ptr;
}
// Compare num_elements unsigned chars of two buffers.
// Returns 0 when they are all equal, -1 at the first difference.
static int
verify_uint8_image(unsigned char *image, unsigned char *outptr, unsigned num_elements)
{
    unsigned pos = 0;

    while (pos < num_elements)
    {
        if (image[pos] != outptr[pos])
            return -1;
        ++pos;
    }

    return 0;
}
// Allocate num_elements unsigned shorts and fill each with a genrand_int32(d)
// value truncated to unsigned short.
//
// Returns the malloc'ed buffer, which the caller releases with free(), or
// NULL if the allocation fails.
static unsigned short *
generate_uint16_image(unsigned num_elements, MTdata d)
{
    unsigned short *ptr = (unsigned short *)malloc(num_elements * sizeof(unsigned short));
    unsigned i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<num_elements; i++)
        ptr[i] = (unsigned short)genrand_int32(d);

    return ptr;
}
// Compare num_elements unsigned shorts of two buffers.
// Returns 0 when they are all equal, -1 at the first difference.
static int
verify_uint16_image(unsigned short *image, unsigned short *outptr, unsigned num_elements)
{
    unsigned pos;
    int status = 0;

    for (pos = 0; status == 0 && pos < num_elements; ++pos)
    {
        if (image[pos] != outptr[pos])
            status = -1;
    }

    return status;
}
// Allocate num_elements floats and fill each with
// get_random_float(-0x40000000, 0x40000000, d).
//
// Returns the malloc'ed buffer, which the caller releases with free(), or
// NULL if the allocation fails.
static float *
generate_float_image(unsigned num_elements, MTdata d)
{
    float *ptr = (float*)malloc(num_elements * sizeof(float));
    unsigned i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<num_elements; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);

    return ptr;
}
// Compare num_elements floats of two buffers using exact floating-point
// (in)equality.
// Returns 0 when no element differs, -1 at the first difference.
static int
verify_float_image(float *image, float *outptr, unsigned num_elements)
{
    unsigned pos = 0;

    while (pos < num_elements)
    {
        // Keep the != comparison: it is a value comparison, not a bitwise one.
        if (outptr[pos] != image[pos])
            return -1;
        ++pos;
    }

    return 0;
}
int
test_imagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements_ignored)
{
cl_image_format img_format;
unsigned char *rgba8_inptr, *rgba8_outptr;
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 128;
int img_height = 128;
int img_depth = 64;
int i;
cl_int err;
unsigned num_elements = img_width * img_height * img_depth * 4;
MTdata d;
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
d = init_genrand( gRandomSeed );
rgba8_inptr = (unsigned char *)generate_uint8_image(num_elements, d);
rgba16_inptr = (unsigned short *)generate_uint16_image(num_elements, d);
rgbafp_inptr = (float *)generate_float_image(num_elements, d);
free_mtdata(d); d = NULL;
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * num_elements);
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * num_elements);
rgbafp_outptr = (float*)malloc(sizeof(float) * num_elements);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
streams[3] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_FLOAT;
streams[4] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
streams[5] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
for (i=0; i<3; i++)
{
void *p, *outp;
int x, y, z, delta_w = img_width/8, delta_h = img_height/16, delta_d = img_depth/4;
switch (i)
{
case 0:
p = (void *)rgba8_inptr;
outp = (void *)rgba8_outptr;
break;
case 1:
p = (void *)rgba16_inptr;
outp = (void *)rgba16_outptr;
break;
case 2:
p = (void *)rgbafp_inptr;
outp = (void *)rgbafp_outptr;
break;
}
size_t origin[3]={0,0,0}, region[3]={img_width, img_height, img_depth};
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
test_error(err, "clEnqueueWriteImage failed");
for (z=0; z<img_depth; z+=delta_d)
{
for (y=0; y<img_height; y+=delta_h)
{
for (x=0; x<img_width; x+=delta_w)
{
origin[0] = x; origin[1] = y; origin[2] = z;
region[0] = delta_w; region[1] = delta_h; region[2] = delta_d;
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1], origin, origin, region, 0, NULL, NULL);
test_error(err, "clEnqueueCopyImage failed");
}
}
}
origin[0] = 0; origin[1] = 0; origin[2] = 0;
region[0] = img_width; region[1] = img_height; region[2] = img_depth;
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
test_error(err, "clEnqueueReadImage failed");
switch (i)
{
case 0:
err = verify_uint8_image(rgba8_inptr, rgba8_outptr, num_elements);
if (err) log_error("Failed uint8\n");
break;
case 1:
err = verify_uint16_image(rgba16_inptr, rgba16_outptr, num_elements);
if (err) log_error("Failed uint16\n");
break;
case 2:
err = verify_float_image(rgbafp_inptr, rgbafp_outptr, num_elements);
if (err) log_error("Failed float\n");
break;
}
if (err)
break;
}
free(rgba8_inptr);
free(rgba16_inptr);
free(rgbafp_inptr);
free(rgba8_outptr);
free(rgba16_outptr);
free(rgbafp_outptr);
if (err)
log_error("IMAGE3D copy test failed\n");
else
log_info("IMAGE3D copy test passed\n");
return err;
}

View File

@@ -1,269 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Allocate a w x h image with four unsigned char channels per pixel and fill
// every channel with a genrand_int32(d) value truncated to unsigned char.
//
// Returns the malloc'ed buffer (w * h * 4 bytes), which the caller releases
// with free(), or NULL if the allocation fails.
static unsigned char *
generate_rgba8_image(int w, int h, MTdata d)
{
    const int count = w * h * 4;
    unsigned char *ptr = (unsigned char*)malloc(count);
    int i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = (unsigned char)genrand_int32(d);

    return ptr;
}
// Compare the w x h pixel rectangle whose top-left corner is (x, y) in two
// RGBA images that are img_width pixels wide (four unsigned char channels per
// pixel). Pixels outside the rectangle are not examined.
// Returns 0 when the rectangles match, -1 at the first differing channel.
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int x, int y, int w, int h, int img_width)
{
    const int first_chan = x * 4;
    const int end_chan = (x + w) * 4;
    int row;

    for (row = y; row < (y + h); ++row)
    {
        const int row_base = row * img_width * 4;
        int chan = first_chan;

        while (chan < end_chan)
        {
            if (image[row_base + chan] != outptr[row_base + chan])
                return -1;
            ++chan;
        }
    }

    return 0;
}
// Allocate a w x h image with four unsigned short channels per pixel and fill
// every channel with a genrand_int32(d) value truncated to unsigned short.
//
// Returns the malloc'ed buffer, which the caller releases with free(), or
// NULL if the allocation fails.
static unsigned short *
generate_rgba16_image(int w, int h, MTdata d)
{
    const int count = w * h * 4;
    unsigned short *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
    int i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = (unsigned short)genrand_int32(d);

    return ptr;
}
// Compare the w x h pixel rectangle whose top-left corner is (x, y) in two
// RGBA images that are img_width pixels wide (four unsigned short channels
// per pixel). Pixels outside the rectangle are not examined.
// Returns 0 when the rectangles match, -1 at the first differing channel.
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int x, int y, int w, int h, int img_width)
{
    const int first_chan = x * 4;
    const int end_chan = (x + w) * 4;
    int row, chan;

    for (row = y; row < (y + h); ++row)
    {
        const int row_base = row * img_width * 4;

        for (chan = first_chan; chan < end_chan; ++chan)
        {
            const int at = row_base + chan;
            if (image[at] != outptr[at])
                return -1;
        }
    }

    return 0;
}
// Allocate a w x h image with four float channels per pixel and fill every
// channel with get_random_float(-0x40000000, 0x40000000, d).
//
// Returns the malloc'ed buffer, which the caller releases with free(), or
// NULL if the allocation fails.
static float *
generate_rgbafp_image(int w, int h, MTdata d)
{
    const int count = w * h * 4;
    float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
    int i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);

    return ptr;
}
// Compare the w x h pixel rectangle whose top-left corner is (x, y) in two
// RGBA images that are img_width pixels wide (four float channels per pixel),
// using exact floating-point (in)equality. Pixels outside the rectangle are
// not examined.
// Returns 0 when the rectangles match, -1 at the first differing channel.
static int
verify_rgbafp_image(float *image, float *outptr, int x, int y, int w, int h, int img_width)
{
    const int first_chan = x * 4;
    const int end_chan = (x + w) * 4;
    int row;

    for (row = y; row < (y + h); ++row)
    {
        const int row_base = row * img_width * 4;
        int chan = first_chan;

        while (chan < end_chan)
        {
            // Keep the != comparison: it is a value comparison, not bitwise.
            if (outptr[row_base + chan] != image[row_base + chan])
                return -1;
            ++chan;
        }
    }

    return 0;
}
// Number of random rectangles copied and verified per image format.
#define NUM_COPIES 10
// Log labels for the three tested formats, indexed by the format loop
// counter (0 = 8-bit, 1 = 16-bit, 2 = float).
static const char *test_str_names[] = { "CL_RGBA CL_UNORM_INT8", "CL_RGBA CL_UNORM_INT16", "CL_RGBA CL_FLOAT" };
int
test_imagerandomcopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_image_format img_format;
unsigned char *rgba8_inptr, *rgba8_outptr;
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 512;
int img_height = 512;
int i, j;
cl_int err;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
log_info("Testing with image %d x %d.\n", img_width, img_height);
d = init_genrand( gRandomSeed );
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_FLOAT;
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
for (i=0; i<3; i++)
{
void *p, *outp;
unsigned int x[2], y[2], delta_w, delta_h ;
switch (i)
{
case 0:
p = (void *)rgba8_inptr;
outp = (void *)rgba8_outptr;
break;
case 1:
p = (void *)rgba16_inptr;
outp = (void *)rgba16_outptr;
break;
case 2:
p = (void *)rgbafp_inptr;
outp = (void *)rgbafp_outptr;
break;
}
size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
test_error(err, "clEnqueueWriteImage failed");
for (j=0; j<NUM_COPIES; j++)
{
x[0] = (int)get_random_float(0, img_width, d);
do
{
x[1] = (int)get_random_float(0, img_width, d);
} while (x[1] <= x[0]);
y[0] = (int)get_random_float(0, img_height, d);
do
{
y[1] = (int)get_random_float(0, img_height, d);
} while (y[1] <= y[0]);
delta_w = x[1] - x[0];
delta_h = y[1] - y[0];
log_info("Testing clCopyImage for %s: x = %d, y = %d, w = %d, h = %d\n", test_str_names[i], x[0], y[0], delta_w, delta_h);
origin[0] = x[0];
origin[1] = y[0];
origin[2] = 0;
region[0] = delta_w;
region[1] = delta_h;
region[2] = 1;
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1], origin, origin, region, 0, NULL, NULL);
// err = clCopyImage(context, streams[i*2], streams[i*2+1],
// x[0], y[0], 0, x[0], y[0], 0, delta_w, delta_h, 0, NULL);
test_error(err, "clEnqueueCopyImage failed");
origin[0] = 0;
origin[1] = 0;
origin[2] = 0;
region[0] = img_width;
region[1] = img_height;
region[2] = 1;
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
// err = clReadImage(context, streams[i*2+1], false, 0, 0, 0, img_width, img_height, 0, 0, 0, outp, NULL);
test_error(err, "clEnqueueReadImage failed");
switch (i)
{
case 0:
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, x[0], y[0], delta_w, delta_h, img_width);
break;
case 1:
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, x[0], y[0], delta_w, delta_h, img_width);
break;
case 2:
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, x[0], y[0], delta_w, delta_h, img_width);
break;
}
if (err)
break;
}
if (err)
break;
}
free_mtdata(d); d = NULL;
free(rgba8_inptr);
free(rgba16_inptr);
free(rgbafp_inptr);
free(rgba8_outptr);
free(rgba16_outptr);
free(rgbafp_outptr);
if (err)
log_error("IMAGE random copy test failed\n");
else
log_info("IMAGE random copy test passed\n");
return err;
}

View File

@@ -1,417 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Allocate a w x h image with four unsigned char channels per pixel and fill
// every channel with a genrand_int32(d) value truncated to unsigned char.
//
// Returns the malloc'ed buffer (w * h * 4 bytes), which the caller releases
// with free(), or NULL if the allocation fails.
static unsigned char *
generate_rgba8_image(int w, int h, MTdata d)
{
    const int count = w * h * 4;
    unsigned char *ptr = (unsigned char*)malloc(count);
    int i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = (unsigned char)genrand_int32(d);

    return ptr;
}
// Overwrite the w x h pixel rectangle at (x, y) of an RGBA image that is
// img_width pixels wide (four unsigned char channels per pixel) with fresh
// genrand_int32(d) values, one draw per channel in row-major order.
static void
update_rgba8_image(unsigned char *p, int x, int y, int w, int h, int img_width, MTdata d)
{
    int row, col, chan;

    for (row = y; row < y + h; ++row)
    {
        // Index of the first channel of pixel (x, row).
        int pixel = (row * img_width + x) * 4;

        for (col = x; col < x + w; ++col, pixel += 4)
        {
            for (chan = 0; chan < 4; ++chan)
                p[pixel + chan] = (unsigned char)genrand_int32(d);
        }
    }
}
// Copy a tightly packed w x h block of pixels from `in` into the rectangle at
// (x, y) of the image `out`, which is img_width pixels wide. elem_size is the
// size of one pixel in bytes. Source bytes are consumed sequentially;
// destination rows are img_width * elem_size bytes apart.
static void
update_image_from_image(void *out, void *in, int x, int y, int w, int h, int img_width, int elem_size)
{
    char *dst = (char*)out;
    char *src = (char*)in;
    int row, offset;
    int src_pos = 0;

    // With no columns or no bytes per pixel nothing is ever copied.
    if (w <= 0 || elem_size <= 0)
        return;

    for (row = y; row < y + h; ++row)
    {
        const int row_start = (row * img_width + x) * elem_size;
        const int row_bytes = w * elem_size;

        for (offset = 0; offset < row_bytes; ++offset)
        {
            dst[row_start + offset] = src[src_pos];
            ++src_pos;
        }
    }
}
// Compare two w x h images with four unsigned char channels per pixel.
//
// Returns 0 when all w * h * 4 values are equal. At the first difference it
// logs the failing channel index together with the expected and actual RGBA
// pixel that contains it and returns -1.
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    int i;

    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            // Print the pixel that contains channel i. Starting the print at
            // i itself would read up to three values past the end of both
            // buffers when the mismatch is in the last pixel.
            const int px = i - (i % 4);
            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }

    return 0;
}
// Allocate a w x h image with four unsigned short channels per pixel and fill
// every channel with a genrand_int32(d) value truncated to unsigned short.
//
// Returns the malloc'ed buffer, which the caller releases with free(), or
// NULL if the allocation fails.
static unsigned short *
generate_rgba16_image(int w, int h, MTdata d)
{
    const int count = w * h * 4;
    unsigned short *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
    int i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = (unsigned short)genrand_int32(d);

    return ptr;
}
// Overwrite the w x h pixel rectangle at (x, y) of an RGBA image that is
// img_width pixels wide (four unsigned short channels per pixel) with fresh
// genrand_int32(d) values, one draw per channel in row-major order.
static void
update_rgba16_image(unsigned short *p, int x, int y, int w, int h, int img_width, MTdata d)
{
    int row, col, chan;

    for (row = y; row < y + h; ++row)
    {
        // Index of the first channel of pixel (x, row).
        int pixel = (row * img_width + x) * 4;

        for (col = x; col < x + w; ++col, pixel += 4)
        {
            for (chan = 0; chan < 4; ++chan)
                p[pixel + chan] = (unsigned short)genrand_int32(d);
        }
    }
}
// Compare two w x h images with four unsigned short channels per pixel.
//
// Returns 0 when all w * h * 4 values are equal. At the first difference it
// logs the failing channel index together with the expected and actual RGBA
// pixel that contains it and returns -1.
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h)
{
    int i;

    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            // Print the pixel that contains channel i. Starting the print at
            // i itself would read up to three values past the end of both
            // buffers when the mismatch is in the last pixel.
            const int px = i - (i % 4);
            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }

    return 0;
}
// Allocate a w x h image with four float channels per pixel and fill every
// channel with get_random_float(-0x40000000, 0x40000000, d).
//
// Returns the malloc'ed buffer, which the caller releases with free(), or
// NULL if the allocation fails.
static float *
generate_rgbafp_image(int w, int h, MTdata d)
{
    const int count = w * h * 4;
    float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
    int i;

    // Do not write through a failed allocation.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);

    return ptr;
}
// Overwrite the w x h pixel rectangle at (x, y) of an RGBA image that is
// img_width pixels wide (four float channels per pixel) with fresh
// get_random_float(-0x40000000, 0x40000000, d) values, one draw per channel
// in row-major order.
static void
update_rgbafp_image(float *p, int x, int y, int w, int h, int img_width, MTdata d)
{
    int row, col, chan;

    for (row = y; row < y + h; ++row)
    {
        // Index of the first channel of pixel (x, row).
        int pixel = (row * img_width + x) * 4;

        for (col = x; col < x + w; ++col, pixel += 4)
        {
            for (chan = 0; chan < 4; ++chan)
                p[pixel + chan] = get_random_float(-0x40000000, 0x40000000, d);
        }
    }
}
// Compare two w x h images with four float channels per pixel using exact
// floating-point (in)equality.
//
// Returns 0 when no channel differs. At the first difference it logs the
// failing channel index together with the expected and actual RGBA pixel that
// contains it and returns -1.
static int
verify_rgbafp_image(float *image, float *outptr, int w, int h)
{
    int i;

    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            // Print the pixel that contains channel i. Starting the print at
            // i itself would read up to three values past the end of both
            // buffers when the mismatch is in the last pixel.
            const int px = i - (i % 4);
            log_error("i = %d. Expected (%f %f %f %f), got (%f %f %f %f)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }

    return 0;
}
int
test_imagereadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_image_format img_format;
unsigned char *rgba8_inptr, *rgba8_outptr;
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[3];
int img_width = 512;
int img_height = 512;
int num_tries = 200;
int i, j, err;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
d = init_genrand( gRandomSeed );
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_FLOAT;
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
for (i=0; i<3; i++)
{
void *p;
if (i == 0)
p = (void *)rgba8_inptr;
else if (i == 1)
p = (void *)rgba16_inptr;
else
p = (void *)rgbafp_inptr;
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
err = clEnqueueWriteImage(queue, streams[i], CL_TRUE,
origin, region, 0, 0,
p, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clWriteImage2D failed\n");
return -1;
}
}
for (i=0,j=0; i<num_tries*3; i++,j++)
{
int x = (int)get_random_float(0, img_width, d);
int y = (int)get_random_float(0, img_height, d);
int w = (int)get_random_float(1, (img_width - x), d);
int h = (int)get_random_float(1, (img_height - y), d);
size_t input_pitch;
int set_input_pitch = (int)(genrand_int32(d) & 0x01);
int packed_update = (int)(genrand_int32(d) & 0x01);
void *p, *outp;
int elem_size;
if (j == 3)
j = 0;
switch (j)
{
case 0:
//if ((w<=10) || (h<=10)) continue;
elem_size = 4;
if(packed_update)
{
p = generate_rgba8_image(w, h, d);
update_image_from_image(rgba8_inptr, p, x, y, w, h, img_width, elem_size);
}
else
{
update_rgba8_image(rgba8_inptr, x, y, w, h, img_width, d);
p = (void *)(rgba8_inptr + ((y * img_width + x) * 4));
}
outp = (void *)rgba8_outptr;
break;
case 1:
//if ((w<=8) || (h<=8)) continue;
elem_size = 2*4;
if(packed_update)
{
p = generate_rgba16_image(w, h, d);
update_image_from_image(rgba16_inptr, p, x, y, w, h, img_width, elem_size);
}
else
{
update_rgba16_image(rgba16_inptr, x, y, w, h, img_width, d);
p = (void *)(rgba16_inptr + ((y * img_width + x) * 4));
}
outp = (void *)rgba16_outptr;
break;
case 2:
//if ((w<=8) || (h<=8)) continue;
elem_size = 4*4;
if(packed_update)
{
p = generate_rgbafp_image(w, h, d);
update_image_from_image(rgbafp_inptr, p, x, y, w, h, img_width, elem_size);
}
else
{
update_rgbafp_image(rgbafp_inptr, x, y, w, h, img_width, d);
p = (void *)(rgbafp_inptr + ((y * img_width + x) * 4));
}
outp = (void *)rgbafp_outptr;
break;
}
const char* update_packed_pitch_name = "";
if(packed_update)
{
if(set_input_pitch)
{
// for packed updates the pitch does not need to be calculated here (but can be)
update_packed_pitch_name = "'packed with pitch'";
input_pitch = w*elem_size;
}
else
{
// for packed updates the pitch does not need to be calculated here
update_packed_pitch_name = "'packed without pitch'";
input_pitch = 0;
}
}
else
{
// for unpacked updates the pitch is required
update_packed_pitch_name = "'unpacked with pitch'";
input_pitch = img_width*elem_size;
}
size_t origin[3] = {x,y,0}, region[3] = {w, h, 1};
err = clEnqueueWriteImage(queue, streams[j], CL_TRUE,
origin, region, input_pitch, 0, p,
0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clWriteImage update failed for %s %s: %d\n",
(packed_update) ? "packed" : "unpacked",
(set_input_pitch) ? "set pitch" : "unset pitch", err);
free_mtdata(d);
return -1;
}
if(packed_update)
{
free(p);
p = NULL;
}
memset(outp, 0x7, img_width*img_height*elem_size);
origin[0]=0; origin[1]=0; origin[2]=0;
region[0]=img_width; region[1]=img_height; region[2]=1;
err = clEnqueueReadImage(queue, streams[j], CL_TRUE,
origin, region, 0,0,
outp, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clReadImage failed\n");
free_mtdata(d);
return -1;
}
switch (j)
{
case 0:
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
}
break;
case 1:
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
}
break;
case 2:
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
}
break;
}
if (err) break;
}
free_mtdata(d);
free(rgba8_inptr);
free(rgba16_inptr);
free(rgbafp_inptr);
free(rgba8_outptr);
free(rgba16_outptr);
free(rgbafp_outptr);
if (!err)
log_info("IMAGE read, write test passed\n");
return err;
}

View File

@@ -1,417 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * Allocates a w x h x d RGBA8 volume (4 bytes per pixel) and fills every
 * byte with a value drawn from mtData.
 *
 * Returns a malloc'ed buffer that the caller must free(), or NULL when the
 * allocation fails (the original wrote through the NULL pointer).
 */
static unsigned char *
generate_rgba8_image(int w, int h, int d, MTdata mtData)
{
    /* Compute the byte count in size_t so large volumes do not overflow int. */
    const size_t count = (size_t)w * (size_t)h * (size_t)d * 4;
    unsigned char *ptr = (unsigned char *)malloc(count);
    size_t i;

    if (ptr == NULL)
        return NULL;

    for (i = 0; i < count; i++)
        ptr[i] = (unsigned char)genrand_int32(mtData);
    return ptr;
}
/*
 * Overwrites the w x h x d sub-volume whose first texel is (x, y, z) inside
 * an RGBA8 image of row length img_width and slice size
 * img_width * img_height with fresh values drawn from mtData.
 * Texels are visited slice by slice, row by row, left to right, and the four
 * channels of each texel are drawn in order.
 */
static void
update_rgba8_image(unsigned char *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
{
    const int slice_texels = img_width * img_height;
    int plane, row, col, chan;

    for (plane = z; plane < z + d; plane++)
    {
        for (row = y; row < y + h; row++)
        {
            /* Element index of the first channel of the row's first texel. */
            const int row_base = (plane * slice_texels + row * img_width + x) * 4;

            for (col = 0; col < w; col++)
            {
                for (chan = 0; chan < 4; chan++)
                    p[row_base + col * 4 + chan] = (unsigned char)genrand_int32(mtData);
            }
        }
    }
}
/*
 * Copies a tightly packed w x h x d block of texels from `in` into the
 * sub-volume of `out` whose first texel is (x, y, z).
 *
 *   out        destination image; rows are img_width texels long and slices
 *              are img_width * img_height texels
 *   in         packed source block, consumed sequentially
 *   elem_size  size of one texel in bytes
 *
 * img_depth is accepted for signature symmetry but is not needed.
 * Each destination row is contiguous, so it is copied with one memcpy
 * instead of byte by byte.
 */
static void
update_image_from_image(void *out, void *in, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, int elem_size)
{
    unsigned char *dst = (unsigned char *)out;
    const unsigned char *src = (const unsigned char *)in;
    size_t row_bytes;
    size_t slice_texels;
    int j, k;

    (void)img_depth;

    /* Nothing to copy; also keeps the size_t conversions below well defined. */
    if (w <= 0 || elem_size <= 0)
        return;

    row_bytes = (size_t)w * (size_t)elem_size;
    slice_texels = (size_t)img_width * (size_t)img_height;

    for (k = z; k < z + d; k++)
    {
        for (j = y; j < y + h; j++)
        {
            const size_t out_indx =
                ((size_t)k * slice_texels + (size_t)j * (size_t)img_width + (size_t)x) * (size_t)elem_size;

            memcpy(dst + out_indx, src, row_bytes);
            src += row_bytes;
        }
    }
}
/*
 * Compares two w x h x d RGBA8 volumes channel by channel.
 *
 * Returns 0 when they are identical.  On the first mismatch, logs the index
 * of the differing channel together with the whole texel containing it and
 * returns -1.  The texel is reported from its own base index so the log never
 * reads past the end of either buffer.
 */
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h, int d)
{
    const int count = w * h * d * 4;
    int i;

    for (i = 0; i < count; i++)
    {
        if (outptr[i] != image[i])
        {
            /* First channel of the texel that holds channel i. */
            const int px = i - (i % 4);

            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i,
                      image[px], image[px + 1], image[px + 2], image[px + 3],
                      outptr[px], outptr[px + 1], outptr[px + 2], outptr[px + 3]);
            return -1;
        }
    }
    return 0;
}
/*
 * Allocates a w x h x d RGBA16 volume (four unsigned shorts per pixel) and
 * fills every channel with a value drawn from mtData.
 *
 * Returns a malloc'ed buffer that the caller must free(), or NULL when the
 * allocation fails (the original wrote through the NULL pointer).
 */
static unsigned short *
generate_rgba16_image(int w, int h, int d, MTdata mtData)
{
    /* Channel count in size_t so large volumes do not overflow int. */
    const size_t count = (size_t)w * (size_t)h * (size_t)d * 4;
    unsigned short *ptr = (unsigned short *)malloc(count * sizeof(unsigned short));
    size_t i;

    if (ptr == NULL)
        return NULL;

    for (i = 0; i < count; i++)
        ptr[i] = (unsigned short)genrand_int32(mtData);
    return ptr;
}
/*
 * Overwrites the w x h x d sub-volume whose first texel is (x, y, z) inside
 * an RGBA16 image of row length img_width and slice size
 * img_width * img_height with fresh values drawn from mtData.
 * Texels are visited slice by slice, row by row, left to right, and the four
 * channels of each texel are drawn in order.
 */
static void
update_rgba16_image(unsigned short *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
{
    const int slice_texels = img_width * img_height;
    int plane, row, col, chan;

    for (plane = z; plane < z + d; plane++)
    {
        for (row = y; row < y + h; row++)
        {
            /* Element index of the first channel of the row's first texel. */
            const int row_base = (plane * slice_texels + row * img_width + x) * 4;

            for (col = 0; col < w; col++)
            {
                for (chan = 0; chan < 4; chan++)
                    p[row_base + col * 4 + chan] = (unsigned short)genrand_int32(mtData);
            }
        }
    }
}
/*
 * Compares two w x h x d RGBA16 volumes channel by channel.
 *
 * Returns 0 when they are identical.  On the first mismatch, logs the index
 * of the differing channel together with the whole texel containing it and
 * returns -1.  The texel is reported from its own base index so the log never
 * reads past the end of either buffer.
 */
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h, int d)
{
    const int count = w * h * d * 4;
    int i;

    for (i = 0; i < count; i++)
    {
        if (outptr[i] != image[i])
        {
            /* First channel of the texel that holds channel i. */
            const int px = i - (i % 4);

            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i,
                      image[px], image[px + 1], image[px + 2], image[px + 3],
                      outptr[px], outptr[px + 1], outptr[px + 2], outptr[px + 3]);
            return -1;
        }
    }
    return 0;
}
/*
 * Allocates a w x h x d RGBA float volume (four floats per pixel) and fills
 * every channel with get_random_float(-0x40000000, 0x40000000, mtData).
 *
 * Returns a malloc'ed buffer that the caller must free(), or NULL when the
 * allocation fails (the original wrote through the NULL pointer).
 */
static float *
generate_rgbafp_image(int w, int h, int d, MTdata mtData)
{
    /* Channel count in size_t so large volumes do not overflow int. */
    const size_t count = (size_t)w * (size_t)h * (size_t)d * 4;
    float *ptr = (float *)malloc(count * sizeof(float));
    size_t i;

    if (ptr == NULL)
        return NULL;

    for (i = 0; i < count; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, mtData);
    return ptr;
}
/*
 * Overwrites the w x h x d sub-volume whose first texel is (x, y, z) inside
 * an RGBA float image of row length img_width and slice size
 * img_width * img_height with fresh random floats drawn from mtData.
 * Texels are visited slice by slice, row by row, left to right, and the four
 * channels of each texel are drawn in order.
 */
static void
update_rgbafp_image(float *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
{
    const int slice_texels = img_width * img_height;
    int plane, row, col, chan;

    for (plane = z; plane < z + d; plane++)
    {
        for (row = y; row < y + h; row++)
        {
            /* Element index of the first channel of the row's first texel. */
            const int row_base = (plane * slice_texels + row * img_width + x) * 4;

            for (col = 0; col < w; col++)
            {
                for (chan = 0; chan < 4; chan++)
                    p[row_base + col * 4 + chan] = get_random_float(-0x40000000, 0x40000000, mtData);
            }
        }
    }
}
/*
 * Compares two w x h x d RGBA float volumes channel by channel using exact
 * equality.
 *
 * Returns 0 when they are identical.  On the first mismatch, logs the index
 * of the differing channel together with the whole texel containing it and
 * returns -1.  The texel is reported from its own base index so the log never
 * reads past the end of either buffer.
 */
static int
verify_rgbafp_image(float *image, float *outptr, int w, int h, int d)
{
    const int count = w * h * d * 4;
    int i;

    for (i = 0; i < count; i++)
    {
        if (outptr[i] != image[i])
        {
            /* First channel of the texel that holds channel i. */
            const int px = i - (i % 4);

            log_error("i = %d. Expected (%f %f %f %f), got (%f %f %f %f)\n", i,
                      image[px], image[px + 1], image[px + 2], image[px + 3],
                      outptr[px], outptr[px + 1], outptr[px + 2], outptr[px + 3]);
            return -1;
        }
    }
    return 0;
}
// Exercises clEnqueueWriteImage / clEnqueueReadImage on 3D images.
//
// Three 64x64x32 CL_RGBA images are created (CL_UNORM_INT8, CL_UNORM_INT16,
// CL_FLOAT) and filled from randomly generated host images. Then, for
// num_tries * 3 iterations (cycling through the three formats), a random
// sub-volume is rewritten either from a tightly packed temporary buffer or
// directly out of the host reference image, the whole image is read back,
// and the result is compared with the host reference.
//
// Returns the last value stored in err: 0 when every comparison matched,
// otherwise the non-zero value produced by the failing verify_* call.
// NOTE(review): test_error is defined outside this file; it presumably
// returns from the function on a non-zero err. If so, those paths skip the
// free()/free_mtdata() calls at the bottom (and the packed temporary `p`) --
// confirm and consider a common cleanup path.
// NOTE(review): none of the malloc / generate_* results are checked for NULL.
int
test_imagereadwrite3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_image_format img_format;
unsigned char *rgba8_inptr, *rgba8_outptr;
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[3];
int img_width = 64;
int img_height = 64;
int img_depth = 32;
// Number of texels in one 2D slice of the host images.
int img_slice = img_width * img_height;
int num_tries = 30;
int i, j, err;
MTdata mtData;
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
mtData = init_genrand( gRandomSeed );
// Host reference images (*_inptr) and read-back buffers (*_outptr), one pair per format.
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, img_depth, mtData);
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, img_depth, mtData);
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, img_depth, mtData);
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height * img_depth);
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height * img_depth);
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height * img_depth);
// streams[0] = RGBA8, streams[1] = RGBA16, streams[2] = RGBA float.
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_FLOAT;
streams[2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
// Initial upload: write each full host reference image to its device image.
for (i=0; i<3; i++)
{
void *p;
if (i == 0)
p = (void *)rgba8_inptr;
else if (i == 1)
p = (void *)rgba16_inptr;
else
p = (void *)rgbafp_inptr;
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, img_depth};
err = clEnqueueWriteImage(queue, streams[i], CL_TRUE,
origin, region, 0, 0,
p,
0, NULL, NULL);
test_error(err, "clEnqueueWriteImage failed");
}
// i counts iterations; j selects the format and wraps 0,1,2,0,1,2,...
for (i=0,j=0; i<num_tries*3; i++,j++)
{
// Random origin (x, y, z) and extent (w, h, d) of the region to update.
// NOTE(review): the exact range returned by get_random_float is not
// visible here; presumably the extent never exceeds the image -- confirm.
int x = (int)get_random_float(0, (float)img_width - 1, mtData);
int y = (int)get_random_float(0, (float)img_height - 1, mtData);
int z = (int)get_random_float(0, (float)img_depth - 1, mtData);
int w = (int)get_random_float(1, (float)(img_width - x), mtData);
int h = (int)get_random_float(1, (float)(img_height - y), mtData);
int d = (int)get_random_float(1, (float)(img_depth - z), mtData);
size_t input_pitch, input_slice_pitch;
// set_input_pitch only matters for packed updates (see below).
int set_input_pitch = (int)(genrand_int32(mtData) & 0x01);
int packed_update = (int)(genrand_int32(mtData) & 0x01);
void *p, *outp;
int elem_size;
if (j == 3)
j = 0;
// packed: the source image for the write is a whole image .
// unpacked: the source image for the write is a subset within a larger image
// In both cases the host reference image is updated first so it holds the
// expected device contents; elem_size is the texel size in bytes.
switch (j)
{
case 0:
elem_size = 4;
if(packed_update)
{
p = generate_rgba8_image(w, h, d, mtData);
update_image_from_image(rgba8_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
}
else
{
update_rgba8_image(rgba8_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
// Point at the first updated texel inside the full reference image.
p = (void *)(rgba8_inptr + ((z * img_slice + y * img_width + x) * 4));
}
outp = (void *)rgba8_outptr;
break;
case 1:
elem_size = 2*4;
if(packed_update)
{
p = generate_rgba16_image(w, h, d, mtData);
update_image_from_image(rgba16_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
}
else
{
update_rgba16_image(rgba16_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
p = (void *)(rgba16_inptr + ((z * img_slice + y * img_width + x) * 4));
}
outp = (void *)rgba16_outptr;
break;
case 2:
elem_size = 4*4;
if(packed_update)
{
p = generate_rgbafp_image(w, h, d, mtData);
update_image_from_image(rgbafp_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
}
else
{
update_rgbafp_image(rgbafp_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
p = (void *)(rgbafp_inptr + ((z * img_slice + y * img_width + x) * 4));
}
outp = (void *)rgbafp_outptr;
break;
}
// Pick the row/slice pitches passed to clEnqueueWriteImage and the label
// used in failure messages.
const char* update_packed_pitch_name = "";
if(packed_update)
{
if(set_input_pitch)
{
// for packed updates the pitch does not need to be calculated here (but can be)
update_packed_pitch_name = "'packed with pitch'";
input_pitch = w*elem_size;
input_slice_pitch = w*h*elem_size;
}
else
{
// for packed updates the pitch does not need to be calculated here
update_packed_pitch_name = "'packed without pitch'";
input_pitch = 0;
input_slice_pitch = 0;
}
}
else
{
// for unpacked updates the pitch is required
// (the source rows/slices are those of the full host image)
update_packed_pitch_name = "'unpacked with pitch'";
input_pitch = img_width*elem_size;
input_slice_pitch = input_pitch*img_height;
}
size_t origin[3] = {x,y,z}, region[3] = {w, h, d};
err = clEnqueueWriteImage(queue, streams[j], CL_TRUE,
origin, region, input_pitch, input_slice_pitch,
p, 0, NULL, NULL);
test_error(err, "clEnqueueWriteImage failed");
// The packed source was a temporary allocation; the unpacked source points
// into the reference image and must not be freed.
if(packed_update)
{
free(p);
p = NULL;
}
// Fill the read-back buffer with a known byte pattern, then read the whole image.
memset(outp, 0x7, img_width*img_height*img_depth*elem_size);
origin[0]=0; origin[1]=0; origin[2]=0; region[0]=img_width; region[1]=img_height; region[2]=img_depth;
err = clEnqueueReadImage(queue, streams[j], CL_TRUE,
origin, region, 0, 0,
outp, 0, NULL, NULL);
test_error(err, "clEnqueueReadImage failed");
// Compare the read-back image with the updated host reference.
switch (j)
{
case 0:
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
}
break;
case 1:
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
}
break;
case 2:
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
}
break;
}
// Stop at the first failing comparison.
if (err)
break;
}
// Normal-path cleanup of the RNG state and all host buffers.
free_mtdata(mtData);
free(rgba8_inptr);
free(rgba16_inptr);
free(rgbafp_inptr);
free(rgba8_outptr);
free(rgba16_outptr);
free(rgbafp_outptr);
if (!err)
log_info("IMAGE read, write test passed\n");
return err;
}

View File

@@ -1,159 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the test_int2float kernel: each work-item converts
// src[get_global_id(0)] to float and stores the result at the same index
// of dst.
const char *int2float_kernel_code =
"__kernel void test_int2float(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n";
/*
 * Checks that each of the n entries of outptr equals the host-side (float)
 * conversion of the matching entry of inptr.
 *
 * Logs and returns -1 at the first mismatch; logs success and returns 0 when
 * all entries agree.
 */
int
verify_int2float(cl_int *inptr, cl_float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        const float expected = (float)inptr[idx];

        if (outptr[idx] != expected)
        {
            log_error("INT2FLOAT test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("INT2FLOAT test passed\n");
    return 0;
}
/*
 * Runs the test_int2float kernel over num_elements random integers and
 * checks every result against the host's (float) conversion.
 *
 * Returns 0 when all elements match, or -1 on any allocation failure,
 * OpenCL failure or mismatch.  Unlike the original, every exit path releases
 * the host buffers and the OpenCL objects created here.
 */
int
test_int2float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_int *input_ptr = NULL;
    cl_float *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t lengths[1];
    size_t threads[1];
    int result = -1;
    int err;
    int i;
    MTdata d;

    input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    /* streams[0] holds the integer input, streams[1] receives the floats. */
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (cl_int)get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    lengths[0] = strlen(int2float_kernel_code);
    program = clCreateProgramWithSource(context, 1, &int2float_kernel_code, lengths, NULL);
    if (!program)
    {
        log_error("clCreateProgramWithSource failed\n");
        goto cleanup;
    }
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clBuildProgramExecutable failed\n");
        goto cleanup;
    }
    kernel = clCreateKernel(program, "test_int2float", NULL);
    if (!kernel)
    {
        log_error("clCreateKernel failed\n");
        goto cleanup;
    }

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    /* One work-item per element. */
    threads[0] = (size_t)num_elements;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    /* Blocking read, so output_ptr is complete when the call returns. */
    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_int2float(input_ptr, output_ptr, num_elements);

cleanup:
    /* Release only what was actually created. */
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);
    return result;
}

View File

@@ -1,389 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C sources for the scalar int arithmetic kernels. Each work-item
// processes the element at index get_global_id(0).

// dst = srcA + srcB
const char *int_add_kernel_code =
"__kernel void test_int_add(__global int *srcA, __global int *srcB, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// dst = srcA - srcB
const char *int_sub_kernel_code =
"__kernel void test_int_sub(__global int *srcA, __global int *srcB, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// dst = srcA * srcB
const char *int_mul_kernel_code =
"__kernel void test_int_mul(__global int *srcA, __global int *srcB, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// dst = srcA * srcB + srcC
const char *int_mad_kernel_code =
"__kernel void test_int_mad(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// NOTE(review): MAX_ERR is not referenced by any code visible in this file;
// the integer checks below use exact comparison -- confirm whether it can be removed.
static const float MAX_ERR = 1e-5f;
/*
 * Checks that outptr[i] == inptrA[i] + inptrB[i] for all i in [0, n).
 *
 * Logs and returns -1 at the first mismatch; logs success and returns 0
 * otherwise.
 */
int
verify_int_add(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        /* The inputs may be arbitrary 32-bit patterns, so compute the
         * reference in unsigned arithmetic: it wraps, whereas signed
         * overflow is undefined behaviour on the host. */
        const int r = (int)((unsigned int)inptrA[i] + (unsigned int)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("INT_ADD int test failed\n");
            return -1;
        }
    }
    log_info("INT_ADD int test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] == inptrA[i] - inptrB[i] for all i in [0, n).
 *
 * Logs and returns -1 at the first mismatch; logs success and returns 0
 * otherwise.
 */
int
verify_int_sub(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        /* Unsigned arithmetic wraps; signed overflow would be undefined
         * behaviour on the host for arbitrary 32-bit inputs. */
        const int r = (int)((unsigned int)inptrA[i] - (unsigned int)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("INT_SUB int test failed\n");
            return -1;
        }
    }
    log_info("INT_SUB int test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] == inptrA[i] * inptrB[i] for all i in [0, n).
 *
 * Logs and returns -1 at the first mismatch; logs success and returns 0
 * otherwise.
 */
int
verify_int_mul(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        /* Unsigned arithmetic wraps; signed overflow would be undefined
         * behaviour on the host for arbitrary 32-bit inputs. */
        const int r = (int)((unsigned int)inptrA[i] * (unsigned int)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("INT_MUL int test failed\n");
            return -1;
        }
    }
    log_info("INT_MUL int test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] == inptrA[i] * inptrB[i] + inptrC[i] for all i in
 * [0, n).
 *
 * Logs and returns -1 at the first mismatch; logs success and returns 0
 * otherwise.
 */
int
verify_int_mad(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        /* Unsigned arithmetic wraps; signed overflow would be undefined
         * behaviour on the host for arbitrary 32-bit inputs. */
        const int r = (int)((unsigned int)inptrA[i] * (unsigned int)inptrB[i]
                            + (unsigned int)inptrC[i]);

        if (r != outptr[i])
        {
            log_error("INT_MAD int test failed\n");
            return -1;
        }
    }
    log_info("INT_MAD int test passed\n");
    return 0;
}
/*
 * Builds the four scalar int kernels (add, sub, mul, mad), runs each over
 * num_elements random inputs and verifies the device output against the
 * host reference.
 *
 * Buffers: streams[0..2] hold the inputs srcA, srcB, srcC; streams[3] is the
 * shared output buffer, read back after every kernel.
 *
 * Returns 0 when all four kernels verify, or -1 on any allocation failure,
 * OpenCL failure or mismatch (the remaining kernels are then skipped).
 * Unlike the original, every exit path releases the host buffers and the
 * OpenCL objects created here.
 */
int
test_intmath_int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    static const char *const kernel_names[4] = {
        "test_int_add", "test_int_sub", "test_int_mul", "test_int_mad"
    };
    const char *sources[4];
    cl_mem streams[4] = { NULL, NULL, NULL, NULL };
    cl_program program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel kernel[4] = { NULL, NULL, NULL, NULL };
    cl_int *input_ptr[3] = { NULL, NULL, NULL };
    cl_int *output_ptr = NULL;
    size_t threads[1];
    size_t length = sizeof(cl_int) * num_elements;
    int result = -1;
    int err, i, j;
    MTdata d;

    sources[0] = int_add_kernel_code;
    sources[1] = int_sub_kernel_code;
    sources[2] = int_mul_kernel_code;
    sources[3] = int_mad_kernel_code;

    for (i = 0; i < 3; i++)
        input_ptr[i] = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    for (i = 0; i < 4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
    }

    /* Fill the inputs one buffer after another so the random sequence is
     * consumed in the same order as before. */
    d = init_genrand( gRandomSeed );
    for (i = 0; i < 3; i++)
    {
        for (j = 0; j < num_elements; j++)
            input_ptr[i][j] = (int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    for (i = 0; i < 3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        program[i] = clCreateProgramWithSource(context, 1, &sources[i], NULL, NULL);
        if (!program[i])
        {
            log_error("clCreateProgramWithSource failed\n");
            goto cleanup;
        }
        err = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            goto cleanup;
        }
        kernel[i] = clCreateKernel(program[i], kernel_names[i], NULL);
        if (!kernel[i])
        {
            log_error("clCreateKernel failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        cl_uint arg = 0;

        err = clSetKernelArg(kernel[i], arg++, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], arg++, sizeof streams[1], &streams[1]);
        /* Only the mad kernel takes the third input. */
        if (i == 3)
            err |= clSetKernelArg(kernel[i], arg++, sizeof streams[2], &streams[2]);
        err |= clSetKernelArg(kernel[i], arg, sizeof streams[3], &streams[3]);
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            goto cleanup;
        }
    }

    /* One work-item per element. */
    threads[0] = (size_t)num_elements;
    for (i = 0; i < 4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            goto cleanup;
        }
        /* Blocking read, so output_ptr is complete before verification. */
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            goto cleanup;
        }

        switch (i)
        {
            case 0:
                err = verify_int_add(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 1:
                err = verify_int_sub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 2:
                err = verify_int_mul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            default:
                err = verify_int_mad(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
                break;
        }
        if (err)
            break;
    }
    result = err;

cleanup:
    /* Release only what was actually created. */
    for (i = 0; i < 4; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    for (i = 0; i < 4; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
        if (program[i])
            clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    return result;
}

View File

@@ -1,388 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C sources for the int2 arithmetic kernels. Each work-item processes
// the int2 element at index get_global_id(0).

// dst = srcA + srcB
const char *int_add2_kernel_code =
"__kernel void test_int_add2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// dst = srcA - srcB
const char *int_sub2_kernel_code =
"__kernel void test_int_sub2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// dst = srcA * srcB
const char *int_mul2_kernel_code =
"__kernel void test_int_mul2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// dst = srcA * srcB + srcC
const char *int_mad2_kernel_code =
"__kernel void test_int_mad2(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
/*
 * Checks that outptr[i] == inptrA[i] + inptrB[i] for all i in [0, n), where
 * n counts individual int components.
 *
 * Logs and returns -1 at the first mismatch; logs success and returns 0
 * otherwise.
 */
int
verify_int_add2(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        /* Unsigned arithmetic wraps; signed overflow would be undefined
         * behaviour on the host for arbitrary 32-bit inputs. */
        const int r = (int)((unsigned int)inptrA[i] + (unsigned int)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("INT_ADD int2 test failed\n");
            return -1;
        }
    }
    log_info("INT_ADD int2 test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] == inptrA[i] - inptrB[i] for all i in [0, n), where
 * n counts individual int components.
 *
 * Logs and returns -1 at the first mismatch; logs success and returns 0
 * otherwise.
 */
int
verify_int_sub2(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        /* Unsigned arithmetic wraps; signed overflow would be undefined
         * behaviour on the host for arbitrary 32-bit inputs. */
        const int r = (int)((unsigned int)inptrA[i] - (unsigned int)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("INT_SUB int2 test failed\n");
            return -1;
        }
    }
    log_info("INT_SUB int2 test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] == inptrA[i] * inptrB[i] for all i in [0, n), where
 * n counts individual int components.
 *
 * Logs and returns -1 at the first mismatch; logs success and returns 0
 * otherwise.
 */
int
verify_int_mul2(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        /* Unsigned arithmetic wraps; signed overflow would be undefined
         * behaviour on the host for arbitrary 32-bit inputs. */
        const int r = (int)((unsigned int)inptrA[i] * (unsigned int)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("INT_MUL int2 test failed\n");
            return -1;
        }
    }
    log_info("INT_MUL int2 test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] == inptrA[i] * inptrB[i] + inptrC[i] for all i in
 * [0, n), where n counts individual int components.
 *
 * Logs and returns -1 at the first mismatch; logs success and returns 0
 * otherwise.
 */
int
verify_int_mad2(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        /* Unsigned arithmetic wraps; signed overflow would be undefined
         * behaviour on the host for arbitrary 32-bit inputs. */
        const int r = (int)((unsigned int)inptrA[i] * (unsigned int)inptrB[i]
                            + (unsigned int)inptrC[i]);

        if (r != outptr[i])
        {
            log_error("INT_MAD int2 test failed\n");
            return -1;
        }
    }
    log_info("INT_MAD int2 test passed\n");
    return 0;
}
// Builds the int2 add/sub/mul/mad kernels, runs each over num_elements
// work-items of random input, and checks the output against a host reference.
// The device parameter is not used. Returns 0 when all four kernels verify and
// -1 on any allocation failure, OpenCL failure, or verification mismatch.
// All host and OpenCL resources are released on every exit path.
int
test_intmath_int2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const char **sources[4] = { &int_add2_kernel_code, &int_sub2_kernel_code,
                                &int_mul2_kernel_code, &int_mad2_kernel_code };
    const char *names[4] = { "test_int_add2", "test_int_sub2",
                             "test_int_mul2", "test_int_mad2" };
    cl_mem streams[4] = { NULL, NULL, NULL, NULL };
    cl_program program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel kernel[4] = { NULL, NULL, NULL, NULL };
    cl_int *input_ptr[3] = { NULL, NULL, NULL };
    cl_int *output_ptr = NULL;
    size_t threads[1];
    size_t length = sizeof(cl_int) * 2 * num_elements;
    // Each element is an int2, so every buffer holds two scalars per element
    // and all of them must be generated and verified.
    int num_scalars = num_elements * 2;
    int result = -1;
    int err, i, j;
    MTdata d;

    for (i = 0; i < 3; i++)
    {
        input_ptr[i] = (cl_int*)malloc(length);
    }
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    // One generator fills input 0, then input 1, then input 2; it is released
    // before anything that can fail so it cannot leak.
    d = init_genrand( gRandomSeed );
    for (j = 0; j < 3; j++)
    {
        for (i = 0; i < num_scalars; i++)
        {
            input_ptr[j][i] = (int)genrand_int32(d);
        }
    }
    free_mtdata( d );

    // streams[0..2] are the kernel inputs, streams[3] is the shared output.
    for (i = 0; i < 4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        program[i] = clCreateProgramWithSource(context, 1, sources[i], NULL, NULL);
        if (!program[i])
        {
            log_error("clCreateProgramWithSource failed\n");
            goto cleanup;
        }
        err = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            goto cleanup;
        }
        kernel[i] = clCreateKernel(program[i], names[i], NULL);
        if (!kernel[i])
        {
            log_error("clCreateKernel failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        if (i == 3)
        {
            // The mad kernel takes a third source before the destination.
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)num_elements;
    for (i = 0; i < 4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            goto cleanup;
        }
        // Blocking read: output_ptr is valid as soon as the call returns.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            goto cleanup;
        }
        switch (i)
        {
            case 0:
                err = verify_int_add2(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 1:
                err = verify_int_sub2(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 2:
                err = verify_int_mul2(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 3:
                err = verify_int_mad2(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_scalars);
                break;
            default:
                err = -1;
                break;
        }
        if (err)
        {
            goto cleanup;
        }
    }
    result = 0;

cleanup:
    for (i = 0; i < 4; i++)
    {
        if (kernel[i])
        {
            clReleaseKernel(kernel[i]);
        }
        if (program[i])
        {
            clReleaseProgram(program[i]);
        }
        if (streams[i])
        {
            clReleaseMemObject(streams[i]);
        }
    }
    for (i = 0; i < 3; i++)
    {
        free(input_ptr[i]);
    }
    free(output_ptr);
    return result;
}

View File

@@ -1,387 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for test_int_add4: reads get_global_id(0) into tid and
// stores the int4 sum srcA[tid] + srcB[tid] into dst[tid].
const char *int_add4_kernel_code =
"__kernel void test_int_add4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// OpenCL C source for test_int_sub4: stores the int4 difference
// srcA[tid] - srcB[tid] into dst[tid].
const char *int_sub4_kernel_code =
"__kernel void test_int_sub4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// OpenCL C source for test_int_mul4: stores the int4 product
// srcA[tid] * srcB[tid] into dst[tid].
const char *int_mul4_kernel_code =
"__kernel void test_int_mul4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// OpenCL C source for test_int_mad4: stores the int4 multiply-add
// srcA[tid] * srcB[tid] + srcC[tid] into dst[tid]. It takes three source
// buffers, so dst is argument index 3.
const char *int_mad4_kernel_code =
"__kernel void test_int_mad4(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// Compares the first n ints of outptr with the host reference
// inptrA[i] + inptrB[i]. Logs a failure and returns -1 at the first mismatch;
// logs a pass message and returns 0 when every element matches.
int
verify_int_add4(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        unsigned int expected = (unsigned int)inptrA[i] + (unsigned int)inptrB[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_ADD int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_ADD int4 test passed\n");
    return 0;
}
// Compares the first n ints of outptr with the host reference
// inptrA[i] - inptrB[i]. Logs a failure and returns -1 at the first mismatch;
// logs a pass message and returns 0 when every element matches.
int
verify_int_sub4(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        unsigned int expected = (unsigned int)inptrA[i] - (unsigned int)inptrB[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_SUB int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_SUB int4 test passed\n");
    return 0;
}
// Compares the first n ints of outptr with the host reference
// inptrA[i] * inptrB[i]. Logs a failure and returns -1 at the first mismatch;
// logs a pass message and returns 0 when every element matches.
int
verify_int_mul4(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        unsigned int expected = (unsigned int)inptrA[i] * (unsigned int)inptrB[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_MUL int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_MUL int4 test passed\n");
    return 0;
}
// Compares the first n ints of outptr with the host reference
// inptrA[i] * inptrB[i] + inptrC[i]. Logs a failure and returns -1 at the
// first mismatch; logs a pass message and returns 0 when every element matches.
int
verify_int_mad4(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        unsigned int expected = (unsigned int)inptrA[i] * (unsigned int)inptrB[i]
                              + (unsigned int)inptrC[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_MAD int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_MAD int4 test passed\n");
    return 0;
}
// Builds the int4 add/sub/mul/mad kernels, runs each over num_elements
// work-items of random input, and checks the output against a host reference.
// The device parameter is not used. Returns 0 when all four kernels verify and
// -1 on any allocation failure, OpenCL failure, or verification mismatch.
// All host and OpenCL resources are released on every exit path.
int
test_intmath_int4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const char **sources[4] = { &int_add4_kernel_code, &int_sub4_kernel_code,
                                &int_mul4_kernel_code, &int_mad4_kernel_code };
    const char *names[4] = { "test_int_add4", "test_int_sub4",
                             "test_int_mul4", "test_int_mad4" };
    cl_mem streams[4] = { NULL, NULL, NULL, NULL };
    cl_program program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel kernel[4] = { NULL, NULL, NULL, NULL };
    cl_int *input_ptr[3] = { NULL, NULL, NULL };
    cl_int *output_ptr = NULL;
    size_t threads[1];
    size_t length = sizeof(cl_int) * 4 * num_elements;
    // Each element is an int4, so every buffer holds four scalars per element
    // and all of them must be generated and verified.
    int num_scalars = num_elements * 4;
    int result = -1;
    int err, i, j;
    MTdata d;

    for (i = 0; i < 3; i++)
    {
        input_ptr[i] = (cl_int*)malloc(length);
    }
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    // One generator fills input 0, then input 1, then input 2; it is released
    // before anything that can fail so it cannot leak.
    d = init_genrand( gRandomSeed );
    for (j = 0; j < 3; j++)
    {
        for (i = 0; i < num_scalars; i++)
        {
            input_ptr[j][i] = (int)genrand_int32(d);
        }
    }
    free_mtdata(d);

    // streams[0..2] are the kernel inputs, streams[3] is the shared output.
    for (i = 0; i < 4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        program[i] = clCreateProgramWithSource(context, 1, sources[i], NULL, NULL);
        if (!program[i])
        {
            log_error("clCreateProgramWithSource failed\n");
            goto cleanup;
        }
        err = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            goto cleanup;
        }
        kernel[i] = clCreateKernel(program[i], names[i], NULL);
        if (!kernel[i])
        {
            log_error("clCreateKernel failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        if (i == 3)
        {
            // The mad kernel takes a third source before the destination.
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)num_elements;
    for (i = 0; i < 4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            goto cleanup;
        }
        // Blocking read: output_ptr is valid as soon as the call returns.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            goto cleanup;
        }
        switch (i)
        {
            case 0:
                err = verify_int_add4(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 1:
                err = verify_int_sub4(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 2:
                err = verify_int_mul4(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 3:
                err = verify_int_mad4(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_scalars);
                break;
            default:
                err = -1;
                break;
        }
        if (err)
        {
            goto cleanup;
        }
    }
    result = 0;

cleanup:
    for (i = 0; i < 4; i++)
    {
        if (kernel[i])
        {
            clReleaseKernel(kernel[i]);
        }
        if (program[i])
        {
            clReleaseProgram(program[i]);
        }
        if (streams[i])
        {
            clReleaseMemObject(streams[i]);
        }
    }
    for (i = 0; i < 3; i++)
    {
        free(input_ptr[i]);
    }
    free(output_ptr);
    return result;
}

View File

@@ -1,397 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for test_long_add: reads get_global_id(0) into tid and
// stores the 64-bit sum srcA[tid] + srcB[tid] into dst[tid].
const char *long_add_kernel_code =
"__kernel void test_long_add(__global long *srcA, __global long *srcB, __global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// OpenCL C source for test_long_sub: stores srcA[tid] - srcB[tid] into dst[tid].
const char *long_sub_kernel_code =
"__kernel void test_long_sub(__global long *srcA, __global long *srcB, __global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// OpenCL C source for test_long_mul: stores srcA[tid] * srcB[tid] into dst[tid].
const char *long_mul_kernel_code =
"__kernel void test_long_mul(__global long *srcA, __global long *srcB, __global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// OpenCL C source for test_long_mad: stores srcA[tid] * srcB[tid] + srcC[tid]
// into dst[tid]. It takes three source buffers, so dst is argument index 3.
const char *long_mad_kernel_code =
"__kernel void test_long_mad(__global long *srcA, __global long *srcB, __global long *srcC, __global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// NOTE(review): no use of MAX_ERR is visible in this file; the verifiers
// below compare integers exactly. Possibly a leftover -- confirm before removing.
static const float MAX_ERR = 1e-5f;
// Compares the first n values of outptr with the host reference
// inptrA[i] + inptrB[i]. Logs a failure and returns -1 at the first mismatch;
// logs a pass message and returns 0 when every element matches.
int
verify_long_add(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned 64-bit arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        cl_ulong expected = (cl_ulong)inptrA[i] + (cl_ulong)inptrB[i];

        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_ADD int test failed\n");
            return -1;
        }
    }

    log_info("LONG_ADD int test passed\n");
    return 0;
}
// Compares the first n values of outptr with the host reference
// inptrA[i] - inptrB[i]. Logs a failure and returns -1 at the first mismatch;
// logs a pass message and returns 0 when every element matches.
int
verify_long_sub(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned 64-bit arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        cl_ulong expected = (cl_ulong)inptrA[i] - (cl_ulong)inptrB[i];

        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_SUB int test failed\n");
            return -1;
        }
    }

    log_info("LONG_SUB int test passed\n");
    return 0;
}
// Compares the first n values of outptr with the host reference
// inptrA[i] * inptrB[i]. Logs a failure and returns -1 at the first mismatch;
// logs a pass message and returns 0 when every element matches.
int
verify_long_mul(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned 64-bit arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i];

        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MUL int test failed\n");
            return -1;
        }
    }

    log_info("LONG_MUL int test passed\n");
    return 0;
}
// Compares the first n values of outptr with the host reference
// inptrA[i] * inptrB[i] + inptrC[i]. Logs a failure and returns -1 at the
// first mismatch; logs a pass message and returns 0 when every element matches.
int
verify_long_mad(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned 64-bit arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i]
                          + (cl_ulong)inptrC[i];

        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MAD int test failed\n");
            return -1;
        }
    }

    log_info("LONG_MAD int test passed\n");
    return 0;
}
// Builds the long add/sub/mul/mad kernels, runs each over num_elements
// work-items of random 64-bit input, and checks the output against a host
// reference. The device parameter is not used. Returns CL_SUCCESS without
// running anything when gHasLong is false; otherwise returns 0 when all four
// kernels verify and -1 on any allocation failure, OpenCL failure, or
// verification mismatch. All resources are released on every exit path.
int
test_intmath_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const char **sources[4] = { &long_add_kernel_code, &long_sub_kernel_code,
                                &long_mul_kernel_code, &long_mad_kernel_code };
    const char *names[4] = { "test_long_add", "test_long_sub",
                             "test_long_mul", "test_long_mad" };
    cl_mem streams[4] = { NULL, NULL, NULL, NULL };
    cl_program program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel kernel[4] = { NULL, NULL, NULL, NULL };
    cl_long *input_ptr[3] = { NULL, NULL, NULL };
    cl_long *output_ptr = NULL;
    size_t threads[1];
    size_t length = sizeof(cl_long) * num_elements;
    cl_ulong lo, hi;
    int result = -1;
    int err, i, j;
    MTdata d;

    if(! gHasLong )
    {
        log_info("64-bit integers are not supported by this device. Skipping test.\n");
        return CL_SUCCESS;
    }

    for (i = 0; i < 3; i++)
    {
        input_ptr[i] = (cl_long*)malloc(length);
    }
    output_ptr = (cl_long*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    // One generator fills input 0, then input 1, then input 2; it is released
    // before anything that can fail so it cannot leak.
    d = init_genrand( gRandomSeed );
    for (j = 0; j < 3; j++)
    {
        for (i = 0; i < num_elements; i++)
        {
            // Two 32-bit draws per value, low word first. They are combined
            // in an unsigned type because shifting a signed value into the
            // sign bit is undefined, and drawn in separate statements so the
            // order of the two generator calls is fixed.
            lo = genrand_int32(d);
            hi = genrand_int32(d);
            input_ptr[j][i] = (cl_long)(lo | (hi << 32));
        }
    }
    free_mtdata(d);

    // streams[0..2] are the kernel inputs, streams[3] is the shared output.
    for (i = 0; i < 4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        program[i] = clCreateProgramWithSource(context, 1, sources[i], NULL, NULL);
        if (!program[i])
        {
            log_error("clCreateProgramWithSource failed\n");
            goto cleanup;
        }
        err = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            goto cleanup;
        }
        kernel[i] = clCreateKernel(program[i], names[i], NULL);
        if (!kernel[i])
        {
            log_error("clCreateKernel failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        if (i == 3)
        {
            // The mad kernel takes a third source before the destination.
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)num_elements;
    for (i = 0; i < 4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            goto cleanup;
        }
        // Blocking read: output_ptr is valid as soon as the call returns.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            goto cleanup;
        }
        switch (i)
        {
            case 0:
                err = verify_long_add(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 1:
                err = verify_long_sub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 2:
                err = verify_long_mul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 3:
                err = verify_long_mad(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
                break;
            default:
                err = -1;
                break;
        }
        if (err)
        {
            goto cleanup;
        }
    }
    result = 0;

cleanup:
    for (i = 0; i < 4; i++)
    {
        if (kernel[i])
        {
            clReleaseKernel(kernel[i]);
        }
        if (program[i])
        {
            clReleaseProgram(program[i]);
        }
        if (streams[i])
        {
            clReleaseMemObject(streams[i]);
        }
    }
    for (i = 0; i < 3; i++)
    {
        free(input_ptr[i]);
    }
    free(output_ptr);
    return result;
}

View File

@@ -1,395 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for test_long_add2: reads get_global_id(0) into tid and
// stores the long2 sum srcA[tid] + srcB[tid] into dst[tid].
const char *long_add2_kernel_code =
"__kernel void test_long_add2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// OpenCL C source for test_long_sub2: stores the long2 difference
// srcA[tid] - srcB[tid] into dst[tid].
const char *long_sub2_kernel_code =
"__kernel void test_long_sub2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// OpenCL C source for test_long_mul2: stores the long2 product
// srcA[tid] * srcB[tid] into dst[tid].
const char *long_mul2_kernel_code =
"__kernel void test_long_mul2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// OpenCL C source for test_long_mad2: stores the long2 multiply-add
// srcA[tid] * srcB[tid] + srcC[tid] into dst[tid]. It takes three source
// buffers, so dst is argument index 3.
const char *long_mad2_kernel_code =
"__kernel void test_long_mad2(__global long2 *srcA, __global long2 *srcB, __global long2 *srcC, __global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// Compares the first n values of outptr with the host reference
// inptrA[i] + inptrB[i]. Logs a failure and returns -1 at the first mismatch;
// logs a pass message and returns 0 when every element matches.
int
verify_long_add2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned 64-bit arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        cl_ulong expected = (cl_ulong)inptrA[i] + (cl_ulong)inptrB[i];

        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_ADD long2 test failed\n");
            return -1;
        }
    }

    log_info("LONG_ADD long2 test passed\n");
    return 0;
}
// Compares the first n values of outptr with the host reference
// inptrA[i] - inptrB[i]. Logs a failure and returns -1 at the first mismatch;
// logs a pass message and returns 0 when every element matches.
int
verify_long_sub2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned 64-bit arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        cl_ulong expected = (cl_ulong)inptrA[i] - (cl_ulong)inptrB[i];

        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_SUB long2 test failed\n");
            return -1;
        }
    }

    log_info("LONG_SUB long2 test passed\n");
    return 0;
}
// Compares the first n values of outptr with the host reference
// inptrA[i] * inptrB[i]. Logs a failure and returns -1 at the first mismatch;
// logs a pass message and returns 0 when every element matches.
int
verify_long_mul2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned 64-bit arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i];

        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MUL long2 test failed\n");
            return -1;
        }
    }

    log_info("LONG_MUL long2 test passed\n");
    return 0;
}
// Compares the first n values of outptr with the host reference
// inptrA[i] * inptrB[i] + inptrC[i]. Logs a failure and returns -1 at the
// first mismatch; logs a pass message and returns 0 when every element matches.
int
verify_long_mad2(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Unsigned 64-bit arithmetic wraps instead of invoking the undefined
        // behaviour of signed overflow.
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i]
                          + (cl_ulong)inptrC[i];

        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MAD long2 test failed\n");
            return -1;
        }
    }

    log_info("LONG_MAD long2 test passed\n");
    return 0;
}
// Builds the long2 add/sub/mul/mad kernels, runs each over num_elements
// work-items of random 64-bit input, and checks the output against a host
// reference. The device parameter is not used. Returns 0 without running
// anything when gHasLong is false; otherwise returns 0 when all four kernels
// verify and -1 on any allocation failure, OpenCL failure, or verification
// mismatch. All resources are released on every exit path.
int
test_intmath_long2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const char **sources[4] = { &long_add2_kernel_code, &long_sub2_kernel_code,
                                &long_mul2_kernel_code, &long_mad2_kernel_code };
    const char *names[4] = { "test_long_add2", "test_long_sub2",
                             "test_long_mul2", "test_long_mad2" };
    cl_mem streams[4] = { NULL, NULL, NULL, NULL };
    cl_program program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel kernel[4] = { NULL, NULL, NULL, NULL };
    cl_long *input_ptr[3] = { NULL, NULL, NULL };
    cl_long *output_ptr = NULL;
    size_t threads[1];
    size_t length = sizeof(cl_long) * 2* num_elements;
    // Each element is a long2, so every buffer holds two scalars per element
    // and all of them must be generated and verified.
    int num_scalars = num_elements * 2;
    cl_ulong lo, hi;
    int result = -1;
    int err, i, j;
    MTdata d;

    if(! gHasLong)
    {
        log_info("64-bit integers are not supported in this device. Skipping test.\n");
        return 0;
    }

    for (i = 0; i < 3; i++)
    {
        input_ptr[i] = (cl_long*)malloc(length);
    }
    output_ptr = (cl_long*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    // One generator fills input 0, then input 1, then input 2; it is released
    // before anything that can fail so it cannot leak.
    d = init_genrand( gRandomSeed );
    for (j = 0; j < 3; j++)
    {
        for (i = 0; i < num_scalars; i++)
        {
            // Two 32-bit draws per value, low word first. They are combined
            // in an unsigned type because shifting a signed value into the
            // sign bit is undefined, and drawn in separate statements so the
            // order of the two generator calls is fixed.
            lo = genrand_int32(d);
            hi = genrand_int32(d);
            input_ptr[j][i] = (cl_long)(lo | (hi << 32));
        }
    }
    free_mtdata(d);

    // streams[0..2] are the kernel inputs, streams[3] is the shared output.
    for (i = 0; i < 4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        program[i] = clCreateProgramWithSource(context, 1, sources[i], NULL, NULL);
        if (!program[i])
        {
            log_error("clCreateProgramWithSource failed\n");
            goto cleanup;
        }
        err = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            goto cleanup;
        }
        kernel[i] = clCreateKernel(program[i], names[i], NULL);
        if (!kernel[i])
        {
            log_error("clCreateKernel failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 4; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        if (i == 3)
        {
            // The mad kernel takes a third source before the destination.
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)num_elements;
    for (i = 0; i < 4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            goto cleanup;
        }
        // Blocking read: output_ptr is valid as soon as the call returns.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            goto cleanup;
        }
        switch (i)
        {
            case 0:
                err = verify_long_add2(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 1:
                err = verify_long_sub2(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 2:
                err = verify_long_mul2(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 3:
                err = verify_long_mad2(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_scalars);
                break;
            default:
                err = -1;
                break;
        }
        if (err)
        {
            goto cleanup;
        }
    }
    result = 0;

cleanup:
    for (i = 0; i < 4; i++)
    {
        if (kernel[i])
        {
            clReleaseKernel(kernel[i]);
        }
        if (program[i])
        {
            clReleaseProgram(program[i]);
        }
        if (streams[i])
        {
            clReleaseMemObject(streams[i]);
        }
    }
    for (i = 0; i < 3; i++)
    {
        free(input_ptr[i]);
    }
    free(output_ptr);
    return result;
}

View File

@@ -1,395 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C sources for the four long4 integer-math kernels exercised by
// test_intmath_long4. Each work-item handles one long4 element, indexed by
// get_global_id(0).

// dst = srcA + srcB
const char *long_add4_kernel_code =
"__kernel void test_long_add4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// dst = srcA - srcB
const char *long_sub4_kernel_code =
"__kernel void test_long_sub4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// dst = srcA * srcB
const char *long_mul4_kernel_code =
"__kernel void test_long_mul4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// dst = srcA * srcB + srcC (takes a third source buffer, so four arguments)
const char *long_mad4_kernel_code =
"__kernel void test_long_mad4(__global long4 *srcA, __global long4 *srcB, __global long4 *srcC, __global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// Compares the device output of the long4 add kernel with a host reference.
//
//   inptrA, inptrB  host copies of the two source buffers
//   outptr          results read back from the device
//   n               number of cl_long scalars to compare
//
// Returns 0 if all n scalars match, -1 (after logging) on the first mismatch.
int
verify_long_add4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // The test feeds random 64-bit values, so a signed add may overflow,
        // which is undefined behaviour in C. Unsigned arithmetic wraps and
        // yields the same bit pattern once converted back.
        cl_long r = (cl_long)((unsigned long long)inptrA[i] +
                              (unsigned long long)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("LONG_ADD long4 test failed\n");
            return -1;
        }
    }

    log_info("LONG_ADD long4 test passed\n");
    return 0;
}
// Compares the device output of the long4 subtract kernel with a host
// reference.
//
//   inptrA, inptrB  host copies of the two source buffers
//   outptr          results read back from the device
//   n               number of cl_long scalars to compare
//
// Returns 0 if all n scalars match, -1 (after logging) on the first mismatch.
int
verify_long_sub4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Subtract in unsigned arithmetic: the inputs are random 64-bit
        // values and a signed subtraction that overflows is undefined in C.
        cl_long r = (cl_long)((unsigned long long)inptrA[i] -
                              (unsigned long long)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("LONG_SUB long4 test failed\n");
            return -1;
        }
    }

    log_info("LONG_SUB long4 test passed\n");
    return 0;
}
// Compares the device output of the long4 multiply kernel with a host
// reference.
//
//   inptrA, inptrB  host copies of the two source buffers
//   outptr          results read back from the device
//   n               number of cl_long scalars to compare
//
// Returns 0 if all n scalars match, -1 (after logging) on the first mismatch.
int
verify_long_mul4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Multiply in unsigned arithmetic: the product of two random 64-bit
        // values almost always overflows, and signed overflow is undefined.
        // The low 64 bits of the unsigned product are the wrapped result.
        cl_long r = (cl_long)((unsigned long long)inptrA[i] *
                              (unsigned long long)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("LONG_MUL long4 test failed\n");
            return -1;
        }
    }

    log_info("LONG_MUL long4 test passed\n");
    return 0;
}
// Compares the device output of the long4 multiply-add kernel with a host
// reference (A * B + C).
//
//   inptrA, inptrB, inptrC  host copies of the three source buffers
//   outptr                  results read back from the device
//   n                       number of cl_long scalars to compare
//
// Returns 0 if all n scalars match, -1 (after logging) on the first mismatch.
int
verify_long_mad4(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Both the product and the sum can overflow for random 64-bit
        // inputs; compute them unsigned so the wrap-around is well defined.
        cl_long r = (cl_long)((unsigned long long)inptrA[i] *
                              (unsigned long long)inptrB[i] +
                              (unsigned long long)inptrC[i]);

        if (r != outptr[i])
        {
            log_error("LONG_MAD long4 test failed\n");
            return -1;
        }
    }

    log_info("LONG_MAD long4 test passed\n");
    return 0;
}
int
test_intmath_long4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[4];
cl_program program[4];
cl_kernel kernel[4];
cl_long *input_ptr[3], *output_ptr, *p;
size_t threads[1];
int err, i;
if(! gHasLong )
{
log_info("64-bit integers are not supported by this device. Skipping test.\n");
return 0;
}
MTdata d = init_genrand( gRandomSeed );
size_t length = sizeof(cl_long) * 4 * num_elements;
input_ptr[0] = (cl_long*)malloc(length);
input_ptr[1] = (cl_long*)malloc(length);
input_ptr[2] = (cl_long*)malloc(length);
output_ptr = (cl_long*)malloc(length);
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
if (!streams[0])
{
log_error("clCreateBuffer failed\n");
return -1;
}
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
if (!streams[1])
{
log_error("clCreateBuffer failed\n");
return -1;
}
streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
if (!streams[2])
{
log_error("clCreateBuffer failed\n");
return -1;
}
streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
if (!streams[3])
{
log_error("clCreateBuffer failed\n");
return -1;
}
p = input_ptr[0];
for (i=0; i<num_elements * 4; i++)
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
p = input_ptr[1];
for (i=0; i<num_elements * 4; i++)
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
p = input_ptr[2];
for (i=0; i<num_elements * 4; i++)
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
free_mtdata(d);
d = NULL;
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
program[0] = clCreateProgramWithSource(context, 1, &long_add4_kernel_code, NULL, NULL);
if (!program[0])
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgram failed\n");
return -1;
}
kernel[0] = clCreateKernel(program[0], "test_long_add4", NULL);
if (!kernel[0])
{
log_error("clCreateKernel failed\n");
return -1;
}
program[1] = clCreateProgramWithSource(context, 1, &long_sub4_kernel_code, NULL, NULL);
if (!program[1])
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgram failed\n");
return -1;
}
kernel[1] = clCreateKernel(program[1], "test_long_sub4", NULL);
if (!kernel[1])
{
log_error("clCreateKernel failed\n");
return -1;
}
program[2] = clCreateProgramWithSource(context, 1, &long_mul4_kernel_code, NULL, NULL);
if (!program[2])
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgram failed\n");
return -1;
}
kernel[2] = clCreateKernel(program[2], "test_long_mul4", NULL);
if (!kernel[2])
{
log_error("clCreateKernel failed\n");
return -1;
}
program[3] = clCreateProgramWithSource(context, 1, &long_mad4_kernel_code, NULL, NULL);
if (!program[3])
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgram failed\n");
return -1;
}
kernel[3] = clCreateKernel(program[3], "test_long_mad4", NULL);
if (!kernel[3])
{
log_error("clCreateKernel failed\n");
return -1;
}
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
err = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
threads[0] = (unsigned int)num_elements;
for (i=0; i<4; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueNDRangeKernel failed\n");
return -1;
}
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueReadBuffer failed\n");
return -1;
}
switch (i)
{
case 0:
err = verify_long_add4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
break;
case 1:
err = verify_long_sub4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
break;
case 2:
err = verify_long_mul4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
break;
case 3:
err = verify_long_mad4(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
break;
}
if (err)
break;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
for (i=0; i<4; i++)
{
clReleaseKernel(kernel[i]);
clReleaseProgram(program[i]);
}
free(input_ptr[0]);
free(input_ptr[1]);
free(input_ptr[2]);
free(output_ptr);
return err;
}

View File

@@ -1,572 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _WIN32
#include <unistd.h>
#endif
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
#include "harness/errorHelpers.h"
// Kernel source templates for the memory-alignment test. They are printf-style
// format strings: the first %s is an optional pragma line, the remaining %s
// are filled with address-space and/or scalar type names by
// test_kernel_memory_alignment. Every kernel stores six addresses (scalar and
// 2, 3, 4, 8, 16 wide variants) into results[0..5].
//
// The *_long variants report addresses as ulong, the *_no_long variants as
// uint; test_kernel_memory_alignment picks between them based on gHasLong.

// For global, local, and constant: the variables under test are pointer
// arguments. Takes 6 (address space, type) pairs after the pragma.
const char *parameter_kernel_long =
"%s\n" // optional pragma
"kernel void test(global ulong *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
"{\n"
" results[0] = (ulong)&mem0[0];\n"
" results[1] = (ulong)&mem2[0];\n"
" results[2] = (ulong)&mem3[0];\n"
" results[3] = (ulong)&mem4[0];\n"
" results[4] = (ulong)&mem8[0];\n"
" results[5] = (ulong)&mem16[0];\n"
"}\n";
// For private and local: the variables under test are arrays declared inside
// the kernel body. Takes 6 (address space, type) pairs after the pragma.
const char *local_kernel_long =
"%s\n" // optional pragma
"kernel void test(global ulong *results)\n"
"{\n"
" %s %s mem0[3];\n"
" %s %s2 mem2[3];\n"
" %s %s3 mem3[3];\n"
" %s %s4 mem4[3];\n"
" %s %s8 mem8[3];\n"
" %s %s16 mem16[3];\n"
" results[0] = (ulong)&mem0[0];\n"
" results[1] = (ulong)&mem2[0];\n"
" results[2] = (ulong)&mem3[0];\n"
" results[3] = (ulong)&mem4[0];\n"
" results[4] = (ulong)&mem8[0];\n"
" results[5] = (ulong)&mem16[0];\n"
"}\n";
// For constant: the variables under test are program-scope constant arrays.
// Takes 11 type names after the pragma (one for mem0, two for each vector).
const char *constant_kernel_long =
"%s\n" // optional pragma
" constant %s mem0[3] = {0};\n"
" constant %s2 mem2[3] = {(%s2)(0)};\n"
" constant %s3 mem3[3] = {(%s3)(0)};\n"
" constant %s4 mem4[3] = {(%s4)(0)};\n"
" constant %s8 mem8[3] = {(%s8)(0)};\n"
" constant %s16 mem16[3] = {(%s16)(0)};\n"
"\n"
"kernel void test(global ulong *results)\n"
"{\n"
" results[0] = (ulong)&mem0;\n"
" results[1] = (ulong)&mem2;\n"
" results[2] = (ulong)&mem3;\n"
" results[3] = (ulong)&mem4;\n"
" results[4] = (ulong)&mem8;\n"
" results[5] = (ulong)&mem16;\n"
"}\n";
// For global, local, and constant (uint results): same layout as
// parameter_kernel_long.
const char *parameter_kernel_no_long =
"%s\n" // optional pragma
"kernel void test(global uint *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
"{\n"
" results[0] = (uint)&mem0[0];\n"
" results[1] = (uint)&mem2[0];\n"
" results[2] = (uint)&mem3[0];\n"
" results[3] = (uint)&mem4[0];\n"
" results[4] = (uint)&mem8[0];\n"
" results[5] = (uint)&mem16[0];\n"
"}\n";
// For private and local (uint results): same layout as local_kernel_long.
const char *local_kernel_no_long =
"%s\n" // optional pragma
"kernel void test(global uint *results)\n"
"{\n"
" %s %s mem0[3];\n"
" %s %s2 mem2[3];\n"
" %s %s3 mem3[3];\n"
" %s %s4 mem4[3];\n"
" %s %s8 mem8[3];\n"
" %s %s16 mem16[3];\n"
" results[0] = (uint)&mem0[0];\n"
" results[1] = (uint)&mem2[0];\n"
" results[2] = (uint)&mem3[0];\n"
" results[3] = (uint)&mem4[0];\n"
" results[4] = (uint)&mem8[0];\n"
" results[5] = (uint)&mem16[0];\n"
"}\n";
// For constant (uint results): same layout as constant_kernel_long.
const char *constant_kernel_no_long =
"%s\n" // optional pragma
" constant %s mem0[3] = {0};\n"
" constant %s2 mem2[3] = {(%s2)(0)};\n"
" constant %s3 mem3[3] = {(%s3)(0)};\n"
" constant %s4 mem4[3] = {(%s4)(0)};\n"
" constant %s8 mem8[3] = {(%s8)(0)};\n"
" constant %s16 mem16[3] = {(%s16)(0)};\n"
"\n"
"kernel void test(global uint *results)\n"
"{\n"
" results[0] = (uint)&mem0;\n"
" results[1] = (uint)&mem2;\n"
" results[2] = (uint)&mem3;\n"
" results[3] = (uint)&mem4;\n"
" results[4] = (uint)&mem8;\n"
" results[5] = (uint)&mem16;\n"
"}\n";
// Address spaces in which the alignment test can place its variables.
enum AddressSpaces
{
    kGlobal = 0,
    kLocal,
    kConstant,
    kPrivate
};
typedef enum AddressSpaces AddressSpaces;

// Set to 1 to also log the addresses that pass the alignment check.
#define DEBUG 0

// Returns the OpenCL C qualifier keyword for `address` ("global", "local",
// "constant" or "private"), or NULL if `address` is not one of the
// AddressSpaces enumerators.
//
// A switch is used instead of an array indexed by the enum so the mapping no
// longer depends on the enumerator order and an unexpected value cannot read
// past the end of a table.
const char * get_explicit_address_name( AddressSpaces address )
{
    switch ( address )
    {
        case kGlobal:
            return "global";
        case kLocal:
            return "local";
        case kConstant:
            return "constant";
        case kPrivate:
            return "private";
    }
    return NULL;
}
int test_kernel_memory_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, AddressSpaces address )
{
const char *constant_kernel;
const char *parameter_kernel;
const char *local_kernel;
if ( gHasLong )
{
constant_kernel = constant_kernel_long;
parameter_kernel = parameter_kernel_long;
local_kernel = local_kernel_long;
}
else
{
constant_kernel = constant_kernel_no_long;
parameter_kernel = parameter_kernel_no_long;
local_kernel = local_kernel_no_long;
}
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
char *kernel_code = (char*)malloc(4096);
cl_kernel kernel;
cl_program program;
int error;
int total_errors = 0;
cl_mem results;
cl_ulong *results_data;
cl_mem mem0, mem2, mem3, mem4, mem8, mem16;
results_data = (cl_ulong*)malloc(sizeof(cl_ulong)*6);
results = clCreateBuffer(context, 0, sizeof(cl_ulong)*6, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem0 = clCreateBuffer(context, 0, sizeof(cl_long), NULL, &error);
test_error(error, "clCreateBuffer failed");
mem2 = clCreateBuffer(context, 0, sizeof(cl_long)*2, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem3 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem4 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem8 = clCreateBuffer(context, 0, sizeof(cl_long)*8, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem16 = clCreateBuffer(context, 0, sizeof(cl_long)*16, NULL, &error);
test_error(error, "clCreateBuffer failed");
// For each type
// Calculate alignment mask for each size
// For global, local, constant, private
// If global, local or constant -- do parameter_kernel
// If private or local -- do local_kernel
// If constant -- do constant kernel
int numConstantArgs;
clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(numConstantArgs), &numConstantArgs, NULL);
int typeIndex;
for (typeIndex = 0; typeIndex < 10; typeIndex++) {
// Skip double tests if we don't support doubles
if (vecType[typeIndex] == kDouble && !is_extension_available(device, "cl_khr_fp64")) {
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
continue;
}
if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
continue;
log_info("Testing %s...\n", get_explicit_type_name(vecType[typeIndex]));
// Determine the expected alignment masks.
// E.g., if it is supposed to be 4 byte aligned, we should get 4-1=3 = ... 000011
// We can then and the returned address with that and we should have 0.
cl_ulong alignments[6];
alignments[0] = get_explicit_type_size(vecType[typeIndex])-1;
alignments[1] = (get_explicit_type_size(vecType[typeIndex])<<1)-1;
alignments[2] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
alignments[3] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
alignments[4] = (get_explicit_type_size(vecType[typeIndex])<<3)-1;
alignments[5] = (get_explicit_type_size(vecType[typeIndex])<<4)-1;
// Parameter kernel
if (address == kGlobal || address == kLocal || address == kConstant) {
log_info("\tTesting parameter kernel...\n");
if ( (gIsEmbedded) && (address == kConstant) && (numConstantArgs < 6)) {
sprintf(kernel_code, parameter_kernel,
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
);
}
else {
sprintf(kernel_code, parameter_kernel,
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
);
}
//printf("Kernel is: \n%s\n", kernel_code);
// Create the kernel
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
test_error(error, "create_single_kernel_helper failed");
// Initialize the results
memset(results_data, 0, sizeof(cl_long)*5);
error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*6, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
// Set the arguments
error = clSetKernelArg(kernel, 0, sizeof(results), &results);
test_error(error, "clSetKernelArg failed");
if (address != kLocal) {
error = clSetKernelArg(kernel, 1, sizeof(mem0), &mem0);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 2, sizeof(mem2), &mem2);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 3, sizeof(mem3), &mem3);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 4, sizeof(mem4), &mem4);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 5, sizeof(mem8), &mem8);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 6, sizeof(mem16), &mem16);
test_error(error, "clSetKernelArg failed");
} else {
error = clSetKernelArg(kernel, 1, get_explicit_type_size(vecType[typeIndex]), NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 2, get_explicit_type_size(vecType[typeIndex])*2, NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 3, get_explicit_type_size(vecType[typeIndex])*4, NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 4, get_explicit_type_size(vecType[typeIndex])*4, NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 5, get_explicit_type_size(vecType[typeIndex])*8, NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 6, get_explicit_type_size(vecType[typeIndex])*16, NULL);
test_error(error, "clSetKernelArg failed");
}
// Enqueue the kernel
size_t global_size = 1;
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
// Read back the results
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*6, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
// Verify the results
if (gHasLong) {
for (int i = 0; i < 6; i++) {
if ((results_data[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
}
}
}
// Verify the results on devices that do not support longs
else {
cl_uint *results_data_no_long = (cl_uint *)results_data;
for (int i = 0; i < 6; i++) {
if ((results_data_no_long[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
}
}
}
clReleaseKernel(kernel);
clReleaseProgram(program);
}
// Local kernel
if (address == kLocal || address == kPrivate) {
log_info("\tTesting local kernel...\n");
sprintf(kernel_code, local_kernel,
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
);
//printf("Kernel is: \n%s\n", kernel_code);
// Create the kernel
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
test_error(error, "create_single_kernel_helper failed");
// Initialize the results
memset(results_data, 0, sizeof(cl_long)*5);
error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
// Set the arguments
error = clSetKernelArg(kernel, 0, sizeof(results), &results);
test_error(error, "clSetKernelArg failed");
// Enqueue the kernel
size_t global_size = 1;
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
// Read back the results
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
// Verify the results
if (gHasLong) {
for (int i = 0; i < 5; i++) {
if ((results_data[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
}
}
}
// Verify the results on devices that do not support longs
else {
cl_uint *results_data_no_long = (cl_uint *)results_data;
for (int i = 0; i < 5; i++) {
if ((results_data_no_long[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
}
}
}
clReleaseKernel(kernel);
clReleaseProgram(program);
}
// Constant kernel
if (address == kConstant) {
log_info("\tTesting constant kernel...\n");
sprintf(kernel_code, constant_kernel,
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex])
);
//printf("Kernel is: \n%s\n", kernel_code);
// Create the kernel
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
test_error(error, "create_single_kernel_helper failed");
// Initialize the results
memset(results_data, 0, sizeof(cl_long)*5);
error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
// Set the arguments
error = clSetKernelArg(kernel, 0, sizeof(results), &results);
test_error(error, "clSetKernelArg failed");
// Enqueue the kernel
size_t global_size = 1;
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
// Read back the results
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
// Verify the results
if (gHasLong) {
for (int i = 0; i < 5; i++) {
if ((results_data[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
}
}
}
// Verify the results on devices that do not support longs
else {
cl_uint *results_data_no_long = (cl_uint *)results_data;
for (int i = 0; i < 5; i++) {
if ((results_data_no_long[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
}
}
}
clReleaseKernel(kernel);
clReleaseProgram(program);
}
}
clReleaseMemObject(results);
clReleaseMemObject(mem0);
clReleaseMemObject(mem2);
clReleaseMemObject(mem3);
clReleaseMemObject(mem4);
clReleaseMemObject(mem8);
clReleaseMemObject(mem16);
free( kernel_code );
free( results_data );
if (total_errors != 0)
return -1;
return 0;
}
// Test entry point: runs the memory-alignment test for the local address space.
int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const AddressSpaces space = kLocal;
    const int status = test_kernel_memory_alignment( device, context, queue, n_elems, space );

    return status;
}
// Test entry point: runs the memory-alignment test for the global address space.
int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const AddressSpaces space = kGlobal;
    const int status = test_kernel_memory_alignment( device, context, queue, n_elems, space );

    return status;
}
// Test entry point: runs the memory-alignment test for the constant address
// space, except on devices whose CL_DEVICE_VERSION starts with "OpenCL 1.0 ",
// which are allowed to skip it (see below).
//
// Returns 0 on pass or skip, -1 if the device version cannot be obtained,
// otherwise the result of test_kernel_memory_alignment.
int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // There is a class of approved OpenCL 1.0 conformant devices out there that in some circumstances
    // are unable to meaningfully take (or more precisely use) the address of constant data by virtue
    // of limitations in their ISA design. This feature was not tested in 1.0, so they were declared
    // conformant by Khronos. The failure is however caught here.
    //
    // Unfortunately, determining whether or not these devices are 1.0 conformant is not the jurisdiction
    // of the 1.1 tests -- We can't fail them from 1.1 conformance here because they are not 1.1
    // devices. They are merely 1.0 conformant devices that interop with 1.1 devices in a 1.1 platform.
    // To add new binding tests now to conformant 1.0 devices would violate the working group requirement
    // of no new tests for 1.0 devices. So certain allowances have to be made in intractable cases
    // such as this one.
    //
    // There is some precedent. Similar allowances are made for other 1.0 hardware features such as
    // local memory size. The minimum required local memory size grew from 16 kB to 32 kB in OpenCL 1.1.

    // Detect 1.0 devices
    // Get CL_DEVICE_VERSION size
    size_t string_size = 0;
    int err;
    if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, 0, NULL, &string_size ) ) )
    {
        log_error( "FAILURE: Unable to get size of CL_DEVICE_VERSION string!\n" );
        return -1;
    }

    //Allocate storage to hold the version string
    char *version_string = (char*) malloc(string_size);
    if( NULL == version_string )
    {
        log_error( "FAILURE: Unable to allocate memory to hold CL_DEVICE_VERSION string!\n" );
        return -1;
    }

    // Get CL_DEVICE_VERSION string
    if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, string_size, version_string, NULL ) ) )
    {
        log_error( "FAILURE: Unable to read CL_DEVICE_VERSION string!\n" );
        // This path used to return without freeing the buffer.
        free(version_string);
        return -1;
    }

    // easy out for 1.0 devices
    const char *string_1_0 = "OpenCL 1.0 ";
    if( 0 == strncmp( version_string, string_1_0, strlen(string_1_0)) )
    {
        log_info( "WARNING: Allowing device to escape testing of difficult constant memory alignment case.\n\tDevice is not a OpenCL 1.1 device. CL_DEVICE_VERSION: \"%s\"\n", version_string );
        free(version_string);
        return 0;
    }
    log_info( "Device version string: \"%s\"\n", version_string );
    free(version_string);

    // Everyone else is to be ground mercilessly under the wheels of progress
    return test_kernel_memory_alignment( device, context, queue, n_elems, kConstant );
}
// Test entry point: runs the memory-alignment test for the private address space.
int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const AddressSpaces space = kPrivate;
    const int status = test_kernel_memory_alignment( device, context, queue, n_elems, space );

    return status;
}

View File

@@ -1,372 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Two variants of a work-group reduction kernel that sums the n ints in `a`
// into *sum, using __local scratch storage and barrier(CLK_LOCAL_MEM_FENCE).
//
// Each work-item first accumulates a strided partial sum into
// tmp_sum[local id]; the do/while loop then repeatedly folds the upper half
// of tmp_sum into the lower half (work-item 0 also picks up the last element
// when the current size is odd) until the total is in tmp_sum[0].
//
//   [0]  the scratch array is passed in as a __local pointer argument
//   [1]  the scratch array is declared inside the kernel; the source is a
//        printf-style template whose %d is the array length
const char *barrier_with_localmem_kernel_code[] = {
"__kernel void compute_sum_with_localmem(__global int *a, int n, __local int *tmp_sum, __global int *sum)\n"
"{\n"
" int tid = get_local_id(0);\n"
" int lsize = get_local_size(0);\n"
" int i;\n"
"\n"
" tmp_sum[tid] = 0;\n"
" for (i=tid; i<n; i+=lsize)\n"
" tmp_sum[tid] += a[i];\n"
"\n"
" if( lsize == 1 )\n"
" {\n"
" if( tid == 0 )\n"
" *sum = tmp_sum[0];\n"
" return;\n"
" }\n"
"\n"
" do\n"
" {\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if (tid < lsize/2)\n"
" {\n"
" int sum = tmp_sum[tid];\n"
" if( (lsize & 1) && tid == 0 )\n"
" sum += tmp_sum[tid + lsize - 1];\n"
" tmp_sum[tid] = sum + tmp_sum[tid + lsize/2];\n"
" }\n"
" lsize = lsize/2; \n"
" }while( lsize );\n"
"\n"
" if( tid == 0 )\n"
" *sum = tmp_sum[0];\n"
"}\n",
// Variant with the __local array declared in the kernel body (size = %d).
"__kernel void compute_sum_with_localmem(__global int *a, int n, __global int *sum)\n"
"{\n"
" __local int tmp_sum[%d];\n"
" int tid = get_local_id(0);\n"
" int lsize = get_local_size(0);\n"
" int i;\n"
"\n"
" tmp_sum[tid] = 0;\n"
" for (i=tid; i<n; i+=lsize)\n"
" tmp_sum[tid] += a[i];\n"
"\n"
" if( lsize == 1 )\n"
" {\n"
" if( tid == 0 )\n"
" *sum = tmp_sum[0];\n"
" return;\n"
" }\n"
"\n"
" do\n"
" {\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if (tid < lsize/2)\n"
" {\n"
" int sum = tmp_sum[tid];\n"
" if( (lsize & 1) && tid == 0 )\n"
" sum += tmp_sum[tid + lsize - 1];\n"
" tmp_sum[tid] = sum + tmp_sum[tid + lsize/2];\n"
" }\n"
" lsize = lsize/2; \n"
" }while( lsize );\n"
"\n"
" if( tid == 0 )\n"
" *sum = tmp_sum[0];\n"
"}\n"
};
/*
 * Compares the device-computed sum against a host reference.
 *
 * inptr  - the n input values that were summed on the device
 * tmpptr - unused; kept so existing callers keep compiling
 * outptr - device results; only outptr[0] is examined
 * n      - number of elements in inptr
 *
 * Returns 0 when outptr[0] equals the host sum, -1 otherwise.
 */
static int
verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
{
    // Accumulate in unsigned arithmetic: the inputs may be arbitrary 32-bit
    // values, and overflowing a signed accumulator is undefined behaviour.
    // Unsigned addition wraps modulo 2^32, which is the result being checked.
    unsigned int expected = 0;
    int i;

    (void)tmpptr;

    for (i = 0; i < n; i++)
    {
        expected += (unsigned int)inptr[i];
    }

    if (expected != (unsigned int)outptr[0])
    {
        log_error("LOCAL test failed: *%d vs %d\n", (int)expected, outptr[0] );
        return -1;
    }

    log_info("LOCAL test passed\n");
    return 0;
}
/*
 * Runs the reduction kernel whose scratch storage is supplied as a __local
 * kernel argument (barrier_with_localmem_kernel_code[0]) and checks the
 * result with verify_sum().
 *
 * device/context/queue - OpenCL objects the test runs against
 * num_elements         - number of cl_int values to sum
 *
 * Returns 0 on success; -1, or the OpenCL error code of a failed query,
 * on failure. Every exit goes through the cleanup label so host buffers,
 * the random generator and all OpenCL objects are released on failure too.
 */
int test_local_arg_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // All locals are declared up front so that no "goto cleanup" jumps over
    // an initialisation (which would be ill-formed when built as C++).
    cl_mem streams[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_int *input_ptr = NULL, *output_ptr = NULL, *tmp_ptr = NULL;
    size_t global_threads[1], local_threads[1];
    size_t wgsize = 0, kwgsize = 0;
    size_t max_local_workgroup_size[3];
    size_t in_length = 0, out_length = 0;
    int err, i;
    int result = -1;
    MTdata d = init_genrand( gRandomSeed );

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL);
    if (err) {
        log_error("clGetDeviceInfo failed, %d\n\n", err);
        goto cleanup;
    }
    // Use half of the device maximum, but never less than one work-item.
    wgsize /= 2;
    if (wgsize < 1)
        wgsize = 1;

    in_length = sizeof(cl_int) * num_elements;
    out_length = sizeof(cl_int) * wgsize;

    input_ptr = (cl_int *)malloc(in_length);
    output_ptr = (cl_int *)malloc(out_length);
    tmp_ptr = (cl_int *)malloc(out_length);
    if (!input_ptr || !output_ptr || !tmp_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i = 0; i < num_elements; i++)
        input_ptr[i] = (int)genrand_int32(d);
    free_mtdata(d);
    d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, in_length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_with_localmem_kernel_code[0], "compute_sum_with_localmem" );
    if (err)
        goto cleanup;

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE (%d)\n", err);
        result = err;
        goto cleanup;
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES (%d)\n", err);
        result = err;
        goto cleanup;
    }

    // Pick the minimum of the device and the kernel
    if (kwgsize > max_local_workgroup_size[0])
        kwgsize = max_local_workgroup_size[0];

    err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
    // Argument 2 is the __local scratch array: a size with a NULL value.
    err |= clSetKernelArg(kernel, 2, wgsize * sizeof(cl_int), NULL);
    err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = wgsize;
    local_threads[0] = wgsize;

    // Adjust the local thread size to fit and be a nice multiple.
    if (kwgsize < wgsize) {
        // Cast so the arguments really match the %lu conversions on every ABI.
        log_info("Adjusting wgsize down from %lu to %lu.\n", (unsigned long)wgsize, (unsigned long)kwgsize);
        local_threads[0] = kwgsize;
    }
    while (global_threads[0] % local_threads[0] != 0)
        local_threads[0]--;

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements);

cleanup:
    if (d)
        free_mtdata(d);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(tmp_ptr);
    free(output_ptr);

    return result;
}
/*
 * Runs the reduction kernel whose scratch storage is a __local array declared
 * inside the kernel (barrier_with_localmem_kernel_code[1], with its "%d"
 * placeholder filled in here) and checks the result with verify_sum().
 *
 * device/context/queue - OpenCL objects the test runs against
 * num_elements         - number of cl_int values to sum
 *
 * Returns 0 on success; -1, or the OpenCL error code of a failed query,
 * on failure. Every exit goes through the cleanup label so the generated
 * source, host buffers, the random generator and all OpenCL objects are
 * released on failure too.
 */
int test_local_kernel_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // All locals are declared up front so that no "goto cleanup" jumps over
    // an initialisation (which would be ill-formed when built as C++).
    const size_t source_size = 2048;
    cl_mem streams[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_int *input_ptr = NULL, *output_ptr = NULL, *tmp_ptr = NULL;
    char *program_source = NULL;
    size_t global_threads[1], local_threads[1];
    size_t wgsize = 0, kwgsize = 0;
    size_t max_local_workgroup_size[3];
    size_t in_length = 0, out_length = 0;
    cl_ulong localMemSize = 0;
    int err, i, written;
    int result = -1;
    MTdata d = init_genrand( gRandomSeed );

    program_source = (char *)malloc(source_size);
    if (!program_source)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL);
    if (err) {
        log_error("clGetDeviceInfo failed, %d\n\n", err);
        goto cleanup;
    }
    // Use half of the device maximum, but never less than one work-item.
    wgsize /= 2;
    if (wgsize < 1)
        wgsize = 1;

    in_length = sizeof(cl_int) * num_elements;
    out_length = sizeof(cl_int) * wgsize;

    input_ptr = (cl_int *)malloc(in_length);
    output_ptr = (cl_int *)malloc(out_length);
    tmp_ptr = (cl_int *)malloc(out_length);
    if (!input_ptr || !output_ptr || !tmp_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i = 0; i < num_elements; i++)
        input_ptr[i] = (cl_int) genrand_int32(d);
    free_mtdata(d);
    d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, in_length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    // Validate that created kernel doesn't violate local memory size allowed by the device
    err = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(localMemSize), &localMemSize, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo failed\n");
        goto cleanup;
    }
    // The kernel's array is declared with wgsize*sizeof(cl_int) elements (see
    // the format argument below), i.e. wgsize*sizeof(cl_int)*sizeof(cl_int)
    // bytes, so that is the quantity clamped against the device limit.
    if ( wgsize > (localMemSize / (sizeof(cl_int)*sizeof(cl_int))) )
    {
        wgsize = (size_t)(localMemSize / (sizeof(cl_int)*sizeof(cl_int)));
    }

    // Bounded formatting: fail cleanly instead of overrunning the buffer.
    written = snprintf(program_source, source_size, barrier_with_localmem_kernel_code[1], (int)(wgsize * sizeof(cl_int)));
    if (written < 0 || (size_t)written >= source_size)
    {
        log_error("kernel source did not fit in the program buffer\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, (const char**)&program_source, "compute_sum_with_localmem" );
    if (err)
        goto cleanup;

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE (%d)\n", err);
        result = err;
        goto cleanup;
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES (%d)\n", err);
        result = err;
        goto cleanup;
    }

    // Pick the minimum of the device and the kernel
    if (kwgsize > max_local_workgroup_size[0])
        kwgsize = max_local_workgroup_size[0];

    err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
    err |= clSetKernelArg(kernel, 2, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = wgsize;
    local_threads[0] = wgsize;

    // Adjust the local thread size to fit and be a nice multiple.
    if (kwgsize < wgsize) {
        // Cast so the arguments really match the %lu conversions on every ABI.
        log_info("Adjusting wgsize down from %lu to %lu.\n", (unsigned long)wgsize, (unsigned long)kwgsize);
        local_threads[0] = kwgsize;
    }
    while (global_threads[0] % local_threads[0] != 0)
        local_threads[0]--;

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements);

cleanup:
    if (d)
        free_mtdata(d);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(program_source);
    free(input_ptr);
    free(tmp_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -1,138 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Element count of the kernel-scope __local array declared in kernelSource.
// The host code uses the numeric form to cap the work-group size.
#define MAX_LOCAL_STORAGE_SIZE 256
// The same value as a string literal, so it can be pasted into the kernel
// source below; the two definitions must be kept in sync by hand.
#define MAX_LOCAL_STORAGE_SIZE_STRING "256"
// OpenCL C source of the "test" kernel: every work-item stores its input value
// into a kernel-scope __local array at a mirrored index, and after a barrier
// the first work-item of each group writes the group's maximum to outMaxes.
const char *kernelSource[] = {
"__kernel void test( __global unsigned int * input, __global unsigned int *outMaxes )\n"
"{\n"
" __local unsigned int localStorage[ " MAX_LOCAL_STORAGE_SIZE_STRING " ];\n"
" unsigned int theValue = input[ get_global_id( 0 ) ];\n"
"\n"
" // If we just write linearly, there's no verification that the items in a group share local data\n"
" // So we write reverse-linearly, which requires items to read the local data written by at least one\n"
" // different item\n"
" localStorage[ get_local_size( 0 ) - get_local_id( 0 ) - 1 ] = theValue;\n"
"\n"
" // The barrier ensures that all local items have written to the local storage\n"
" barrier( CLK_LOCAL_MEM_FENCE );\n"
"\n"
" // Now we loop back through the local storage and look for the max value. We only do this if\n"
" // we're the first item in a group\n"
" unsigned int max = 0;\n"
" if( get_local_id( 0 ) == 0 )\n"
" {\n"
" for( size_t i = 0; i < get_local_size( 0 ); i++ )\n"
" {\n"
" if( localStorage[ i ] > max )\n"
" max = localStorage[ i ];\n"
" }\n"
" outMaxes[ get_group_id( 0 ) ] = max;\n"
" }\n"
"}\n"
};
/*
 * Checks that a __local array declared in kernel scope is shared by the
 * work-items of a group: the kernel writes each group's maximum input value
 * to the output buffer and the host recomputes the maxima for comparison.
 *
 * device/context/queue - OpenCL objects the test runs against
 * num_elements         - unused by this test
 *
 * Returns 0 on success; -1 on a validation or allocation failure, or the
 * error code of the failing OpenCL/helper call. Every exit goes through the
 * cleanup label so the host buffers and the random generator are released on
 * failure as well.
 */
int test_local_kernel_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // All locals are declared before the first "goto" so that no jump
    // bypasses an initialisation.
    cl_int error;
    int result = -1;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];
    cl_uint *inputData = NULL;
    cl_uint *outputData = NULL;
    size_t workGroupSize = 0;
    size_t testSize = 0;
    size_t numGroups = 0;
    MTdata randSeed = init_genrand( gRandomSeed );

    // Create a test kernel
    error = create_single_kernel_helper( context, &program, &kernel, 1, kernelSource, "test" );
    if( error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to create test kernel (%d)\n", error );
        result = error;
        goto cleanup;
    }

    // Determine an appropriate test size
    error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workGroupSize ), &workGroupSize, NULL );
    if( error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to obtain kernel work group size (%d)\n", error );
        result = error;
        goto cleanup;
    }

    // Make sure the work group size doesn't overrun our local storage size in the kernel
    while( workGroupSize > MAX_LOCAL_STORAGE_SIZE )
        workGroupSize >>= 1;

    // Use a whole number of work-groups covering at least 1024 elements.
    testSize = workGroupSize;
    while( testSize < 1024 )
        testSize += workGroupSize;
    numGroups = testSize / workGroupSize;
    // Cast so the arguments really match the %ld conversions on every ABI.
    log_info( "\tTesting with %ld groups, %ld elements per group...\n", (long)numGroups, (long)workGroupSize );

    // Create two buffers for operation
    inputData = (cl_uint*)malloc( testSize * sizeof(cl_uint) );
    outputData = (cl_uint*)malloc( numGroups * sizeof(cl_uint) );
    if( inputData == NULL || outputData == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers\n" );
        goto cleanup;
    }

    generate_random_data( kUInt, testSize, randSeed, inputData );
    free_mtdata( randSeed );
    randSeed = NULL;

    streams[ 0 ] = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, testSize * sizeof(cl_uint), inputData, &error );
    if( error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to create input buffer (%d)\n", error );
        result = error;
        goto cleanup;
    }

    streams[ 1 ] = clCreateBuffer( context, CL_MEM_WRITE_ONLY, numGroups * sizeof(cl_uint), NULL, &error );
    if( error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to create output buffer (%d)\n", error );
        result = error;
        goto cleanup;
    }

    // Set up the kernel args and run
    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    if( error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to set kernel arg (%d)\n", error );
        result = error;
        goto cleanup;
    }
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    if( error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to set kernel arg (%d)\n", error );
        result = error;
        goto cleanup;
    }

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &testSize, &workGroupSize, 0, NULL, NULL );
    if( error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to enqueue kernel (%d)\n", error );
        result = error;
        goto cleanup;
    }

    // Read results and verify
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, numGroups * sizeof(cl_uint), outputData, 0, NULL, NULL );
    if( error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to read output data (%d)\n", error );
        result = error;
        goto cleanup;
    }

    // The MinGW compiler seems to have a bug that optimizes the code below incorrectly.
    // The volatile keyword on the size_t declarations avoids aggressive optimization by the compiler.
    for( volatile size_t i = 0; i < numGroups; i++ )
    {
        // Determine the max in our case
        cl_uint localMax = 0;
        for( volatile size_t j = 0; j < workGroupSize; j++ )
        {
            if( inputData[ i * workGroupSize + j ] > localMax )
                localMax = inputData[ i * workGroupSize + j ];
        }

        if( outputData[ i ] != localMax )
        {
            log_error( "ERROR: Local max validation failed! (expected %u, got %u for i=%lu)\n", localMax, outputData[ i ] , (unsigned long)i );
            goto cleanup;
        }
    }

    result = 0;

cleanup:
    if( randSeed )
        free_mtdata( randSeed );
    free(inputData);
    free(outputData);
    return result;
}

View File

@@ -1,184 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "test_loop" kernel: work-item tid sums loopcnt[tid]
// consecutive elements of src, starting at index loopindx[tid] and restarting
// from index 0 whenever the index reaches the global size, into dst[tid].
// verify_loop() computes the same thing on the host.
const char *loop_kernel_code =
"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" int n = get_global_size(0);\n"
" int i, j;\n"
"\n"
" dst[tid] = 0;\n"
" for (i=0,j=loopindx[tid]; i<loopcnt[tid]; i++,j++)\n"
" {\n"
" if (j >= n)\n"
" j = 0;\n"
" dst[tid] += src[j];\n"
" }\n"
"\n"
"}\n";
/*
 * Host reference check for the test_loop kernel.
 *
 * For every i in [0, n) this sums loopcnt[i] elements of inptr, starting at
 * index loopindx[i] and restarting from index 0 whenever the index reaches n,
 * and compares the total with outptr[i].
 *
 * Returns 0 when every entry matches, -1 on the first mismatch.
 */
int
verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n)
{
    int i, j, k;

    for (i = 0; i < n; i++)
    {
        // Accumulate in unsigned arithmetic: the inputs may be arbitrary
        // 32-bit values, and overflowing a signed accumulator is undefined
        // behaviour. Unsigned addition wraps modulo 2^32 instead.
        unsigned int expected = 0;

        for (j = 0, k = loopindx[i]; j < loopcnt[i]; j++, k++)
        {
            if (k >= n)
                k = 0;
            expected += (unsigned int)inptr[k];
        }

        if (expected != (unsigned int)outptr[i])
        {
            log_error("LOOP test failed: %d found, expected %d\n", outptr[i], (int)expected);
            return -1;
        }
    }

    log_info("LOOP test passed\n");
    return 0;
}
/*
 * Runs the test_loop kernel over num_elements work-items with random input
 * data, start indices and loop counts, then checks the output with
 * verify_loop().
 *
 * Returns 0 on success and -1 on any failure. Every exit goes through the
 * cleanup label so host buffers, the random generator and all OpenCL objects
 * are released on failure too.
 */
int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // All locals are declared up front so that no "goto cleanup" jumps over
    // an initialisation (which would be ill-formed when built as C++).
    cl_mem streams[4] = { NULL, NULL, NULL, NULL };
    cl_int *input_ptr = NULL, *loop_indx = NULL, *loop_cnt = NULL, *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t threads[1];
    size_t length = sizeof(cl_int) * num_elements;
    MTdata d = NULL;
    int err, i;
    int result = -1;

    input_ptr = (cl_int*)malloc(length);
    loop_indx = (cl_int*)malloc(length);
    loop_cnt = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr || !loop_indx || !loop_cnt || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    // streams[0..2] carry src, loopindx and loopcnt; streams[3] receives dst.
    for (i = 0; i < 4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
    }

    d = init_genrand( gRandomSeed );
    for (i = 0; i < num_elements; i++)
    {
        input_ptr[i] = (int)genrand_int32(d);
        loop_indx[i] = (int)get_random_float(0, num_elements-1, d);
        loop_cnt[i] = (int)get_random_float(0, num_elements/32, d);
    }
    free_mtdata(d);
    d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }
    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, loop_indx, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }
    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, loop_cnt, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &loop_kernel_code, "test_loop" );
    if (err)
        goto cleanup;

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel, 3, sizeof streams[3], &streams[3]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clReadArray failed\n");
        goto cleanup;
    }

    result = verify_loop(input_ptr, loop_indx, loop_cnt, output_ptr, num_elements);

cleanup:
    if (d)
        free_mtdata(d);
    for (i = 0; i < 4; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(loop_indx);
    free(loop_cnt);
    free(output_ptr);

    return result;
}

View File

@@ -1,140 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "test_pointer_cast" kernel: the byte buffer src is
// reinterpreted as an array of unsigned int and element tid is copied to
// dst[tid].
static const char *pointer_cast_kernel_code =
"__kernel void test_pointer_cast(__global unsigned char *src, __global unsigned int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" __global unsigned int *p = (__global unsigned int *)src;\n"
"\n"
" dst[tid] = p[tid];\n"
"\n"
"}\n";
/*
 * Host reference check for the test_pointer_cast kernel.
 *
 * inptr  - the raw input bytes; at least n * sizeof(unsigned int) of them
 * outptr - the n values produced by the device
 * n      - number of unsigned int elements to compare
 *
 * Returns 0 when every element matches, -1 on the first mismatch.
 */
int
verify_pointer_cast(unsigned char *inptr, unsigned int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        // Read the i-th word with memcpy rather than through a casted
        // pointer, so the check does not depend on inptr being suitably
        // aligned for unsigned int or on type-punning being permitted.
        unsigned int expected;
        memcpy(&expected, inptr + (size_t)i * sizeof(expected), sizeof(expected));

        if (expected != outptr[i])
        {
            log_error("POINTER_CAST test failed\n");
            return -1;
        }
    }

    log_info("POINTER_CAST test passed\n");
    return 0;
}
/*
 * Uploads a buffer of random bytes, runs the test_pointer_cast kernel over
 * num_elements work-items and checks the output with verify_pointer_cast().
 *
 * Returns 0 on success and -1 on any failure. Every exit goes through the
 * cleanup label so host buffers, the random generator and all OpenCL objects
 * are released on failure too.
 */
int test_pointer_cast(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // All locals are declared up front so that no "goto cleanup" jumps over
    // an initialisation (which would be ill-formed when built as C++).
    cl_mem streams[2] = { NULL, NULL };
    unsigned char *input_ptr = NULL;
    unsigned int *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t threads[1];
    size_t length = sizeof(int) * num_elements;
    size_t byte_index;
    int err;
    int result = -1;
    MTdata d = init_genrand( gRandomSeed );

    input_ptr = (unsigned char*)malloc(length);
    output_ptr = (unsigned int*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    // Fill exactly the allocated byte count instead of assuming that an
    // element is four bytes wide.
    for (byte_index = 0; byte_index < length; byte_index++)
        input_ptr[byte_index] = (unsigned char)genrand_int32(d);
    free_mtdata(d);
    d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &pointer_cast_kernel_code, "test_pointer_cast" );
    if (err)
        goto cleanup;

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clReadArray failed\n");
        goto cleanup;
    }

    result = verify_pointer_cast(input_ptr, output_ptr, num_elements);

cleanup:
    if (d)
        free_mtdata(d);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -1,97 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
// printf-style template for the vec_type_hint test kernel. It takes three
// string arguments, in order: an optional pragma line (may be empty), the
// scalar type name and the vector-size suffix; the last two are concatenated
// inside vec_type_hint(...). The kernel itself just copies src[tid] to dst[tid].
static const char *sample_kernel = {
"%s\n" // optional pragma string
"__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n"
};
/*
 * Builds and runs the sample_test kernel once for every combination of scalar
 * type and vector size in the vec_type_hint attribute, checking only that the
 * program builds and a single work-item executes without an API error.
 * Double variants are skipped when cl_khr_fp64 is not reported.
 *
 * Returns 0 when every variant ran; otherwise -1 if a kernel source did not
 * fit the buffer, or whatever the failing helper / test_error path returns.
 */
int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t vec_type_index, vec_size_index;
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    const char *size_names[] = {"", "2", "4", "8", "16"};
    // Derive the loop bounds from the tables so they cannot drift apart.
    const size_t num_types = sizeof(vecType) / sizeof(vecType[0]);
    const size_t num_sizes = sizeof(size_names) / sizeof(size_names[0]);
    // Automatic storage: nothing to free on the early-return paths below.
    char program_source[4096];
    const char *source_ptr = program_source;

    for (vec_type_index = 0; vec_type_index < num_types; vec_type_index++) {

        if (vecType[vec_type_index] == kDouble) {
            if (!is_extension_available(deviceID, "cl_khr_fp64")) {
                log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
                continue;
            }
            log_info("Testing doubles.\n");
        }

        for (vec_size_index = 0; vec_size_index < num_sizes; vec_size_index++) {
            clProgramWrapper program;
            clKernelWrapper kernel;
            clMemWrapper in, out;
            size_t global[] = {1,1,1};
            int written;

            log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);

            // Bounded formatting: fail cleanly instead of overrunning the buffer.
            written = snprintf(program_source, sizeof(program_source), sample_kernel,
                               (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
                               get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);
            if (written < 0 || (size_t)written >= sizeof(program_source)) {
                log_error("kernel source did not fit in the program buffer\n");
                return -1;
            }

            error = create_single_kernel_helper( context, &program, &kernel, 1, &source_ptr, "sample_test" );
            if( error != 0 )
                return error;

            in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error);
            test_error(error, "clCreateBuffer failed");
            out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error);
            test_error(error, "clCreateBuffer failed");

            error = clSetKernelArg(kernel, 0, sizeof(in), &in);
            test_error(error, "clSetKernelArg failed");
            error = clSetKernelArg(kernel, 1, sizeof(out), &out);
            test_error(error, "clSetKernelArg failed");

            error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL);
            test_error(error, "clEnqueueNDRangeKernel failed");

            error = clFinish(queue);
            test_error(error, "clFinish failed");
        }
    }

    return 0;
}

View File

@@ -1,985 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
#include "harness/errorHelpers.h"
// When non-zero, test_vload logs extra diagnostics (parameters, kernel source,
// buffer contents) and shrinks the run to at most 128 buffer bytes and 16 loads.
#define DEBUG 0
// When non-zero, load number i uses offset i instead of a random offset, which
// makes the diagnostic output easier to follow.
#define LINEAR_OFFSETS 0
// Number of vector loads (one per work-item) performed when DEBUG is 0.
#define NUM_LOADS 512
// Pragma line prepended to generated kernel sources that use double.
static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
#pragma mark -------------------- vload harness --------------------------
// Signature of the functions that write a kernel's OpenCL C source into
// destBuffer. test_vload calls it with its element count as inBufferSize and
// with the vector size as both inVectorSize and outVectorSize.
typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize );
// Runs one vload test case: generates random input data and random load
// offsets, builds the kernel produced by createFn, executes it with one
// work-item per load and compares each loaded vector against a host-side copy
// taken from the same position of the input buffer.
//
//   type, vecSize - element type and vector width under test
//   createFn      - writes the kernel source for this type/width
//   bufferSize    - requested input size in bytes (rounded down to a whole
//                   number of vectors below)
//   d             - random generator used for the data and the offsets
//
// Returns 0 when all loads match and 1 on the first mismatch. The test_error
// calls presumably return early with the error code on API failures (the
// macro is defined outside this file -- TODO confirm).
int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
create_vload_program_fn createFn, size_t bufferSize, MTdata d )
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 4 ];
const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS;
if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128;
size_t threads[ 1 ], localThreads[ 1 ];
clProtectedArray inBuffer( bufferSize );
char programSrc[ 10240 ];
cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ];
size_t numElements, typeSize, i;
unsigned int outVectorSize;
typeSize = get_explicit_type_size( type );
numElements = bufferSize / ( typeSize * vecSize );
bufferSize = numElements * typeSize * vecSize; // To account for rounding
if (DEBUG) log_info("Testing: numLoads: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numLoads, (int)typeSize, vecSize, (int)numElements, (int)bufferSize);
// Create some random input data and random offsets to load from
generate_random_data( type, numElements * vecSize, d, (void *)inBuffer );
for( i = 0; i < numLoads; i++ )
{
offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 1, d );
// Only vectors with room after them get a non-zero scalar alignment offset,
// so that the shifted load stays inside the input buffer.
if( offsets[ i ] < numElements - 2 )
alignmentOffsets[ i ] = (cl_uint)random_in_range( 0, (int)vecSize - 1, d );
else
alignmentOffsets[ i ] = 0;
if (LINEAR_OFFSETS) offsets[i] = (cl_uint)i;
}
if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n");
// 32-bit fixup
outVectorSize = vecSize;
// Declare output buffers now
// NOTE(review): the _malloca branch has no matching _freea anywhere in this
// function; if _malloca falls back to the heap for these sizes the memory is
// never released -- confirm against the MSVC documentation.
#if !(defined(_WIN32) && defined(_MSC_VER))
char outBuffer[ numLoads * typeSize * outVectorSize ];
char referenceBuffer[ numLoads * typeSize * vecSize ];
#else
char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char));
char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char));
#endif
// Create the program
createFn( programSrc, numElements, type, vecSize, outVectorSize);
// Create our kernel
const char *ptr = programSrc;
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
test_error( error, "Unable to create testing kernel" );
if (DEBUG) log_info("Kernel: \n%s\n", programSrc);
// Get the number of args to differentiate the kernels with local storage. (They have 5)
cl_uint numArgs;
error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
test_error( error, "clGetKernelInfo failed");
// Set up parameters
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, bufferSize, (void *)inBuffer, &error );
test_error( error, "Unable to create kernel stream" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(offsets[0]), offsets, &error );
test_error( error, "Unable to create kernel stream" );
streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error );
test_error( error, "Unable to create kernel stream" );
// NOTE(review): outBuffer has not been written at this point, so the initial
// contents copied into streams[ 3 ] are indeterminate.
streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error );
test_error( error, "Unable to create kernel stream" );
// Set parameters and run
if (numArgs == 5) {
// We need to set the size of the local storage
error = clSetKernelArg(kernel, 0, bufferSize, NULL);
test_error( error, "clSetKernelArg for buffer failed");
for( i = 0; i < 4; i++ )
{
error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] );
test_error( error, "Unable to set kernel argument" );
}
} else {
// No local storage
for( i = 0; i < 4; i++ )
{
error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] );
test_error( error, "Unable to set kernel argument" );
}
}
threads[ 0 ] = numLoads;
error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
test_error( error, "Unable to get local thread size" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to exec kernel" );
// Get the results
error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL );
test_error( error, "Unable to read results" );
// Create the reference results
memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char));
for( i = 0; i < numLoads; i++ )
{
// Source position in scalars: vector index * vecSize plus the alignment offset.
memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize,
typeSize * vecSize );
}
// Validate the results now
char *expected = referenceBuffer;
char *actual = outBuffer;
char *in = (char *)(void *)inBuffer;
if (DEBUG) {
log_info("Memory contents:\n");
// NOTE(review): this dump indexes expected/actual up to numElements, but
// those buffers only hold numLoads entries -- confirm the bounds before
// enabling DEBUG.
for (i=0; i<numElements; i++) {
char inString[1024];
char expectedString[ 1024 ], actualString[ 1024 ];
if (i < numLoads) {
log_info("buffer %3d: input: %s expected: %s got: %s (load offset %3d, alignment offset %3d)", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ),
GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
GetDataVectorString( &(actual[i*typeSize*outVectorSize]), typeSize, vecSize, actualString ),
offsets[i], alignmentOffsets[i]);
if (memcmp(&(expected[i*typeSize*vecSize]), &(actual[i*typeSize*outVectorSize]), typeSize * vecSize) != 0)
log_error(" << ERROR\n");
else
log_info("\n");
} else {
log_info("buffer %3d: input: %s expected: %s got: %s\n", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ),
GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
GetDataVectorString( &(actual[i*typeSize*outVectorSize]), typeSize, vecSize, actualString ));
}
}
}
// Compare one vector per load, advancing both cursors by one vector each time.
for( i = 0; i < numLoads; i++ )
{
if( memcmp( expected, actual, typeSize * vecSize ) != 0 )
{
char expectedString[ 1024 ], actualString[ 1024 ];
log_error( "ERROR: Data sample %d for vload of %s%d did not validate (expected {%s}, got {%s}, loaded from offset %d)\n",
(int)i, get_explicit_type_name( type ), vecSize, GetDataVectorString( expected, typeSize, vecSize, expectedString ),
GetDataVectorString( actual, typeSize, vecSize, actualString ), (int)offsets[ i ] );
return 1;
}
expected += typeSize * vecSize;
actual += typeSize * outVectorSize;
}
return 0;
}
/*
 * Runs test_vload for every supported element type and each vector size in
 * { 2, 3, 4, 8, 16 }, using the kernel generator createFn and an input buffer
 * of bufferSize bytes.
 *
 * Double is skipped when cl_khr_fp64 is not reported, and long/ulong are
 * skipped when gHasLong is false. After the first failing size of a type the
 * remaining sizes of that type are skipped.
 *
 * Returns the sum of the non-zero test_vload results (0 means all passed).
 */
int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queue, create_vload_program_fn createFn, size_t bufferSize )
{
    // Both tables end with a sentinel (kNumExplicitTypes / 0).
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 };
    unsigned int typeIdx, sizeIdx;
    int error = 0;
    MTdata mtData = init_genrand( gRandomSeed );

    log_info("Testing with buffer size of %d.\n", (int)bufferSize);

    for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ )
    {
        if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
            continue;

        if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong )
            continue;

        for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
        {
            // Print the size straight from vecSizes instead of keeping a
            // parallel table of size strings in sync with it.
            log_info("Testing %s%u...\n", get_explicit_type_name(vecType[typeIdx]), vecSizes[sizeIdx]);

            int error_this_type = test_vload( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, mtData );
            if (error_this_type) {
                error += error_this_type;
                // Terminate the line so the next log message starts on its own line.
                log_error("Failure; skipping further sizes for this type.\n");
                break;
            }
        }
    }

    free_mtdata(mtData);

    return error;
}
#pragma mark -------------------- vload test cases --------------------------
// Formats into destBuffer the OpenCL C source of a kernel "test_fn" that vloads from a
// __global source buffer.
//
// destBuffer     receives the generated source; sprintf() does no bounds checking, so the
//                caller must provide a large enough buffer.
// inBufferSize   not used by this generator.
// type           element type; for kDouble the source starts with doubleExtensionPragma.
// inVectorSize   width N used for vloadN; width 3 selects a dedicated kernel.
// outVectorSize  vector width of the results pointer (not used for width 3).
void create_global_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
{
    // Width 3: the loaded vector is written back as three consecutive scalars.
    const char *scalarOutKernel =
    "%s%s"
    "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    " %s3 tmp = vload3( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
    " results[ 3*tid ] = tmp.s0;\n"
    " results[ 3*tid+1 ] = tmp.s1;\n"
    " results[ 3*tid+2 ] = tmp.s2;\n"
    "}\n";

    // Every other width: the loaded vector is stored whole into results[ tid ].
    const char *vectorOutKernel =
    "%s%s"
    "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    " %s%d tmp = vload%d( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
    " results[ tid ] = tmp;\n"
    "}\n";

    const char *elemName = get_explicit_type_name( type );
    const char *fp64Pragma = ( type == kDouble ) ? doubleExtensionPragma : "";

    if( inVectorSize != 3 )
    {
        const int loadWidth = (int)inVectorSize;
        sprintf( destBuffer, vectorOutKernel,
                 fp64Pragma, "",
                 elemName,
                 elemName, (int)outVectorSize,
                 elemName, loadWidth, loadWidth,
                 elemName );
        return;
    }

    sprintf( destBuffer, scalarOutKernel,
             fp64Pragma, "",
             elemName, elemName,
             elemName, elemName );
}
// Test entry point: runs the vload test set against __global memory.
// n_elems is not used; the buffer size handed to test_vloadset() is fixed at 10240.
int test_vload_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t globalBufferSize = 10240;

    return test_vloadset( device, context, queue, create_global_load_code, globalBufferSize );
}
// Formats into destBuffer the OpenCL C source of a kernel "test_fn" that stages the source
// data in __local memory and then vloads from that local copy.
//
// The generated kernel takes the local storage as its first argument. Only the work-item
// with local id 0 copies src into it; a barrier follows before any work-item loads.
//
// destBuffer     receives the generated source; sprintf() does no bounds checking.
// inBufferSize   loop bound of the staging copy emitted into the kernel.
// type           element type; for kDouble the source starts with doubleExtensionPragma.
// inVectorSize   width N used for vloadN; width 3 selects a dedicated kernel.
// outVectorSize  vector width of the results pointer (not used for width 3).
void create_local_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
{
    // Width 3: buffers are addressed as scalars, three per loop iteration.
    const char *scalarKernel =
    "%s%s"
    "__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    " int lid = get_local_id( 0 );\n"
    "\n"
    " if( lid == 0 )\n"
    " {\n"
    " for( int i = 0; i < %d; i++ ) {\n"
    " sSharedStorage[ 3*i ] = src[ 3*i ];\n"
    " sSharedStorage[ 3*i +1] = src[ 3*i +1];\n"
    " sSharedStorage[ 3*i +2] = src[ 3*i +2];\n"
    " }\n"
    " }\n"
    // The copy above runs on one work-item per group; the barrier keeps the others from
    // reading the shared storage before that copy has finished.
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    " %s3 tmp = vload3( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
    " results[ 3*tid ] = tmp.s0;\n"
    " results[ 3*tid +1] = tmp.s1;\n"
    " results[ 3*tid +2] = tmp.s2;\n"
    "}\n";

    // Every other width: buffers are addressed as whole vectors.
    const char *vectorKernel =
    "%s%s"
    "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    " int lid = get_local_id( 0 );\n"
    "\n"
    " if( lid == 0 )\n"
    " {\n"
    " for( int i = 0; i < %d; i++ )\n"
    " sSharedStorage[ i ] = src[ i ];\n"
    " }\n"
    // Same reasoning as for the width-3 kernel: wait for the staging copy.
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    " %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
    " results[ tid ] = tmp;\n"
    "}\n";

    const char *elemName = get_explicit_type_name( type );
    const char *fp64Pragma = ( type == kDouble ) ? doubleExtensionPragma : "";
    const int copyCount = (int)inBufferSize;

    if( inVectorSize != 3 )
    {
        const int loadWidth = (int)inVectorSize;
        sprintf( destBuffer, vectorKernel,
                 fp64Pragma, "",
                 elemName, loadWidth,
                 elemName, loadWidth,
                 elemName, (int)outVectorSize,
                 copyCount,
                 elemName, loadWidth, loadWidth,
                 elemName );
        return;
    }

    sprintf( destBuffer, scalarKernel,
             fp64Pragma, "",
             elemName, elemName, elemName,
             copyCount,
             elemName, elemName );
}
// Test entry point: runs the vload test set against __local memory.
// The buffer size is derived from CL_DEVICE_LOCAL_MEM_SIZE: capped at 10240, then reduced
// by 2048 when still above 4096, otherwise halved. n_elems is not used.
int test_vload_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    cl_ulong usableLocal;
    int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( usableLocal ), &usableLocal, NULL );
    test_error( error, "Unable to get max size of local memory buffer" );

    if( usableLocal > 10240 )
        usableLocal = 10240;

    usableLocal = ( usableLocal > 4096 ) ? ( usableLocal - 2048 ) : ( usableLocal / 2 );

    return test_vloadset( device, context, queue, create_local_load_code, (size_t)usableLocal );
}
// Formats into destBuffer the OpenCL C source of a kernel "test_fn" that vloads from a
// __constant source buffer.
//
// destBuffer     receives the generated source; sprintf() does no bounds checking.
// inBufferSize   not used by this generator.
// type           element type; for kDouble the source starts with doubleExtensionPragma.
// inVectorSize   width N used for vloadN; width 3 selects a dedicated kernel.
// outVectorSize  vector width of the results pointer (not used for width 3).
void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
{
    // Width 3: the loaded vector is written back as three consecutive scalars.
    const char *scalarOutKernel =
    "%s%s"
    "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    " %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
    " results[ 3*tid ] = tmp.s0;\n"
    " results[ 3*tid+1 ] = tmp.s1;\n"
    " results[ 3*tid+2 ] = tmp.s2;\n"
    "}\n";

    // Every other width: the loaded vector is stored whole into results[ tid ].
    const char *vectorOutKernel =
    "%s%s"
    "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    " %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
    " results[ tid ] = tmp;\n"
    "}\n";

    const char *elemName = get_explicit_type_name( type );
    const char *fp64Pragma = ( type == kDouble ) ? doubleExtensionPragma : "";

    if( inVectorSize != 3 )
    {
        const int loadWidth = (int)inVectorSize;
        sprintf( destBuffer, vectorOutKernel,
                 fp64Pragma, "",
                 elemName,
                 elemName, (int)outVectorSize,
                 elemName, loadWidth, loadWidth,
                 elemName );
        return;
    }

    sprintf( destBuffer, scalarOutKernel,
             fp64Pragma, "",
             elemName, elemName,
             elemName, elemName );
}
// Test entry point: runs the vload test set against __constant memory.
// The buffer size is derived from CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: capped at 10240, then
// reduced by 2048 when still above 4096, otherwise halved. n_elems is not used.
int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    cl_ulong constantLimit;
    int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( constantLimit ), &constantLimit, NULL );
    test_error( error, "Unable to get max size of constant memory buffer" );

    if( constantLimit > 10240 )
        constantLimit = 10240;

    if( constantLimit <= 4096 )
        constantLimit /= 2;
    else
        constantLimit -= 2048;

    return test_vloadset( device, context, queue, create_constant_load_code, (size_t)constantLimit );
}
// Formats into destBuffer the OpenCL C source of a kernel "test_fn" that copies the source
// data into a __private array and then vloads from that private copy.
//
// Every work-item in the generated kernel performs the copy itself, so the kernel contains
// no barrier.
//
// destBuffer     receives the generated source; sprintf() does no bounds checking.
// inBufferSize   element count of the private array (multiplied by 3 for the width-3
//                kernel, whose array holds scalars) and, for other widths, the copy bound.
// type           element type; for kDouble the source starts with doubleExtensionPragma.
// inVectorSize   width N used for vloadN; width 3 selects a dedicated kernel.
// outVectorSize  vector width of the results pointer (not used for width 3).
void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
{
    // Width 3: the private array is an array of scalars.
    const char *scalarKernel =
    "%s%s"
    "#define PRIV_TYPE %s\n"
    "#define PRIV_SIZE %d\n"
    "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
    "{\n"
    " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
    " int tid = get_global_id( 0 );\n"
    "\n"
    " for( int i = 0; i < PRIV_SIZE; i++ )\n"
    " {\n"
    " sPrivateStorage[ i ] = src[ i ];\n"
    " }\n"
    "\n"
    " %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
    " results[ 3*tid ] = tmp.s0;\n"
    " results[ 3*tid+1 ] = tmp.s1;\n"
    " results[ 3*tid+2 ] = tmp.s2;\n"
    "}\n";

    // Every other width: the private array is an array of vectors.
    const char *vectorKernel =
    "%s%s"
    "#define PRIV_TYPE %s%d\n"
    "#define PRIV_SIZE %d\n"
    "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
    "{\n"
    " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
    " int tid = get_global_id( 0 );\n"
    "\n"
    " for( int i = 0; i < %d; i++ )\n"
    " sPrivateStorage[ i ] = src[ i ];\n"
    "\n"
    " %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
    " results[ tid ] = tmp;\n"
    "}\n";

    const char *elemName = get_explicit_type_name( type );
    const char *fp64Pragma = ( type == kDouble ) ? doubleExtensionPragma : "";
    const int elemCount = (int)inBufferSize;

    if( inVectorSize != 3 )
    {
        const int loadWidth = (int)inVectorSize;
        sprintf( destBuffer, vectorKernel,
                 fp64Pragma, "",
                 elemName, loadWidth, elemCount,
                 elemName, loadWidth,
                 elemName, (int)outVectorSize,
                 elemCount,
                 elemName, loadWidth, loadWidth,
                 elemName );
        return;
    }

    sprintf( destBuffer, scalarKernel,
             fp64Pragma, "",
             elemName, 3*elemCount,
             elemName, elemName,
             elemName );
}
// Test entry point: runs the vload test set against __private memory.
// The amount of private storage actually available is unknown, so a fixed 256 is used:
// room for two 16-element vectors of 8-byte values (2 * 8 * 16). n_elems is not used.
int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t privateBufferSize = 256;

    return test_vloadset( device, context, queue, create_private_load_code, privateBufferSize );
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma mark -------------------- vstore harness --------------------------
// Signature of the kernel-source generators used by the vstore harness. The generator
// writes the OpenCL C source of a kernel into destBuffer. test_vstore() passes its element
// count (numElements) as inBufferSize, the element type as type, and the vector width as
// inVectorSize.
typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize );
// Exercises vstoreN for one element type and one vector width.
//
// Random input vectors are stored by the generated kernel at unique random vector offsets of
// an output buffer. The run is repeated for every address offset in [0, vecSize), and after
// each run the whole output buffer is compared with a host-built reference, so that untouched
// ("canary") locations are checked as well.
//
// device/context/queue  OpenCL objects to run on (device is not referenced in this function).
// type, vecSize         element type and vector width under test.
// createFn              generates the kernel source; called with (programSrc, numElements, type, vecSize).
// bufferSize            requested output size in bytes; rounded down to a whole number of vectors.
// d                     random number generator state.
//
// Returns 0 when every run validates and 1 on the first data mismatch. OpenCL failures go
// through test_error(), which presumably logs and returns the error code (macro not defined
// in this part of the file).
//
// NOTE(review): on MSVC the scratch arrays come from _malloca() and no _freea() call appears
// anywhere in this function.
int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
create_vstore_program_fn createFn, size_t bufferSize, MTdata d )
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 3 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS;

    if (DEBUG)
        bufferSize = (bufferSize < 128) ? bufferSize : 128;

    typeSize = get_explicit_type_size( type );
    numElements = bufferSize / ( typeSize * vecSize );
    bufferSize = numElements * typeSize * vecSize; // To account for rounding

    if( numStores > numElements * 2 / 3 )
    {
        // Note: unlike load, we have to restrict the # of stores here, since all offsets must be unique for our test
        // (Plus, we leave some room for extra values to make sure didn't get written)
        numStores = numElements * 2 / 3;
        if( numStores < 1 )
            numStores = 1;
    }
    if (DEBUG)
        log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize);

    // Scratch arrays: variable-length arrays where the compiler supports them, _malloca on MSVC.
#if !(defined(_WIN32) && defined(_MSC_VER))
    cl_uint offsets[ numStores ];
#else
    cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint));
#endif
    char programSrc[ 10240 ];
    size_t i;

#if !(defined(_WIN32) && defined(_MSC_VER))
    char inBuffer[ numStores * typeSize * vecSize ];
#else
    char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char));
#endif
    clProtectedArray outBuffer( numElements * typeSize * vecSize );

#if !(defined(_WIN32) && defined(_MSC_VER))
    char referenceBuffer[ numElements * typeSize * vecSize ];
#else
    char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char));
#endif

    // Create some random input data and random offsets to load from
    generate_random_data( type, numStores * vecSize, d, (void *)inBuffer );

    // Note: make sure no two offsets are the same, otherwise the output would depend on
    // the order that threads ran in, and that would be next to impossible to verify
#if !(defined(_WIN32) && defined(_MSC_VER))
    char flags[ numElements ];
#else
    char* flags = (char*)_malloca( numElements * sizeof(char));
#endif

    memset( flags, 0, numElements * sizeof(char) );

    for( i = 0; i < numStores; i++ )
    {
        // Redraw until an offset is found that no earlier store has claimed.
        do
        {
            offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 2, d ); // Note: keep it one vec below the end for offset testing
        } while( flags[ offsets[ i ] ] != 0 );
        flags[ offsets[ i ] ] = -1;
        if (LINEAR_OFFSETS)
            offsets[i] = (int)i;
    }
    if (LINEAR_OFFSETS)
        log_info("Offsets set to thread IDs to simplify output.\n");

    createFn( programSrc, numElements, type, vecSize );

    // Create our kernel
    const char *ptr = programSrc;
    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );
    if (DEBUG) log_info("Kernel: \n%s\n", programSrc);

    // Get the number of args to differentiate the kernels with local storage. (They have 5)
    cl_uint numArgs;
    error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
    test_error( error, "clGetKernelInfo failed");

    // Set up parameters
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error );
    test_error( error, "Unable to create kernel stream" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error );
    test_error( error, "Unable to create kernel stream" );
    streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error );
    test_error( error, "Unable to create kernel stream" );

    // Set parameters and run
    if (numArgs == 5)
    {
        // We need to set the size of the local storage
        error = clSetKernelArg(kernel, 0, bufferSize, NULL);
        test_error( error, "clSetKernelArg for buffer failed");
        // The three buffers follow the local-storage argument, hence the +1.
        for( i = 0; i < 3; i++ )
        {
            error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] );
            test_error( error, "Unable to set kernel argument" );
        }
    }
    else
    {
        // No local storage
        for( i = 0; i < 3; i++ )
        {
            error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] );
            if (error)
                log_info("%s\n", programSrc);
            test_error( error, "Unable to set kernel argument" );
        }
    }

    threads[ 0 ] = numStores;
    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
    test_error( error, "Unable to get local thread size" );

    // Run in a loop, changing the address offset from 0 to ( vecSize - 1 ) each time, since
    // otherwise stores might overlap each other, and it'd be a nightmare to test!
    for( cl_uint addressOffset = 0; addressOffset < vecSize; addressOffset++ )
    {
        if (DEBUG)
            log_info("\tstore addressOffset is %d, executing with threads %d\n", addressOffset, (int)threads[0]);

        // Clear the results first
        memset( outBuffer, 0, numElements * typeSize * vecSize );
        error = clEnqueueWriteBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL );
        test_error( error, "Unable to erase result stream" );

        // Set up the new offset and run
        if (numArgs == 5)
            error = clSetKernelArg( kernel, 3+1, sizeof( cl_uint ), &addressOffset );
        else
            error = clSetKernelArg( kernel, 3, sizeof( cl_uint ), &addressOffset );
        test_error( error, "Unable to set address offset argument" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
        test_error( error, "Unable to exec kernel" );

        // Get the results
        error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL );
        test_error( error, "Unable to read results" );

        // Create the reference results
        memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) );
        for( i = 0; i < numStores; i++ )
        {
            // Store i lands at vector offsets[ i ], shifted by addressOffset scalar elements.
            memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize );
        }

        // Validate the results now
        char *expected = referenceBuffer;
        char *actual = (char *)(void *)outBuffer;

        if (DEBUG)
        {
            log_info("Memory contents:\n");
            for (i=0; i<numElements; i++)
            {
                char inString[1024];
                char expectedString[ 1024 ], actualString[ 1024 ];
                if (i < numStores)
                {
                    log_info("buffer %3d: input: %s expected: %s got: %s (store offset %3d)", (int)i, GetDataVectorString( &(inBuffer[i*typeSize*vecSize]), typeSize, vecSize, inString ),
                             GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
                             GetDataVectorString( &(actual[i*typeSize*vecSize]), typeSize, vecSize, actualString ),
                             offsets[i]);
                    if (memcmp(&(expected[i*typeSize*vecSize]), &(actual[i*typeSize*vecSize]), typeSize * vecSize) != 0)
                        log_error(" << ERROR\n");
                    else
                        log_info("\n");
                }
                else
                {
                    // inBuffer only holds numStores vectors, so there is no input to show for
                    // these rows; reading inBuffer here would run past its end.
                    log_info("buffer %3d: input: %s expected: %s got: %s\n", (int)i, "(none)",
                             GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
                             GetDataVectorString( &(actual[i*typeSize*vecSize]), typeSize, vecSize, actualString ));
                }
            }
        }

        for( i = 0; i < numElements; i++ )
        {
            if( memcmp( expected, actual, typeSize * vecSize ) != 0 )
            {
                char expectedString[ 1024 ], actualString[ 1024 ];
                log_error( "ERROR: Data sample %d for vstore of %s%d did not validate (expected {%s}, got {%s}",
                           (int)i, get_explicit_type_name( type ), vecSize, GetDataVectorString( expected, typeSize, vecSize, expectedString ),
                           GetDataVectorString( actual, typeSize, vecSize, actualString ) );
                // Look for the store that targeted this vector slot, to enrich the message.
                size_t j;
                for( j = 0; j < numStores; j++ )
                {
                    if( offsets[ j ] == (cl_uint)i )
                    {
                        log_error( ", stored from store #%d (of %d, offset = %d) with address offset of %d", (int)j, (int)numStores, offsets[j], (int)addressOffset );
                        break;
                    }
                }
                if( j == numStores )
                    log_error( ", supposed to be canary value" );
                log_error( ")\n" );
                return 1;
            }
            expected += typeSize * vecSize;
            actual += typeSize * vecSize;
        }
    }

    return 0;
}
// Runs test_vstore() over every element type and vector width in the tables below.
//
// kDouble is skipped when the device does not report cl_khr_fp64, and kLong/kULong are
// skipped when gHasLong is false. After the first failing width of a type, the remaining
// widths of that type are skipped.
//
// Returns the sum of the non-zero results returned by test_vstore() (0 when everything passed).
int test_vstoreset(cl_device_id device, cl_context context, cl_command_queue queue, create_vstore_program_fn createFn, size_t bufferSize )
{
    // Both tables end with a sentinel; widthLabels is index-aligned with widths.
    ExplicitType elemTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widths[] = { 2, 3, 4, 8, 16, 0 };
    const char *widthLabels[] = { "2", "3", "4", "8", "16"};

    int failures = 0;
    MTdata rng = init_genrand( gRandomSeed );

    log_info("Testing with buffer size of %d.\n", (int)bufferSize);

    for( unsigned int t = 0; elemTypes[ t ] != kNumExplicitTypes; t++ )
    {
        const ExplicitType elemType = elemTypes[ t ];

        if( elemType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
            continue;

        if( !gHasLong && ( elemType == kLong || elemType == kULong ) )
            continue;

        for( unsigned int w = 0; widths[ w ] != 0; w++ )
        {
            log_info("Testing %s%s...\n", get_explicit_type_name(elemType), widthLabels[w]);

            int result = test_vstore( device, context, queue, elemType, widths[ w ], createFn, bufferSize, rng );
            if( result == 0 )
                continue;

            log_error("Failure; skipping further sizes for this type.\n");
            failures += result;
            break;
        }
    }

    free_mtdata(rng);
    return failures;
}
#pragma mark -------------------- vstore test cases --------------------------
// Formats into destBuffer the OpenCL C source of a kernel "test_fn" that vstores directly
// into a __global destination buffer.
//
// destBuffer    receives the generated source; sprintf() does no bounds checking.
// inBufferSize  not used by this generator.
// type          element type; for kDouble the source starts with doubleExtensionPragma.
// inVectorSize  width N used for vstoreN; width 3 selects a dedicated kernel.
void create_global_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    // Width 3: the generated kernel indexes srcValues directly when tid is a multiple of 4
    // and goes through vload3 on the scalar view otherwise.
    const char *width3Kernel =
    "%s"
    "__kernel void test_fn( __global %s3 *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    " if((tid&3) == 0) { // if \"tid\" is a multiple of 4 \n"
    " vstore3( srcValues[ 3*(tid>>2) ], offsets[ tid ], destBuffer + alignmentOffset );\n"
    " } else {\n"
    " vstore3( vload3(tid, (__global %s *)srcValues), offsets[ tid ], destBuffer + alignmentOffset );\n"
    " }\n"
    "}\n";

    // Every other width: one vector per work-item, read straight from srcValues[ tid ].
    const char *vectorKernel =
    "%s"
    "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    " vstore%d( srcValues[ tid ], offsets[ tid ], destBuffer + alignmentOffset );\n"
    "}\n";

    const char *elemName = get_explicit_type_name( type );
    const char *fp64Pragma = ( type == kDouble ) ? doubleExtensionPragma : "";

    if( inVectorSize != 3 )
    {
        const int storeWidth = (int)inVectorSize;
        sprintf( destBuffer, vectorKernel,
                 fp64Pragma,
                 elemName, storeWidth,
                 elemName,
                 storeWidth );
        return;
    }

    sprintf( destBuffer, width3Kernel,
             fp64Pragma,
             elemName, elemName, elemName );
}
// Test entry point: runs the vstore test set against __global memory.
// n_elems is not used; the buffer size handed to test_vstoreset() is fixed at 10240.
int test_vstore_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t globalBufferSize = 10240;

    return test_vstoreset( device, context, queue, create_global_store_code, globalBufferSize );
}
// Formats into destBuffer the OpenCL C source of a kernel "test_fn" that vstores into
// __local memory and then copies the stored values to the __global destination buffer.
//
// The generated kernel takes the local storage as its first argument. Each work-item zeroes
// its own target slot and the following one, waits at a barrier, performs the vstore, waits
// at a second barrier, and finally copies only the elements it stored to destBuffer.
//
// destBuffer    receives the generated source; sprintf() does no bounds checking.
// inBufferSize  not used by this generator.
// type          element type; for kDouble the source starts with doubleExtensionPragma.
// inVectorSize  width N used for vstoreN; width 3 selects a dedicated kernel.
void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    const char *pattern =
    "%s"
    "\n"
    "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
    " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n"
    " sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n"
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n"
    "\n"
    // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output
    // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
    // otherwise, local threads would be overwriting results from other local threads
    " int i;\n"
    " __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n"
    " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
    " for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n"
    " dp[i] = sp[i];\n"
    "}\n";

    const char *patternV3 =
    "%s"
    "\n"
    "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
    "{\n"
    " int tid = get_global_id( 0 );\n"
    // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
    " sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n"
    " sSharedStorage[ 3*offsets[tid] +1 ] = \n"
    " sSharedStorage[ 3*offsets[tid] ];\n"
    " sSharedStorage[ 3*offsets[tid] +2 ] = \n"
    " sSharedStorage[ 3*offsets[tid]];\n"
    " sSharedStorage[ 3*offsets[tid] +3 ] = \n"
    " sSharedStorage[ 3*offsets[tid]];\n"
    " sSharedStorage[ 3*offsets[tid] +4 ] = \n"
    " sSharedStorage[ 3*offsets[tid] ];\n"
    " sSharedStorage[ 3*offsets[tid] +5 ] = \n"
    " sSharedStorage[ 3*offsets[tid]];\n"
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    " vstore3( vload3(tid,srcValues), offsets[ tid ], sSharedStorage + alignmentOffset );\n"
    "\n"
    // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output
    // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
    // otherwise, local threads would be overwriting results from other local threads
    " int i;\n"
    " __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n"
    " __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n"
    " for( i = 0; i < 3; i++ ) \n"
    " dp[i] = sp[i];\n"
    "}\n";

    const char *typeName = get_explicit_type_name(type);

    if(inVectorSize == 3) {
        // patternV3 has seven %s conversions: the pragma, three in the signature, one in the
        // zeroing cast, and one each for sp and dp. Exactly that many arguments are passed.
        sprintf( destBuffer, patternV3,
                 type == kDouble ? doubleExtensionPragma : "",
                 typeName, typeName, typeName,
                 typeName,
                 typeName, typeName );
    } else {
        sprintf( destBuffer, pattern,
                 type == kDouble ? doubleExtensionPragma : "",
                 typeName, (int)inVectorSize,
                 typeName, (int)inVectorSize, typeName, (int)inVectorSize,
                 typeName, (int)inVectorSize, typeName,
                 (int)inVectorSize, typeName, typeName,
                 typeName, typeName, typeName );
    }
}
// Test entry point: runs the vstore test set against __local memory.
// The buffer size is derived from CL_DEVICE_LOCAL_MEM_SIZE: capped at 10240, then reduced
// by 2048 when still above 4096, otherwise halved. n_elems is not used.
int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    cl_ulong usableLocal;
    int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( usableLocal ), &usableLocal, NULL );
    test_error( error, "Unable to get max size of local memory buffer" );

    if( usableLocal > 10240 )
        usableLocal = 10240;

    if( usableLocal <= 4096 )
        usableLocal /= 2;
    else
        usableLocal -= 2048;

    return test_vstoreset( device, context, queue, create_local_store_code, (size_t)usableLocal );
}
// Formats into destBuffer the OpenCL C source of a kernel "test_fn" that vstores into a
// __private array and then copies the stored elements to the __global destination buffer.
//
// The private array belongs to a single work-item, so the generated kernel contains no
// barrier: each work-item zeroes one entry, performs its vstore and copies out only the
// elements it stored.
//
// destBuffer    receives the generated source; sprintf() does no bounds checking.
// inBufferSize  element count of the private array declared in the kernel.
// type          element type; for kDouble the source starts with doubleExtensionPragma.
// inVectorSize  width N used for vstoreN; width 3 selects a dedicated kernel.
void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    // Width 3: the source is read through vload3 on a scalar pointer.
    const char *width3Kernel =
    "%s"
    "\n"
    "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n"
    "{\n"
    " __private %s3 sPrivateStorage[ %d ];\n"
    " int tid = get_global_id( 0 );\n"
    " sPrivateStorage[tid] = (%s3)(%s)0;\n"
    "\n"
    " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
    "\n"
    // Only the three elements this work-item stored are copied out.
    " uint i;\n"
    " __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n"
    " __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n"
    " for( i = 0; i < 3; i++ ) \n"
    " dp[i] = sp[i];\n"
    "}\n";

    // Every other width: the source is read as whole vectors.
    const char *vectorKernel =
    "%s"
    "\n"
    "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
    "{\n"
    " __private %s%d sPrivateStorage[ %d ];\n"
    " int tid = get_global_id( 0 );\n"
    " sPrivateStorage[tid] = (%s%d)(%s)0;\n"
    "\n"
    " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
    "\n"
    // Only the one vector this work-item stored is copied out, element by element.
    " uint i;\n"
    " __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n"
    " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
    " for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n"
    " dp[i] = sp[i];\n"
    "}\n";

    const char *elemName = get_explicit_type_name( type );
    const char *fp64Pragma = ( type == kDouble ) ? doubleExtensionPragma : "";
    const int elemCount = (int)inBufferSize;

    if( inVectorSize != 3 )
    {
        const int storeWidth = (int)inVectorSize;
        sprintf( destBuffer, vectorKernel,
                 fp64Pragma,
                 elemName, storeWidth,
                 elemName, storeWidth,
                 elemName, storeWidth, elemCount,
                 elemName, storeWidth, elemName,
                 storeWidth, elemName,
                 elemName, elemName,
                 elemName, elemName );
        return;
    }

    sprintf( destBuffer, width3Kernel,
             fp64Pragma,
             elemName, elemName,
             elemName, elemCount,
             elemName, elemName,
             elemName,
             elemName, elemName,
             elemName, elemName );
}
// Test entry point: runs the vstore test set against __private memory.
// The amount of private storage actually available is unknown, so a fixed 256 is used:
// room for two 16-element vectors of 8-byte values (2 * 8 * 16). n_elems is not used.
int test_vstore_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t privateBufferSize = 256;

    return test_vstoreset( device, context, queue, create_private_store_code, privateBufferSize );
}

View File

@@ -1,177 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
// Host-side record read back from the kernel's output buffer: one entry per slot written by
// sample_kernel. The fields and their order match the work_item_data struct declared inside
// workItemKernelCode. Each array has one entry per dimension (up to 3).
//
// The typedef names the type explicitly, so `work_item_data` can be used without the `struct`
// keyword and the declaration no longer has an empty declarator list.
typedef struct work_item_data
{
    cl_uint workDim;          // value of get_work_dim()
    cl_uint globalSize[ 3 ];  // get_global_size( i )
    cl_uint globalID[ 3 ];    // get_global_id( i )
    cl_uint localSize[ 3 ];   // get_local_size( i )
    cl_uint localID[ 3 ];     // get_local_id( i )
    cl_uint numGroups[ 3 ];   // get_num_groups( i )
    cl_uint groupID[ 3 ];     // get_group_id( i )
} work_item_data;
// OpenCL C source of "sample_kernel". Each work-item records the results of the work-item
// built-in functions into outData[ get_global_id(0) ]: the work dimension, plus the global
// size/ID, local size/ID, group count and group ID for every dimension below get_work_dim().
// The struct declared in the kernel has the same fields, in the same order, as the host-side
// work_item_data struct.
static const char *workItemKernelCode =
"typedef struct {\n"
" uint workDim;\n"
" uint globalSize[ 3 ];\n"
" uint globalID[ 3 ];\n"
" uint localSize[ 3 ];\n"
" uint localID[ 3 ];\n"
" uint numGroups[ 3 ];\n"
" uint groupID[ 3 ];\n"
" } work_item_data;\n"
"\n"
"__kernel void sample_kernel( __global work_item_data *outData )\n"
"{\n"
" int id = get_global_id(0);\n"
" outData[ id ].workDim = (uint)get_work_dim();\n"
" for( uint i = 0; i < get_work_dim(); i++ )\n"
" {\n"
" outData[ id ].globalSize[ i ] = (uint)get_global_size( i );\n"
" outData[ id ].globalID[ i ] = (uint)get_global_id( i );\n"
" outData[ id ].localSize[ i ] = (uint)get_local_size( i );\n"
" outData[ id ].localID[ i ] = (uint)get_local_id( i );\n"
" outData[ id ].numGroups[ i ] = (uint)get_num_groups( i );\n"
" outData[ id ].groupID[ i ] = (uint)get_group_id( i );\n"
" }\n"
"}";
// Number of randomly sized launches test_work_item_functions() runs for each dimension count.
#define NUM_TESTS 1
int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper outData;
work_item_data testData[ 10240 ];
size_t threads[3], localThreads[3];
MTdata d;
error = create_single_kernel_helper( context, &program, &kernel, 1, &workItemKernelCode, "sample_kernel" );
test_error( error, "Unable to create testing kernel" );
outData = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( testData ), NULL, &error );
test_error( error, "Unable to create output buffer" );
error = clSetKernelArg( kernel, 0, sizeof( outData ), &outData );
test_error( error, "Unable to set kernel arg" );
d = init_genrand( gRandomSeed );
for( size_t dim = 1; dim <= 3; dim++ )
{
for( int i = 0; i < NUM_TESTS; i++ )
{
size_t numItems = 1;
for( size_t j = 0; j < dim; j++ )
{
// All of our thread sizes should be within the max local sizes, since they're all <= 20
threads[ j ] = (size_t)random_in_range( 1, 20, d );
localThreads[ j ] = threads[ j ] / (size_t)random_in_range( 1, (int)threads[ j ], d );
while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) )
localThreads[ j ]--;
numItems *= threads[ j ];
// Hack for now: localThreads > 1 are iffy
localThreads[ j ] = 1;
}
error = clEnqueueNDRangeKernel( queue, kernel, (cl_uint)dim, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to run kernel" );
error = clEnqueueReadBuffer( queue, outData, CL_TRUE, 0, sizeof( testData ), testData, 0, NULL, NULL );
test_error( error, "Unable to read results" );
// Validate
for( size_t q = 0; q < threads[0]; q++ )
{
// We can't really validate the actual value of each one, but we can validate that they're within a sane range
if( testData[ q ].workDim != (cl_uint)dim )
{
log_error( "ERROR: get_work_dim() did not return proper value for %d dimensions (expected %d, got %d)\n", (int)dim, (int)dim, (int)testData[ q ].workDim );
free_mtdata(d);
return -1;
}
for( size_t j = 0; j < dim; j++ )
{
if( testData[ q ].globalSize[ j ] != (cl_uint)threads[ j ] )
{
log_error( "ERROR: get_global_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n",
(int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalSize[ j ] );
free_mtdata(d);
return -1;
}
if( testData[ q ].globalID[ j ] < 0 || testData[ q ].globalID[ j ] >= (cl_uint)threads[ j ] )
{
log_error( "ERROR: get_global_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
(int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalID[ j ] );
free_mtdata(d);
return -1;
}
if( testData[ q ].localSize[ j ] != (cl_uint)localThreads[ j ] )
{
log_error( "ERROR: get_local_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n",
(int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localSize[ j ] );
free_mtdata(d);
return -1;
}
if( testData[ q ].localID[ j ] < 0 && testData[ q ].localID[ j ] >= (cl_uint)localThreads[ j ] )
{
log_error( "ERROR: get_local_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
(int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localID[ j ] );
free_mtdata(d);
return -1;
}
size_t groupCount = ( threads[ j ] + localThreads[ j ] - 1 ) / localThreads[ j ];
if( testData[ q ].numGroups[ j ] != (cl_uint)groupCount )
{
log_error( "ERROR: get_num_groups(%d) did not return proper value for %d dimensions (expected %d with global dim %d and local dim %d, got %d)\n",
(int)j, (int)dim, (int)groupCount, (int)threads[ j ], (int)localThreads[ j ], (int)testData[ q ].numGroups[ j ] );
free_mtdata(d);
return -1;
}
if( testData[ q ].groupID[ j ] < 0 || testData[ q ].groupID[ j ] >= (cl_uint)groupCount )
{
log_error( "ERROR: get_group_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
(int)j, (int)dim, (int)groupCount, (int)testData[ q ].groupID[ j ] );
free_mtdata(d);
return -1;
}
}
}
}
}
free_mtdata(d);
return 0;
}

View File

@@ -1,188 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * OpenCL C source for the kernel "test_rgbaFFFF_write".
 * Each work-item (tid_x, tid_y) reads four consecutive floats from src at
 * linear offset (tid_y * image_width + tid_x) * 4, packs them into a float4
 * and stores that value at pixel (tid_x, tid_y) of dstimg via write_imagef().
 */
static const char *rgbaFFFF_write_kernel_code =
"__kernel void test_rgbaFFFF_write(__global float *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" float4 color;\n"
"\n"
" indx *= 4;\n"
" color = (float4)(src[indx+0], src[indx+1], src[indx+2], src[indx+3]);\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
/*
 * Allocates a w x h image with four float channels per pixel and fills every
 * channel with get_random_float(-0x40000000, 0x40000000, d).
 *
 * Returns a malloc()ed buffer of w * h * 4 floats that the caller must free(),
 * or NULL if a dimension is negative, the byte size would overflow size_t, or
 * the allocation fails.
 */
static float *
generate_float_image(int w, int h, MTdata d)
{
    const size_t max_elems = (size_t)-1 / sizeof(float);
    size_t count;
    size_t i;
    float *ptr;

    if (w < 0 || h < 0)
        return NULL;

    /* Reject sizes whose element count or byte count would wrap around. */
    if (h != 0 && (size_t)w > max_elems / 4 / (size_t)h)
        return NULL;

    count = (size_t)w * (size_t)h * 4;
    ptr = (float*)malloc(count * sizeof(float));
    if (ptr == NULL)
        return NULL;

    for (i = 0; i < count; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);

    return ptr;
}
/*
 * Compares two w x h RGBA float images channel by channel for exact equality.
 *
 * string  - label used in the pass/fail log line
 * image   - reference pixel data
 * outptr  - pixel data to check against the reference
 *
 * Logs "<string> failed" and returns -1 at the first mismatch; otherwise logs
 * "<string> passed" and returns 0.
 */
static int
verify_float_image(const char *string, float *image, float *outptr, int w, int h)
{
    const int channel_count = w * h * 4;
    int idx = 0;

    while (idx < channel_count)
    {
        if (outptr[idx] != image[idx])
        {
            log_error("%s failed\n", string);
            return -1;
        }
        ++idx;
    }

    log_info("%s passed\n", string);
    return 0;
}
/*
 * Checks write_imagef() on CL_RGBA / CL_FLOAT 2D images.
 *
 * A random 512x512 float image is uploaded into a buffer, the kernel
 * "test_rgbaFFFF_write" copies it into an image created with
 * CL_MEM_READ_WRITE and into one created with CL_MEM_WRITE_ONLY, and each
 * image is read back and compared against the input with verify_float_image().
 *
 * Returns 0 when both images match (or when PASSIVE_REQUIRE_IMAGE_SUPPORT
 * skips the test), non-zero otherwise. Every failure path goes through the
 * common cleanup label so host buffers and CL objects are always released.
 * num_elements is unused.
 */
int test_writeimage_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3] = {NULL, NULL, NULL};
    cl_program program = NULL;
    cl_kernel kernel[2] = {NULL, NULL};
    cl_image_format img_format;
    float *input_ptr = NULL, *output_ptr = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int i, err, any_err = 0;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
    size_t length = (size_t)img_width * (size_t)img_height * 4 * sizeof(float);
    MTdata d;

    /* May return early; nothing has been allocated yet at this point. */
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    input_ptr = generate_float_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;
    output_ptr = (float*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        any_err = -1;
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_FLOAT;

    /* streams[0]: read-write image, streams[1]: write-only image. */
    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }
    streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[1])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }

    /* streams[2]: source buffer holding the reference pixels. */
    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateBuffer failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgbaFFFF_write_kernel_code, "test_rgbaFFFF_write" );
    if (err)
    {
        any_err = -1;
        goto cleanup;
    }

    /* Second kernel object from the same program, so each image gets its own argument set. */
    kernel[1] = clCreateKernel(program, "test_rgbaFFFF_write", NULL);
    if (!kernel[1])
    {
        log_error("clCreateKernel failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        any_err = -1;
        goto cleanup;
    }

    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;

    for (i=0; i<2; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clExecuteKernel failed\n");
            any_err = -1;
            goto cleanup;
        }

        err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clReadImage failed\n");
            any_err = -1;
            goto cleanup;
        }

        /* A mismatch does not abort: both memflag variants are always reported. */
        err = verify_float_image((i == 0) ? "WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_READ_WRITE" :
                                            "WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_WRITE_ONLY",
                                 input_ptr, output_ptr, img_width, img_height);
        any_err |= err;
    }

cleanup:
    /* Release only what was actually created; free(NULL) is a no-op. */
    for (i=0; i<3; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    for (i=0; i<2; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
    }
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return any_err;
}

View File

@@ -1,194 +0,0 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * OpenCL C source for the kernel "test_rgba16_write".
 * Each work-item (tid_x, tid_y) reads four consecutive unsigned shorts from
 * src at linear offset (tid_y * image_width + tid_x) * 4, converts them to
 * float, divides by 65535.0f and stores the resulting float4 at pixel
 * (tid_x, tid_y) of dstimg via write_imagef().
 */
static const char *rgba16_write_kernel_code =
"__kernel void test_rgba16_write(__global unsigned short *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" float4 color;\n"
"\n"
" indx *= 4;\n"
" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
" color /= 65535.0f;\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
/*
 * Allocates a w x h image with four 16-bit channels per pixel and fills every
 * channel with the low 16 bits of genrand_int32(d).
 *
 * Returns a malloc()ed buffer of w * h * 4 cl_ushort values that the caller
 * must free(), or NULL if a dimension is negative, the byte size would
 * overflow size_t, or the allocation fails.
 */
static unsigned short *
generate_16bit_image(int w, int h, MTdata d)
{
    const size_t max_elems = (size_t)-1 / sizeof(cl_ushort);
    size_t count;
    size_t i;
    cl_ushort *ptr;

    if (w < 0 || h < 0)
        return NULL;

    /* Reject sizes whose element count or byte count would wrap around. */
    if (h != 0 && (size_t)w > max_elems / 4 / (size_t)h)
        return NULL;

    count = (size_t)w * (size_t)h * 4;
    ptr = (cl_ushort*)malloc(count * sizeof(cl_ushort));
    if (ptr == NULL)
        return NULL;

    for (i = 0; i < count; i++)
        ptr[i] = (cl_ushort)genrand_int32(d);

    return ptr;
}
// Normalized 16-bit ints get divided by 64k and then multiplied by 64k again,
// so give the poor things some tolerance.
#define MAX_ERR 1

/*
 * Compares two w x h RGBA 16-bit images channel by channel, accepting an
 * absolute difference of up to MAX_ERR per channel.
 *
 * string  - label used in the pass/fail log line
 * image   - reference pixel data
 * outptr  - pixel data to check against the reference
 *
 * Logs "<string> failed" and returns -1 at the first channel outside the
 * tolerance; otherwise logs "<string> passed" and returns 0.
 */
static int
verify_16bit_image(const char *string, cl_ushort *image, cl_ushort *outptr, int w, int h)
{
    const int channel_count = w * h * 4;
    int idx = 0;

    while (idx < channel_count)
    {
        /* Both operands are promoted to int, so the difference cannot wrap. */
        int delta = (int)outptr[idx] - (int)image[idx];
        if (delta < 0)
            delta = -delta;

        if (delta > MAX_ERR)
        {
            log_error("%s failed\n", string);
            return -1;
        }
        ++idx;
    }

    log_info("%s passed\n", string);
    return 0;
}
/*
 * Checks write_imagef() on CL_RGBA / CL_UNORM_INT16 2D images.
 *
 * A random 512x512 16-bit image is uploaded into a buffer, the kernel
 * "test_rgba16_write" copies it into an image created with CL_MEM_READ_WRITE
 * and into one created with CL_MEM_WRITE_ONLY, and each image is read back
 * and compared against the input with verify_16bit_image().
 *
 * Returns 0 when both images match (or when PASSIVE_REQUIRE_IMAGE_SUPPORT
 * skips the test), non-zero otherwise. Every failure path goes through the
 * common cleanup label so host buffers and CL objects are always released.
 * num_elements is unused.
 */
int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3] = {NULL, NULL, NULL};
    cl_program program = NULL;
    cl_kernel kernel[2] = {NULL, NULL};
    cl_image_format img_format;
    cl_ushort *input_ptr = NULL, *output_ptr = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int i, err, any_err = 0;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
    size_t length = (size_t)img_width * (size_t)img_height * 4 * sizeof(cl_ushort);
    MTdata d;

    /* May return early; nothing has been allocated yet at this point. */
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    input_ptr = generate_16bit_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;
    output_ptr = (cl_ushort*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        any_err = -1;
        goto cleanup;
    }

    /* The same format is used for both images. */
    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT16;

    /* streams[0]: read-write image, streams[1]: write-only image. */
    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }
    streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[1])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }

    /* streams[2]: source buffer holding the reference pixels. */
    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateBuffer failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" );
    if (err)
    {
        any_err = -1;
        goto cleanup;
    }

    /* Second kernel object from the same program, so each image gets its own argument set. */
    kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL);
    if (!kernel[1])
    {
        log_error("clCreateKernel failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        any_err = -1;
        goto cleanup;
    }

    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;

    for (i=0; i<2; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clExecuteKernel failed\n");
            any_err = -1;
            goto cleanup;
        }

        err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clReadImage failed\n");
            any_err = -1;
            goto cleanup;
        }

        /* A mismatch does not abort: both memflag variants are always reported. */
        err = verify_16bit_image((i == 0) ? "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_READ_WRITE" :
                                            "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_WRITE_ONLY",
                                 input_ptr, output_ptr, img_width, img_height);
        any_err |= err;
    }

cleanup:
    /* Release only what was actually created; free(NULL) is a no-op. */
    for (i=0; i<3; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    for (i=0; i<2; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
    }
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return any_err;
}