Initial open source release of OpenCL 2.2 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:25:37 +05:30
parent 6911ba5116
commit 2821bf1323
1035 changed files with 343518 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
# Build description for the PROFILING conformance test module.
set(MODULE_NAME PROFILING)
# Source list: the test sources in this directory first, then the shared
# harness sources referenced relative to this directory.
set(${MODULE_NAME}_SOURCES
main.c
readArray.c
writeArray.c
readImage.c
writeImage.c
copy.c
execute.c
execute_multipass.c
../../test_common/harness/testHarness.c
../../test_common/harness/errorHelpers.c
../../test_common/harness/typeWrappers.cpp
../../test_common/harness/imageHelpers.cpp
../../test_common/harness/kernelHelpers.c
../../test_common/harness/mt19937.c
../../test_common/harness/conversions.c
../../test_common/harness/msvc9.c
../../test_common/harness/parseParameters.cpp
)
# Shared build logic lives in the parent directory.
# NOTE(review): presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES - confirm in CMakeCommon.txt.
include(../CMakeCommon.txt)

View File

@@ -0,0 +1,22 @@
# Boost.Build description of the test_profiling executable.
# The project requirements pass -xc++ (gcc) or /TP (msvc) so that the .c
# sources are compiled as C++.
project
: requirements
<toolset>gcc:<cflags>-xc++
<toolset>msvc:<cflags>"/TP"
;
# The executable is built from the test sources in this directory only.
exe test_profiling
: copy.c
execute.c
execute_multipass.c
main.c
readArray.c
readImage.c
writeArray.c
writeImage.c
;
# Install the binary into a per-variant (debug/release) location under $(DIST).
install dist
: test_profiling
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/profiling
<variant>release:<location>$(DIST)/release/tests/test_conformance/profiling
;

View File

@@ -0,0 +1,45 @@
# Makefile for the test_profiling executable (links against OS X frameworks).
# Defining BUILD_WITH_ATF adds the ATF framework to the link line and
# -DUSE_ATF to the compile flags.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif
# Test sources in this directory followed by the shared harness sources.
SRCS = main.c readArray.c writeArray.c readImage.c writeImage.c copy.c execute.c execute_multipass.c \
../../test_common/harness/testHarness.c \
../../test_common/harness/errorHelpers.c \
../../test_common/harness/typeWrappers.cpp \
../../test_common/harness/imageHelpers.cpp \
../../test_common/harness/mt19937.c \
../../test_common/harness/conversions.c \
../../test_common/harness/kernelHelpers.c
# Absolute paths of all sources; the object list below is derived from these.
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = ${SOURCES}
HEADERS =
TARGET = test_profiling
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
#COMPILERFLAGS = -c -Wall -g -DUSE_LOCAL_THREADS
# CC is the C++ driver, so .c and .cpp files are both compiled as C++.
CC = c++
CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
CXXFLAGS= $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
# Map every .c and then every .cpp source name to its .o object name.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =
# Default goal: link the test executable.
all: $(TARGET)
$(TARGET): $(OBJECTS)
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
# Remove the executable and all object files.
clean:
rm -f $(TARGET) $(OBJECTS)
# Fallback for any target that has no rule: print a message naming it.
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,869 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/conversions.h"
//--- the code for the kernel executables
// OpenCL C source of the "test_write" kernel. Each work-item (tid_x, tid_y)
// reads four consecutive bytes from src starting at
// (tid_y * image_width + tid_x) * 4, converts them to floats, divides by 255
// and writes the resulting float4 to pixel (tid_x, tid_y) of dstimg.
static const char *write_kernel_code =
"\n"
"__kernel void test_write(__global unsigned char *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" float4 color;\n"
"\n"
" indx *= 4;\n"
" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
" color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
//--- the verify functions
// Compare a rectangular region of two images that share the same row pitch.
//
// src, dst       : image bytes, rows stored one after another
// srcx, srcy     : top-left pixel of the region in src
// dstx, dsty     : top-left pixel of the region in dst
// subw, subh     : region size in pixels
// pitch          : pixels per image row
// element_pitch  : bytes per pixel
//
// Returns 0 when every byte of the region matches, -1 on the first mismatch.
static int verify_subimage( unsigned char *src, unsigned char *dst, size_t srcx, size_t srcy,
                            size_t dstx, size_t dsty, size_t subw, size_t subh, size_t pitch, size_t element_pitch )
{
    size_t row;

    for( row = 0; row < subh; ++row ){
        // Byte offsets of the first pixel of this region row in each image.
        size_t src_off = ( row + srcy ) * pitch * element_pitch + srcx * element_pitch;
        size_t dst_off = ( row + dsty ) * pitch * element_pitch + dstx * element_pitch;
        // The pixels of one region row are contiguous, so walk them as a byte run.
        size_t remaining = subw * element_pitch;

        while( remaining-- > 0 ){
            if( src[src_off++] != dst[dst_off++] )
                return -1;
        }
    }

    return 0;
}
// Compare the first n ints of two arrays.
// Returns 0 when they are all equal (including when n <= 0), -1 otherwise.
static int verify_copy_array( int *inptr, int *outptr, int n )
{
    int idx = 0;

    while( idx < n ){
        if( inptr[idx] != outptr[idx] )
            return -1;
        ++idx;
    }

    return 0;
}
//----- helper functions
// Allocate n bytes and fill them with values drawn from the generator d.
//
// Returns the malloc'ed buffer, which the caller must free(), or NULL when
// the allocation fails. Callers already test the result for NULL, so the
// buffer must not be written before that is known.
static cl_uchar *generate_image( int n, MTdata d )
{
    cl_uchar *ptr = (cl_uchar *)malloc( n );
    int i;

    if( ! ptr )
        return NULL;

    for( i = 0; i < n; i++ )
        ptr[i] = (cl_uchar)genrand_int32(d);

    return ptr;
}
// Copy num_elements cl_ints between two device buffers with
// clEnqueueCopyBuffer, read the destination back and compare it with the
// input, then hand the copy event's four profiling timestamps to
// check_times().
//
// device        : passed through to check_times()
// context/queue : used for all OpenCL calls
// num_elements  : number of cl_int elements in each buffer
// d             : random generator used to fill the host buffers
//
// Returns 0 when the data matches and check_times() returns 0, -1 otherwise.
// Every exit path releases the event, both buffers and both host arrays.
static int copy_size( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, MTdata d )
{
    // All locals are declared and initialised before the first goto: these
    // sources are also compiled as C++, where a jump may not skip an
    // initialisation.
    cl_mem      streams[2] = { NULL, NULL };
    cl_event    copyEvent = NULL;
    cl_ulong    queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    cl_int      *int_input_ptr = NULL, *int_output_ptr = NULL;
    int         err = 0;
    int         result = -1;
    int         i;

    int_input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    int_output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    if( !int_input_ptr || !int_output_ptr ){
        log_error("unable to allocate host buffers\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
    if( !streams[0] ){
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
    if( !streams[1] ){
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i=0; i<num_elements; i++){
        int_input_ptr[i] = (int)genrand_int32(d);
        int_output_ptr[i] = (int)genrand_int32(d) >> 30;    // seed with incorrect data
    }

    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int)*num_elements, (void *)int_input_ptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clWriteArray failed" );
        goto cleanup;
    }

    err = clEnqueueCopyBuffer( queue, streams[0], streams[1], 0, 0, sizeof(cl_int)*num_elements, 0, NULL, &copyEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clCopyArray failed" );
        goto cleanup;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &copyEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clWaitForEvents failed" );
        goto cleanup;
    }

    // test profiling: the QUEUED and SUBMIT queries are retried for as long
    // as the implementation reports that the information is not available yet
    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
          CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
          CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)int_output_ptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed" );
        goto cleanup;
    }

    if( verify_copy_array(int_input_ptr, int_output_ptr, num_elements) ){
        log_error( "test failed\n" );
        result = -1;
    }
    else{
        log_info( "test passed\n" );
        result = 0;
    }

    // The timestamps are checked even when the data comparison failed.
    if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
        result = -1;

cleanup:
    if( copyEvent )
        clReleaseEvent(copyEvent);
    if( streams[0] )
        clReleaseMemObject( streams[0] );
    if( streams[1] )
        clReleaseMemObject( streams[1] );
    free( (void *)int_output_ptr );
    free( (void *)int_input_ptr );

    return result;
}   // end copy_size()
// Copy `size` cl_ints from element srcStart of one device buffer to element
// dstStart of another with clEnqueueCopyBuffer, read the destination back
// and compare the copied range with the input, then hand the copy event's
// four profiling timestamps to check_times().
//
// device        : passed through to check_times()
// context/queue : used for all OpenCL calls
// num_elements  : number of cl_int elements in each buffer
// srcStart      : first source element of the copy
// dstStart      : first destination element of the copy
// size          : number of elements to copy
// d             : random generator used to fill the host buffers
//
// Returns 0 when the data matches and check_times() returns 0, -1 otherwise.
// Every exit path releases the event, both buffers and both host arrays.
static int copy_partial_size( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d )
{
    // All locals are declared and initialised before the first goto: these
    // sources are also compiled as C++, where a jump may not skip an
    // initialisation.
    cl_mem      streams[2] = { NULL, NULL };
    cl_event    copyEvent = NULL;
    cl_ulong    queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    cl_int      *inptr = NULL, *outptr = NULL;
    int         err = 0;
    int         result = -1;
    int         i;

    inptr = (cl_int *)malloc(sizeof(cl_int) * num_elements);
    outptr = (cl_int *)malloc(sizeof(cl_int) * num_elements);
    if( !inptr || !outptr ){
        log_error("unable to allocate host buffers\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i=0; i<num_elements; i++){
        inptr[i] = (int)genrand_int32(d);
        outptr[i] = (int)get_random_float( -1.f, 1.f, d );    // seed with incorrect data
    }

    err = clEnqueueWriteBuffer(queue, streams[0], true, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    err = clEnqueueCopyBuffer( queue, streams[0], streams[1], srcStart*sizeof(cl_int), dstStart*sizeof(cl_int),
                               sizeof(cl_int)*size, 0, NULL, &copyEvent );
    if( err != CL_SUCCESS){
        print_error( err, "clCopyArray failed" );
        goto cleanup;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &copyEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clWaitForEvents failed" );
        goto cleanup;
    }

    // test profiling: the QUEUED and SUBMIT queries are retried for as long
    // as the implementation reports that the information is not available yet
    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
          CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
          CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS){
        log_error("clReadVariableStream failed\n");
        goto cleanup;
    }

    // Only the copied range is compared; the rest of outptr is not checked.
    if( verify_copy_array(inptr + srcStart, outptr + dstStart, size) ){
        log_error("test failed\n");
        result = -1;
    }
    else{
        log_info("test passed\n");
        result = 0;
    }

    // The timestamps are checked even when the data comparison failed.
    if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
        result = -1;

cleanup:
    if( copyEvent )
        clReleaseEvent(copyEvent);
    if( streams[0] )
        clReleaseMemObject(streams[0]);
    if( streams[1] )
        clReleaseMemObject(streams[1]);
    free(outptr);
    free(inptr);

    return result;
}   // end copy_partial_size()
// Run copy_size() once with the caller-supplied element count and then with
// eight randomly chosen counts. The individual results are OR-ed together,
// so the return value is 0 only when every run returned 0.
int copy_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    MTdata  d = init_genrand( gRandomSeed );
    int     failures;
    int     pass;

    // test the preset size
    log_info( "set size: %d: ", num_elements );
    failures = copy_size( device, context, queue, num_elements, d );

    // now test random sizes
    for( pass = 0; pass < 8; ++pass ){
        int random_size = (int)get_random_float(2.f,131072.f, d);

        log_info( "random size: %d: ", random_size );
        failures |= copy_size( device, context, queue, random_size, d );
    }

    free_mtdata(d);

    return failures;
}   // end copy_array()
// Run copy_partial_size() eight times with a random source offset, length
// and destination offset drawn so that the ranges are derived from
// num_elements. The individual results are OR-ed together, so the return
// value is 0 only when every run returned 0.
int copy_partial_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    MTdata  d = init_genrand( gRandomSeed );
    int     failures = 0;
    int     pass;

    // now test copy of partial sizes
    for( pass = 0; pass < 8; ++pass ){
        // The three draws depend on each other and must stay in this order.
        cl_uint src_first = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), d );
        int     count = (int)get_random_float( 8.f, (float)(num_elements - src_first), d );
        cl_uint dst_first = (cl_uint)get_random_float( 0.f, (float)(num_elements - count), d );

        log_info( "random partial copy from %d to %d, size: %d: ", (int)src_first, (int)dst_first, count );
        failures |= copy_partial_size( device, context, queue, num_elements, src_first, dst_first, count, d );
    }

    free_mtdata(d);

    return failures;
}   // end copy_partial_array()
// Fill a 256x256 RGBA/UNORM_INT8 image from random bytes via the test_write
// kernel, copy the sub-rectangle (srcx, srcy, subw, subh) into a second image
// at (dstx, dsty) with clEnqueueCopyImage, read the second image back and
// compare the copied rectangle with the input bytes. The copy event's four
// profiling timestamps are then handed to check_times().
//
// device        : checked for image support and passed to check_times()
// context/queue : used for all OpenCL calls
// srcx, srcy    : origin of the copied rectangle in the source image
// dstx, dsty    : origin of the copied rectangle in the destination image
// subw, subh    : size of the copied rectangle in pixels
// d             : random generator used to create the input bytes
//
// Returns 0 when the rectangle matches and check_times() returns 0, non-zero
// otherwise. Every exit path after the first allocation releases the OpenCL
// objects and host buffers that were created.
static int copy_image_size( cl_device_id device, cl_context context,
                            cl_command_queue queue, size_t srcx, size_t srcy,
                            size_t dstx, size_t dsty, size_t subw, size_t subh,
                            MTdata d )
{
    // All locals are declared and initialised before the first goto: these
    // sources are also compiled as C++, where a jump may not skip an
    // initialisation.
    cl_mem              memobjs[3] = { NULL, NULL, NULL };
    cl_program          program[1] = { NULL };
    cl_kernel           kernel[1] = { NULL };
    cl_image_format     image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_event            copyEvent = NULL;
    cl_ulong            queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    void                *inptr = NULL;
    void                *dst = NULL;
    size_t              threads[2];
#ifdef USE_LOCAL_THREADS
    size_t              localThreads[2];
#endif
    int                 err = 0;
    int                 result = -1;
    cl_mem_flags        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
    unsigned int        num_channels = 4;
    size_t              w = 256, h = 256;
    size_t              channel_nbytes = sizeof( cl_char );
    size_t              element_nbytes = channel_nbytes * num_channels;
    size_t              num_bytes = w * h * element_nbytes;
    size_t              srcPt[3] = { srcx, srcy, 0 };
    size_t              destPt[3] = { dstx, dsty, 0 };
    size_t              region[3] = { subw, subh, 1 };
    size_t              origin[3] = { 0, 0, 0 };
    size_t              region2[3] = { w, h, 1 };

    // NOTE(review): macro defined elsewhere; presumably returns early when the
    // device has no image support. Nothing has been allocated at this point.
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    threads[0] = (size_t)w;
    threads[1] = (size_t)h;

#ifdef USE_LOCAL_THREADS
    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
    test_error( err, "Unable to get thread group max size" );
    localThreads[1] = localThreads[0];
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[1] > threads[1] )
        localThreads[1] = threads[1];
#endif

    inptr = (void *)generate_image( (int)num_bytes, d );
    if( ! inptr ){
        log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
        goto cleanup;
    }

    dst = malloc( num_bytes );
    if( ! dst ){
        log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h );
        goto cleanup;
    }

    // memobjs[0]: source image, memobjs[1]: staging buffer holding the input
    // bytes, memobjs[2]: destination image of the copy
    memobjs[0] = create_image_2d(context, flags, &image_format_desc, w, h, 0, NULL, &err);
    if( memobjs[0] == (cl_mem)0 ) {
        log_error("unable to create Image2D\n");
        goto cleanup;
    }

    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), num_bytes, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ) {
        log_error("unable to create array\n");
        goto cleanup;
    }

    memobjs[2] = create_image_2d(context, flags, &image_format_desc, w, h, 0, NULL, &err);
    if( memobjs[2] == (cl_mem)0 ) {
        log_error("unable to create Image2D\n");
        goto cleanup;
    }

    err = clEnqueueWriteBuffer( queue, memobjs[1], true, 0, num_bytes, inptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &write_kernel_code, "test_write" );
    if( err ){
        // NOTE(review): what the helper leaves in program/kernel on failure is
        // not visible here; as before, neither is released on this path.
        program[0] = NULL;
        kernel[0] = NULL;
        goto cleanup;
    }

    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[1] );
    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[0] );
    if (err != CL_SUCCESS){
        log_error("clSetKernelArg failed\n");
        goto cleanup;
    }

#ifdef USE_LOCAL_THREADS
    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
#endif
    if (err != CL_SUCCESS){
        print_error( err, "clEnqueueNDRangeKernel failed" );
        goto cleanup;
    }

    // now do the copy
    err = clEnqueueCopyImage( queue, memobjs[0], memobjs[2], srcPt, destPt, region, 0, NULL, &copyEvent );
    if (err != CL_SUCCESS){
        print_error( err, "clCopyImage failed" );
        goto cleanup;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &copyEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clWaitForEvents failed" );
        goto cleanup;
    }

    // test profiling: the QUEUED and SUBMIT queries are retried for as long
    // as the implementation reports that the information is not available yet
    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
          CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
          CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clEnqueueReadImage( queue, memobjs[2], true, origin, region2, 0, 0, dst, 0, NULL, NULL );
    if (err != CL_SUCCESS){
        print_error( err, "clReadImage failed" );
        goto cleanup;
    }

    // Only the copied rectangle is compared; the rest of dst is not checked.
    result = verify_subimage( (unsigned char *)inptr, (unsigned char *)dst, srcx, srcy,
                              dstx, dsty, subw, subh, w, 4 );
    if( result ){
        log_error( "Image failed to verify.\n " );
    }
    else{
        log_info( "Image verified.\n" );
    }

    // The timestamps are checked even when the image comparison failed.
    if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
        result = -1;

cleanup:
    if( copyEvent )
        clReleaseEvent(copyEvent);
    if( kernel[0] )
        clReleaseKernel( kernel[0] );
    if( program[0] )
        clReleaseProgram( program[0] );
    if( memobjs[0] )
        clReleaseMemObject( memobjs[0] );
    if( memobjs[1] )
        clReleaseMemObject( memobjs[1] );
    if( memobjs[2] )
        clReleaseMemObject( memobjs[2] );
    free( dst );
    free( inptr );

    return result;
}   // end copy_image_size()
int copy_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
int err = 0;
int i;
size_t srcx, srcy, dstx, dsty, subw, subh;
MTdata d;
srcx = srcy = dstx = dsty = 0;
subw = subh = 256;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
d = init_genrand( gRandomSeed );
err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d );
if( err ){
log_error( "testing copy image, full size\n" );
}
else{
log_info( "testing copy image, full size\n" );
}
// now test random sub images
srcx = srcy = 0;
subw = subh = 16;
dstx = dsty = 0;
err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d );
if( err ){
log_error( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
(int)dstx, (int)dsty, (int)subw, (int)subh );
}
else{
log_info( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
(int)dstx, (int)dsty, (int)subw, (int)subh );
}
srcx = srcy = 8;
subw = subh = 16;
dstx = dsty = 32;
err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d );
if( err ){
log_error( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
(int)dstx, (int)dsty, (int)subw, (int)subh );
}
else{
log_info( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
(int)dstx, (int)dsty, (int)subw, (int)subh );
}
for( i = 0; i < 16; i++ ) {
srcx = (size_t)get_random_float( 0.f, 248.f, d );
srcy = (size_t)get_random_float( 0.f, 248.f, d );
subw = (size_t)get_random_float( 8.f, (float)(256 - srcx), d );
subh = (size_t)get_random_float( 8.f, (float)(256 - srcy), d );
dstx = (size_t)get_random_float( 0.f, (float)(256 - subw), d );
dsty = (size_t)get_random_float( 0.f, (float)(256 - subh), d );
err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d );
if( err ){
log_error( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
(int)dstx, (int)dsty, (int)subw, (int)subh );
}
else{
log_info( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
(int)dstx, (int)dsty, (int)subw, (int)subh );
}
}
free_mtdata(d);
return err;
} // end copy_image()
// Write random bytes into a device buffer, copy the buffer into a 256x256
// RGBA/UNORM_INT8 image with clEnqueueCopyBufferToImage, read the image back
// and compare it with the original bytes.
//
// Returns the result of the comparison (0 on match), or -1 when any
// allocation or OpenCL call fails. num_elements is not used.
int copy_array_to_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    // Declared and initialised up front so the shared exit label below never
    // skips an initialisation.
    cl_image_format rgba8 = { CL_RGBA, CL_UNORM_INT8 };
    cl_mem          image = NULL;
    cl_mem          staging = NULL;
    void            *host_src = NULL;
    void            *host_dst = NULL;
    int             err;
    int             status = -1;
    unsigned int    channels = (unsigned int)get_format_channel_count( &rgba8 );
    size_t          w = 256, h = 256;
    size_t          origin[3] = { 0, 0, 0 };
    size_t          region[3] = { w, h, 1 };
    size_t          total_bytes;
    MTdata          d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    total_bytes = w * h * ( sizeof( cl_char ) * channels );

    // The generator is only needed to produce the input bytes.
    d = init_genrand( gRandomSeed );
    host_src = (void *)generate_image( (int)total_bytes, d );
    free_mtdata(d); d = NULL;
    if( ! host_src ){
        log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
        goto done;
    }

    host_dst = malloc( total_bytes );
    if( ! host_dst ){
        log_error( " unable to allocate dst at %d x %d\n", (int)w, (int)h );
        goto done;
    }

    // destination image of the buffer-to-image copy
    image = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_WRITE), &rgba8, w, h, 0, NULL, &err );
    if( image == (cl_mem)0 ){
        log_error( " unable to create Image2D\n" );
        goto done;
    }

    // staging buffer that receives the host bytes
    staging = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), total_bytes, NULL, &err );
    if( staging == (cl_mem)0 ) {
        log_error( " unable to create array: " );
        goto done;
    }

    err = clEnqueueWriteBuffer( queue, staging, true, 0, total_bytes, (const void *)host_src, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clWriteArray failed" );
        goto done;
    }

    err = clEnqueueCopyBufferToImage( queue, staging, image, 0, origin, region, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clCopyArrayToImage failed" );
        goto done;
    }

    err = clEnqueueReadImage( queue, image, true, origin, region, 0, 0, host_dst, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clReadImage failed" );
        goto done;
    }

    status = verify_subimage( (cl_uchar *)host_src, (cl_uchar *)host_dst, 0, 0, 0, 0, w, h, w, channels );
    if( status ){
        log_error( " test failed: " );
    }
    else{
        log_info( " test passed: " );
    }

done:
    // cleanup of whatever was created before the exit
    if( staging )
        clReleaseMemObject( staging );
    if( image )
        clReleaseMemObject( image );
    free( host_dst );
    free( host_src );

    return status;
}   // end copy_array_to_image()

View File

@@ -0,0 +1,440 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
// Byte type used by the host-side reference filter below.
// NOTE(review): uchar is a typedef, so this #ifndef only guards against a
// preprocessor macro named uchar, not against an earlier typedef.
#ifndef uchar
typedef unsigned char uchar;
#endif
// Local MIN/MAX; any earlier macro of the same name is discarded first.
// Each argument may be evaluated twice, so avoid side effects in arguments.
#undef MIN
#define MIN(x,y) ( (x) < (y) ? (x) : (y) )
#undef MAX
#define MAX(x,y) ( (x) > (y) ? (x) : (y) )
//#define CREATE_OUTPUT 1
// Declared here only; the definition is not in the visible part of this file.
extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
//--- the code for kernel executables
// OpenCL C source of the "image_filter" kernel. For its pixel (tid_x, tid_y)
// each work-item walks a window of m rows by n columns around the pixel,
// takes the weights from filter_weights in sequence, skips weights equal to
// zero, and accumulates weight * read_imagef(src_image) using a
// clamp-to-edge, nearest-filter sampler. The sum is written to the same
// pixel of dst_image.
static const char *image_filter_src =
"constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
"\n"
"__kernel void image_filter( int n, int m, __global float *filter_weights,\n"
" read_only image2d_t src_image, write_only image2d_t dst_image )\n"
"{\n"
" int i, j;\n"
" int indx = 0;\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n"
"\n"
" for (i=-m/2; i<(m+1)/2; i++){\n"
" for (j=-n/2; j<(n+1)/2; j++){\n"
" float w = filter_weights[indx++];\n"
"\n"
" if (w != 0.0f){\n"
" filter_result += w * read_imagef(src_image, sampler,\n"
" (int2)(tid_x + j, tid_y + i));\n"
" }\n"
" }\n"
" }\n"
"\n"
" write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n"
"}\n";
//--- equivalent non-kernel code
// Host-side counterpart of the kernel's image read: fetch the nChannels
// bytes of pixel (x, y) from src as floats into srcRgb. Coordinates outside
// the w x h image are clamped to the nearest edge pixel.
static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
{
    int col = x;
    int row = y;
    int base;
    int c;

    // clamp the coords to [0, w-1] x [0, h-1]
    if( col < 0 )
        col = 0;
    if( col > w - 1 )
        col = w - 1;
    if( row < 0 )
        row = 0;
    if( row > h - 1 )
        row = h - 1;

    // index of the first channel of the pixel
    base = ( row * w + col ) * nChannels;

    // fill the return array
    for( c = 0; c < nChannels; ++c )
        srcRgb[c] = (float)src[base + c];
}   // end read_imagef()
// Host-side counterpart of the kernel's image write: store the nChannels
// floats of dstRgb, each converted to uchar, as pixel (x, y) of the image
// dst, whose rows are w pixels wide. The h parameter is not used.
static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb )
{
    // first channel of the destination pixel
    uchar   *pixel = dst + ( y * w + x ) * nChannels;
    int     c = 0;

    while( c < nChannels ){
        pixel[c] = (uchar)dstRgb[c];
        ++c;
    }
}   // end write_imagef()
// Host-side reference of the image_filter kernel for one pixel (x, y).
// Walks a window of m rows by n columns around the pixel, takes the weights
// from filter_weights in sequence, skips weights equal to zero, accumulates
// weight * source pixel per channel and writes the sums to dst at (x, y).
// The accumulator holds four channels.
static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst )
{
    float       acc[] = { 0.f, 0.f, 0.f, 0.f };
    float       sample[4];
    const float *weight = filter_weights;
    int         dy, dx, c;

    for( dy = -m/2; dy < (m+1)/2; dy++ ){
        for( dx = -n/2; dx < (n+1)/2; dx++ ){
            const float w = *weight++;

            // a zero weight contributes nothing, so the read is skipped
            if( w == 0 )
                continue;

            read_imagef( x + dx, y + dy, xsize, ysize, nChannels, src, sample );
            for( c = 0; c < nChannels; c++ )
                acc[c] += w * sample[c];
        }
    }

    write_imagef( dst, x, y, xsize, ysize, nChannels, acc );
}   // end basicFilterPixel()
//--- helper functions
// Allocate `elements` bytes and fill them with values drawn from the
// generator d. Returns the malloc'ed buffer, or NULL when the allocation
// fails.
static uchar *createImage( int elements, MTdata d)
{
    uchar   *image = (uchar *)malloc( elements * sizeof( cl_uchar ) );
    uchar   *cursor;
    int     remaining;

    if( image == NULL )
        return NULL;

    cursor = image;
    for( remaining = elements; remaining > 0; --remaining )
        *cursor++ = (uchar)genrand_int32(d);

    return image;
}   // end createImage()
// Compare two xsize x ysize images with nChannels bytes per pixel.
// Returns 0 when every pair of bytes differs by at most `tolerance`;
// otherwise logs the position and values of the first offending channel
// and returns -1.
static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels )
{
    size_t  offset = 0;     // running byte index into both images
    int     row, col, chan;

    for( row = 0; row < ysize; ++row ){
        for( col = 0; col < xsize; ++col ){
            for( chan = 0; chan < nChannels; ++chan, ++offset ){
                const int delta = (int)ptr0[offset] - (int)ptr1[offset];

                if( (uchar)abs( delta ) <= tolerance )
                    continue;

                log_error( " images differ at x,y = %d,%d, channel = %d, %d to %d\n", col, row, chan,
                          (int)ptr0[offset], (int)ptr1[offset] );
                return -1;
            }
        }
    }

    return 0;
}   // end verifyImages()
// Runs the "image_filter" kernel on a w x h CL_RGBA / CL_UNORM_INT8 image with a
// 3x3 weight table, validates the kernel event's profiling timestamps and reads
// the filtered image back.
//
//   device, context, queue - OpenCL objects supplied by the harness
//   w, h                   - image dimensions; also the 2D global work size
//   nChannels              - not used by this function
//   inptr                  - host pixels copied into the source image
//   outptr                 - host buffer that receives the filtered image
//
// Returns 0 on success and -1 on any OpenCL failure or when check_times()
// rejects the QUEUED/SUBMIT/START/END timestamps.
//
// Every handle starts out NULL and is released on a single cleanup path, so a
// failed clEnqueueNDRangeKernel no longer releases an uninitialized event.
static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels,
                         uchar *inptr, uchar *outptr )
{
    cl_program      program = NULL;
    cl_kernel       kernel = NULL;
    cl_mem          memobjs[3] = { NULL, NULL, NULL };
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_event        executeEvent = NULL;
    cl_ulong        queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    size_t          threads[2];
    size_t          origin[3] = { 0, 0, 0 };
    size_t          region[3];
#ifdef USE_LOCAL_THREADS
    size_t          localThreads[2];
#endif
    float           filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    int             filter_w = 3, filter_h = 3;
    int             err = 0;
    int             result = -1;
    int             i;

    // set thread dimensions
    threads[0] = w;
    threads[1] = h;

#ifdef USE_LOCAL_THREADS
    // NOTE(review): this branch is only built when USE_LOCAL_THREADS is defined and
    // is kept verbatim; `id` is not declared in this function - confirm before enabling.
    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
    test_error( err, "Unable to get thread group max size" );
    localThreads[1] = localThreads[0];
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[1] > threads[1] )
        localThreads[1] = threads[1];
#endif

    // allocate the input and output image memory objects
    memobjs[0] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR),
                                  &image_format_desc, w, h, 0, inptr, &err );
    if( memobjs[0] == (cl_mem)0 ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        goto cleanup;
    }

    memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        goto cleanup;
    }

    // allocate an array memory object to load the filter weights
    memobjs[2] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR ),
                                 sizeof( cl_float ) * filter_w * filter_h, filter_weights, &err );
    if( memobjs[2] == (cl_mem)0 ){
        log_error( " unable to create array using clCreateBuffer\n" );
        goto cleanup;
    }

    // create the compute program
    err = create_single_kernel_helper( context, &program, &kernel, 1, &image_filter_src, "image_filter" );
    if( err ){
        // Who owns a partially built program/kernel after a helper failure cannot
        // be told from here, so they are left alone exactly as before.
        program = NULL;
        kernel = NULL;
        goto cleanup;
    }

    // set the args values
    err  = clSetKernelArg( kernel, 0, sizeof( cl_int ), (void *)&filter_w );
    err |= clSetKernelArg( kernel, 1, sizeof( cl_int ), (void *)&filter_h );
    err |= clSetKernelArg( kernel, 2, sizeof( cl_mem ), (void *)&memobjs[2] );
    err |= clSetKernelArg( kernel, 3, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel, 4, sizeof( cl_mem ), (void *)&memobjs[1] );
    if( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed\n" );
        goto cleanup;
    }

    // The wait list is empty: zero events, NULL list.
#ifdef USE_LOCAL_THREADS
    err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, localThreads, 0, NULL, &executeEvent );
#else
    err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &executeEvent );
#endif
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed\n" );
        // The enqueue failed, so whatever executeEvent holds is not ours to release.
        executeEvent = NULL;
        goto cleanup;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &executeEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clWaitForEvents failed\n" );
        goto cleanup;
    }

    // test profiling; QUEUED and SUBMIT are polled while the implementation reports
    // that the information is not available yet
    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    // read output image (blocking)
    region[0] = (size_t)w;
    region[1] = (size_t)h;
    region[2] = 1;
    err = clEnqueueReadImage( queue, memobjs[1], CL_TRUE, origin, region, 0, 0, outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clReadImage failed\n" );
        goto cleanup;
    }

    if( check_times( queueStart, submitStart, writeStart, writeEnd, device ) == 0 )
        result = 0;

cleanup:
    // release event, kernel, program, and memory objects that were created
    if( executeEvent != NULL )
        clReleaseEvent( executeEvent );
    if( kernel != NULL )
        clReleaseKernel( kernel );
    if( program != NULL )
        clReleaseProgram( program );
    for( i = 2; i >= 0; i-- ){
        if( memobjs[i] != NULL )
            clReleaseMemObject( memobjs[i] );
    }

    return result;
}   // end kernelFilter()
// Host reference for the filter: applies the same 3x3 weight table to every
// pixel of the w x h image by calling basicFilterPixel() in row-major order,
// reading from inptr and writing to outptr. Always returns 0.
static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr )
{
    const float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    int kernelCols = 3;
    int kernelRows = 3;
    int row = 0;

    while( row < h ){
        int col = 0;

        while( col < w ){
            basicFilterPixel( col, row, kernelCols, kernelRows, w, h, nChannels, filter_weights, inptr, outptr );
            col++;
        }
        row++;
    }

    return 0;
}   // end of basicFilter()
// Test entry point: filters a random 256x256, 4-channel image on the device
// (kernelFilter, which also checks the profiling timestamps) and on the host
// (basicFilter), then requires the two results to agree within 1 per byte.
// num_elements is not used. Returns 0 on success, non-zero otherwise.
int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    int width = 256, height = 256;
    int channels = 4;
    int byteCount = width * height * channels;
    uchar *source;
    uchar *deviceResult;
    uchar *hostResult;
    MTdata rng;
    int status = 0;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // random input image; the generator is only needed while filling it
    rng = init_genrand( gRandomSeed );
    source = createImage( byteCount, rng );
    free_mtdata( rng );
    rng = NULL;
    if( source == NULL ){
        log_error( " unable to allocate %d bytes of memory for image\n", byteCount );
        return -1;
    }

    deviceResult = (uchar *)malloc( byteCount * sizeof( cl_uchar ) );
    if( deviceResult == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #1\n", byteCount );
        free( (void *)source );
        return -1;
    }

    hostResult = (uchar *)malloc( byteCount * sizeof( cl_uchar ) );
    if( hostResult == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #2\n", byteCount );
        free( (void *)deviceResult );
        free( (void *)source );
        return -1;
    }

    status = kernelFilter( device, context, queue, width, height, channels, source, deviceResult );
    if( status == 0 ){
        basicFilter( width, height, channels, source, hostResult );

        // verify that the images are the same
        status = verifyImages( deviceResult, hostResult, (uchar)0x1, width, height, channels );
        if( status != 0 )
            log_error( " images do not match\n" );
    }

    // clean up
    free( (void *)hostResult );
    free( (void *)deviceResult );
    free( (void *)source );

    return status;
}   // end execute()

View File

@@ -0,0 +1,314 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
// OpenCL C source of the "read3d" kernel. Each work-item reads one texel of the
// 3D source image at its (x, y, z) global id through the supplied sampler,
// multiplies the four float components by 255 and stores them, converted to
// unsigned char, at dst[4 * linear_index .. 4 * linear_index + 3], where
// linear_index = (z * image_height + y) * image_width + x.
static const char *read3d_kernel_code =
"\n"
"__kernel void read3d(read_only image3d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int tid_z = get_global_id(2);\n"
" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
" indx *= 4;\n"
" dst[indx+0] = (unsigned char)(color.x * 255.0f);\n"
" dst[indx+1] = (unsigned char)(color.y * 255.0f);\n"
" dst[indx+2] = (unsigned char)(color.z * 255.0f);\n"
" dst[indx+3] = (unsigned char)(color.w * 255.0f);\n"
"\n"
"}\n";
// Allocates `elements` bytes and fills them, first to last, with the truncated
// output of successive genrand_int32( d ) calls.
// Returns the new buffer (owned by the caller) or NULL if malloc fails.
static cl_uchar *createImage( int elements, MTdata d )
{
    cl_uchar *pixels = (cl_uchar *)malloc( elements * sizeof( cl_uchar ) );
    cl_uchar *out = pixels;
    int left = elements;

    if( pixels == NULL )
        return NULL;

    for( ; left > 0; left-- )
        *out++ = (cl_uchar)genrand_int32(d);

    return pixels;
}   // end createImage()
// Compares two interleaved 8-bit volumes of xsize * ysize * zsize texels with
// nChannels bytes per texel, in storage order (channel fastest, then x, y, z).
// Returns 0 when every byte pair differs by at most `tolerance`; otherwise
// logs the first mismatching texel/channel with both values and returns -1.
static int verifyImages( cl_uchar *ptr0, cl_uchar *ptr1, cl_uchar tolerance, int xsize, int ysize, int zsize, int nChannels )
{
    const cl_uchar *lhs = ptr0;
    const cl_uchar *rhs = ptr1;
    int slice, row, col, chan;

    for( slice = 0; slice < zsize; slice++ ){
        for( row = 0; row < ysize; row++ ){
            for( col = 0; col < xsize; col++ ){
                for( chan = 0; chan < nChannels; chan++, lhs++, rhs++ ){
                    int delta = (int)*lhs - (int)*rhs;

                    if( (cl_uchar)abs( delta ) <= tolerance )
                        continue;

                    log_error( " images differ at x,y,z = %d,%d,%d channel = %d, %d to %d\n",
                               col, row, slice, chan, (int)*lhs, (int)*rhs );
                    return -1;
                }
            }
        }
    }

    return 0;
}   // end verifyImages()
// Runs the "read3d" kernel over a w x h x d CL_RGBA / CL_UNORM_INT8 3D image,
// logs the kernel event's profiling intervals and reads the kernel output back.
//
//   device, context, queue - OpenCL objects supplied by the harness
//   w, h, d                - image dimensions; also the 3D global work size
//   nChannels              - bytes per texel in inptr/outptr
//   inptr                  - host texels copied into the source image
//   outptr                 - host buffer of w*h*d*nChannels bytes for the result
//
// Returns 0 on success, -1 on any OpenCL failure.
//
// Fixes over the previous version: the read-back copies exactly
// w*h*d*nChannels bytes (it used to copy four times that into outptr), and the
// sampler and kernel event are released on every path.
static int run_kernel( cl_device_id device, cl_context context, cl_command_queue queue,
                       int w, int h, int d, int nChannels, cl_uchar *inptr, cl_uchar *outptr )
{
    cl_program      program = NULL;
    cl_kernel       kernel = NULL;
    cl_mem          memobjs[2] = { NULL, NULL };
    cl_sampler      sampler = NULL;
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_sampler_properties properties[] = {
        CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
        CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
        0 };
    cl_event        executeEvent = NULL;
    cl_ulong        queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    size_t          threads[3];
    size_t          localThreads[3];
    size_t          outBytes;
    int             err = 0;
    int             result = -1;

    // set thread dimensions
    threads[0] = w;
    threads[1] = h;
    threads[2] = d;

    // NOTE(review): the size passed here is sizeof( cl_uint ), smaller than the
    // three size_t values CL_DEVICE_MAX_WORK_ITEM_SIZES returns, so this query
    // presumably fails and the 256x1x1 fallback is what actually runs. Confirm
    // before "fixing" the size: the per-dimension maxima are not clamped in z nor
    // against the work-group size limit, so using them directly could be invalid.
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( cl_uint ), (size_t*)localThreads, NULL );
    if (err)
    {
        localThreads[0] = 256; localThreads[1] = 1; localThreads[2] = 1;
        err = 0;
    }
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[1] > threads[1] )
        localThreads[1] = threads[1];

    sampler = clCreateSamplerWithProperties( context, properties, &err );
    if( err ){
        log_error( " clCreateSamplerWithProperties failed.\n" );
        // Not released: a handle returned together with an error is not trusted.
        sampler = NULL;
        goto cleanup;
    }

    // allocate the input image memory object
    memobjs[0] = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), &image_format_desc, w, h, d, 0, 0, inptr, &err );
    if( memobjs[0] == (cl_mem)0 ){
        log_error( " unable to create 3D image using create_image_3d\n" );
        goto cleanup;
    }

    // allocate the buffer the kernel writes its converted texels into; the size
    // is unchanged from the previous version (larger than what is read back)
    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_WRITE ), sizeof( cl_float ) * w*h*d*nChannels, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ){
        log_error( " unable to create array using clCreateBuffer\n" );
        goto cleanup;
    }

    // create the compute program
    err = create_single_kernel_helper( context, &program, &kernel, 1, &read3d_kernel_code, "read3d" );
    if( err ){
        // Who owns a partially built program/kernel after a helper failure cannot
        // be told from here, so they are left alone exactly as before.
        program = NULL;
        kernel = NULL;
        goto cleanup;
    }

    // set the args values
    err  = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *)&memobjs[1] );
    err |= clSetKernelArg( kernel, 2, sizeof sampler, &sampler );
    if( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed\n" );
        goto cleanup;
    }

    // The wait list is empty: zero events, NULL list.
    err = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, localThreads, 0, NULL, &executeEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed\n" );
        // The enqueue failed, so whatever executeEvent holds is not ours to release.
        executeEvent = NULL;
        goto cleanup;
    }

    if (executeEvent) {
        // This synchronization point is needed in order to assume the data is valid.
        // Getting profiling information is not a synchronization point.
        err = clWaitForEvents( 1, &executeEvent );
        if( err != CL_SUCCESS ){
            print_error( err, "clWaitForEvents failed\n" );
            goto cleanup;
        }

        // test profiling; QUEUED and SUBMIT are polled while the implementation
        // reports that the information is not available yet
        while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto cleanup;
        }

        // Profiling counters are in nanoseconds, hence 1e9 per second.
        log_info( "Profiling info:\n" );
        log_info( "Time from queue to start of clEnqueueNDRangeKernel: %f seconds\n", (double)(writeStart - queueStart) / 1e9 );
        log_info( "Time from start of clEnqueueNDRangeKernel to end: %f seconds\n", (double)(writeEnd - writeStart) / 1e9 );
    }

    // read back exactly as many bytes as the caller's output buffer holds
    outBytes = (size_t)w * (size_t)h * (size_t)d * (size_t)nChannels * sizeof( cl_uchar );
    err = clEnqueueReadBuffer( queue, memobjs[1], CL_TRUE, 0, outBytes, outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed\n" );
        goto cleanup;
    }

    result = 0;

cleanup:
    // release event, kernel, program, memory objects and sampler that were created
    if( executeEvent != NULL )
        clReleaseEvent( executeEvent );
    if( kernel != NULL )
        clReleaseKernel( kernel );
    if( program != NULL )
        clReleaseProgram( program );
    if( memobjs[1] != NULL )
        clReleaseMemObject( memobjs[1] );
    if( memobjs[0] != NULL )
        clReleaseMemObject( memobjs[0] );
    if( sampler != NULL )
        clReleaseSampler( sampler );

    return result;
}   // end run_kernel()
// Per the original author's note, the purpose of this test is to exercise code
// that causes a multipass launch for a single kernel execution at the CL level.
// That happens on the GPU for 3D launches, and also for global dimensions that
// exceed the hardware maximums. A 3D launch is used here to exercise the
// multipass events; if 3D stops being multipass in the future, the global
// dimensions will have to be made large enough to force it.
//
// Fills a random 256x128x32, 4-channel image, passes it through run_kernel()
// and requires the output to match the input within 1 per byte.
// num_elements is not used. Returns 0 on success, non-zero otherwise.
int execute_multipass( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    int width = 256, height = 128, depth = 32;
    int channels = 4;
    int byteCount = width * height * depth * channels;
    cl_uchar *source;
    cl_uchar *result;
    MTdata rng;
    int status = 0;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // random input image; the generator is only needed while filling it
    rng = init_genrand( gRandomSeed );
    source = createImage( byteCount, rng );
    free_mtdata( rng );
    rng = NULL;
    if( source == NULL ){
        log_error( " unable to allocate %d bytes of memory for image\n", byteCount );
        return -1;
    }

    result = (cl_uchar *)malloc( byteCount * sizeof( cl_uchar ) );
    if( result == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #1\n", byteCount );
        free( (void *)source );
        return -1;
    }

    status = run_kernel( device, context, queue, width, height, depth, channels, source, result );
    if( status == 0 ){
        // verify that the images are the same
        status = verifyImages( result, source, (cl_uchar)0x1, width, height, depth, channels );
        if( status != 0 )
            log_error( " images do not match\n" );
    }

    // clean up
    free( (void *)result );
    free( (void *)source );

    return status;
}   // end execute_multipass()

View File

@@ -0,0 +1,173 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
// (for example, generate_random_image_data()), the tests are required to declare
// the following variables (<rdar://problem/11111245>):
// Neither variable is read anywhere else in this file; they are defined here
// only because of the requirement described above.
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
bool gTestRounding = false;
// Table of test entry points handed to runTestHarness() in main(). Entry i is
// labelled by basefn_names[i], so the two tables must stay in the same order
// (a ct_assert after basefn_names checks that their lengths match).
// NOTE(review): execute_multipass() is not listed here - confirm whether that
// is intentional.
basefn basefn_list[] = {
read_int_array,
read_uint_array,
read_long_array,
read_ulong_array,
read_short_array,
read_ushort_array,
read_float_array,
read_char_array,
read_uchar_array,
read_struct_array,
write_int_array,
write_uint_array,
write_long_array,
write_ulong_array,
write_short_array,
write_ushort_array,
write_float_array,
write_char_array,
write_uchar_array,
write_struct_array,
read_float_image,
read_char_image,
read_uchar_image,
write_float_image,
write_char_image,
write_uchar_image,
copy_array,
copy_partial_array,
copy_image,
copy_array_to_image,
execute
};
// Names for the tests in basefn_list, in the same order; both tables are passed
// to runTestHarness() in main().
// NOTE(review): "read_image_int" and "read_image_uint" sit at the positions of
// read_char_image and read_uchar_image, while the write side uses
// "write_image_char"/"write_image_uchar". Looks like a naming slip, but the
// strings are passed to the harness as test names, so confirm before renaming.
const char *basefn_names[] = {
"read_array_int",
"read_array_uint",
"read_array_long",
"read_array_ulong",
"read_array_short",
"read_array_ushort",
"read_array_float",
"read_array_char",
"read_array_uchar",
"read_array_struct",
"write_array_int",
"write_array_uint",
"write_array_long",
"write_array_ulong",
"write_array_short",
"write_array_ushort",
"write_array_float",
"write_array_char",
"write_array_uchar",
"write_array_struct",
"read_image_float",
"read_image_int",
"read_image_uint",
"write_image_float",
"write_image_char",
"write_image_uchar",
"copy_array",
"copy_partial_array",
"copy_image",
"copy_array_to_image",
"execute",
};
// Compile-time guard: both tables must have the same number of entries.
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
// Number of registered tests, passed to runTestHarness().
int num_streamfns = sizeof(basefn_names) / sizeof(char *);
// Upper bounds, in seconds, accepted for each profiled interval. The timestamps
// are converted to seconds with a fixed factor of 1e-9 (nanoseconds); the
// device's CL_DEVICE_PROFILING_TIMER_RESOLUTION is only logged, not used.
#define QUEUE_SECONDS_LIMIT 30
#define SUBMIT_SECONDS_LIMIT 30
#define COMMAND_SECONDS_LIMIT 30

// Signed interval from `from` to `to`, in seconds, for nanosecond timestamps.
// Misordered timestamps give a negative value instead of wrapping around.
static double elapsed_seconds( cl_ulong from, cl_ulong to )
{
    if( to >= from )
        return (double)( to - from ) * 1e-9;
    return -(double)( from - to ) * 1e-9;
}

// Validates the four profiling timestamps of one command.
//
//   queueStart, commandSubmit, commandStart, commandEnd - values reported for
//       CL_PROFILING_COMMAND_QUEUED / SUBMIT / START / END
//   device - device whose profiling timer resolution is queried for the log
//
// Logs the raw values and the three intervals, then fails when the timestamps
// are out of order, when queued/start/end are all zero, or when an interval
// exceeds its *_SECONDS_LIMIT. Returns 0 when every check passes, -1 when a
// check fails, or the clGetDeviceInfo error via test_error().
int check_times(cl_ulong queueStart, cl_ulong commandSubmit, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device) {
    int err = 0;
    size_t profiling_resolution = 0;
    double queueTosubmitTimeS;
    double submitToStartTimeS;
    double startToEndTimeS;

    err = clGetDeviceInfo(device, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(profiling_resolution), &profiling_resolution, NULL);
    test_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILING_TIMER_RESOLUTION failed.\n");

    // Cast explicitly so the arguments match %llu whatever cl_ulong/size_t map to.
    log_info("CL_PROFILING_COMMAND_QUEUED: %llu CL_PROFILING_COMMAND_SUBMIT: %llu CL_PROFILING_COMMAND_START: %llu CL_PROFILING_COMMAND_END: %llu CL_DEVICE_PROFILING_TIMER_RESOLUTION: %llu\n",
             (unsigned long long)queueStart, (unsigned long long)commandSubmit,
             (unsigned long long)commandStart, (unsigned long long)commandEnd,
             (unsigned long long)profiling_resolution);

    queueTosubmitTimeS = elapsed_seconds( queueStart, commandSubmit );
    submitToStartTimeS = elapsed_seconds( commandSubmit, commandStart );
    startToEndTimeS = elapsed_seconds( commandStart, commandEnd );

    log_info( "Profiling info:\n" );
    log_info( "Time from queue to submit : %fms\n", (double)(queueTosubmitTimeS) * 1000.f );
    log_info( "Time from submit to start : %fms\n", (double)(submitToStartTimeS) * 1000.f );
    log_info( "Time from start to end: %fms\n", (double)(startToEndTimeS) * 1000.f );

    if(queueStart > commandSubmit) {
        log_error("CL_PROFILING_COMMAND_QUEUED > CL_PROFILING_COMMAND_SUBMIT.\n");
        err = -1;
    }
    if (commandSubmit > commandStart) {
        log_error("CL_PROFILING_COMMAND_SUBMIT > CL_PROFILING_COMMAND_START.\n");
        err = -1;
    }
    if (commandStart > commandEnd) {
        log_error("CL_PROFILING_COMMAND_START > CL_PROFILING_COMMAND_END.\n");
        err = -1;
    }
    if (queueStart == 0 && commandStart == 0 && commandEnd == 0) {
        log_error("All values are 0. This is exceedingly unlikely.\n");
        err = -1;
    }
    if (queueTosubmitTimeS > QUEUE_SECONDS_LIMIT) {
        log_error("Time between queue and submit is too big: %fs, test limit: %fs.\n",
                  queueTosubmitTimeS , (double)QUEUE_SECONDS_LIMIT);
        err = -1;
    }
    if (submitToStartTimeS > SUBMIT_SECONDS_LIMIT) {
        log_error("Time between submit and start is too big: %fs, test limit: %fs.\n",
                  submitToStartTimeS , (double)SUBMIT_SECONDS_LIMIT);
        err = -1;
    }
    if (startToEndTimeS > COMMAND_SECONDS_LIMIT) {
        log_error("Time between start and end is too big: %fs, test limit: %fs.\n",
                  startToEndTimeS , (double)COMMAND_SECONDS_LIMIT);
        err = -1;
    }
    return err;
}
// Program entry point: runs the tests registered in basefn_list/basefn_names
// through the shared harness, asking for a command queue created with
// CL_QUEUE_PROFILING_ENABLE, and returns the harness result.
int main( int argc, const char *argv[] )
{
    int status;

    status = runTestHarness( argc, argv,
                             num_streamfns, basefn_list, basefn_names,
                             false, false,
                             CL_QUEUE_PROFILING_ENABLE );
    return status;
}

View File

@@ -0,0 +1,65 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Shared declarations for the profiling tests: the harness helper headers, the
// timestamp validator and the test entry points.
// NOTE(review): read_half_array, write_half_array and test_parallel_kernels are
// declared here but are absent from main.c's basefn_list, and
// execute_multipass() has no declaration here - confirm whether intentional.
#ifndef __PROCS_H__
#define __PROCS_H__
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/imageHelpers.h"
#include "../../test_common/harness/mt19937.h"
// Validates QUEUED/SUBMIT/START/END profiling timestamps; defined in main.c.
extern int check_times(cl_ulong queueStart, cl_ulong submitStart, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device);
// Test entry points. All share the signature (device, context, queue, num_elements).
extern int read_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int copy_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int copy_partial_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int copy_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int copy_array_to_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int test_parallel_kernels( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
#endif // #ifndef __PROCS_H__

View File

@@ -0,0 +1,997 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
// Host-side reference constants for the stream-read tests. The integer values
// mirror the literals written by the kernels in this file.
#define TEST_PRIME_INT ((1<<16)+1)
#define TEST_PRIME_UINT ((1U<<16)+1U)
#define TEST_PRIME_LONG ((1LL<<32)+1LL)
#define TEST_PRIME_ULONG ((1ULL<<32)+1ULL)
// C has no 'S' integer suffix, so the former "1S" spelling could not compile
// when expanded; use the same (short)((1<<8)+1) expression as the short kernels.
#define TEST_PRIME_SHORT ((short)((1<<8)+1))
// Parenthesized so the cast cannot bind to a neighbouring operator.
#define TEST_PRIME_FLOAT ((float)3.40282346638528860e+38)
#define TEST_PRIME_HALF 119.f
#define TEST_BOOL true
#define TEST_PRIME_CHAR 0x77
// Local type aliases used by the host-side checks.
// NOTE(review): #ifndef only tests preprocessor macros, so these guards do not
// detect an existing typedef of the same name - confirm they behave as intended
// on every target platform.
#ifndef ulong
typedef unsigned long ulong;
#endif
#ifndef uchar
typedef unsigned char uchar;
#endif
// Two-field record (int, float) used by the struct variant of the test.
#ifndef TestStruct
typedef struct{
int a;
float b;
} TestStruct;
#endif
//--- the code for the kernel executables
// Kernel sources for int, int2, int4, int8 and int16 (one string per width).
// Each work-item stores the constant ((1<<16)+1) into dst[get_global_id(0)].
static const char *stream_read_int_kernel_code[] = {
"__kernel void test_stream_read_int(__global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_stream_read_int2(__global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_stream_read_int4(__global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_stream_read_int8(__global int8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_stream_read_int16(__global int16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n" };
// Kernel entry-point names, index-aligned with stream_read_int_kernel_code.
static const char *int_kernel_name[] = { "test_stream_read_int", "test_stream_read_int2", "test_stream_read_int4", "test_stream_read_int8", "test_stream_read_int16" };
// Kernel sources for uint, uint2, uint4, uint8 and uint16 (one string per width).
// Each work-item stores the constant ((1U<<16)+1U) into dst[get_global_id(0)].
const char *stream_read_uint_kernel_code[] = {
"__kernel void test_stream_read_uint(__global uint *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_stream_read_uint2(__global uint2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_stream_read_uint4(__global uint4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_stream_read_uint8(__global uint8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_stream_read_uint16(__global uint16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n" };
// Kernel entry-point names, index-aligned with stream_read_uint_kernel_code.
const char *uint_kernel_name[] = { "test_stream_read_uint", "test_stream_read_uint2", "test_stream_read_uint4", "test_stream_read_uint8", "test_stream_read_uint16" };
// Kernel sources for long, long2, long4, long8 and long16 (one string per width).
// Each work-item stores the constant ((1L<<32)+1L) into dst[get_global_id(0)].
const char *stream_read_long_kernel_code[] = {
"__kernel void test_stream_read_long(__global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_stream_read_long2(__global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_stream_read_long4(__global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_stream_read_long8(__global long8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_stream_read_long16(__global long16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n" };
// Kernel entry-point names, index-aligned with stream_read_long_kernel_code.
const char *long_kernel_name[] = { "test_stream_read_long", "test_stream_read_long2", "test_stream_read_long4", "test_stream_read_long8", "test_stream_read_long16" };
// Kernel sources for ulong, ulong2, ulong4, ulong8 and ulong16 (one string per width).
// Each work-item stores the constant ((1UL<<32)+1UL) into dst[get_global_id(0)].
const char *stream_read_ulong_kernel_code[] = {
"__kernel void test_stream_read_ulong(__global ulong *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_stream_read_ulong2(__global ulong2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_stream_read_ulong4(__global ulong4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_stream_read_ulong8(__global ulong8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_stream_read_ulong16(__global ulong16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n" };
// Kernel entry-point names, index-aligned with stream_read_ulong_kernel_code.
const char *ulong_kernel_name[] = { "test_stream_read_ulong", "test_stream_read_ulong2", "test_stream_read_ulong4", "test_stream_read_ulong8", "test_stream_read_ulong16" };
// OpenCL C sources for the short read test: one single-kernel program per vector
// width (short, short2, short4, short8, short16). Every work-item stores
// (short)((1<<8)+1) into dst[get_global_id(0)]; verify_read_short() checks for
// the same expression on the host.
const char *stream_read_short_kernel_code[] = {
"__kernel void test_stream_read_short(__global short *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_stream_read_short2(__global short2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_stream_read_short4(__global short4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_stream_read_short8(__global short8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_stream_read_short16(__global short16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n" };
// Kernel entry-point names, in the same order as stream_read_short_kernel_code.
const char *short_kernel_name[] = { "test_stream_read_short", "test_stream_read_short2", "test_stream_read_short4", "test_stream_read_short8", "test_stream_read_short16" };
// OpenCL C sources for the ushort read test: one single-kernel program per vector
// width (ushort, ushort2, ushort4, ushort8, ushort16). Every work-item stores
// (ushort)((1<<8)+1) into dst[get_global_id(0)]; verify_read_ushort() checks for
// the same value on the host.
const char *stream_read_ushort_kernel_code[] = {
"__kernel void test_stream_read_ushort(__global ushort *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_stream_read_ushort2(__global ushort2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_stream_read_ushort4(__global ushort4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_stream_read_ushort8(__global ushort8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_stream_read_ushort16(__global ushort16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n" };
// Kernel entry-point names, in the same order as stream_read_ushort_kernel_code.
// NOTE(review): this is the only name table in this group declared static — confirm
// whether the difference in linkage is intentional.
static const char *ushort_kernel_name[] = { "test_stream_read_ushort", "test_stream_read_ushort2", "test_stream_read_ushort4", "test_stream_read_ushort8", "test_stream_read_ushort16" };
// OpenCL C sources for the float read test: one single-kernel program per vector
// width (float, float2, float4, float8, float16). Every work-item stores
// (float)3.40282346638528860e+38 into dst[get_global_id(0)].
const char *stream_read_float_kernel_code[] = {
"__kernel void test_stream_read_float(__global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_stream_read_float2(__global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_stream_read_float4(__global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_stream_read_float8(__global float8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_stream_read_float16(__global float16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n" };
// Kernel entry-point names, in the same order as stream_read_float_kernel_code.
const char *float_kernel_name[] = { "test_stream_read_float", "test_stream_read_float2", "test_stream_read_float4", "test_stream_read_float8", "test_stream_read_float16" };
// OpenCL C sources for the half read test: one single-kernel program per vector
// width (half, half2, half4, half8, half16). Every work-item stores (half)119
// into dst[get_global_id(0)].
// NOTE(review): these kernels assign through half pointers and use half vector
// types without enabling cl_khr_fp16 — presumably they only build on devices
// that support that extension; confirm how/whether read_half_array is registered.
const char *stream_read_half_kernel_code[] = {
"__kernel void test_stream_read_half(__global half *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n",
"__kernel void test_stream_read_half2(__global half2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n",
"__kernel void test_stream_read_half4(__global half4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n",
"__kernel void test_stream_read_half8(__global half8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n",
"__kernel void test_stream_read_half16(__global half16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n" };
// Kernel entry-point names, in the same order as stream_read_half_kernel_code.
const char *half_kernel_name[] = { "test_stream_read_half", "test_stream_read_half2", "test_stream_read_half4", "test_stream_read_half8", "test_stream_read_half16" };
// OpenCL C sources for the char read test: one single-kernel program per vector
// width (char, char2, char4, char8, char16). Every work-item stores (char)'w'
// into dst[get_global_id(0)].
const char *stream_read_char_kernel_code[] = {
"__kernel void test_stream_read_char(__global char *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_stream_read_char2(__global char2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_stream_read_char4(__global char4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_stream_read_char8(__global char8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_stream_read_char16(__global char16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n" };
// Kernel entry-point names, in the same order as stream_read_char_kernel_code.
const char *char_kernel_name[] = { "test_stream_read_char", "test_stream_read_char2", "test_stream_read_char4", "test_stream_read_char8", "test_stream_read_char16" };
// OpenCL C sources for the uchar read test: one single-kernel program per vector
// width (uchar, uchar2, uchar4, uchar8, uchar16). Every work-item stores 'w'
// into dst[get_global_id(0)]. The scalar kernel assigns 'w' without a cast,
// while the vector kernels cast it to uchar first.
const char *stream_read_uchar_kernel_code[] = {
"__kernel void test_stream_read_uchar(__global uchar *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = 'w';\n"
"}\n",
"__kernel void test_stream_read_uchar2(__global uchar2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n",
"__kernel void test_stream_read_uchar4(__global uchar4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n",
"__kernel void test_stream_read_uchar8(__global uchar8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n",
"__kernel void test_stream_read_uchar16(__global uchar16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n" };
// Kernel entry-point names, in the same order as stream_read_uchar_kernel_code.
const char *uchar_kernel_name[] = { "test_stream_read_uchar", "test_stream_read_uchar2", "test_stream_read_uchar4", "test_stream_read_uchar8", "test_stream_read_uchar16" };
// OpenCL C source for the struct read test (a single program, no vector variants).
// The device-side TestStruct has an int member a and a float member b; every
// work-item writes a = (1<<16)+1 and b = (float)3.40282346638528860e+38 into
// dst[get_global_id(0)]. verify_read_struct() checks both members on the host.
const char *stream_read_struct_kernel_code[] = {
"typedef struct{\n"
"int a;\n"
"float b;\n"
"} TestStruct;\n"
"__kernel void test_stream_read_struct(__global TestStruct *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid].a = ((1<<16)+1);\n"
" dst[tid].b = (float)3.40282346638528860e+38;\n"
"}\n" };
// Entry-point name of the single struct kernel.
const char *struct_kernel_name[] = { "test_stream_read_struct" };
//--- the verify functions
// Checks that the first n int values at ptr all equal TEST_PRIME_INT.
// Returns 0 when every value matches, -1 at the first mismatch.
static int verify_read_int(void *ptr, int n)
{
    const int *cursor = (const int *)ptr;
    int remaining;

    for (remaining = n; remaining > 0; remaining--, cursor++) {
        if (*cursor != TEST_PRIME_INT)
            return -1;
    }

    return 0;
}
// Checks that the first n cl_uint values at ptr all equal TEST_PRIME_UINT.
// Returns 0 when every value matches, -1 at the first mismatch.
static int verify_read_uint(void *ptr, int n)
{
    const cl_uint *cursor = (const cl_uint *)ptr;
    int remaining;

    for (remaining = n; remaining > 0; remaining--, cursor++) {
        if (*cursor != TEST_PRIME_UINT)
            return -1;
    }

    return 0;
}
static int verify_read_long(void *ptr, int n)
{
int i;
cl_long *outptr = (cl_long *)ptr;
for (i=0; i<n; i++){
if( outptr[i] != TEST_PRIME_LONG )
return -1;
}
return 0;
}
// Checks that the first n cl_ulong values at ptr all equal TEST_PRIME_ULONG.
// Returns 0 when every value matches, -1 at the first mismatch.
static int verify_read_ulong(void *ptr, int n)
{
    const cl_ulong *cursor = (const cl_ulong *)ptr;
    int remaining;

    for (remaining = n; remaining > 0; remaining--, cursor++) {
        if (*cursor != TEST_PRIME_ULONG)
            return -1;
    }

    return 0;
}
// Checks that the first n short values at ptr all equal (short)((1<<8)+1),
// the constant written by the short read kernels.
// Returns 0 when every value matches, -1 at the first mismatch.
static int verify_read_short(void *ptr, int n)
{
    const short *cursor = (const short *)ptr;
    int remaining;

    for (remaining = n; remaining > 0; remaining--, cursor++) {
        if (*cursor != (short)((1<<8)+1))
            return -1;
    }

    return 0;
}
// Checks that the first n cl_ushort values at ptr all equal (cl_ushort)((1<<8)+1),
// the constant written by the ushort read kernels.
// Returns 0 when every value matches, -1 at the first mismatch.
static int verify_read_ushort(void *ptr, int n)
{
    const cl_ushort *cursor = (const cl_ushort *)ptr;
    int remaining;

    for (remaining = n; remaining > 0; remaining--, cursor++) {
        if (*cursor != (cl_ushort)((1<<8)+1))
            return -1;
    }

    return 0;
}
// Checks that the first n float values at ptr all compare equal to TEST_PRIME_FLOAT.
// Returns 0 when every value matches, -1 at the first mismatch.
static int verify_read_float( void *ptr, int n )
{
    const float *cursor = (const float *)ptr;
    int remaining;

    for (remaining = n; remaining > 0; remaining--, cursor++) {
        if (*cursor != TEST_PRIME_FLOAT)
            return -1;
    }

    return 0;
}
// Verifier for the half read test. Reads the buffer as float values and checks
// only the first n / 2 of them against TEST_PRIME_HALF.
// NOTE(review): presumably n / 2 is used because two 16-bit halves occupy one
// float-sized word — confirm against the definition of TEST_PRIME_HALF.
// Returns 0 when every checked value matches, -1 at the first mismatch.
static int verify_read_half( void *ptr, int n )
{
    const float *cursor = (const float *)ptr;
    int remaining;

    for (remaining = n / 2; remaining > 0; remaining--, cursor++) {
        if (*cursor != TEST_PRIME_HALF)
            return -1;
    }

    return 0;
}
// Checks that the first n char values at ptr all equal TEST_PRIME_CHAR.
// Returns 0 when every value matches, -1 at the first mismatch.
static int verify_read_char(void *ptr, int n)
{
    const char *cursor = (const char *)ptr;
    int remaining;

    for (remaining = n; remaining > 0; remaining--, cursor++) {
        if (*cursor != TEST_PRIME_CHAR)
            return -1;
    }

    return 0;
}
static int verify_read_uchar( void *ptr, int n )
{
int i;
uchar *outptr = (uchar *)ptr;
for (i=0; i<n; i++){
if( outptr[i] != TEST_PRIME_CHAR )
return -1;
}
return 0;
}
static int verify_read_struct( void *ptr, int n )
{
int i;
TestStruct *outptr = (TestStruct *)ptr;
for ( i = 0; i < n; i++ ){
if( ( outptr[i].a != TEST_PRIME_INT ) ||
( outptr[i].b != TEST_PRIME_FLOAT ) )
return -1;
}
return 0;
}
//----- the test functions
// Runs the buffer-read profiling test for one element type.
//
// For each of the first `loops` entries of kernelCode/kernelName (vector widths
// 1, 2, 4, 8, 16) this function builds the kernel, runs it over num_elements
// work-items to fill a device buffer, reads the buffer back with a non-blocking
// clEnqueueReadBuffer, waits for the read event, and then
//   - verifies the host copy with fn( data, num_elements * width ), and
//   - hands the read event's QUEUED / SUBMIT / START / END profiling timestamps
//     to check_times().
//
// Parameters:
//   device, context, queue  OpenCL objects to run on (the queue presumably has
//                           profiling enabled by the harness -- confirm in main.c)
//   num_elements            number of work-items, i.e. elements per buffer
//   size                    size in bytes of the scalar element type
//   type                    type name used in log messages
//   loops                   number of vector widths to test; at most 5
//   kernelCode, kernelName  parallel tables, one program source / entry point per width
//   fn                      verifier; returns 0 when the data is as expected
//
// Returns the number of verification and timing failures, or -1 if an
// allocation or OpenCL call fails.
int test_stream_read( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, size_t size, const char *type, int loops,
                      const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
{
    cl_mem      streams[5];
    void        *outptr[5];
    cl_program  program[5];
    cl_kernel   kernel[5];
    cl_event    readEvent;
    cl_ulong    queueStart, submitStart, readStart, readEnd;
    size_t      threads[1];
#ifdef USE_LOCAL_THREADS
    size_t      localThreads[1];
#endif
    int         err, err_count = 0;
    int         i;
    size_t      ptrSizes[5];

    // All per-width arrays above have five slots; never index past them.
    if( loops > 5 ){
        log_error( " test_stream_read: loops (%d) exceeds the 5 supported vector widths\n", loops );
        return -1;
    }

    threads[0] = (size_t)num_elements;

#ifdef USE_LOCAL_THREADS
    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
    if( err != CL_SUCCESS ){
        log_error( "Unable to get thread group max size: %d", err );
        return -1;
    }
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
#endif

    // Element size doubles with every vector width: 1, 2, 4, 8, 16 scalars.
    ptrSizes[0] = size;
    ptrSizes[1] = ptrSizes[0] << 1;
    ptrSizes[2] = ptrSizes[1] << 1;
    ptrSizes[3] = ptrSizes[2] << 1;
    ptrSizes[4] = ptrSizes[3] << 1;

    for( i = 0; i < loops; i++ ){
        outptr[i] = malloc( ptrSizes[i] * num_elements );
        if( ! outptr[i] ){
            log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
            return -1;
        }

        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err );
        if( !streams[i] ){
            log_error( " clCreateBuffer failed\n" );
            goto fail_outptr;
        }

        err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
        if( err ){
            log_error( " Error creating program for %s\n", type );
            goto fail_stream;
        }

        err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[i] );
        if( err != CL_SUCCESS ){
            print_error( err, "clSetKernelArg failed" );
            goto fail_kernel;
        }

#ifdef USE_LOCAL_THREADS
        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
#else
        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
#endif
        if( err != CL_SUCCESS ){
            print_error( err, "clEnqueueNDRangeKernel failed" );
            goto fail_kernel;
        }

        err = clEnqueueReadBuffer( queue, streams[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &readEvent );
        if( err != CL_SUCCESS ){
            print_error( err, "clEnqueueReadBuffer failed" );
            goto fail_kernel;
        }

        // From here on readEvent is a live object that must be released on
        // every exit path, including the failure paths below.
        err = clWaitForEvents( 1, &readEvent );
        if( err != CL_SUCCESS )
        {
            print_error( err, "Unable to wait for event completion" );
            goto fail_event;
        }

        err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto fail_event;
        }

        err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto fail_event;
        }

        err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto fail_event;
        }

        err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto fail_event;
        }

        // The verifier counts scalars, so scale by the vector width (1 << i).
        if (fn(outptr[i], num_elements*(1<<i))){
            log_error( " %s%d data failed to verify\n", type, 1<<i );
            err_count++;
        }
        else{
            log_info( " %s%d data verified\n", type, 1<<i );
        }

        if (check_times(queueStart, submitStart, readStart, readEnd, device))
            err_count++;

        // per-iteration cleanup
        clReleaseEvent(readEvent);
        clReleaseKernel( kernel[i] );
        clReleaseProgram( program[i] );
        clReleaseMemObject( streams[i] );
        free( outptr[i] );
    }

    return err_count;

    // Failure unwinding for iteration i: each label releases what was acquired
    // before the failing step and falls through to the earlier ones.
fail_event:
    clReleaseEvent( readEvent );
fail_kernel:
    clReleaseKernel( kernel[i] );
    clReleaseProgram( program[i] );
fail_stream:
    clReleaseMemObject( streams[i] );
fail_outptr:
    free( outptr[i] );
    return -1;

}   // end test_stream_read()
// Read-profiling test entry point for cl_int buffers: runs test_stream_read()
// over all five int vector widths with verify_read_int as the checker.
int read_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements, sizeof( cl_int ), "int", 5,
                             stream_read_int_kernel_code, int_kernel_name, verify_read_int );
}
// Read-profiling test entry point for cl_uint buffers: runs test_stream_read()
// over all five uint vector widths with verify_read_uint as the checker.
int read_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements, sizeof( cl_uint ), "uint", 5,
                             stream_read_uint_kernel_code, uint_kernel_name, verify_read_uint );
}
// Read-profiling test entry point for cl_long buffers. When gHasLong is false
// the test is skipped and reported as passing (CL_SUCCESS); otherwise it runs
// test_stream_read() over all five long vector widths with verify_read_long
// as the checker.
int read_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    if (!gHasLong)
    {
        // Terminate the line so the skip notice does not run into the next log entry.
        log_info("read_long_array: Long types unsupported, skipping.\n");
        return CL_SUCCESS;
    }

    return test_stream_read( device, context, queue, num_elements, sizeof( cl_long ), "long", 5,
                             stream_read_long_kernel_code, long_kernel_name, verify_read_long );
}
// Read-profiling test entry point for cl_ulong buffers. When gHasLong is false
// the test is skipped and reported as passing (CL_SUCCESS); otherwise it runs
// test_stream_read() over all five ulong vector widths with verify_read_ulong
// as the checker.
int read_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    if (!gHasLong)
    {
        // Name this test (not read_long_array) in the skip notice and end the line.
        log_info("read_ulong_array: Long types unsupported, skipping.\n");
        return CL_SUCCESS;
    }

    return test_stream_read( device, context, queue, num_elements, sizeof( cl_ulong ), "ulong", 5,
                             stream_read_ulong_kernel_code, ulong_kernel_name, verify_read_ulong );
}
// Read-profiling test entry point for cl_short buffers: runs test_stream_read()
// over all five short vector widths with verify_read_short as the checker.
int read_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements, sizeof( cl_short ), "short", 5,
                             stream_read_short_kernel_code, short_kernel_name, verify_read_short );
}
// Read-profiling test entry point for cl_ushort buffers: runs test_stream_read()
// over all five ushort vector widths with verify_read_ushort as the checker.
int read_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements, sizeof( cl_ushort ), "ushort", 5,
                             stream_read_ushort_kernel_code, ushort_kernel_name, verify_read_ushort );
}
// Read-profiling test entry point for cl_float buffers: runs test_stream_read()
// over all five float vector widths with verify_read_float as the checker.
int read_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements, sizeof( cl_float ), "float", 5,
                             stream_read_float_kernel_code, float_kernel_name, verify_read_float );
}
// Read-profiling test entry point for cl_half buffers: runs test_stream_read()
// over all five half vector widths with verify_read_half as the checker.
int read_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements, sizeof( cl_half ), "half", 5,
                             stream_read_half_kernel_code, half_kernel_name, verify_read_half );
}
// Read-profiling test entry point for cl_char buffers: runs test_stream_read()
// over all five char vector widths with verify_read_char as the checker.
int read_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements, sizeof( cl_char ), "char", 5,
                             stream_read_char_kernel_code, char_kernel_name, verify_read_char );
}
// Read-profiling test entry point for cl_uchar buffers: runs test_stream_read()
// over all five uchar vector widths with verify_read_uchar as the checker.
int read_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements, sizeof( cl_uchar ), "uchar", 5,
                             stream_read_uchar_kernel_code, uchar_kernel_name, verify_read_uchar );
}
// Read-profiling test entry point for TestStruct buffers. Only one kernel
// exists for the struct case, so test_stream_read() is run with loops = 1 and
// verify_read_struct as the checker.
int read_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements, sizeof( TestStruct ), "struct", 1,
                             stream_read_struct_kernel_code, struct_kernel_name, verify_read_struct );
}
/*
int read_struct_array(cl_device_group device, cl_device id, cl_context context, int num_elements)
{
cl_mem streams[1];
TestStruct *output_ptr;
cl_program program[1];
cl_kernel kernel[1];
void *values[1];
size_t sizes[1] = { sizeof(cl_stream) };
size_t threads[1];
#ifdef USE_LOCAL_THREADS
size_t localThreads[1];
#endif
int err;
size_t objSize = sizeof(TestStruct);
threads[0] = (size_t)num_elements;
#ifdef USE_LOCAL_THREADS
err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
if( err != CL_SUCCESS ){
log_error( "Unable to get thread group max size: %d", err );
return -1;
}
if( localThreads[0] > threads[0] )
localThreads[0] = threads[0];
#endif
output_ptr = malloc(objSize * num_elements);
if( ! output_ptr ){
log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
return -1;
}
streams[0] = clCreateBuffer( device, (cl_mem_flags)(CL_MEM_READ_WRITE), objSize * num_elements, NULL );
if( !streams[0] ){
log_error( " clCreateBuffer failed\n" );
free( output_ptr );
return -1;
}
err = create_program_and_kernel( device, stream_read_struct_kernel_code, "test_stream_read_struct", &program[0], &kernel[0]);
if( err ){
clReleaseProgram( program[0] );
free( output_ptr );
return -1;
}
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&streams[0] );
if( err != CL_SUCCESS){
print_error( err, "clSetKernelArg failed" );
clReleaseProgram( program[0] );
clReleaseKernel( kernel[0] );
clReleaseMemObject( streams[0] );
free( output_ptr );
return -1;
}
#ifdef USE_LOCAL_THREADS
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, localThreads, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
#endif
if( err != CL_SUCCESS ){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseProgram( program[0] );
clReleaseKernel( kernel[0] );
clReleaseMemObject( streams[0] );
free( output_ptr );
return -1;
}
err = clEnqueueReadBuffer( queue, streams[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
if( err != CL_SUCCESS){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseProgram( program[0] );
clReleaseKernel( kernel[0] );
clReleaseMemObject( streams[0] );
free( output_ptr );
return -1;
}
if (verify_read_struct(output_ptr, num_elements)){
log_error(" struct test failed\n");
err = -1;
}
else{
log_info(" struct test passed\n");
err = 0;
}
// cleanup
clReleaseProgram( program[0] );
clReleaseKernel( kernel[0] );
clReleaseMemObject( streams[0] );
free( output_ptr );
return err;
}
*/

View File

@@ -0,0 +1,386 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
//--- the code for the kernel executables
// OpenCL C sources used by the image read tests. Each kernel copies four
// consecutive bytes per pixel from a linear buffer (src) into a 2D image
// (dstimg), which the host then reads back with clEnqueueReadImage:
//   [0] testWritef  - uchar source, scaled by 1/255 and written with write_imagef
//   [1] testWritei  - char source, written with write_imagei
//   [2] testWriteui - uchar source, written with write_imageui
static const char *readKernelCode[] = {
"__kernel void testWritef(__global uchar *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" float4 color;\n"
"\n"
" indx *= 4;\n"
" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
" color /= (float4)(255.f, 255.f, 255.f, 255.f);\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n",
"__kernel void testWritei(__global char *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" int4 color;\n"
"\n"
" indx *= 4;\n"
" color.x = (int)src[indx+0];\n"
" color.y = (int)src[indx+1];\n"
" color.z = (int)src[indx+2];\n"
" color.w = (int)src[indx+3];\n"
" write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n",
"__kernel void testWriteui(__global uchar *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" uint4 color;\n"
"\n"
" indx *= 4;\n"
" color.x = (uint)src[indx+0];\n"
" color.y = (uint)src[indx+1];\n"
" color.z = (uint)src[indx+2];\n"
" color.w = (uint)src[indx+3];\n"
" write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n" };
// Kernel entry-point names, in the same order as readKernelCode.
static const char *readKernelName[] = { "testWritef", "testWritei", "testWriteui" };
//--- helper functions
// Allocates an n-byte buffer and fills it with values drawn from
// genrand_int32( d ), each truncated to cl_uchar.
// Returns the buffer, or NULL if the allocation fails. The caller owns the
// buffer and releases it with free().
static cl_uchar *generateImage( int n, MTdata d )
{
    cl_uchar    *ptr = (cl_uchar *)malloc( n * sizeof( cl_uchar ) );
    int         i;

    // The caller tests the result for NULL, so report failure instead of
    // writing through a null pointer.
    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (cl_uchar)genrand_int32( d );
    }

    return ptr;
}
// Allocates an n-byte buffer and fills it with values drawn from
// genrand_int32( d ), each converted to char.
// Returns the buffer, or NULL if the allocation fails. The caller owns the
// buffer and releases it with free().
static char *generateSignedImage( int n, MTdata d )
{
    char    *ptr = (char *)malloc( n * sizeof( char ) );
    int     i;

    // The caller tests the result for NULL, so report failure instead of
    // writing through a null pointer.
    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (char)genrand_int32( d );
    }

    return ptr;
}
// Byte-wise comparison of two w x h images with four one-byte channels per
// pixel. Returns 0 when all w * h * 4 bytes are equal, -1 at the first
// difference.
static int verifyImage( cl_uchar *image, cl_uchar *outptr, int w, int h )
{
    const int   total = w * h * 4;
    int         pos = 0;

    while( pos < total ){
        if( image[pos] != outptr[pos] )
            return -1;
        pos++;
    }

    return 0;
}
//----- the test functions
// Image-read profiling test for one image format.
//
// Fills a 64 x 64 four-channel host buffer with random bytes, uploads it to a
// device buffer, runs the given kernel to copy the buffer into a 2D image,
// reads the image back with a non-blocking clEnqueueReadImage, and then
//   - compares the bytes read back with the bytes that were uploaded, and
//   - hands the read event's QUEUED / SUBMIT / START / END profiling timestamps
//     to check_times().
//
// Parameters:
//   device, context, queue  OpenCL objects to run on
//   numElements             unused
//   code, name              OpenCL C source and entry-point name of the copy kernel
//   image_format_desc       format of the image to create and read
//
// Returns 0 on success and -1 on any allocation, OpenCL, verification or
// timing failure (image-less devices are handled by
// PASSIVE_REQUIRE_IMAGE_SUPPORT).
int read_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements, const char *code, const char *name,
                   cl_image_format image_format_desc )
{
    cl_mem          memobjs[2];
    cl_program      program[1];
    void            *inptr;
    void            *dst = NULL;
    cl_kernel       kernel[1];
    cl_event        readEvent;
    cl_ulong        queueStart, submitStart, readStart, readEnd;
    size_t          threads[2];
    size_t          origin[3] = { 0, 0, 0 };
    size_t          region[3];
#ifdef USE_LOCAL_THREADS
    size_t          localThreads[2];
#endif
    int             err;
    int             result = -1;    // stays -1 unless the whole sequence succeeds
    int             w = 64, h = 64;
    cl_mem_flags    flags;
    size_t          element_nbytes;
    size_t          num_bytes;
    size_t          channel_nbytes = sizeof( cl_uchar );
    MTdata          d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    element_nbytes = channel_nbytes * get_format_channel_count( &image_format_desc );
    num_bytes = w * h * element_nbytes;

    threads[0] = (size_t)w;
    threads[1] = (size_t)h;

    // Explicit conversions: brace-initialising size_t from int is a narrowing
    // conversion when this file is built as C++.
    region[0] = (size_t)w;
    region[1] = (size_t)h;
    region[2] = 1;

#ifdef USE_LOCAL_THREADS
    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
    test_error( err, "Unable to get thread group max size" );
    localThreads[1] = localThreads[0];
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[1] > threads[1] )
        localThreads[1] = threads[1];
#endif

    d = init_genrand( gRandomSeed );
    if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
        inptr = (void *)generateSignedImage( w * h * 4, d );
    else
        inptr = (void *)generateImage( w * h * 4, d );
    free_mtdata(d); d = NULL;

    if( ! inptr ){
        log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
        return -1;
    }

    dst = malloc( num_bytes );
    if( ! dst ){
        log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h );
        goto free_host;
    }

    // allocate the input and output image memory objects
    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
    memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
    if( memobjs[0] == (cl_mem)0 ){
        log_error("unable to create Image2D\n");
        goto free_host;
    }

    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * 4 * w * h, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ){
        log_error("unable to create array\n");
        goto release_image;
    }

    err = clEnqueueWriteBuffer( queue, memobjs[1], true, 0, num_bytes, inptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        log_error("clWriteArray failed\n");
        goto release_buffer;
    }

    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &code, name );
    if( err ){
        log_error( "Unable to create program and kernel\n" );
        goto release_buffer;
    }

    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[1] );
    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[0] );
    if( err != CL_SUCCESS ){
        log_error( "clSetKernelArg failed\n" );
        goto release_kernel;
    }

#ifdef USE_LOCAL_THREADS
    err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
#endif
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed" );
        goto release_kernel;
    }

    err = clEnqueueReadImage( queue, memobjs[0], false, origin, region, 0, 0, dst, 0, NULL, &readEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clReadImage2D failed" );
        goto release_kernel;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &readEvent );
    if( err != CL_SUCCESS )
    {
        print_error( err, "Unable to wait for event completion" );
        goto release_event;
    }

    // The event is complete once clWaitForEvents returns, so each timestamp is
    // queried exactly once. Retrying on CL_PROFILING_INFO_NOT_AVAILABLE could
    // only spin forever (e.g. when the queue has profiling disabled).
    err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    result = verifyImage( (cl_uchar *)inptr, (cl_uchar *)dst, w, h );
    if( result ){
        log_error( "Image failed to verify.\n" );
    }
    else{
        log_info( "Image verified.\n" );
    }

    if (check_times(queueStart, submitStart, readStart, readEnd, device))
        result = -1;

    // Shared cleanup: the success path falls through all labels; each failure
    // path enters at the label matching what had been acquired so far.
release_event:
    clReleaseEvent(readEvent);
release_kernel:
    clReleaseKernel(kernel[0]);
    clReleaseProgram(program[0]);
release_buffer:
    clReleaseMemObject(memobjs[1]);
release_image:
    clReleaseMemObject(memobjs[0]);
free_host:
    free(dst);
    free(inptr);

    return result;

}   // end read_image()
// Image-read profiling test for CL_RGBA / CL_UNORM_INT8 images, using the
// testWritef kernel (readKernelCode[0]). Source bytes are unsigned, 0 to 255.
int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    cl_image_format format;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_UNORM_INT8;

    return read_image( device, context, queue, numElements, readKernelCode[0], readKernelName[0], format );
}
// Image-read profiling test for CL_RGBA / CL_SIGNED_INT8 images, using the
// testWritei kernel (readKernelCode[1]). Source bytes are signed, -128 to 127.
int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    cl_image_format format;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_SIGNED_INT8;

    return read_image( device, context, queue, numElements, readKernelCode[1], readKernelName[1], format );
}
// Image-read profiling test for CL_RGBA / CL_UNSIGNED_INT8 images, using the
// testWriteui kernel (readKernelCode[2]). Source bytes are unsigned, 0 to 255.
int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    cl_image_format format;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_UNSIGNED_INT8;

    return read_image( device, context, queue, numElements, readKernelCode[2], readKernelName[2], format );
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,683 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
//--- the code for the kernel executables
// OpenCL C source for the image kernels, one string per kernel. Entry i of this
// table is the source of the kernel whose name is readKernelName[i].
// The test wrappers at the end of this file only use entries 0, 1 and 2.
static const char *readKernelCode[] = {
// [0] testReadf: read each pixel with read_imagef and store it as a float4 in dst.
"__kernel void testReadf(read_only image2d_t srcimg, __global float4 *dst)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" dst[indx].x = color.x;\n"
" dst[indx].y = color.y;\n"
" dst[indx].z = color.z;\n"
" dst[indx].w = color.w;\n"
"\n"
"}\n",
// [1] testReadi: read each pixel with read_imagei and store its channels, cast to uchar, in dst.
"__kernel void testReadi(read_only image2d_t srcimg, __global uchar4 *dst)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
" int4 color;\n"
"\n"
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
" color = read_imagei(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" uchar4 dst_write;\n"
" dst_write.x = (uchar)color.x;\n"
" dst_write.y = (uchar)color.y;\n"
" dst_write.z = (uchar)color.z;\n"
" dst_write.w = (uchar)color.w;\n"
" dst[indx] = dst_write;\n"
"\n"
"}\n",
// [2] testReadui: read each pixel with read_imageui and store its channels, cast to uchar, in dst.
"__kernel void testReadui(read_only image2d_t srcimg, __global uchar4 *dst)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
" uint4 color;\n"
"\n"
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" uchar4 dst_write;\n"
" dst_write.x = (uchar)color.x;\n"
" dst_write.y = (uchar)color.y;\n"
" dst_write.z = (uchar)color.z;\n"
" dst_write.w = (uchar)color.w;\n"
" dst[indx] = dst_write;\n"
"\n"
"}\n",
// [3] testWritef: take four uchar values per pixel from src, scale them by 1/255 and write with write_imagef.
"__kernel void testWritef(__global uchar *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" float4 color;\n"
"\n"
" indx *= 4;\n"
" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
" color /= (float4)(255.f, 255.f, 255.f, 255.f);\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n",
// [4] testWritei: take four char values per pixel from src and write them with write_imagei.
"__kernel void testWritei(__global char *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" int4 color;\n"
"\n"
" indx *= 4;\n"
" color.x = (int)src[indx+0];\n"
" color.y = (int)src[indx+1];\n"
" color.z = (int)src[indx+2];\n"
" color.w = (int)src[indx+3];\n"
" write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n",
// [5] testWriteui: take four uchar values per pixel from src and write them with write_imageui.
"__kernel void testWriteui(__global uchar *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" uint4 color;\n"
"\n"
" indx *= 4;\n"
" color.x = (uint)src[indx+0];\n"
" color.y = (uint)src[indx+1];\n"
" color.z = (uint)src[indx+2];\n"
" color.w = (uint)src[indx+3];\n"
" write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n",
// [6] testReadWriteff: copy a pixel from srcimg to dstimg as float4.
"__kernel void testReadWriteff(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, CLK_DEFAULT_SAMPLER, (int2)(tid_x, tid_y));\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n",
// [7] testReadWriteii: copy a pixel from srcimg to dstimg as int4.
"__kernel void testReadWriteii(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int4 color;\n"
"\n"
" color = read_imagei(srcimg, CLK_DEFAULT_SAMPLER, (int2)(tid_x, tid_y));\n"
" write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n",
// [8] testReadWriteuiui: copy a pixel from srcimg to dstimg as uint4.
"__kernel void testReadWriteuiui(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" uint4 color;\n"
"\n"
" color = read_imageui(srcimg, CLK_DEFAULT_SAMPLER, (int2)(tid_x, tid_y));\n"
" write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n",
// [9] testReadWritefi: read a pixel as float4 and write it as int4.
"__kernel void testReadWritefi(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 colorf;\n"
" int4 colori;\n"
"\n"
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
" colorf = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
// Going from unsigned (normalized) to signed: channel values of 0.5 or more
// are mapped to negative integers, the rest are scaled by 255.
" if( colorf.x >= 0.5f )\n"
" colori.x = (int)( ( colorf.x - 1.f ) * 255.f );\n"
" else\n"
" colori.x = (int)( colorf.x * 255.f );\n"
" if( colorf.y >= 0.5f )\n"
" colori.y = (int)( ( colorf.y - 1.f ) * 255.f );\n"
" else\n"
" colori.y = (int)( colorf.y * 255.f );\n"
" if( colorf.z >= 0.5f )\n"
" colori.z = (int)( ( colorf.z - 1.f ) * 255.f );\n"
" else\n"
" colori.z = (int)( colorf.z * 255.f );\n"
" if( colorf.w >= 0.5f )\n"
" colori.w = (int)( ( colorf.w - 1.f ) * 255.f );\n"
" else\n"
" colori.w = (int)( colorf.w * 255.f );\n"
" write_imagei(dstimg, (int2)(tid_x, tid_y), colori);\n"
"\n"
"}\n",
// [10] testReadWritefui: read a pixel as float4, scale by 255 and write it as uint4.
"__kernel void testReadWritefui(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 colorf;\n"
" uint4 colorui;\n"
"\n"
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
" colorf = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" colorui.x = (uint)( colorf.x * 255.f );\n"
" colorui.y = (uint)( colorf.y * 255.f );\n"
" colorui.z = (uint)( colorf.z * 255.f );\n"
" colorui.w = (uint)( colorf.w * 255.f );\n"
" write_imageui(dstimg, (int2)(tid_x, tid_y), colorui);\n"
"\n"
"}\n",
// [11] testReadWriteif: read a pixel as int4 and write it as float4.
"__kernel void testReadWriteif(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int4 colori;\n"
" float4 colorf;\n"
"\n"
// Going from signed to unsigned: 256 is added to negative channel values
// read from the signed image before they are divided by 255.
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
" colori = read_imagei(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" if( colori.x < 0 )\n"
" colorf.x = ( (float)colori.x + 256.f ) / 255.f;\n"
" else\n"
" colorf.x = (float)colori.x / 255.f;\n"
" if( colori.y < 0 )\n"
" colorf.y = ( (float)colori.y + 256.f ) / 255.f;\n"
" else\n"
" colorf.y = (float)colori.y / 255.f;\n"
" if( colori.z < 0 )\n"
" colorf.z = ( (float)colori.z + 256.f ) / 255.f;\n"
" else\n"
" colorf.z = (float)colori.z / 255.f;\n"
" if( colori.w < 0 )\n"
" colorf.w = ( (float)colori.w + 256.f ) / 255.f;\n"
" else\n"
" colorf.w = (float)colori.w / 255.f;\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), colorf);\n"
"\n"
"}\n",
// [12] testReadWriteiui: read a pixel as int4 and write it as uint4.
"__kernel void testReadWriteiui(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int4 colori;\n"
" uint4 colorui;\n"
"\n"
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
" colori = read_imagei(srcimg, sampler, (int2)(tid_x, tid_y));\n"
// Going from signed to unsigned: 256 is added to negative channel values
// read from the signed image.
" if( colori.x < 0 )\n"
" colorui.x = (uint)( colori.x + 256 );\n"
" else\n"
" colorui.x = (uint)colori.x;\n"
" if( colori.y < 0 )\n"
" colorui.y = (uint)( colori.y + 256 );\n"
" else\n"
" colorui.y = (uint)colori.y;\n"
" if( colori.z < 0 )\n"
" colorui.z = (uint)( colori.z + 256 );\n"
" else\n"
" colorui.z = (uint)colori.z;\n"
" if( colori.w < 0 )\n"
" colorui.w = (uint)( colori.w + 256 );\n"
" else\n"
" colorui.w = (uint)colori.w;\n"
" write_imageui(dstimg, (int2)(tid_x, tid_y), colorui);\n"
"\n"
"}\n",
// [13] testReadWriteuif: read a pixel as uint4, divide by 255 and write it as float4.
"__kernel void testReadWriteuif(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" uint4 colorui;\n"
" float4 colorf;\n"
"\n"
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
" colorui = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" colorf.x = (float)colorui.x / 255.f;\n"
" colorf.y = (float)colorui.y / 255.f;\n"
" colorf.z = (float)colorui.z / 255.f;\n"
" colorf.w = (float)colorui.w / 255.f;\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), colorf);\n"
"\n"
"}\n",
// [14] testReadWriteuii: read a pixel as uint4 and write it as int4.
"__kernel void testReadWriteuii(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" uint4 colorui;\n"
" int4 colori;\n"
"\n"
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
" colorui = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
// Going from unsigned to signed: channel values of 128 or more are mapped
// to negative values by subtracting 256.
" if( colorui.x >= 128U )\n"
" colori.x = (int)colorui.x - 256;\n"
" else\n"
" colori.x = (int)colorui.x;\n"
" if( colorui.y >= 128U )\n"
" colori.y = (int)colorui.y - 256;\n"
" else\n"
" colori.y = (int)colorui.y;\n"
" if( colorui.z >= 128U )\n"
" colori.z = (int)colorui.z - 256;\n"
" else\n"
" colori.z = (int)colorui.z;\n"
" if( colorui.w >= 128U )\n"
" colori.w = (int)colorui.w - 256;\n"
" else\n"
" colori.w = (int)colorui.w;\n"
" write_imagei(dstimg, (int2)(tid_x, tid_y), colori);\n"
"\n"
"}\n" };
// Kernel entry-point names, in the same order as the sources in readKernelCode:
// readKernelName[i] is the __kernel function defined by readKernelCode[i].
static const char *readKernelName[] = { "testReadf", "testReadi", "testReadui", "testWritef", "testWritei", "testWriteui",
"testReadWriteff", "testReadWriteii", "testReadWriteuiui", "testReadWritefi",
"testReadWritefui", "testReadWriteif", "testReadWriteiui", "testReadWriteuif",
"testReadWriteuii" };
// Allocates n bytes and fills each one with the low 8 bits of a value drawn
// from the random generator d.
//
// n - number of bytes to generate.
// d - random generator state passed to genrand_int32().
//
// Returns the malloc'ed buffer (the caller frees it), or NULL when the
// allocation fails.
static cl_uchar *generateImage( int n, MTdata d )
{
    // The cast is kept because the project's build files compile this source as C++.
    cl_uchar *ptr = (cl_uchar *)malloc( n * sizeof( cl_uchar ) );
    int i;

    // Report allocation failure to the caller instead of writing through NULL.
    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (cl_uchar)genrand_int32(d);
    }

    return ptr;
}
// Allocates n bytes and fills each one with a value drawn from the random
// generator d, converted to char.
//
// n - number of bytes to generate.
// d - random generator state passed to genrand_int32().
//
// Returns the malloc'ed buffer (the caller frees it), or NULL when the
// allocation fails.
static char *generateSignedImage( int n, MTdata d )
{
    // The cast is kept because the project's build files compile this source as C++.
    char *ptr = (char *)malloc( n * sizeof( char ) );
    int i;

    // Report allocation failure to the caller instead of writing through NULL.
    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (char)genrand_int32(d);
    }

    return ptr;
}
// Byte-wise comparison of two images of w x h pixels with 4 bytes per pixel.
//
// image  - expected data.
// outptr - data actually produced.
//
// Returns 0 when all w*h*4 bytes match; logs the first mismatch and returns -1
// otherwise.
static int verifyImage( cl_uchar *image, cl_uchar *outptr, int w, int h )
{
    const int total = w * h * 4;
    int pos = 0;

    while( pos < total )
    {
        if( outptr[pos] != image[pos] )
        {
            log_error("Image verification failed at offset %d. Actual value=%d, expected value=%d\n", pos, outptr[pos], image[pos]);
            return -1;
        }
        ++pos;
    }

    return 0;
}
// Compares w*h*4 float results against double-precision reference values.
//
// refptr - reference values.
// outptr - values actually produced.
//
// A sample passes when it equals the reference rounded to float, or when the
// error reported by Ulp_Error() has magnitude below 1.5.
// Returns 0 when every sample passes; logs the first failing sample and
// returns -1 otherwise.
static int verifyImageFloat ( cl_double *refptr, cl_float *outptr, int w, int h )
{
    const int total = w * h * 4;
    int pos;

    for( pos = 0; pos < total; ++pos )
    {
        float ulps;

        // Exact match: no need to compute the ULP error.
        if( outptr[pos] == (float)refptr[pos] )
            continue;

        ulps = Ulp_Error( outptr[pos], refptr[pos]);

        // Written as a negated "<" so that a NaN error also counts as a failure.
        if( !( fabsf(ulps) < 1.5f ) )
        {
            log_error( "ERROR: Data sample %d does not validate! Expected (%a), got (%a), ulp %f\n",
                       (int)pos, refptr[pos], outptr[ pos ], ulps );
            return -1;
        }
    }

    return 0;
}
// Builds the double-precision reference for a w x h image with 4 bytes per
// pixel: every input byte is divided by 255.
//
// Returns a malloc'ed array of w*h*4 doubles (the caller frees it), or NULL
// after logging an error when the allocation fails.
static double *prepareReference( cl_uchar *inptr, int w, int h)
{
    double *reference = (double *)malloc( w * h * 4*sizeof( double ) );
    const int total = w * h * 4;
    int pos;

    if( reference == NULL )
    {
        log_error( "Unable to allocate refptr at %d x %d\n", (int)w, (int)h );
        return NULL;
    }

    for( pos = 0; pos < total; ++pos )
        reference[pos] = (double)inptr[pos] / 255;

    return reference;
}
//----- the test functions
// Profiles clEnqueueWriteImage on a 64x64 2D image and verifies the data written.
//
// Steps:
//   1. fill a host buffer with random bytes (4 per pixel);
//   2. write it to the image with a non-blocking clEnqueueWriteImage and wait
//      on the returned event;
//   3. query the QUEUED, SUBMIT, START and END profiling timestamps of that event;
//   4. build the supplied kernel, run it to copy the image into a buffer, read
//      the buffer back and compare it with the host data;
//   5. pass the four timestamps to check_times().
//
// device, context, queue - OpenCL objects supplied by the caller.
// numElements             - not referenced by this function.
// code, name              - source text and entry-point name of the read-back kernel.
// image_format_desc       - format of the image to create.
// readFloat               - non-zero when the kernel stores cl_float channels; the
//                           result is then compared against input/255 with a ULP
//                           tolerance instead of byte-for-byte.
//
// Returns 0 when the data verifies and check_times() reports no problem, -1 on
// any failure. (PASSIVE_REQUIRE_IMAGE_SUPPORT is defined elsewhere and
// presumably returns early on devices without image support.)
int write_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements, const char *code,
                 const char *name, cl_image_format image_format_desc, int readFloat )
{
    // Everything is declared (and, where needed, initialised) up front so that
    // no goto below jumps across an initialisation, which C++ forbids.
    cl_mem          memobjs[2] = { NULL, NULL };   // [0] image, [1] read-back buffer
    cl_program      program[1] = { NULL };
    cl_kernel       kernel[1] = { NULL };
    cl_event        writeEvent = NULL;
    void            *inptr = NULL;
    void            *dst = NULL;
    double          *refptr = NULL;
    cl_ulong        queueStart, submitStart, writeStart, writeEnd;
    size_t          threads[2];
#ifdef USE_LOCAL_THREADS
    size_t          localThreads[2];
#endif
    size_t          origin[3] = { 0, 0, 0 };
    size_t          region[3];
    int             err;
    int             result = -1;
    int             w = 64, h = 64;
    cl_mem_flags    flags;
    size_t          element_nbytes;
    size_t          num_bytes;
    size_t          channel_nbytes = sizeof( cl_uchar );
    MTdata          d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    if (readFloat)
        channel_nbytes = sizeof( cl_float );

    element_nbytes = channel_nbytes * get_format_channel_count( &image_format_desc );
    num_bytes = w * h * element_nbytes;

    threads[0] = (size_t)w;
    threads[1] = (size_t)h;

    // Explicit casts: a braced initialiser from non-constant ints would be a
    // narrowing conversion when this file is built as C++.
    region[0] = (size_t)w;
    region[1] = (size_t)h;
    region[2] = 1;

    // NOTE(review): the block below refers to `id`, which is not declared in
    // this function, and to clGetDeviceConfigInfo; it looks stale and is
    // unlikely to build if USE_LOCAL_THREADS is ever defined - confirm before
    // enabling.
#ifdef USE_LOCAL_THREADS
    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
    test_error( err, "Unable to get thread group max size" );
    localThreads[1] = localThreads[0];
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[1] > threads[1] )
        localThreads[1] = threads[1];
#endif

    // Random host data; signed generator for signed 8-bit formats.
    d = init_genrand( gRandomSeed );
    if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
        inptr = (void *)generateSignedImage( w * h * 4, d );
    else
        inptr = (void *)generateImage( w * h * 4, d );
    free_mtdata(d); d = NULL;

    if( ! inptr ){
        log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
        goto free_host;
    }

    dst = malloc( num_bytes );
    if( ! dst ){
        log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h );
        goto free_host;
    }

    // allocate the input and output image memory objects
    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
    memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
    if( memobjs[0] == (cl_mem)0 ){
        log_error("unable to create Image2D\n");
        goto free_host;
    }

    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * 4 * w * h, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ){
        log_error("unable to create array\n");
        goto release_image;
    }

    // The command being profiled.
    err = clEnqueueWriteImage( queue, memobjs[0], false, origin, region, 0, 0, inptr, 0, NULL, &writeEvent );
    if( err != CL_SUCCESS ){
        print_error(err, "clWriteImage failed");
        goto release_buffer;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &writeEvent );
    if( err != CL_SUCCESS )
    {
        print_error( err, "clWaitForEvents failed" );
        goto release_event;
    }

    // test profiling
    // NOTE(review): the two loops below spin for as long as the query returns
    // CL_PROFILING_INFO_NOT_AVAILABLE; they rely on the implementation
    // eventually providing the value.
    while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    // Read the image back through a kernel so the written data can be checked.
    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &code, name );
    if( err ){
        log_error( "Unable to create program and kernel\n" );
        // The helper failed, so program/kernel are not released here.
        goto release_event;
    }

    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] );
    if( err != CL_SUCCESS ){
        log_error( "clSetKernelArg failed\n" );
        goto release_kernel;
    }

#ifdef USE_LOCAL_THREADS
    err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
#endif
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed" );
        goto release_kernel;
    }

    // Blocking read of the kernel output.
    err = clEnqueueReadBuffer( queue, memobjs[1], true, 0, num_bytes, dst, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed" );
        goto release_kernel;
    }

    if ( readFloat )
    {
        refptr = prepareReference( (cl_uchar *)inptr, w, h );
        if ( refptr )
        {
            err = verifyImageFloat( refptr, (cl_float *)dst, w, h );
            free ( refptr );
        }
        else
            err = -1;
    }
    else
        err = verifyImage( (cl_uchar *)inptr, (cl_uchar *)dst, w, h );

    if( err )
    {
        log_error( "Image failed to verify.\n" );
    }
    else
    {
        log_info( "Image verified.\n" );
    }

    // The timestamps are checked even when the data verification failed.
    result = err;
    if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
        result = -1;

    // cleanup: each label releases what was acquired since the previous one,
    // then falls through to the labels below it.
release_kernel:
    clReleaseKernel( kernel[0] );
    clReleaseProgram( program[0] );
release_event:
    clReleaseEvent(writeEvent);
release_buffer:
    clReleaseMemObject(memobjs[1]);
release_image:
    clReleaseMemObject(memobjs[0]);
free_host:
    free( dst );
    free( inptr );

    return result;

}   // end write_image()
// Test entry point: profiles an image write on a CL_RGBA / CL_UNORM_INT8 image.
// The data is read back with the testReadf kernel (table index 0) as floats,
// so write_image() is asked for the float verification path (readFloat = 1).
int write_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    cl_image_format fmt;

    // Macro is defined outside this file; presumably it leaves the test early
    // when the device has no image support.
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // 8-bit unsigned normalized channels
    fmt.image_channel_order = CL_RGBA;
    fmt.image_channel_data_type = CL_UNORM_INT8;

    return write_image( device, context, queue, numElements, readKernelCode[0], readKernelName[0], fmt, 1 );
}
// Test entry point: profiles an image write on a CL_RGBA / CL_SIGNED_INT8 image.
// The data is read back with the testReadi kernel (table index 1) and compared
// byte-for-byte (readFloat = 0).
int write_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    cl_image_format fmt;

    // Macro is defined outside this file; presumably it leaves the test early
    // when the device has no image support.
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // channel values span -128 to 127 for signed image data
    fmt.image_channel_order = CL_RGBA;
    fmt.image_channel_data_type = CL_SIGNED_INT8;

    return write_image( device, context, queue, numElements, readKernelCode[1], readKernelName[1], fmt, 0 );
}
// Test entry point: profiles an image write on a CL_RGBA / CL_UNSIGNED_INT8 image.
// The data is read back with the testReadui kernel (table index 2) and compared
// byte-for-byte (readFloat = 0).
int write_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    cl_image_format fmt;

    // Macro is defined outside this file; presumably it leaves the test early
    // when the device has no image support.
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // channel values span 0 to 255 for unsigned image data
    fmt.image_channel_order = CL_RGBA;
    fmt.image_channel_data_type = CL_UNSIGNED_INT8;

    return write_image( device, context, queue, numElements, readKernelCode[2], readKernelName[2], fmt, 0 );
}