mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Initial open source release of OpenCL 2.2 CTS.
This commit is contained in:
23
test_conformance/profiling/CMakeLists.txt
Normal file
23
test_conformance/profiling/CMakeLists.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
# Build definition for the OpenCL profiling conformance test module.
set(MODULE_NAME PROFILING)

# Test sources local to this directory come first, followed by the shared
# harness sources that are compiled into the module.
set(${MODULE_NAME}_SOURCES
    main.c
    readArray.c
    writeArray.c
    readImage.c
    writeImage.c
    copy.c
    execute.c
    execute_multipass.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/imageHelpers.cpp
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/msvc9.c
    ../../test_common/harness/parseParameters.cpp
)

# Shared build rules for conformance modules.
# NOTE(review): presumably consumes MODULE_NAME and ${MODULE_NAME}_SOURCES - confirm in CMakeCommon.txt.
include(../CMakeCommon.txt)
|
||||
22
test_conformance/profiling/Jamfile
Normal file
22
test_conformance/profiling/Jamfile
Normal file
@@ -0,0 +1,22 @@
|
||||
# Boost.Build project for the profiling conformance test.
# The compiler flags below make both toolsets treat the .c sources as C++.
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;

# The test executable, built from the sources in this directory.
exe test_profiling
    : copy.c
      execute.c
      execute_multipass.c
      main.c
      readArray.c
      readImage.c
      writeArray.c
      writeImage.c
    ;

# Install location depends on the build variant.
install dist
    : test_profiling
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/profiling
      <variant>release:<location>$(DIST)/release/tests/test_conformance/profiling
    ;
|
||||
45
test_conformance/profiling/Makefile
Normal file
45
test_conformance/profiling/Makefile
Normal file
@@ -0,0 +1,45 @@
|
||||
# Makefile for the profiling conformance test (links against the OpenCL framework).

# Optional ATF support: define BUILD_WITH_ATF to link the framework and
# compile with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Local test sources followed by the shared harness sources.
SRCS = main.c readArray.c writeArray.c readImage.c writeImage.c copy.c execute.c execute_multipass.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/typeWrappers.cpp \
	../../test_common/harness/imageHelpers.cpp \
	../../test_common/harness/mt19937.c \
	../../test_common/harness/conversions.c \
	../../test_common/harness/kernelHelpers.c

SOURCES = $(abspath $(SRCS))

LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.

FRAMEWORK = ${SOURCES}
HEADERS =
TARGET = test_profiling
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
#COMPILERFLAGS = -c -Wall -g -DUSE_LOCAL_THREADS
# The .c sources are built with the C++ driver.
CC = c++
CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
CXXFLAGS= $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Object names: replace .c and then .cpp suffixes with .o.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# all and clean never produce files of those names.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
869
test_conformance/profiling/copy.c
Normal file
869
test_conformance/profiling/copy.c
Normal file
@@ -0,0 +1,869 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
//--- the code for the kernel executables
|
||||
// OpenCL C source of the "test_write" kernel. Each work-item reads four
// consecutive bytes of src at the offset derived from its 2D global id,
// scales them by 1/255 and writes the resulting float4 to dstimg at that
// coordinate.
static const char *write_kernel_code =
    "\n"
    "__kernel void test_write(__global unsigned char *src, write_only image2d_t dstimg)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " indx *= 4;\n"
    " color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
    " color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
//--- the verify functions
|
||||
// Compares a subw x subh pixel window of src (top-left corner srcx,srcy)
// with a same-sized window of dst (top-left corner dstx,dsty). Both images
// are addressed as rows of `pitch` pixels, `element_pitch` bytes per pixel.
// Returns 0 when every byte of the window matches, -1 at the first mismatch.
static int verify_subimage( unsigned char *src, unsigned char *dst, size_t srcx, size_t srcy,
                            size_t dstx, size_t dsty, size_t subw, size_t subh, size_t pitch, size_t element_pitch )
{
    const size_t rowStride = pitch * element_pitch;     // bytes per full image row
    const size_t windowBytes = subw * element_pitch;    // bytes per window row
    size_t row;

    for( row = 0; row < subh; row++ ){
        // byte offsets of the first window pixel on this row
        const size_t srcBase = ( row + srcy ) * rowStride + srcx * element_pitch;
        const size_t dstBase = ( row + dsty ) * rowStride + dstx * element_pitch;
        size_t b;

        // pixels and their channels are contiguous, so scan the row byte-wise
        for( b = 0; b < windowBytes; b++ ){
            if( src[srcBase + b] != dst[dstBase + b] )
                return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
// Checks that the first n ints of outptr equal the first n ints of inptr.
// Returns 0 on a full match (also when n <= 0), -1 at the first difference.
static int verify_copy_array( int *inptr, int *outptr, int n )
{
    const int *expected = inptr;
    const int *actual = outptr;
    int remaining;

    for( remaining = n; remaining > 0; remaining-- ) {
        if( *actual++ != *expected++ )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
//----- helper functions
|
||||
// Allocates n bytes and fills each one with the low byte of a value drawn
// from the generator d. Returns the buffer, which the caller releases with
// free(), or NULL when the allocation fails (callers already test for NULL).
static cl_uchar *generate_image( int n, MTdata d )
{
    cl_uchar *ptr = (cl_uchar *)malloc( n );
    int i;

    // Do not write through a failed allocation; report it to the caller.
    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ )
        ptr[i] = (cl_uchar)genrand_int32(d);

    return ptr;
}
|
||||
|
||||
|
||||
// Copies a whole buffer of num_elements cl_ints from one device buffer to
// another with clEnqueueCopyBuffer, reads the destination back and compares
// it with the host input, then passes the copy event's QUEUED / SUBMIT /
// START / END profiling timestamps to check_times().
// Returns 0 when the data matches and check_times() reports no problem,
// -1 otherwise. Every exit path releases the resources acquired so far.
static int copy_size( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, MTdata d )
{
    cl_mem      streams[2] = { NULL, NULL };
    cl_event    copyEvent = NULL;
    cl_ulong    queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    cl_int      *int_input_ptr = NULL, *int_output_ptr = NULL;
    int         err = 0;
    int         result = -1;
    int         i;

    int_input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    int_output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    if( !int_input_ptr || !int_output_ptr ){
        log_error( "unable to allocate host buffers for %d elements\n", num_elements );
        goto cleanup;
    }

    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
    if( !streams[0] ){
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
    if( !streams[1] ){
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i=0; i<num_elements; i++){
        int_input_ptr[i] = (int)genrand_int32(d);
        int_output_ptr[i] = (int)genrand_int32(d) >> 30;    // seed with incorrect data
    }

    err = clEnqueueWriteBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)int_input_ptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clWriteArray failed" );
        goto cleanup;
    }

    err = clEnqueueCopyBuffer( queue, streams[0], streams[1], 0, 0, sizeof(cl_int)*num_elements, 0, NULL, &copyEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clCopyArray failed" );
        goto cleanup;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &copyEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clWaitForEvents failed" );
        goto cleanup;
    }

    // test profiling: the QUEUED and SUBMIT queries are retried for as long as
    // the runtime answers CL_PROFILING_INFO_NOT_AVAILABLE
    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)int_output_ptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed" );
        goto cleanup;
    }

    if( verify_copy_array(int_input_ptr, int_output_ptr, num_elements) ){
        log_error( "test failed\n" );
        result = -1;
    }
    else{
        log_info( "test passed\n" );
        result = 0;
    }

    // the timestamps are checked even when the data comparison failed
    if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
        result = -1;

cleanup:
    // release only what was actually acquired
    if( copyEvent )
        clReleaseEvent( copyEvent );
    if( streams[0] )
        clReleaseMemObject( streams[0] );
    if( streams[1] )
        clReleaseMemObject( streams[1] );
    free( (void *)int_output_ptr );
    free( (void *)int_input_ptr );

    return result;

}   // end copy_size()
|
||||
|
||||
|
||||
// Copies `size` cl_ints from element offset srcStart of one device buffer to
// element offset dstStart of another (both buffers hold num_elements cl_ints),
// reads the destination back, compares only the copied range with the host
// input, and passes the copy event's profiling timestamps to check_times().
// Returns 0 when the range matches and check_times() reports no problem,
// -1 otherwise. Every exit path releases the resources acquired so far.
static int copy_partial_size( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d )
{
    cl_mem      streams[2] = { NULL, NULL };
    cl_event    copyEvent = NULL;
    cl_ulong    queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    cl_int      *inptr = NULL, *outptr = NULL;
    int         err = 0;
    int         result = -1;
    int         i;

    inptr = (cl_int *)malloc(sizeof(cl_int) * num_elements);
    outptr = (cl_int *)malloc(sizeof(cl_int) * num_elements);
    if( !inptr || !outptr ){
        log_error( "unable to allocate host buffers for %d elements\n", num_elements );
        goto cleanup;
    }

    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i=0; i<num_elements; i++){
        inptr[i] = (int)genrand_int32(d);
        outptr[i] = (int)get_random_float( -1.f, 1.f, d );    // seed with incorrect data
    }

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    err = clEnqueueCopyBuffer( queue, streams[0], streams[1], srcStart*sizeof(cl_int), dstStart*sizeof(cl_int),
                               sizeof(cl_int)*size, 0, NULL, &copyEvent );
    if( err != CL_SUCCESS){
        print_error( err, "clCopyArray failed" );
        goto cleanup;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &copyEvent );
    if( err != CL_SUCCESS )
    {
        print_error( err, "clWaitForEvents failed" );
        goto cleanup;
    }

    // test profiling: the QUEUED and SUBMIT queries are retried for as long as
    // the runtime answers CL_PROFILING_INFO_NOT_AVAILABLE
    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto cleanup;
    }

    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS){
        log_error("clReadVariableStream failed\n");
        goto cleanup;
    }

    // only the copied range is compared; the rest of outptr is not inspected
    if( verify_copy_array(inptr + srcStart, outptr + dstStart, size) ){
        log_error("test failed\n");
        result = -1;
    }
    else{
        log_info("test passed\n");
        result = 0;
    }

    // the timestamps are checked even when the data comparison failed
    if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
        result = -1;

cleanup:
    // release only what was actually acquired
    if( copyEvent )
        clReleaseEvent( copyEvent );
    if( streams[0] )
        clReleaseMemObject( streams[0] );
    if( streams[1] )
        clReleaseMemObject( streams[1] );
    free( outptr );
    free( inptr );

    return result;

}   // end copy_partial_size()
|
||||
|
||||
|
||||
// Runs the whole-buffer copy profiling test once with the caller-supplied
// element count and then eight more times with randomly drawn counts.
// The individual results are OR-ed together, so the return value is non-zero
// if any run failed.
int copy_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    MTdata rng = init_genrand( gRandomSeed );
    int status;
    int pass = 0;

    // the preset size first
    log_info( "set size: %d: ", num_elements );
    status = copy_size( device, context, queue, num_elements, rng );

    // then eight random sizes
    while( pass < 8 ){
        int count = (int)get_random_float(2.f,131072.f, rng);

        log_info( "random size: %d: ", count );
        status |= copy_size( device, context, queue, count, rng );
        pass++;
    }

    free_mtdata(rng);

    return status;

}   // end copy_array()
|
||||
|
||||
|
||||
// Runs eight partial-buffer copy profiling tests. Each round draws a random
// source offset, a length of at least 8 elements, and a destination offset.
// The individual results are OR-ed together, so the return value is non-zero
// if any round failed.
int copy_partial_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    MTdata rng = init_genrand( gRandomSeed );
    int status = 0;
    int round;

    for( round = 0; round < 8; ++round ){
        // the three draws are made in this order: source offset, length, destination offset
        cl_uint srcStart = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), rng );
        int size = (int)get_random_float( 8.f, (float)(num_elements - srcStart), rng );
        cl_uint dstStart = (cl_uint)get_random_float( 0.f, (float)(num_elements - size), rng );

        log_info( "random partial copy from %d to %d, size: %d: ", (int)srcStart, (int)dstStart, size );
        status |= copy_partial_size( device, context, queue, num_elements, srcStart, dstStart, size, rng );
    }

    free_mtdata(rng);
    return status;
}   // end copy_partial_array()
|
||||
|
||||
|
||||
static int copy_image_size( cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, size_t srcx, size_t srcy,
|
||||
size_t dstx, size_t dsty, size_t subw, size_t subh,
|
||||
MTdata d )
|
||||
{
|
||||
cl_mem memobjs[3];
|
||||
cl_program program[1];
|
||||
cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
|
||||
cl_event copyEvent;
|
||||
cl_ulong queueStart, submitStart, writeStart, writeEnd;
|
||||
void *inptr;
|
||||
void *dst = NULL;
|
||||
cl_kernel kernel[1];
|
||||
size_t threads[2];
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
size_t localThreads[2];
|
||||
#endif
|
||||
int err = 0;
|
||||
cl_mem_flags flags;
|
||||
unsigned int num_channels = 4;
|
||||
size_t w = 256, h = 256;
|
||||
size_t element_nbytes;
|
||||
size_t num_bytes;
|
||||
size_t channel_nbytes = sizeof( cl_char );
|
||||
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
element_nbytes = channel_nbytes * num_channels;
|
||||
num_bytes = w * h * element_nbytes;
|
||||
|
||||
threads[0] = (size_t)w;
|
||||
threads[1] = (size_t)h;
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
|
||||
test_error( err, "Unable to get thread group max size" );
|
||||
localThreads[1] = localThreads[0];
|
||||
if( localThreads[0] > threads[0] )
|
||||
localThreads[0] = threads[0];
|
||||
if( localThreads[1] > threads[1] )
|
||||
localThreads[1] = threads[1];
|
||||
#endif
|
||||
|
||||
inptr = (void *)generate_image( (int)num_bytes, d );
|
||||
if( ! inptr ){
|
||||
log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
dst = malloc( num_bytes );
|
||||
if( ! dst ){
|
||||
free( (void *)inptr );
|
||||
log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// allocate the input image
|
||||
flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
|
||||
memobjs[0] = create_image_2d(context, flags, &image_format_desc, w, h, 0, NULL, &err);
|
||||
if( memobjs[0] == (cl_mem)0 ) {
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
log_error("unable to create Image2D\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), num_bytes, NULL, &err );
|
||||
if( memobjs[1] == (cl_mem)0 ) {
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
log_error("unable to create array\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// allocate the input image
|
||||
memobjs[2] = create_image_2d(context, flags, &image_format_desc, w, h, 0, NULL, &err);
|
||||
if( memobjs[2] == (cl_mem)0 ) {
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
log_error("unable to create Image2D\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer( queue, memobjs[1], true, 0, num_bytes, inptr, 0, NULL, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &write_kernel_code, "test_write" );
|
||||
if( err ){
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[1] );
|
||||
err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[0] );
|
||||
if (err != CL_SUCCESS){
|
||||
log_error("clSetKernelArg failed\n");
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
#else
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
#endif
|
||||
if (err != CL_SUCCESS){
|
||||
print_error( err, "clEnqueueNDRangeKernel failed" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// now do the copy
|
||||
size_t srcPt[3] = { srcx, srcy, 0 };
|
||||
size_t destPt[3] = { dstx, dsty, 0 };
|
||||
size_t region[3] = { subw, subh, 1 };
|
||||
err = clEnqueueCopyImage( queue, memobjs[0], memobjs[2], srcPt, destPt, region, 0, NULL, ©Event );
|
||||
if (err != CL_SUCCESS){
|
||||
print_error( err, "clCopyImage failed" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// This synchronization point is needed in order to assume the data is valid.
|
||||
// Getting profiling information is not a synchronization point.
|
||||
err = clWaitForEvents( 1, ©Event );
|
||||
if( err != CL_SUCCESS )
|
||||
{
|
||||
clReleaseEvent(copyEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// test profiling
|
||||
while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(copyEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(copyEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(copyEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(copyEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = { 0, 0, 0 };
|
||||
size_t region2[3] = { w, h, 1 };
|
||||
err = clEnqueueReadImage( queue, memobjs[2], true, origin, region2, 0, 0, dst, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS){
|
||||
print_error( err, "clReadImage failed" );
|
||||
clReleaseEvent(copyEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_subimage( (unsigned char *)inptr, (unsigned char *)dst, srcx, srcy,
|
||||
dstx, dsty, subw, subh, w, 4 );
|
||||
//err = verify_image( (unsigned char *)inptr, (unsigned char *)dst, w * h * 4 );
|
||||
if( err ){
|
||||
log_error( "Image failed to verify.\n " );
|
||||
}
|
||||
else{
|
||||
log_info( "Image verified.\n" );
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseEvent(copyEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject( memobjs[0] );
|
||||
clReleaseMemObject( memobjs[1] );
|
||||
clReleaseMemObject( memobjs[2] );
|
||||
free( dst );
|
||||
free( inptr );
|
||||
|
||||
if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
|
||||
err = -1;
|
||||
|
||||
return err;
|
||||
|
||||
} // end copy_image_size()
|
||||
|
||||
|
||||
int copy_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
|
||||
{
|
||||
int err = 0;
|
||||
int i;
|
||||
size_t srcx, srcy, dstx, dsty, subw, subh;
|
||||
MTdata d;
|
||||
|
||||
srcx = srcy = dstx = dsty = 0;
|
||||
subw = subh = 256;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d );
|
||||
if( err ){
|
||||
log_error( "testing copy image, full size\n" );
|
||||
}
|
||||
else{
|
||||
log_info( "testing copy image, full size\n" );
|
||||
}
|
||||
|
||||
// now test random sub images
|
||||
srcx = srcy = 0;
|
||||
subw = subh = 16;
|
||||
dstx = dsty = 0;
|
||||
err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d );
|
||||
if( err ){
|
||||
log_error( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
|
||||
(int)dstx, (int)dsty, (int)subw, (int)subh );
|
||||
}
|
||||
else{
|
||||
log_info( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
|
||||
(int)dstx, (int)dsty, (int)subw, (int)subh );
|
||||
}
|
||||
|
||||
srcx = srcy = 8;
|
||||
subw = subh = 16;
|
||||
dstx = dsty = 32;
|
||||
err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d );
|
||||
if( err ){
|
||||
log_error( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
|
||||
(int)dstx, (int)dsty, (int)subw, (int)subh );
|
||||
}
|
||||
else{
|
||||
log_info( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
|
||||
(int)dstx, (int)dsty, (int)subw, (int)subh );
|
||||
}
|
||||
|
||||
for( i = 0; i < 16; i++ ) {
|
||||
srcx = (size_t)get_random_float( 0.f, 248.f, d );
|
||||
srcy = (size_t)get_random_float( 0.f, 248.f, d );
|
||||
subw = (size_t)get_random_float( 8.f, (float)(256 - srcx), d );
|
||||
subh = (size_t)get_random_float( 8.f, (float)(256 - srcy), d );
|
||||
dstx = (size_t)get_random_float( 0.f, (float)(256 - subw), d );
|
||||
dsty = (size_t)get_random_float( 0.f, (float)(256 - subh), d );
|
||||
err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d );
|
||||
if( err ){
|
||||
log_error( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
|
||||
(int)dstx, (int)dsty, (int)subw, (int)subh );
|
||||
}
|
||||
else{
|
||||
log_info( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy,
|
||||
(int)dstx, (int)dsty, (int)subw, (int)subh );
|
||||
}
|
||||
}
|
||||
|
||||
free_mtdata(d);
|
||||
|
||||
return err;
|
||||
|
||||
} // end copy_image()
|
||||
|
||||
|
||||
// Round-trips generated pixel data through a buffer-to-image copy:
//   1. upload the data into a CL buffer,
//   2. copy the buffer into a 256 x 256 CL_RGBA / CL_UNORM_INT8 image,
//   3. read the image back and compare it with what was uploaded.
//
// Returns the verify_subimage() result when the copy ran to completion, and -1
// when an allocation or any CL call fails.  num_elements is not used.
int copy_array_to_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    unsigned int    num_channels = (unsigned int)get_format_channel_count( &image_format_desc );
    size_t          w = 256, h = 256;
    size_t          channel_nbytes = sizeof( cl_char );
    size_t          origin[3] = { 0, 0, 0 };
    size_t          region[3] = { w, h, 1 };
    size_t          num_bytes;
    cl_mem          image = (cl_mem)0;
    cl_mem          buffer = (cl_mem)0;
    void            *inptr = NULL;
    void            *dst = NULL;
    int             err;
    int             result = -1;    // every early exit reports -1
    MTdata          d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    num_bytes = w * h * ( channel_nbytes * num_channels );

    // host-side source data
    d = init_genrand( gRandomSeed );
    inptr = (void *)generate_image( (int)num_bytes, d );
    free_mtdata(d);
    if( inptr == NULL ){
        log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
        return -1;
    }

    // host-side destination for the read-back
    dst = malloc( num_bytes );
    if( dst == NULL ){
        log_error( " unable to allocate dst at %d x %d\n", (int)w, (int)h );
        goto cleanup;
    }

    // destination image of the copy
    image = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_WRITE), &image_format_desc, w, h, 0, NULL, &err );
    if( image == (cl_mem)0 ){
        log_error( " unable to create Image2D\n" );
        goto cleanup;
    }

    // source buffer of the copy
    buffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), num_bytes, NULL, &err );
    if( buffer == (cl_mem)0 ){
        log_error( " unable to create array: " );
        goto cleanup;
    }

    err = clEnqueueWriteBuffer( queue, buffer, true, 0, num_bytes, (const void *)inptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clWriteArray failed" );
        goto cleanup;
    }

    err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clCopyArrayToImage failed" );
        goto cleanup;
    }

    err = clEnqueueReadImage( queue, image, true, origin, region, 0, 0, dst, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clReadImage failed" );
        goto cleanup;
    }

    result = verify_subimage( (cl_uchar *)inptr, (cl_uchar *)dst, 0, 0, 0, 0, w, h, w, num_channels );
    if( result ){
        log_error( " test failed: " );
    }
    else{
        log_info( " test passed: " );
    }

cleanup:
    // release in reverse order of creation; handles that were never created are skipped
    if( buffer != (cl_mem)0 ){
        clReleaseMemObject( buffer );
    }
    if( image != (cl_mem)0 ){
        clReleaseMemObject( image );
    }
    free( dst );
    free( inptr );

    return result;

}   // end copy_array_to_image()
|
||||
440
test_conformance/profiling/execute.c
Normal file
440
test_conformance/profiling/execute.c
Normal file
@@ -0,0 +1,440 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#ifndef uchar
|
||||
typedef unsigned char uchar;
|
||||
#endif
|
||||
|
||||
#undef MIN
|
||||
#define MIN(x,y) ( (x) < (y) ? (x) : (y) )
|
||||
|
||||
#undef MAX
|
||||
#define MAX(x,y) ( (x) > (y) ? (x) : (y) )
|
||||
|
||||
//#define CREATE_OUTPUT 1
|
||||
|
||||
extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
|
||||
|
||||
|
||||
|
||||
//--- the code for kernel executables
|
||||
// OpenCL C source of the image_filter kernel.  Each work-item accumulates an
// n x m weighted neighbourhood of src_image around its own (x, y) position,
// skipping zero weights, and writes the sum to dst_image.
static const char *image_filter_src =
    "constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
    "\n"
    "__kernel void image_filter( int n, int m, __global float *filter_weights,\n"
    " read_only image2d_t src_image, write_only image2d_t dst_image )\n"
    "{\n"
    " int i, j;\n"
    " int indx = 0;\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " float4 filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n"
    "\n"
    " for (i=-m/2; i<(m+1)/2; i++){\n"
    " for (j=-n/2; j<(n+1)/2; j++){\n"
    " float w = filter_weights[indx++];\n"
    "\n"
    " if (w != 0.0f){\n"
    " filter_result += w * read_imagef(src_image, sampler,\n"
    " (int2)(tid_x + j, tid_y + i));\n"
    " }\n"
    " }\n"
    " }\n"
    "\n"
    " write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n"
    "}\n";
|
||||
|
||||
|
||||
//--- equivalent non-kernel code
|
||||
// Host-side counterpart of the kernel's read_imagef: fetches the texel at
// (x, y) from a tightly packed w x h image with nChannels bytes per texel.
// Coordinates outside the image are clamped to the nearest edge texel.  Each
// channel byte is stored in srcRgb as a float without rescaling.
static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
{
    int cx = x;
    int cy = y;
    int base;
    int c;

    // clamp to [0, w-1] x [0, h-1]
    if( cx < 0 ){
        cx = 0;
    }
    if( cx > w - 1 ){
        cx = w - 1;
    }
    if( cy < 0 ){
        cy = 0;
    }
    if( cy > h - 1 ){
        cy = h - 1;
    }

    // offset of the first channel of the selected texel
    base = ( cy * w + cx ) * nChannels;

    for( c = 0; c < nChannels; c++ ){
        srcRgb[c] = (float)src[base + c];
    }

}   // end read_imagef()
|
||||
|
||||
|
||||
// Host-side counterpart of the kernel's write_imagef: stores nChannels float
// values as bytes at texel (x, y) of a tightly packed image that is w texels
// wide.  Each value is converted with a plain (uchar) cast.  h is not used.
static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb )
{
    // first channel of the destination texel
    uchar *texel = dst + ( y * w + x ) * nChannels;
    int   c = 0;

    while( c < nChannels ){
        texel[c] = (uchar)dstRgb[c];
        c++;
    }

}   // end write_imagef()
|
||||
|
||||
|
||||
// Computes one output texel of the reference (host) filter.  The n x m weight
// table is walked row by row in the same order as the kernel does; zero
// weights are skipped, every other weight scales the edge-clamped source texel
// at the matching offset.  The accumulated sum is stored at (x, y) in dst.
// The local accumulators hold four floats, so nChannels must not exceed 4.
static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst )
{
    const float *weight = filter_weights;
    float       sum[] = { 0.f, 0.f, 0.f, 0.f };
    float       texel[4];
    int         row, col, c;

    for( row = -m/2; row < (m+1)/2; row++ ){
        for( col = -n/2; col < (n+1)/2; col++ ){
            const float wgt = *weight++;

            if( wgt == 0 ){
                continue;
            }

            read_imagef( x + col, y + row, xsize, ysize, nChannels, src, texel );
            for( c = 0; c < nChannels; c++ ){
                sum[c] += wgt * texel[c];
            }
        }
    }

    write_imagef( dst, x, y, xsize, ysize, nChannels, sum );

}   // end basicFilterPixel()
|
||||
|
||||
|
||||
//--- helper functions
|
||||
// Allocates `elements` bytes and fills them, in order, with the low byte of
// successive genrand_int32( d ) draws.  Returns NULL when the allocation
// fails; otherwise the caller owns the buffer and must free() it.
static uchar *createImage( int elements, MTdata d)
{
    uchar *pixels = (uchar *)malloc( elements * sizeof( cl_uchar ) );
    uchar *cursor = pixels;
    int   remaining;

    if( pixels != NULL ){
        for( remaining = elements; remaining > 0; remaining-- ){
            *cursor++ = (uchar)genrand_int32(d);
        }
    }

    return pixels;

}   // end createImage()
|
||||
|
||||
|
||||
// Compares two tightly packed xsize x ysize images, nChannels bytes per texel,
// channel by channel.  Returns 0 when every pair of bytes differs by at most
// `tolerance`; otherwise logs the first offending position and returns -1.
static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels )
{
    const uchar *a = ptr0;
    const uchar *b = ptr1;
    int         x, y, z;

    for( y = 0; y < ysize; y++ ){
        for( x = 0; x < xsize; x++ ){
            for( z = 0; z < nChannels; z++, a++, b++ ){
                int delta = (int)*a - (int)*b;

                if( delta < 0 ){
                    delta = -delta;
                }
                if( (uchar)delta > tolerance ){
                    log_error( " images differ at x,y = %d,%d, channel = %d, %d to %d\n", x, y, z,
                               (int)*a, (int)*b );
                    return -1;
                }
            }
        }
    }

    return 0;

}   // end verifyImages()
|
||||
|
||||
|
||||
// Runs the image_filter kernel over a w x h CL_RGBA / CL_UNORM_INT8 image and
// validates the profiling timestamps recorded for that launch.
//
//   inptr     host pixels copied into the source image (CL_MEM_COPY_HOST_PTR)
//   outptr    receives the filtered image read back from the device
//   nChannels is not used by this function
//
// Returns 0 on success.  Returns -1 when any CL call fails or when
// check_times() rejects the timestamps.  All CL objects created here are
// released on every exit path.
static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels,
                         uchar *inptr, uchar *outptr )
{
    cl_program      program[1] = { NULL };
    cl_kernel       kernel[1] = { NULL };
    cl_mem          memobjs[3] = { NULL, NULL, NULL };
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_event        executeEvent = NULL;    // stays NULL until the enqueue succeeds
    cl_ulong        queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    size_t          threads[2];
#ifdef USE_LOCAL_THREADS
    size_t          localThreads[2];
#endif
    size_t          origin[3] = { 0, 0, 0 };
    size_t          region[3];
    float           filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    int             filter_w = 3, filter_h = 3;
    int             err = 0;
    int             completed = 0;          // set once every CL call has succeeded
    int             i;

    // one work-item per pixel
    threads[0] = w;
    threads[1] = h;

    // assigned element-wise: a braced int -> size_t initialiser is a narrowing
    // error when this file is built as C++
    region[0] = (size_t)w;
    region[1] = (size_t)h;
    region[2] = 1;

#ifdef USE_LOCAL_THREADS
    // NOTE(review): this disabled branch refers to clGetDeviceConfigInfo, `id` and
    // CL_DEVICE_MAX_THREAD_GROUP_SIZE, none of which are declared in this file; it
    // is kept verbatim and needs porting before USE_LOCAL_THREADS can be defined.
    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
    test_error( err, "Unable to get thread group max size" );
    localThreads[1] = localThreads[0];
    if( localThreads[0] > threads[0] )
        localThreads[0] = threads[0];
    if( localThreads[1] > threads[1] )
        localThreads[1] = threads[1];
#endif

    // allocate the input and output image memory objects
    memobjs[0] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR),
                                  &image_format_desc, w, h, 0, inptr, &err );
    if( memobjs[0] == (cl_mem)0 ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        goto release_mem;
    }

    memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        goto release_mem;
    }

    // allocate an array memory object to load the filter weights
    memobjs[2] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR ),
                                 sizeof( cl_float ) * filter_w * filter_h, filter_weights, &err );
    if( memobjs[2] == (cl_mem)0 ){
        log_error( " unable to create array using clCreateBuffer\n" );
        goto release_mem;
    }

    // create the compute program; on failure only the memory objects are released,
    // because the helper's partial-failure state is not visible from here
    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &image_filter_src, "image_filter" );
    if( err ){
        goto release_mem;
    }

    // set the args values
    err  = clSetKernelArg( kernel[0], 0, sizeof( cl_int ), (void *)&filter_w );
    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_int ), (void *)&filter_h );
    err |= clSetKernelArg( kernel[0], 2, sizeof( cl_mem ), (void *)&memobjs[2] );
    err |= clSetKernelArg( kernel[0], 3, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel[0], 4, sizeof( cl_mem ), (void *)&memobjs[1] );
    if( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed\n" );
        goto release_all;
    }

    // wait list: count first (0), then the list pointer (NULL)
#ifdef USE_LOCAL_THREADS
    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, &executeEvent );
#else
    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, &executeEvent );
#endif
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed\n" );
        goto release_all;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &executeEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clWaitForEvents failed\n" );
        goto release_all;
    }

    // test profiling: QUEUED and SUBMIT are polled until the implementation
    // stops reporting CL_PROFILING_INFO_NOT_AVAILABLE
    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_all;
    }

    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_all;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_all;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_all;
    }

    // read output image (blocking)
    err = clEnqueueReadImage( queue, memobjs[1], CL_TRUE, origin, region, 0, 0, outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clReadImage failed\n" );
        goto release_all;
    }

    completed = 1;

release_all:
    // the event only exists when the enqueue succeeded
    if( executeEvent != NULL ){
        clReleaseEvent( executeEvent );
    }
    clReleaseKernel( kernel[0] );
    clReleaseProgram( program[0] );

release_mem:
    for( i = 2; i >= 0; i-- ){
        if( memobjs[i] != (cl_mem)0 ){
            clReleaseMemObject( memobjs[i] );
        }
    }

    if( !completed ){
        return -1;
    }

    return check_times( queueStart, submitStart, writeStart, writeEnd, device ) ? -1 : 0;

}   // end kernelFilter()
|
||||
|
||||
|
||||
// Host reference for the image_filter kernel: applies the same 3 x 3 weight
// table to every texel of the w x h input, row by row, and stores the result
// in outptr.  Always returns 0.
static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr )
{
    const float weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    const int   kernelW = 3;
    const int   kernelH = 3;
    int         row = 0;

    while( row < h ){
        int col = 0;

        while( col < w ){
            basicFilterPixel( col, row, kernelW, kernelH, w, h, nChannels, weights, inptr, outptr );
            col++;
        }
        row++;
    }

    return 0;

}   // end of basicFilter()
|
||||
|
||||
|
||||
// Test entry point: filters a random 256 x 256 four-channel image on the
// device (which also checks the profiling timestamps of the launch) and on
// the host, then requires the two results to agree within one unit per
// channel.  Returns 0 on success and non-zero on failure.  num_elements is
// not used.
int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    int    w = 256, h = 256;
    int    nChannels = 4;
    int    nElements = w * h * nChannels;
    uchar  *source = NULL;
    uchar  *fromDevice = NULL;
    uchar  *fromHost = NULL;
    int    status;
    MTdata rng;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // random input image
    rng = init_genrand( gRandomSeed );
    source = createImage( nElements, rng );
    free_mtdata( rng );
    if( source == NULL ){
        log_error( " unable to allocate %d bytes of memory for image\n", nElements );
        return -1;
    }

    fromDevice = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
    if( fromDevice == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #1\n", nElements );
        free( (void *)source );
        return -1;
    }

    fromHost = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
    if( fromHost == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #2\n", nElements );
        free( (void *)fromDevice );
        free( (void *)source );
        return -1;
    }

    status = kernelFilter( device, context, queue, w, h, nChannels, source, fromDevice );
    if( status == 0 ){
        // the host reference is only computed when the device run succeeded
        basicFilter( w, h, nChannels, source, fromHost );

        status = verifyImages( fromDevice, fromHost, (uchar)0x1, w, h, nChannels );
        if( status != 0 ){
            log_error( " images do not match\n" );
        }
    }

    free( (void *)fromHost );
    free( (void *)fromDevice );
    free( (void *)source );

    return status;

}   // end execute()
|
||||
|
||||
|
||||
|
||||
314
test_conformance/profiling/execute_multipass.c
Normal file
314
test_conformance/profiling/execute_multipass.c
Normal file
@@ -0,0 +1,314 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
// OpenCL C source of the read3d kernel.  Each work-item samples one texel of
// the 3D source image and stores its four channels, scaled by 255 and cast to
// unsigned char, at four consecutive bytes of dst.
static const char *read3d_kernel_code =
    "\n"
    "__kernel void read3d(read_only image3d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int tid_z = get_global_id(2);\n"
    " int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
    " indx *= 4;\n"
    " dst[indx+0] = (unsigned char)(color.x * 255.0f);\n"
    " dst[indx+1] = (unsigned char)(color.y * 255.0f);\n"
    " dst[indx+2] = (unsigned char)(color.z * 255.0f);\n"
    " dst[indx+3] = (unsigned char)(color.w * 255.0f);\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
// Allocates `elements` bytes and fills them, in order, with the low byte of
// successive genrand_int32( d ) draws.  Returns NULL when the allocation
// fails; otherwise the caller owns the buffer and must free() it.
static cl_uchar *createImage( int elements, MTdata d )
{
    cl_uchar *pixels = (cl_uchar *)malloc( elements * sizeof( cl_uchar ) );
    cl_uchar *cursor = pixels;
    int      remaining;

    if( pixels != NULL ){
        for( remaining = elements; remaining > 0; remaining-- ){
            *cursor++ = (cl_uchar)genrand_int32(d);
        }
    }

    return pixels;

}   // end createImage()
|
||||
|
||||
|
||||
// Compares two tightly packed xsize x ysize x zsize volumes, nChannels bytes
// per texel, channel by channel.  Returns 0 when every pair of bytes differs
// by at most `tolerance`; otherwise logs the first offending position and
// returns -1.
static int verifyImages( cl_uchar *ptr0, cl_uchar *ptr1, cl_uchar tolerance, int xsize, int ysize, int zsize, int nChannels )
{
    const cl_uchar *a = ptr0;
    const cl_uchar *b = ptr1;
    int            x, y, z, c;

    for( z = 0; z < zsize; z++ ){
        for( y = 0; y < ysize; y++ ){
            for( x = 0; x < xsize; x++ ){
                for( c = 0; c < nChannels; c++, a++, b++ ){
                    int delta = (int)*a - (int)*b;

                    if( delta < 0 ){
                        delta = -delta;
                    }
                    if( (cl_uchar)delta > tolerance ){
                        log_error( " images differ at x,y,z = %d,%d,%d channel = %d, %d to %d\n",
                                   x, y, z, c, (int)*a, (int)*b );
                        return -1;
                    }
                }
            }
        }
    }

    return 0;

}   // end verifyImages()
|
||||
|
||||
|
||||
// Runs the read3d kernel over a w x h x d CL_RGBA / CL_UNORM_INT8 3D image and
// reads the bytes it produced back into outptr.
//
//   inptr     host texels copied into the 3D image (CL_MEM_COPY_HOST_PTR)
//   outptr    host buffer of at least w*h*d*nChannels bytes
//   nChannels bytes per texel in outptr; the kernel itself always writes four
//
// Profiling timestamps of the launch are queried and logged.  Returns 0 on
// success and -1 when any CL call fails.  Every CL object created here,
// including the sampler and the event, is released on every exit path.
static int run_kernel( cl_device_id device, cl_context context, cl_command_queue queue,
                       int w, int h, int d, int nChannels, cl_uchar *inptr, cl_uchar *outptr )
{
    cl_program      program[1] = { NULL };
    cl_kernel       kernel[1] = { NULL };
    cl_mem          memobjs[2] = { NULL, NULL };
    cl_sampler      sampler = NULL;
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_event        executeEvent = NULL;
    cl_ulong        queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    cl_sampler_properties properties[] = {
        CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
        CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
        0 };
    size_t          threads[3];
    size_t          localThreads[3] = { 256, 1, 1 };    // historical work-group shape
    size_t          maxItemSizes[3];
    size_t          maxGroupSize = 0;
    size_t          texels;
    size_t          bufferBytes;
    size_t          readBytes;
    int             err = 0;
    int             result = -1;
    int             i;

    // set thread dimensions
    threads[0] = w;
    threads[1] = h;
    threads[2] = d;

    // The kernel stores four bytes per texel; the host buffer holds nChannels
    // bytes per texel, so the read-back never exceeds either size.
    texels = (size_t)w * (size_t)h * (size_t)d;
    bufferBytes = texels * 4;
    readBytes = texels * (size_t)nChannels;
    if( readBytes > bufferBytes ){
        readBytes = bufferBytes;
    }

    // Shrink the 256 x 1 x 1 work-group to what the device allows.  Both queries
    // return size_t values; a failed query leaves the current value in place.
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( maxItemSizes ), maxItemSizes, NULL );
    if( err == CL_SUCCESS && maxItemSizes[0] < localThreads[0] ){
        localThreads[0] = maxItemSizes[0];
    }
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( maxGroupSize ), &maxGroupSize, NULL );
    if( err == CL_SUCCESS && maxGroupSize < localThreads[0] ){
        localThreads[0] = maxGroupSize;
    }
    if( localThreads[0] > threads[0] ){
        localThreads[0] = threads[0];
    }
    if( localThreads[0] == 0 ){
        localThreads[0] = 1;
    }
    // the local size must divide the global size evenly
    while( threads[0] % localThreads[0] != 0 ){
        localThreads[0]--;
    }
    err = 0;

    sampler = clCreateSamplerWithProperties( context, properties, &err );
    if( err ){
        log_error( " clCreateSamplerWithProperties failed.\n" );
        goto release_sampler;
    }

    // allocate the input image
    memobjs[0] = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), &image_format_desc, w, h, d, 0, 0, inptr, &err );
    if( memobjs[0] == (cl_mem)0 ){
        log_error( " unable to create 3D image using create_image_3d\n" );
        goto release_mem;
    }

    // allocate the buffer the kernel writes its bytes into
    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_WRITE ), bufferBytes, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ){
        log_error( " unable to create array using clCreateBuffer\n" );
        goto release_mem;
    }

    // create the compute program; on failure only the memory objects and the
    // sampler are released, because the helper's partial-failure state is not
    // visible from here
    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &read3d_kernel_code, "read3d" );
    if( err ){
        goto release_mem;
    }

    // set the args values
    err  = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] );
    err |= clSetKernelArg( kernel[0], 2, sizeof( cl_sampler ), &sampler );
    if( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed\n" );
        goto release_all;
    }

    // wait list: count first (0), then the list pointer (NULL)
    err = clEnqueueNDRangeKernel( queue, kernel[0], 3, NULL, threads, localThreads, 0, NULL, &executeEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed\n" );
        goto release_all;
    }

    if( executeEvent ){

        // This synchronization point is needed in order to assume the data is valid.
        // Getting profiling information is not a synchronization point.
        err = clWaitForEvents( 1, &executeEvent );
        if( err != CL_SUCCESS ){
            print_error( err, "clWaitForEvents failed\n" );
            goto release_all;
        }

        // test profiling: QUEUED and SUBMIT are polled until the implementation
        // stops reporting CL_PROFILING_INFO_NOT_AVAILABLE
        while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto release_all;
        }

        while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto release_all;
        }

        err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto release_all;
        }

        err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
        if( err != CL_SUCCESS ){
            print_error( err, "clGetEventProfilingInfo failed" );
            goto release_all;
        }

        // profiling counters are in nanoseconds: 1e9 of them per second
        log_info( "Profiling info:\n" );
        log_info( "Time from queue to start of clEnqueueNDRangeKernel: %f seconds\n", (double)(writeStart - queueStart) / 1e9 );
        log_info( "Time from start of clEnqueueNDRangeKernel to end: %f seconds\n", (double)(writeEnd - writeStart) / 1e9 );
    }

    // read the kernel output (blocking)
    err = clEnqueueReadBuffer( queue, memobjs[1], CL_TRUE, 0, readBytes, outptr, 0, NULL, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed\n" );
        goto release_all;
    }

    result = 0;

release_all:
    if( executeEvent != NULL ){
        clReleaseEvent( executeEvent );
    }
    clReleaseKernel( kernel[0] );
    clReleaseProgram( program[0] );

release_mem:
    for( i = 1; i >= 0; i-- ){
        if( memobjs[i] != (cl_mem)0 ){
            clReleaseMemObject( memobjs[i] );
        }
    }

release_sampler:
    if( sampler != NULL ){
        clReleaseSampler( sampler );
    }

    return result;

}   // end run_kernel()
|
||||
|
||||
|
||||
// The main point of this test is to exercise code that causes a multipass cld launch for a single
|
||||
// kernel exec at the cl level. This is done on the gpu for 3d launches, and it's also done
|
||||
// to handle gdims that excede the maximums allowed by the hardware. In this case we
|
||||
// use 3d to exercise the multipass events. In the future 3d may not be multpass, in which
|
||||
// case we will need to ensure that we use gdims large enough to force multipass.
|
||||
|
||||
int execute_multipass( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
|
||||
{
|
||||
cl_uchar *inptr;
|
||||
cl_uchar *outptr;
|
||||
int w = 256, h = 128, d = 32;
|
||||
int nChannels = 4;
|
||||
int nElements = w * h * d * nChannels;
|
||||
int err = 0;
|
||||
MTdata mtData;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
mtData = init_genrand( gRandomSeed );
|
||||
inptr = createImage( nElements, mtData );
|
||||
free_mtdata( mtData); mtData = NULL;
|
||||
if( ! inptr ){
|
||||
log_error( " unable to allocate %d bytes of memory for image\n", nElements );
|
||||
return -1;
|
||||
}
|
||||
|
||||
outptr = (cl_uchar *)malloc( nElements * sizeof( cl_uchar ) );
|
||||
if( ! outptr ){
|
||||
log_error( " unable to allocate %d bytes of memory for output image #1\n", nElements );
|
||||
free( (void *)inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
err = run_kernel( device, context, queue, w, h, d, nChannels, inptr, outptr );
|
||||
|
||||
if( ! err ){
|
||||
// verify that the images are the same
|
||||
err = verifyImages( outptr, inptr, (cl_uchar)0x1, w, h, d, nChannels );
|
||||
if( err )
|
||||
log_error( " images do not match\n" );
|
||||
}
|
||||
|
||||
// clean up
|
||||
free( (void *)outptr );
|
||||
free( (void *)inptr );
|
||||
|
||||
return err;
|
||||
|
||||
} // end execute()
|
||||
|
||||
|
||||
|
||||
173
test_conformance/profiling/main.c
Normal file
173
test_conformance/profiling/main.c
Normal file
@@ -0,0 +1,173 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
|
||||
// (for example, generate_random_image_data()), the tests are required to declare
|
||||
// the following variables (<rdar://problem/11111245>):
|
||||
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
|
||||
bool gTestRounding = false;
|
||||
|
||||
basefn basefn_list[] = {
|
||||
read_int_array,
|
||||
read_uint_array,
|
||||
read_long_array,
|
||||
read_ulong_array,
|
||||
read_short_array,
|
||||
read_ushort_array,
|
||||
read_float_array,
|
||||
read_char_array,
|
||||
read_uchar_array,
|
||||
read_struct_array,
|
||||
write_int_array,
|
||||
write_uint_array,
|
||||
write_long_array,
|
||||
write_ulong_array,
|
||||
write_short_array,
|
||||
write_ushort_array,
|
||||
write_float_array,
|
||||
write_char_array,
|
||||
write_uchar_array,
|
||||
write_struct_array,
|
||||
read_float_image,
|
||||
read_char_image,
|
||||
read_uchar_image,
|
||||
write_float_image,
|
||||
write_char_image,
|
||||
write_uchar_image,
|
||||
copy_array,
|
||||
copy_partial_array,
|
||||
copy_image,
|
||||
copy_array_to_image,
|
||||
execute
|
||||
};
|
||||
|
||||
|
||||
// Display names for the tests, index-aligned with basefn_list.
// NOTE(review): "read_image_int" / "read_image_uint" sit at the indices of
// read_char_image / read_uchar_image; the strings are left untouched here
// because they may be used to select tests by name -- confirm before renaming.
const char *basefn_names[] = {
    // buffer reads
    "read_array_int",       "read_array_uint",
    "read_array_long",      "read_array_ulong",
    "read_array_short",     "read_array_ushort",
    "read_array_float",
    "read_array_char",      "read_array_uchar",
    "read_array_struct",

    // buffer writes
    "write_array_int",      "write_array_uint",
    "write_array_long",     "write_array_ulong",
    "write_array_short",    "write_array_ushort",
    "write_array_float",
    "write_array_char",     "write_array_uchar",
    "write_array_struct",

    // image reads
    "read_image_float",     "read_image_int",       "read_image_uint",

    // image writes
    "write_image_float",    "write_image_char",     "write_image_uchar",

    // copies
    "copy_array",           "copy_partial_array",
    "copy_image",           "copy_array_to_image",

    // kernel execution
    "execute",
};
|
||||
|
||||
// Uses the harness's ct_assert to require that basefn_names and basefn_list
// have the same number of entries.
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
// Number of registered tests (entries in basefn_names), passed to runTestHarness().
int num_streamfns = sizeof(basefn_names) / sizeof(basefn_names[0]);
|
||||
|
||||
// FIXME: use timer resolution rather than hardcoding 1µs per tick.
|
||||
|
||||
#define QUEUE_SECONDS_LIMIT 30
|
||||
#define SUBMIT_SECONDS_LIMIT 30
|
||||
#define COMMAND_SECONDS_LIMIT 30
|
||||
// Logs and sanity-checks the four profiling timestamps of one command.
//
//   queueStart    - value read for CL_PROFILING_COMMAND_QUEUED
//   commandSubmit - value read for CL_PROFILING_COMMAND_SUBMIT
//   commandStart  - value read for CL_PROFILING_COMMAND_START
//   commandEnd    - value read for CL_PROFILING_COMMAND_END
//   device        - device queried for CL_DEVICE_PROFILING_TIMER_RESOLUTION
//                   (the resolution is only logged, not used in the checks)
//
// The timestamps are treated as nanoseconds (scaled by 1e-9 to seconds).
// Checks performed: QUEUED <= SUBMIT <= START <= END, not all of
// QUEUED/START/END zero, and each of the three intervals within its
// *_SECONDS_LIMIT.  Every failed check is logged; all checks always run.
//
// Returns 0 when every check passes and -1 when any check fails.  A failing
// clGetDeviceInfo is reported through the harness's test_error macro.
int check_times(cl_ulong queueStart, cl_ulong commandSubmit, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device) {
    int err = 0;

    size_t profiling_resolution = 0;
    err = clGetDeviceInfo(device, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(profiling_resolution), &profiling_resolution, NULL);
    test_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILING_TIMER_RESOLUTION failed.\n");

    // cl_ulong and size_t have platform-dependent underlying types, so cast
    // every argument to the exact type %llu expects.
    log_info("CL_PROFILING_COMMAND_QUEUED: %llu CL_PROFILING_COMMAND_SUBMIT: %llu CL_PROFILING_COMMAND_START: %llu CL_PROFILING_COMMAND_END: %llu CL_DEVICE_PROFILING_TIMER_RESOLUTION: %llu\n",
             (unsigned long long)queueStart, (unsigned long long)commandSubmit,
             (unsigned long long)commandStart, (unsigned long long)commandEnd,
             (unsigned long long)profiling_resolution);

    // Unsigned subtraction: if the timestamps are out of order these wrap to
    // huge values, which the limit checks below will then also report.
    double queueTosubmitTimeS = (double)(commandSubmit - queueStart)*1e-9;
    double submitToStartTimeS = (double)(commandStart - commandSubmit)*1e-9;
    double startToEndTimeS = (double)(commandEnd - commandStart)*1e-9;

    log_info( "Profiling info:\n" );
    log_info( "Time from queue to submit : %fms\n", (double)(queueTosubmitTimeS) * 1000.f );
    log_info( "Time from submit to start : %fms\n", (double)(submitToStartTimeS) * 1000.f );
    log_info( "Time from start to end: %fms\n", (double)(startToEndTimeS) * 1000.f );

    // Ordering checks.
    if (queueStart > commandSubmit) {
        log_error("CL_PROFILING_COMMAND_QUEUED > CL_PROFILING_COMMAND_SUBMIT.\n");
        err = -1;
    }

    if (commandSubmit > commandStart) {
        log_error("CL_PROFILING_COMMAND_SUBMIT > CL_PROFILING_COMMAND_START.\n");
        err = -1;
    }

    if (commandStart > commandEnd) {
        log_error("CL_PROFILING_COMMAND_START > CL_PROFILING_COMMAND_END.\n");
        err = -1;
    }

    if (queueStart == 0 && commandStart == 0 && commandEnd == 0) {
        log_error("All values are 0. This is exceedingly unlikely.\n");
        err = -1;
    }

    // Duration checks: each interval is compared against, and reported with,
    // its own limit.
    if (queueTosubmitTimeS > QUEUE_SECONDS_LIMIT) {
        log_error("Time between queue and submit is too big: %fs, test limit: %fs.\n",
                  queueTosubmitTimeS, (double)QUEUE_SECONDS_LIMIT);
        err = -1;
    }

    if (submitToStartTimeS > SUBMIT_SECONDS_LIMIT) {
        log_error("Time between submit and start is too big: %fs, test limit: %fs.\n",
                  submitToStartTimeS, (double)SUBMIT_SECONDS_LIMIT);
        err = -1;
    }

    if (startToEndTimeS > COMMAND_SECONDS_LIMIT) {
        log_error("Time between start and end is too big: %fs, test limit: %fs.\n",
                  startToEndTimeS, (double)COMMAND_SECONDS_LIMIT);
        err = -1;
    }

    return err;
}
|
||||
|
||||
|
||||
int main( int argc, const char *argv[] )
|
||||
{
|
||||
return runTestHarness( argc, argv, num_streamfns, basefn_list, basefn_names,
|
||||
false, false, CL_QUEUE_PROFILING_ENABLE );
|
||||
}
|
||||
|
||||
|
||||
65
test_conformance/profiling/procs.h
Normal file
65
test_conformance/profiling/procs.h
Normal file
@@ -0,0 +1,65 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __PROCS_H__
|
||||
#define __PROCS_H__
|
||||
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/imageHelpers.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
|
||||
extern int check_times(cl_ulong queueStart, cl_ulong submitStart, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device);
|
||||
|
||||
extern int read_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int write_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_partial_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int copy_array_to_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_parallel_kernels( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
|
||||
|
||||
|
||||
#endif // #ifndef __PROCS_H__
|
||||
|
||||
|
||||
997
test_conformance/profiling/readArray.c
Normal file
997
test_conformance/profiling/readArray.c
Normal file
@@ -0,0 +1,997 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#define TEST_PRIME_INT ((1<<16)+1)
|
||||
#define TEST_PRIME_UINT ((1U<<16)+1U)
|
||||
#define TEST_PRIME_LONG ((1LL<<32)+1LL)
|
||||
#define TEST_PRIME_ULONG ((1ULL<<32)+1ULL)
|
||||
// 257 as a short.  C has no 'S' integer-literal suffix, so the value is
// produced with a cast; this matches the (short)((1<<8)+1) written by the
// short kernels and compared in verify_read_short().
#define TEST_PRIME_SHORT ((short)((1<<8)+1))
|
||||
#define TEST_PRIME_FLOAT (float)3.40282346638528860e+38
|
||||
#define TEST_PRIME_HALF 119.f
|
||||
#define TEST_BOOL true
|
||||
#define TEST_PRIME_CHAR 0x77
|
||||
|
||||
|
||||
#ifndef ulong
|
||||
typedef unsigned long ulong;
|
||||
#endif
|
||||
|
||||
#ifndef uchar
|
||||
typedef unsigned char uchar;
|
||||
#endif
|
||||
|
||||
#ifndef TestStruct
|
||||
typedef struct{
|
||||
int a;
|
||||
float b;
|
||||
} TestStruct;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//--- the code for the kernel executables
|
||||
// Builds the source of one int kernel.  `w` is the vector-width suffix as a
// string literal ("" for the scalar type); the adjacent literals are joined
// by the compiler, so each expansion is one complete source string.
#define STREAM_READ_INT_SOURCE( w ) \
    "__kernel void test_stream_read_int" w "(__global int" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = ((1<<16)+1);\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores (1<<16)+1 into
// every element of dst.
static const char *stream_read_int_kernel_code[] = {
    STREAM_READ_INT_SOURCE( "" ),
    STREAM_READ_INT_SOURCE( "2" ),
    STREAM_READ_INT_SOURCE( "4" ),
    STREAM_READ_INT_SOURCE( "8" ),
    STREAM_READ_INT_SOURCE( "16" )
};

#undef STREAM_READ_INT_SOURCE

// Kernel function names, index-aligned with stream_read_int_kernel_code.
static const char *int_kernel_name[] = {
    "test_stream_read_int",
    "test_stream_read_int2",
    "test_stream_read_int4",
    "test_stream_read_int8",
    "test_stream_read_int16"
};
|
||||
|
||||
// Builds the source of one uint kernel.  `w` is the vector-width suffix as a
// string literal ("" for the scalar type); the adjacent literals are joined
// by the compiler into one source string per expansion.
#define STREAM_READ_UINT_SOURCE( w ) \
    "__kernel void test_stream_read_uint" w "(__global uint" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = ((1U<<16)+1U);\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores (1U<<16)+1U into
// every element of dst.
const char *stream_read_uint_kernel_code[] = {
    STREAM_READ_UINT_SOURCE( "" ),
    STREAM_READ_UINT_SOURCE( "2" ),
    STREAM_READ_UINT_SOURCE( "4" ),
    STREAM_READ_UINT_SOURCE( "8" ),
    STREAM_READ_UINT_SOURCE( "16" )
};

#undef STREAM_READ_UINT_SOURCE

// Kernel function names, index-aligned with stream_read_uint_kernel_code.
const char *uint_kernel_name[] = {
    "test_stream_read_uint",
    "test_stream_read_uint2",
    "test_stream_read_uint4",
    "test_stream_read_uint8",
    "test_stream_read_uint16"
};
|
||||
|
||||
// Builds the source of one long kernel.  `w` is the vector-width suffix as a
// string literal ("" for the scalar type); the adjacent literals are joined
// by the compiler into one source string per expansion.
#define STREAM_READ_LONG_SOURCE( w ) \
    "__kernel void test_stream_read_long" w "(__global long" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = ((1L<<32)+1L);\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores (1L<<32)+1L into
// every element of dst.
const char *stream_read_long_kernel_code[] = {
    STREAM_READ_LONG_SOURCE( "" ),
    STREAM_READ_LONG_SOURCE( "2" ),
    STREAM_READ_LONG_SOURCE( "4" ),
    STREAM_READ_LONG_SOURCE( "8" ),
    STREAM_READ_LONG_SOURCE( "16" )
};

#undef STREAM_READ_LONG_SOURCE

// Kernel function names, index-aligned with stream_read_long_kernel_code.
const char *long_kernel_name[] = {
    "test_stream_read_long",
    "test_stream_read_long2",
    "test_stream_read_long4",
    "test_stream_read_long8",
    "test_stream_read_long16"
};
|
||||
|
||||
// Builds the source of one ulong kernel.  `w` is the vector-width suffix as
// a string literal ("" for the scalar type); the adjacent literals are
// joined by the compiler into one source string per expansion.
#define STREAM_READ_ULONG_SOURCE( w ) \
    "__kernel void test_stream_read_ulong" w "(__global ulong" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = ((1UL<<32)+1UL);\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores (1UL<<32)+1UL
// into every element of dst.
const char *stream_read_ulong_kernel_code[] = {
    STREAM_READ_ULONG_SOURCE( "" ),
    STREAM_READ_ULONG_SOURCE( "2" ),
    STREAM_READ_ULONG_SOURCE( "4" ),
    STREAM_READ_ULONG_SOURCE( "8" ),
    STREAM_READ_ULONG_SOURCE( "16" )
};

#undef STREAM_READ_ULONG_SOURCE

// Kernel function names, index-aligned with stream_read_ulong_kernel_code.
const char *ulong_kernel_name[] = {
    "test_stream_read_ulong",
    "test_stream_read_ulong2",
    "test_stream_read_ulong4",
    "test_stream_read_ulong8",
    "test_stream_read_ulong16"
};
|
||||
|
||||
// Builds the source of one short kernel.  `w` is the vector-width suffix as
// a string literal ("" for the scalar type); the adjacent literals are
// joined by the compiler into one source string per expansion.
#define STREAM_READ_SHORT_SOURCE( w ) \
    "__kernel void test_stream_read_short" w "(__global short" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = (short)((1<<8)+1);\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores
// (short)((1<<8)+1) into every element of dst.
const char *stream_read_short_kernel_code[] = {
    STREAM_READ_SHORT_SOURCE( "" ),
    STREAM_READ_SHORT_SOURCE( "2" ),
    STREAM_READ_SHORT_SOURCE( "4" ),
    STREAM_READ_SHORT_SOURCE( "8" ),
    STREAM_READ_SHORT_SOURCE( "16" )
};

#undef STREAM_READ_SHORT_SOURCE

// Kernel function names, index-aligned with stream_read_short_kernel_code.
const char *short_kernel_name[] = {
    "test_stream_read_short",
    "test_stream_read_short2",
    "test_stream_read_short4",
    "test_stream_read_short8",
    "test_stream_read_short16"
};
|
||||
|
||||
|
||||
// Builds the source of one ushort kernel.  `w` is the vector-width suffix as
// a string literal ("" for the scalar type); the adjacent literals are
// joined by the compiler into one source string per expansion.
#define STREAM_READ_USHORT_SOURCE( w ) \
    "__kernel void test_stream_read_ushort" w "(__global ushort" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = (ushort)((1<<8)+1);\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores
// (ushort)((1<<8)+1) into every element of dst.
const char *stream_read_ushort_kernel_code[] = {
    STREAM_READ_USHORT_SOURCE( "" ),
    STREAM_READ_USHORT_SOURCE( "2" ),
    STREAM_READ_USHORT_SOURCE( "4" ),
    STREAM_READ_USHORT_SOURCE( "8" ),
    STREAM_READ_USHORT_SOURCE( "16" )
};

#undef STREAM_READ_USHORT_SOURCE

// Kernel function names, index-aligned with stream_read_ushort_kernel_code.
static const char *ushort_kernel_name[] = {
    "test_stream_read_ushort",
    "test_stream_read_ushort2",
    "test_stream_read_ushort4",
    "test_stream_read_ushort8",
    "test_stream_read_ushort16"
};
|
||||
|
||||
|
||||
// Builds the source of one float kernel.  `w` is the vector-width suffix as
// a string literal ("" for the scalar type); the adjacent literals are
// joined by the compiler into one source string per expansion.
#define STREAM_READ_FLOAT_SOURCE( w ) \
    "__kernel void test_stream_read_float" w "(__global float" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = (float)3.40282346638528860e+38;\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores
// (float)3.40282346638528860e+38 into every element of dst.
const char *stream_read_float_kernel_code[] = {
    STREAM_READ_FLOAT_SOURCE( "" ),
    STREAM_READ_FLOAT_SOURCE( "2" ),
    STREAM_READ_FLOAT_SOURCE( "4" ),
    STREAM_READ_FLOAT_SOURCE( "8" ),
    STREAM_READ_FLOAT_SOURCE( "16" )
};

#undef STREAM_READ_FLOAT_SOURCE

// Kernel function names, index-aligned with stream_read_float_kernel_code.
const char *float_kernel_name[] = {
    "test_stream_read_float",
    "test_stream_read_float2",
    "test_stream_read_float4",
    "test_stream_read_float8",
    "test_stream_read_float16"
};
|
||||
|
||||
|
||||
// Builds the source of one half kernel.  `w` is the vector-width suffix as a
// string literal ("" for the scalar type); the adjacent literals are joined
// by the compiler into one source string per expansion.
#define STREAM_READ_HALF_SOURCE( w ) \
    "__kernel void test_stream_read_half" w "(__global half" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = (half)119;\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores (half)119 into
// every element of dst.
const char *stream_read_half_kernel_code[] = {
    STREAM_READ_HALF_SOURCE( "" ),
    STREAM_READ_HALF_SOURCE( "2" ),
    STREAM_READ_HALF_SOURCE( "4" ),
    STREAM_READ_HALF_SOURCE( "8" ),
    STREAM_READ_HALF_SOURCE( "16" )
};

#undef STREAM_READ_HALF_SOURCE

// Kernel function names, index-aligned with stream_read_half_kernel_code.
const char *half_kernel_name[] = {
    "test_stream_read_half",
    "test_stream_read_half2",
    "test_stream_read_half4",
    "test_stream_read_half8",
    "test_stream_read_half16"
};
|
||||
|
||||
|
||||
// Builds the source of one char kernel.  `w` is the vector-width suffix as a
// string literal ("" for the scalar type); the adjacent literals are joined
// by the compiler into one source string per expansion.
#define STREAM_READ_CHAR_SOURCE( w ) \
    "__kernel void test_stream_read_char" w "(__global char" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = (char)'w';\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores (char)'w' into
// every element of dst.
const char *stream_read_char_kernel_code[] = {
    STREAM_READ_CHAR_SOURCE( "" ),
    STREAM_READ_CHAR_SOURCE( "2" ),
    STREAM_READ_CHAR_SOURCE( "4" ),
    STREAM_READ_CHAR_SOURCE( "8" ),
    STREAM_READ_CHAR_SOURCE( "16" )
};

#undef STREAM_READ_CHAR_SOURCE

// Kernel function names, index-aligned with stream_read_char_kernel_code.
const char *char_kernel_name[] = {
    "test_stream_read_char",
    "test_stream_read_char2",
    "test_stream_read_char4",
    "test_stream_read_char8",
    "test_stream_read_char16"
};
|
||||
|
||||
|
||||
// Builds the source of one uchar kernel.  `w` is the vector-width suffix and
// `value` the stored expression, both as string literals; the adjacent
// literals are joined by the compiler into one source string per expansion.
#define STREAM_READ_UCHAR_SOURCE( w, value ) \
    "__kernel void test_stream_read_uchar" w "(__global uchar" w " *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = " value ";\n" \
    "}\n"

// One kernel per vector width (1, 2, 4, 8, 16); each stores 'w' into every
// element of dst.  The scalar kernel assigns the literal without a cast,
// the vector kernels cast it to uchar first.
const char *stream_read_uchar_kernel_code[] = {
    STREAM_READ_UCHAR_SOURCE( "",   "'w'" ),
    STREAM_READ_UCHAR_SOURCE( "2",  "(uchar)'w'" ),
    STREAM_READ_UCHAR_SOURCE( "4",  "(uchar)'w'" ),
    STREAM_READ_UCHAR_SOURCE( "8",  "(uchar)'w'" ),
    STREAM_READ_UCHAR_SOURCE( "16", "(uchar)'w'" )
};

#undef STREAM_READ_UCHAR_SOURCE

// Kernel function names, index-aligned with stream_read_uchar_kernel_code.
const char *uchar_kernel_name[] = {
    "test_stream_read_uchar",
    "test_stream_read_uchar2",
    "test_stream_read_uchar4",
    "test_stream_read_uchar8",
    "test_stream_read_uchar16"
};
|
||||
|
||||
|
||||
// Single kernel source for the struct test: declares a device-side
// TestStruct { int a; float b; } and fills both members of every element.
const char *stream_read_struct_kernel_code[] = {
    // device-side copy of the struct layout
    "typedef struct{\n" "int a;\n" "float b;\n" "} TestStruct;\n"
    // the kernel itself
    "__kernel void test_stream_read_struct(__global TestStruct *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid].a = ((1<<16)+1);\n"
    " dst[tid].b = (float)3.40282346638528860e+38;\n"
    "}\n"
};

// Kernel function name for stream_read_struct_kernel_code[0].
const char *struct_kernel_name[] = {
    "test_stream_read_struct"
};
|
||||
|
||||
|
||||
|
||||
//--- the verify functions
|
||||
static int verify_read_int(void *ptr, int n)
|
||||
{
|
||||
int i;
|
||||
int *outptr = (int *)ptr;
|
||||
|
||||
for (i=0; i<n; i++){
|
||||
if( outptr[i] != TEST_PRIME_INT )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_read_uint(void *ptr, int n)
|
||||
{
|
||||
int i;
|
||||
cl_uint *outptr = (cl_uint *)ptr;
|
||||
|
||||
for (i=0; i<n; i++){
|
||||
if( outptr[i] != TEST_PRIME_UINT )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_read_long(void *ptr, int n)
|
||||
{
|
||||
int i;
|
||||
cl_long *outptr = (cl_long *)ptr;
|
||||
|
||||
for (i=0; i<n; i++){
|
||||
if( outptr[i] != TEST_PRIME_LONG )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_read_ulong(void *ptr, int n)
|
||||
{
|
||||
int i;
|
||||
cl_ulong *outptr = (cl_ulong *)ptr;
|
||||
|
||||
for (i=0; i<n; i++){
|
||||
if( outptr[i] != TEST_PRIME_ULONG )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Checks that the first n shorts at ptr all equal (short)((1<<8)+1), the
// value written by the short kernels.
// Returns 0 when they do (also when n <= 0), -1 at the first mismatch.
static int verify_read_short(void *ptr, int n)
{
    const short expected = (short)((1<<8)+1);
    const short *cursor = (const short *)ptr;
    int remaining;

    for( remaining = n; remaining > 0; remaining--, cursor++ ){
        if( *cursor != expected )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
static int verify_read_ushort(void *ptr, int n)
|
||||
{
|
||||
int i;
|
||||
cl_ushort *outptr = (cl_ushort *)ptr;
|
||||
|
||||
for (i=0; i<n; i++){
|
||||
if( outptr[i] != (cl_ushort)((1<<8)+1) )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_read_float( void *ptr, int n )
|
||||
{
|
||||
int i;
|
||||
float *outptr = (float *)ptr;
|
||||
|
||||
for (i=0; i<n; i++){
|
||||
if( outptr[i] != TEST_PRIME_FLOAT )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_read_half( void *ptr, int n )
|
||||
{
|
||||
int i;
|
||||
float *outptr = (float *)ptr;
|
||||
|
||||
for( i = 0; i < n / 2; i++ ){
|
||||
if( outptr[i] != TEST_PRIME_HALF )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_read_char(void *ptr, int n)
|
||||
{
|
||||
int i;
|
||||
char *outptr = (char *)ptr;
|
||||
|
||||
for (i=0; i<n; i++){
|
||||
if( outptr[i] != TEST_PRIME_CHAR )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_read_uchar( void *ptr, int n )
|
||||
{
|
||||
int i;
|
||||
uchar *outptr = (uchar *)ptr;
|
||||
|
||||
for (i=0; i<n; i++){
|
||||
if( outptr[i] != TEST_PRIME_CHAR )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_read_struct( void *ptr, int n )
|
||||
{
|
||||
int i;
|
||||
TestStruct *outptr = (TestStruct *)ptr;
|
||||
|
||||
for ( i = 0; i < n; i++ ){
|
||||
if( ( outptr[i].a != TEST_PRIME_INT ) ||
|
||||
( outptr[i].b != TEST_PRIME_FLOAT ) )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//----- the test functions
|
||||
int test_stream_read( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, size_t size, const char *type, int loops,
|
||||
const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
|
||||
{
|
||||
cl_mem streams[5];
|
||||
void *outptr[5];
|
||||
cl_program program[5];
|
||||
cl_kernel kernel[5];
|
||||
cl_event readEvent;
|
||||
cl_ulong queueStart, submitStart, readStart, readEnd;
|
||||
size_t threads[1];
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
size_t localThreads[1];
|
||||
#endif
|
||||
int err, err_count = 0;
|
||||
int i;
|
||||
size_t ptrSizes[5];
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
log_error( "Unable to get thread group max size: %d", err );
|
||||
return -1;
|
||||
}
|
||||
if( localThreads[0] > threads[0] )
|
||||
localThreads[0] = threads[0];
|
||||
#endif
|
||||
|
||||
ptrSizes[0] = size;
|
||||
ptrSizes[1] = ptrSizes[0] << 1;
|
||||
ptrSizes[2] = ptrSizes[1] << 1;
|
||||
ptrSizes[3] = ptrSizes[2] << 1;
|
||||
ptrSizes[4] = ptrSizes[3] << 1;
|
||||
for( i = 0; i < loops; i++ ){
|
||||
outptr[i] = malloc( ptrSizes[i] * num_elements );
|
||||
if( ! outptr[i] ){
|
||||
log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
|
||||
return -1;
|
||||
}
|
||||
streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err );
|
||||
if( !streams[i] ){
|
||||
log_error( " clCreateBuffer failed\n" );
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
|
||||
if( err ){
|
||||
log_error( " Error creating program for %s\n", type );
|
||||
clReleaseMemObject(streams[i]);
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[i] );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clSetKernelArg failed" );
|
||||
clReleaseProgram( program[i] );
|
||||
clReleaseKernel( kernel[i] );
|
||||
clReleaseMemObject( streams[i] );
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
#else
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
#endif
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clEnqueueNDRangeKernel failed" );
|
||||
clReleaseKernel( kernel[i] );
|
||||
clReleaseProgram( program[i] );
|
||||
clReleaseMemObject( streams[i] );
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &readEvent );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clEnqueueReadBuffer failed" );
|
||||
clReleaseKernel( kernel[i] );
|
||||
clReleaseProgram( program[i] );
|
||||
clReleaseMemObject( streams[i] );
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
err = clWaitForEvents( 1, &readEvent );
|
||||
if( err != CL_SUCCESS )
|
||||
{
|
||||
print_error( err, "Unable to wait for event completion" );
|
||||
clReleaseKernel( kernel[i] );
|
||||
clReleaseProgram( program[i] );
|
||||
clReleaseMemObject( streams[i] );
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseKernel( kernel[i] );
|
||||
clReleaseProgram( program[i] );
|
||||
clReleaseMemObject( streams[i] );
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseKernel( kernel[i] );
|
||||
clReleaseProgram( program[i] );
|
||||
clReleaseMemObject( streams[i] );
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseKernel( kernel[i] );
|
||||
clReleaseProgram( program[i] );
|
||||
clReleaseMemObject( streams[i] );
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseKernel( kernel[i] );
|
||||
clReleaseProgram( program[i] );
|
||||
clReleaseMemObject( streams[i] );
|
||||
free( outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (fn(outptr[i], num_elements*(1<<i))){
|
||||
log_error( " %s%d data failed to verify\n", type, 1<<i );
|
||||
err_count++;
|
||||
}
|
||||
else{
|
||||
log_info( " %s%d data verified\n", type, 1<<i );
|
||||
}
|
||||
|
||||
if (check_times(queueStart, submitStart, readStart, readEnd, device))
|
||||
err_count++;
|
||||
|
||||
// cleanup
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[i] );
|
||||
clReleaseProgram( program[i] );
|
||||
clReleaseMemObject( streams[i] );
|
||||
free( outptr[i] );
|
||||
}
|
||||
|
||||
return err_count;
|
||||
|
||||
} // end test_stream_read()
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_int data.
// Forwards to test_stream_read() with the int kernel source, kernel names and
// verify_read_int as the host-side checker, and returns its result unchanged.
// The literal 5 is presumably the number of vector-width passes (the visible
// tail of test_stream_read labels results with 1<<i) -- TODO confirm.
int read_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_int ), "int", 5,
                             stream_read_int_kernel_code, int_kernel_name,
                             verify_read_int );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_uint data.
// Forwards to test_stream_read() with the uint kernel source, kernel names and
// verify_read_uint as the host-side checker, and returns its result unchanged.
int read_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_uint ), "uint", 5,
                             stream_read_uint_kernel_code, uint_kernel_name,
                             verify_read_uint );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_long data.
// When the global gHasLong flag is clear the test is skipped and CL_SUCCESS is
// returned; otherwise the call is forwarded to test_stream_read() with the
// long kernel source, kernel names and verify_read_long as the checker.
int read_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    if( !gHasLong )
    {
        // Terminate the line so the skip notice does not run into the next log entry.
        log_info( "read_long_array: Long types unsupported, skipping.\n" );
        return CL_SUCCESS;
    }

    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_long ), "long", 5,
                             stream_read_long_kernel_code, long_kernel_name,
                             verify_read_long );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_ulong data.
// When the global gHasLong flag is clear the test is skipped and CL_SUCCESS is
// returned; otherwise the call is forwarded to test_stream_read() with the
// ulong kernel source, kernel names and verify_read_ulong as the checker.
int read_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    if( !gHasLong )
    {
        // Report this test's own name (the message used to say read_long_array)
        // and terminate the line.
        log_info( "read_ulong_array: Long types unsupported, skipping.\n" );
        return CL_SUCCESS;
    }

    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_ulong ), "ulong", 5,
                             stream_read_ulong_kernel_code, ulong_kernel_name,
                             verify_read_ulong );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_short data.
// Forwards to test_stream_read() with the short kernel source, kernel names and
// verify_read_short as the host-side checker, and returns its result unchanged.
int read_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_short ), "short", 5,
                             stream_read_short_kernel_code, short_kernel_name,
                             verify_read_short );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_ushort data.
// Forwards to test_stream_read() with the ushort kernel source, kernel names and
// verify_read_ushort as the host-side checker, and returns its result unchanged.
int read_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_ushort ), "ushort", 5,
                             stream_read_ushort_kernel_code, ushort_kernel_name,
                             verify_read_ushort );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_float data.
// Forwards to test_stream_read() with the float kernel source, kernel names and
// verify_read_float as the host-side checker, and returns its result unchanged.
int read_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_float ), "float", 5,
                             stream_read_float_kernel_code, float_kernel_name,
                             verify_read_float );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_half data.
// Forwards to test_stream_read() with the half kernel source, kernel names and
// verify_read_half as the host-side checker, and returns its result unchanged.
int read_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_half ), "half", 5,
                             stream_read_half_kernel_code, half_kernel_name,
                             verify_read_half );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_char data.
// Forwards to test_stream_read() with the char kernel source, kernel names and
// verify_read_char as the host-side checker, and returns its result unchanged.
int read_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_char ), "char", 5,
                             stream_read_char_kernel_code, char_kernel_name,
                             verify_read_char );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for cl_uchar data.
// Forwards to test_stream_read() with the uchar kernel source, kernel names and
// verify_read_uchar as the host-side checker, and returns its result unchanged.
int read_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements,
                             sizeof( cl_uchar ), "uchar", 5,
                             stream_read_uchar_kernel_code, uchar_kernel_name,
                             verify_read_uchar );
}
|
||||
|
||||
|
||||
// Profiled buffer-read test for TestStruct elements.
// Forwards to test_stream_read() with the struct kernel source, kernel name and
// verify_read_struct as the host-side checker. Unlike the scalar variants it
// passes 1 rather than 5 as the pass count argument.
int read_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_stream_read( device, context, queue, num_elements,
                             sizeof( TestStruct ), "struct", 1,
                             stream_read_struct_kernel_code, struct_kernel_name,
                             verify_read_struct );
}
|
||||
|
||||
/*
|
||||
int read_struct_array(cl_device_group device, cl_device id, cl_context context, int num_elements)
|
||||
{
|
||||
cl_mem streams[1];
|
||||
TestStruct *output_ptr;
|
||||
cl_program program[1];
|
||||
cl_kernel kernel[1];
|
||||
void *values[1];
|
||||
size_t sizes[1] = { sizeof(cl_stream) };
|
||||
size_t threads[1];
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
size_t localThreads[1];
|
||||
#endif
|
||||
int err;
|
||||
size_t objSize = sizeof(TestStruct);
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
log_error( "Unable to get thread group max size: %d", err );
|
||||
return -1;
|
||||
}
|
||||
if( localThreads[0] > threads[0] )
|
||||
localThreads[0] = threads[0];
|
||||
#endif
|
||||
|
||||
output_ptr = malloc(objSize * num_elements);
|
||||
if( ! output_ptr ){
|
||||
log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
|
||||
return -1;
|
||||
}
|
||||
streams[0] = clCreateBuffer( device, (cl_mem_flags)(CL_MEM_READ_WRITE), objSize * num_elements, NULL );
|
||||
if( !streams[0] ){
|
||||
log_error( " clCreateBuffer failed\n" );
|
||||
free( output_ptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_program_and_kernel( device, stream_read_struct_kernel_code, "test_stream_read_struct", &program[0], &kernel[0]);
|
||||
if( err ){
|
||||
clReleaseProgram( program[0] );
|
||||
free( output_ptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&streams[0] );
|
||||
if( err != CL_SUCCESS){
|
||||
print_error( err, "clSetKernelArg failed" );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseMemObject( streams[0] );
|
||||
free( output_ptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
#else
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
#endif
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clEnqueueNDRangeKernel failed" );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseMemObject( streams[0] );
|
||||
free( output_ptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
|
||||
if( err != CL_SUCCESS){
|
||||
print_error( err, "clEnqueueReadBuffer failed" );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseMemObject( streams[0] );
|
||||
free( output_ptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (verify_read_struct(output_ptr, num_elements)){
|
||||
log_error(" struct test failed\n");
|
||||
err = -1;
|
||||
}
|
||||
else{
|
||||
log_info(" struct test passed\n");
|
||||
err = 0;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseMemObject( streams[0] );
|
||||
free( output_ptr );
|
||||
|
||||
return err;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
386
test_conformance/profiling/readImage.c
Normal file
386
test_conformance/profiling/readImage.c
Normal file
@@ -0,0 +1,386 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
//--- the code for the kernel executables
|
||||
// OpenCL C sources for the three kernels that populate the image under test.
// Each work-item computes indx = (tid_y * image width + tid_x) * 4, loads four
// consecutive bytes from the src buffer and writes them as one pixel at
// (tid_x, tid_y) of dstimg. The entries are indexed in parallel with
// readKernelName: [0] float write, [1] signed-int write, [2] unsigned-int write.
static const char *readKernelCode[] = {
|
||||
// [0] testWritef: bytes are converted to float, divided by 255 and stored with write_imagef.
"__kernel void testWritef(__global uchar *src, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
|
||||
" float4 color;\n"
|
||||
"\n"
|
||||
" indx *= 4;\n"
|
||||
" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
|
||||
" color /= (float4)(255.f, 255.f, 255.f, 255.f);\n"
|
||||
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
// [1] testWritei: signed chars are widened to int and stored with write_imagei.
"__kernel void testWritei(__global char *src, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
|
||||
" int4 color;\n"
|
||||
"\n"
|
||||
" indx *= 4;\n"
|
||||
" color.x = (int)src[indx+0];\n"
|
||||
" color.y = (int)src[indx+1];\n"
|
||||
" color.z = (int)src[indx+2];\n"
|
||||
" color.w = (int)src[indx+3];\n"
|
||||
" write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
// [2] testWriteui: unsigned chars are widened to uint and stored with write_imageui.
"__kernel void testWriteui(__global uchar *src, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
|
||||
" uint4 color;\n"
|
||||
"\n"
|
||||
" indx *= 4;\n"
|
||||
" color.x = (uint)src[indx+0];\n"
|
||||
" color.y = (uint)src[indx+1];\n"
|
||||
" color.z = (uint)src[indx+2];\n"
|
||||
" color.w = (uint)src[indx+3];\n"
|
||||
" write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
// Kernel entry-point names, in the same order as the sources in readKernelCode.
static const char *readKernelName[] = { "testWritef", "testWritei", "testWriteui" };
|
||||
|
||||
|
||||
//--- helper functions
|
||||
// Allocates n bytes and fills each one with the low 8 bits of a value drawn
// from the random generator d.
// Returns the malloc'd buffer, which the caller must free(), or NULL when the
// allocation fails.
static cl_uchar *generateImage( int n, MTdata d )
{
    cl_uchar *ptr = (cl_uchar *)malloc( n * sizeof( cl_uchar ) );
    int i;

    // Bail out before touching the buffer; the caller tests for NULL.
    if( ptr == NULL ){
        return NULL;
    }

    for( i = 0; i < n; i++ ){
        ptr[i] = (cl_uchar)genrand_int32( d );
    }

    return ptr;
}
|
||||
|
||||
|
||||
// Allocates n bytes and fills each one with a value drawn from the random
// generator d, truncated to char.
// Returns the malloc'd buffer, which the caller must free(), or NULL when the
// allocation fails.
static char *generateSignedImage( int n, MTdata d )
{
    char *ptr = (char *)malloc( n * sizeof( char ) );
    int i;

    // Bail out before touching the buffer; the caller tests for NULL.
    if( ptr == NULL ){
        return NULL;
    }

    for( i = 0; i < n; i++ ){
        ptr[i] = (char)genrand_int32( d );
    }

    return ptr;
}
|
||||
|
||||
|
||||
// Compares the first w * h * 4 bytes of outptr against image.
// Returns 0 when every byte matches and -1 at the first mismatch.
static int verifyImage( cl_uchar *image, cl_uchar *outptr, int w, int h )
{
    const int total = w * h * 4;   // four one-byte channels per pixel
    int pos = 0;

    while( pos < total ){
        if( image[pos] != outptr[pos] ){
            return -1;
        }
        pos++;
    }

    return 0;
}
|
||||
|
||||
|
||||
//----- the test functions
|
||||
int read_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements, const char *code, const char *name,
|
||||
cl_image_format image_format_desc )
|
||||
{
|
||||
cl_mem memobjs[2];
|
||||
cl_program program[1];
|
||||
void *inptr;
|
||||
void *dst = NULL;
|
||||
cl_kernel kernel[1];
|
||||
cl_event readEvent;
|
||||
cl_ulong queueStart, submitStart, readStart, readEnd;
|
||||
size_t threads[2];
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
size_t localThreads[2];
|
||||
#endif
|
||||
int err;
|
||||
int w = 64, h = 64;
|
||||
cl_mem_flags flags;
|
||||
size_t element_nbytes;
|
||||
size_t num_bytes;
|
||||
size_t channel_nbytes = sizeof( cl_uchar );
|
||||
MTdata d;
|
||||
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
element_nbytes = channel_nbytes * get_format_channel_count( &image_format_desc );
|
||||
num_bytes = w * h * element_nbytes;
|
||||
|
||||
threads[0] = (size_t)w;
|
||||
threads[1] = (size_t)h;
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
|
||||
test_error( err, "Unable to get thread group max size" );
|
||||
localThreads[1] = localThreads[0];
|
||||
if( localThreads[0] > threads[0] )
|
||||
localThreads[0] = threads[0];
|
||||
if( localThreads[1] > threads[1] )
|
||||
localThreads[1] = threads[1];
|
||||
#endif
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
|
||||
inptr = (void *)generateSignedImage( w * h * 4, d );
|
||||
else
|
||||
inptr = (void *)generateImage( w * h * 4, d );
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
if( ! inptr ){
|
||||
log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
dst = malloc( num_bytes );
|
||||
if( ! dst ){
|
||||
free( (void *)inptr );
|
||||
log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// allocate the input and output image memory objects
|
||||
flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
|
||||
memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
|
||||
if( memobjs[0] == (cl_mem)0 ){
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
log_error("unable to create Image2D\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * 4 * w * h, NULL, &err );
|
||||
if( memobjs[1] == (cl_mem)0 ){
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
log_error("unable to create array\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer( queue, memobjs[1], true, 0, num_bytes, inptr, 0, NULL, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &code, name );
|
||||
if( err ){
|
||||
log_error( "Unable to create program and kernel\n" );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[1] );
|
||||
err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[0] );
|
||||
if( err != CL_SUCCESS ){
|
||||
log_error( "clSetKernelArg failed\n" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
#else
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
#endif
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clEnqueueNDRangeKernel failed" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = { 0, 0, 0 };
|
||||
size_t region[3] = { w, h, 1 };
|
||||
err = clEnqueueReadImage( queue, memobjs[0], false, origin, region, 0, 0, dst, 0, NULL, &readEvent );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clReadImage2D failed" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// This synchronization point is needed in order to assume the data is valid.
|
||||
// Getting profiling information is not a synchronization point.
|
||||
err = clWaitForEvents( 1, &readEvent );
|
||||
if( err != CL_SUCCESS )
|
||||
{
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
while( ( err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
while( ( err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verifyImage( (cl_uchar *)inptr, (cl_uchar *)dst, w, h );
|
||||
if( err ){
|
||||
log_error( "Image failed to verify.\n" );
|
||||
}
|
||||
else{
|
||||
log_info( "Image verified.\n" );
|
||||
}
|
||||
|
||||
clReleaseEvent(readEvent);
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseProgram(program[0]);
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free(dst);
|
||||
free(inptr);
|
||||
|
||||
if (check_times(queueStart, submitStart, readStart, readEnd, device))
|
||||
err = -1;
|
||||
|
||||
return err;
|
||||
|
||||
} // end read_image()
|
||||
|
||||
|
||||
// Profiled image-read test for a CL_RGBA / CL_UNORM_INT8 image.
// Uses entry 0 of readKernelCode / readKernelName and returns read_image()'s result.
int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const char *kernel_source = readKernelCode[0];
    const char *kernel_entry = readKernelName[0];
    cl_image_format rgba_unorm8 = { CL_RGBA, CL_UNORM_INT8 };

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Source bytes span 0 to 255 for unsigned image data.
    return read_image( device, context, queue, numElements, kernel_source, kernel_entry, rgba_unorm8 );
}
|
||||
|
||||
|
||||
// Profiled image-read test for a CL_RGBA / CL_SIGNED_INT8 image.
// Uses entry 1 of readKernelCode / readKernelName and returns read_image()'s result.
int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const char *kernel_source = readKernelCode[1];
    const char *kernel_entry = readKernelName[1];
    cl_image_format rgba_sint8 = { CL_RGBA, CL_SIGNED_INT8 };

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Source bytes span -128 to 127 for signed image data.
    return read_image( device, context, queue, numElements, kernel_source, kernel_entry, rgba_sint8 );
}
|
||||
|
||||
|
||||
// Profiled image-read test for a CL_RGBA / CL_UNSIGNED_INT8 image.
// Uses entry 2 of readKernelCode / readKernelName and returns read_image()'s result.
int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    const char *kernel_source = readKernelCode[2];
    const char *kernel_entry = readKernelName[2];
    cl_image_format rgba_uint8 = { CL_RGBA, CL_UNSIGNED_INT8 };

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Source bytes span 0 to 255 for unsigned image data.
    return read_image( device, context, queue, numElements, kernel_source, kernel_entry, rgba_uint8 );
}
|
||||
|
||||
|
||||
1375
test_conformance/profiling/writeArray.c
Normal file
1375
test_conformance/profiling/writeArray.c
Normal file
File diff suppressed because it is too large
Load Diff
683
test_conformance/profiling/writeImage.c
Normal file
683
test_conformance/profiling/writeImage.c
Normal file
@@ -0,0 +1,683 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
//--- the code for the kernel executables
|
||||
static const char *readKernelCode[] = {
|
||||
"__kernel void testReadf(read_only image2d_t srcimg, __global float4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
|
||||
" float4 color;\n"
|
||||
"\n"
|
||||
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
|
||||
" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
" dst[indx].x = color.x;\n"
|
||||
" dst[indx].y = color.y;\n"
|
||||
" dst[indx].z = color.z;\n"
|
||||
" dst[indx].w = color.w;\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadi(read_only image2d_t srcimg, __global uchar4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
|
||||
" int4 color;\n"
|
||||
"\n"
|
||||
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
|
||||
" color = read_imagei(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
" uchar4 dst_write;\n"
|
||||
" dst_write.x = (uchar)color.x;\n"
|
||||
" dst_write.y = (uchar)color.y;\n"
|
||||
" dst_write.z = (uchar)color.z;\n"
|
||||
" dst_write.w = (uchar)color.w;\n"
|
||||
" dst[indx] = dst_write;\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadui(read_only image2d_t srcimg, __global uchar4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
|
||||
" uint4 color;\n"
|
||||
"\n"
|
||||
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
|
||||
" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
" uchar4 dst_write;\n"
|
||||
" dst_write.x = (uchar)color.x;\n"
|
||||
" dst_write.y = (uchar)color.y;\n"
|
||||
" dst_write.z = (uchar)color.z;\n"
|
||||
" dst_write.w = (uchar)color.w;\n"
|
||||
" dst[indx] = dst_write;\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testWritef(__global uchar *src, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
|
||||
" float4 color;\n"
|
||||
"\n"
|
||||
" indx *= 4;\n"
|
||||
" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
|
||||
" color /= (float4)(255.f, 255.f, 255.f, 255.f);\n"
|
||||
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testWritei(__global char *src, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
|
||||
" int4 color;\n"
|
||||
"\n"
|
||||
" indx *= 4;\n"
|
||||
" color.x = (int)src[indx+0];\n"
|
||||
" color.y = (int)src[indx+1];\n"
|
||||
" color.z = (int)src[indx+2];\n"
|
||||
" color.w = (int)src[indx+3];\n"
|
||||
" write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testWriteui(__global uchar *src, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
|
||||
" uint4 color;\n"
|
||||
"\n"
|
||||
" indx *= 4;\n"
|
||||
" color.x = (uint)src[indx+0];\n"
|
||||
" color.y = (uint)src[indx+1];\n"
|
||||
" color.z = (uint)src[indx+2];\n"
|
||||
" color.w = (uint)src[indx+3];\n"
|
||||
" write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadWriteff(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" float4 color;\n"
|
||||
"\n"
|
||||
" color = read_imagef(srcimg, CLK_DEFAULT_SAMPLER, (int2)(tid_x, tid_y));\n"
|
||||
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadWriteii(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int4 color;\n"
|
||||
"\n"
|
||||
" color = read_imagei(srcimg, CLK_DEFAULT_SAMPLER, (int2)(tid_x, tid_y));\n"
|
||||
" write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadWriteuiui(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" uint4 color;\n"
|
||||
"\n"
|
||||
" color = read_imageui(srcimg, CLK_DEFAULT_SAMPLER, (int2)(tid_x, tid_y));\n"
|
||||
" write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadWritefi(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" float4 colorf;\n"
|
||||
" int4 colori;\n"
|
||||
"\n"
|
||||
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
|
||||
" colorf = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
// since we are going from unsigned to signed, be sure to convert
|
||||
// values greater 0.5 to negative values
|
||||
" if( colorf.x >= 0.5f )\n"
|
||||
" colori.x = (int)( ( colorf.x - 1.f ) * 255.f );\n"
|
||||
" else\n"
|
||||
" colori.x = (int)( colorf.x * 255.f );\n"
|
||||
" if( colorf.y >= 0.5f )\n"
|
||||
" colori.y = (int)( ( colorf.y - 1.f ) * 255.f );\n"
|
||||
" else\n"
|
||||
" colori.y = (int)( colorf.y * 255.f );\n"
|
||||
" if( colorf.z >= 0.5f )\n"
|
||||
" colori.z = (int)( ( colorf.z - 1.f ) * 255.f );\n"
|
||||
" else\n"
|
||||
" colori.z = (int)( colorf.z * 255.f );\n"
|
||||
" if( colorf.w >= 0.5f )\n"
|
||||
" colori.w = (int)( ( colorf.w - 1.f ) * 255.f );\n"
|
||||
" else\n"
|
||||
" colori.w = (int)( colorf.w * 255.f );\n"
|
||||
" write_imagei(dstimg, (int2)(tid_x, tid_y), colori);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadWritefui(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" float4 colorf;\n"
|
||||
" uint4 colorui;\n"
|
||||
"\n"
|
||||
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
|
||||
" colorf = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
" colorui.x = (uint)( colorf.x * 255.f );\n"
|
||||
" colorui.y = (uint)( colorf.y * 255.f );\n"
|
||||
" colorui.z = (uint)( colorf.z * 255.f );\n"
|
||||
" colorui.w = (uint)( colorf.w * 255.f );\n"
|
||||
" write_imageui(dstimg, (int2)(tid_x, tid_y), colorui);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadWriteif(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int4 colori;\n"
|
||||
" float4 colorf;\n"
|
||||
"\n"
|
||||
// since we are going from signed to unsigned, we need to adjust the rgba values from
|
||||
// from the signed image to add 256 to the signed image values less than 0.
|
||||
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
|
||||
" colori = read_imagei(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
" if( colori.x < 0 )\n"
|
||||
" colorf.x = ( (float)colori.x + 256.f ) / 255.f;\n"
|
||||
" else\n"
|
||||
" colorf.x = (float)colori.x / 255.f;\n"
|
||||
" if( colori.y < 0 )\n"
|
||||
" colorf.y = ( (float)colori.y + 256.f ) / 255.f;\n"
|
||||
" else\n"
|
||||
" colorf.y = (float)colori.y / 255.f;\n"
|
||||
" if( colori.z < 0 )\n"
|
||||
" colorf.z = ( (float)colori.z + 256.f ) / 255.f;\n"
|
||||
" else\n"
|
||||
" colorf.z = (float)colori.z / 255.f;\n"
|
||||
" if( colori.w < 0 )\n"
|
||||
" colorf.w = ( (float)colori.w + 256.f ) / 255.f;\n"
|
||||
" else\n"
|
||||
" colorf.w = (float)colori.w / 255.f;\n"
|
||||
" write_imagef(dstimg, (int2)(tid_x, tid_y), colorf);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadWriteiui(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int4 colori;\n"
|
||||
" uint4 colorui;\n"
|
||||
"\n"
|
||||
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
|
||||
" colori = read_imagei(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
// since we are going from signed to unsigned, we need to adjust the rgba values from
|
||||
// the signed image by adding 256 to the signed image values less than 0.
|
||||
" if( colori.x < 0 )\n"
|
||||
" colorui.x = (uint)( colori.x + 256 );\n"
|
||||
" else\n"
|
||||
" colorui.x = (uint)colori.x;\n"
|
||||
" if( colori.y < 0 )\n"
|
||||
" colorui.y = (uint)( colori.y + 256 );\n"
|
||||
" else\n"
|
||||
" colorui.y = (uint)colori.y;\n"
|
||||
" if( colori.z < 0 )\n"
|
||||
" colorui.z = (uint)( colori.z + 256 );\n"
|
||||
" else\n"
|
||||
" colorui.z = (uint)colori.z;\n"
|
||||
" if( colori.w < 0 )\n"
|
||||
" colorui.w = (uint)( colori.w + 256 );\n"
|
||||
" else\n"
|
||||
" colorui.w = (uint)colori.w;\n"
|
||||
" write_imageui(dstimg, (int2)(tid_x, tid_y), colorui);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadWriteuif(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" uint4 colorui;\n"
|
||||
" float4 colorf;\n"
|
||||
"\n"
|
||||
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
|
||||
" colorui = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
" colorf.x = (float)colorui.x / 255.f;\n"
|
||||
" colorf.y = (float)colorui.y / 255.f;\n"
|
||||
" colorf.z = (float)colorui.z / 255.f;\n"
|
||||
" colorf.w = (float)colorui.w / 255.f;\n"
|
||||
" write_imagef(dstimg, (int2)(tid_x, tid_y), colorf);\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void testReadWriteuii(read_only image2d_t srcimg, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" uint4 colorui;\n"
|
||||
" int4 colori;\n"
|
||||
"\n"
|
||||
" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n"
|
||||
" colorui = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
// since we are going from unsigned to signed, be sure to convert
|
||||
// values greater than 0.5 (i.e. 128 and above) to negative values
|
||||
" if( colorui.x >= 128U )\n"
|
||||
" colori.x = (int)colorui.x - 256;\n"
|
||||
" else\n"
|
||||
" colori.x = (int)colorui.x;\n"
|
||||
" if( colorui.y >= 128U )\n"
|
||||
" colori.y = (int)colorui.y - 256;\n"
|
||||
" else\n"
|
||||
" colori.y = (int)colorui.y;\n"
|
||||
" if( colorui.z >= 128U )\n"
|
||||
" colori.z = (int)colorui.z - 256;\n"
|
||||
" else\n"
|
||||
" colori.z = (int)colorui.z;\n"
|
||||
" if( colorui.w >= 128U )\n"
|
||||
" colori.w = (int)colorui.w - 256;\n"
|
||||
" else\n"
|
||||
" colori.w = (int)colorui.w;\n"
|
||||
" write_imagei(dstimg, (int2)(tid_x, tid_y), colori);\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
// Kernel entry-point names, in table order. The write_*_image wrappers in this
// file pass readKernelName[i] together with readKernelCode[i] to write_image().
static const char *readKernelName[] = {
    // image -> buffer readers
    "testReadf",
    "testReadi",
    "testReadui",
    // buffer -> image writers
    "testWritef",
    "testWritei",
    "testWriteui",
    // image -> image, same channel type
    "testReadWriteff",
    "testReadWriteii",
    "testReadWriteuiui",
    // image -> image, converting between channel types
    "testReadWritefi",
    "testReadWritefui",
    "testReadWriteif",
    "testReadWriteiui",
    "testReadWriteuif",
    "testReadWriteuii"
};
|
||||
|
||||
|
||||
// Allocates a buffer of n cl_uchar elements and fills each one with the low
// byte of a value drawn from the random generator d.
//
// Returns the buffer (to be released with free() by the caller), or NULL when
// the allocation fails.
static cl_uchar *generateImage( int n, MTdata d )
{
    cl_uchar *ptr = (cl_uchar *)malloc( n * sizeof( cl_uchar ) );
    int i;

    // Do not write through a failed allocation: write_image() tests the
    // returned pointer for NULL and reports the error itself.
    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (cl_uchar)genrand_int32(d);
    }

    return ptr;
}
|
||||
|
||||
|
||||
// Allocates a buffer of n char elements and fills each one with a value drawn
// from the random generator d, truncated to char.
//
// Returns the buffer (to be released with free() by the caller), or NULL when
// the allocation fails.
// NOTE(review): the elements are plain `char`, whose signedness is
// implementation-defined; the bytes stored are the same either way.
static char *generateSignedImage( int n, MTdata d )
{
    char *ptr = (char *)malloc( n * sizeof( char ) );
    int i;

    // Do not write through a failed allocation: write_image() tests the
    // returned pointer for NULL and reports the error itself.
    if( ptr == NULL )
        return NULL;

    for( i = 0; i < n; i++ ){
        ptr[i] = (char)genrand_int32(d);
    }

    return ptr;
}
|
||||
|
||||
|
||||
// Byte-wise comparison of a w x h image with 4 bytes per pixel.
//
//   image  - expected bytes
//   outptr - bytes actually produced
//
// Logs the first mismatching offset and returns -1; returns 0 when all
// w * h * 4 bytes are equal.
static int verifyImage( cl_uchar *image, cl_uchar *outptr, int w, int h )
{
    const int total = w * h * 4;
    int idx = 0;

    while( idx < total )
    {
        if( image[idx] != outptr[idx] )
        {
            log_error("Image verification failed at offset %d. Actual value=%d, expected value=%d\n", idx, outptr[idx], image[idx]);
            return -1;
        }
        ++idx;
    }

    return 0;
}
|
||||
|
||||
// Compares w * h * 4 float results against double-precision reference values.
//
//   refptr - expected values
//   outptr - values actually produced
//
// A sample passes when it equals the reference rounded to float, or when the
// magnitude of Ulp_Error() for it is below 1.5. The first failing sample is
// logged and -1 is returned; 0 means every sample passed.
static int verifyImageFloat ( cl_double *refptr, cl_float *outptr, int w, int h )
{
    const int count = w * h * 4;
    int idx;

    for( idx = 0; idx < count; ++idx )
    {
        float ulps;

        // Exact match after rounding the reference to float: nothing to check.
        if( outptr[idx] == (float)refptr[idx] )
            continue;

        ulps = Ulp_Error( outptr[idx], refptr[idx] );

        // Only a comparison that is actually true lets the sample pass, so a
        // NaN error value falls through to the failure path.
        if( fabsf( ulps ) < 1.5f )
            continue;

        log_error( "ERROR: Data sample %d does not validate! Expected (%a), got (%a), ulp %f\n",
                   (int)idx, refptr[idx], outptr[ idx ], ulps );
        return -1;
    }

    return 0;
}
|
||||
|
||||
// Builds the double-precision reference for a w x h image with 4 bytes per
// pixel: every input byte is divided by 255.
//
// Returns a malloc'ed array of w * h * 4 doubles that the caller must free(),
// or NULL (after logging) when the allocation fails.
static double *prepareReference( cl_uchar *inptr, int w, int h)
{
    const int count = w * h * 4;
    double *reference = (double *)malloc( count * sizeof( double ) );
    int idx;

    if( reference == NULL )
    {
        log_error( "Unable to allocate refptr at %d x %d\n", (int)w, (int)h );
        return NULL;
    }

    for( idx = 0; idx < count; ++idx )
        reference[idx] = (double)inptr[idx] / 255.0;

    return reference;
}
|
||||
|
||||
//----- the test functions
|
||||
int write_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements, const char *code,
|
||||
const char *name, cl_image_format image_format_desc, int readFloat )
|
||||
{
|
||||
cl_mem memobjs[2];
|
||||
cl_program program[1];
|
||||
void *inptr;
|
||||
double *refptr = NULL;
|
||||
void *dst = NULL;
|
||||
cl_kernel kernel[1];
|
||||
cl_event writeEvent;
|
||||
cl_ulong queueStart, submitStart, writeStart, writeEnd;
|
||||
size_t threads[2];
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
size_t localThreads[2];
|
||||
#endif
|
||||
int err;
|
||||
int w = 64, h = 64;
|
||||
cl_mem_flags flags;
|
||||
size_t element_nbytes;
|
||||
size_t num_bytes;
|
||||
size_t channel_nbytes = sizeof( cl_uchar );
|
||||
MTdata d;
|
||||
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
if (readFloat)
|
||||
channel_nbytes = sizeof( cl_float );
|
||||
|
||||
element_nbytes = channel_nbytes * get_format_channel_count( &image_format_desc );
|
||||
num_bytes = w * h * element_nbytes;
|
||||
|
||||
threads[0] = (size_t)w;
|
||||
threads[1] = (size_t)h;
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
|
||||
test_error( err, "Unable to get thread group max size" );
|
||||
localThreads[1] = localThreads[0];
|
||||
if( localThreads[0] > threads[0] )
|
||||
localThreads[0] = threads[0];
|
||||
if( localThreads[1] > threads[1] )
|
||||
localThreads[1] = threads[1];
|
||||
#endif
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
|
||||
inptr = (void *)generateSignedImage( w * h * 4, d );
|
||||
else
|
||||
inptr = (void *)generateImage( w * h * 4, d );
|
||||
free_mtdata(d); d = NULL;
|
||||
if( ! inptr ){
|
||||
log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
dst = malloc( num_bytes );
|
||||
if( ! dst ){
|
||||
free( (void *)inptr );
|
||||
log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// allocate the input and output image memory objects
|
||||
flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
|
||||
memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
|
||||
if( memobjs[0] == (cl_mem)0 ){
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
log_error("unable to create Image2D\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * 4 * w * h, NULL, &err );
|
||||
if( memobjs[1] == (cl_mem)0 ){
|
||||
free( dst );
|
||||
free( (void *)inptr );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
log_error("unable to create array\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = { 0, 0, 0 };
|
||||
size_t region[3] = { w, h, 1 };
|
||||
err = clEnqueueWriteImage( queue, memobjs[0], false, origin, region, 0, 0, inptr, 0, NULL, &writeEvent );
|
||||
if( err != CL_SUCCESS ){
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
print_error(err, "clWriteImage failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// This synchronization point is needed in order to assume the data is valid.
|
||||
// Getting profiling information is not a synchronization point.
|
||||
err = clWaitForEvents( 1, &writeEvent );
|
||||
if( err != CL_SUCCESS )
|
||||
{
|
||||
print_error( err, "clWaitForEvents failed" );
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// test profiling
|
||||
while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
|
||||
CL_PROFILING_INFO_NOT_AVAILABLE );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clGetEventProfilingInfo failed" );
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &code, name );
|
||||
if( err ){
|
||||
log_error( "Unable to create program and kernel\n" );
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] );
|
||||
err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] );
|
||||
if( err != CL_SUCCESS ){
|
||||
log_error( "clSetKernelArg failed\n" );
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef USE_LOCAL_THREADS
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
#else
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
#endif
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clEnqueueNDRangeKernel failed" );
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer( queue, memobjs[1], true, 0, num_bytes, dst, 0, NULL, NULL );
|
||||
if( err != CL_SUCCESS ){
|
||||
print_error( err, "clEnqueueReadBuffer failed" );
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ( readFloat )
|
||||
{
|
||||
refptr = prepareReference( (cl_uchar *)inptr, w, h );
|
||||
if ( refptr )
|
||||
{
|
||||
err = verifyImageFloat( refptr, (cl_float *)dst, w, h );
|
||||
free ( refptr );
|
||||
}
|
||||
else
|
||||
err = -1;
|
||||
}
|
||||
else
|
||||
err = verifyImage( (cl_uchar *)inptr, (cl_uchar *)dst, w, h );
|
||||
|
||||
if( err )
|
||||
{
|
||||
log_error( "Image failed to verify.\n" );
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info( "Image verified.\n" );
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseProgram( program[0] );
|
||||
clReleaseMemObject(memobjs[0]);
|
||||
clReleaseMemObject(memobjs[1]);
|
||||
free( dst );
|
||||
free( inptr );
|
||||
|
||||
if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
|
||||
err = -1;
|
||||
|
||||
return err;
|
||||
|
||||
} // end write_image()
|
||||
|
||||
|
||||
// Runs write_image() on a CL_RGBA / CL_UNORM_INT8 image with the first kernel
// in the table (index 0); readFloat is 1, so the result is verified as floats.
// Returns whatever write_image() returns.
int write_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    // 0 to 255 for unsigned image data
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    const int kernelIndex = 0;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    return write_image( device, context, queue, numElements,
                        readKernelCode[kernelIndex], readKernelName[kernelIndex],
                        image_format_desc, 1 );
}
|
||||
|
||||
|
||||
// Runs write_image() on a CL_RGBA / CL_SIGNED_INT8 image with the kernel at
// table index 1; readFloat is 0, so the result is verified byte-for-byte.
// Returns whatever write_image() returns.
int write_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    // -128 to 127 for signed image data
    cl_image_format image_format_desc = { CL_RGBA, CL_SIGNED_INT8 };
    const int kernelIndex = 1;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    return write_image( device, context, queue, numElements,
                        readKernelCode[kernelIndex], readKernelName[kernelIndex],
                        image_format_desc, 0 );
}
|
||||
|
||||
|
||||
// Runs write_image() on a CL_RGBA / CL_UNSIGNED_INT8 image with the kernel at
// table index 2; readFloat is 0, so the result is verified byte-for-byte.
// Returns whatever write_image() returns.
int write_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    // 0 to 255 for unsigned image data
    cl_image_format image_format_desc = { CL_RGBA, CL_UNSIGNED_INT8 };
    const int kernelIndex = 2;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    return write_image( device, context, queue, numElements,
                        readKernelCode[kernelIndex], readKernelName[kernelIndex],
                        image_format_desc, 0 );
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user